From f94ab1f4d0affa08d0af9e8b3ed5d15217245ebc Mon Sep 17 00:00:00 2001 From: Matthew Vernon Date: Fri, 8 Nov 2024 16:27:46 +0000 Subject: [PATCH] Import pcre2_10.44.orig.tar.gz [dgit import orig pcre2_10.44.orig.tar.gz] --- 132html | 317 + AUTHORS | 36 + CMakeLists.txt | 1233 + COPYING | 5 + ChangeLog | 3097 +++ CheckMan | 78 + CleanTxt | 113 + Detrail | 35 + HACKING | 853 + INSTALL | 368 + LICENCE | 94 + Makefile.am | 967 + Makefile.in | 3967 ++++ NEWS | 492 + NON-AUTOTOOLS-BUILD | 430 + PrepareRelease | 257 + README | 956 + RunGrepTest | 1067 + RunGrepTest.bat | 699 + RunTest | 916 + RunTest.bat | 528 + aclocal.m4 | 1561 ++ ar-lib | 271 + cmake/COPYING-CMAKE-SCRIPTS | 22 + cmake/FindEditline.cmake | 16 + cmake/FindPackageHandleStandardArgs.cmake | 58 + cmake/FindReadline.cmake | 29 + cmake/pcre2-config-version.cmake.in | 15 + cmake/pcre2-config.cmake.in | 148 + compile | 348 + config-cmake.h.in | 56 + config.guess | 1748 ++ config.sub | 1884 ++ configure | 19255 ++++++++++++++++ configure.ac | 1190 + depcomp | 791 + doc/html/NON-AUTOTOOLS-BUILD.txt | 430 + doc/html/README.txt | 956 + doc/html/index.html | 318 + doc/html/pcre2-config.html | 102 + doc/html/pcre2.html | 214 + doc/html/pcre2_callout_enumerate.html | 63 + doc/html/pcre2_code_copy.html | 43 + doc/html/pcre2_code_copy_with_tables.html | 44 + doc/html/pcre2_code_free.html | 42 + doc/html/pcre2_compile.html | 119 + doc/html/pcre2_compile_context_copy.html | 41 + doc/html/pcre2_compile_context_create.html | 42 + doc/html/pcre2_compile_context_free.html | 41 + doc/html/pcre2_config.html | 84 + doc/html/pcre2_convert_context_copy.html | 40 + doc/html/pcre2_convert_context_create.html | 41 + doc/html/pcre2_convert_context_free.html | 40 + doc/html/pcre2_converted_pattern_free.html | 40 + doc/html/pcre2_dfa_match.html | 86 + doc/html/pcre2_general_context_copy.html | 42 + doc/html/pcre2_general_context_create.html | 44 + doc/html/pcre2_general_context_free.html | 40 + doc/html/pcre2_get_error_message.html | 51 
+ doc/html/pcre2_get_mark.html | 47 + .../pcre2_get_match_data_heapframes_size.html | 40 + doc/html/pcre2_get_match_data_size.html | 39 + doc/html/pcre2_get_ovector_count.html | 39 + doc/html/pcre2_get_ovector_pointer.html | 40 + doc/html/pcre2_get_startchar.html | 44 + doc/html/pcre2_jit_compile.html | 63 + doc/html/pcre2_jit_free_unused_memory.html | 43 + doc/html/pcre2_jit_match.html | 70 + doc/html/pcre2_jit_stack_assign.html | 75 + doc/html/pcre2_jit_stack_create.html | 50 + doc/html/pcre2_jit_stack_free.html | 43 + doc/html/pcre2_maketables.html | 48 + doc/html/pcre2_maketables_free.html | 44 + doc/html/pcre2_match.html | 87 + doc/html/pcre2_match_context_copy.html | 41 + doc/html/pcre2_match_context_create.html | 42 + doc/html/pcre2_match_context_free.html | 41 + doc/html/pcre2_match_data_create.html | 50 + .../pcre2_match_data_create_from_pattern.html | 53 + doc/html/pcre2_match_data_free.html | 48 + doc/html/pcre2_pattern_convert.html | 70 + doc/html/pcre2_pattern_info.html | 109 + doc/html/pcre2_serialize_decode.html | 65 + doc/html/pcre2_serialize_encode.html | 66 + doc/html/pcre2_serialize_free.html | 41 + .../pcre2_serialize_get_number_of_codes.html | 49 + doc/html/pcre2_set_bsr.html | 42 + doc/html/pcre2_set_callout.html | 43 + doc/html/pcre2_set_character_tables.html | 45 + doc/html/pcre2_set_compile_extra_options.html | 54 + .../pcre2_set_compile_recursion_guard.html | 46 + doc/html/pcre2_set_depth_limit.html | 40 + doc/html/pcre2_set_glob_escape.html | 43 + doc/html/pcre2_set_glob_separator.html | 42 + doc/html/pcre2_set_heap_limit.html | 40 + doc/html/pcre2_set_match_limit.html | 40 + ...pcre2_set_max_pattern_compiled_length.html | 44 + doc/html/pcre2_set_max_pattern_length.html | 43 + doc/html/pcre2_set_max_varlookbehind.html | 42 + doc/html/pcre2_set_newline.html | 51 + doc/html/pcre2_set_offset_limit.html | 40 + doc/html/pcre2_set_parens_nest_limit.html | 40 + doc/html/pcre2_set_recursion_limit.html | 40 + 
...pcre2_set_recursion_memory_management.html | 42 + doc/html/pcre2_set_substitute_callout.html | 43 + doc/html/pcre2_substitute.html | 111 + doc/html/pcre2_substring_copy_byname.html | 58 + doc/html/pcre2_substring_copy_bynumber.html | 57 + doc/html/pcre2_substring_free.html | 41 + doc/html/pcre2_substring_get_byname.html | 60 + doc/html/pcre2_substring_get_bynumber.html | 58 + doc/html/pcre2_substring_length_byname.html | 46 + doc/html/pcre2_substring_length_bynumber.html | 48 + doc/html/pcre2_substring_list_free.html | 41 + doc/html/pcre2_substring_list_get.html | 56 + doc/html/pcre2_substring_nametable_scan.html | 53 + .../pcre2_substring_number_from_name.html | 50 + doc/html/pcre2api.html | 4186 ++++ doc/html/pcre2build.html | 652 + doc/html/pcre2callout.html | 480 + doc/html/pcre2compat.html | 276 + doc/html/pcre2convert.html | 191 + doc/html/pcre2demo.html | 518 + doc/html/pcre2grep.html | 1125 + doc/html/pcre2jit.html | 496 + doc/html/pcre2limits.html | 105 + doc/html/pcre2matching.html | 253 + doc/html/pcre2partial.html | 408 + doc/html/pcre2pattern.html | 3855 ++++ doc/html/pcre2perform.html | 280 + doc/html/pcre2posix.html | 379 + doc/html/pcre2sample.html | 110 + doc/html/pcre2serialize.html | 212 + doc/html/pcre2syntax.html | 635 + doc/html/pcre2test.html | 2213 ++ doc/html/pcre2unicode.html | 522 + doc/index.html.src | 318 + doc/pcre2-config.1 | 86 + doc/pcre2-config.txt | 85 + doc/pcre2.3 | 208 + doc/pcre2.txt | 11980 ++++++++++ doc/pcre2_callout_enumerate.3 | 51 + doc/pcre2_code_copy.3 | 31 + doc/pcre2_code_copy_with_tables.3 | 32 + doc/pcre2_code_free.3 | 30 + doc/pcre2_compile.3 | 106 + doc/pcre2_compile_context_copy.3 | 29 + doc/pcre2_compile_context_create.3 | 30 + doc/pcre2_compile_context_free.3 | 29 + doc/pcre2_config.3 | 76 + doc/pcre2_convert_context_copy.3 | 26 + doc/pcre2_convert_context_create.3 | 27 + doc/pcre2_convert_context_free.3 | 26 + doc/pcre2_converted_pattern_free.3 | 26 + doc/pcre2_dfa_match.3 | 86 + 
doc/pcre2_general_context_copy.3 | 30 + doc/pcre2_general_context_create.3 | 32 + doc/pcre2_general_context_free.3 | 28 + doc/pcre2_get_error_message.3 | 39 + doc/pcre2_get_mark.3 | 34 + doc/pcre2_get_match_data_heapframes_size.3 | 28 + doc/pcre2_get_match_data_size.3 | 27 + doc/pcre2_get_ovector_count.3 | 27 + doc/pcre2_get_ovector_pointer.3 | 28 + doc/pcre2_get_startchar.3 | 32 + doc/pcre2_jit_compile.3 | 51 + doc/pcre2_jit_free_unused_memory.3 | 31 + doc/pcre2_jit_match.3 | 58 + doc/pcre2_jit_stack_assign.3 | 59 + doc/pcre2_jit_stack_create.3 | 40 + doc/pcre2_jit_stack_free.3 | 32 + doc/pcre2_maketables.3 | 36 + doc/pcre2_maketables_free.3 | 31 + doc/pcre2_match.3 | 86 + doc/pcre2_match_context_copy.3 | 29 + doc/pcre2_match_context_create.3 | 30 + doc/pcre2_match_context_free.3 | 29 + doc/pcre2_match_data_create.3 | 37 + doc/pcre2_match_data_create_from_pattern.3 | 40 + doc/pcre2_match_data_free.3 | 35 + doc/pcre2_pattern_convert.3 | 55 + doc/pcre2_pattern_info.3 | 108 + doc/pcre2_serialize_decode.3 | 53 + doc/pcre2_serialize_encode.3 | 54 + doc/pcre2_serialize_free.3 | 29 + doc/pcre2_serialize_get_number_of_codes.3 | 37 + doc/pcre2_set_bsr.3 | 30 + doc/pcre2_set_callout.3 | 31 + doc/pcre2_set_character_tables.3 | 35 + doc/pcre2_set_compile_extra_options.3 | 55 + doc/pcre2_set_compile_recursion_guard.3 | 34 + doc/pcre2_set_depth_limit.3 | 28 + doc/pcre2_set_glob_escape.3 | 29 + doc/pcre2_set_glob_separator.3 | 28 + doc/pcre2_set_heap_limit.3 | 28 + doc/pcre2_set_match_limit.3 | 28 + doc/pcre2_set_max_pattern_compiled_length.3 | 32 + doc/pcre2_set_max_pattern_length.3 | 31 + doc/pcre2_set_max_varlookbehind.3 | 30 + doc/pcre2_set_newline.3 | 39 + doc/pcre2_set_offset_limit.3 | 28 + doc/pcre2_set_parens_nest_limit.3 | 28 + doc/pcre2_set_recursion_limit.3 | 28 + doc/pcre2_set_recursion_memory_management.3 | 30 + doc/pcre2_set_substitute_callout.3 | 31 + doc/pcre2_substitute.3 | 110 + doc/pcre2_substring_copy_byname.3 | 46 + doc/pcre2_substring_copy_bynumber.3 | 44 + 
doc/pcre2_substring_free.3 | 28 + doc/pcre2_substring_get_byname.3 | 48 + doc/pcre2_substring_get_bynumber.3 | 45 + doc/pcre2_substring_length_byname.3 | 34 + doc/pcre2_substring_length_bynumber.3 | 36 + doc/pcre2_substring_list_free.3 | 28 + doc/pcre2_substring_list_get.3 | 44 + doc/pcre2_substring_nametable_scan.3 | 41 + doc/pcre2_substring_number_from_name.3 | 38 + doc/pcre2api.3 | 4187 ++++ doc/pcre2build.3 | 665 + doc/pcre2callout.3 | 457 + doc/pcre2compat.3 | 236 + doc/pcre2convert.3 | 164 + doc/pcre2demo.3 | 526 + doc/pcre2grep.1 | 1018 + doc/pcre2grep.txt | 1095 + doc/pcre2jit.3 | 481 + doc/pcre2limits.3 | 81 + doc/pcre2matching.3 | 222 + doc/pcre2partial.3 | 373 + doc/pcre2pattern.3 | 3896 ++++ doc/pcre2perform.3 | 260 + doc/pcre2posix.3 | 348 + doc/pcre2sample.3 | 99 + doc/pcre2serialize.3 | 198 + doc/pcre2syntax.3 | 618 + doc/pcre2test.1 | 2186 ++ doc/pcre2test.txt | 2014 ++ doc/pcre2unicode.3 | 485 + install-sh | 541 + libpcre2-16.pc.in | 13 + libpcre2-32.pc.in | 13 + libpcre2-8.pc.in | 13 + libpcre2-posix.pc.in | 13 + ltmain.sh | 11338 +++++++++ m4/ax_pthread.m4 | 309 + m4/libtool.m4 | 8333 +++++++ m4/ltoptions.m4 | 437 + m4/ltsugar.m4 | 124 + m4/ltversion.m4 | 24 + m4/lt~obsolete.m4 | 99 + m4/pcre2_visibility.m4 | 88 + missing | 215 + pcre2-config.in | 121 + perltest.sh | 402 + src/config.h.generic | 483 + src/config.h.in | 460 + src/pcre2.h.generic | 1010 + src/pcre2.h.in | 1010 + src/pcre2_auto_possess.c | 1371 ++ src/pcre2_chartables.c.dist | 196 + src/pcre2_chkdint.c | 96 + src/pcre2_compile.c | 11001 +++++++++ src/pcre2_config.c | 252 + src/pcre2_context.c | 510 + src/pcre2_convert.c | 1189 + src/pcre2_dfa_match.c | 4119 ++++ src/pcre2_dftables.c | 297 + src/pcre2_error.c | 345 + src/pcre2_extuni.c | 162 + src/pcre2_find_bracket.c | 219 + src/pcre2_fuzzsupport.c | 792 + src/pcre2_internal.h | 2084 ++ src/pcre2_intmodedep.h | 940 + src/pcre2_jit_compile.c | 14972 ++++++++++++ src/pcre2_jit_match.c | 200 + src/pcre2_jit_misc.c | 234 + 
src/pcre2_jit_neon_inc.h | 354 + src/pcre2_jit_simd_inc.h | 2355 ++ src/pcre2_jit_test.c | 2528 ++ src/pcre2_maketables.c | 165 + src/pcre2_match.c | 7777 +++++++ src/pcre2_match_data.c | 185 + src/pcre2_newline.c | 243 + src/pcre2_ord2utf.c | 120 + src/pcre2_pattern_info.c | 432 + src/pcre2_printint.c | 886 + src/pcre2_script_run.c | 344 + src/pcre2_serialize.c | 286 + src/pcre2_string_utils.c | 237 + src/pcre2_study.c | 1915 ++ src/pcre2_substitute.c | 1009 + src/pcre2_substring.c | 550 + src/pcre2_tables.c | 234 + src/pcre2_ucd.c | 5460 +++++ src/pcre2_ucp.h | 396 + src/pcre2_ucptables.c | 1533 ++ src/pcre2_valid_utf.c | 398 + src/pcre2_xclass.c | 308 + src/pcre2demo.c | 497 + src/pcre2grep.c | 4606 ++++ src/pcre2posix.c | 425 + src/pcre2posix.h | 187 + src/pcre2posix_test.c | 209 + src/pcre2test.c | 9708 ++++++++ .../allocator_src/sljitExecAllocatorApple.c | 137 + .../allocator_src/sljitExecAllocatorCore.c | 327 + .../allocator_src/sljitExecAllocatorFreeBSD.c | 89 + .../allocator_src/sljitExecAllocatorPosix.c | 62 + .../allocator_src/sljitExecAllocatorWindows.c | 40 + .../sljitProtExecAllocatorNetBSD.c | 72 + .../sljitProtExecAllocatorPosix.c | 172 + .../allocator_src/sljitWXExecAllocatorPosix.c | 141 + .../sljitWXExecAllocatorWindows.c | 102 + src/sljit/sljitConfig.h | 142 + src/sljit/sljitConfigCPU.h | 188 + src/sljit/sljitConfigInternal.h | 907 + src/sljit/sljitLir.c | 3561 +++ src/sljit/sljitLir.h | 2466 ++ src/sljit/sljitNativeARM_32.c | 4636 ++++ src/sljit/sljitNativeARM_64.c | 3491 +++ src/sljit/sljitNativeARM_T2_32.c | 4302 ++++ src/sljit/sljitNativeLOONGARCH_64.c | 3765 +++ src/sljit/sljitNativeMIPS_32.c | 472 + src/sljit/sljitNativeMIPS_64.c | 387 + src/sljit/sljitNativeMIPS_common.c | 4259 ++++ src/sljit/sljitNativePPC_32.c | 485 + src/sljit/sljitNativePPC_64.c | 719 + src/sljit/sljitNativePPC_common.c | 3161 +++ src/sljit/sljitNativeRISCV_32.c | 142 + src/sljit/sljitNativeRISCV_64.c | 222 + src/sljit/sljitNativeRISCV_common.c | 3121 +++ 
src/sljit/sljitNativeS390X.c | 4526 ++++ src/sljit/sljitNativeX86_32.c | 1685 ++ src/sljit/sljitNativeX86_64.c | 1398 ++ src/sljit/sljitNativeX86_common.c | 5001 ++++ src/sljit/sljitSerialize.c | 516 + src/sljit/sljitUtils.c | 344 + test-driver | 153 + testdata/grepbinary | Bin 0 -> 45 bytes testdata/grepfilelist | 3 + testdata/grepinput | 624 + testdata/grepinput3 | 15 + testdata/grepinput8 | 17 + testdata/grepinputC.bz2 | Bin 0 -> 313 bytes testdata/grepinputC.gz | Bin 0 -> 311 bytes testdata/grepinputM | 17 + testdata/grepinputv | 9 + testdata/grepinputx | 43 + testdata/greplist | 7 + testdata/grepnot.bz2 | 43 + testdata/grepoutput | 1255 + testdata/grepoutput8 | 47 + testdata/grepoutputC | 62 + testdata/grepoutputCN | 38 + testdata/grepoutputCNU | 18 + testdata/grepoutputCU | 30 + testdata/grepoutputCbz2 | 6 + testdata/grepoutputCgz | 3 + testdata/grepoutputN | 27 + testdata/grepoutputUN | 3 + testdata/greppatN4 | 2 + testdata/testbtables | Bin 0 -> 1088 bytes testdata/testinput1 | 6657 ++++++ testdata/testinput10 | 648 + testdata/testinput11 | 374 + testdata/testinput12 | 606 + testdata/testinput13 | 22 + testdata/testinput14 | 108 + testdata/testinput15 | 253 + testdata/testinput16 | 9 + testdata/testinput17 | 316 + testdata/testinput18 | 147 + testdata/testinput19 | 25 + testdata/testinput2 | 6114 +++++ testdata/testinput20 | 108 + testdata/testinput21 | 16 + testdata/testinput22 | 107 + testdata/testinput23 | 7 + testdata/testinput24 | 396 + testdata/testinput25 | 22 + testdata/testinput26 | 2750 +++ testdata/testinput3 | 109 + testdata/testinput4 | 2874 +++ testdata/testinput5 | 2534 ++ testdata/testinput6 | 5045 ++++ testdata/testinput7 | 2534 ++ testdata/testinput8 | 189 + testdata/testinput9 | 266 + testdata/testinputEBC | 137 + testdata/testinputheap | 35 + testdata/testoutput1 | 10506 +++++++++ testdata/testoutput10 | 1928 ++ testdata/testoutput11-16 | 668 + testdata/testoutput11-32 | 674 + testdata/testoutput12-16 | 1868 ++ testdata/testoutput12-32 | 
1858 ++ testdata/testoutput13 | 27 + testdata/testoutput14-16 | 163 + testdata/testoutput14-32 | 159 + testdata/testoutput14-8 | 163 + testdata/testoutput15 | 540 + testdata/testoutput16 | 18 + testdata/testoutput17 | 570 + testdata/testoutput18 | 230 + testdata/testoutput19 | 30 + testdata/testoutput2 | 18070 +++++++++++++++ testdata/testoutput20 | 161 + testdata/testoutput21 | 94 + testdata/testoutput22-16 | 182 + testdata/testoutput22-32 | 180 + testdata/testoutput22-8 | 184 + testdata/testoutput23 | 8 + testdata/testoutput24 | 624 + testdata/testoutput25 | 25 + testdata/testoutput26 | 3511 +++ testdata/testoutput3 | 170 + testdata/testoutput3A | 170 + testdata/testoutput3B | 170 + testdata/testoutput4 | 4605 ++++ testdata/testoutput5 | 5518 +++++ testdata/testoutput6 | 7923 +++++++ testdata/testoutput7 | 4230 ++++ testdata/testoutput8-16-2 | 1076 + testdata/testoutput8-16-3 | 1074 + testdata/testoutput8-16-4 | 1074 + testdata/testoutput8-32-2 | 1074 + testdata/testoutput8-32-3 | 1074 + testdata/testoutput8-32-4 | 1074 + testdata/testoutput8-8-2 | 1076 + testdata/testoutput8-8-3 | 1074 + testdata/testoutput8-8-4 | 1074 + testdata/testoutput9 | 374 + testdata/testoutputEBC | 206 + testdata/testoutputheap-16 | 88 + testdata/testoutputheap-32 | 88 + testdata/testoutputheap-8 | 88 + testdata/valgrind-jit.supp | 15 + testdata/wintestinput3 | 104 + testdata/wintestoutput3 | 175 + vms/configure.com | 1144 + vms/openvms_readme.txt | 20 + vms/pcre2.h_patch | 12 + vms/stdint.h | 9 + 443 files changed, 372466 insertions(+) create mode 100755 132html create mode 100644 AUTHORS create mode 100644 CMakeLists.txt create mode 100644 COPYING create mode 100644 ChangeLog create mode 100755 CheckMan create mode 100755 CleanTxt create mode 100755 Detrail create mode 100644 HACKING create mode 100644 INSTALL create mode 100644 LICENCE create mode 100644 Makefile.am create mode 100644 Makefile.in create mode 100644 NEWS create mode 100644 NON-AUTOTOOLS-BUILD create mode 100755 
PrepareRelease create mode 100644 README create mode 100755 RunGrepTest create mode 100644 RunGrepTest.bat create mode 100755 RunTest create mode 100644 RunTest.bat create mode 100644 aclocal.m4 create mode 100755 ar-lib create mode 100644 cmake/COPYING-CMAKE-SCRIPTS create mode 100644 cmake/FindEditline.cmake create mode 100644 cmake/FindPackageHandleStandardArgs.cmake create mode 100644 cmake/FindReadline.cmake create mode 100644 cmake/pcre2-config-version.cmake.in create mode 100644 cmake/pcre2-config.cmake.in create mode 100755 compile create mode 100644 config-cmake.h.in create mode 100755 config.guess create mode 100755 config.sub create mode 100755 configure create mode 100644 configure.ac create mode 100755 depcomp create mode 100644 doc/html/NON-AUTOTOOLS-BUILD.txt create mode 100644 doc/html/README.txt create mode 100644 doc/html/index.html create mode 100644 doc/html/pcre2-config.html create mode 100644 doc/html/pcre2.html create mode 100644 doc/html/pcre2_callout_enumerate.html create mode 100644 doc/html/pcre2_code_copy.html create mode 100644 doc/html/pcre2_code_copy_with_tables.html create mode 100644 doc/html/pcre2_code_free.html create mode 100644 doc/html/pcre2_compile.html create mode 100644 doc/html/pcre2_compile_context_copy.html create mode 100644 doc/html/pcre2_compile_context_create.html create mode 100644 doc/html/pcre2_compile_context_free.html create mode 100644 doc/html/pcre2_config.html create mode 100644 doc/html/pcre2_convert_context_copy.html create mode 100644 doc/html/pcre2_convert_context_create.html create mode 100644 doc/html/pcre2_convert_context_free.html create mode 100644 doc/html/pcre2_converted_pattern_free.html create mode 100644 doc/html/pcre2_dfa_match.html create mode 100644 doc/html/pcre2_general_context_copy.html create mode 100644 doc/html/pcre2_general_context_create.html create mode 100644 doc/html/pcre2_general_context_free.html create mode 100644 doc/html/pcre2_get_error_message.html create mode 100644 
doc/html/pcre2_get_mark.html create mode 100644 doc/html/pcre2_get_match_data_heapframes_size.html create mode 100644 doc/html/pcre2_get_match_data_size.html create mode 100644 doc/html/pcre2_get_ovector_count.html create mode 100644 doc/html/pcre2_get_ovector_pointer.html create mode 100644 doc/html/pcre2_get_startchar.html create mode 100644 doc/html/pcre2_jit_compile.html create mode 100644 doc/html/pcre2_jit_free_unused_memory.html create mode 100644 doc/html/pcre2_jit_match.html create mode 100644 doc/html/pcre2_jit_stack_assign.html create mode 100644 doc/html/pcre2_jit_stack_create.html create mode 100644 doc/html/pcre2_jit_stack_free.html create mode 100644 doc/html/pcre2_maketables.html create mode 100644 doc/html/pcre2_maketables_free.html create mode 100644 doc/html/pcre2_match.html create mode 100644 doc/html/pcre2_match_context_copy.html create mode 100644 doc/html/pcre2_match_context_create.html create mode 100644 doc/html/pcre2_match_context_free.html create mode 100644 doc/html/pcre2_match_data_create.html create mode 100644 doc/html/pcre2_match_data_create_from_pattern.html create mode 100644 doc/html/pcre2_match_data_free.html create mode 100644 doc/html/pcre2_pattern_convert.html create mode 100644 doc/html/pcre2_pattern_info.html create mode 100644 doc/html/pcre2_serialize_decode.html create mode 100644 doc/html/pcre2_serialize_encode.html create mode 100644 doc/html/pcre2_serialize_free.html create mode 100644 doc/html/pcre2_serialize_get_number_of_codes.html create mode 100644 doc/html/pcre2_set_bsr.html create mode 100644 doc/html/pcre2_set_callout.html create mode 100644 doc/html/pcre2_set_character_tables.html create mode 100644 doc/html/pcre2_set_compile_extra_options.html create mode 100644 doc/html/pcre2_set_compile_recursion_guard.html create mode 100644 doc/html/pcre2_set_depth_limit.html create mode 100644 doc/html/pcre2_set_glob_escape.html create mode 100644 doc/html/pcre2_set_glob_separator.html create mode 100644 
doc/html/pcre2_set_heap_limit.html create mode 100644 doc/html/pcre2_set_match_limit.html create mode 100644 doc/html/pcre2_set_max_pattern_compiled_length.html create mode 100644 doc/html/pcre2_set_max_pattern_length.html create mode 100644 doc/html/pcre2_set_max_varlookbehind.html create mode 100644 doc/html/pcre2_set_newline.html create mode 100644 doc/html/pcre2_set_offset_limit.html create mode 100644 doc/html/pcre2_set_parens_nest_limit.html create mode 100644 doc/html/pcre2_set_recursion_limit.html create mode 100644 doc/html/pcre2_set_recursion_memory_management.html create mode 100644 doc/html/pcre2_set_substitute_callout.html create mode 100644 doc/html/pcre2_substitute.html create mode 100644 doc/html/pcre2_substring_copy_byname.html create mode 100644 doc/html/pcre2_substring_copy_bynumber.html create mode 100644 doc/html/pcre2_substring_free.html create mode 100644 doc/html/pcre2_substring_get_byname.html create mode 100644 doc/html/pcre2_substring_get_bynumber.html create mode 100644 doc/html/pcre2_substring_length_byname.html create mode 100644 doc/html/pcre2_substring_length_bynumber.html create mode 100644 doc/html/pcre2_substring_list_free.html create mode 100644 doc/html/pcre2_substring_list_get.html create mode 100644 doc/html/pcre2_substring_nametable_scan.html create mode 100644 doc/html/pcre2_substring_number_from_name.html create mode 100644 doc/html/pcre2api.html create mode 100644 doc/html/pcre2build.html create mode 100644 doc/html/pcre2callout.html create mode 100644 doc/html/pcre2compat.html create mode 100644 doc/html/pcre2convert.html create mode 100644 doc/html/pcre2demo.html create mode 100644 doc/html/pcre2grep.html create mode 100644 doc/html/pcre2jit.html create mode 100644 doc/html/pcre2limits.html create mode 100644 doc/html/pcre2matching.html create mode 100644 doc/html/pcre2partial.html create mode 100644 doc/html/pcre2pattern.html create mode 100644 doc/html/pcre2perform.html create mode 100644 doc/html/pcre2posix.html 
create mode 100644 doc/html/pcre2sample.html create mode 100644 doc/html/pcre2serialize.html create mode 100644 doc/html/pcre2syntax.html create mode 100644 doc/html/pcre2test.html create mode 100644 doc/html/pcre2unicode.html create mode 100644 doc/index.html.src create mode 100644 doc/pcre2-config.1 create mode 100644 doc/pcre2-config.txt create mode 100644 doc/pcre2.3 create mode 100644 doc/pcre2.txt create mode 100644 doc/pcre2_callout_enumerate.3 create mode 100644 doc/pcre2_code_copy.3 create mode 100644 doc/pcre2_code_copy_with_tables.3 create mode 100644 doc/pcre2_code_free.3 create mode 100644 doc/pcre2_compile.3 create mode 100644 doc/pcre2_compile_context_copy.3 create mode 100644 doc/pcre2_compile_context_create.3 create mode 100644 doc/pcre2_compile_context_free.3 create mode 100644 doc/pcre2_config.3 create mode 100644 doc/pcre2_convert_context_copy.3 create mode 100644 doc/pcre2_convert_context_create.3 create mode 100644 doc/pcre2_convert_context_free.3 create mode 100644 doc/pcre2_converted_pattern_free.3 create mode 100644 doc/pcre2_dfa_match.3 create mode 100644 doc/pcre2_general_context_copy.3 create mode 100644 doc/pcre2_general_context_create.3 create mode 100644 doc/pcre2_general_context_free.3 create mode 100644 doc/pcre2_get_error_message.3 create mode 100644 doc/pcre2_get_mark.3 create mode 100644 doc/pcre2_get_match_data_heapframes_size.3 create mode 100644 doc/pcre2_get_match_data_size.3 create mode 100644 doc/pcre2_get_ovector_count.3 create mode 100644 doc/pcre2_get_ovector_pointer.3 create mode 100644 doc/pcre2_get_startchar.3 create mode 100644 doc/pcre2_jit_compile.3 create mode 100644 doc/pcre2_jit_free_unused_memory.3 create mode 100644 doc/pcre2_jit_match.3 create mode 100644 doc/pcre2_jit_stack_assign.3 create mode 100644 doc/pcre2_jit_stack_create.3 create mode 100644 doc/pcre2_jit_stack_free.3 create mode 100644 doc/pcre2_maketables.3 create mode 100644 doc/pcre2_maketables_free.3 create mode 100644 doc/pcre2_match.3 create 
mode 100644 doc/pcre2_match_context_copy.3 create mode 100644 doc/pcre2_match_context_create.3 create mode 100644 doc/pcre2_match_context_free.3 create mode 100644 doc/pcre2_match_data_create.3 create mode 100644 doc/pcre2_match_data_create_from_pattern.3 create mode 100644 doc/pcre2_match_data_free.3 create mode 100644 doc/pcre2_pattern_convert.3 create mode 100644 doc/pcre2_pattern_info.3 create mode 100644 doc/pcre2_serialize_decode.3 create mode 100644 doc/pcre2_serialize_encode.3 create mode 100644 doc/pcre2_serialize_free.3 create mode 100644 doc/pcre2_serialize_get_number_of_codes.3 create mode 100644 doc/pcre2_set_bsr.3 create mode 100644 doc/pcre2_set_callout.3 create mode 100644 doc/pcre2_set_character_tables.3 create mode 100644 doc/pcre2_set_compile_extra_options.3 create mode 100644 doc/pcre2_set_compile_recursion_guard.3 create mode 100644 doc/pcre2_set_depth_limit.3 create mode 100644 doc/pcre2_set_glob_escape.3 create mode 100644 doc/pcre2_set_glob_separator.3 create mode 100644 doc/pcre2_set_heap_limit.3 create mode 100644 doc/pcre2_set_match_limit.3 create mode 100644 doc/pcre2_set_max_pattern_compiled_length.3 create mode 100644 doc/pcre2_set_max_pattern_length.3 create mode 100644 doc/pcre2_set_max_varlookbehind.3 create mode 100644 doc/pcre2_set_newline.3 create mode 100644 doc/pcre2_set_offset_limit.3 create mode 100644 doc/pcre2_set_parens_nest_limit.3 create mode 100644 doc/pcre2_set_recursion_limit.3 create mode 100644 doc/pcre2_set_recursion_memory_management.3 create mode 100644 doc/pcre2_set_substitute_callout.3 create mode 100644 doc/pcre2_substitute.3 create mode 100644 doc/pcre2_substring_copy_byname.3 create mode 100644 doc/pcre2_substring_copy_bynumber.3 create mode 100644 doc/pcre2_substring_free.3 create mode 100644 doc/pcre2_substring_get_byname.3 create mode 100644 doc/pcre2_substring_get_bynumber.3 create mode 100644 doc/pcre2_substring_length_byname.3 create mode 100644 doc/pcre2_substring_length_bynumber.3 create mode 100644 
doc/pcre2_substring_list_free.3 create mode 100644 doc/pcre2_substring_list_get.3 create mode 100644 doc/pcre2_substring_nametable_scan.3 create mode 100644 doc/pcre2_substring_number_from_name.3 create mode 100644 doc/pcre2api.3 create mode 100644 doc/pcre2build.3 create mode 100644 doc/pcre2callout.3 create mode 100644 doc/pcre2compat.3 create mode 100644 doc/pcre2convert.3 create mode 100644 doc/pcre2demo.3 create mode 100644 doc/pcre2grep.1 create mode 100644 doc/pcre2grep.txt create mode 100644 doc/pcre2jit.3 create mode 100644 doc/pcre2limits.3 create mode 100644 doc/pcre2matching.3 create mode 100644 doc/pcre2partial.3 create mode 100644 doc/pcre2pattern.3 create mode 100644 doc/pcre2perform.3 create mode 100644 doc/pcre2posix.3 create mode 100644 doc/pcre2sample.3 create mode 100644 doc/pcre2serialize.3 create mode 100644 doc/pcre2syntax.3 create mode 100644 doc/pcre2test.1 create mode 100644 doc/pcre2test.txt create mode 100644 doc/pcre2unicode.3 create mode 100755 install-sh create mode 100644 libpcre2-16.pc.in create mode 100644 libpcre2-32.pc.in create mode 100644 libpcre2-8.pc.in create mode 100644 libpcre2-posix.pc.in create mode 100644 ltmain.sh create mode 100644 m4/ax_pthread.m4 create mode 100644 m4/libtool.m4 create mode 100644 m4/ltoptions.m4 create mode 100644 m4/ltsugar.m4 create mode 100644 m4/ltversion.m4 create mode 100644 m4/lt~obsolete.m4 create mode 100644 m4/pcre2_visibility.m4 create mode 100755 missing create mode 100644 pcre2-config.in create mode 100755 perltest.sh create mode 100644 src/config.h.generic create mode 100644 src/config.h.in create mode 100644 src/pcre2.h.generic create mode 100644 src/pcre2.h.in create mode 100644 src/pcre2_auto_possess.c create mode 100644 src/pcre2_chartables.c.dist create mode 100644 src/pcre2_chkdint.c create mode 100644 src/pcre2_compile.c create mode 100644 src/pcre2_config.c create mode 100644 src/pcre2_context.c create mode 100644 src/pcre2_convert.c create mode 100644 src/pcre2_dfa_match.c 
create mode 100644 src/pcre2_dftables.c create mode 100644 src/pcre2_error.c create mode 100644 src/pcre2_extuni.c create mode 100644 src/pcre2_find_bracket.c create mode 100644 src/pcre2_fuzzsupport.c create mode 100644 src/pcre2_internal.h create mode 100644 src/pcre2_intmodedep.h create mode 100644 src/pcre2_jit_compile.c create mode 100644 src/pcre2_jit_match.c create mode 100644 src/pcre2_jit_misc.c create mode 100644 src/pcre2_jit_neon_inc.h create mode 100644 src/pcre2_jit_simd_inc.h create mode 100644 src/pcre2_jit_test.c create mode 100644 src/pcre2_maketables.c create mode 100644 src/pcre2_match.c create mode 100644 src/pcre2_match_data.c create mode 100644 src/pcre2_newline.c create mode 100644 src/pcre2_ord2utf.c create mode 100644 src/pcre2_pattern_info.c create mode 100644 src/pcre2_printint.c create mode 100644 src/pcre2_script_run.c create mode 100644 src/pcre2_serialize.c create mode 100644 src/pcre2_string_utils.c create mode 100644 src/pcre2_study.c create mode 100644 src/pcre2_substitute.c create mode 100644 src/pcre2_substring.c create mode 100644 src/pcre2_tables.c create mode 100644 src/pcre2_ucd.c create mode 100644 src/pcre2_ucp.h create mode 100644 src/pcre2_ucptables.c create mode 100644 src/pcre2_valid_utf.c create mode 100644 src/pcre2_xclass.c create mode 100644 src/pcre2demo.c create mode 100644 src/pcre2grep.c create mode 100644 src/pcre2posix.c create mode 100644 src/pcre2posix.h create mode 100644 src/pcre2posix_test.c create mode 100644 src/pcre2test.c create mode 100644 src/sljit/allocator_src/sljitExecAllocatorApple.c create mode 100644 src/sljit/allocator_src/sljitExecAllocatorCore.c create mode 100644 src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c create mode 100644 src/sljit/allocator_src/sljitExecAllocatorPosix.c create mode 100644 src/sljit/allocator_src/sljitExecAllocatorWindows.c create mode 100644 src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c create mode 100644 
src/sljit/allocator_src/sljitProtExecAllocatorPosix.c create mode 100644 src/sljit/allocator_src/sljitWXExecAllocatorPosix.c create mode 100644 src/sljit/allocator_src/sljitWXExecAllocatorWindows.c create mode 100644 src/sljit/sljitConfig.h create mode 100644 src/sljit/sljitConfigCPU.h create mode 100644 src/sljit/sljitConfigInternal.h create mode 100644 src/sljit/sljitLir.c create mode 100644 src/sljit/sljitLir.h create mode 100644 src/sljit/sljitNativeARM_32.c create mode 100644 src/sljit/sljitNativeARM_64.c create mode 100644 src/sljit/sljitNativeARM_T2_32.c create mode 100644 src/sljit/sljitNativeLOONGARCH_64.c create mode 100644 src/sljit/sljitNativeMIPS_32.c create mode 100644 src/sljit/sljitNativeMIPS_64.c create mode 100644 src/sljit/sljitNativeMIPS_common.c create mode 100644 src/sljit/sljitNativePPC_32.c create mode 100644 src/sljit/sljitNativePPC_64.c create mode 100644 src/sljit/sljitNativePPC_common.c create mode 100644 src/sljit/sljitNativeRISCV_32.c create mode 100644 src/sljit/sljitNativeRISCV_64.c create mode 100644 src/sljit/sljitNativeRISCV_common.c create mode 100644 src/sljit/sljitNativeS390X.c create mode 100644 src/sljit/sljitNativeX86_32.c create mode 100644 src/sljit/sljitNativeX86_64.c create mode 100644 src/sljit/sljitNativeX86_common.c create mode 100644 src/sljit/sljitSerialize.c create mode 100644 src/sljit/sljitUtils.c create mode 100755 test-driver create mode 100644 testdata/grepbinary create mode 100644 testdata/grepfilelist create mode 100644 testdata/grepinput create mode 100644 testdata/grepinput3 create mode 100644 testdata/grepinput8 create mode 100644 testdata/grepinputC.bz2 create mode 100644 testdata/grepinputC.gz create mode 100644 testdata/grepinputM create mode 100644 testdata/grepinputv create mode 100644 testdata/grepinputx create mode 100644 testdata/greplist create mode 100644 testdata/grepnot.bz2 create mode 100644 testdata/grepoutput create mode 100644 testdata/grepoutput8 create mode 100644 testdata/grepoutputC 
create mode 100644 testdata/grepoutputCN create mode 100644 testdata/grepoutputCNU create mode 100644 testdata/grepoutputCU create mode 100644 testdata/grepoutputCbz2 create mode 100644 testdata/grepoutputCgz create mode 100644 testdata/grepoutputN create mode 100644 testdata/grepoutputUN create mode 100644 testdata/greppatN4 create mode 100644 testdata/testbtables create mode 100644 testdata/testinput1 create mode 100644 testdata/testinput10 create mode 100644 testdata/testinput11 create mode 100644 testdata/testinput12 create mode 100644 testdata/testinput13 create mode 100644 testdata/testinput14 create mode 100644 testdata/testinput15 create mode 100644 testdata/testinput16 create mode 100644 testdata/testinput17 create mode 100644 testdata/testinput18 create mode 100644 testdata/testinput19 create mode 100644 testdata/testinput2 create mode 100644 testdata/testinput20 create mode 100644 testdata/testinput21 create mode 100644 testdata/testinput22 create mode 100644 testdata/testinput23 create mode 100644 testdata/testinput24 create mode 100644 testdata/testinput25 create mode 100644 testdata/testinput26 create mode 100644 testdata/testinput3 create mode 100644 testdata/testinput4 create mode 100644 testdata/testinput5 create mode 100644 testdata/testinput6 create mode 100644 testdata/testinput7 create mode 100644 testdata/testinput8 create mode 100644 testdata/testinput9 create mode 100644 testdata/testinputEBC create mode 100644 testdata/testinputheap create mode 100644 testdata/testoutput1 create mode 100644 testdata/testoutput10 create mode 100644 testdata/testoutput11-16 create mode 100644 testdata/testoutput11-32 create mode 100644 testdata/testoutput12-16 create mode 100644 testdata/testoutput12-32 create mode 100644 testdata/testoutput13 create mode 100644 testdata/testoutput14-16 create mode 100644 testdata/testoutput14-32 create mode 100644 testdata/testoutput14-8 create mode 100644 testdata/testoutput15 create mode 100644 testdata/testoutput16 create 
mode 100644 testdata/testoutput17 create mode 100644 testdata/testoutput18 create mode 100644 testdata/testoutput19 create mode 100644 testdata/testoutput2 create mode 100644 testdata/testoutput20 create mode 100644 testdata/testoutput21 create mode 100644 testdata/testoutput22-16 create mode 100644 testdata/testoutput22-32 create mode 100644 testdata/testoutput22-8 create mode 100644 testdata/testoutput23 create mode 100644 testdata/testoutput24 create mode 100644 testdata/testoutput25 create mode 100644 testdata/testoutput26 create mode 100644 testdata/testoutput3 create mode 100644 testdata/testoutput3A create mode 100644 testdata/testoutput3B create mode 100644 testdata/testoutput4 create mode 100644 testdata/testoutput5 create mode 100644 testdata/testoutput6 create mode 100644 testdata/testoutput7 create mode 100644 testdata/testoutput8-16-2 create mode 100644 testdata/testoutput8-16-3 create mode 100644 testdata/testoutput8-16-4 create mode 100644 testdata/testoutput8-32-2 create mode 100644 testdata/testoutput8-32-3 create mode 100644 testdata/testoutput8-32-4 create mode 100644 testdata/testoutput8-8-2 create mode 100644 testdata/testoutput8-8-3 create mode 100644 testdata/testoutput8-8-4 create mode 100644 testdata/testoutput9 create mode 100644 testdata/testoutputEBC create mode 100644 testdata/testoutputheap-16 create mode 100644 testdata/testoutputheap-32 create mode 100644 testdata/testoutputheap-8 create mode 100644 testdata/valgrind-jit.supp create mode 100644 testdata/wintestinput3 create mode 100644 testdata/wintestoutput3 create mode 100644 vms/configure.com create mode 100644 vms/openvms_readme.txt create mode 100644 vms/pcre2.h_patch create mode 100644 vms/stdint.h diff --git a/132html b/132html new file mode 100755 index 0000000..05ab60e --- /dev/null +++ b/132html @@ -0,0 +1,317 @@ +#! 
/usr/bin/perl -w + +# Script to turn PCRE2 man pages into HTML + + +# Subroutine to handle font changes and other escapes + +sub do_line { +my($s) = $_[0]; + +$s =~ s/ +$s =~ s/>/>/g; +$s =~ s"\\fI(.*?)\\f[RP]"$1"g; +$s =~ s"\\fB(.*?)\\f[RP]"$1"g; +$s =~ s"\\e"\\"g; +$s =~ s/(?<=Copyright )\(c\)/©/g; +$s; +} + +# Subroutine to ensure not in a paragraph + +sub end_para { +if ($inpara) + { + print TEMP "\n" if ($inpre); + print TEMP "

\n"; + } +$inpara = $inpre = 0; +$wrotetext = 0; +} + +# Subroutine to start a new paragraph + +sub new_para { +&end_para(); +print TEMP "

\n"; +$inpara = 1; +} + + +# Main program + +$innf = 0; +$inpara = 0; +$inpre = 0; +$wrotetext = 0; +$toc = 0; +$ref = 1; + +while ($#ARGV >= 0 && $ARGV[0] =~ /^-/) + { + $toc = 1 if $ARGV[0] eq "-toc"; + shift; + } + +# Initial output to STDOUT + +print < + +$ARGV[0] specification + + +

$ARGV[0] man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+End + +print "

\n" if ($toc); + +# Copy the remainder to the standard output + +close(TEMP); +open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n"; + +print while (); + +print < +Return to the PCRE2 index page. +

+End + +close(TEMP); +unlink("/tmp/$$"); + +# End diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..9669f77 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,36 @@ +THE MAIN PCRE2 LIBRARY CODE +--------------------------- + +Written by: Philip Hazel +Email local part: Philip.Hazel +Email domain: gmail.com + +Retired from University of Cambridge Computing Service, +Cambridge, England. + +Copyright (c) 1997-2024 University of Cambridge +All rights reserved + + +PCRE2 JUST-IN-TIME COMPILATION SUPPORT +-------------------------------------- + +Written by: Zoltan Herczeg +Email local part: hzmester +Email domain: freemail.hu + +Copyright(c) 2010-2024 Zoltan Herczeg +All rights reserved. + + +STACK-LESS JUST-IN-TIME COMPILER +-------------------------------- + +Written by: Zoltan Herczeg +Email local part: hzmester +Email domain: freemail.hu + +Copyright(c) 2009-2024 Zoltan Herczeg +All rights reserved. + +#### diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..b73ed73 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,1233 @@ +# CMakeLists.txt +# +# This file enables PCRE2 to be built with the CMake configuration and build +# tool. Download CMake in source or binary form from http://www.cmake.org/ +# Converted to support PCRE2 from the original PCRE file, August 2014. +# +# Original listfile by Christian Ehrlicher +# Refined and expanded by Daniel Richard G. +# 2007-09-14 mod by Sheri so 7.4 supported configuration options can be entered +# 2007-09-19 Adjusted by PH to retain previous default settings +# 2007-12-26 (a) On UNIX, use names libpcre instead of just pcre +# (b) Ensure pcretest and pcregrep link with the local library, +# not a previously-installed one. +# (c) Add PCRE_SUPPORT_LIBREADLINE, PCRE_SUPPORT_LIBZ, and +# PCRE_SUPPORT_LIBBZ2. +# 2008-01-20 Brought up to date to include several new features by Christian +# Ehrlicher. 
+# 2008-01-22 Sheri added options for backward compatibility of library names +# when building with minGW: +# if "ON", NON_STANDARD_LIB_PREFIX causes shared libraries to +# be built without "lib" as prefix. (The libraries will be named +# pcre.dll, pcreposix.dll and pcrecpp.dll). +# if "ON", NON_STANDARD_LIB_SUFFIX causes shared libraries to +# be built with suffix of "-0.dll". (The libraries will be named +# libpcre-0.dll, libpcreposix-0.dll and libpcrecpp-0.dll - same names +# built by default with Configure and Make. +# 2008-01-23 PH removed the automatic build of pcredemo. +# 2008-04-22 PH modified READLINE support so it finds NCURSES when needed. +# 2008-07-03 PH updated for revised UCP property support (change of files) +# 2009-03-23 PH applied Steven Van Ingelgem's patch to change the name +# CMAKE_BINARY_DIR to PROJECT_BINARY_DIR so that it works when PCRE +# is included within another project. +# 2009-03-23 PH applied a modified version of Steven Van Ingelgem's patches to +# add options to stop the building of pcregrep and the tests, and +# to disable the final configuration report. +# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that +# are set by specifying a release type. 
+# 2010-01-02 PH added test for stdint.h +# 2010-03-02 PH added test for inttypes.h +# 2011-08-01 PH added PCREGREP_BUFSIZE +# 2011-08-22 PH added PCRE_SUPPORT_JIT +# 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov +# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT +# 2011-10-04 Sheri added support for including coff data in windows shared libraries +# compiled with MINGW if pcre.rc and/or pcreposix.rc are placed in +# the source dir by the user prior to building +# 2011-10-04 Sheri changed various add_test's to use exes' location built instead +# of DEBUG location only (likely only matters in MSVC) +# 2011-10-04 Sheri added scripts to provide needed variables to RunTest and +# RunGrepTest (used for UNIX and Msys) +# 2011-10-04 Sheri added scripts to provide needed variables and to execute +# RunTest.bat in Win32 (for effortless testing with "make test") +# 2011-10-04 Sheri Increased minimum required cmake version +# 2012-01-06 PH removed pcre_info.c and added pcre_string_utils.c +# 2012-01-10 Zoltan Herczeg added libpcre16 support +# 2012-01-13 Stephen Kelly added out of source build support +# 2012-01-17 PH applied Stephen Kelly's patch to parse the version data out +# of the configure.ac file +# 2012-02-26 PH added support for libedit +# 2012-09-06 PH added support for PCRE_EBCDIC_NL25 +# 2012-09-08 ChPe added PCRE32 support +# 2012-10-23 PH added support for VALGRIND and GCOV +# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings +# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and +# so it has been removed. +# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".") +# 2013-11-05 PH added support for PARENS_NEST_LIMIT +# 2014-08-29 PH converted the file for PCRE2 (which has no C++). 
+# 2015-04-24 PH added support for PCRE2_DEBUG +# 2015-07-16 PH updated for new pcre2_find_bracket source module +# 2015-08-24 PH correct C_FLAGS setting (patch from Roy Ivy III) +# 2015-10-16 PH added support for never-backslash-C +# 2016-03-01 PH applied Chris Wilson's patch for MSVC static +# 2016-06-24 PH applied Chris Wilson's second patch, putting the first under +# a new option instead of being unconditional. +# 2016-10-05 PH fixed a typo (PCRE should be PCRE2) in above patch +# fix by David Gaussmann +# 2016-10-07 PH added PCREGREP_MAX_BUFSIZE +# 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30 +# 2017-04-08 PH added HEAP_LIMIT +# 2017-06-15 ZH added SUPPORT_JIT_SEALLOC support +# 2018-06-19 PH added checks for stdint.h and inttypes.h (later removed) +# 2018-06-27 PH added Daniel's patch to increase the stack for MSVC +# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h +# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied +# 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below) +# 2020-03-16 PH renamed dftables as pcre2_dftables (as elsewhere) +# 2020-03-24 PH changed CMAKE_MODULE_PATH definition to add, not replace +# 2020-04-08 Carlo added function check for secure_getenv, fixed strerror +# 2020-04-16 enh added check for __attribute__((uninitialized)) +# 2020-04-25 PH applied patches from Uwe Korn to support pkg-config and +# library versioning. +# 2020-04-25 Carlo added function check for mkostemp used in ProtExecAllocator +# 2020-04-28 PH added function check for memfd_create based on Carlo's patch +# 2020-05-25 PH added a check for Intel CET +# 2020-12-03 PH altered the definition of pcre2test as suggested by Daniel +# 2021-06-29 JWSB added the option to build static library with PIC. +# 2021-07-05 JWSB modified such that both the static and shared library can be +# built in one go. 
+# 2021-08-28 PH increased minimum version +# 2021-08-28 PH added test for realpath() +# 2022-12-10 PH added support for pcre2posix_test +# 2023-01-15 Carlo added C99 as the minimum required +# 2023-08-06 PH added support for setting variable length lookbehind maximum + +# Increased minimum to 3.5 to workaround deprecated backward compatibility +# since 3.27. +cmake_minimum_required(VERSION 3.5 FATAL_ERROR) +project(PCRE2 C) +set(CMAKE_C_STANDARD 99) +set(CMAKE_C_STANDARD_REQUIRED TRUE) + +set(CMAKE_C_VISIBILITY_PRESET hidden) +cmake_policy(SET CMP0063 NEW) + +# Set policy CMP0026 to avoid warnings for the use of LOCATION in +# GET_TARGET_PROPERTY. This should no longer be required. +# CMAKE_POLICY(SET CMP0026 OLD) + +# With a recent cmake, you can provide a rootdir to look for non +# standard installed library dependencies, but to do so, the policy +# needs to be set to new (by uncommenting the following) +# CMAKE_POLICY(SET CMP0074 NEW) + +# For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH +# on the command line. 
+# SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
+
+LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
+
+INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/src)
+
+# external packages
+FIND_PACKAGE( BZip2 )
+FIND_PACKAGE( ZLIB )
+FIND_PACKAGE( Readline )
+FIND_PACKAGE( Editline )
+
+# Configuration checks
+
+INCLUDE(CheckCSourceCompiles)
+INCLUDE(CheckFunctionExists)
+INCLUDE(CheckSymbolExists)
+INCLUDE(CheckIncludeFile)
+INCLUDE(CheckTypeSize)
+INCLUDE(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR
+
+CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
+CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H)
+CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
+CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
+CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H)
+
+CHECK_SYMBOL_EXISTS(bcopy "strings.h" HAVE_BCOPY)
+CHECK_SYMBOL_EXISTS(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE)
+CHECK_SYMBOL_EXISTS(memmove "string.h" HAVE_MEMMOVE)
+CHECK_SYMBOL_EXISTS(secure_getenv "stdlib.h" HAVE_SECURE_GETENV)
+CHECK_SYMBOL_EXISTS(strerror "string.h" HAVE_STRERROR)
+
+# The probe needs <stdlib.h> for realpath() and <limits.h> for PATH_MAX;
+# with the header names missing it can never compile.
+CHECK_C_SOURCE_COMPILES(
+  "#include <stdlib.h>
+  #include <limits.h>
+  int main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[c], buf); return 0; }"
+  HAVE_REALPATH
+)
+
+# The probes below are compiled with -Werror added to the required flags;
+# the original flags are restored afterwards.
+set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
+
+# <stddef.h> supplies size_t for the overflow-builtin probe.
+CHECK_C_SOURCE_COMPILES(
+  "#include <stddef.h>
+  int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; }"
+  HAVE_BUILTIN_MUL_OVERFLOW
+)
+
+CHECK_C_SOURCE_COMPILES(
+  "int main(void) { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
+  HAVE_ATTRIBUTE_UNINITIALIZED
+)
+
+CHECK_C_SOURCE_COMPILES([=[
+  extern __attribute__ ((visibility ("default"))) int f(void);
+  int main(void) { return f(); }
+  int f(void) { return 42; }
+  ]=] HAVE_VISIBILITY
+)
+
+# PCRE2_EXPORT is the default-visibility attribute when the compiler accepts
+# it, and empty otherwise.
+if (HAVE_VISIBILITY)
+  set(PCRE2_EXPORT [=[__attribute__ ((visibility ("default")))]=])
+else()
+  set(PCRE2_EXPORT)
+endif()
+
+set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
+
+# Check 
whether Intel CET is enabled, and if so, adjust compiler flags. This +# code was written by PH, trying to imitate the logic from the autotools +# configuration. + +CHECK_C_SOURCE_COMPILES( + "#ifndef __CET__ + #error CET is not enabled + #endif + int main() { return 0; }" + INTEL_CET_ENABLED +) + +IF (INTEL_CET_ENABLED) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk") +ENDIF(INTEL_CET_ENABLED) + +# User-configurable options +# +# Note: CMakeSetup displays these in alphabetical order, regardless of +# the order we use here. + +SET(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries.") + +OPTION(BUILD_STATIC_LIBS "Build static libraries." ON) + +OPTION(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON) + +OPTION(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF) + +OPTION(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF) + +OPTION(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." OFF) + +OPTION(PCRE2_DEBUG "Include debugging code" OFF) + +OPTION(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF) + +SET(PCRE2_EBCDIC OFF CACHE BOOL + "Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)") + +SET(PCRE2_EBCDIC_NL25 OFF CACHE BOOL + "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.") + +SET(PCRE2_LINK_SIZE "2" CACHE STRING + "Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.") + +SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING + "Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.") + +SET(PCRE2_HEAP_LIMIT "20000000" CACHE STRING + "Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details.") + +SET(PCRE2_MAX_VARLOOKBEHIND "255" CACHE STRING + "Default limit on variable lookbehinds.") + +SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING + "Default limit on internal looping. 
See MATCH_LIMIT in config.h.in for details.") + +SET(PCRE2_MATCH_LIMIT_DEPTH "MATCH_LIMIT" CACHE STRING + "Default limit on internal depth of search. See MATCH_LIMIT_DEPTH in config.h.in for details.") + +SET(PCRE2GREP_BUFSIZE "20480" CACHE STRING + "Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.") + +SET(PCRE2GREP_MAX_BUFSIZE "1048576" CACHE STRING + "Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details.") + +SET(PCRE2_NEWLINE "LF" CACHE STRING + "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF, NUL).") + +SET(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL + "Obsolete option: do not use") + +SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL + "Enable support for Just-in-time compiling.") + +IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) + SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL + "Enable SELinux compatible execmem allocator in JIT (experimental).") +ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) + SET(PCRE2_SUPPORT_JIT_SEALLOC IGNORE) +ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) + +SET(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL + "Enable use of Just-in-time compiling in pcre2grep.") + +SET(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL + "Enable callout string support in pcre2grep.") + +SET(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL + "Enable callout string fork support in pcre2grep.") + +SET(PCRE2_SUPPORT_UNICODE ON CACHE BOOL + "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.") + +SET(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL + "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks") + +SET(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL + "If ON, backslash-C (upper case C) is locked out.") + +SET(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL + "Enable Valgrind support.") + +OPTION(PCRE2_SHOW_REPORT "Show the final configuration report" ON) +OPTION(PCRE2_BUILD_PCRE2GREP "Build pcre2grep" ON) +OPTION(PCRE2_BUILD_TESTS "Build the tests" ON) + 
+IF (MINGW) + OPTION(NON_STANDARD_LIB_PREFIX + "ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc." + OFF) + + OPTION(NON_STANDARD_LIB_SUFFIX + "ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc." + OFF) +ENDIF(MINGW) + +IF(MSVC) + OPTION(PCRE2_STATIC_RUNTIME + "ON=Compile against the static runtime (/MT)." + OFF) + OPTION(INSTALL_MSVC_PDB + "ON=Install .pdb files built by MSVC, if generated" + OFF) +ENDIF(MSVC) + +# bzip2 lib +IF(BZIP2_FOUND) + OPTION (PCRE2_SUPPORT_LIBBZ2 "Enable support for linking pcre2grep with libbz2." ON) +ENDIF(BZIP2_FOUND) +IF(PCRE2_SUPPORT_LIBBZ2) + INCLUDE_DIRECTORIES(${BZIP2_INCLUDE_DIR}) +ENDIF(PCRE2_SUPPORT_LIBBZ2) + +# zlib +IF(ZLIB_FOUND) + OPTION (PCRE2_SUPPORT_LIBZ "Enable support for linking pcre2grep with libz." ON) +ENDIF(ZLIB_FOUND) +IF(PCRE2_SUPPORT_LIBZ) + INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) +ENDIF(PCRE2_SUPPORT_LIBZ) + +# editline lib +IF(EDITLINE_FOUND) + OPTION (PCRE2_SUPPORT_LIBEDIT "Enable support for linking pcre2test with libedit." OFF) +ENDIF(EDITLINE_FOUND) +IF(EDITLINE_FOUND) + IF(PCRE2_SUPPORT_LIBEDIT) + INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR}) + ENDIF(PCRE2_SUPPORT_LIBEDIT) +ELSE(EDITLINE_FOUND) + IF(PCRE2_SUPPORT_LIBEDIT) + MESSAGE(FATAL_ERROR + " libedit not found, set EDITLINE_INCLUDE_DIR to a compatible header\n" + " or set Editline_ROOT to a full libedit installed tree, as needed\n" + " Might need to enable policy CMP0074 in CMakeLists.txt" + ) + ENDIF(PCRE2_SUPPORT_LIBEDIT) +ENDIF(EDITLINE_FOUND) + +# readline lib +IF(READLINE_FOUND) + OPTION (PCRE2_SUPPORT_LIBREADLINE "Enable support for linking pcre2test with libreadline." 
ON) +ENDIF(READLINE_FOUND) +IF(PCRE2_SUPPORT_LIBREADLINE) + INCLUDE_DIRECTORIES(${READLINE_INCLUDE_DIR}) +ENDIF(PCRE2_SUPPORT_LIBREADLINE) + +# Prepare build configuration + +IF(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS) + MESSAGE(FATAL_ERROR "At least one of BUILD_SHARED_LIBS or BUILD_STATIC_LIBS must be enabled.") +ENDIF(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS) + +IF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32) + MESSAGE(FATAL_ERROR "At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled") +ENDIF(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32) + +IF(PCRE2_BUILD_PCRE2_8) + SET(SUPPORT_PCRE2_8 1) +ENDIF(PCRE2_BUILD_PCRE2_8) + +IF(PCRE2_BUILD_PCRE2_16) + SET(SUPPORT_PCRE2_16 1) +ENDIF(PCRE2_BUILD_PCRE2_16) + +IF(PCRE2_BUILD_PCRE2_32) + SET(SUPPORT_PCRE2_32 1) +ENDIF(PCRE2_BUILD_PCRE2_32) + +IF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8) + MESSAGE(STATUS "** PCRE2_BUILD_PCRE2_8 must be enabled for the pcre2grep program") + SET(PCRE2_BUILD_PCRE2GREP OFF) +ENDIF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8) + +IF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT) + IF(READLINE_FOUND) + MESSAGE(FATAL_ERROR + " Only one of the readline compatible libraries can be enabled.\n" + " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF" + ) + ENDIF(READLINE_FOUND) +ENDIF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT) + +IF(PCRE2_SUPPORT_BSR_ANYCRLF) + SET(BSR_ANYCRLF 1) +ENDIF(PCRE2_SUPPORT_BSR_ANYCRLF) + +IF(PCRE2_NEVER_BACKSLASH_C) + SET(NEVER_BACKSLASH_C 1) +ENDIF(PCRE2_NEVER_BACKSLASH_C) + +IF(PCRE2_SUPPORT_UNICODE) + SET(SUPPORT_UNICODE 1) +ENDIF(PCRE2_SUPPORT_UNICODE) + +IF(PCRE2_SUPPORT_JIT) + SET(SUPPORT_JIT 1) + IF(UNIX) + FIND_PACKAGE(Threads REQUIRED) + IF(CMAKE_USE_PTHREADS_INIT) + SET(REQUIRE_PTHREAD 1) + ENDIF(CMAKE_USE_PTHREADS_INIT) + ENDIF(UNIX) +ENDIF(PCRE2_SUPPORT_JIT) + +IF(PCRE2_SUPPORT_JIT_SEALLOC) + 
SET(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE) + CHECK_SYMBOL_EXISTS(mkostemp stdlib.h REQUIRED) + UNSET(CMAKE_REQUIRED_DEFINITIONS) + IF(${REQUIRED}) + IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) + ADD_DEFINITIONS(-D_GNU_SOURCE) + SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1) + ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) + MESSAGE(FATAL_ERROR "Your configuration is not supported") + ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) + ELSE(${REQUIRED}) + SET(PCRE2_SUPPORT_JIT_SEALLOC OFF) + ENDIF(${REQUIRED}) +ENDIF(PCRE2_SUPPORT_JIT_SEALLOC) + +IF(PCRE2GREP_SUPPORT_JIT) + SET(SUPPORT_PCRE2GREP_JIT 1) +ENDIF(PCRE2GREP_SUPPORT_JIT) + +IF(PCRE2GREP_SUPPORT_CALLOUT) + SET(SUPPORT_PCRE2GREP_CALLOUT 1) + IF(PCRE2GREP_SUPPORT_CALLOUT_FORK) + SET(SUPPORT_PCRE2GREP_CALLOUT_FORK 1) + ENDIF(PCRE2GREP_SUPPORT_CALLOUT_FORK) +ENDIF(PCRE2GREP_SUPPORT_CALLOUT) + +IF(PCRE2_SUPPORT_VALGRIND) + SET(SUPPORT_VALGRIND 1) +ENDIF(PCRE2_SUPPORT_VALGRIND) + +IF(PCRE2_DISABLE_PERCENT_ZT) + SET(DISABLE_PERCENT_ZT 1) +ENDIF(PCRE2_DISABLE_PERCENT_ZT) + +# This next one used to reference ${READLINE_LIBRARY}) +# but I was advised to add the NCURSES test as well, along with +# some modifications to cmake/FindReadline.cmake which should +# make it possible to override the default if necessary. 
PH
+
+IF(PCRE2_SUPPORT_LIBREADLINE)
+  SET(SUPPORT_LIBREADLINE 1)
+  SET(PCRE2TEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY})
+ENDIF(PCRE2_SUPPORT_LIBREADLINE)
+
+# libedit is a plug-compatible alternative to libreadline
+
+IF(PCRE2_SUPPORT_LIBEDIT)
+  SET(SUPPORT_LIBEDIT 1)
+  SET(PCRE2TEST_LIBS ${EDITLINE_LIBRARY} ${NCURSES_LIBRARY})
+ENDIF(PCRE2_SUPPORT_LIBEDIT)
+
+IF(PCRE2_SUPPORT_LIBZ)
+  SET(SUPPORT_LIBZ 1)
+  SET(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${ZLIB_LIBRARIES})
+ENDIF(PCRE2_SUPPORT_LIBZ)
+
+IF(PCRE2_SUPPORT_LIBBZ2)
+  SET(SUPPORT_LIBBZ2 1)
+  SET(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${BZIP2_LIBRARIES})
+ENDIF(PCRE2_SUPPORT_LIBBZ2)
+
+# Translate the PCRE2_NEWLINE name into the numeric NEWLINE_DEFAULT code.
+# An unrecognized name leaves NEWLINE_DEFAULT empty, which is rejected below.
+SET(NEWLINE_DEFAULT "")
+
+IF(PCRE2_NEWLINE STREQUAL "CR")
+  SET(NEWLINE_DEFAULT "1")
+ENDIF(PCRE2_NEWLINE STREQUAL "CR")
+IF(PCRE2_NEWLINE STREQUAL "LF")
+  SET(NEWLINE_DEFAULT "2")
+ENDIF(PCRE2_NEWLINE STREQUAL "LF")
+IF(PCRE2_NEWLINE STREQUAL "CRLF")
+  SET(NEWLINE_DEFAULT "3")
+ENDIF(PCRE2_NEWLINE STREQUAL "CRLF")
+IF(PCRE2_NEWLINE STREQUAL "ANY")
+  SET(NEWLINE_DEFAULT "4")
+ENDIF(PCRE2_NEWLINE STREQUAL "ANY")
+IF(PCRE2_NEWLINE STREQUAL "ANYCRLF")
+  SET(NEWLINE_DEFAULT "5")
+ENDIF(PCRE2_NEWLINE STREQUAL "ANYCRLF")
+IF(PCRE2_NEWLINE STREQUAL "NUL")
+  SET(NEWLINE_DEFAULT "6")
+ENDIF(PCRE2_NEWLINE STREQUAL "NUL")
+
+# The message lists every value accepted above, including "NUL".
+IF(NEWLINE_DEFAULT STREQUAL "")
+  MESSAGE(FATAL_ERROR "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\", \"NUL\".")
+ENDIF(NEWLINE_DEFAULT STREQUAL "")
+
+IF(PCRE2_EBCDIC)
+  SET(EBCDIC 1)
+ENDIF(PCRE2_EBCDIC)
+
+# PCRE2_EBCDIC_NL25 also turns on EBCDIC itself.
+IF(PCRE2_EBCDIC_NL25)
+  SET(EBCDIC 1)
+  SET(EBCDIC_NL25 1)
+ENDIF(PCRE2_EBCDIC_NL25)
+
+# Output files
+
+CONFIGURE_FILE(config-cmake.h.in
+               ${PROJECT_BINARY_DIR}/config.h
+               @ONLY)
+
+# Parse version numbers and date out of configure.ac
+
+file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac
+  configure_lines
+  LIMIT_COUNT 50 # Read only the first 50 lines of the file
+)
+
+set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date"
+ 
"libpcre2_posix_version" "libpcre2_8_version" "libpcre2_16_version" "libpcre2_32_version")
+# For each searched name, take the first m4_define(name, [value]) found in the
+# lines read from configure.ac and store the value in the upper-cased variable.
+foreach(configure_line ${configure_lines})
+  foreach(_substitution_variable ${SEARCHED_VARIABLES})
+    string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
+    if (NOT ${_substitution_variable_upper})
+      string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line})
+      if (CMAKE_MATCH_1)
+        set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
+      endif()
+    endif()
+  endforeach()
+endforeach()
+
+# PARSE_LIB_VERSION(<prefix>)
+# Splits the colon-separated <prefix>_VERSION value into
+# <prefix>_VERSION_CURRENT, _REVISION and _AGE (presumably a libtool
+# current:revision:age triple - confirm against configure.ac), then derives:
+#   <prefix>_SOVERSION                   = CURRENT - AGE
+#   <prefix>_MACHO_COMPATIBILITY_VERSION = CURRENT + 1
+#   <prefix>_MACHO_CURRENT_VERSION       = (CURRENT + 1).REVISION
+#   <prefix>_VERSION                     = SOVERSION.AGE.REVISION
+macro(PARSE_LIB_VERSION VARIABLE_PREFIX)
+  string(REPLACE ":" ";" ${VARIABLE_PREFIX}_VERSION_LIST ${${VARIABLE_PREFIX}_VERSION})
+  list(GET ${VARIABLE_PREFIX}_VERSION_LIST 0 ${VARIABLE_PREFIX}_VERSION_CURRENT)
+  list(GET ${VARIABLE_PREFIX}_VERSION_LIST 1 ${VARIABLE_PREFIX}_VERSION_REVISION)
+  list(GET ${VARIABLE_PREFIX}_VERSION_LIST 2 ${VARIABLE_PREFIX}_VERSION_AGE)
+
+  math(EXPR ${VARIABLE_PREFIX}_SOVERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} - ${${VARIABLE_PREFIX}_VERSION_AGE}")
+  math(EXPR ${VARIABLE_PREFIX}_MACHO_COMPATIBILITY_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1")
+  math(EXPR ${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1")
+  # No trailing "}" here: an unmatched brace would become part of the value.
+  set(${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION}.${${VARIABLE_PREFIX}_VERSION_REVISION}")
+  set(${VARIABLE_PREFIX}_VERSION "${${VARIABLE_PREFIX}_SOVERSION}.${${VARIABLE_PREFIX}_VERSION_AGE}.${${VARIABLE_PREFIX}_VERSION_REVISION}")
+endmacro()
+
+PARSE_LIB_VERSION(LIBPCRE2_POSIX)
+PARSE_LIB_VERSION(LIBPCRE2_8)
+PARSE_LIB_VERSION(LIBPCRE2_16)
+PARSE_LIB_VERSION(LIBPCRE2_32)
+
+CONFIGURE_FILE(src/pcre2.h.in
+               ${PROJECT_BINARY_DIR}/pcre2.h
+               @ONLY)
+
+# Make sure to not link debug libs
+# against release libs and vice versa
+IF(WIN32)
+  SET(CMAKE_DEBUG_POSTFIX "d")
+ENDIF(WIN32)
+
+# Character table generation
+
+OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
+IF(PCRE2_REBUILD_CHARTABLES)
+ 
ADD_EXECUTABLE(pcre2_dftables src/pcre2_dftables.c) + ADD_CUSTOM_COMMAND( + COMMENT "Generating character tables (pcre2_chartables.c) for current locale" + DEPENDS pcre2_dftables + COMMAND pcre2_dftables + ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c + OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c + ) +ELSE(PCRE2_REBUILD_CHARTABLES) + CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist + ${PROJECT_BINARY_DIR}/pcre2_chartables.c + COPYONLY) +ENDIF(PCRE2_REBUILD_CHARTABLES) + +# Source code + +SET(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h) + +SET(PCRE2_SOURCES + src/pcre2_auto_possess.c + ${PROJECT_BINARY_DIR}/pcre2_chartables.c + src/pcre2_chkdint.c + src/pcre2_compile.c + src/pcre2_config.c + src/pcre2_context.c + src/pcre2_convert.c + src/pcre2_dfa_match.c + src/pcre2_error.c + src/pcre2_extuni.c + src/pcre2_find_bracket.c + src/pcre2_jit_compile.c + src/pcre2_maketables.c + src/pcre2_match.c + src/pcre2_match_data.c + src/pcre2_newline.c + src/pcre2_ord2utf.c + src/pcre2_pattern_info.c + src/pcre2_script_run.c + src/pcre2_serialize.c + src/pcre2_string_utils.c + src/pcre2_study.c + src/pcre2_substitute.c + src/pcre2_substring.c + src/pcre2_tables.c + src/pcre2_ucd.c + src/pcre2_valid_utf.c + src/pcre2_xclass.c +) + +SET(PCRE2POSIX_HEADERS src/pcre2posix.h) +SET(PCRE2POSIX_SOURCES src/pcre2posix.c) + +IF(MINGW AND BUILD_SHARED_LIBS) + IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) + ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o + PRE-LINK + COMMAND windres ARGS pcre2.rc pcre2.o + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMENT Using pcre2 coff info in mingw build) + SET(PCRE2_SOURCES ${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o) + ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) + + IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) + ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o + PRE-LINK + COMMAND windres ARGS pcre2posix.rc pcre2posix.o + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMENT Using pcre2posix coff info in mingw 
build) + SET(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o) + ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) +ENDIF(MINGW AND BUILD_SHARED_LIBS) + +IF(MSVC AND BUILD_SHARED_LIBS) + SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-posix.pdb ${dll_pdb_files}) + SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-posixd.pdb ${dll_pdb_debug_files}) + IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) + SET(PCRE2_SOURCES ${PCRE2_SOURCES} pcre2.rc) + ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) + + IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) + SET(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} pcre2posix.rc) + ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) +ENDIF(MSVC AND BUILD_SHARED_LIBS) + +# Fix static compilation with MSVC: https://bugs.exim.org/show_bug.cgi?id=1681 +# This code was taken from the CMake wiki, not from WebM. + +IF(MSVC AND PCRE2_STATIC_RUNTIME) + MESSAGE(STATUS "** MSVC and PCRE2_STATIC_RUNTIME: modifying compiler flags to use static runtime library") + foreach(flag_var + CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endforeach() +ENDIF(MSVC AND PCRE2_STATIC_RUNTIME) + +# Build setup + +ADD_DEFINITIONS(-DHAVE_CONFIG_H) + +IF(MSVC) + ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS) +ENDIF(MSVC) + +SET(CMAKE_INCLUDE_CURRENT_DIR 1) + +set(targets) + +# 8-bit library + +IF(PCRE2_BUILD_PCRE2_8) + IF(BUILD_STATIC_LIBS) + ADD_LIBRARY(pcre2-8-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_8_VERSION} + SOVERSION ${LIBPCRE2_8_SOVERSION}) + TARGET_COMPILE_DEFINITIONS(pcre2-8-static PUBLIC PCRE2_STATIC) + 
TARGET_INCLUDE_DIRECTORIES(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR}) + IF(REQUIRE_PTHREAD) + TARGET_LINK_LIBRARIES(pcre2-8-static Threads::Threads) + ENDIF(REQUIRE_PTHREAD) + SET(targets ${targets} pcre2-8-static) + ADD_LIBRARY(pcre2-posix-static STATIC ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) + SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_POSIX_VERSION} + SOVERSION ${LIBPCRE2_POSIX_SOVERSION}) + TARGET_LINK_LIBRARIES(pcre2-posix-static pcre2-8-static) + TARGET_INCLUDE_DIRECTORIES(pcre2-posix-static PUBLIC ${PROJECT_SOURCE_DIR}/src) + set(targets ${targets} pcre2-posix-static) + + IF(MSVC) + SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8-static) + SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix-static) + ELSE(MSVC) + SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8) + SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix) + ENDIF(MSVC) + IF(PCRE2_STATIC_PIC) + SET_TARGET_PROPERTIES(pcre2-8-static pcre2-posix-static PROPERTIES POSITION_INDEPENDENT_CODE 1) + ENDIF(PCRE2_STATIC_PIC) + ENDIF(BUILD_STATIC_LIBS) + + IF(BUILD_SHARED_LIBS) + ADD_LIBRARY(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + TARGET_INCLUDE_DIRECTORIES(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR}) + SET_TARGET_PROPERTIES(pcre2-8-shared PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_8_VERSION} + SOVERSION ${LIBPCRE2_8_SOVERSION} + OUTPUT_NAME pcre2-8) + IF(REQUIRE_PTHREAD) + TARGET_LINK_LIBRARIES(pcre2-8-shared Threads::Threads) + ENDIF(REQUIRE_PTHREAD) + set(targets ${targets} 
pcre2-8-shared) + + ADD_LIBRARY(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) + TARGET_INCLUDE_DIRECTORIES(pcre2-posix-shared PUBLIC ${PROJECT_SOURCE_DIR}/src) + SET_TARGET_PROPERTIES(pcre2-posix-shared PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_POSIX_VERSION} + SOVERSION ${LIBPCRE2_POSIX_SOVERSION} + OUTPUT_NAME pcre2-posix) + set(PCRE2POSIX_CFLAG "-DPCRE2POSIX_SHARED") + TARGET_COMPILE_DEFINITIONS(pcre2-posix-shared PUBLIC ${PCRE2POSIX_CFLAG}) + TARGET_LINK_LIBRARIES(pcre2-posix-shared pcre2-8-shared) + SET(targets ${targets} pcre2-posix-shared) + SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-8.pdb ${dll_pdb_files}) + SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-8d.pdb ${dll_pdb_debug_files}) + + IF(MINGW) + IF(NON_STANDARD_LIB_PREFIX) + SET_TARGET_PROPERTIES(pcre2-8-shared pcre2-posix-shared PROPERTIES PREFIX "") + ENDIF(NON_STANDARD_LIB_PREFIX) + IF(NON_STANDARD_LIB_SUFFIX) + SET_TARGET_PROPERTIES(pcre2-8-shared pcre2-posix-shared PROPERTIES SUFFIX "-0.dll") + ENDIF(NON_STANDARD_LIB_SUFFIX) + ENDIF(MINGW) + ENDIF(BUILD_SHARED_LIBS) + + IF(BUILD_STATIC_LIBS) + ADD_LIBRARY(pcre2-8 ALIAS pcre2-8-static) + ADD_LIBRARY(pcre2-posix ALIAS pcre2-posix-static) + ELSE(BUILD_STATIC_LIBS) + ADD_LIBRARY(pcre2-8 ALIAS pcre2-8-shared) + ADD_LIBRARY(pcre2-posix ALIAS pcre2-posix-shared) + ENDIF(BUILD_STATIC_LIBS) +ENDIF(PCRE2_BUILD_PCRE2_8) + +# 16-bit library + +IF(PCRE2_BUILD_PCRE2_16) + IF(BUILD_STATIC_LIBS) + ADD_LIBRARY(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + TARGET_INCLUDE_DIRECTORIES(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR}) + SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 + MACHO_COMPATIBILITY_VERSION 
"${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_16_VERSION} + SOVERSION ${LIBPCRE2_16_SOVERSION}) + TARGET_COMPILE_DEFINITIONS(pcre2-16-static PUBLIC PCRE2_STATIC) + IF(REQUIRE_PTHREAD) + TARGET_LINK_LIBRARIES(pcre2-16-static Threads::Threads) + ENDIF(REQUIRE_PTHREAD) + set(targets ${targets} pcre2-16-static) + + IF(MSVC) + SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16-static) + ELSE(MSVC) + SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16) + ENDIF(MSVC) + IF(PCRE2_STATIC_PIC) + SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES POSITION_INDEPENDENT_CODE 1) + ENDIF(PCRE2_STATIC_PIC) + ENDIF(BUILD_STATIC_LIBS) + + IF(BUILD_SHARED_LIBS) + ADD_LIBRARY(pcre2-16-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + TARGET_INCLUDE_DIRECTORIES(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR}) + SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_16_VERSION} + SOVERSION ${LIBPCRE2_16_SOVERSION} + OUTPUT_NAME pcre2-16) + IF(REQUIRE_PTHREAD) + TARGET_LINK_LIBRARIES(pcre2-16-shared Threads::Threads) + ENDIF(REQUIRE_PTHREAD) + set(targets ${targets} pcre2-16-shared) + SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-16.pdb ${dll_pdb_files}) + SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-16d.pdb ${dll_pdb_debug_files}) + + IF(MINGW) + IF(NON_STANDARD_LIB_PREFIX) + SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES PREFIX "") + ENDIF(NON_STANDARD_LIB_PREFIX) + IF(NON_STANDARD_LIB_SUFFIX) + SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES SUFFIX "-0.dll") + ENDIF(NON_STANDARD_LIB_SUFFIX) + ENDIF(MINGW) + ENDIF(BUILD_SHARED_LIBS) + + IF(BUILD_STATIC_LIBS) + ADD_LIBRARY(pcre2-16 ALIAS pcre2-16-static) + 
ELSE(BUILD_STATIC_LIBS) + ADD_LIBRARY(pcre2-16 ALIAS pcre2-16-shared) + ENDIF(BUILD_STATIC_LIBS) +ENDIF(PCRE2_BUILD_PCRE2_16) + +# 32-bit library + +IF(PCRE2_BUILD_PCRE2_32) + IF(BUILD_STATIC_LIBS) + ADD_LIBRARY(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + TARGET_INCLUDE_DIRECTORIES(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR}) + SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_32_VERSION} + SOVERSION ${LIBPCRE2_32_SOVERSION}) + TARGET_COMPILE_DEFINITIONS(pcre2-32-static PUBLIC PCRE2_STATIC) + IF(REQUIRE_PTHREAD) + TARGET_LINK_LIBRARIES(pcre2-32-static Threads::Threads) + ENDIF(REQUIRE_PTHREAD) + set(targets ${targets} pcre2-32-static) + + IF(MSVC) + SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32-static) + ELSE(MSVC) + SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32) + ENDIF(MSVC) + IF(PCRE2_STATIC_PIC) + SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES POSITION_INDEPENDENT_CODE 1) + ENDIF(PCRE2_STATIC_PIC) + ENDIF(BUILD_STATIC_LIBS) + + IF(BUILD_SHARED_LIBS) + ADD_LIBRARY(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + TARGET_INCLUDE_DIRECTORIES(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR}) + SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_32_VERSION} + SOVERSION ${LIBPCRE2_32_SOVERSION} + OUTPUT_NAME pcre2-32) + IF(REQUIRE_PTHREAD) + TARGET_LINK_LIBRARIES(pcre2-32-shared Threads::Threads) + ENDIF(REQUIRE_PTHREAD) + set(targets ${targets} pcre2-32-shared) + SET(dll_pdb_files 
${PROJECT_BINARY_DIR}/pcre2-32.pdb ${dll_pdb_files}) + SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-32d.pdb ${dll_pdb_debug_files}) + + IF(MINGW) + IF(NON_STANDARD_LIB_PREFIX) + SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES PREFIX "") + ENDIF(NON_STANDARD_LIB_PREFIX) + IF(NON_STANDARD_LIB_SUFFIX) + SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES SUFFIX "-0.dll") + ENDIF(NON_STANDARD_LIB_SUFFIX) + ENDIF(MINGW) + ENDIF(BUILD_SHARED_LIBS) + + IF(BUILD_STATIC_LIBS) + ADD_LIBRARY(pcre2-32 ALIAS pcre2-32-static) + ELSE(BUILD_STATIC_LIBS) + ADD_LIBRARY(pcre2-32 ALIAS pcre2-32-shared) + ENDIF(BUILD_STATIC_LIBS) +ENDIF(PCRE2_BUILD_PCRE2_32) + +# Generate pkg-config files + +set(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}") +set(prefix ${CMAKE_INSTALL_PREFIX}) + +set(exec_prefix "\${prefix}") +set(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}") +set(includedir "\${prefix}/include") +if(WIN32 AND (CMAKE_BUILD_TYPE MATCHES Debug)) + set(LIB_POSTFIX ${CMAKE_DEBUG_POSTFIX}) +endif() + +if(PCRE2_BUILD_PCRE2_8) + configure_file(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY) + list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc") + configure_file(libpcre2-8.pc.in libpcre2-8.pc @ONLY) + list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-8.pc") + set(enable_pcre2_8 "yes") +else() + set(enable_pcre2_8 "no") +endif() + +if(PCRE2_BUILD_PCRE2_16) + configure_file(libpcre2-16.pc.in libpcre2-16.pc @ONLY) + list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-16.pc") + set(enable_pcre2_16 "yes") +else() + set(enable_pcre2_16 "no") +endif() + +if(PCRE2_BUILD_PCRE2_32) + configure_file(libpcre2-32.pc.in libpcre2-32.pc @ONLY) + list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-32.pc") + set(enable_pcre2_32 "yes") +else() + set(enable_pcre2_32 "no") +endif() + +configure_file(pcre2-config.in pcre2-config @ONLY NEWLINE_STYLE LF) + +# Executables + +IF(PCRE2_BUILD_PCRE2GREP) + ADD_EXECUTABLE(pcre2grep 
src/pcre2grep.c) + SET_PROPERTY(TARGET pcre2grep + PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) + set(targets ${targets} pcre2grep) + TARGET_LINK_LIBRARIES(pcre2grep pcre2-posix ${PCRE2GREP_LIBS}) +ENDIF(PCRE2_BUILD_PCRE2GREP) + +# Testing + +IF(PCRE2_BUILD_TESTS) + ENABLE_TESTING() + + SET(PCRE2TEST_SOURCES src/pcre2test.c) + + IF(MSVC) + # This is needed to avoid a stack overflow error in the standard tests. The + # flag should be indicated with a forward-slash instead of a hyphen, but + # then CMake treats it as a file path. + SET(PCRE2TEST_LINKER_FLAGS -STACK:2500000) + ENDIF(MSVC) + + ADD_EXECUTABLE(pcre2test ${PCRE2TEST_SOURCES}) + set(targets ${targets} pcre2test) + IF(PCRE2_BUILD_PCRE2_8) + LIST(APPEND PCRE2TEST_LIBS pcre2-posix pcre2-8) + ENDIF(PCRE2_BUILD_PCRE2_8) + IF(PCRE2_BUILD_PCRE2_16) + LIST(APPEND PCRE2TEST_LIBS pcre2-16) + ENDIF(PCRE2_BUILD_PCRE2_16) + IF(PCRE2_BUILD_PCRE2_32) + LIST(APPEND PCRE2TEST_LIBS pcre2-32) + ENDIF(PCRE2_BUILD_PCRE2_32) + TARGET_LINK_LIBRARIES(pcre2test ${PCRE2TEST_LIBS} ${PCRE2TEST_LINKER_FLAGS}) + + IF(PCRE2_BUILD_PCRE2_8) + ADD_EXECUTABLE(pcre2posix_test src/pcre2posix_test.c) + TARGET_LINK_LIBRARIES(pcre2posix_test pcre2-posix pcre2-8) + ENDIF(PCRE2_BUILD_PCRE2_8) + + IF(PCRE2_SUPPORT_JIT) + ADD_EXECUTABLE(pcre2_jit_test src/pcre2_jit_test.c) + SET(PCRE2_JIT_TEST_LIBS) + IF(PCRE2_BUILD_PCRE2_8) + LIST(APPEND PCRE2_JIT_TEST_LIBS pcre2-8) + ENDIF(PCRE2_BUILD_PCRE2_8) + IF(PCRE2_BUILD_PCRE2_16) + LIST(APPEND PCRE2_JIT_TEST_LIBS pcre2-16) + ENDIF(PCRE2_BUILD_PCRE2_16) + IF(PCRE2_BUILD_PCRE2_32) + LIST(APPEND PCRE2_JIT_TEST_LIBS pcre2-32) + ENDIF(PCRE2_BUILD_PCRE2_32) + TARGET_LINK_LIBRARIES(pcre2_jit_test ${PCRE2_JIT_TEST_LIBS}) + ENDIF(PCRE2_SUPPORT_JIT) + + # exes in Debug location tested by the RunTest and RunGrepTest shell scripts + # via "make test" + + # The commented out code below provokes a warning about future removal + # of the facility, and requires policy CMP0026 to be set to "OLD". 
I have + # got fed-up with the warnings, but my plea for help on the mailing list + # produced no response. So, I've hacked. The new code below seems to work on + # Linux. + +# IF(PCRE2_BUILD_PCRE2GREP) +# GET_TARGET_PROPERTY(PCRE2GREP_EXE pcre2grep DEBUG_LOCATION) +# ENDIF(PCRE2_BUILD_PCRE2GREP) +# +# GET_TARGET_PROPERTY(PCRE2TEST_EXE pcre2test DEBUG_LOCATION) + + IF(PCRE2_BUILD_PCRE2GREP) + SET(PCRE2GREP_EXE $<TARGET_FILE:pcre2grep>) + ENDIF(PCRE2_BUILD_PCRE2GREP) + + SET(PCRE2TEST_EXE $<TARGET_FILE:pcre2test>) + + +# ================================================= + # Write out a CTest configuration file + # + FILE(WRITE ${PROJECT_BINARY_DIR}/CTestCustom.ctest + "# This is a generated file. +MESSAGE(\"When testing is complete, review test output in the +\\\"${PROJECT_BINARY_DIR}/Testing/Temporary\\\" folder.\") +MESSAGE(\" \") +") + + FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_test.sh + "#! /bin/sh +# This is a generated file. +srcdir=${PROJECT_SOURCE_DIR} +. ${PROJECT_SOURCE_DIR}/RunTest +if test \"$?\" != \"0\"; then exit 1; fi +# End +") + + IF(UNIX) + ADD_TEST(pcre2_test sh ${PROJECT_BINARY_DIR}/pcre2_test.sh) + ENDIF(UNIX) + + IF(PCRE2_BUILD_PCRE2GREP) + FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh + "#! /bin/sh +# This is a generated file. +srcdir=${PROJECT_SOURCE_DIR} +. ${PROJECT_SOURCE_DIR}/RunGrepTest +if test \"$?\" != \"0\"; then exit 1; fi +# End +") + + IF(UNIX) + ADD_TEST(pcre2_grep_test sh ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh) + ENDIF(UNIX) + ENDIF(PCRE2_BUILD_PCRE2GREP) + + IF(WIN32) + # Provide environment for executing the bat file version of RunTest + FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} winsrc) + FILE(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} winbin) + FILE(TO_NATIVE_PATH ${PCRE2TEST_EXE} winexe) + + FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_test.bat + "\@REM This is a generated file. +\@echo off +setlocal +SET srcdir=\"${winsrc}\" +# The next line was replaced by the following one after a user comment. 
+# SET pcre2test=\"${winexe}\" +SET pcre2test=\"${winbin}\\pcre2test.exe\" +if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\" +call %srcdir%\\RunTest.Bat +if errorlevel 1 exit /b 1 +echo RunTest.bat tests successfully completed +") + + ADD_TEST(NAME pcre2_test_bat + COMMAND pcre2_test.bat) + SET_TESTS_PROPERTIES(pcre2_test_bat PROPERTIES + PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed") + + IF("$ENV{OSTYPE}" STREQUAL "msys") + # Both the sh and bat file versions of RunTest are run if make test is used + # in msys + ADD_TEST(pcre2_test_sh sh.exe ${PROJECT_BINARY_DIR}/pcre2_test.sh) + IF(PCRE2_BUILD_PCRE2GREP) + ADD_TEST(pcre2_grep_test sh.exe ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh) + ENDIF(PCRE2_BUILD_PCRE2GREP) + ENDIF("$ENV{OSTYPE}" STREQUAL "msys") + ENDIF(WIN32) + + # Changed to accommodate testing whichever location was just built + + IF(PCRE2_SUPPORT_JIT) + ADD_TEST(pcre2_jit_test pcre2_jit_test) + ENDIF(PCRE2_SUPPORT_JIT) + + IF(PCRE2_BUILD_PCRE2_8) + ADD_TEST(pcre2posix_test pcre2posix_test) + ENDIF(PCRE2_BUILD_PCRE2_8) + +ENDIF(PCRE2_BUILD_TESTS) + +# Installation + +SET(CMAKE_INSTALL_ALWAYS 1) + +INSTALL(TARGETS ${targets} + RUNTIME DESTINATION bin + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) +INSTALL(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) +INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config" + DESTINATION bin + # Set 0755 permissions + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) + +INSTALL(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include) + +# CMake config files. 
+set(PCRE2_CONFIG_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config.cmake.in) +set(PCRE2_CONFIG_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config.cmake) +configure_file(${PCRE2_CONFIG_IN} ${PCRE2_CONFIG_OUT} @ONLY) +set(PCRE2_CONFIG_VERSION_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config-version.cmake.in) +set(PCRE2_CONFIG_VERSION_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config-version.cmake) +configure_file(${PCRE2_CONFIG_VERSION_IN} ${PCRE2_CONFIG_VERSION_OUT} @ONLY) +install(FILES ${PCRE2_CONFIG_OUT} ${PCRE2_CONFIG_VERSION_OUT} DESTINATION cmake) + +FILE(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html) +FILE(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1) +FILE(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3) + +INSTALL(FILES ${man1} DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) +INSTALL(FILES ${man3} DESTINATION ${CMAKE_INSTALL_MANDIR}/man3) +INSTALL(FILES ${html} DESTINATION share/doc/pcre2/html) + +IF(MSVC AND INSTALL_MSVC_PDB) + INSTALL(FILES ${dll_pdb_files} DESTINATION bin CONFIGURATIONS RelWithDebInfo) + INSTALL(FILES ${dll_pdb_debug_files} DESTINATION bin CONFIGURATIONS Debug) +ENDIF(MSVC AND INSTALL_MSVC_PDB) + +# Help, only for nice output +IF(BUILD_STATIC_LIBS) + SET(BUILD_STATIC_LIBS ON) +ELSE(BUILD_STATIC_LIBS) + SET(BUILD_STATIC_LIBS OFF) +ENDIF(BUILD_STATIC_LIBS) + +IF(PCRE2_HEAP_MATCH_RECURSE) + MESSAGE(WARNING "HEAP_MATCH_RECURSE is obsolete and does nothing.") +ENDIF(PCRE2_HEAP_MATCH_RECURSE) + +IF(PCRE2_SHOW_REPORT) + STRING(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype) + IF (CMAKE_C_FLAGS) + SET(cfsp " ") + ENDIF(CMAKE_C_FLAGS) + MESSAGE(STATUS "") + MESSAGE(STATUS "") + MESSAGE(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:") + MESSAGE(STATUS "") + MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}") + MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}") + MESSAGE(STATUS " C compiler flags ................ 
: ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}") + MESSAGE(STATUS "") + MESSAGE(STATUS " Build 8 bit PCRE2 library ....... : ${PCRE2_BUILD_PCRE2_8}") + MESSAGE(STATUS " Build 16 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_16}") + MESSAGE(STATUS " Build 32 bit PCRE2 library ...... : ${PCRE2_BUILD_PCRE2_32}") + MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE2_SUPPORT_JIT}") + MESSAGE(STATUS " Use SELinux allocator in JIT .... : ${PCRE2_SUPPORT_JIT_SEALLOC}") + MESSAGE(STATUS " Enable Unicode support .......... : ${PCRE2_SUPPORT_UNICODE}") + MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE2_NEWLINE}") + MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE2_SUPPORT_BSR_ANYCRLF}") + MESSAGE(STATUS " \\C is disabled .................. : ${PCRE2_NEVER_BACKSLASH_C}") + MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE2_EBCDIC}") + MESSAGE(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE2_EBCDIC_NL25}") + MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}") + MESSAGE(STATUS " Internal link size .............. : ${PCRE2_LINK_SIZE}") + MESSAGE(STATUS " Maximum variable lookbehind ..... : ${PCRE2_MAX_VARLOOKBEHIND}") + MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}") + MESSAGE(STATUS " Heap limit ...................... : ${PCRE2_HEAP_LIMIT}") + MESSAGE(STATUS " Match limit ..................... : ${PCRE2_MATCH_LIMIT}") + MESSAGE(STATUS " Match depth limit ............... : ${PCRE2_MATCH_LIMIT_DEPTH}") + MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}") + MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}") + MESSAGE(STATUS " with PIC enabled ............. : ${PCRE2_STATIC_PIC}") + MESSAGE(STATUS " Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}") + MESSAGE(STATUS " Enable JIT in pcre2grep ......... : ${PCRE2GREP_SUPPORT_JIT}") + MESSAGE(STATUS " Enable callouts in pcre2grep .... 
: ${PCRE2GREP_SUPPORT_CALLOUT}") + MESSAGE(STATUS " Enable callout fork in pcre2grep. : ${PCRE2GREP_SUPPORT_CALLOUT_FORK}") + MESSAGE(STATUS " Buffer size for pcre2grep ....... : ${PCRE2GREP_BUFSIZE}") + MESSAGE(STATUS " Build tests (implies pcre2test .. : ${PCRE2_BUILD_TESTS}") + MESSAGE(STATUS " and pcre2grep)") + IF(ZLIB_FOUND) + MESSAGE(STATUS " Link pcre2grep with libz ........ : ${PCRE2_SUPPORT_LIBZ}") + ELSE(ZLIB_FOUND) + MESSAGE(STATUS " Link pcre2grep with libz ........ : Library not found" ) + ENDIF(ZLIB_FOUND) + IF(BZIP2_FOUND) + MESSAGE(STATUS " Link pcre2grep with libbz2 ...... : ${PCRE2_SUPPORT_LIBBZ2}") + ELSE(BZIP2_FOUND) + MESSAGE(STATUS " Link pcre2grep with libbz2 ...... : Library not found" ) + ENDIF(BZIP2_FOUND) + IF(EDITLINE_FOUND) + MESSAGE(STATUS " Link pcre2test with libeditline . : ${PCRE2_SUPPORT_LIBEDIT}") + ELSE(EDITLINE_FOUND) + MESSAGE(STATUS " Link pcre2test with libeditline . : Library not found" ) + ENDIF(EDITLINE_FOUND) + IF(READLINE_FOUND) + MESSAGE(STATUS " Link pcre2test with libreadline . : ${PCRE2_SUPPORT_LIBREADLINE}") + ELSE(READLINE_FOUND) + MESSAGE(STATUS " Link pcre2test with libreadline . : Library not found" ) + ENDIF(READLINE_FOUND) + MESSAGE(STATUS " Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}") + IF(PCRE2_DISABLE_PERCENT_ZT) + MESSAGE(STATUS " Use %zu and %td ..................: OFF" ) + ELSE(PCRE2_DISABLE_PERCENT_ZT) + MESSAGE(STATUS " Use %zu and %td ..................: AUTO" ) + ENDIF(PCRE2_DISABLE_PERCENT_ZT) + + IF(MINGW AND BUILD_SHARED_LIBS) + MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}") + MESSAGE(STATUS " Non-standard dll names (suffix) . 
: ${NON_STANDARD_LIB_SUFFIX}") + ENDIF(MINGW AND BUILD_SHARED_LIBS) + + IF(MSVC) + MESSAGE(STATUS " Install MSVC .pdb files ..........: ${INSTALL_MSVC_PDB}") + ENDIF(MSVC) + + MESSAGE(STATUS "") +ENDIF(PCRE2_SHOW_REPORT) + +# end CMakeLists.txt diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..c233950 --- /dev/null +++ b/COPYING @@ -0,0 +1,5 @@ +PCRE2 LICENCE + +Please see the file LICENCE in the PCRE2 distribution for licensing details. + +End diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..ea228c1 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,3097 @@ +Change Log for PCRE2 +-------------------- + +Before the move to GitHub, this was the only record of changes to PCRE2. Now +there is also the log of commit messages. + +Version 10.44 07-June-2024 +-------------------------- + +1. If a pattern contained a variable-length lookbehind in which the first +branch was not the one with the shortest minimum length, and the lookbehind +contained a capturing group, and elsewhere in the pattern there was another +lookbehind that referenced that group, the pattern was incorrectly compiled, +leading to unpredictable results, including crashes in JIT compiling. An +example pattern is: /(((?<=123?456456|ABC)))(?<=\2)/ + +2. Further updates to the oss-fuzz support: + + (a) Limit quantifiers for groups and classes to be no more than 10. This + avoids very long JIT compile times that happen in some cases when groups + are replicated for quantification, and very long match times when + classes contain a lot of non-ascii characters. + + (b) Added PCRE2_EXTENDED_MORE to the list of allowed options. + + (c) Arranged for text error messages to be shown in 16-bit and 32-bit modes. + + (d) Made the output in standalone mode more readable. + + (e) General code tidies. + + (f) Limit the size of compiled patterns to 10MB (see 6 below). 
+ + (g) Do not run JIT on patterns whose compiled length is greater than 200K + bytes because this takes a long time, causing oss-fuzz to time out. + + (h) Avoid compiling or matching twice with the same options (this could + happen if the input didn't set any options). + +3. Increase the maximum length of a name for a group from 32 to 128 because +there is a user for whom 32 is too small. + +4. Cause pcre2test to output a message when pcre2_jit_compile() gives an error +return if either jitverify or info is specified. + +5. Some auxiliary files for building under OpenVMS that were contributed by +Alexey Chupahin have been installed. + +6. Added pcre2_set_max_pattern_compiled_length() to limit the size of compiled +patterns. + +7. There was a bug in the implementation of \X caused by my (PH) misreading or +misunderstanding one of the grapheme sequence breaking rules in Unicode Annex +#29. A break should occur between two characters with the Extended Pictographic +break property unless a zero-width joiner intervenes. PCRE2 was not insisting +on the ZWJ, causing \X to match more than it should. See GitHub issue #410. + +8. Avoid compilation issues with proprietary compilers in UNIX since 10.43. + + +Version 10.43 16-February-2024 +------------------------------ + +1. The test program added by change 2 of 10.42 didn't work when the default +newline setting didn't include \n as a newline. One test needed (*LF) to ensure +that it worked. + +2. Added the new freestanding POSIX test program to the ManyConfigTests script +in the maint directory (overlooked in 2 below). Also improved the selection +facilities in that script, and added a test with JIT in a non-source directory, +fixing an oversight that would have made such a test fail before. + +3. Added pcre2_get_match_data_heapframes_size() and related pcre2test flags +to allow for finer control of the heap used when pcre2_match() without JIT is +used and the match_data might be reused. 
This began as PR #191, but has had +further refinement and documentation edits. + +4. Applied PR #181, which tidies some casts in pcre2_valid_utf.c. + +5. Applied PR #184, which avoids overflow issues with the heap limit +(introduced in 10.41/9). + +6. Applied PR #192, which changes the timing units for pcre2test from +milliseconds to microseconds. This is more useful for modern CPUs. + +7. Applied PR #193, which makes the requirement for C99 explicit in +configure.ac and CMakeLists.txt. + +8. Fixed a bug in pcre2test when a ridiculously large string repeat required a +stupid amount of memory. It now gives a clean realloc() failure error. + +9. Updates to restrict the interaction between ASCII and non-ASCII characters +for caseless matching and items like \d: + + (a) Added PCRE2_EXTRA_CASELESS_RESTRICT to lock out mixing of ASCII and + non-ASCII when matching caselessly. This is also /r in pcre2test and + (?r) within patterns. + + (b) Added PCRE2_EXTRA_ASCII_{BSD,BSS,BSW,POSIX} and corresponding (?aD) etc + in patterns and /a in pcre2test. + + (c) Corresponding updates to pcre2test. + +10. Unicode has been updated to 15.0.0. + +11. The Python scripts and ucptest.c in maint have been updated (a) a minor +change needed for 9(a) above; (b) fix bugs in ucptest, + +12. Integer overflow testing is now centralized in a new function. + +13. Made PCRE2_UCP the default in UTF mode in pcre2grep, and added new options +--case-restrict and --no-ucp. + +14. In the debugging printint module (which is normally only linked into +pcre2test), avoid the use of a variable called "not" because that's deprecated +in C and forbidden in C++. Also rewrite some code to avoid a goto into a block +that bypassed its initialization (though it didn't actually matter). + +15. More minor code adjustments to avoid using reserved C++ words as variable +names ("new" and "typename") and another jump that bypassed an (irrelevant) +initialization. + +16. 
Merged a pull request that removed pcre2_ucptables.c from the list of files +to compile in NON-AUTOTOOLS-BUILD because it is #included in pcre2_tables.c. +Also adjusted the BUILD.bazel and build.zig files, which had the same issue. At +the same time, fixed a typo in the Bazel file. + +17. Add PCRE2_EXTRA_ASCII_DIGIT to allow [:digit:] to be kept in sync with \d +even in UCP mode. + +18. Fix an invalid match of ascii word classes when invalid utf is enabled. + +19. Add a --posix-digit to pcre2grep for compatibility with GNU grep, and +other tools that prefer the POSIX compatible unicode definition for \d. + +20. Report the bit width of the library in use by pcre2test for usability. + +21. A pathological pattern conversion test could result in a string longer than +the available input buffer. Cause such a test to fail. + +22. Add a check that forces a compiler error if PCRE2_CODE_UNIT_WIDTH is not 8, +16, or 32 when compiling any of the library modules. + +23. Update pcre2_compile() to treat a NULL pattern with zero length as an empty +string. + +24. Add support for limited-length variable-length lookbehind assertions, with +default maximum length 255 characters (same as Perl) but with a function to +adjust the limit. + +25. Applied pull request #262, which updates the zig configuration, and #278 +which fixes a bug with out-of-source-tree CMake build testing. + +26. Add support for LoongArch to JIT. + +27. Fixed a bug in pcre2_match() in the code for handling the vector of +backtracking frames on the heap, which caused a heap overflow if *LIMIT_HEAP +restricted an attempt to extend to less than the frame size. Generally tidy up +the code for extending the heap frames vector. This fixes GitHub issue #275. + +28. Update pcre2_fuzzsupport.c to avoid clang sanitize complaint about shifting +left by 16 when there are non-zeros in the top 16 bits. + +29. Perl 5.34.0 changed the meaning of (for example) {,3} which did not used to +be treated as a quantifier. 
Now it is interpreted as {0,3} and PCRE2 has +changed to match. Note that {,} is still not a quantifier. + +30. Perl allows spaces and/or horizontal tabs after { or before } in all items +that use braces, and also before or after the comma in quantifiers. PCRE2 now +does the same, except for \u{...}, which is recognized only when +PCRE2_EXTRA_ALT_BSUX is set. This is an ECMAScript, non-Perl compatible, +extension, so PCRE2 follows ECMAScript rather than Perl. + +31. Applied pull request #300 by Carlo, which fixes #261. The bug was that +pcre2_match() was not fully resetting all captures that had been set within a +(possibly recursive) subroutine call such as (?3). + +32. Changed the meaning of \w (and its synonyms) in UCP mode to match Perl. It +now matches characters whose general categories are L or N or whose particular +categories are Mn (non-spacing mark) or Pc (connector punctuation). The latter +includes underscore. + +33. Changed the meaning of [:xdigit:] in UCP mode to match Perl. It now also +matches the "fullwidth" versions of the hex digits. Just like it is done for +[:digit:], PCRE2_EXTRA_ASCII_DIGIT can be used to keep this class ASCII only +without affecting other POSIX classes. + +34. GitHub PR305 fixes a potential integer overflow in pcre2_dfa_match(). + +35. Updated handling of \b and \B in UCP mode to match the changes to \w in 32 +above because \b and \B are defined in terms of \w. + +36. Within a pattern (?aT) and (?-aT) set and reset the PCRE2_EXTRA_ASCII_DIGIT +option, and (?aP) also sets (?aT) so that (?-aP) disables all ASCII +restrictions on POSIX classes. + +37. If PCRE2_FIRSTLINE was set on an anchored pattern, pcre2_match() and +pcre2_dfa_match() misbehaved. PCRE2_FIRSTLINE is now ignored for anchored +patterns. + +38. Add a test for ridiculous ovector offset values to the substring extraction +functions. + +39. Make OP_REVERSE use IMM2_SIZE for its data instead of LINK_SIZE, for +consistency with OP_VREVERSE. + +40. 
In some legacy environments with a pre C99 snprintf, pcre2_regerror could +return an incorrect value when the provided buffer was too small. + +41. Applied pull request #342 which adds sanity checks for ctype functions and +locks out any accidental sign-extension. + +42. In the 32-bit library, in non-UTF mode, a quantifier that followed a +literal character with a value greater than or equal to 0x80000000u caused +undefined behaviour. + +43. \z was misbehaving when matching fragments inside invalid UTF strings. + +44. Implement --group-separator and --no-group-separator for pcre2grep. + +45. Fix \X matching in 32 bit mode without UTF in JIT. + +46. Fix backref iterators when PCRE2_MATCH_UNSET_BACKREF is set in JIT. + +47. Refactor the handling of whole-pattern recursion (?0) in pcre2_match() so +that its end is handled similarly to other recursions. This has altered the +behaviour of /|(?0)./endanchored which was previously not right. + +48. Improved the test for looping recursion by checking the last referenced +character as well as the current character. This allows some patterns that +previously triggered the check to run to completion instead of giving the loop +error. + +49. In 32-bit mode, the compiler looped for the pattern /[\x{ffffffff}]/ when +PCRE2_CASELESS and PCRE2_UCP (but not PCRE2_UTF) were set. Fixed by not trying +to look for other cases for characters above the Unicode range. + +50. In caseless 32-bit mode with UCP (but not UTF) set, the character +0xffffffff incorrectly matched any character that has more than one other case, +in particular k and s. + +51. Fix accept and endanchored interaction in JIT. + +52. Fix backreferences with unset backref and non-greedy iterators in JIT. + +53. Improve the logic that checks for a list of starting code units -- positive +lookahead assertions are now ignored if the immediately following item is one +that sets a mandatory starting character. 
For example, /a?(?=bc|)d/ used to set +all of a, b, and d as possible starting code units; now it sets only a and d. + +54. Fix incorrect class character matches in JIT. + +55. In pcre2test, ensure pcre2_jit_match() is used when jitfast is used with +substitution testing. + +56. Insert omitted setting of subject length in match data at the end of +pcre2_jit_match(). + +57. Implemented PCRE2_DISABLE_RECURSELOOP_CHECK for pcre2_match() to enable +some apparently looping recursions to run to completion and therefore match the +JIT behaviour. With this set, real loops will eventually get caught by match or +heap limits or run out of resource. + +58. AC did a lot of work on pcre2_fuzzsupport.c to extend it to 16-bit and +32-bit libraries and to compare JIT and non-JIT matching. + + +Version 10.42 11-December-2022 +------------------------------ + +1. Change 19 of 10.41 wasn't quite right; it put the definition of a default, +empty value for PCRE2_CALL_CONVENTION in src/pcre2posix.c instead of +src/pcre2posix.h, which meant that programs that included pcre2posix.h but not +pcre2.h failed to compile. + +2. To catch similar issues to the above in future, a new small test program +that includes pcre2posix.h but not pcre2.h has been added to the test suite. + +3. When the -S option of pcre2test was used to set a stack size greater than +the allowed maximum, the error message displayed the hard limit incorrectly. +This was pointed out on GitHub pull request #171, but the suggested patch +didn't cope with all cases. Some further modification was required. + +4. Supplying an ovector count of more than 65535 to pcre2_match_data_create() +caused a crash because the field in the match data block is only 16 bits. A +maximum of 65535 is now silently applied. + +5. Merged @carenas patch #175 which fixes #86 - segfault on aarch64 (ARM), + +6. 
The prototype for pcre2_substring_list_free() specified its argument as +PCRE2_SPTR * which is a const data type, whereas the yield from +pcre2_substring_list() is not const. This caused compiler warnings. I have +changed the argument of pcre2_substring_list_free() to be PCRE2_UCHAR ** to +remove this anomaly. This might cause new warnings in existing code where a +cast has been used to avoid previous ones. + + +Version 10.41 06-December-2022 +------------------------------ + +1. Add fflush() before and after a fork callout in pcre2grep to get its output +to be the same on all systems. (There were previously ordering differences in +Alpine Linux). + +2. Merged patch from @carenas (GitHub #110) for pthreads support in CMake. + +3. SSF scorecards grumbled about possible overflow in an expression in +pcre2test. It never would have overflowed in practice, but some casts have been +added and at the same time there's been some tidying of fprintf() calls that output +size_t values. + +4. PR #94 showed up an unused enum in pcre2_convert.c, which is now removed. + +5. Minor code re-arrangement to remove gcc warning about realloc() in +pcre2test. + +6. Change a number of int variables that hold buffer and line lengths in +pcre2grep to PCRE2_SIZE (aka size_t). + +7. Added an #ifdef to cut out a call to PRIV(jit_free) when JIT is not +supported (even though that function would do nothing in that case) at the +request of a user who doesn't even want to link with pcre2_jit_compile.o. Also +tidied up an untidy #ifdef arrangement in pcre2test. + +8. Fixed an issue in the backtracking optimization of character repeats in +JIT. Furthermore optimize star repetitions, not just plus repetitions. + +9. Removed the use of an initial backtracking frames vector on the system stack +in pcre2_match() so that it now always uses the heap. (In a multi-thread +environment with very small stacks there had been an issue.) This also is +tidier for JIT matching, which didn't need that vector. 
The heap vector is now +remembered in the match data block and re-used if that block itself is re-used. +It is freed with the match data block. + +10. Adjusted the find_limits code in pcre2test to work with change 9 above. + +11. Added find_limits_noheap to pcre2test, because the heap limits are now +different in different environments and so cannot be included in the standard +tests. + +12. Created a test for pcre2_match() heap processing that is not part of the +tests run by 'make check', but can be run manually. The current output is from +a 64-bit system. + +13. Implemented -Z aka --null in pcre2grep. + +14. A minor change to pcre2test and the addition of several new pcre2grep tests +have improved LCOV coverage statistics. At the same time, code in pcre2grep and +elsewhere that can never be obeyed in normal testing has been excluded from +coverage. + +15. Fixed a bug in pcre2grep that could cause an extra newline to be written +after output generated by --output. + +16. If a file has a .bz2 extension but is not in fact compressed, pcre2grep +should process it as a plain text file. A bug stopped this happening; now fixed +and added to the tests. + +17. When pcre2grep was running not in UTF mode, if a string specified by +--output or obtained from a callout in a pattern contained a character (byte) +greater than 127, it was incorrectly output in UTF-8 format. + +18. Added some casts after warnings from Clang sanitize. + +19. Merged patch from cbouc (GitHub #139): 4 function prototypes were missing +PCRE2_CALL_CONVENTION in src/pcre2posix.h. All function prototypes returning +pointers had out of place PCRE2_CALL_CONVENTION in src/pcre2.h.*. These +produced errors when building for Windows with #define PCRE2_CALL_CONVENTION +__stdcall. + +20. A negative repeat value in a pcre2test subject line was not being +diagnosed, leading to infinite looping. + +21. 
Updated RunGrepTest to discard the warning that Bash now gives when setting +LC_CTYPE to a bad value (because older versions didn't). + +22. Updated pcre2grep so that it behaves like GNU grep when matching more than +one pattern and a later pattern matches at an earlier point in the subject when +the matched substrings are being identified by colour or by offsets. + +23. Updated the PrepareRelease script so that the man page that it makes for +the pcre2demo demonstration program is more standard and does not cause errors +when processed by lexgrog or mandb -c (GitHub issue #160). + +24. The JIT compiler was updated. + + +Version 10.40 15-April-2022 +--------------------------- + +1. Merged patch from @carenas (GitHub #35, 7db87842) to fix pcre2grep incorrect +handling of multiple passes. + +2. Merged patch from @carenas (GitHub #36, dae47509) to fix portability issue +in pcre2grep with buffered fseek(stdin). + +3. Merged patch from @carenas (GitHub #37, acc520924) to fix tests when -S is +not supported. + +4. Revert an unintended change in JIT repeat detection. + +5. Merged patch from @carenas (GitHub #52, b037bfa1) to fix build on GNU Hurd. + +6. Merged documentation and comments patches from @carenas (GitHub #47). + +7. Merged patch from @carenas (GitHub #49) to remove obsolete JFriedl test code +from pcre2grep. + +8. Merged patch from @carenas (GitHub #48) to fix CMake install issue #46. + +9. Merged patch from @carenas (GitHub #53) fixing NULL checks in matching and +substituting. + +10. Add null_subject and null_replacement modifiers to pcre2test. + +11. Add check for NULL subject to POSIX regexec() function. + +12. Add check for NULL replacement to pcre2_substitute(). + +13. For the subject arguments of pcre2_match(), pcre2_dfa_match(), and +pcre2_substitute(), and the replacement argument of the latter, if the pointer +is NULL and the length is zero, treat as an empty string. Apparently a number +of applications treat NULL/0 in this way. + +14. 
Added support for Bidi_Class and a number of binary Unicode properties, +including Bidi_Control. + +15. Fix some minor issues raised by clang sanitize. + +16. Very minor code speed up for maximizing character property matches. + +17. A number of changes to script matching for \p and \P: + + (a) Script extensions for a character are now coded as a bitmap instead of + a list of script numbers, which should be faster and does not need a + loop. + + (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms + sc and scx). + + (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being + the same as \p{scx:scriptname} because this change happened in Perl at + release 5.26. + + (d) The standard Unicode 4-letter abbreviations for script names are now + recognized. + + (e) In accordance with Unicode and Perl's "loose matching" rules, spaces, + hyphens, and underscores are ignored in property names, which are then + matched independent of case. + +18. The Python scripts in the maint directory have been refactored. There are +now three scripts that generate pcre2_ucd.c, pcre2_ucp.h, and pcre2_ucptables.c +(which is #included by pcre2_tables.c). The data lists that used to be +duplicated are now held in a single common Python module. + +19. On CHERI, and thus Arm's Morello prototype, pointers are represented as +hardware capabilities, which consist of both an integer address and additional +metadata, meaning they are twice the size of the platform's size_t type, i.e. +16 bytes on a 64-bit system. The ovector member of heapframe happens to only be +8 byte aligned, and so computing frame_size ended up with a multiple of 8 but +not 16. Whilst the first frame was always suitably aligned, this then +misaligned the frame that follows, resulting in an alignment fault when storing +a pointer to Fecode at the start of match. Patch to fix this issue by Jessica +Clarke PR#72. + +20. Added -LP and -LS listing options to pcre2test. + +21. 
A user discovered that the library names in CMakeLists.txt for MSVC +debugger (PDB) files were incorrect - perhaps never tried for PCRE2? + +22. An item such as [Aa] is optimized into a caseless single character match. +When this was quantified (e.g. [Aa]{2}) and was also the last literal item in a +pattern, the optimizing "must be present for a match" character check was not +being flagged as caseless, causing some matches that should have succeeded to +fail. + +23. Fixed a unicode property matching issue in JIT. The character was not +fully read in caseless matching. + +24. Fixed an issue affecting recursions in JIT caused by duplicated data +transfers. + +25. Merged patch from @carenas (GitHub #96) which fixes some problems with +pcre2test and readline/readedit: + + * Use the right header for libedit in FreeBSD with autoconf + * Really allow libedit with cmake + * Avoid using readline headers with libedit + + +Version 10.39 29-October-2021 +----------------------------- + +1. Fix incorrect detection of alternatives in first character search in JIT. + +2. Merged patch from @carenas (GitHub #28): + + Visual Studio 2013 includes support for %zu and %td, so let newer + versions of it avoid the fallback, and while at it, make sure that + the first check is for DISABLE_PERCENT_ZT so it will be always + honoured if chosen. + + prtdiff_t is signed, so use a signed type instead, and make sure + that an appropriate width is chosen if pointers are 64bit wide and + long is not (ex: Windows 64bit). + + IMHO removing the cast (and therefore the possibility of truncation) + make the code cleaner and the fallback is likely portable enough + with all 64-bit POSIX systems doing LP64 except for Windows. + +3. Merged patch from @carenas (GitHub #29) to update to Unicode 14.0.0. + +4. 
Merged patch from @carenas (GitHub #30): + + * Cleanup: remove references to no longer used stdint.h + + Since 19c50b9d (Unconditionally use inttypes.h instead of trying for stdint.h + (simplification) and remove the now unnecessary inclusion in + pcre2_internal.h., 2018-11-14), stdint.h is no longer used. + + Remove checks for it in autotools and CMake and document better the expected + build failures for systems that might have stdint.h (C99) and not inttypes.h + (from POSIX), like old Windows. + + * Cleanup: remove detection for inttypes.h which is a hard dependency + + CMake checks for standard headers are not meant to be used for hard + dependencies, so will prevent a possible fallback to work. + + Alternatively, the header could be checked to make the configuration fail + instead of breaking the build, but that was punted, as it was missing anyway + from autotools. + +5. Merged patch from @carenas (GitHub #32): + + * jit: allow building with ancient MSVC versions + + Visual Studio older than 2013 fails to build with JIT enabled, because it is + unable to parse non C89 compatible syntax, with mixed declarations and code. + While most recent compilers wouldn't even report this as a warning since it + is valid C99, it could be also made visible by adding to gcc/clang the + -Wdeclaration-after-statement flag at build time. + + Move the code below the affected definitions. + + * pcre2grep: avoid mixing declarations with code + + Since d5a61ee8 (Patch to detect (and ignore) symlink loops in pcre2grep, + 2021-08-28), code will fail to build in a strict C89 compiler. + + Reformat slightly to make it C89 compatible again. + + +Version 10.38 01-October-2021 +----------------------------- + +1. Fix invalid single character repetition issues in JIT when the repetition +is inside a capturing bracket and the bracket is preceded by character +literals. + +2. Installed revised CMake configuration files provided by Jan-Willem Blokland. 
+This extends the CMake build system to build both static and shared libraries +in one go, builds the static library with PIC, and exposes PCRE2 libraries +using the CMake config files. JWB provided these notes: + +- Introduced CMake variable BUILD_STATIC_LIBS to build the static library. + +- Make a small modification to config-cmake.h.in by removing the PCRE2_STATIC + variable. Added PCRE2_STATIC variable to the static build using the + target_compile_definitions() function. + +- Extended the CMake config files. + + - Introduced CMake variable PCRE2_USE_STATIC_LIBS to easily switch between + the static and shared libraries. + + - Added the PCRE_STATIC variable to the target compile definitions for the + import of the static library. + +Building static and shared libraries using MSVC results in a name clash of +the libraries. Both static and shared library builds create, for example, the +file pcre2-8.lib. Therefore, I decided to change the static library names by +adding "-static". For example, pcre2-8.lib has become pcre2-8-static.lib. +[Comment by PH: this is MSVC-specific. It doesn't happen on Linux.] + +3. Increased the minimum release number for CMake to 3.0.0 because older than +2.8.12 is deprecated (it was set to 2.8.5) and causes warnings. Even 3.0.0 is +quite old; it was released in 2014. + +4. Implemented a modified version of Thomas Tempelmann's pcre2grep patch for +detecting symlink loops. This is dependent on the availability of realpath(), +which is now tested for in ./configure and CMakeLists.txt. + +5. Implemented a modified version of Thomas Tempelmann's patch for faster +case-independent "first code unit" searches for unanchored patterns in 8-bit +mode in the interpreters. Instead of just remembering whether one case matched +or not, it remembers the position of a previous match so as to avoid +unnecessary repeated searching. + +6. Perl now locks out \K in lookarounds, so PCRE2 now does the same by default. 
+However, just in case anybody was relying on the old behaviour, there is an +option called PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK that enables the old behaviour. +An option has also been added to pcre2grep to enable this. + +7. Re-enable a JIT optimization which was unintentionally disabled in 10.35. + +8. There is a loop counter to catch excessively crazy patterns when checking +the lengths of lookbehinds at compile time. This was incorrectly getting reset +whenever a lookahead was processed, leading to some fuzzer-generated patterns +taking a very long time to compile when (?|) was present in the pattern, +because (?|) disables caching of group lengths. + + +Version 10.37 26-May-2021 +------------------------- + +1. Change RunGrepTest to use tr instead of sed when testing with binary +zero bytes, because sed varies a lot from system to system and has problems +with binary zeros. This is from Bugzilla #2681. Patch from Jeremie +Courreges-Anglas via Nam Nguyen. This fixes RunGrepTest for OpenBSD. Later: +it broke it for at least one version of Solaris, where tr can't handle binary +zeros. However, that system had /usr/xpg4/bin/tr installed, which works OK, so +RunGrepTest now checks for that command and uses it if found. + +2. Compiling with gcc 10.2's -fanalyzer option showed up a hypothetical problem +with a NULL dereference. I don't think this case could ever occur in practice, +but I have put in a check in order to get rid of the compiler error. + +3. An alternative patch for CMakeLists.txt because 10.36 #4 breaks CMake on +Windows. Patch from email@cs-ware.de fixes bugzilla #2688. + +4. Two bugs related to over-large numbers have been fixed so the behaviour is +now the same as Perl. + + (a) A pattern such as /\214748364/ gave an overflow error instead of being + treated as the octal number \214 followed by literal digits. 
+ + (b) A sequence such as {65536 that has no terminating } so is not a + quantifier was nevertheless complaining that a quantifier number was too big. + +5. A run of autoconf suggested that configure.ac was out-of-date with respect +to the latest autoconf. Running autoupdate made some valid changes, some valid +suggestions, and also some invalid changes, which were fixed by hand. Autoconf +now runs clean and the resulting "configure" seems to work, so I hope nothing +is broken. Later: the requirement for autoconf 2.70 broke some automatic test +robots. It doesn't seem to be necessary: trying a reduction to 2.60. + +6. The pattern /a\K.(?0)*/ when matched against "abac" by the interpreter gave +the answer "bac", whereas Perl and JIT both yield "c". This was because the +effect of \K was not propagating back from the full pattern recursion. Other +recursions such as /(a\K.(?1)*)/ did not have this problem. + +7. Restore single character repetition optimization in JIT. Currently fewer +character repetitions are optimized than in 10.34. + +8. When the names of the functions in the POSIX wrapper were changed to +pcre2_regcomp() etc. (see change 10.33 #4 below), functions with the original +names were left in the library so that pre-compiled programs would still work. +However, this has proved troublesome when programs link with several libraries, +some of which use PCRE2 via the POSIX interface while others use a native POSIX +library. For this reason, the POSIX function names are removed in this release. +The macros in pcre2posix.h should ensure that re-compiling fixes any programs +that haven't been compiled since before 10.33. + + +Version 10.36 04-December-2020 +------------------------------ + +1. Add CET_CFLAGS so that when Intel CET is enabled, pass -mshstk to +compiler. This fixes https://bugs.exim.org/show_bug.cgi?id=2578. Patch for +Makefile.am and configure.ac by H.J. Lu. Equivalent patch for CMakeLists.txt +invented by PH. + +2. 
Fix infinite loop when a single byte newline is searched in JIT when +invalid utf8 mode is enabled. + +3. Updated CMakeLists.txt with patch from Wolfgang Stöggl (Bugzilla #2584): + + - Include GNUInstallDirs and use ${CMAKE_INSTALL_LIBDIR} instead of hardcoded + lib. This allows differentiation between lib and lib64. + CMAKE_INSTALL_LIBDIR is used for installation of libraries and also for + pkgconfig file generation. + + - Add the version of PCRE2 to the configuration summary like ./configure + does. + + - Fix typo: MACTHED_STRING->MATCHED_STRING + +4. Updated CMakeLists.txt with another patch from Wolfgang Stöggl (Bugzilla +#2588): + + - Add escaped double quotes around include directory in CMakeLists.txt to + allow spaces in directory names. + + - This fixes a cmake error, if the path of the pcre2 source contains a space. + +5. Updated CMakeLists.txt with a patch from B. Scott Michel: CMake's +documentation suggests using CHECK_SYMBOL_EXISTS over CHECK_FUNCTION_EXIST. +Moreover, these functions come from specific header files, which need to be +specified (and, thankfully, are the same on both the Linux and WinXX +platforms.) + +6. Added a (uint32_t) cast to prevent a compiler warning in pcre2_compile.c. + +7. Applied a patch from Wolfgang Stöggl (Bugzilla #2600) to fix postfix for +debug Windows builds using CMake. This also updated configure so that it +generates *.pc files and pcre2-config with the same content, as in the past. + +8. If a pattern ended with (?(VERSION=n.d where n is any number but d is just a +single digit, the code unit beyond d was being read (i.e. there was a read +buffer overflow). Fixes ClusterFuzz 23779. + +9. After the rework in r1235, certain character ranges were incorrectly +handled by an optimization in JIT. Furthermore a wrong offset was used to +read a value from a buffer which could lead to memory overread. + +10. 
Unnoticed for many years was the fact that delimiters other than / in the +testinput1 and testinput4 files could cause incorrect behaviour when these +files were processed by perltest.sh. There were several tests that used quotes +as delimiters, and it was just luck that they didn't go wrong with perltest.sh. +All the patterns in testinput1 and testinput4 now use / as their delimiter. +This fixes Bugzilla #2641. + +11. Perl has started to give an error for \K within lookarounds (though there +are cases where it doesn't). PCRE2 still allows this, so the tests that include +this case have been moved from test 1 to test 2. + +12. Further to 10 above, pcre2test has been updated to detect and grumble if a +delimiter other than / is used after #perltest. + +13. Fixed a bug with PCRE2_MATCH_INVALID_UTF in 8-bit mode when PCRE2_CASELESS +was set and PCRE2_NO_START_OPTIMIZE was not set. The optimization for finding +the start of a match was not resetting correctly after a failed match on the +first valid fragment of the subject, possibly causing incorrect "no match" +returns on subsequent fragments. For example, the pattern /A/ failed to match +the subject \xe5A. Fixes Bugzilla #2642. + +14. Fixed a bug in character set matching when JIT is enabled and both unicode +scripts and unicode classes are present at the same time. + +15. Added GNU grep's -m (aka --max-count) option to pcre2grep. + +16. Refactored substitution processing in pcre2grep strings, both for the -O +option and when dealing with callouts. There is now a single function that +handles $ expansion in all cases (instead of multiple copies of almost +identical code). This means that the same escape sequences are available +everywhere, which was not previously the case. At the same time, the escape +sequences $x{...} and $o{...} have been introduced, to allow for characters +whose code points are greater than 255 in Unicode mode. + +17. Applied the patch from Bugzilla #2628 to RunGrepTest. 
This does an explicit +test for a version of sed that can handle binary zero, instead of assuming that +any Linux version will work. Later: replaced $(...) by `...` because not all +shells recognize the former. + +18. Fixed a word boundary check bug in JIT when partial matching is enabled. + +19. Fix ARM64 compilation warning in JIT. Patch by Carlo. + +20. A bug in the RunTest script meant that if the first part of test 2 failed, +the failure was not reported. + +21. Test 2 was failing when run from a directory other than the source +directory. This failure was previously missed in RunTest because of 20 above. +Fixes added to both RunTest and RunTest.bat. + +22. Patch to CMakeLists.txt from Daniel to fix problem with testing under +Windows. + + +Version 10.35 09-May-2020 +--------------------------- + +1. Use PCRE2_MATCH_EMPTY flag to detect empty matches in JIT. + +2. Fix ARMv5 JIT improper handling of labels right after a constant pool. + +3. A JIT bug is fixed which allowed to read the fields of the compiled +pattern before its existence is checked. + +4. Back in the PCRE1 day, capturing groups that contained recursive back +references to themselves were made atomic (version 8.01, change 18) because +after the end a repeated group, the captured substrings had their values from +the final repetition, not from an earlier repetition that might be the +destination of a backtrack. This feature was documented, and was carried over +into PCRE2. However, it has now been realized that the major refactoring that +was done for 10.30 has made this atomizing unnecessary, and it is confusing +when users are unaware of it, making some patterns appear not to be working as +expected. Capture values of recursive back references in repeated groups are +now correctly backtracked, so this unnecessary restriction has been removed. + +5. Added PCRE2_SUBSTITUTE_LITERAL. + +6. Avoid some VS compiler warnings. + +7. Added PCRE2_SUBSTITUTE_MATCHED. + +8. 
Added (?* and (?<* as synonyms for (*napla: and (*naplb: to match another +regex engine. The Perl regex folks are aware of this usage and have made a note +about it. + +9. When an assertion is repeated, PCRE2 used to limit the maximum repetition to +1, believing that repeating an assertion is pointless. However, if a positive +assertion contains capturing groups, repetition can be useful. In any case, an +assertion could always be wrapped in a repeated group. The only restriction +that is now imposed is that an unlimited maximum is changed to one more than +the minimum. + +10. Fix *THEN verbs in lookahead assertions in JIT. + +11. Added PCRE2_SUBSTITUTE_REPLACEMENT_ONLY. + +12. The JIT stack should be freed when the low-level stack allocation fails. + +13. In pcre2grep, if the final line in a scanned file is output but does not +end with a newline sequence, add a newline according to the --newline setting. + +14. (?(DEFINE)...) groups were not being handled correctly when checking for +the fixed length of a lookbehind assertion. Such a group within a lookbehind +should be skipped, as it does not contribute to the length of the group. +Instead, the (DEFINE) group was being processed, and if at the end of the +lookbehind, that end was not correctly recognized. Errors such as "lookbehind +assertion is not fixed length" and also "internal error: bad code value in +parsed_skip()" could result. + +15. Put a limit of 1000 on recursive calls in pcre2_study() when searching +nested groups for starting code units, in order to avoid stack overflow issues. +If the limit is reached, it just gives up trying for this optimization. + +16. The control verb chain list must always be restored when exiting from a +recurse function in JIT. + +17. Fix a crash which occurs when the character type of an invalid UTF +character is decoded in JIT. + +18. 
Changes in many areas of the code so that when Unicode is supported and +PCRE2_UCP is set without PCRE2_UTF, Unicode character properties are used for +upper/lower case computations on characters whose code points are greater than +127. + +19. The function for checking UTF-16 validity was returning an incorrect offset +for the start of the error when a high surrogate was not followed by a valid +low surrogate. This caused incorrect behaviour, for example when +PCRE2_MATCH_INVALID_UTF was set and a match started immediately following the +invalid high surrogate, such as /aa/ matching "\x{d800}aa". + +20. If a DEFINE group immediately preceded a lookbehind assertion, the pattern +could be mis-compiled and therefore not match correctly. This is the example +that found this: /(?(DEFINE)(?bar))(? has been raised to +50, (b) the new --om-capture option changes the limit, (c) an error is raised +if -o asks for a group that is above the limit. + +12. The quantifier {1} was always being ignored, but this is incorrect when it +is made possessive and applied to an item in parentheses, because a +parenthesized item may contain multiple branches or other backtracking points, +for example /(a|ab){1}+c/ or /(a+){1}+a/. + +13. For partial matches, pcre2test was always showing the maximum lookbehind +characters, flagged with "<", which is misleading when the lookbehind didn't +actually look behind the start (because it was later in the pattern). Showing +all consulted preceding characters for partial matches is now controlled by the +existing "allusedtext" modifier and, as for complete matches, this facility is +available only for non-JIT matching, because JIT does not maintain the first +and last consulted characters. + +14. DFA matching (using pcre2_dfa_match()) was not recognising a partial match +if the end of the subject was encountered in a lookahead (conditional or +otherwise), an atomic group, or a recursion. + +15. 
Give error if pcre2test -t, -T, -tm or -TM is given an argument of zero. + +16. Check for integer overflow when computing lookbehind lengths. Fixes +Clusterfuzz issue 15636. + +17. Implemented non-atomic positive lookaround assertions. + +18. If a lookbehind contained a lookahead that contained another lookbehind +within it, the nested lookbehind was not correctly processed. For example, if +/(?<=(?=(?<=a)))b/ was matched to "ab" it gave no match instead of matching +"b". + +19. Implemented pcre2_get_match_data_size(). + +20. Two alterations to partial matching: + + (a) The definition of a partial match is slightly changed: if a pattern + contains any lookbehinds, an empty partial match may be given, because this + is another situation where adding characters to the current subject can + lead to a full match. Example: /c*+(?<=[bc])/ with subject "ab". + + (b) Similarly, if a pattern could match an empty string, an empty partial + match may be given. Example: /(?![ab]).*/ with subject "ab". This case + applies only to PCRE2_PARTIAL_HARD. + + (c) An empty string partial hard match can be returned for \z and \Z as it + is documented that they shouldn't match. + +21. A branch that started with (*ACCEPT) was not being recognized as one that +could match an empty string. + +22. Corrected pcre2_set_character_tables() tables data type: was const unsigned +char * instead of const uint8_t *, as generated by pcre2_maketables(). + +23. Upgraded to Unicode 12.1.0. + +24. Add -jitfast command line option to pcre2test (to make all the jit options +available directly). + +25. Make pcre2test -C show if libreadline or libedit is supported. + +26. If the length of one branch of a group exceeded 65535 (the maximum value +that is remembered as a minimum length), the whole group's length was +incorrectly recorded as 65535, leading to incorrect "no match" when start-up +optimizations were in force. + +27. 
The "rightmost consulted character" value was not always correct; in +particular, if a pattern ended with a negative lookahead, characters that were +inspected in that lookahead were not included. + +28. Add the pcre2_maketables_free() function. + +29. The start-up optimization that looks for a unique initial matching +code unit in the interpretive engines uses memchr() in 8-bit mode. When the +search is caseless, it was doing so inefficiently, which ended up slowing down +the match drastically when the subject was very long. The revised code (a) +remembers if one case is not found, so it never repeats the search for that +case after a bumpalong and (b) when one case has been found, it searches only +up to that position for an earlier occurrence of the other case. This fix +applies to both interpretive pcre2_match() and to pcre2_dfa_match(). + +30. While scanning to find the minimum length of a group, if any branch has +minimum length zero, there is no need to scan any subsequent branches (a small +compile-time performance improvement). + +31. Installed a .gitignore file on a user's suggestion. When using the svn +repository with git (through git svn) this helps keep it tidy. + +32. Add underflow check in JIT which may occur when the value of subject +string pointer is close to 0. + +33. Arrange for classes such as [Aa] which contain just the two cases of the +same character, to be treated as a single caseless character. This causes the +first and required code unit optimizations to kick in where relevant. + +34. Improve the bitmap of starting bytes for positive classes that include wide +characters, but no property types, in UTF-8 mode. Previously, on encountering +such a class, the bits for all bytes greater than \xc4 were set, thus +specifying any character with codepoint >= 0x100. Now the only bits that are +set are for the relevant bytes that start the wide characters. This can give a +noticeable performance improvement. + +35. 
If the bitmap of starting code units contains only 1 or 2 bits, replace it +with a single starting code unit (1 bit) or a caseless single starting code +unit if the two relevant characters are case-partners. This is particularly +relevant to the 8-bit library, though it applies to all. It can give a +performance boost for patterns such as [Ww]ord and (word|WORD). However, this +optimization doesn't happen if there is a "required" code unit of the same +value (because the search for a "required" code unit starts at the match start +for non-unique first code unit patterns, but after a unique first code unit, +and patterns such as a*a need the former action). + +36. Small patch to pcre2posix.c to set the erroroffset field to -1 immediately +after a successful compile, instead of at the start of matching to avoid a +sanitizer complaint (regexec is supposed to be thread safe). + +37. Add NEON vectorization to JIT to speed up matching of first character and +pairs of characters on ARM64 CPUs. + +38. If a non-ASCII character was the first in a starting assertion in a +caseless match, the "first code unit" optimization did not get the casing +right, and the assertion failed to match a character in the other case if it +did not start with the same code unit. + +39. Fixed the incorrect computation of jump sizes on x86 CPUs in JIT. A masking +operation was incorrectly removed in r1136. Reported by Ralf Junker. + + +Version 10.33 16-April-2019 +--------------------------- + +1. Added "allvector" to pcre2test to make it easy to check the part of the +ovector that shouldn't be changed, in particular after substitute and failed or +partial matches. + +2. Fix subject buffer overread in JIT when UTF is disabled and \X or \R has +a greater than 1 fixed quantifier. This issue was found by Yunho Kim. + +3. Added support for callouts from pcre2_substitute(). 
After 10.33-RC1, but +prior to release, fixed a bug that caused a crash if pcre2_substitute() was +called with a NULL match context. + +4. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper +functions that use the standard POSIX names. However, in pcre2posix.h the POSIX +names are defined as macros. This should help avoid linking with the wrong +library in some environments while still exporting the POSIX names for +pre-existing programs that use them. (The Debian alternative names are also +defined as macros, but not documented.) + +5. Fix an xclass matching issue in JIT. + +6. Implement PCRE2_EXTRA_ESCAPED_CR_IS_LF (see Bugzilla 2315). + +7. Implement the Perl 5.28 experimental alphabetic names for atomic groups and +lookaround assertions, for example, (*pla:...) and (*atomic:...). These are +characterized by a lower case letter following (* and to simplify coding for +this, the character tables created by pcre2_maketables() were updated to add a +new "is lower case letter" bit. At the same time, the now unused "is +hexadecimal digit" bit was removed. The default tables in +src/pcre2_chartables.c.dist are updated. + +8. Implement the new Perl "script run" features (*script_run:...) and +(*atomic_script_run:...) aka (*sr:...) and (*asr:...). + +9. Fixed two typos in change 22 for 10.21, which added special handling for +ranges such as a-z in EBCDIC environments. The original code probably never +worked, though there were no bug reports. + +10. Implement PCRE2_COPY_MATCHED_SUBJECT for pcre2_match() (including JIT via +pcre2_match()) and pcre2_dfa_match(), but *not* the pcre2_jit_match() fast +path. Also, when a match fails, set the subject field in the match data to NULL +for tidiness - none of the substring extractors should reference this after +match failure. + +11. If a pattern started with a subroutine call that had a quantifier with a +minimum of zero, an incorrect "match must start with this character" could be +recorded. 
Example: /(?&xxx)*ABC(?<xxx>XYZ)/ would (incorrectly) expect 'A' to +be the first character of a match. + +12. The heap limit checking code in pcre2_dfa_match() could suffer from +overflow if the heap limit was set very large. This could cause incorrect "heap +limit exceeded" errors. + +13. Add "kibibytes" to the heap limit output from pcre2test -C to make the +units clear. + +14. Add a call to pcre2_jit_free_unused_memory() in pcre2grep, for tidiness. + +15. Updated the VMS-specific code in pcre2test on the advice of a VMS user. + +16. Removed the unnecessary inclusion of stdint.h (or inttypes.h) from +pcre2_internal.h as it is now included by pcre2.h. Also, change 17 for 10.32 +below was unnecessarily complicated, as inttypes.h is a Standard C header, +which is defined to be a superset of stdint.h. Instead of conditionally +including stdint.h or inttypes.h, pcre2.h now unconditionally includes +inttypes.h. This supports environments that do not have stdint.h but do have +inttypes.h, which are known to exist. A note in the autotools documentation +says (November 2018) that there are none known that are the other way round. + +17. Added --disable-percent-zt to "configure" (and equivalent to CMake) to +forcibly disable the use of %zu and %td in formatting strings because there is +at least one version of VMS that claims to be C99 but does not support these +modifiers. + +18. Added --disable-pcre2grep-callout-fork, which restricts the callout support +in pcre2grep to the inbuilt echo facility. This may be useful in environments +that do not support fork(). + +19. Fix two instances of <= 0 being applied to unsigned integers (the VMS +compiler complains). + +20. Added "fork" support for VMS to pcre2grep, for running an external program +via a string callout. + +21. Improve MAP_JIT flag usage on MacOS. Patch by Rich Siegel. + +22. If a pattern started with (*MARK), (*COMMIT), (*PRUNE), (*SKIP), or (*THEN) +followed by ^ it was not recognized as anchored. + +23. 
The RunGrepTest script used to cut out the test of NUL characters for +Solaris and MacOS as printf and sed can't handle them. It seems that the *BSD +systems can't either. I've inverted the test so that only those OS that are +known to work (currently only Linux) try to run this test. + +24. Some tests in RunGrepTest appended to testtrygrep from two different file +descriptors instead of redirecting stderr to stdout. This worked on Linux, but +it was reported not to on other systems, causing the tests to fail. + +25. In the RunTest script, make the test for stack setting use the same value +for the stack as it needs for -bigstack. + +26. Insert a cast in pcre2_dfa_match.c to suppress a compiler warning. + +26. With PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL set, escape sequences such as \s +which are valid in character classes, but not as the end of ranges, were being +treated as literals. An example is [_-\s] (but not [\s-_] because that gave an +error at the *start* of a range). Now an "invalid range" error is given +independently of PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL. + +27. Related to 26 above, PCRE2_BAD_ESCAPE_IS_LITERAL was affecting known escape +sequences such as \eX when they appeared invalidly in a character class. Now +the option applies only to unrecognized or malformed escape sequences. + +28. Fix word boundary in JIT compiler. Patch by Mike Munday. + +29. The pcre2_dfa_match() function was incorrectly handling conditional version +tests such as (?(VERSION>=0)...) when the version test was true. Incorrect +processing or a crash could result. + +30. When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in group +names, as Perl does. There was a small bug in this new code, found by +ClusterFuzz 12950, fixed before release. + +31. Implemented PCRE2_EXTRA_ALT_BSUX to support ECMAScript 6's \u{hhh} +construct. + +32. Compile \p{Any} to be the same as . in DOTALL mode, so that it benefits +from auto-anchoring if \p{Any}* starts a pattern. + +33. 
Compile invalid UTF check in JIT test when only pcre32 is enabled. + +34. For some time now, CMake has been warning about the setting of policy +CMP0026 to "OLD" in CMakeLists.txt, and hinting that the feature might be +removed in a future version. A request for CMake expertise on the list produced +no result, so I have now hacked CMakeLists.txt along the lines of some changes +I found on the Internet. The new code no longer needs the policy setting, and +it appears to work fine on Linux. + +35. Setting --enable-jit=auto for an out-of-tree build failed because the +source directory wasn't in the search path for AC_TRY_COMPILE always. Patch +from Ross Burton. + +36. Disable SSE2 JIT optimizations in x86 CPUs when SSE2 is not available. +Patch by Guillem Jover. + +37. Changed expressions such as 1<<10 to 1u<<10 in many places because compiler +warnings were reported. + +38. Using the clang compiler with sanitizing options causes runtime complaints +about truncation for statements such as x = ~x when x is an 8-bit value; it +seems to compute ~x as a 32-bit value. Changing such statements to x = 255 ^ x +gets rid of the warnings. There were also two missing casts in pcre2test. + + +Version 10.32 10-September-2018 +------------------------------- + +1. When matching using the REG_STARTEND feature of the POSIX API with a +non-zero starting offset, unset capturing groups with lower numbers than a +group that did capture something were not being correctly returned as "unset" +(that is, with offset values of -1). + +2. When matching using the POSIX API, pcre2test used to omit listing unset +groups altogether. Now it shows those that come before any actual captures as +"<unset>", as happens for non-POSIX matching. + +3. Running "pcre2test -C" always stated "\R matches CR, LF, or CRLF only", +whatever the build configuration was. It now correctly says "\R matches all +Unicode newlines" in the default case when --enable-bsr-anycrlf has not been +specified. 
Similarly, running "pcre2test -C bsr" never produced the result +ANY. + +4. Matching the pattern /(*UTF)\C[^\v]+\x80/ against an 8-bit string containing +multi-code-unit characters caused bad behaviour and possibly a crash. This +issue was fixed for other kinds of repeat in release 10.20 by change 19, but +repeating character classes were overlooked. + +5. pcre2grep now supports the inclusion of binary zeros in patterns that are +read from files via the -f option. + +6. A small fix to pcre2grep to avoid compiler warnings for -Wformat-overflow=2. + +7. Added --enable-jit=auto support to configure.ac. + +8. Added some dummy variables to the heapframe structure in 16-bit and 32-bit +modes for the benefit of m68k, where pointers can be 16-bit aligned. The +dummies force 32-bit alignment and this ensures that the structure is a +multiple of PCRE2_SIZE, a requirement that is tested at compile time. In other +architectures, alignment requirements take care of this automatically. + +9. When returning an error from pcre2_pattern_convert(), ensure the error +offset is set zero for early errors. + +10. A number of patches for Windows support from Daniel Richard G: + + (a) List of error numbers in Runtest.bat corrected (it was not the same as in + Runtest). + + (b) pcre2grep snprintf() workaround as used elsewhere in the tree. + + (c) Support for non-C99 snprintf() that returns -1 in the overflow case. + +11. Minor tidy of pcre2_dfa_match() code. + +12. Refactored pcre2_dfa_match() so that the internal recursive calls no longer +use the stack for local workspace and local ovectors. Instead, an initial block +of stack is reserved, but if this is insufficient, heap memory is used. The +heap limit parameter now applies to pcre2_dfa_match(). + +13. If a "find limits" test of DFA matching in pcre2test resulted in too many +matches for the ovector, no matches were displayed. + +14. Removed an occurrence of ctrl/Z from test 6 because Windows treats it as +EOF. 
The test looks to have come from a fuzzer. + +15. If PCRE2 was built with a default match limit a lot greater than the +default default of 10 000 000, some JIT tests of the match limit no longer +failed. All such tests now set 10 000 000 as the upper limit. + +16. Another Windows related patch for pcregrep to ensure that WIN32 is +undefined under Cygwin. + +17. Test for the presence of stdint.h and inttypes.h in configure and CMake and +include whichever exists (stdint preferred) instead of unconditionally +including stdint. This makes life easier for old and non-standard systems. + +18. Further changes to improve portability, especially to old and or non- +standard systems: + + (a) Put all printf arguments in RunGrepTest into single, not double, quotes, + and use \0 not \x00 for binary zero. + + (b) Avoid the use of C++ (i.e. BCPL) // comments. + + (c) Parameterize the use of %zu in pcre2test to make it like %td. For both of + these now, if using MSVC or a standard C before C99, %lu is used with a + cast if necessary. + +19. Applied a contributed patch to CMakeLists.txt to increase the stack size +when linking pcre2test with MSVC. This gets rid of a stack overflow error in +the standard set of tests. + +20. Output a warning in pcre2test when ignoring the "altglobal" modifier when +it is given with the "replace" modifier. + +21. In both pcre2test and pcre2_substitute(), with global matching, a pattern +that matched an empty string, but never at the starting match offset, was not +handled in a Perl-compatible way. The pattern /(a(*:1))(?>b)(*SKIP:1)x|.*/ matched against "abc", where the *SKIP +shouldn't find a MARK (because is in an atomic group), but it did. + +26. 
Upgraded the perltest.sh script: (1) #pattern lines can now be used to set +a list of modifiers for all subsequent patterns - only those that the script +recognizes are meaningful; (2) #subject lines can be used to set or unset a +default "mark" modifier; (3) Unsupported #command lines give a warning when +they are ignored; (4) Mark data is output only if the "mark" modifier is +present. + +27. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported. + +28. A (*MARK) name was not being passed back for positive assertions that were +terminated by (*ACCEPT). + +29. Add support for \N{U+dddd}, but only in Unicode mode. + +30. Add support for (?^) for unsetting all imnsx options. + +31. The PCRE2_EXTENDED (/x) option only ever discarded space characters whose +code point was less than 256 and that were recognized by the lookup table +generated by pcre2_maketables(), which uses isspace() to identify white space. +Now, when Unicode support is compiled, PCRE2_EXTENDED also discards U+0085, +U+200E, U+200F, U+2028, and U+2029, which are additional characters defined by +Unicode as "Pattern White Space". This makes PCRE2 compatible with Perl. + +32. In certain circumstances, option settings within patterns were not being +correctly processed. For example, the pattern /((?i)A)(?m)B/ incorrectly +matched "ab". (The (?m) setting lost the fact that (?i) should be reset at the +end of its group during the parse process, but without another setting such as +(?m) the compile phase got it right.) This bug was introduced by the +refactoring in release 10.23. + +33. PCRE2 uses bcopy() if available when memmove() is not, and it used just to +define memmove() as function call to bcopy(). This hasn't been tested for a +long time because in pcre2test the result of memmove() was being used, whereas +bcopy() doesn't return a result. This feature is now refactored always to call +an emulation function when there is no memmove(). The emulation makes use of +bcopy() when available. 
+ +34. When serializing a pattern, set the memctl, executable_jit, and tables +fields (that is, all the fields that contain pointers) to zeros so that the +result of serializing is always the same. These fields are re-set when the +pattern is deserialized. + +35. In a pattern such as /[^\x{100}-\x{ffff}]*[\x80-\xff]/ which has a repeated +negative class with no characters less than 0x100 followed by a positive class +with only characters less than 0x100, the first class was incorrectly being +auto-possessified, causing incorrect match failures. + +36. Removed the character type bit ctype_meta, which dates from PCRE1 and is +not used in PCRE2. + +37. Tidied up unnecessarily complicated macros used in the escapes table. + +38. Since 10.21, the new testoutput8-16-4 file has accidentally been omitted +from distribution tarballs, owing to a typo in Makefile.am which had +testoutput8-16-3 twice. Now fixed. + +39. If the only branch in a conditional subpattern was anchored, the whole +subpattern was treated as anchored, when it should not have been, since the +assumed empty second branch cannot be anchored. Demonstrated by test patterns +such as /(?(1)^())b/ or /(?(?=^))b/. + +40. A repeated conditional subpattern that could match an empty string was +always assumed to be unanchored. Now it is checked just like any other +repeated conditional subpattern, and can be found to be anchored if the minimum +quantifier is one or more. I can't see much use for a repeated anchored +pattern, but the behaviour is now consistent. + +41. Minor addition to pcre2_jit_compile.c to avoid static analyzer complaint +(for an event that could never occur but you had to have external information +to know that). + +42. 
If before the first match in a file that was being searched by pcre2grep +there was a line that was sufficiently long to cause the input buffer to be +expanded, the variable holding the location of the end of the previous match +was being adjusted incorrectly, and could cause an overflow warning from a code +sanitizer. However, as the value is used only to print pending "after" lines +when the next match is reached (and there are no such lines in this case) this +bug could do no damage. + + +Version 10.31 12-February-2018 +------------------------------ + +1. Fix typo (missing ]) in VMS code in pcre2test.c. + +2. Replace the replicated code for matching extended Unicode grapheme sequences +(which got a lot more complicated by change 10.30/49) by a single subroutine +that is called by both pcre2_match() and pcre2_dfa_match(). + +3. Add idempotent guard to pcre2_internal.h. + +4. Add new pcre2_config() options: PCRE2_CONFIG_NEVER_BACKSLASH_C and +PCRE2_CONFIG_COMPILED_WIDTHS. + +5. Cut out \C tests in the JIT regression tests when NEVER_BACKSLASH_C is +defined (e.g. by --enable-never-backslash-C). + +6. Defined public names for all the pcre2_compile() error numbers, and used +the public names in pcre2_convert.c. + +7. Fixed a small memory leak in pcre2test (convert contexts). + +8. Added two casts to compile.c and one to match.c to avoid compiler warnings. + +9. Added code to pcre2grep when compiled under VMS to set the symbol +PCRE2GREP_RC to the exit status, because VMS does not distinguish between +exit(0) and exit(1). + +10. Added the -LM (list modifiers) option to pcre2test. Also made -C complain +about a bad option only if the following argument item does not start with a +hyphen. + +11. pcre2grep was truncating components of file names to 128 characters when +processing files with the -r option, and also (some very odd code) truncating +path names to 512 characters. 
There is now a check on the absolute length of +full path file names, which may be up to 2047 characters long. + +12. When an assertion contained (*ACCEPT) it caused all open capturing groups +to be closed (as for a non-assertion ACCEPT), which was wrong and could lead to +misbehaviour for subsequent references to groups that started outside the +assertion. ACCEPT in an assertion now closes only those groups that were +started within that assertion. Fixes oss-fuzz issues 3852 and 3891. + +13. Multiline matching in pcre2grep was misbehaving if the pattern matched +within a line, and then matched again at the end of the line and over into +subsequent lines. Behaviour was different with and without colouring, and +sometimes context lines were incorrectly printed and/or line endings were lost. +All these issues should now be fixed. + +14. If --line-buffered was specified for pcre2grep when input was from a +compressed file (.gz or .bz2) a segfault occurred. (Line buffering should be +ignored for compressed files.) + +15. Although pcre2_jit_match checks whether the pattern is compiled +in a given mode, it was also expected that at least one mode is available. +This is fixed and pcre2_jit_match returns with PCRE2_ERROR_JIT_BADOPTION +when the pattern is not optimized by JIT at all. + +16. The line number and related variables such as match counts in pcre2grep +were all int variables, causing overflow when files with more than 2147483647 +lines were processed (assuming 32-bit ints). They have all been changed to +unsigned long ints. + +17. If a backreference with a minimum repeat count of zero was first in a +pattern, apart from assertions, an incorrect first matching character could be +recorded. For example, for the pattern /(?=(a))\1?b/, "b" was incorrectly set +as the first character of a match. + +18. Characters in a leading positive assertion are considered for recording a +first character of a match when the rest of the pattern does not provide one. 
+However, a character in a non-assertive group within a leading assertion such +as in the pattern /(?=(a))\1?b/ caused this process to fail. This was an +infelicity rather than an outright bug, because it did not affect the result of +a match, just its speed. (In fact, in this case, the starting 'a' was +subsequently picked up in the study.) + +19. A minor tidy in pcre2_match(): making all PCRE2_ERROR_ returns use "return" +instead of "RRETURN" saves unwinding the backtracks in these cases (only one +didn't). + +20. Allocate a single callout block on the stack at the start of pcre2_match() +and set its never-changing fields once only. Do the same for pcre2_dfa_match(). + +21. Save the extra compile options (set in the compile context) with the +compiled pattern (they were not previously saved), add PCRE2_INFO_EXTRAOPTIONS +to retrieve them, and update pcre2test to show them. + +22. Added PCRE2_CALLOUT_STARTMATCH and PCRE2_CALLOUT_BACKTRACK bits to a new +field callout_flags in callout blocks. The bits are set by pcre2_match(), but +not by JIT or pcre2_dfa_match(). Their settings are shown in pcre2test callouts +if the callout_extra subject modifier is set. These bits are provided to help +with tracking how a backtracking match is proceeding. + +23. Updated the pcre2demo.c demonstration program, which was missing the extra +code for -g that handles the case when \K in an assertion causes the match to +end at the original start point. Also arranged for it to detect when \K causes +the end of a match to be before its start. + +24. Similar to 23 above, strange things (including loops) could happen in +pcre2grep when \K was used in an assertion when --colour was used or in +multiline mode. The "end at original start point" bug is fixed, and if the end +point is found to be before the start point, they are swapped. + +25. 
When PCRE2_FIRSTLINE without PCRE2_NO_START_OPTIMIZE was used in non-JIT +matching (both pcre2_match() and pcre2_dfa_match()) and the matched string +started with the first code unit of a newline sequence, matching failed because +it was not tried at the newline. + +26. Code for giving up a non-partial match after failing to find a starting +code unit anywhere in the subject was missing when searching for one of a +number of code units (the bitmap case) in both pcre2_match() and +pcre2_dfa_match(). This was a missing optimization rather than a bug. + +27. Tidied up the ACROSSCHAR macro to be like FORWARDCHAR and BACKCHAR, using a +pointer argument rather than a code unit value. This should not have affected +the generated code. + +28. The JIT compiler has been updated. + +29. Avoid pointer overflow for unset captures in pcre2_substring_list_get(). +This could not actually cause a crash because it was always used in a memcpy() +call with zero length. + +30. Some internal structures have a variable-length ovector[] as their last +element. Their actual memory is obtained dynamically, giving an ovector of +appropriate length. However, they are defined in the structure as +ovector[NUMBER], where NUMBER is large so that array bound checkers don't +grumble. The value of NUMBER was 10000, but a fuzzer exceeded 5000 capturing +groups, making the ovector larger than this. The number has been increased to +131072, which allows for the maximum number of captures (65535) plus the +overall match. This fixes oss-fuzz issue 5415. + +31. Auto-possessification at the end of a capturing group was dependent on what +follows the group (e.g. /(a+)b/ would auto-possessify the a+) but this caused +incorrect behaviour when the group was called recursively from elsewhere in the +pattern where something different might follow. This bug is an unforeseen +consequence of change #1 for 10.30 - the implementation of backtracking into +recursions. 
Iterators at the ends of capturing groups are no longer considered +for auto-possessification if the pattern contains any recursions. Fixes +Bugzilla #2232. + + +Version 10.30 14-August-2017 +---------------------------- + +1. The main interpreter, pcre2_match(), has been refactored into a new version +that does not use recursive function calls (and therefore the stack) for +remembering backtracking positions. This makes --disable-stack-for-recursion a +NOOP. The new implementation allows backtracking into recursive group calls in +patterns, making it more compatible with Perl, and also fixes some other +hard-to-do issues such as #1887 in Bugzilla. The code is also cleaner because +the old code had a number of fudges to try to reduce stack usage. It seems to +run no slower than the old code. + +A number of bugs in the refactored code were subsequently fixed during testing +before release, but after the code was made available in the repository. These +bugs were never in fully released code, but are noted here for the record. + + (a) If a pattern had fewer capturing parentheses than the ovector supplied in + the match data block, a memory error (detectable by ASAN) occurred after + a match, because the external block was being set from non-existent + internal ovector fields. Fixes oss-fuzz issue 781. + + (b) A pattern with very many capturing parentheses (when the internal frame + size was greater than the initial frame vector on the stack) caused a + crash. A vector on the heap is now set up at the start of matching if the + vector on the stack is not big enough to handle at least 10 frames. + Fixes oss-fuzz issue 783. + + (c) Handling of (*VERB)s in recursions was wrong in some cases. + + (d) Captures in negative assertions that were used as conditions were not + happening if the assertion matched via (*ACCEPT). + + (e) Mark values were not being passed out of recursions. 
+ + (f) Refactor some code in do_callout() to avoid picky compiler warnings about + negative indices. Fixes oss-fuzz issue 1454. + + (g) Similarly refactor the way the variable length ovector is addressed for + similar reasons. Fixes oss-fuzz issue 1465. + +2. Now that pcre2_match() no longer uses recursive function calls (see above), +the "match limit recursion" value seems misnamed. It still exists, and limits +the depth of tree that is searched. To avoid future confusion, it has been +renamed as "depth limit" in all relevant places (--with-depth-limit, +(*LIMIT_DEPTH), pcre2_set_depth_limit(), etc) but the old names are still +available for backwards compatibility. + +3. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers: + + (a) Check for malloc failures when getting memory for the ovector (POSIX) or + the match data block (non-POSIX). + +4. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property +for a character with a code point greater than 0x10ffff (the Unicode maximum) +caused a crash. + +5. If a lookbehind assertion that contained a back reference to a group +appearing later in the pattern was compiled with the PCRE2_ANCHORED option, +undefined actions (often a segmentation fault) could occur, depending on what +other options were set. An example assertion is (?" should be ">=" in opcode check in pcre2_auto_possess.c. + (b) Added some casts to avoid "suspicious implicit sign extension". + (c) Resource leaks in pcre2test in rare error cases. + (d) Avoid warning for never-use case OP_TABLE_LENGTH which is just a fudge + for checking at compile time that tables are the right size. + (e) Add missing "fall through" comment. + +29. Implemented PCRE2_EXTENDED_MORE and related /xx and (?xx) features. + +30. Implement (?n: for PCRE2_NO_AUTO_CAPTURE, because Perl now has this. + +31. If more than one of "push", "pushcopy", or "pushtablescopy" were set in +pcre2test, a crash could occur. + +32. 
Make -bigstack in RunTest allocate a 64MiB stack (instead of 16MiB) so +that all the tests can run with clang's sanitizing options. + +33. Implement extra compile options in the compile context and add the first +one: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. + +34. Implement newline type PCRE2_NEWLINE_NUL. + +35. A lookbehind assertion that had a zero-length branch caused undefined +behaviour when processed by pcre2_dfa_match(). This is oss-fuzz issue 1859. + +36. The match limit value now also applies to pcre2_dfa_match() as there are +patterns that can use up a lot of resources without necessarily recursing very +deeply. (Compare item 10.23/36.) This should fix oss-fuzz #1761. + +37. Implement PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL. + +38. Fix returned offsets from regexec() when REG_STARTEND is used with a +starting offset greater than zero. + +39. Implement REG_PEND (GNU extension) for the POSIX wrapper. + +40. Implement the subject_literal modifier in pcre2test, and allow jitstack on +pattern lines. + +41. Implement PCRE2_LITERAL and use it to support REG_NOSPEC. + +42. Implement PCRE2_EXTRA_MATCH_LINE and PCRE2_EXTRA_MATCH_WORD for the benefit +of pcre2grep. + +43. Re-implement pcre2grep's -F, -w, and -x options using PCRE2_LITERAL, +PCRE2_EXTRA_MATCH_WORD, and PCRE2_EXTRA_MATCH_LINE. This fixes two bugs: + + (a) The -F option did not work for fixed strings containing \E. + (b) The -w option did not work for patterns with multiple branches. + +44. Added configuration options for the SELinux compatible execmem allocator in +JIT. + +45. Increased the limit for searching for a "must be present" code unit in +subjects from 1000 to 2000 for 8-bit searches, since they use memchr() and are +much faster. + +46. Arrange for anchored patterns to record and use "first code unit" data, +because this can give a fast "no match" without searching for a "required code +unit". Previously only non-anchored patterns did this. + +47. 
Upgraded the Unicode tables from Unicode 8.0.0 to Unicode 10.0.0. + +48. Add the callout_no_where modifier to pcre2test. + +49. Update extended grapheme breaking rules to the latest set that are in +Unicode Standard Annex #29. + +50. Added experimental foreign pattern conversion facilities +(pcre2_pattern_convert() and friends). + +51. Change the macro FWRITE, used in pcre2grep, to FWRITE_IGNORE because FWRITE +is defined in a system header in cygwin. Also modified some of the #ifdefs in +pcre2grep related to Windows and Cygwin support. + +52. Change 3(g) for 10.23 was a bit too zealous. If a hyphen that follows a +character class is the last character in the class, Perl does not give a +warning. PCRE2 now also treats this as a literal. + +53. Related to 52, though PCRE2 was throwing an error for [[:digit:]-X] it was +not doing so for [\d-X] (and similar escapes), as is documented. + +54. Fixed a MIPS issue in the JIT compiler reported by Joshua Kinard. + +55. Fixed a "maybe uninitialized" warning for class_uchardata in \p handling in +pcre2_compile() which could never actually trigger (code should have been cut +out when Unicode support is disabled). + + +Version 10.23 14-February-2017 +------------------------------ + +1. Extended pcre2test with the utf8_input modifier so that it is able to +generate all possible 16-bit and 32-bit code unit values in non-UTF modes. + +2. In any wide-character mode (8-bit UTF or any 16-bit or 32-bit mode), without +PCRE2_UCP set, a negative character type such as \D in a positive class should +cause all characters greater than 255 to match, whatever else is in the class. +There was a bug that caused this not to happen if a Unicode property item was +added to such a class, for example [\D\P{Nd}] or [\W\pL]. + +3. There has been a major re-factoring of the pcre2_compile.c file. Most syntax +checking is now done in the pre-pass that identifies capturing groups. This has +reduced the amount of duplication and made the code tidier. 
While doing this, +some minor bugs and Perl incompatibilities were fixed, including: + + (a) \Q\E in the middle of a quantifier such as A+\Q\E+ is now ignored instead + of giving an invalid quantifier error. + + (b) {0} can now be used after a group in a lookbehind assertion; previously + this caused an "assertion is not fixed length" error. + + (c) Perl always treats (?(DEFINE) as a "define" group, even if a group with + the name "DEFINE" exists. PCRE2 now does likewise. + + (d) A recursion condition test such as (?(R2)...) must now refer to an + existing subpattern. + + (e) A conditional recursion test such as (?(R)...) misbehaved if there was a + group whose name began with "R". + + (f) When testing zero-terminated patterns under valgrind, the terminating + zero is now marked "no access". This catches bugs that would otherwise + show up only with non-zero-terminated patterns. + + (g) A hyphen appearing immediately after a POSIX character class (for example + /[[:ascii:]-z]/) now generates an error. Perl does accept this as a + literal, but gives a warning, so it seems best to fail it in PCRE. + + (h) An empty \Q\E sequence may appear after a callout that precedes an + assertion condition (it is, of course, ignored). + +One effect of the refactoring is that some error numbers and messages have +changed, and the pattern offset given for compiling errors is not always the +right-most character that has been read. In particular, for a variable-length +lookbehind assertion it now points to the start of the assertion. Another +change is that when a callout appears before a group, the "length of next +pattern item" that is passed now just gives the length of the opening +parenthesis item, not the length of the whole group. A length of zero is now +given only for a callout at the end of the pattern. Automatic callouts are no +longer inserted before and after explicit callouts in the pattern. 
+ +A number of bugs in the refactored code were subsequently fixed during testing +before release, but after the code was made available in the repository. Many +of the bugs were discovered by fuzzing testing. Several of them were related to +the change from assuming a zero-terminated pattern (which previously had +required non-zero terminated strings to be copied). These bugs were never in +fully released code, but are noted here for the record. + + (a) An overall recursion such as (?0) inside a lookbehind assertion was not + being diagnosed as an error. + + (b) In utf mode, the length of a *MARK (or other verb) name was being checked + in characters instead of code units, which could lead to bad code being + compiled, leading to unpredictable behaviour. + + (c) In extended /x mode, characters whose code was greater than 255 caused + a lookup outside one of the global tables. A similar bug existed for wide + characters in *VERB names. + + (d) The amount of memory needed for a compiled pattern was miscalculated if a + lookbehind contained more than one toplevel branch and the first branch + was of length zero. + + (e) In UTF-8 or UTF-16 modes with PCRE2_EXTENDED (/x) set and a non-zero- + terminated pattern, if a # comment ran on to the end of the pattern, one + or more code units past the end were being read. + + (f) An unterminated repeat at the end of a non-zero-terminated pattern (e.g. + "{2,2") could cause reading beyond the pattern. + + (g) When reading a callout string, if the end delimiter was at the end of the + pattern one further code unit was read. + + (h) An unterminated number after \g' could cause reading beyond the pattern. + + (i) An insufficient memory size was being computed for compiling with + PCRE2_AUTO_CALLOUT. + + (j) A conditional group with an assertion condition used more memory than was + allowed for it during parsing, so too many of them could therefore + overrun a buffer. 
+ + (k) If parsing a pattern exactly filled the buffer, the internal test for + overrun did not check when the final META_END item was added. + + (l) If a lookbehind contained a subroutine call, and the called group + contained an option setting such as (?s), and the PCRE2_ANCHORED option + was set, unpredictable behaviour could occur. The underlying bug was + incorrect code and insufficient checking while searching for the end of + the called subroutine in the parsed pattern. + + (m) Quantifiers following (*VERB)s were not being diagnosed as errors. + + (n) The use of \Q...\E in a (*VERB) name when PCRE2_ALT_VERBNAMES and + PCRE2_AUTO_CALLOUT were both specified caused undetermined behaviour. + + (o) If \Q was preceded by a quantified item, and the following \E was + followed by '?' or '+', and there was at least one literal character + between them, an internal error "unexpected repeat" occurred (example: + /.+\QX\E+/). + + (p) A buffer overflow could occur while sorting the names in the group name + list (depending on the order in which the names were seen). + + (q) A conditional group that started with a callout was not doing the right + check for a following assertion, leading to compiling bad code. Example: + /(?(C'XX))?!XX/ + + (r) If a character whose code point was greater than 0xffff appeared within + a lookbehind that was within another lookbehind, the calculation of the + lookbehind length went wrong and could provoke an internal error. + + (t) The sequence \E- or \Q\E- after a POSIX class in a character class caused + an internal error. Now the hyphen is treated as a literal. + +4. Back references are now permitted in lookbehind assertions when there are +no duplicated group numbers (that is, (?| has not been used), and, if the +reference is by name, there is only one group of that name. The referenced +group must, of course be of fixed length. + +5. 
pcre2test has been upgraded so that, when run under valgrind with valgrind +support enabled, reading past the end of the pattern is detected, both when +compiling and during callout processing. + +6. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back +reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does +not recognize this syntax. + +7. Automatic callouts are no longer generated before and after callouts in the +pattern. + +8. When pcre2test was outputting information from a callout, the caret indicator +for the current position in the subject line was incorrect if it was after an +escape sequence for a character whose code point was greater than \x{ff}. + +9. Change 19 for 10.22 had a typo (PCRE_STATIC_RUNTIME should be +PCRE2_STATIC_RUNTIME). Fix from David Gaussmann. + +10. Added --max-buffer-size to pcre2grep, to allow for automatic buffer +expansion when long lines are encountered. Original patch by Dmitry +Cherniachenko. + +11. If pcre2grep was compiled with JIT support, but the library was compiled +without it (something that neither ./configure nor CMake allow, but it can be +done by editing config.h), pcre2grep was giving a JIT error. Now it detects +this situation and does not try to use JIT. + +12. Added some "const" qualifiers to variables in pcre2grep. + +13. Added Dmitry Cherniachenko's patch for colouring output in Windows +(untested by me). Also, look for GREP_COLOUR or GREP_COLOR if the environment +variables PCRE2GREP_COLOUR and PCRE2GREP_COLOR are not found. + +14. Add the -t (grand total) option to pcre2grep. + +15. A number of bugs have been mended relating to match start-up optimizations +when the first thing in a pattern is a positive lookahead. These all applied +only when PCRE2_NO_START_OPTIMIZE was *not* set: + + (a) A pattern such as (?=.*X)X$ was incorrectly optimized as if it needed + both an initial 'X' and a following 'X'. 
+ (b) Some patterns starting with an assertion that started with .* were + incorrectly optimized as having to match at the start of the subject or + after a newline. There are cases where this is not true, for example, + (?=.*[A-Z])(?=.{8,16})(?!.*[\s]) matches after the start in lines that + start with spaces. Starting .* in an assertion is no longer taken as an + indication of matching at the start (or after a newline). + +16. The "offset" modifier in pcre2test was not being ignored (as documented) +when the POSIX API was in use. + +17. Added --enable-fuzz-support to "configure", causing a non-installed +library containing a test function that can be called by fuzzers to be +compiled. A non-installed binary to run the test function locally, called +pcre2fuzzcheck, is also compiled. + +18. A pattern with PCRE2_DOTALL (/s) set but not PCRE2_NO_DOTSTAR_ANCHOR, and +which started with .* inside a positive lookahead was incorrectly being +compiled as implicitly anchored. + +19. Removed all instances of "register" declarations, as they are considered +obsolete these days and in any case had become very haphazard. + +20. Add strerror() to pcre2test for failed file opening. + +21. Make pcre2test -C list valgrind support when it is enabled. + +22. Add the use_length modifier to pcre2test. + +23. Fix an off-by-one bug in pcre2test for the list of names for 'get' and +'copy' modifiers. + +24. Add PCRE2_CALL_CONVENTION into the prototype declarations in pcre2.h as it +is apparently needed there as well as in the function definitions. (Why did +nobody ask for this in PCRE1?) + +25. Change the _PCRE2_H and _PCRE2_UCP_H guard macros in the header files to +PCRE2_H_IDEMPOTENT_GUARD and PCRE2_UCP_H_IDEMPOTENT_GUARD to be more standard +compliant and unique. + +26. pcre2-config --libs-posix was listing -lpcre2posix instead of +-lpcre2-posix. Also, the CMake build process was building the library with the +wrong name. + +27. 
In pcre2test, give some offset information for errors in hex patterns. +This uses the C99 formatting sequence %td, except for MSVC which doesn't +support it - %lu is used instead. + +28. Implemented pcre2_code_copy_with_tables(), and added pushtablescopy to +pcre2test for testing it. + +29. Fix small memory leak in pcre2test. + +30. Fix out-of-bounds read for partial matching of /./ against an empty string +when the newline type is CRLF. + +31. Fix a bug in pcre2test that caused a crash when a locale was set either in +the current pattern or a previous one and a wide character was matched. + +32. The appearance of \p, \P, or \X in a substitution string when +PCRE2_SUBSTITUTE_EXTENDED was set caused a segmentation fault (NULL +dereference). + +33. If the starting offset was specified as greater than the subject length in +a call to pcre2_substitute() an out-of-bounds memory reference could occur. + +34. When PCRE2 was compiled to use the heap instead of the stack for recursive +calls to match(), a repeated minimizing caseless back reference, or a +maximizing one where the two cases had different numbers of code units, +followed by a caseful back reference, could lose the caselessness of the first +repeated back reference (example: /(Z)(a)\2{1,2}?(?-i)\1X/i should match ZaAAZX +but didn't). + +35. When a pattern is too complicated, PCRE2 gives up trying to find a minimum +matching length and just records zero. Typically this happens when there are +too many nested or recursive back references. If the limit was reached in +certain recursive cases it failed to be triggered and an internal error could +be the result. + +36. The pcre2_dfa_match() function now takes note of the recursion limit for +the internal recursive calls that are used for lookrounds and recursions within +the pattern. + +37. 
More refactoring has got rid of the internal could_be_empty_branch() +function (around 400 lines of code, including comments) by keeping track of +could-be-emptiness as the pattern is compiled instead of scanning compiled +groups. (This would have been much harder before the refactoring of #3 above.) +This lifts a restriction on the number of branches in a group (more than about +1100 would give "pattern is too complicated"). + +38. Add the "-ac" command line option to pcre2test as a synonym for "-pattern +auto_callout". + +39. In a library with Unicode support, incorrect data was compiled for a +pattern with PCRE2_UCP set without PCRE2_UTF if a class required all wide +characters to match (for example, /[\s[:^ascii:]]/). + +40. The callout_error modifier has been added to pcre2test to make it possible +to return PCRE2_ERROR_CALLOUT from a callout. + +41. A minor change to pcre2grep: colour reset is now "<esc>[0m" instead of +"<esc>[00m". + +42. The limit in the auto-possessification code that was intended to catch +overly-complicated patterns and not spend too much time auto-possessifying was +being reset too often, resulting in very long compile times for some patterns. +Now such patterns are no longer completely auto-possessified. + +43. Applied Jason Hood's revised patch for RunTest.bat. + +44. Added a new Windows script RunGrepTest.bat, courtesy of Jason Hood. + +45. Minor cosmetic fix to pcre2test: move a variable that is not used under +Windows into the "not Windows" code. + +46. 
Applied Jason Hood's patches to upgrade pcre2grep under Windows and tidy +some of the code: + + * normalised the Windows condition by ensuring WIN32 is defined; + * enables the callout feature under Windows; + * adds globbing (Microsoft's implementation expands quoted args), + using a tweaked opendirectory; + * implements the is_*_tty functions for Windows; + * --color=always will write the ANSI sequences to file; + * add sequences 4 (underline works on Win10) and 5 (blink as bright + background, relatively standard on DOS/Win); + * remove the (char *) casts for the now-const strings; + * remove GREP_COLOUR (grep's command line allowed the 'u', but not + the environment), parsing GREP_COLORS instead; + * uses the current colour if not set, rather than black; + * add print_match for the undefined case; + * fixes a typo. + +In addition, colour settings containing anything other than digits and +semicolon are ignored, and the colour controls are no longer output for empty +strings. + +47. Detecting patterns that are too large inside the length-measuring loop +saves processing ridiculously long patterns to their end. + +48. Ignore PCRE2_CASELESS when processing \h, \H, \v, and \V in classes as it +just wastes time. In the UTF case it can also produce redundant entries in +XCLASS lists caused by characters with multiple other cases and pairs of +characters in the same "not-x" sublists. + +49. A pattern such as /(?=(a\K))/ can report the end of the match being before +its start; pcre2test was not handling this correctly when using the POSIX +interface (it was OK with the native interface). + +50. In pcre2grep, ignore all JIT compile errors. This means that pcre2grep will +continue to work, falling back to interpretation if anything goes wrong with +JIT. + +51. Applied patches from Christian Persch to configure.ac to make use of the +AC_USE_SYSTEM_EXTENSIONS macro and to test for functions used by the JIT +modules. + +52. 
Minor fixes to pcre2grep from Jason Hood: + * fixed some spacing; + * Windows doesn't usually use single quotes, so I've added a define + to use appropriate quotes [in an example]; + * LC_ALL was displayed as "LCC_ALL"; + * numbers 11, 12 & 13 should end in "th"; + * use double quotes in usage message. + +53. When autopossessifying, skip empty branches without recursion, to reduce +stack usage for the benefit of clang with -fsanitize=address, which uses huge +stack frames. Example pattern: /X?(R||){3335}/. Fixes oss-fuzz issue 553. + +54. A pattern with very many explicit back references to a group that is a long +way from the start of the pattern could take a long time to compile because +searching for the referenced group in order to find the minimum length was +being done repeatedly. Now up to 128 group minimum lengths are cached and the +attempt to find a minimum length is abandoned if there is a back reference to a +group whose number is greater than 128. (In that case, the pattern is so +complicated that this optimization probably isn't worth it.) This fixes +oss-fuzz issue 557. + +55. Issue 32 for 10.22 below was not correctly fixed. If pcre2grep in multiline +mode with --only-matching matched several lines, it restarted scanning at the +next line instead of moving on to the end of the matched string, which can be +several lines after the start. + +56. Applied Jason Hood's new patch for RunGrepTest.bat that updates it in line +with updates to the non-Windows version. + + + +Version 10.22 29-July-2016 +-------------------------- + +1. Applied Jason Hood's patches to RunTest.bat and testdata/wintestoutput3 +to fix problems with running the tests under Windows. + +2. Implemented a facility for quoting literal characters within hexadecimal +patterns in pcre2test, to make it easier to create patterns with just a few +non-printing characters. + +3. Binary zeros are not supported in pcre2test input files. It now detects them +and gives an error. + +4. 
Updated the valgrind parameters in RunTest: (a) changed smc-check=all to +smc-check=all-non-file; (b) changed obj:* in the suppression file to obj:??? so +that it matches only unknown objects. + +5. Updated the maintenance script maint/ManyConfigTests to make it easier to +select individual groups of tests. + +6. When the POSIX wrapper function regcomp() is called, the REG_NOSUB option +used to set PCRE2_NO_AUTO_CAPTURE when calling pcre2_compile(). However, this +disables the use of back references (and subroutine calls), which are supported +by other implementations of regcomp() with REG_NOSUB. Therefore, REG_NOSUB no +longer causes PCRE2_NO_AUTO_CAPTURE to be set, though it still ignores nmatch +and pmatch when regexec() is called. + +7. Because of 6 above, pcre2test has been modified with a new modifier called +posix_nosub, to call regcomp() with REG_NOSUB. Previously the no_auto_capture +modifier had this effect. That option is now ignored when the POSIX API is in +use. + +8. Minor tidies to the pcre2demo.c sample program, including more comments +about its 8-bit-ness. + +9. Detect unmatched closing parentheses and give the error in the pre-scan +instead of later. Previously the pre-scan carried on and could give a +misleading incorrect error message. For example, /(?J)(?'a'))(?'a')/ gave a +message about invalid duplicate group names. + +10. It has happened that pcre2test was accidentally linked with another POSIX +regex library instead of libpcre2-posix. In this situation, a call to regcomp() +(in the other library) may succeed, returning zero, but of course putting its +own data into the regex_t block. In one example the re_pcre2_code field was +left as NULL, which made pcre2test think it had not got a compiled POSIX regex, +so it treated the next line as another pattern line, resulting in a confusing +error message. A check has been added to pcre2test to see if the data returned +from a successful call of regcomp() are valid for PCRE2's regcomp(). 
If they +are not, an error message is output and the pcre2test run is abandoned. The +message points out the possibility of a mis-linking. Hopefully this will avoid +some head-scratching the next time this happens. + +11. A pattern such as /(?<=((?C)0))/, which has a callout inside a lookbehind +assertion, caused pcre2test to output a very large number of spaces when the +callout was taken, making the program appear to loop. + +12. A pattern that included (*ACCEPT) in the middle of a sufficiently deeply +nested set of parentheses of sufficient size caused an overflow of the +compiling workspace (which was diagnosed, but of course is not desirable). + +13. Detect missing closing parentheses during the pre-pass for group +identification. + +14. Changed some integer variable types and put in a number of casts, following +a report of compiler warnings from Visual Studio 2013 and a few tests with +gcc's -Wconversion (which still throws up a lot). + +15. Implemented pcre2_code_copy(), and added pushcopy and #popcopy to pcre2test +for testing it. + +16. Change 66 for 10.21 introduced the use of snprintf() in PCRE2's version of +regerror(). When the error buffer is too small, my version of snprintf() puts a +binary zero in the final byte. Bug #1801 seems to show that other versions do +not do this, leading to bad output from pcre2test when it was checking for +buffer overflow. It no longer assumes a binary zero at the end of a too-small +regerror() buffer. + +17. Fixed typo ("&&" for "&") in pcre2_study(). Fortunately, this could not +actually affect anything, by sheer luck. + +18. Two minor fixes for MSVC compilation: (a) removal of apparently incorrect +"const" qualifiers in pcre2test and (b) defining snprintf as _snprintf for +older MSVC compilers. This has been done both in src/pcre2_internal.h for most +of the library, and also in src/pcre2posix.c, which no longer includes +pcre2_internal.h (see 24 below). + +19. 
Applied Chris Wilson's patch (Bugzilla #1681) to CMakeLists.txt for MSVC +static compilation. Subsequently applied Chris Wilson's second patch, putting +the first patch under a new option instead of being unconditional when +PCRE_STATIC is set. + +20. Updated pcre2grep to set stdout as binary when run under Windows, so as not +to convert \r\n at the ends of reflected lines into \r\r\n. This required +ensuring that other output that is written to stdout (e.g. file names) uses the +appropriate line terminator: \r\n for Windows, \n otherwise. + +21. When a line is too long for pcre2grep's internal buffer, show the maximum +length in the error message. + +22. Added support for string callouts to pcre2grep (Zoltan's patch with PH +additions). + +23. RunTest.bat was missing a "set type" line for test 22. + +24. The pcre2posix.c file was including pcre2_internal.h, and using some +"private" knowledge of the data structures. This is unnecessary; the code has +been re-factored and no longer includes pcre2_internal.h. + +25. A race condition in JIT, reported by Mozilla, is fixed. + +26. Minor code refactor to avoid "array subscript is below array bounds" +compiler warning. + +27. Minor code refactor to avoid "left shift of negative number" warning. + +28. Add a bit more sanity checking to pcre2_serialize_decode() and document +that it expects trusted data. + +29. Fix typo in pcre2_jit_test.c + +30. Due to an oversight, pcre2grep was not making use of JIT when available. +This is now fixed. + +31. The RunGrepTest script is updated to use the valgrind suppressions file +when testing with JIT under valgrind (compare 10.21/51 below). The suppressions +file is updated so that it is now the same as for PCRE1: it suppresses the +Memcheck warnings Addr16 and Cond in unknown objects (that is, JIT-compiled +code). Also changed smc-check=all to smc-check=all-non-file as was done for +RunTest (see 4 above). + +32. Implemented the PCRE2_NO_JIT option for pcre2_match(). + +33. 
Fix typo that gave a compiler error when JIT not supported. + +34. Fix comment describing the returns from find_fixedlength(). + +35. Fix potential negative index in pcre2test. + +36. Calls to pcre2_get_error_message() with error numbers that are never +returned by PCRE2 functions were returning empty strings. Now the error code +PCRE2_ERROR_BADDATA is returned. A facility has been added to pcre2test to +show the texts for given error numbers (i.e. to call pcre2_get_error_message() +and display what it returns) and a few representative error codes are now +checked in RunTest. + +37. Added "&& !defined(__INTEL_COMPILER)" to the test for __GNUC__ in +pcre2_match.c, in anticipation that this is needed for the same reason it was +recently added to pcrecpp.cc in PCRE1. + +38. Using -o with -M in pcre2grep could cause unnecessary repeated output when +the match extended over a line boundary, as it tried to find more matches "on +the same line" - but it was already over the end. + +39. Allow \C in lookbehinds and DFA matching in UTF-32 mode (by converting it +to the same code as '.' when PCRE2_DOTALL is set). + +40. Fix two clang compiler warnings in pcre2test when only one code unit width +is supported. + +41. Upgrade RunTest to automatically re-run test 2 with a large (64MiB) stack +if it fails when running the interpreter with a 16MiB stack (and if changing +the stack size via pcre2test is possible). This avoids having to manually set a +large stack size when testing with clang. + +42. Fix register overwrite in JIT when SSE2 acceleration is enabled. + +43. Detect integer overflow in pcre2test pattern and data repetition counts. + +44. In pcre2test, ignore "allcaptures" after DFA matching. + +45. Fix unaligned accesses on x86. Patch by Marc Mutz. + +46. Fix some more clang compiler warnings. + + +Version 10.21 12-January-2016 +----------------------------- + +1. Improve matching speed of patterns starting with + or * in JIT. + +2. 
Use memchr() to find the first character in an unanchored match in 8-bit +mode in the interpreter. This gives a significant speed improvement. + +3. Removed a redundant copy of the opcode_possessify table in the +pcre2_auto_possessify.c source. + +4. Fix typos in dftables.c for z/OS. + +5. Change 36 for 10.20 broke the handling of [[:>:]] and [[:<:]] in that +processing them could involve a buffer overflow if the following character was +an opening parenthesis. + +6. Change 36 for 10.20 also introduced a bug in processing this pattern: +/((?x)(*:0))#(?'/. Specifically: if a setting of (?x) was followed by a (*MARK) +setting (which (*:0) is), then (?x) did not get unset at the end of its group +during the scan for named groups, and hence the external # was incorrectly +treated as a comment and the invalid (?' at the end of the pattern was not +diagnosed. This caused a buffer overflow during the real compile. This bug was +discovered by Karl Skomski with the LLVM fuzzer. + +7. Moved the pcre2_find_bracket() function from src/pcre2_compile.c into its +own source module to avoid a circular dependency between src/pcre2_compile.c +and src/pcre2_study.c + +8. A callout with a string argument containing an opening square bracket, for +example /(?C$[$)(?<]/, was incorrectly processed and could provoke a buffer +overflow. This bug was discovered by Karl Skomski with the LLVM fuzzer. + +9. The handling of callouts during the pre-pass for named group identification +has been tightened up. + +10. The quantifier {1} can be ignored, whether greedy, non-greedy, or +possessive. This is a very minor optimization. + +11. A possessively repeated conditional group that could match an empty string, +for example, /(?(R))*+/, was incorrectly compiled. + +12. The Unicode tables have been updated to Unicode 8.0.0 (thanks to Christian +Persch). + +13. An empty comment (?#) in a pattern was incorrectly processed and could +provoke a buffer overflow. 
This bug was discovered by Karl Skomski with the +LLVM fuzzer. + +14. Fix infinite recursion in the JIT compiler when certain patterns such as +/(?:|a|){100}x/ are analysed. + +15. Some patterns with character classes involving [: and \\ were incorrectly +compiled and could cause reading from uninitialized memory or an incorrect +error diagnosis. Examples are: /[[:\\](?<[::]/ and /[[:\\](?'abc')[a:]. The +first of these bugs was discovered by Karl Skomski with the LLVM fuzzer. + +16. Pathological patterns containing many nested occurrences of [: caused +pcre2_compile() to run for a very long time. This bug was found by the LLVM +fuzzer. + +17. A missing closing parenthesis for a callout with a string argument was not +being diagnosed, possibly leading to a buffer overflow. This bug was found by +the LLVM fuzzer. + +18. A conditional group with only one branch has an implicit empty alternative +branch and must therefore be treated as potentially matching an empty string. + +19. If (?R was followed by - or + incorrect behaviour happened instead of a +diagnostic. This bug was discovered by Karl Skomski with the LLVM fuzzer. + +20. Another bug that was introduced by change 36 for 10.20: conditional groups +whose condition was an assertion preceded by an explicit callout with a string +argument might be incorrectly processed, especially if the string contained \Q. +This bug was discovered by Karl Skomski with the LLVM fuzzer. + +21. Compiling PCRE2 with the sanitize options of clang showed up a number of +very pedantic coding infelicities and a buffer overflow while checking a UTF-8 +string if the final multi-byte UTF-8 character was truncated. + +22. For Perl compatibility in EBCDIC environments, ranges such as a-z in a +class, where both values are literal letters in the same case, omit the +non-letter EBCDIC code points within the range. + +23. Finding the minimum matching length of complex patterns with back +references and/or recursions can take a long time. 
There is now a cut-off that +gives up trying to find a minimum length when things get too complex. + +24. An optimization has been added that speeds up finding the minimum matching +length for patterns containing repeated capturing groups or recursions. + +25. If a pattern contained a back reference to a group whose number was +duplicated as a result of appearing in a (?|...) group, the computation of the +minimum matching length gave a wrong result, which could cause incorrect "no +match" errors. For such patterns, a minimum matching length cannot at present +be computed. + +26. Added a check for integer overflow in conditions (?(<digits>) and +(?(R<digits>). This omission was discovered by Karl Skomski with the LLVM +fuzzer. + +27. Fixed an issue when \p{Any} inside an xclass did not read the current +character. + +28. If pcre2grep was given the -q option with -c or -l, or when handling a +binary file, it incorrectly wrote output to stdout. + +29. The JIT compiler did not restore the control verb head in case of *THEN +control verbs. This issue was found by Karl Skomski with a custom LLVM fuzzer. + +30. The way recursive references such as (?3) are compiled has been re-written +because the old way was the cause of many issues. Now, conversion of the group +number into a pattern offset does not happen until the pattern has been +completely compiled. This does mean that detection of all infinitely looping +recursions is postponed till match time. In the past, some easy ones were +detected at compile time. This re-writing was done in response to yet another +bug found by the LLVM fuzzer. + +31. A test for a back reference to a non-existent group was missing for items +such as \987. This caused incorrect code to be compiled. This issue was found +by Karl Skomski with a custom LLVM fuzzer. + +32. Error messages for syntax errors following \g and \k were giving inaccurate +offsets in the pattern. + +33. Improve the performance of starting single character repetitions in JIT. + +34. 
(*LIMIT_MATCH=) now gives an error instead of setting the value to 0. + +35. Error messages for syntax errors in *LIMIT_MATCH and *LIMIT_RECURSION now +give the right offset instead of zero. + +36. The JIT compiler should not check repeats after a {0,1} repeat byte code. +This issue was found by Karl Skomski with a custom LLVM fuzzer. + +37. The JIT compiler should restore the control chain for empty possessive +repeats. This issue was found by Karl Skomski with a custom LLVM fuzzer. + +38. A bug which was introduced by the single character repetition optimization +was fixed. + +39. Match limit check added to recursion. This issue was found by Karl Skomski +with a custom LLVM fuzzer. + +40. Arrange for the UTF check in pcre2_match() and pcre2_dfa_match() to look +only at the part of the subject that is relevant when the starting offset is +non-zero. + +41. Improve first character match in JIT with SSE2 on x86. + +42. Fix two assertion fails in JIT. These issues were found by Karl Skomski +with a custom LLVM fuzzer. + +43. Correct the setting of CMAKE_C_FLAGS in CMakeLists.txt (patch from Roy Ivy +III). + +44. Fix bug in RunTest.bat for new test 14, and adjust the script for the added +test (there are now 20 in total). + +45. Fixed a corner case of range optimization in JIT. + +46. Add the ${*MARK} facility to pcre2_substitute(). + +47. Modifier lists in pcre2test were splitting at spaces without the required +commas. + +48. Implemented PCRE2_ALT_VERBNAMES. + +49. Fixed two issues in JIT. These were found by Karl Skomski with a custom +LLVM fuzzer. + +50. The pcre2test program has been extended by adding the #newline_default +command. This has made it possible to run the standard tests when PCRE2 is +compiled with either CR or CRLF as the default newline convention. As part of +this work, the new command was added to several test files and the testing +scripts were modified. 
The pcre2grep tests can now also be run when there is no +LF in the default newline convention. + +51. The RunTest script has been modified so that, when JIT is used and valgrind +is specified, a valgrind suppressions file is set up to ignore "Invalid read of +size 16" errors because these are false positives when the hardware supports +the SSE2 instruction set. + +52. It is now possible to have comment lines amid the subject strings in +pcre2test (and perltest.sh) input. + +53. Implemented PCRE2_USE_OFFSET_LIMIT and pcre2_set_offset_limit(). + +54. Add the null_context modifier to pcre2test so that calling pcre2_compile() +and the matching functions with NULL contexts can be tested. + +55. Implemented PCRE2_SUBSTITUTE_EXTENDED. + +56. In a character class such as [\W\p{Any}] where both a negative-type escape +("not a word character") and a property escape were present, the property +escape was being ignored. + +57. Fixed integer overflow for patterns whose minimum matching length is very, +very large. + +58. Implemented --never-backslash-C. + +59. Change 55 above introduced a bug by which certain patterns provoked the +erroneous error "\ at end of pattern". + +60. The special sequences [[:<:]] and [[:>:]] gave rise to incorrect compiling +errors or other strange effects if compiled in UCP mode. Found with libFuzzer +and AddressSanitizer. + +61. Whitespace at the end of a pcre2test pattern line caused a spurious error +message if there were only single-character modifiers. It should be ignored. + +62. The use of PCRE2_NO_AUTO_CAPTURE could cause incorrect compilation results +or segmentation errors for some patterns. Found with libFuzzer and +AddressSanitizer. + +63. Very long names in (*MARK) or (*THEN) etc. items could provoke a buffer +overflow. + +64. Improve error message for overly-complicated patterns. + +65. Implemented an optional replication feature for patterns in pcre2test, to +make it easier to test long repetitive patterns. 
The tests for 63 above are +converted to use the new feature. + +66. In the POSIX wrapper, if regerror() was given too small a buffer, it could +misbehave. + +67. In pcre2_substitute() in UTF mode, the UTF validity check on the +replacement string was happening before the length setting when the replacement +string was zero-terminated. + +68. In pcre2_substitute() in UTF mode, PCRE2_NO_UTF_CHECK can be set for the +second and subsequent calls to pcre2_match(). + +69. There was no check for integer overflow for a replacement group number in +pcre2_substitute(). An added check for a number greater than the largest group +number in the pattern means this is not now needed. + +70. The PCRE2-specific VERSION condition didn't work correctly if only one +digit was given after the decimal point, or if more than two digits were given. +It now works with one or two digits, and gives a compile time error if more are +given. + +71. In pcre2_substitute() there was the possibility of reading one code unit +beyond the end of the replacement string. + +72. The code for checking a subject's UTF-32 validity for a pattern with a +lookbehind involved an out-of-bounds pointer, which could potentially cause +trouble in some environments. + +73. The maximum lookbehind length was incorrectly calculated for patterns such +as /(?<=(a)(?-1))x/ which have a recursion within a backreference. + +74. Give an error if a lookbehind assertion is longer than 65535 code units. + +75. Give an error in pcre2_substitute() if a match ends before it starts (as a +result of the use of \K). + +76. Check the length of subpattern names and the names in (*MARK:xx) etc. +dynamically to avoid the possibility of integer overflow. + +77. Implement pcre2_set_max_pattern_length() so that programs can restrict the +size of patterns that they are prepared to handle. + +78. (*NO_AUTO_POSSESS) was not working. + +79. 
Adding group information caching improves the speed of compiling when +checking whether a group has a fixed length and/or could match an empty string, +especially when recursion or subroutine calls are involved. However, this +cannot be used when (?| is present in the pattern because the same number may +be used for groups of different sizes. To catch runaway patterns in this +situation, counts have been introduced to the functions that scan for empty +branches or compute fixed lengths. + +80. Allow for the possibility of the size of the nest_save structure not being +a factor of the size of the compiling workspace (it currently is). + +81. Check for integer overflow in minimum length calculation and cap it at +65535. + +82. Small optimizations in code for finding the minimum matching length. + +83. Lock out configuring for EBCDIC with non-8-bit libraries. + +84. Test for error code <= 0 in regerror(). + +85. Check for too many replacements (more than INT_MAX) in pcre2_substitute(). + +86. Avoid the possibility of computing with an out-of-bounds pointer (though +not dereferencing it) while handling lookbehind assertions. + +87. Failure to get memory for the match data in regcomp() is now given as a +regcomp() error instead of waiting for regexec() to pick it up. + +88. In pcre2_substitute(), ensure that CRLF is not split when it is a valid +newline sequence. + +89. Paranoid check in regcomp() for bad error code from pcre2_compile(). + +90. Run test 8 (internal offsets and code sizes) for link sizes 3 and 4 as well +as for link size 2. + +91. Document that JIT has a limit on pattern size, and give more information +about JIT compile failures in pcre2test. + +92. Implement PCRE2_INFO_HASBACKSLASHC. + +93. Re-arrange valgrind support code in pcre2test to avoid spurious reports +with JIT (possibly caused by SSE2?). + +94. Support offset_limit in JIT. + +95. 
A sequence such as [[:punct:]b] that is, a POSIX character class followed +by a single ASCII character in a class item, was incorrectly compiled in UCP +mode. The POSIX class got lost, but only if the single character followed it. + +96. [:punct:] in UCP mode was matching some characters in the range 128-255 +that should not have been matched. + +97. If [:^ascii:] or [:^xdigit:] are present in a non-negated class, all +characters with code points greater than 255 are in the class. When a Unicode +property was also in the class (if PCRE2_UCP is set, escapes such as \w are +turned into Unicode properties), wide characters were not correctly handled, +and could fail to match. + +98. In pcre2test, make the "startoffset" modifier a synonym of "offset", +because it sets the "startoffset" parameter for pcre2_match(). + +99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between +an item and its qualifier (for example, A(?#comment)?B) pcre2_compile() +misbehaved. This bug was found by the LLVM fuzzer. + +100. The error for an invalid UTF pattern string always gave the code unit +offset as zero instead of where the invalidity was found. + +101. Further to 97 above, negated classes such as [^[:^ascii:]\d] were also not +working correctly in UCP mode. + +102. Similar to 99 above, if an isolated \E was present between an item and its +qualifier when PCRE2_AUTO_CALLOUT was set, pcre2_compile() misbehaved. This bug +was found by the LLVM fuzzer. + +103. The POSIX wrapper function regexec() crashed if the option REG_STARTEND +was set when the pmatch argument was NULL. It now returns REG_INVARG. + +104. Allow for up to 32-bit numbers in the ordin() function in pcre2grep. + +105. An empty \Q\E sequence between an item and its qualifier caused +pcre2_compile() to misbehave when auto callouts were enabled. This bug +was found by the LLVM fuzzer. + +106. 
If both PCRE2_ALT_VERBNAMES and PCRE2_EXTENDED were set, and a (*MARK) or +other verb "name" ended with whitespace immediately before the closing +parenthesis, pcre2_compile() misbehaved. Example: /(*:abc )/, but only when +both those options were set. + +107. In a number of places pcre2_compile() was not handling NULL characters +correctly, and pcre2test with the "bincode" modifier was not always correctly +displaying fields containing NULLS: + + (a) Within /x extended #-comments + (b) Within the "name" part of (*MARK) and other *verbs + (c) Within the text argument of a callout + +108. If a pattern that was compiled with PCRE2_EXTENDED started with white +space or a #-type comment that was followed by (?-x), which turns off +PCRE2_EXTENDED, and there was no subsequent (?x) to turn it on again, +pcre2_compile() assumed that (?-x) applied to the whole pattern and +consequently mis-compiled it. This bug was found by the LLVM fuzzer. The fix +for this bug means that a setting of any of the (?imsxJU) options at the start +of a pattern is no longer transferred to the options that are returned by +PCRE2_INFO_ALLOPTIONS. In fact, this was an anachronism that should have +changed when the effects of those options were all moved to compile time. + +109. An escaped closing parenthesis in the "name" part of a (*verb) when +PCRE2_ALT_VERBNAMES was set caused pcre2_compile() to malfunction. This bug +was found by the LLVM fuzzer. + +110. Implemented PCRE2_SUBSTITUTE_UNSET_EMPTY, and updated pcre2test to make it +possible to test it. + +111. "Harden" pcre2test against ridiculously large values in modifiers and +command line arguments. + +112. Implemented PCRE2_SUBSTITUTE_UNKNOWN_UNSET and PCRE2_SUBSTITUTE_OVERFLOW_ +LENGTH. + +113. Fix printing of *MARK names that contain binary zeroes in pcre2test. + + +Version 10.20 30-June-2015 +-------------------------- + +1. Callouts with string arguments have been added. + +2. Assertion code generator in JIT has been optimized. + +3. 
The invalid pattern (?(?C) has a missing assertion condition at the end. The +pcre2_compile() function read past the end of the input before diagnosing an +error. This bug was discovered by the LLVM fuzzer. + +4. Implemented pcre2_callout_enumerate(). + +5. Fix JIT compilation of conditional blocks whose assertion is converted to +(*FAIL). E.g: /(?(?!))/. + +6. The pattern /(?(?!)^)/ caused references to random memory. This bug was +discovered by the LLVM fuzzer. + +7. The assertion (?!) is optimized to (*FAIL). This was not handled correctly +when this assertion was used as a condition, for example (?(?!)a|b). In +pcre2_match() it worked by luck; in pcre2_dfa_match() it gave an incorrect +error about an unsupported item. + +8. For some types of pattern, for example /Z*(|d*){216}/, the auto- +possessification code could take exponential time to complete. A recursion +depth limit of 1000 has been imposed to limit the resources used by this +optimization. This infelicity was discovered by the LLVM fuzzer. + +9. In a pattern such as /(*UTF)[\S\V\H]/, which contains a negated special class +such as \S in non-UCP mode, explicit wide characters (> 255) can be ignored +because \S ensures they are all in the class. The code for doing this was +interacting badly with the code for computing the amount of space needed to +compile the pattern, leading to a buffer overflow. This bug was discovered by +the LLVM fuzzer. + +10. A pattern such as /((?2)+)((?1))/ which has mutual recursion nested inside +other kinds of group caused stack overflow at compile time. This bug was +discovered by the LLVM fuzzer. + +11. A pattern such as /(?1)(?#?'){8}(a)/ which had a parenthesized comment +between a subroutine call and its quantifier was incorrectly compiled, leading +to buffer overflow or other errors. This bug was discovered by the LLVM fuzzer. + +12. The illegal pattern /(?(?.*!.*)?)/ was not being diagnosed as missing an +assertion after (?(. 
The code was failing to check the character after (?(?< +for the ! or = that would indicate a lookbehind assertion. This bug was +discovered by the LLVM fuzzer. + +13. A pattern such as /X((?2)()*+){2}+/ which has a possessive quantifier with +a fixed maximum following a group that contains a subroutine reference was +incorrectly compiled and could trigger buffer overflow. This bug was discovered +by the LLVM fuzzer. + +14. Negative relative recursive references such as (?-7) to non-existent +subpatterns were not being diagnosed and could lead to unpredictable behaviour. +This bug was discovered by the LLVM fuzzer. + +15. The bug fixed in 14 was due to an integer variable that was unsigned when +it should have been signed. Some other "int" variables, having been checked, +have either been changed to uint32_t or commented as "must be signed". + +16. A mutual recursion within a lookbehind assertion such as (?<=((?2))((?1))) +caused a stack overflow instead of the diagnosis of a non-fixed length +lookbehind assertion. This bug was discovered by the LLVM fuzzer. + +17. The use of \K in a positive lookbehind assertion in a non-anchored pattern +(e.g. /(?<=\Ka)/) could make pcre2grep loop. + +18. There was a similar problem to 17 in pcre2test for global matches, though +the code there did catch the loop. + +19. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*), +and a subsequent item in the pattern caused a non-match, backtracking over the +repeated \X did not stop, but carried on past the start of the subject, causing +reference to random memory and/or a segfault. There were also some other cases +where backtracking after \C could crash. This set of bugs was discovered by the +LLVM fuzzer. + +20. The function for finding the minimum length of a matching string could take +a very long time if mutual recursion was present many times in a pattern, for +example, /((?2){73}(?2))((?1))/. A better mutual recursion detection method has +been implemented. 
This infelicity was discovered by the LLVM fuzzer. + +21. Implemented PCRE2_NEVER_BACKSLASH_C. + +22. The feature for string replication in pcre2test could read from freed +memory if the replication required a buffer to be extended, and it was not +working properly in 16-bit and 32-bit modes. This issue was discovered by a +fuzzer: see http://lcamtuf.coredump.cx/afl/. + +23. Added the PCRE2_ALT_CIRCUMFLEX option. + +24. Adjust the treatment of \8 and \9 to be the same as the current Perl +behaviour. + +25. Static linking against the PCRE2 library using the pkg-config module was +failing on missing pthread symbols. + +26. If a group that contained a recursive back reference also contained a +forward reference subroutine call followed by a non-forward-reference +subroutine call, for example /.((?2)(?R)\1)()/, pcre2_compile() failed to +compile correct code, leading to undefined behaviour or an internally detected +error. This bug was discovered by the LLVM fuzzer. + +27. Quantification of certain items (e.g. atomic back references) could cause +incorrect code to be compiled when recursive forward references were involved. +For example, in this pattern: /(?1)()((((((\1++))\x85)+)|))/. This bug was +discovered by the LLVM fuzzer. + +28. A repeated conditional group whose condition was a reference by name caused +a buffer overflow if there was more than one group with the given name. This +bug was discovered by the LLVM fuzzer. + +29. A recursive back reference by name within a group that had the same name as +another group caused a buffer overflow. For example: /(?J)(?'d'(?'d'\g{d}))/. +This bug was discovered by the LLVM fuzzer. + +30. A forward reference by name to a group whose number is the same as the +current group, for example in this pattern: /(?|(\k'Pm')|(?'Pm'))/, caused a +buffer overflow at compile time. This bug was discovered by the LLVM fuzzer. + +31. 
Fix -fsanitize=undefined warnings for left shifts of 1 by 31 (it treats 1 +as an int; fixed by writing it as 1u). + +32. Fix pcre2grep compile when -std=c99 is used with gcc, though it still gives +a warning for "fileno" unless -std=gnu99 is used. + +33. A lookbehind assertion within a set of mutually recursive subpatterns could +provoke a buffer overflow. This bug was discovered by the LLVM fuzzer. + +34. Give an error for an empty subpattern name such as (?''). + +35. Make pcre2test give an error if a pattern that follows #forbid_utf contains +\P, \p, or \X. + +36. The way named subpatterns are handled has been refactored. There is now a +pre-pass over the regex which does nothing other than identify named +subpatterns and count the total captures. This means that information about +named patterns is known before the rest of the compile. In particular, it means +that forward references can be checked as they are encountered. Previously, the +code for handling forward references was contorted and led to several errors in +computing the memory requirements for some patterns, leading to buffer +overflows. + +37. There was no check for integer overflow in subroutine calls such as (?123). + +38. The table entry for \l in EBCDIC environments was incorrect, leading to its +being treated as a literal 'l' instead of causing an error. + +39. If a non-capturing group containing a conditional group that could match +an empty string was repeated, it was not identified as matching an empty string +itself. For example: /^(?:(?(1)x|)+)+$()/. + +40. In an EBCDIC environment, pcretest was mishandling the escape sequences +\a and \e in test subject lines. + +41. In an EBCDIC environment, \a in a pattern was converted to the ASCII +instead of the EBCDIC value. + +42. The handling of \c in an EBCDIC environment has been revised so that it is +now compatible with the specification in Perl's perlebcdic page. + +43. Single character repetition in JIT has been improved. 
20-30% speedup +was achieved on certain patterns. + +44. The EBCDIC character 0x41 is a non-breaking space, equivalent to 0xa0 in +ASCII/Unicode. This has now been added to the list of characters that are +recognized as white space in EBCDIC. + +45. When PCRE2 was compiled without Unicode support, the use of \p and \P gave +an error (correctly) when used outside a class, but did not give an error +within a class. + +46. \h within a class was incorrectly compiled in EBCDIC environments. + +47. JIT should return with error when the compiled pattern requires +more stack space than the maximum. + +48. Fixed a memory leak in pcre2grep when a locale is set. + + +Version 10.10 06-March-2015 +--------------------------- + +1. When a pattern is compiled, it remembers the highest back reference so that +when matching, if the ovector is too small, extra memory can be obtained to +use instead. A conditional subpattern whose condition is a check on a capture +having happened, such as, for example in the pattern /^(?:(a)|b)(?(1)A|B)/, is +another kind of back reference, but it was not setting the highest +backreference number. This mattered only if pcre2_match() was called with an +ovector that was too small to hold the capture, and there was no other kind of +back reference (a situation which is probably quite rare). The effect of the +bug was that the condition was always treated as FALSE when the capture could +not be consulted, leading to incorrect behaviour by pcre2_match(). This bug +has been fixed. + +2. Functions for serialization and deserialization of sets of compiled patterns +have been added. + +3. The value that is returned by PCRE2_INFO_SIZE has been corrected to remove +excess code units at the end of the data block that may occasionally occur if +the code for calculating the size over-estimates. This change stops the +serialization code copying uninitialized data, to which valgrind objects. 
The +documentation of PCRE2_INFO_SIZE was incorrect in stating that the size did not +include the general overhead. This has been corrected. + +4. All code units in every slot in the table of group names are now set, again +in order to avoid accessing uninitialized data when serializing. + +5. The (*NO_JIT) feature is implemented. + +6. If a bug that caused pcre2_compile() to use more memory than allocated was +triggered when using valgrind, the code in (3) above passed a stupidly large +value to valgrind. This caused a crash instead of an "internal error" return. + +7. A reference to a duplicated named group (either a back reference or a test +for being set in a conditional) that occurred in a part of the pattern where +PCRE2_DUPNAMES was not set caused the amount of memory needed for the pattern +to be incorrectly calculated, leading to overwriting. + +8. A mutually recursive set of back references such as (\2)(\1) caused a +segfault at compile time (while trying to find the minimum matching length). +The infinite loop is now broken (with the minimum length unset, that is, zero). + +9. If an assertion that was used as a condition was quantified with a minimum +of zero, matching went wrong. In particular, if the whole group had unlimited +repetition and could match an empty string, a segfault was likely. The pattern +(?(?=0)?)+ is an example that caused this. Perl allows assertions to be +quantified, but not if they are being used as conditions, so the above pattern +is faulted by Perl. PCRE2 has now been changed so that it also rejects such +patterns. + +10. The error message for an invalid quantifier has been changed from "nothing +to repeat" to "quantifier does not follow a repeatable item". + +11. If a bad UTF string is compiled with NO_UTF_CHECK, it may succeed, but +scanning the compiled pattern in subsequent auto-possessification can get out +of step and lead to an unknown opcode. Previously this could have caused an +infinite loop. 
Now it generates an "internal error" error. This is a tidyup, +not a bug fix; passing bad UTF with NO_UTF_CHECK is documented as having an +undefined outcome. + +12. A UTF pattern containing a "not" match of a non-ASCII character and a +subroutine reference could loop at compile time. Example: /[^\xff]((?1))/. + +13. The locale test (RunTest 3) has been upgraded. It now checks that a locale +that is found in the output of "locale -a" can actually be set by pcre2test +before it is accepted. Previously, in an environment where a locale was listed +but would not set (an example does exist), the test would "pass" without +actually doing anything. Also the fr_CA locale has been added to the list of +locales that can be used. + +14. Fixed a bug in pcre2_substitute(). If a replacement string ended in a +capturing group number without parentheses, the last character was incorrectly +literally included at the end of the replacement string. + +15. A possessive capturing group such as (a)*+ with a minimum repeat of zero +failed to allow the zero-repeat case if pcre2_match() was called with an +ovector too small to capture the group. + +16. Improved error message in pcre2test when setting the stack size (-S) fails. + +17. Fixed two bugs in CMakeLists.txt: (1) Some lines had got lost in the +transfer from PCRE1, meaning that CMake configuration failed if "build tests" +was selected. (2) The file src/pcre2_serialize.c had not been added to the list +of PCRE2 sources, which caused a failure to build pcre2test. + +18. Fixed typo in pcre2_serialize.c (DECL instead of DEFN) that causes problems +only on Windows. + +19. Use binary input when reading back saved serialized patterns in pcre2test. + +20. Added RunTest.bat for running the tests under Windows. + +21. "make distclean" was not removing config.h, a file that may be created for +use with CMake. + +22. 
A pattern such as "((?2){0,1999}())?", which has a group containing a +forward reference repeated a large (but limited) number of times within a +repeated outer group that has a zero minimum quantifier, caused incorrect code +to be compiled, leading to the error "internal error: previously-checked +referenced subpattern not found" when an incorrect memory address was read. +This bug was reported as "heap overflow", discovered by Kai Lu of Fortinet's +FortiGuard Labs. (Added 24-March-2015: CVE-2015-2325 was given to this.) + +23. A pattern such as "((?+1)(\1))/" containing a forward reference subroutine +call within a group that also contained a recursive back reference caused +incorrect code to be compiled. This bug was reported as "heap overflow", +discovered by Kai Lu of Fortinet's FortiGuard Labs. (Added 24-March-2015: +CVE-2015-2326 was given to this.) + +24. Computing the size of the JIT read-only data in advance has been a source +of various issues, and new ones still appear, unfortunately. To fix +existing and future issues, size computation is eliminated from the code, +and replaced by on-demand memory allocation. + +25. A pattern such as /(?i)[A-`]/, where characters in the other case are +adjacent to the end of the range, and the range contained characters with more +than one other case, caused incorrect behaviour when compiled in UTF mode. In +that example, the range a-j was left out of the class. + + +Version 10.00 05-January-2015 +----------------------------- + +Version 10.00 is the first release of PCRE2, a revised API for the PCRE +library. Changes prior to 10.00 are logged in the ChangeLog file for the old +API, up to item 20 for release 8.36. + +The code of the library was heavily revised as part of the new API +implementation. Details of each and every modification were not individually +logged. In addition to the API changes, the following changes were made. 
They +are either new functionality, or bug fixes and other noticeable changes of +behaviour that were implemented after the code had been forked. + +1. Including Unicode support at build time is now enabled by default, but it +can optionally be disabled. It is not enabled by default at run time (no +change). + +2. The test program, now called pcre2test, was re-specified and almost +completely re-written. Its input is not compatible with input for pcretest. + +3. Patterns may start with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) to set the +PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is +matched by that pattern. + +4. For the benefit of those who use PCRE2 via some other application, that is, +not writing the function calls themselves, it is possible to check the PCRE2 +version by matching a pattern such as /(?(VERSION>=10)yes|no)/ against a +string such as "yesno". + +5. There are case-equivalent Unicode characters whose encodings use different +numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is +theoretically possible for this to happen in UTF-16 too.) If a backreference to +a group containing one of these characters was greedily repeated, and during +the match a backtrack occurred, the subject might be backtracked by the wrong +number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly +(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should +capture the final character, which is the three bytes E2, B1, and A5 in UTF-8. +Incorrect backtracking meant that group 2 captured only the last two bytes. +This bug has been fixed; the new code is slower, but it is used only when the +strings matched by the repetition are not all the same length. + +6. A pattern such as /()a/ was not setting the "first character must be 'a'" +information. This applied to any pattern with a group that matched no +characters, for example: /(?:(?=.)|(? 
0) + { + $line = 0; + $file = shift @ARGV; + + open (IN, $file) || die "Failed to open $file\n"; + + while () + { + $count = 0; + $line++; + if (/^\s*$/) + { + printf "Empty line $line of $file\n"; + $yield = 1; + } + elsif (/^\./) + { + if (!/^\.\s*$| + ^\.B\s+\S| + ^\.TH\s\S| + ^\.SH\s\S| + ^\.SS\s\S| + ^\.TP(?:\s?\d+)?\s*$| + ^\.SM\s*$| + ^\.br\s*$| + ^\.rs\s*$| + ^\.sp\s*$| + ^\.nf\s*$| + ^\.fi\s*$| + ^\.P\s*$| + ^\.PP\s*$| + ^\.\\"(?:\ HREF)?\s*$| + ^\.\\"\sHTML\s\s*$| + ^\.\\"\sHTML\s<\/a>\s*$| + ^\.\\"\s<\/a>\s*$| + ^\.\\"\sJOINSH\s*$| + ^\.\\"\sJOIN\s*$/x + ) + { + printf "Bad control line $line of $file\n"; + $yield = 1; + } + } + elsif (/\\[^ef]|\\f[^IBP]/) + { + printf "Bad backslash in line $line of $file\n"; + $yield = 1; + } + while (/\\f[BI]/g) + { + $count++; + } + while (/\\fP/g) + { + $count--; + } + if ($count != 0) + { + printf "Mismatching formatting in line $line of $file\n"; + $yield = 1; + } + } + + close(IN); + } + +exit $yield; +# End diff --git a/CleanTxt b/CleanTxt new file mode 100755 index 0000000..1f42519 --- /dev/null +++ b/CleanTxt @@ -0,0 +1,113 @@ +#! /usr/bin/perl -w + +# Script to take the output of nroff -man and remove all the backspacing and +# the page footers and the screen commands etc so that it is more usefully +# readable online. In fact, in the latest nroff, intermediate footers don't +# seem to be generated any more. + +$blankcount = 0; +$lastwascut = 0; +$firstheader = 1; + +# Input on STDIN; output to STDOUT. + +while () + { + s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m" + s/.\x8//g; # Remove "char, backspace" + + # Handle header lines. Retain only the first one we encounter, but remove + # the blank line that follows. Any others (e.g. at end of document) and the + # following blank line are dropped. 
+ + if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) + { + if ($firstheader) + { + $firstheader = 0; + print; + $lastprinted = $_; + $lastwascut = 0; + } + $_=; # Remove a blank that follows + next; + } + + # Count runs of empty lines + + if (/^\s*$/) + { + $blankcount++; + $lastwascut = 0; + next; + } + + # If a chunk of lines has been cut out (page footer) and the next line + # has a different indentation, put back one blank line. + + if ($lastwascut && $blankcount < 1 && defined($lastprinted)) + { + ($a) = $lastprinted =~ /^(\s*)/; + ($b) = $_ =~ /^(\s*)/; + $blankcount++ if ($a ne $b); + } + + # We get here only when we have a non-blank line in hand. If it was preceded + # by 3 or more blank lines, read the next 3 lines and see if they are blank. + # If so, remove all 7 lines, and remember that we have just done a cut. + + if ($blankcount >= 3) + { + for ($i = 0; $i < 3; $i++) + { + $next[$i] = ; + $next[$i] = "" if !defined $next[$i]; + $next[$i] =~ s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m" + $next[$i] =~ s/.\x8//g; # Remove "char, backspace" + } + + # Cut out chunks of the form <3 blanks><3 blanks> + + if ($next[0] =~ /^\s*$/ && + $next[1] =~ /^\s*$/ && + $next[2] =~ /^\s*$/) + { + $blankcount -= 3; + $lastwascut = 1; + } + + # Otherwise output the saved blanks, the current, and the next three + # lines. Remember the last printed line. + + else + { + for ($i = 0; $i < $blankcount; $i++) { print "\n"; } + print; + for ($i = 0; $i < 3; $i++) + { + $next[$i] =~ s/.\x8//g; + print $next[$i]; + $lastprinted = $_; + } + $lastwascut = 0; + $blankcount = 0; + } + } + + # This non-blank line is not preceded by 3 or more blank lines. Output + # any blanks there are, and the line. Remember it. Force two blank lines + # before headings. 
+ + else + { + $blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ && + defined($lastprinted); + for ($i = 0; $i < $blankcount; $i++) { print "\n"; } + print; + $lastprinted = $_; + $lastwascut = 0; + $blankcount = 0; + } + } + +# End diff --git a/Detrail b/Detrail new file mode 100755 index 0000000..1c5c7e9 --- /dev/null +++ b/Detrail @@ -0,0 +1,35 @@ +#!/usr/bin/perl + +# This is a script for removing trailing whitespace from lines in files that +# are listed on the command line. + +# This subroutine does the work for one file. + +sub detrail { +my($file) = $_[0]; +my($changed) = 0; +open(IN, "$file") || die "Can't open $file for input"; +@lines = ; +close(IN); +foreach (@lines) + { + if (/\s+\n$/) + { + s/\s+\n$/\n/; + $changed = 1; + } + } +if ($changed) + { + open(OUT, ">$file") || die "Can't open $file for output"; + print OUT @lines; + close(OUT); + } +} + +# This is the main program + +$, = ""; # Output field separator +for ($i = 0; $i < @ARGV; $i++) { &detrail($ARGV[$i]); } + +# End diff --git a/HACKING b/HACKING new file mode 100644 index 0000000..b9fa81c --- /dev/null +++ b/HACKING @@ -0,0 +1,853 @@ +Technical notes about PCRE2 +--------------------------- + +These are very rough technical notes that record potentially useful information +about PCRE2 internals. PCRE2 is a library based on the original PCRE library, +but with a revised (and incompatible) API. To avoid confusion, the original +library is referred to as PCRE1 below. For information about testing PCRE2, see +the pcre2test documentation and the comment at the head of the RunTest file. + +PCRE1 releases were up to 8.3x when PCRE2 was developed, and later bug fix +releases carried on the 8.xx series, up to the final 8.45 release. PCRE2 +releases started at 10.00 to avoid confusion with PCRE1. + + +Historical note 1 +----------------- + +Many years ago I implemented some regular expression functions to an algorithm +suggested by Martin Richards. 
The rather simple patterns were not Unix-like in +form, and were quite restricted in what they could do by comparison with Perl. +The interesting part about the algorithm was that the amount of space required +to hold the compiled form of an expression was known in advance. The code to +apply an expression did not operate by backtracking, as the original Henry +Spencer code and current PCRE2 and Perl code do, but instead checked all +possibilities simultaneously by keeping a list of current states and checking +all of them as it advanced through the subject string. In the terminology of +Jeffrey Friedl's book, it was a "DFA algorithm", though it was not a +traditional Finite State Machine (FSM). When the pattern was all used up, all +remaining states were possible matches, and the one matching the longest subset +of the subject string was chosen. This did not necessarily maximize the +individual wild portions of the pattern, as is expected in Unix and Perl-style +regular expressions. + + +Historical note 2 +----------------- + +By contrast, the code originally written by Henry Spencer (which was +subsequently heavily modified for Perl) compiles the expression twice: once in +a dummy mode in order to find out how much store will be needed, and then for +real. (The Perl version may or may not still do this; I'm talking about the +original library.) The execution function operates by backtracking and +maximizing (or, optionally, minimizing, in Perl) the amount of the subject that +matches individual wild portions of the pattern. This is an "NFA algorithm" in +Friedl's terminology. + + +OK, here's the real stuff +------------------------- + +For the set of functions that formed the original PCRE1 library in 1997 (which +are unrelated to those mentioned above), I tried at first to invent an +algorithm that used an amount of store bounded by a multiple of the number of +characters in the pattern, to save on compiling time. 
However, because of the +greater complexity in Perl regular expressions, I couldn't do this, even though +the then current Perl 5.004 patterns were much simpler than those supported +nowadays. In any case, a first pass through the pattern is helpful for other +reasons. + + +Support for 16-bit and 32-bit data strings +------------------------------------------- + +The PCRE2 library can be compiled in any combination of 8-bit, 16-bit or 32-bit +modes, creating up to three different libraries. In the description that +follows, the word "short" is used for a 16-bit data quantity, and the phrase +"code unit" is used for a quantity that is a byte in 8-bit mode, a short in +16-bit mode and a 32-bit word in 32-bit mode. The names of PCRE2 functions are +given in generic form, without the _8, _16, or _32 suffix. + + +Computing the memory requirement: how it was +-------------------------------------------- + +Up to and including release 6.7, PCRE1 worked by running a very degenerate +first pass to calculate a maximum memory requirement, and then a second pass to +do the real compile - which might use a bit less than the predicted amount of +memory. The idea was that this would turn out faster than the Henry Spencer +code because the first pass is degenerate and the second pass can just store +stuff straight into memory, which it knows is big enough. + + +Computing the memory requirement: how it is +------------------------------------------- + +By the time I was working on a potential 6.8 release, the degenerate first pass +had become very complicated and hard to maintain. Indeed one of the early +things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then +I had a flash of inspiration as to how I could run the real compile function in +a "fake" mode that enables it to compute how much memory it would need, while +in most cases only ever using a small amount of working memory, and without too +many tests of the mode that might slow it down. 
So I refactored the compiling +functions to work this way. This got rid of about 600 lines of source and made +further maintenance and development easier. As this was such a major change, I +never released 6.8, instead upping the number to 7.0 (other quite major changes +were also present in the 7.0 release). + +A side effect of this work was that the previous limit of 200 on the nesting +depth of parentheses was removed. However, there was a downside: compiling ran +more slowly than before (30% or more, depending on the pattern) because it now +did a full analysis of the pattern. My hope was that this would not be a big +issue, and in the event, nobody has commented on it. + +At release 8.34, a limit on the nesting depth of parentheses was re-introduced +(default 250, settable at build time) so as to put a limit on the amount of +system stack used by the compile function, which uses recursive function calls +for nested parenthesized groups. This is a safety feature for environments with +small stacks where the patterns are provided by users. + + +Yet another pattern scan +------------------------ + +History repeated itself for PCRE2 release 10.20. A number of bugs relating to +named subpatterns had been discovered by fuzzers. Most of these were related to +the handling of forward references when it was not known if the named group was +unique. (References to non-unique names use a different opcode and more +memory.) The use of duplicate group numbers (the (?| facility) also caused +issues. + +To get around these problems I adopted a new approach by adding a third pass +over the pattern (really a "pre-pass"), which did nothing other than identify +all the named subpatterns and their corresponding group numbers. This means +that the actual compile (both the memory-computing dummy run and the real +compile) has full knowledge of group names and numbers throughout. Several +dozen lines of messy code were eliminated, though the new pre-pass was not +short. 
In particular, parsing and skipping over [] classes is complicated. + +While working on 10.22 I realized that I could simplify yet again by moving +more of the parsing into the pre-pass, thus avoiding doing it in two places, so +after 10.22 was released, the code underwent yet another big refactoring. This +is how it is from 10.23 onwards: + +The function called parse_regex() scans the pattern characters, parsing them +into literal data and meta characters. It converts escapes such as \x{123} +into literals, handles \Q...\E, and skips over comments and non-significant +white space. The result of the scanning is put into a vector of 32-bit unsigned +integers. Values less than 0x80000000 are literal data. Higher values represent +meta-characters. The top 16-bits of such values identify the meta-character, +and these are given names such as META_CAPTURE. The lower 16-bits are available +for data, for example, the capturing group number. The only situation in which +literal data values greater than 0x7fffffff can appear is when the 32-bit +library is running in non-UTF mode. This is handled by having a special +meta-character that is followed by the 32-bit data value. + +The size of the parsed pattern vector, when auto-callouts are not enabled, is +bounded by the length of the pattern (with one exception). The code is written +so that each item in the pattern uses no more vector elements than the number +of code units in the item itself. The exception is the aforementioned large +32-bit number handling. For this reason, 32-bit non-UTF patterns are scanned in +advance to check for such values. When auto-callouts are enabled, the generous +assumption is made that there will be a callout for each pattern code unit +(which of course is only actually true if all code units are literals) plus one +at the end. A default parsed pattern vector is defined on the system stack, to +minimize memory handling, but if this is not big enough, heap memory is used. 
+ +As before, the actual compiling function is run twice, the first time to +determine the amount of memory needed for the final compiled pattern. It +now processes the parsed pattern vector, not the pattern itself, although some +of the parsed items refer to strings in the pattern - for example, group +names. As escapes and comments have already been processed, the code is a bit +simpler than before. + +Most errors can be diagnosed during the parsing scan. For those that cannot +(for example, "lookbehind assertion is not fixed length"), the parsed code +contains offsets into the pattern so that the actual compiling code can +report where errors are. + + +The elements of the parsed pattern vector +----------------------------------------- + +The word "offset" below means a code unit offset into the pattern. When +PCRE2_SIZE (which is usually size_t) is no bigger than uint32_t, an offset is +stored in a single parsed pattern element. Otherwise (typically on 64-bit +systems) it occupies two elements. The following meta items occupy just one +element, with no data: + +META_ACCEPT (*ACCEPT) +META_ASTERISK * +META_ASTERISK_PLUS *+ +META_ASTERISK_QUERY *? +META_ATOMIC (?> start of atomic group +META_CIRCUMFLEX ^ metacharacter +META_CLASS [ start of non-empty class +META_CLASS_EMPTY [] empty class - only with PCRE2_ALLOW_EMPTY_CLASS +META_CLASS_EMPTY_NOT [^] negative empty class - ditto +META_CLASS_END ] end of non-empty class +META_CLASS_NOT [^ start non-empty negative class +META_COMMIT (*COMMIT) - no argument (see below for with argument) +META_COND_ASSERT (?(?assertion) +META_DOLLAR $ metacharacter +META_DOT . metacharacter +META_END End of pattern (this value is 0x80000000) +META_FAIL (*FAIL) +META_KET ) closing parenthesis +META_LOOKAHEAD (?= start of lookahead +META_LOOKAHEAD_NA (*napla: start of non-atomic lookahead +META_LOOKAHEADNOT (?! start of negative lookahead +META_NOCAPTURE (?: no capture parens +META_PLUS + +META_PLUS_PLUS ++ +META_PLUS_QUERY +? 
+META_PRUNE (*PRUNE) - no argument (see below for with argument) +META_QUERY ? +META_QUERY_PLUS ?+ +META_QUERY_QUERY ?? +META_RANGE_ESCAPED hyphen in class range with at least one escape +META_RANGE_LITERAL hyphen in class range defined literally +META_SKIP (*SKIP) - no argument (see below for with argument) +META_THEN (*THEN) - no argument (see below for with argument) + +The two RANGE values occur only in character classes. They are positioned +between two literals that define the start and end of the range. In an EBCDIC +environment it is necessary to know whether either of the range values was +specified as an escape. In an ASCII/Unicode environment the distinction is not +relevant. + +The following have data in the lower 16 bits, and may be followed by other data +elements: + +META_ALT | alternation +META_BACKREF back reference +META_CAPTURE start of capturing group +META_ESCAPE non-literal escape sequence +META_RECURSE recursion call + +If the data for META_ALT is non-zero, it is inside a lookbehind, and the data +is the maximum length of its branch (see META_LOOKBEHIND below for more +detail). + +META_BACKREF, META_CAPTURE, and META_RECURSE have the capture group number as +their data in the lower 16 bits of the element. META_RECURSE is followed by an +offset, for use in error messages. + +META_BACKREF is followed by an offset if the back reference group number is 10 +or more. The offsets of the first occurrences of references to groups whose +numbers are less than 10 are put in cb->small_ref_offset[] (only the first +occurrence is useful). On 64-bit systems this avoids using more than two parsed +pattern elements for items such as \3. The offset is used when an error occurs +because the reference is to a non-existent group. + +META_ESCAPE has an ESC_xxx value as its data. For ESC_P and ESC_p, the next +element contains the 16-bit type and data property values, packed together. 
+ESC_g and ESC_k are used only for named references - numerical ones are turned +into META_RECURSE or META_BACKREF as appropriate. ESC_g and ESC_k are followed +by a length and an offset into the pattern to specify the name. + +The following have one data item that follows in the next vector element: + +META_BIGVALUE Next is a literal >= META_END +META_POSIX POSIX class item (data identifies the class) +META_POSIX_NEG negative POSIX class item (ditto) + +The following are followed by a length element, then a number of character code +values (which should match with the length): + +META_MARK (*MARK:xxxx) +META_COMMIT_ARG (*COMMIT:xxxx) +META_PRUNE_ARG (*PRUNE:xxxx) +META_SKIP_ARG (*SKIP:xxxx) +META_THEN_ARG (*THEN:xxxx) + +The following are followed by a length element, then an offset in the pattern +that identifies the name: + +META_COND_NAME (?(<name>) or (?('name') or (?(name) +META_COND_RNAME (?(R&name) +META_COND_RNUMBER (?(Rdigits) +META_RECURSE_BYNAME (?&name) +META_BACKREF_BYNAME \k'name' + +META_COND_RNUMBER is used for names that start with R and continue with digits, +because this is an ambiguous case. It could be a back reference to a group with +that name, or it could be a recursion test on a numbered group. + +This one is followed by an offset, for use in error messages, then a number: + +META_COND_NUMBER (?([+-]digits) + +The following is followed just by an offset, for use in error messages: + +META_COND_DEFINE (?(DEFINE) + +The following are at first also followed just by an offset for use in error +messages. After the lengths of the branches of a lookbehind group have been +checked the error offset is no longer needed. The lower 16 bits of the main +word are now set to the maximum length of the first branch of the lookbehind +group, and the second word is set to the minimum matching length for a +variable-length lookbehind group, or to LOOKBEHIND_MAX for a group whose +branches are all of fixed length.
These values are used when generating +OP_REVERSE or OP_VREVERSE for the first branch. The minimum value is also used +for any subsequent branches because there is only room for one value (the +branch maximum length) in a META_ALT item. + +META_LOOKBEHIND (?<= start of lookbehind +META_LOOKBEHIND_NA (*naplb: start of non-atomic lookbehind +META_LOOKBEHINDNOT (?<! start of negative lookbehind + +The following are followed by two elements, the minimum and maximum. The +maximum value is limited to 65535 (MAX_REPEAT_COUNT). A maximum value of +"unlimited" is represented by REPEAT_UNLIMITED, which is bigger than it: + +META_MINMAX {n,m} repeat +META_MINMAX_PLUS {n,m}+ possessive repeat +META_MINMAX_QUERY {n,m}? non-greedy repeat + +This one is followed by two elements, giving the new option settings for the +main and extra options, respectively. + +META_OPTIONS (?i) and friends + +This one is followed by three elements. The first is 0 for '=' and 1 for '>='; +the next two are the major and minor numbers: + +META_COND_VERSION (?(VERSION[>]=x.y) + +Callouts are converted into one of two items: + +META_CALLOUT_NUMBER (?C with numerical argument +META_CALLOUT_STRING (?C with string argument + +In both cases, the next two elements contain the offset and length of the next +item in the pattern. Then there is either one callout number, or a length and +an offset for the string argument. The length includes both delimiters. + + +Traditional matching function +----------------------------- + +The "traditional", and original, matching function is called pcre2_match(), and +it implements an NFA algorithm, similar to the original Henry Spencer algorithm +and the way that Perl works. This is not surprising, since it is intended to be +as compatible with Perl as possible. This is the function most users of PCRE2 +will use most of the time. If PCRE2 is compiled with just-in-time (JIT) +support, and studying a compiled pattern with JIT is successful, the JIT code +is run instead of the normal pcre2_match() code, but the result is the same. + + +Supplementary matching function +------------------------------- + +There is also a supplementary matching function called pcre2_dfa_match(). This +implements a DFA matching algorithm that searches simultaneously for all +possible matches that start at one point in the subject string. (Going back to +my roots: see Historical Note 1 above.) This function interprets the same +compiled pattern data as pcre2_match(); however, not all the facilities are +available, and those that are do not always work in quite the same way.
See the +user documentation for details. + +The algorithm that is used for pcre2_dfa_match() is not a traditional FSM, +because it may have a number of states active at one time. More work would be +needed at compile time to produce a traditional FSM where only one state is +ever active at once. I believe some other regex matchers work this way. JIT +support is not available for this kind of matching. + + +Changeable options +------------------ + +The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL) and +some others may be changed in the middle of patterns by items such as (?i). +Their processing is handled entirely at compile time by generating different +opcodes for the different settings. The runtime functions do not need to keep +track of an option's state. + +PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE +are tracked and processed during the parsing pre-pass. The others are handled +from META_OPTIONS items during the main compile phase. + + +Format of compiled patterns +--------------------------- + +The compiled form of a pattern is a vector of unsigned code units (bytes in +8-bit mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing +items of variable length. The first code unit in an item contains an opcode, +and the length of the item is either implicit in the opcode or contained in the +data that follows it. + +In many cases listed below, LINK_SIZE data values are specified for offsets +within the compiled pattern. LINK_SIZE always specifies a number of bytes. The +default value for LINK_SIZE is 2, except for the 32-bit library, where it can +only be 4. The 8-bit library can be compiled to use 3-byte or 4-byte values, +and the 16-bit library can be compiled to use 4-byte values, though this +impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries is +necessary only when patterns whose compiled length is greater than 65535 code +units are going to be processed. 
When a LINK_SIZE value uses more than one code +unit, the most significant unit is first. + +In this description, we assume the "normal" compilation options. Data values +that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode +(most significant byte first), and one code unit in 16-bit and 32-bit modes. + + +Opcodes with no following data +------------------------------ + +These items are all just one unit long: + + OP_END end of pattern + OP_ANY match any one character other than newline + OP_ALLANY match any one character, including newline + OP_ANYBYTE match any single code unit, even in UTF-8/16 mode + OP_SOD match start of data: \A + OP_SOM start of match (subject + offset): \G + OP_SET_SOM set start of match (\K) + OP_CIRC ^ (start of data) + OP_CIRCM ^ multiline mode (start of data or after newline) + OP_NOT_WORD_BOUNDARY \B + OP_WORD_BOUNDARY \b + OP_NOT_DIGIT \D + OP_DIGIT \d + OP_NOT_HSPACE \H + OP_HSPACE \h + OP_NOT_WHITESPACE \S + OP_WHITESPACE \s + OP_NOT_VSPACE \V + OP_VSPACE \v + OP_NOT_WORDCHAR \W + OP_WORDCHAR \w + OP_EODN match end of data or newline at end: \Z + OP_EOD match end of data: \z + OP_DOLL $ (end of data, or before final newline) + OP_DOLLM $ multiline mode (end of data or before newline) + OP_EXTUNI match an extended Unicode grapheme cluster + OP_ANYNL match any Unicode newline sequence + + OP_ASSERT_ACCEPT ) + OP_ACCEPT ) These are Perl 5.10's "backtracking control + OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing + OP_FAIL ) parentheses, it may be preceded by one or more + OP_PRUNE ) OP_CLOSE, each followed by a number that + OP_SKIP ) indicates which parentheses must be closed. + OP_THEN ) + +OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion. +This ends the assertion, not the entire pattern match. The assertion (?!) is +always optimized to OP_FAIL. + +OP_ALLANY is used for '.' when PCRE2_DOTALL is set.
It is also used for \C in +non-UTF modes and in UTF-32 mode (since one code unit still equals one +character). Another use is for [^] when empty classes are permitted +(PCRE2_ALLOW_EMPTY_CLASS is set). + + +Backtracking control verbs +-------------------------- + +Verbs with no arguments generate opcodes with no following data (as listed +in the section above). + +(*MARK:NAME) generates OP_MARK followed by the mark name, preceded by a +length in one code unit, and followed by a binary zero. The name length is +limited by the size of the code unit. + +(*ACCEPT:NAME) and (*FAIL:NAME) are compiled as (*MARK:NAME)(*ACCEPT) and +(*MARK:NAME)(*FAIL) respectively. + +For (*COMMIT:NAME), (*PRUNE:NAME), (*SKIP:NAME), and (*THEN:NAME), the opcodes +OP_COMMIT_ARG, OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the +name following in the same format as for OP_MARK. + + +Matching literal characters +--------------------------- + +The OP_CHAR opcode is followed by a single character that is to be matched +casefully. For caseless matching of characters that have at most two +case-equivalent code points, OP_CHARI is used. In UTF-8 or UTF-16 modes, the +character may be more than one code unit long. In UTF-32 mode, characters are +always exactly one code unit long. + +If there is only one character in a character class, OP_CHAR or OP_CHARI is +used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is, +for something like [^a]). + +Caseless matching (positive or negative) of characters that have more than two +case-equivalent code points (which is possible only in UTF mode) is handled by +compiling a Unicode property item (see below), with the pseudo-property +PT_CLIST. The value of this property is an offset in a vector called +"ucd_caseless_sets" which identifies the start of a short list of case +equivalent characters, terminated by the value NOTACHAR (0xffffffff). 
+ + +Repeating single characters +--------------------------- + +The common repeats (*, +, ?), when applied to a single character, use the +following opcodes, which come in caseful and caseless versions: + + Caseful Caseless + OP_STAR OP_STARI + OP_MINSTAR OP_MINSTARI + OP_POSSTAR OP_POSSTARI + OP_PLUS OP_PLUSI + OP_MINPLUS OP_MINPLUSI + OP_POSPLUS OP_POSPLUSI + OP_QUERY OP_QUERYI + OP_MINQUERY OP_MINQUERYI + OP_POSQUERY OP_POSQUERYI + +Each opcode is followed by the character that is to be repeated. In ASCII or +UTF-32 modes, these are two-code-unit items; in UTF-8 or UTF-16 modes, the +length is variable. Those with "MIN" in their names are the minimizing +versions. Those with "POS" in their names are possessive versions. Other kinds +of repeat make use of these opcodes: + + Caseful Caseless + OP_UPTO OP_UPTOI + OP_MINUPTO OP_MINUPTOI + OP_POSUPTO OP_POSUPTOI + OP_EXACT OP_EXACTI + +Each of these is followed by a count and then the repeated character. The count +is two bytes long in 8-bit mode (most significant byte first), or one code unit +in 16-bit and 32-bit modes. + +OP_UPTO matches from 0 to the given number. A repeat with a non-zero minimum +and a fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or +OP_MINUPTO or OP_POSUPTO). + +Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI, +etc.) are used for repeated, negated, single-character classes such as [^a]*. +The normal single-character opcodes (OP_STAR, etc.) are used for repeated +positive single-character classes. + + +Repeating character types +------------------------- + +Repeats of things like \d are done exactly as for single characters, except +that instead of a character, the opcode for the type (e.g. OP_DIGIT) is stored +in the next code unit.
The opcodes are: + + OP_TYPESTAR + OP_TYPEMINSTAR + OP_TYPEPOSSTAR + OP_TYPEPLUS + OP_TYPEMINPLUS + OP_TYPEPOSPLUS + OP_TYPEQUERY + OP_TYPEMINQUERY + OP_TYPEPOSQUERY + OP_TYPEUPTO + OP_TYPEMINUPTO + OP_TYPEPOSUPTO + OP_TYPEEXACT + + +Match by Unicode property +------------------------- + +OP_PROP and OP_NOTPROP are used for positive and negative matches of a +character by testing its Unicode property (the \p and \P escape sequences). +Each is followed by two code units that encode the desired property as a type +and a value. The types are a set of #defines of the form PT_xxx, and the values +are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file. +The value is relevant only for PT_GC (General Category), PT_PC (Particular +Category), PT_SC (Script), PT_BIDICL (Bidi Class), PT_BOOL (Boolean property), +and the pseudo-property PT_CLIST, which is used to identify a list of +case-equivalent characters when there are three or more (see above). + +Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by +three code units: OP_PROP or OP_NOTPROP, and then the desired property type and +value. + + +Character classes +----------------- + +If there is only one character in a class, OP_CHAR or OP_CHARI is used for a +positive class, and OP_NOT or OP_NOTI for a negative one (that is, for +something like [^a]), except when caselessly matching a character that has more +than two case-equivalent code points (which can happen only in UTF mode). In +this case a Unicode property item is used, as described above in "Matching +literal characters". + +A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated, +negated, single-character classes. The normal single-character opcodes +(OP_STAR, etc.) are used for repeated positive single-character classes. + +When there is more than one character in a class, and all the code points are +less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a +negative one. 
In either case, the opcode is followed by a 32-byte (16-short, +8-word) bit map containing a 1 bit for every character that is acceptable. The +bits are counted from the least significant end of each unit. In caseless mode, +bits for both cases are set. + +The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 and +16-bit and 32-bit modes, subject characters with values greater than 255 can be +handled correctly. For OP_CLASS they do not match, whereas for OP_NCLASS they +do. + +For classes containing characters with values greater than 255 or that contain +\p or \P, OP_XCLASS is used. It optionally uses a bit map if any acceptable +code points are less than 256, followed by a list of pairs (for a range) and/or +single characters and/or properties. In caseless mode, all equivalent +characters are explicitly listed. + +OP_XCLASS is followed by a LINK_SIZE value containing the total length of the +opcode and its data. This is followed by a code unit containing flag bits: +XCL_NOT indicates that this is a negative class, and XCL_MAP indicates that a +bit map is present. There follows the bit map, if XCL_MAP is set, and then a +sequence of items coded as follows: + + XCL_END marks the end of the list + XCL_SINGLE one character follows + XCL_RANGE two characters follow + XCL_PROP a Unicode property (type, value) follows + XCL_NOTPROP a Unicode property (type, value) follows + +If a range starts with a code point less than 256 and ends with one greater +than 255, it is split into two ranges, with characters less than 256 being +indicated in the bit map, and the rest with XCL_RANGE. + +When XCL_NOT is set, the bit map, if present, contains bits for characters that +are allowed (exactly as for OP_NCLASS), but the list of items that follow it +specifies characters and properties that are not allowed. 
+ + +Back references +--------------- + +OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the +reference number when the reference is to a unique capturing group (either by +number or by name). When named groups are used, there may be more than one +group with the same name. In this case, a reference to such a group by name +generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index +(not the byte offset) in the group name table of the first entry for the +required name, followed by the number of groups with the same name. The +matching code can then search for the first one that is set. + + +Repeating character classes and back references +----------------------------------------------- + +Single-character classes are handled specially (see above). This section +applies to other classes and also to back references. In both cases, the repeat +information follows the base item. The matching code looks at the following +opcode to see if it is one of these: + + OP_CRSTAR + OP_CRMINSTAR + OP_CRPOSSTAR + OP_CRPLUS + OP_CRMINPLUS + OP_CRPOSPLUS + OP_CRQUERY + OP_CRMINQUERY + OP_CRPOSQUERY + OP_CRRANGE + OP_CRMINRANGE + OP_CRPOSRANGE + +All but the last three are single-code-unit items, with no data. The range +opcodes are followed by the minimum and maximum repeat counts. + + +Brackets and alternation +------------------------ + +A pair of non-capturing round brackets is wrapped round each expression at +compile time, so alternation always happens in the context of brackets. + +[Note for North Americans: "bracket" to some English speakers, including +myself, can be round, square, curly, or pointy. Hence this usage rather than +"parentheses".] + +Non-capturing brackets use the opcode OP_BRA, capturing brackets use OP_CBRA. A +bracket opcode is followed by a LINK_SIZE value which gives the offset to the +next alternative OP_ALT or, if there aren't any branches, to the terminating +opcode. 
Each OP_ALT is followed by a LINK_SIZE value giving the offset to the +next one, or to the final opcode. For capturing brackets, the bracket number is +a count that immediately follows the offset. + +There are several opcodes that mark the end of a subpattern group. OP_KET is +used for subpatterns that do not repeat indefinitely, OP_KETRMIN and +OP_KETRMAX are used for indefinite repetitions, minimally or maximally +respectively, and OP_KETRPOS for possessive repetitions (see below for more +details). All four are followed by a LINK_SIZE value giving (as a positive +number) the offset back to the matching opening bracket opcode. + +If a subpattern is quantified such that it is permitted to match zero times, it +is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are +single-unit opcodes that tell the matcher that skipping the following +subpattern entirely is a valid match. In the case of the first two, not +skipping the pattern is also valid (greedy and non-greedy). The third is used +when a pattern has the quantifier {0,0}. It cannot be entirely discarded, +because it may be called as a subroutine from elsewhere in the pattern. + +A subpattern with an indefinite maximum repetition is replicated in the +compiled data its minimum number of times (or once with OP_BRAZERO if the +minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX +as appropriate. + +A subpattern with a bounded maximum repetition is replicated in a nested +fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO +before each replication after the minimum, so that, for example, (abc){2,5} is +compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group +has the same number. + +When a repeated subpattern has an unbounded upper limit, it is checked to see +whether it could match an empty string. If this is the case, the opcode in the +final replication is changed to OP_SBRA or OP_SCBRA. 
This tells the matcher +that it needs to check for matching an empty string when it hits OP_KETRMIN or +OP_KETRMAX, and if so, to break the loop. + + +Possessive brackets +------------------- + +When a repeated group (capturing or non-capturing) is marked as possessive by +the "+" notation, e.g. (abc)++, different opcodes are used. Their names all +have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead +of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum +repetition is zero, the group is preceded by OP_BRAPOSZERO. + + +Once-only (atomic) groups +------------------------- + +These are just like other subpatterns, but they start with the opcode OP_ONCE. +The check for matching an empty string in an unbounded repeat is handled +entirely at runtime, so there is just this one opcode for atomic groups. + + +Assertions +---------- + +Forward assertions are also just like other subpatterns, but starting with one +of the opcodes OP_ASSERT, OP_ASSERT_NA (non-atomic assertion), or +OP_ASSERT_NOT. + +Backward assertions use the opcodes OP_ASSERTBACK, OP_ASSERTBACK_NA, and +OP_ASSERTBACK_NOT. If all the branches of a backward assertion are of fixed +length (not necessarily the same), the first opcode inside each branch is +OP_REVERSE, followed by an IMM2_SIZE count of the number of characters to move +back the pointer in the subject string, thus allowing each branch to have a +different (but fixed) length. + +Variable-length backward assertions whose maximum matching length is limited +are also supported. For such assertions, the first opcode inside each branch is +OP_VREVERSE, followed by the minimum and maximum lengths for that branch, +unless these happen to be equal, in which case OP_REVERSE is used. These +IMM2_SIZE values occupy two code units each in 8-bit mode, and 1 code unit in +16/32 bit modes. 
+ +In ASCII or UTF-32 mode, the character counts in OP_REVERSE and OP_VREVERSE are +also the number of code units, but in UTF-8/16 mode each character may occupy +more than one code unit. + + +Conditional subpatterns +----------------------- + +These are like other subpatterns, but they start with the opcode OP_COND, or +OP_SCOND for one that might match an empty string in an unbounded repeat. + +If the condition is a back reference, this is stored at the start of the +subpattern using the opcode OP_CREF followed by a count containing the +reference number, provided that the reference is to a unique capturing group. +If the reference was by name and there is more than one group with that name, +OP_DNCREF is used instead. It is followed by two counts: the index in the group +names table, and the number of groups with the same name. This allows the +matcher to check if any group with the given name is set. + +If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of +group x" (coded as "(?(Rx)"), the group number is stored at the start of the +subpattern using the opcode OP_RREF (with a value of RREF_ANY (0xffff) for "the +whole pattern") or OP_DNRREF (with data as for OP_DNCREF). + +For a DEFINE condition, OP_FALSE is used (with no associated data). During +compilation, however, a DEFINE condition is coded as OP_DEFINE so that, when +the conditional group is complete, there can be a check to ensure that it +contains only one top-level branch. Once this has happened, the opcode is +changed to OP_FALSE, so the matcher never sees OP_DEFINE. + +There is a special PCRE2-specific condition of the form (VERSION[>]=x.y), which +tests the PCRE2 version number. This compiles into one of the opcodes OP_TRUE +or OP_FALSE. + +If a condition is not a back reference, recursion test, DEFINE, or VERSION, it +must start with a parenthesized atomic assertion, whose opcode normally +immediately follows OP_COND or OP_SCOND.
However, if automatic callouts are +enabled, a callout is inserted immediately before the assertion. It is also +possible to insert a manual callout at this point. Only assertion conditions +may have callouts preceding the condition. + +A condition that is the negative assertion (?!) is optimized to OP_FAIL in all +parts of the pattern, so this is another opcode that may appear as a condition. +It is treated the same as OP_FALSE. + + +Recursion +--------- + +Recursion either matches the current pattern, or some subexpression. The opcode +OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting +bracket from the start of the whole pattern. OP_RECURSE is also used for +"subroutine" calls, even though they are not strictly a recursion. Up till +release 10.30 recursions were treated as atomic groups, making them +incompatible with Perl (but PCRE had them well before Perl did). From 10.30, +backtracking into recursions is supported. + +Repeated recursions used to be wrapped inside OP_ONCE brackets, which not only +forced no backtracking, but also allowed repetition to be handled as for other +bracketed groups. From 10.30 onwards, repeated recursions are duplicated for +their minimum repetitions, and then wrapped in non-capturing brackets for the +remainder. For example, (?1){3} is treated as (?1)(?1)(?1), and (?1){2,4} is +treated as (?1)(?1)(?:(?1)){0,2}. + + +Callouts +-------- + +A callout may have either a numerical argument or a string argument. These use +OP_CALLOUT or OP_CALLOUT_STR, respectively. In each case these are followed by +two LINK_SIZE values giving the offset in the pattern string to the start of +the following item, and another count giving the length of this item. These +values make it possible for pcre2test to output useful tracing information +using callouts. 
+ +In the case of a numeric callout, after these two values there is a single code +unit containing the callout number, in the range 0-255, with 255 being used for +callouts that are automatically inserted as a result of the PCRE2_AUTO_CALLOUT +option. Thus, this opcode item is of fixed length: + + [OP_CALLOUT] [PATTERN_OFFSET] [PATTERN_LENGTH] [NUMBER] + +For callouts with string arguments, OP_CALLOUT_STR has three more data items: +a LINK_SIZE value giving the complete length of the entire opcode item, a +LINK_SIZE item containing the offset within the pattern string to the start of +the string argument, and the string itself, preceded by its starting delimiter +and followed by a binary zero. When a callout function is called, a pointer to +the actual string is passed, but the delimiter can be accessed as string[-1] if +the application needs it. In the 8-bit library, the callout in /X(?C'abc')Y/ is +compiled as the following bytes (decimal numbers represent binary values): + + [OP_CALLOUT_STR] [0] [10] [0] [1] [0] [14] [0] [5] ['] [a] [b] [c] [0] + -------- ------- -------- ------- + | | | | + ------- LINK_SIZE items ------ + +Opcode table checking +--------------------- + +The last opcode that is defined in pcre2_internal.h is OP_TABLE_LENGTH. This is +not a real opcode, but is used to check at compile time that tables indexed by +opcode are the correct length, in order to catch updating errors. + +Philip Hazel +November 2023 diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..e82fd21 --- /dev/null +++ b/INSTALL @@ -0,0 +1,368 @@ +Installation Instructions +************************* + + Copyright (C) 1994-1996, 1999-2002, 2004-2017, 2020-2021 Free +Software Foundation, Inc. + + Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. This file is offered as-is, +without warranty of any kind. 
+ +Basic Installation +================== + + Briefly, the shell command './configure && make && make install' +should configure, build, and install this package. The following +more-detailed instructions are generic; see the 'README' file for +instructions specific to this package. Some packages provide this +'INSTALL' file but do not implement all of the features documented +below. The lack of an optional feature in a given package is not +necessarily a bug. More recommendations for GNU packages can be found +in *note Makefile Conventions: (standards)Makefile Conventions. + + The 'configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a 'Makefile' in each directory of the package. +It may also create one or more '.h' files containing system-dependent +definitions. Finally, it creates a shell script 'config.status' that +you can run in the future to recreate the current configuration, and a +file 'config.log' containing compiler output (useful mainly for +debugging 'configure'). + + It can also use an optional file (typically called 'config.cache' and +enabled with '--cache-file=config.cache' or simply '-C') that saves the +results of its tests to speed up reconfiguring. Caching is disabled by +default to prevent problems with accidental use of stale cache files. + + If you need to do unusual things to compile the package, please try +to figure out how 'configure' could check whether to do them, and mail +diffs or instructions to the address given in the 'README' so they can +be considered for the next release. If you are using the cache, and at +some point 'config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file 'configure.ac' (or 'configure.in') is used to create +'configure' by a program called 'autoconf'. You need 'configure.ac' if +you want to change it or regenerate 'configure' using a newer version of +'autoconf'. 
+ + The simplest way to compile this package is: + + 1. 'cd' to the directory containing the package's source code and type + './configure' to configure the package for your system. + + Running 'configure' might take a while. While running, it prints + some messages telling which features it is checking for. + + 2. Type 'make' to compile the package. + + 3. Optionally, type 'make check' to run any self-tests that come with + the package, generally using the just-built uninstalled binaries. + + 4. Type 'make install' to install the programs and any data files and + documentation. When installing into a prefix owned by root, it is + recommended that the package be configured and built as a regular + user, and only the 'make install' phase executed with root + privileges. + + 5. Optionally, type 'make installcheck' to repeat any self-tests, but + this time using the binaries in their final installed location. + This target does not install anything. Running this target as a + regular user, particularly if the prior 'make install' required + root privileges, verifies that the installation completed + correctly. + + 6. You can remove the program binaries and object files from the + source code directory by typing 'make clean'. To also remove the + files that 'configure' created (so you can compile the package for + a different kind of computer), type 'make distclean'. There is + also a 'make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + + 7. Often, you can also type 'make uninstall' to remove the installed + files again. In practice, not all packages have tested that + uninstallation works correctly, even though it is required by the + GNU Coding Standards. + + 8. 
Some packages, particularly those that use Automake, provide 'make + distcheck', which can be used by developers to test that all other + targets like 'make install' and 'make uninstall' work correctly. + This target is generally not run by end users. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the 'configure' script does not know about. Run './configure --help' +for details on some of the pertinent environment variables. + + You can give 'configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here is +an example: + + ./configure CC=c99 CFLAGS=-g LIBS=-lposix + + *Note Defining Variables::, for more details. + +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you can use GNU 'make'. 'cd' to the +directory where you want the object files and executables to go and run +the 'configure' script. 'configure' automatically checks for the source +code in the directory that 'configure' is in and in '..'. This is known +as a "VPATH" build. + + With a non-GNU 'make', it is safer to compile the package for one +architecture at a time in the source code directory. After you have +installed the package for one architecture, use 'make distclean' before +reconfiguring for another architecture. + + On MacOS X 10.5 and later systems, you can create libraries and +executables that work on multiple system types--known as "fat" or +"universal" binaries--by specifying multiple '-arch' options to the +compiler but only a single '-arch' option to the preprocessor. 
Like +this: + + ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CPP="gcc -E" CXXCPP="g++ -E" + + This is not guaranteed to produce working output in all cases, you +may have to build one architecture at a time and combine the results +using the 'lipo' tool if you have problems. + +Installation Names +================== + + By default, 'make install' installs the package's commands under +'/usr/local/bin', include files under '/usr/local/include', etc. You +can specify an installation prefix other than '/usr/local' by giving +'configure' the option '--prefix=PREFIX', where PREFIX must be an +absolute file name. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +pass the option '--exec-prefix=PREFIX' to 'configure', the package uses +PREFIX as the prefix for installing programs and libraries. +Documentation and other data files still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like '--bindir=DIR' to specify different values for particular +kinds of files. Run 'configure --help' for a list of the directories +you can set and what kinds of files go in them. In general, the default +for these options is expressed in terms of '${prefix}', so that +specifying just '--prefix' will affect all of the other directory +specifications that were not explicitly provided. + + The most portable way to affect installation locations is to pass the +correct locations to 'configure'; however, many packages provide one or +both of the following shortcuts of passing variable assignments to the +'make install' command line to change installation locations without +having to reconfigure or recompile. + + The first method involves providing an override variable for each +affected directory. 
For example, 'make install +prefix=/alternate/directory' will choose an alternate location for all +directory configuration variables that were expressed in terms of +'${prefix}'. Any directories that were specified during 'configure', +but not in terms of '${prefix}', must each be overridden at install time +for the entire installation to be relocated. The approach of makefile +variable overrides for each directory variable is required by the GNU +Coding Standards, and ideally causes no recompilation. However, some +platforms have known limitations with the semantics of shared libraries +that end up requiring recompilation when using this method, particularly +noticeable in packages that use GNU Libtool. + + The second method involves providing the 'DESTDIR' variable. For +example, 'make install DESTDIR=/alternate/directory' will prepend +'/alternate/directory' before all installation names. The approach of +'DESTDIR' overrides is not required by the GNU Coding Standards, and +does not work on platforms that have drive letters. On the other hand, +it does better at avoiding recompilation issues, and works well even +when some directory options were not specified in terms of '${prefix}' +at 'configure' time. + +Optional Features +================= + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving 'configure' the +option '--program-prefix=PREFIX' or '--program-suffix=SUFFIX'. + + Some packages pay attention to '--enable-FEATURE' options to +'configure', where FEATURE indicates an optional part of the package. +They may also pay attention to '--with-PACKAGE' options, where PACKAGE +is something like 'gnu-as' or 'x' (for the X Window System). The +'README' should mention any '--enable-' and '--with-' options that the +package recognizes. 
+ + For packages that use the X Window System, 'configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the 'configure' options '--x-includes=DIR' and +'--x-libraries=DIR' to specify their locations. + + Some packages offer the ability to configure how verbose the +execution of 'make' will be. For these packages, running './configure +--enable-silent-rules' sets the default to minimal output, which can be +overridden with 'make V=1'; while running './configure +--disable-silent-rules' sets the default to verbose, which can be +overridden with 'make V=0'. + +Particular systems +================== + + On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC +is not installed, it is recommended to use the following options in +order to use an ANSI C compiler: + + ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" + +and if that doesn't work, install pre-built binaries of GCC for HP-UX. + + HP-UX 'make' updates targets which have the same timestamps as their +prerequisites, which makes it generally unusable when shipped generated +files such as 'configure' are involved. Use GNU 'make' instead. + + On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot +parse its '<wchar.h>' header file. The option '-nodtk' can be used as a +workaround. If GNU CC is not installed, it is therefore recommended to +try + + ./configure CC="cc" + +and if that doesn't work, try + + ./configure CC="cc -nodtk" + + On Solaris, don't put '/usr/ucb' early in your 'PATH'. This +directory contains several dysfunctional programs; working variants of +these programs are available in '/usr/bin'. So, if you need '/usr/ucb' +in your 'PATH', put it _after_ '/usr/bin'. + + On Haiku, software installed for all users goes in '/boot/common', +not '/usr/local'. 
It is recommended to use the following options: + + ./configure --prefix=/boot/common + +Specifying the System Type +========================== + + There may be some features 'configure' cannot figure out +automatically, but needs to determine by the type of machine the package +will run on. Usually, assuming the package is built to be run on the +_same_ architectures, 'configure' can figure that out, but if it prints +a message saying it cannot guess the machine type, give it the +'--build=TYPE' option. TYPE can either be a short name for the system +type, such as 'sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS + KERNEL-OS + + See the file 'config.sub' for the possible values of each field. If +'config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the option '--target=TYPE' to select the type of system they will +produce code for. + + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with '--host=TYPE'. + +Sharing Defaults +================ + + If you want to set default values for 'configure' scripts to share, +you can create a site shell script called 'config.site' that gives +default values for variables like 'CC', 'cache_file', and 'prefix'. +'configure' looks for 'PREFIX/share/config.site' if it exists, then +'PREFIX/etc/config.site' if it exists. Or, you can set the +'CONFIG_SITE' environment variable to the location of the site script. +A warning: not all 'configure' scripts look for a site script. + +Defining Variables +================== + + Variables not defined in a site shell script can be set in the +environment passed to 'configure'. 
However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the 'configure' command line, using 'VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified 'gcc' to be used as the C compiler (unless it is +overridden in the site shell script). + +Unfortunately, this technique does not work for 'CONFIG_SHELL' due to an +Autoconf limitation. Until the limitation is lifted, you can use this +workaround: + + CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash + +'configure' Invocation +====================== + + 'configure' recognizes the following options to control how it +operates. + +'--help' +'-h' + Print a summary of all of the options to 'configure', and exit. + +'--help=short' +'--help=recursive' + Print a summary of the options unique to this package's + 'configure', and exit. The 'short' variant lists options used only + in the top level, while the 'recursive' variant lists options also + present in any nested packages. + +'--version' +'-V' + Print the version of Autoconf used to generate the 'configure' + script, and exit. + +'--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally 'config.cache'. FILE defaults to '/dev/null' to + disable caching. + +'--config-cache' +'-C' + Alias for '--cache-file=config.cache'. + +'--quiet' +'--silent' +'-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to '/dev/null' (any error + messages will still be shown). + +'--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + 'configure' can determine that directory automatically. + +'--prefix=DIR' + Use DIR as the installation prefix. *note Installation Names:: for + more details, including other options available for fine-tuning the + installation locations. 
+ +'--no-create' +'-n' + Run the configure checks, but stop before creating any output + files. + +'configure' also accepts some other, not widely useful, options. Run +'configure --help' for more details. diff --git a/LICENCE b/LICENCE new file mode 100644 index 0000000..3c1ef03 --- /dev/null +++ b/LICENCE @@ -0,0 +1,94 @@ +PCRE2 LICENCE +------------- + +PCRE2 is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + +Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD" +licence, as specified below, with one exemption for certain binary +redistributions. The documentation for PCRE2, supplied in the "doc" directory, +is distributed under the same terms as the software itself. The data in the +testdata directory is not copyrighted and is in the public domain. + +The basic library functions are written in C and are freestanding. Also +included in the distribution is a just-in-time compiler that can be used to +optimize pattern matching. This is an optional feature that can be omitted when +the library is built. + + +THE BASIC LIBRARY FUNCTIONS +--------------------------- + +Written by: Philip Hazel +Email local part: Philip.Hazel +Email domain: gmail.com + +Retired from University of Cambridge Computing Service, +Cambridge, England. + +Copyright (c) 1997-2024 University of Cambridge +All rights reserved. + + +PCRE2 JUST-IN-TIME COMPILATION SUPPORT +-------------------------------------- + +Written by: Zoltan Herczeg +Email local part: hzmester +Email domain: freemail.hu + +Copyright(c) 2010-2024 Zoltan Herczeg +All rights reserved. + + +STACK-LESS JUST-IN-TIME COMPILER +-------------------------------- + +Written by: Zoltan Herczeg +Email local part: hzmester +Email domain: freemail.hu + +Copyright(c) 2009-2024 Zoltan Herczeg +All rights reserved. 
+ + +THE "BSD" LICENCE +----------------- + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notices, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notices, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of any + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + + +EXEMPTION FOR BINARY LIBRARY-LIKE PACKAGES +------------------------------------------ + +The second condition in the BSD licence (covering binary redistributions) does +not apply all the way down a chain of software. If binary package A includes +PCRE2, it must respect the condition, but if package B is software that +includes package A, the condition is not imposed on package B unless it uses +PCRE2 independently. 
+ +End diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..ca6a638 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,967 @@ +## Process this file with automake to produce Makefile.in. + +AUTOMAKE_OPTIONS = subdir-objects +ACLOCAL_AMFLAGS = -I m4 + +## This seems to have become necessary for building in non-source directory. + +AM_CPPFLAGS="-I$(srcdir)/src" + +## Specify the documentation files that are distributed. + +dist_doc_DATA = \ + AUTHORS \ + COPYING \ + ChangeLog \ + LICENCE \ + NEWS \ + README \ + doc/pcre2.txt \ + doc/pcre2-config.txt \ + doc/pcre2grep.txt \ + doc/pcre2test.txt + +dist_html_DATA = \ + doc/html/NON-AUTOTOOLS-BUILD.txt \ + doc/html/README.txt \ + doc/html/index.html \ + doc/html/pcre2-config.html \ + doc/html/pcre2.html \ + doc/html/pcre2_callout_enumerate.html \ + doc/html/pcre2_code_copy.html \ + doc/html/pcre2_code_copy_with_tables.html \ + doc/html/pcre2_code_free.html \ + doc/html/pcre2_compile.html \ + doc/html/pcre2_compile_context_copy.html \ + doc/html/pcre2_compile_context_create.html \ + doc/html/pcre2_compile_context_free.html \ + doc/html/pcre2_config.html \ + doc/html/pcre2_convert_context_copy.html \ + doc/html/pcre2_convert_context_create.html \ + doc/html/pcre2_convert_context_free.html \ + doc/html/pcre2_converted_pattern_free.html \ + doc/html/pcre2_dfa_match.html \ + doc/html/pcre2_general_context_copy.html \ + doc/html/pcre2_general_context_create.html \ + doc/html/pcre2_general_context_free.html \ + doc/html/pcre2_get_error_message.html \ + doc/html/pcre2_get_mark.html \ + doc/html/pcre2_get_match_data_heapframes_size.html \ + doc/html/pcre2_get_match_data_size.html \ + doc/html/pcre2_get_ovector_count.html \ + doc/html/pcre2_get_ovector_pointer.html \ + doc/html/pcre2_get_startchar.html \ + doc/html/pcre2_jit_compile.html \ + doc/html/pcre2_jit_free_unused_memory.html \ + doc/html/pcre2_jit_match.html \ + doc/html/pcre2_jit_stack_assign.html \ + doc/html/pcre2_jit_stack_create.html \ + 
doc/html/pcre2_jit_stack_free.html \ + doc/html/pcre2_maketables.html \ + doc/html/pcre2_maketables_free.html \ + doc/html/pcre2_match.html \ + doc/html/pcre2_match_context_copy.html \ + doc/html/pcre2_match_context_create.html \ + doc/html/pcre2_match_context_free.html \ + doc/html/pcre2_match_data_create.html \ + doc/html/pcre2_match_data_create_from_pattern.html \ + doc/html/pcre2_match_data_free.html \ + doc/html/pcre2_pattern_convert.html \ + doc/html/pcre2_pattern_info.html \ + doc/html/pcre2_serialize_decode.html \ + doc/html/pcre2_serialize_encode.html \ + doc/html/pcre2_serialize_free.html \ + doc/html/pcre2_serialize_get_number_of_codes.html \ + doc/html/pcre2_set_bsr.html \ + doc/html/pcre2_set_callout.html \ + doc/html/pcre2_set_character_tables.html \ + doc/html/pcre2_set_compile_extra_options.html \ + doc/html/pcre2_set_compile_recursion_guard.html \ + doc/html/pcre2_set_depth_limit.html \ + doc/html/pcre2_set_glob_escape.html \ + doc/html/pcre2_set_glob_separator.html \ + doc/html/pcre2_set_heap_limit.html \ + doc/html/pcre2_set_match_limit.html \ + doc/html/pcre2_set_max_pattern_compiled_length.html \ + doc/html/pcre2_set_max_pattern_length.html \ + doc/html/pcre2_set_max_varlookbehind.html \ + doc/html/pcre2_set_offset_limit.html \ + doc/html/pcre2_set_newline.html \ + doc/html/pcre2_set_parens_nest_limit.html \ + doc/html/pcre2_set_recursion_limit.html \ + doc/html/pcre2_set_recursion_memory_management.html \ + doc/html/pcre2_set_substitute_callout.html \ + doc/html/pcre2_substitute.html \ + doc/html/pcre2_substring_copy_byname.html \ + doc/html/pcre2_substring_copy_bynumber.html \ + doc/html/pcre2_substring_free.html \ + doc/html/pcre2_substring_get_byname.html \ + doc/html/pcre2_substring_get_bynumber.html \ + doc/html/pcre2_substring_length_byname.html \ + doc/html/pcre2_substring_length_bynumber.html \ + doc/html/pcre2_substring_list_free.html \ + doc/html/pcre2_substring_list_get.html \ + doc/html/pcre2_substring_nametable_scan.html \ + 
doc/html/pcre2_substring_number_from_name.html \ + doc/html/pcre2api.html \ + doc/html/pcre2build.html \ + doc/html/pcre2callout.html \ + doc/html/pcre2compat.html \ + doc/html/pcre2convert.html \ + doc/html/pcre2demo.html \ + doc/html/pcre2grep.html \ + doc/html/pcre2jit.html \ + doc/html/pcre2limits.html \ + doc/html/pcre2matching.html \ + doc/html/pcre2partial.html \ + doc/html/pcre2pattern.html \ + doc/html/pcre2perform.html \ + doc/html/pcre2posix.html \ + doc/html/pcre2sample.html \ + doc/html/pcre2serialize.html \ + doc/html/pcre2syntax.html \ + doc/html/pcre2test.html \ + doc/html/pcre2unicode.html + +dist_man_MANS = \ + doc/pcre2-config.1 \ + doc/pcre2.3 \ + doc/pcre2_callout_enumerate.3 \ + doc/pcre2_code_copy.3 \ + doc/pcre2_code_copy_with_tables.3 \ + doc/pcre2_code_free.3 \ + doc/pcre2_compile.3 \ + doc/pcre2_compile_context_copy.3 \ + doc/pcre2_compile_context_create.3 \ + doc/pcre2_compile_context_free.3 \ + doc/pcre2_config.3 \ + doc/pcre2_convert_context_copy.3 \ + doc/pcre2_convert_context_create.3 \ + doc/pcre2_convert_context_free.3 \ + doc/pcre2_converted_pattern_free.3 \ + doc/pcre2_dfa_match.3 \ + doc/pcre2_general_context_copy.3 \ + doc/pcre2_general_context_create.3 \ + doc/pcre2_general_context_free.3 \ + doc/pcre2_get_error_message.3 \ + doc/pcre2_get_mark.3 \ + doc/pcre2_get_match_data_heapframes_size.3 \ + doc/pcre2_get_match_data_size.3 \ + doc/pcre2_get_ovector_count.3 \ + doc/pcre2_get_ovector_pointer.3 \ + doc/pcre2_get_startchar.3 \ + doc/pcre2_jit_compile.3 \ + doc/pcre2_jit_free_unused_memory.3 \ + doc/pcre2_jit_match.3 \ + doc/pcre2_jit_stack_assign.3 \ + doc/pcre2_jit_stack_create.3 \ + doc/pcre2_jit_stack_free.3 \ + doc/pcre2_maketables.3 \ + doc/pcre2_maketables_free.3 \ + doc/pcre2_match.3 \ + doc/pcre2_match_context_copy.3 \ + doc/pcre2_match_context_create.3 \ + doc/pcre2_match_context_free.3 \ + doc/pcre2_match_data_create.3 \ + doc/pcre2_match_data_create_from_pattern.3 \ + doc/pcre2_match_data_free.3 \ + 
doc/pcre2_pattern_convert.3 \ + doc/pcre2_pattern_info.3 \ + doc/pcre2_serialize_decode.3 \ + doc/pcre2_serialize_encode.3 \ + doc/pcre2_serialize_free.3 \ + doc/pcre2_serialize_get_number_of_codes.3 \ + doc/pcre2_set_bsr.3 \ + doc/pcre2_set_callout.3 \ + doc/pcre2_set_character_tables.3 \ + doc/pcre2_set_compile_extra_options.3 \ + doc/pcre2_set_compile_recursion_guard.3 \ + doc/pcre2_set_depth_limit.3 \ + doc/pcre2_set_glob_escape.3 \ + doc/pcre2_set_glob_separator.3 \ + doc/pcre2_set_heap_limit.3 \ + doc/pcre2_set_match_limit.3 \ + doc/pcre2_set_max_pattern_compiled_length.3 \ + doc/pcre2_set_max_pattern_length.3 \ + doc/pcre2_set_max_varlookbehind.3 \ + doc/pcre2_set_offset_limit.3 \ + doc/pcre2_set_newline.3 \ + doc/pcre2_set_parens_nest_limit.3 \ + doc/pcre2_set_recursion_limit.3 \ + doc/pcre2_set_recursion_memory_management.3 \ + doc/pcre2_set_substitute_callout.3 \ + doc/pcre2_substitute.3 \ + doc/pcre2_substring_copy_byname.3 \ + doc/pcre2_substring_copy_bynumber.3 \ + doc/pcre2_substring_free.3 \ + doc/pcre2_substring_get_byname.3 \ + doc/pcre2_substring_get_bynumber.3 \ + doc/pcre2_substring_length_byname.3 \ + doc/pcre2_substring_length_bynumber.3 \ + doc/pcre2_substring_list_free.3 \ + doc/pcre2_substring_list_get.3 \ + doc/pcre2_substring_nametable_scan.3 \ + doc/pcre2_substring_number_from_name.3 \ + doc/pcre2api.3 \ + doc/pcre2build.3 \ + doc/pcre2callout.3 \ + doc/pcre2compat.3 \ + doc/pcre2convert.3 \ + doc/pcre2demo.3 \ + doc/pcre2grep.1 \ + doc/pcre2jit.3 \ + doc/pcre2limits.3 \ + doc/pcre2matching.3 \ + doc/pcre2partial.3 \ + doc/pcre2pattern.3 \ + doc/pcre2perform.3 \ + doc/pcre2posix.3 \ + doc/pcre2sample.3 \ + doc/pcre2serialize.3 \ + doc/pcre2syntax.3 \ + doc/pcre2test.1 \ + doc/pcre2unicode.3 + +# The Libtool libraries to install. We'll add to this later. + +lib_LTLIBRARIES = + +# Unit tests you want to run when people type 'make check'. 
+# TESTS is for binary unit tests, check_SCRIPTS for script-based tests + +TESTS = +check_SCRIPTS = +dist_noinst_SCRIPTS = + +# Some of the binaries we make are to be installed, and others are +# (non-user-visible) helper programs needed to build the libraries. + +bin_PROGRAMS = +noinst_PROGRAMS = + +# Additional files to delete on 'make clean', 'make distclean', +# and 'make maintainer-clean'. It turns out that the default is to delete only +# those binaries that *this* configuration has created. If the configuration +# has been changed, some binaries may not get automatically deleted. Therefore +# we list them here. + +CLEANFILES = \ + pcre2_dftables \ + pcre2_jit_test \ + pcre2fuzzcheck-8 \ + pcre2fuzzcheck-16 \ + pcre2fuzzcheck-32 \ + pcre2demo + +DISTCLEANFILES = src/config.h.in~ +MAINTAINERCLEANFILES = + +# Additional files to bundle with the distribution, over and above what +# the Autotools include by default. + +EXTRA_DIST = + +# These files contain additional m4 macros that are used by autoconf. + +EXTRA_DIST += \ + m4/ax_pthread.m4 m4/pcre2_visibility.m4 + +# These files contain maintenance information + +EXTRA_DIST += \ + NON-AUTOTOOLS-BUILD \ + HACKING + +# These are support files for building under VMS + +EXTRA_DIST += \ + vms/configure.com \ + vms/openvms_readme.txt \ + vms/pcre2.h_patch \ + vms/stdint.h + +# These files are used in the preparation of a release + +EXTRA_DIST += \ + PrepareRelease \ + CheckMan \ + CleanTxt \ + Detrail \ + 132html \ + doc/index.html.src + +# These files are usable versions of pcre2.h and config.h that are distributed +# for the benefit of people who are building PCRE2 manually, without the +# Autotools support. + +EXTRA_DIST += \ + src/pcre2.h.generic \ + src/config.h.generic + +# The only difference between pcre2.h.in and pcre2.h is the setting of the PCRE +# version number. Therefore, we can create the generic version just by copying. 
+ +src/pcre2.h.generic: src/pcre2.h.in configure.ac + rm -f $@ + cp -p src/pcre2.h $@ + +# It is more complicated for config.h.generic. We need the version that results +# from a default configuration so as to get all the default values for PCRE +# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by +# doing a configure in a temporary directory. However, some trickery is needed, +# because the source directory may already be configured. If you just try +# running configure in a new directory, it complains. For this reason, we move +# config.status out of the way while doing the default configuration. The +# resulting config.h is munged by perl to put #ifdefs round any #defines for +# macros with values, and to #undef all boolean macros such as HAVE_xxx and +# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings. + +src/config.h.generic: configure.ac + rm -rf $@ _generic + mkdir _generic + cs=$(srcdir)/config.status; test ! -f $$cs || mv -f $$cs $$cs.aside + cd _generic && $(abs_top_srcdir)/configure || : + cs=$(srcdir)/config.status; test ! -f $$cs.aside || mv -f $$cs.aside $$cs + test -f _generic/src/config.h + perl -n \ + -e 'BEGIN{$$blank=0;}' \ + -e 'if(/(.+?)\s*__attribute__ \(\(visibility/){print"$$1\n";$$blank=0;next;}' \ + -e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \ + -e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \ + -e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \ + -e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \ + _generic/src/config.h >$@ + rm -rf _generic + +MAINTAINERCLEANFILES += src/pcre2.h.generic src/config.h.generic + +# These are the header files we'll install. We do not distribute pcre2.h +# because it is generated from pcre2.h.in. + +nodist_include_HEADERS = src/pcre2.h +include_HEADERS = src/pcre2posix.h + +# This is the "config" script. 
+ +bin_SCRIPTS = pcre2-config + +## --------------------------------------------------------------- +## The pcre2_dftables program is used to rebuild character tables before +## compiling PCRE2, if --enable-rebuild-chartables is specified. It is not an +## installed program. The default (when --enable-rebuild-chartables is not +## specified) is to copy a distributed set of tables that are defined for ASCII +## code. In this case, pcre2_dftables is not needed. + +if WITH_REBUILD_CHARTABLES +noinst_PROGRAMS += pcre2_dftables +pcre2_dftables_SOURCES = src/pcre2_dftables.c +src/pcre2_chartables.c: pcre2_dftables$(EXEEXT) + rm -f $@ + ./pcre2_dftables$(EXEEXT) $@ +else +src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist + rm -f $@ + $(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c +endif # WITH_REBUILD_CHARTABLES + +BUILT_SOURCES = src/pcre2_chartables.c +NODIST_SOURCES = src/pcre2_chartables.c + +## Define the list of common sources, then arrange to build whichever of the +## 8-, 16-, or 32-bit libraries are configured. 
+ +COMMON_SOURCES = \ + src/pcre2_auto_possess.c \ + src/pcre2_chkdint.c \ + src/pcre2_compile.c \ + src/pcre2_config.c \ + src/pcre2_context.c \ + src/pcre2_convert.c \ + src/pcre2_dfa_match.c \ + src/pcre2_error.c \ + src/pcre2_extuni.c \ + src/pcre2_find_bracket.c \ + src/pcre2_internal.h \ + src/pcre2_intmodedep.h \ + src/pcre2_jit_compile.c \ + src/pcre2_jit_neon_inc.h \ + src/pcre2_jit_simd_inc.h \ + src/pcre2_maketables.c \ + src/pcre2_match.c \ + src/pcre2_match_data.c \ + src/pcre2_newline.c \ + src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c \ + src/pcre2_script_run.c \ + src/pcre2_serialize.c \ + src/pcre2_string_utils.c \ + src/pcre2_study.c \ + src/pcre2_substitute.c \ + src/pcre2_substring.c \ + src/pcre2_tables.c \ + src/pcre2_ucd.c \ + src/pcre2_ucp.h \ + src/pcre2_valid_utf.c \ + src/pcre2_xclass.c + +# The pcre2_ucptables.c file is #included by pcre2_tables.c + +EXTRA_DIST += src/pcre2_ucptables.c + +if WITH_PCRE2_8 +lib_LTLIBRARIES += libpcre2-8.la +libpcre2_8_la_SOURCES = \ + $(COMMON_SOURCES) +nodist_libpcre2_8_la_SOURCES = \ + $(NODIST_SOURCES) +libpcre2_8_la_CFLAGS = \ + -DPCRE2_CODE_UNIT_WIDTH=8 \ + $(VISIBILITY_CFLAGS) \ + $(CET_CFLAGS) \ + $(AM_CFLAGS) +libpcre2_8_la_LIBADD = +endif # WITH_PCRE2_8 + +if WITH_PCRE2_16 +lib_LTLIBRARIES += libpcre2-16.la +libpcre2_16_la_SOURCES = \ + $(COMMON_SOURCES) +nodist_libpcre2_16_la_SOURCES = \ + $(NODIST_SOURCES) +libpcre2_16_la_CFLAGS = \ + -DPCRE2_CODE_UNIT_WIDTH=16 \ + $(VISIBILITY_CFLAGS) \ + $(CET_CFLAGS) \ + $(AM_CFLAGS) +libpcre2_16_la_LIBADD = +endif # WITH_PCRE2_16 + +if WITH_PCRE2_32 +lib_LTLIBRARIES += libpcre2-32.la +libpcre2_32_la_SOURCES = \ + $(COMMON_SOURCES) +nodist_libpcre2_32_la_SOURCES = \ + $(NODIST_SOURCES) +libpcre2_32_la_CFLAGS = \ + -DPCRE2_CODE_UNIT_WIDTH=32 \ + $(VISIBILITY_CFLAGS) \ + $(CET_CFLAGS) \ + $(AM_CFLAGS) +libpcre2_32_la_LIBADD = +endif # WITH_PCRE2_32 + +# The pcre2_chartables.c.dist file is the default version of +# pcre2_chartables.c, used unless 
--enable-rebuild-chartables is specified. + +EXTRA_DIST += src/pcre2_chartables.c.dist +CLEANFILES += src/pcre2_chartables.c + +# The JIT compiler lives in a separate directory, but its files are #included +# when pcre2_jit_compile.c is processed, so they must be distributed. + +EXTRA_DIST += \ + src/sljit/sljitConfig.h \ + src/sljit/sljitConfigCPU.h \ + src/sljit/sljitConfigInternal.h \ + src/sljit/sljitLir.c \ + src/sljit/sljitLir.h \ + src/sljit/sljitNativeARM_32.c \ + src/sljit/sljitNativeARM_64.c \ + src/sljit/sljitNativeARM_T2_32.c \ + src/sljit/sljitNativeLOONGARCH_64.c \ + src/sljit/sljitNativeMIPS_32.c \ + src/sljit/sljitNativeMIPS_64.c \ + src/sljit/sljitNativeMIPS_common.c \ + src/sljit/sljitNativePPC_32.c \ + src/sljit/sljitNativePPC_64.c \ + src/sljit/sljitNativePPC_common.c \ + src/sljit/sljitNativeRISCV_32.c \ + src/sljit/sljitNativeRISCV_64.c \ + src/sljit/sljitNativeRISCV_common.c \ + src/sljit/sljitNativeS390X.c \ + src/sljit/sljitNativeX86_32.c \ + src/sljit/sljitNativeX86_64.c \ + src/sljit/sljitNativeX86_common.c \ + src/sljit/sljitSerialize.c \ + src/sljit/sljitUtils.c \ + src/sljit/allocator_src/sljitExecAllocatorApple.c \ + src/sljit/allocator_src/sljitExecAllocatorCore.c \ + src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c \ + src/sljit/allocator_src/sljitExecAllocatorPosix.c \ + src/sljit/allocator_src/sljitExecAllocatorWindows.c \ + src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c \ + src/sljit/allocator_src/sljitProtExecAllocatorPosix.c \ + src/sljit/allocator_src/sljitWXExecAllocatorPosix.c \ + src/sljit/allocator_src/sljitWXExecAllocatorWindows.c + +# Some of the JIT sources are also in separate files that are #included. 
+ +EXTRA_DIST += \ + src/pcre2_jit_match.c \ + src/pcre2_jit_misc.c + +if WITH_PCRE2_8 +libpcre2_8_la_LDFLAGS = $(EXTRA_LIBPCRE2_8_LDFLAGS) +endif # WITH_PCRE2_8 +if WITH_PCRE2_16 +libpcre2_16_la_LDFLAGS = $(EXTRA_LIBPCRE2_16_LDFLAGS) +endif # WITH_PCRE2_16 +if WITH_PCRE2_32 +libpcre2_32_la_LDFLAGS = $(EXTRA_LIBPCRE2_32_LDFLAGS) +endif # WITH_PCRE2_32 + +if WITH_VALGRIND +if WITH_PCRE2_8 +libpcre2_8_la_CFLAGS += $(VALGRIND_CFLAGS) +endif # WITH_PCRE2_8 +if WITH_PCRE2_16 +libpcre2_16_la_CFLAGS += $(VALGRIND_CFLAGS) +endif # WITH_PCRE2_16 +if WITH_PCRE2_32 +libpcre2_32_la_CFLAGS += $(VALGRIND_CFLAGS) +endif # WITH_PCRE2_32 +endif # WITH_VALGRIND + +if WITH_GCOV +if WITH_PCRE2_8 +libpcre2_8_la_CFLAGS += $(GCOV_CFLAGS) +endif # WITH_PCRE2_8 +if WITH_PCRE2_16 +libpcre2_16_la_CFLAGS += $(GCOV_CFLAGS) +endif # WITH_PCRE2_16 +if WITH_PCRE2_32 +libpcre2_32_la_CFLAGS += $(GCOV_CFLAGS) +endif # WITH_PCRE2_32 +endif # WITH_GCOV + +## A version of the 8-bit library that has a POSIX API. + +if WITH_PCRE2_8 +lib_LTLIBRARIES += libpcre2-posix.la +libpcre2_posix_la_SOURCES = src/pcre2posix.c +libpcre2_posix_la_CFLAGS = \ + -DPCRE2_CODE_UNIT_WIDTH=8 @PCRE2POSIX_CFLAG@ \ + $(VISIBILITY_CFLAGS) $(AM_CFLAGS) +libpcre2_posix_la_LDFLAGS = $(EXTRA_LIBPCRE2_POSIX_LDFLAGS) +libpcre2_posix_la_LIBADD = libpcre2-8.la +if WITH_GCOV +libpcre2_posix_la_CFLAGS += $(GCOV_CFLAGS) +endif # WITH_GCOV +endif # WITH_PCRE2_8 + +## Build pcre2grep and optional fuzzer stuff if the 8-bit library is enabled + +if WITH_PCRE2_8 +bin_PROGRAMS += pcre2grep +pcre2grep_SOURCES = src/pcre2grep.c +pcre2grep_CFLAGS = $(AM_CFLAGS) +pcre2grep_LDADD = $(LIBZ) $(LIBBZ2) +pcre2grep_LDADD += libpcre2-8.la +if WITH_GCOV +pcre2grep_CFLAGS += $(GCOV_CFLAGS) +pcre2grep_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV +endif # WITH_PCRE2_8 + +## If fuzzer support is enabled, build a non-distributed library containing the +## fuzzing function. Also build the standalone checking binary from the same +## source but using -DSTANDALONE. 
+ +if WITH_FUZZ_SUPPORT +noinst_LIBRARIES = +if WITH_PCRE2_8 +noinst_LIBRARIES += .libs/libpcre2-fuzzsupport.a +_libs_libpcre2_fuzzsupport_a_SOURCES = src/pcre2_fuzzsupport.c +_libs_libpcre2_fuzzsupport_a_CFLAGS = $(AM_CFLAGS) +_libs_libpcre2_fuzzsupport_a_LIBADD = + +noinst_PROGRAMS += pcre2fuzzcheck-8 +pcre2fuzzcheck_8_SOURCES = src/pcre2_fuzzsupport.c +pcre2fuzzcheck_8_CFLAGS = -DSTANDALONE $(AM_CFLAGS) +pcre2fuzzcheck_8_LDADD = libpcre2-8.la +if WITH_GCOV +pcre2fuzzcheck_8_CFLAGS += $(GCOV_CFLAGS) +pcre2fuzzcheck_8_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV +endif # WITH_PCRE2_8 + +if WITH_PCRE2_16 +noinst_LIBRARIES += .libs/libpcre2-fuzzsupport-16.a +_libs_libpcre2_fuzzsupport_16_a_SOURCES = src/pcre2_fuzzsupport.c +_libs_libpcre2_fuzzsupport_16_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16 +_libs_libpcre2_fuzzsupport_16_a_LIBADD = + +noinst_PROGRAMS += pcre2fuzzcheck-16 +pcre2fuzzcheck_16_SOURCES = src/pcre2_fuzzsupport.c +pcre2fuzzcheck_16_CFLAGS = -DSTANDALONE $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16 +pcre2fuzzcheck_16_LDADD = libpcre2-16.la +if WITH_GCOV +pcre2fuzzcheck_16_CFLAGS += $(GCOV_CFLAGS) +pcre2fuzzcheck_16_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV +endif # WITH_PCRE2_16 + +if WITH_PCRE2_32 +noinst_LIBRARIES += .libs/libpcre2-fuzzsupport-32.a +_libs_libpcre2_fuzzsupport_32_a_SOURCES = src/pcre2_fuzzsupport.c +_libs_libpcre2_fuzzsupport_32_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32 +_libs_libpcre2_fuzzsupport_32_a_LIBADD = + +noinst_PROGRAMS += pcre2fuzzcheck-32 +pcre2fuzzcheck_32_SOURCES = src/pcre2_fuzzsupport.c +pcre2fuzzcheck_32_CFLAGS = -DSTANDALONE $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32 +pcre2fuzzcheck_32_LDADD = libpcre2-32.la +if WITH_GCOV +pcre2fuzzcheck_32_CFLAGS += $(GCOV_CFLAGS) +pcre2fuzzcheck_32_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV +endif # WITH_PCRE2_32 + +endif # WITH_FUZZ_SUPPORT + +## -------- Testing ---------- + +## If the 8-bit library is enabled, build the POSIX wrapper test program and +## arrange for it to run. 
+ +if WITH_PCRE2_8 +TESTS += pcre2posix_test +noinst_PROGRAMS += pcre2posix_test +pcre2posix_test_SOURCES = src/pcre2posix_test.c +pcre2posix_test_CFLAGS = $(AM_CFLAGS) @PCRE2POSIX_CFLAG@ +pcre2posix_test_LDADD = libpcre2-posix.la libpcre2-8.la +endif # WITH_PCRE2_8 + +## If JIT support is enabled, arrange for the JIT test program to run. + +if WITH_JIT +TESTS += pcre2_jit_test +noinst_PROGRAMS += pcre2_jit_test +pcre2_jit_test_SOURCES = src/pcre2_jit_test.c +pcre2_jit_test_CFLAGS = $(AM_CFLAGS) +pcre2_jit_test_LDADD = +if WITH_PCRE2_8 +pcre2_jit_test_LDADD += libpcre2-8.la +endif # WITH_PCRE2_8 +if WITH_PCRE2_16 +pcre2_jit_test_LDADD += libpcre2-16.la +endif # WITH_PCRE2_16 +if WITH_PCRE2_32 +pcre2_jit_test_LDADD += libpcre2-32.la +endif # WITH_PCRE2_32 +if WITH_GCOV +pcre2_jit_test_CFLAGS += $(GCOV_CFLAGS) +pcre2_jit_test_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV +endif # WITH_JIT + +# Build the general pcre2test program. The file src/pcre2_printint.c is +# #included by pcre2test as many times as needed, at different code unit +# widths. + +bin_PROGRAMS += pcre2test +EXTRA_DIST += src/pcre2_printint.c +pcre2test_SOURCES = src/pcre2test.c +pcre2test_CFLAGS = $(AM_CFLAGS) +pcre2test_LDADD = $(LIBREADLINE) + +if WITH_PCRE2_8 +pcre2test_LDADD += libpcre2-8.la libpcre2-posix.la +endif # WITH_PCRE2_8 + +if WITH_PCRE2_16 +pcre2test_LDADD += libpcre2-16.la +endif # WITH_PCRE2_16 + +if WITH_PCRE2_32 +pcre2test_LDADD += libpcre2-32.la +endif # WITH_PCRE2_32 + +if WITH_VALGRIND +pcre2test_CFLAGS += $(VALGRIND_CFLAGS) +endif # WITH_VALGRIND + +if WITH_GCOV +pcre2test_CFLAGS += $(GCOV_CFLAGS) +pcre2test_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV + +## The main library tests. Each test is a binary plus a script that runs that +## binary in various ways. We install these test binaries in case folks find it +## helpful. The two .bat files are for running the tests under Windows. 
+ +TESTS += RunTest +EXTRA_DIST += RunTest.bat +dist_noinst_SCRIPTS += RunTest + +## When the 8-bit library is configured, pcre2grep will have been built. + +if WITH_PCRE2_8 +TESTS += RunGrepTest +EXTRA_DIST += RunGrepTest.bat +dist_noinst_SCRIPTS += RunGrepTest +endif # WITH_PCRE2_8 + +## Distribute all the test data files + +EXTRA_DIST += \ + testdata/grepbinary \ + testdata/grepfilelist \ + testdata/grepinput \ + testdata/grepinput3 \ + testdata/grepinput8 \ + testdata/grepinputC.bz2 \ + testdata/grepinputC.gz \ + testdata/grepinputM \ + testdata/grepinputv \ + testdata/grepinputx \ + testdata/greplist \ + testdata/grepnot.bz2 \ + testdata/grepoutput \ + testdata/grepoutput8 \ + testdata/grepoutputC \ + testdata/grepoutputCN \ + testdata/grepoutputCNU \ + testdata/grepoutputCU \ + testdata/grepoutputCbz2 \ + testdata/grepoutputCgz \ + testdata/grepoutputN \ + testdata/grepoutputUN \ + testdata/greppatN4 \ + testdata/testbtables \ + testdata/testinput1 \ + testdata/testinput2 \ + testdata/testinput3 \ + testdata/testinput4 \ + testdata/testinput5 \ + testdata/testinput6 \ + testdata/testinput7 \ + testdata/testinput8 \ + testdata/testinput9 \ + testdata/testinput10 \ + testdata/testinput11 \ + testdata/testinput12 \ + testdata/testinput13 \ + testdata/testinput14 \ + testdata/testinput15 \ + testdata/testinput16 \ + testdata/testinput17 \ + testdata/testinput18 \ + testdata/testinput19 \ + testdata/testinput20 \ + testdata/testinput21 \ + testdata/testinput22 \ + testdata/testinput23 \ + testdata/testinput24 \ + testdata/testinput25 \ + testdata/testinput26 \ + testdata/testinputEBC \ + testdata/testinputheap \ + testdata/testoutput1 \ + testdata/testoutput2 \ + testdata/testoutput3 \ + testdata/testoutput3A \ + testdata/testoutput3B \ + testdata/testoutput4 \ + testdata/testoutput5 \ + testdata/testoutput6 \ + testdata/testoutput7 \ + testdata/testoutput8-16-2 \ + testdata/testoutput8-16-3 \ + testdata/testoutput8-16-4 \ + testdata/testoutput8-32-2 \ + 
testdata/testoutput8-32-3 \ + testdata/testoutput8-32-4 \ + testdata/testoutput8-8-2 \ + testdata/testoutput8-8-3 \ + testdata/testoutput8-8-4 \ + testdata/testoutput9 \ + testdata/testoutput10 \ + testdata/testoutput11-16 \ + testdata/testoutput11-32 \ + testdata/testoutput12-16 \ + testdata/testoutput12-32 \ + testdata/testoutput13 \ + testdata/testoutput14-16 \ + testdata/testoutput14-32 \ + testdata/testoutput14-8 \ + testdata/testoutput15 \ + testdata/testoutput16 \ + testdata/testoutput17 \ + testdata/testoutput18 \ + testdata/testoutput19 \ + testdata/testoutput20 \ + testdata/testoutput21 \ + testdata/testoutput22-16 \ + testdata/testoutput22-32 \ + testdata/testoutput22-8 \ + testdata/testoutput23 \ + testdata/testoutput24 \ + testdata/testoutput25 \ + testdata/testoutput26 \ + testdata/testoutputEBC \ + testdata/testoutputheap-16 \ + testdata/testoutputheap-32 \ + testdata/testoutputheap-8 \ + testdata/valgrind-jit.supp \ + testdata/wintestinput3 \ + testdata/wintestoutput3 \ + perltest.sh + +# RunTest and RunGrepTest should clean up after themselves, but just in case +# they don't, add their working files to CLEANFILES. + +CLEANFILES += \ + testSinput \ + test3input \ + test3output \ + test3outputA \ + test3outputB \ + testtry \ + teststdout \ + teststderr \ + teststderrgrep \ + testtemp1grep \ + testtemp2grep \ + testtrygrep \ + testNinputgrep + +## ------------ End of testing ------------- + + +# PCRE2 demonstration program. Not built automatically. The point is that the +# users should build it themselves. So just distribute the source. + +EXTRA_DIST += src/pcre2demo.c + + +# We have .pc files for pkg-config users. 
+ +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = + +if WITH_PCRE2_8 +pkgconfig_DATA += libpcre2-8.pc libpcre2-posix.pc +endif + +if WITH_PCRE2_16 +pkgconfig_DATA += libpcre2-16.pc +endif + +if WITH_PCRE2_32 +pkgconfig_DATA += libpcre2-32.pc +endif + + +# gcov/lcov code coverage reporting +# +# Coverage reporting targets: +# +# coverage: Create a coverage report from 'make check' +# coverage-baseline: Capture baseline coverage information +# coverage-reset: This zeros the coverage counters only +# coverage-report: This creates the coverage report only +# coverage-clean-report: This removes the generated coverage report +# without cleaning the coverage data itself +# coverage-clean-data: This removes the captured coverage data without +# removing the coverage files created at compile time (*.gcno) +# coverage-clean: This cleans all coverage data including the generated +# coverage report. + +if WITH_GCOV +COVERAGE_TEST_NAME = $(PACKAGE) +COVERAGE_NAME = $(PACKAGE)-$(VERSION) +COVERAGE_OUTPUT_FILE = $(COVERAGE_NAME)-coverage.info +COVERAGE_OUTPUT_DIR = $(COVERAGE_NAME)-coverage +COVERAGE_LCOV_EXTRA_FLAGS = +COVERAGE_GENHTML_EXTRA_FLAGS = + +coverage_quiet = $(coverage_quiet_$(V)) +coverage_quiet_ = $(coverage_quiet_$(AM_DEFAULT_VERBOSITY)) +coverage_quiet_0 = --quiet + +coverage-check: all + -$(MAKE) $(AM_MAKEFLAGS) -k check + +coverage-baseline: + $(LCOV) $(coverage_quiet) \ + --directory $(top_builddir) \ + --output-file "$(COVERAGE_OUTPUT_FILE)" \ + --capture \ + --initial + +coverage-report: + $(LCOV) $(coverage_quiet) \ + --directory $(top_builddir) \ + --capture \ + --output-file "$(COVERAGE_OUTPUT_FILE).tmp" \ + --test-name "$(COVERAGE_TEST_NAME)" \ + --no-checksum \ + --compat-libtool \ + $(COVERAGE_LCOV_EXTRA_FLAGS) + $(LCOV) $(coverage_quiet) \ + --directory $(top_builddir) \ + --output-file "$(COVERAGE_OUTPUT_FILE)" \ + --remove "$(COVERAGE_OUTPUT_FILE).tmp" \ + "/tmp/*" \ + "/usr/include/*" \ + "$(includedir)/*" + -@rm -f 
"$(COVERAGE_OUTPUT_FILE).tmp" + LANG=C $(GENHTML) $(coverage_quiet) \ + --prefix $(top_builddir) \ + --output-directory "$(COVERAGE_OUTPUT_DIR)" \ + --title "$(PACKAGE) $(VERSION) Code Coverage Report" \ + --show-details "$(COVERAGE_OUTPUT_FILE)" \ + --legend \ + $(COVERAGE_GENHTML_EXTRA_FLAGS) + @echo "Code coverage report written to file://$(abs_builddir)/$(COVERAGE_OUTPUT_DIR)/index.html" + +coverage-reset: + -$(LCOV) $(coverage_quiet) --zerocounters --directory $(top_builddir) + +coverage-clean-report: + -rm -f "$(COVERAGE_OUTPUT_FILE)" "$(COVERAGE_OUTPUT_FILE).tmp" + -rm -rf "$(COVERAGE_OUTPUT_DIR)" + +coverage-clean-data: + -find $(top_builddir) -name "*.gcda" -delete + +coverage-clean: coverage-reset coverage-clean-report coverage-clean-data + -find $(top_builddir) -name "*.gcno" -delete + +coverage-distclean: coverage-clean + +coverage: coverage-reset coverage-baseline coverage-check coverage-report +clean-local: coverage-clean +distclean-local: coverage-distclean + +.PHONY: coverage coverage-baseline coverage-check coverage-report coverage-reset coverage-clean-report coverage-clean-data coverage-clean coverage-distclean + +# Without coverage support, still arrange for 'make distclean' to get rid of +# any coverage files that may have been left from a different configuration. + +else + +coverage: + @echo "Configuring with --enable-coverage is required to generate code coverage report." 
+ +DISTCLEANFILES += src/*.gcda src/*.gcno + +distclean-local: + rm -rf $(PACKAGE)-$(VERSION)-coverage* + +endif # WITH_GCOV + +## CMake support + +EXTRA_DIST += \ + cmake/COPYING-CMAKE-SCRIPTS \ + cmake/FindEditline.cmake \ + cmake/FindPackageHandleStandardArgs.cmake \ + cmake/FindReadline.cmake \ + cmake/pcre2-config-version.cmake.in \ + cmake/pcre2-config.cmake.in \ + CMakeLists.txt \ + config-cmake.h.in + +## end Makefile.am diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..5fb3982 --- /dev/null +++ b/Makefile.in @@ -0,0 +1,3967 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + + + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +TESTS = $(am__EXEEXT_6) $(am__EXEEXT_7) RunTest $(am__append_43) +bin_PROGRAMS = $(am__EXEEXT_1) pcre2test$(EXEEXT) +noinst_PROGRAMS = $(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4) \ + $(am__EXEEXT_5) 
$(am__EXEEXT_6) $(am__EXEEXT_7) +@WITH_REBUILD_CHARTABLES_TRUE@am__append_1 = pcre2_dftables +@WITH_PCRE2_8_TRUE@am__append_2 = libpcre2-8.la +@WITH_PCRE2_16_TRUE@am__append_3 = libpcre2-16.la +@WITH_PCRE2_32_TRUE@am__append_4 = libpcre2-32.la +@WITH_PCRE2_8_TRUE@@WITH_VALGRIND_TRUE@am__append_5 = $(VALGRIND_CFLAGS) +@WITH_PCRE2_16_TRUE@@WITH_VALGRIND_TRUE@am__append_6 = $(VALGRIND_CFLAGS) +@WITH_PCRE2_32_TRUE@@WITH_VALGRIND_TRUE@am__append_7 = $(VALGRIND_CFLAGS) +@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_8 = $(GCOV_CFLAGS) +@WITH_GCOV_TRUE@@WITH_PCRE2_16_TRUE@am__append_9 = $(GCOV_CFLAGS) +@WITH_GCOV_TRUE@@WITH_PCRE2_32_TRUE@am__append_10 = $(GCOV_CFLAGS) +@WITH_PCRE2_8_TRUE@am__append_11 = libpcre2-posix.la +@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_12 = $(GCOV_CFLAGS) +@WITH_PCRE2_8_TRUE@am__append_13 = pcre2grep +@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_14 = $(GCOV_CFLAGS) +@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_15 = $(GCOV_LIBS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__append_16 = .libs/libpcre2-fuzzsupport.a +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__append_17 = pcre2fuzzcheck-8 +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_18 = $(GCOV_CFLAGS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_19 = $(GCOV_LIBS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@am__append_20 = .libs/libpcre2-fuzzsupport-16.a +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@am__append_21 = pcre2fuzzcheck-16 +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_16_TRUE@am__append_22 = $(GCOV_CFLAGS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_16_TRUE@am__append_23 = $(GCOV_LIBS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@am__append_24 = .libs/libpcre2-fuzzsupport-32.a +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@am__append_25 = pcre2fuzzcheck-32 +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_32_TRUE@am__append_26 = $(GCOV_CFLAGS) 
+@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_32_TRUE@am__append_27 = $(GCOV_LIBS) +@WITH_PCRE2_8_TRUE@am__append_28 = pcre2posix_test +@WITH_PCRE2_8_TRUE@am__append_29 = pcre2posix_test +@WITH_JIT_TRUE@am__append_30 = pcre2_jit_test +@WITH_JIT_TRUE@am__append_31 = pcre2_jit_test +@WITH_JIT_TRUE@@WITH_PCRE2_8_TRUE@am__append_32 = libpcre2-8.la +@WITH_JIT_TRUE@@WITH_PCRE2_16_TRUE@am__append_33 = libpcre2-16.la +@WITH_JIT_TRUE@@WITH_PCRE2_32_TRUE@am__append_34 = libpcre2-32.la +@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__append_35 = $(GCOV_CFLAGS) +@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__append_36 = $(GCOV_LIBS) +@WITH_PCRE2_8_TRUE@am__append_37 = libpcre2-8.la libpcre2-posix.la +@WITH_PCRE2_16_TRUE@am__append_38 = libpcre2-16.la +@WITH_PCRE2_32_TRUE@am__append_39 = libpcre2-32.la +@WITH_VALGRIND_TRUE@am__append_40 = $(VALGRIND_CFLAGS) +@WITH_GCOV_TRUE@am__append_41 = $(GCOV_CFLAGS) +@WITH_GCOV_TRUE@am__append_42 = $(GCOV_LIBS) +@WITH_PCRE2_8_TRUE@am__append_43 = RunGrepTest +@WITH_PCRE2_8_TRUE@am__append_44 = RunGrepTest.bat +@WITH_PCRE2_8_TRUE@am__append_45 = RunGrepTest +@WITH_PCRE2_8_TRUE@am__append_46 = libpcre2-8.pc libpcre2-posix.pc +@WITH_PCRE2_16_TRUE@am__append_47 = libpcre2-16.pc +@WITH_PCRE2_32_TRUE@am__append_48 = libpcre2-32.pc +@WITH_GCOV_FALSE@am__append_49 = src/*.gcda src/*.gcno +subdir = . 
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_pthread.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pcre2_visibility.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \ + $(am__configure_deps) $(am__dist_noinst_SCRIPTS_DIST) \ + $(dist_doc_DATA) $(dist_html_DATA) $(include_HEADERS) \ + $(am__DIST_COMMON) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/config.h +CONFIG_CLEAN_FILES = libpcre2-8.pc libpcre2-16.pc libpcre2-32.pc \ + libpcre2-posix.pc pcre2-config src/pcre2.h +CONFIG_CLEAN_VPATH_FILES = +@WITH_PCRE2_8_TRUE@am__EXEEXT_1 = pcre2grep$(EXEEXT) +am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" \ + "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" \ + "$(DESTDIR)$(man3dir)" "$(DESTDIR)$(docdir)" \ + "$(DESTDIR)$(htmldir)" "$(DESTDIR)$(pkgconfigdir)" \ + "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)" +@WITH_REBUILD_CHARTABLES_TRUE@am__EXEEXT_2 = pcre2_dftables$(EXEEXT) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__EXEEXT_3 = pcre2fuzzcheck-8$(EXEEXT) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@am__EXEEXT_4 = pcre2fuzzcheck-16$(EXEEXT) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@am__EXEEXT_5 = pcre2fuzzcheck-32$(EXEEXT) +@WITH_PCRE2_8_TRUE@am__EXEEXT_6 = pcre2posix_test$(EXEEXT) +@WITH_JIT_TRUE@am__EXEEXT_7 = pcre2_jit_test$(EXEEXT) +PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS) +LIBRARIES = $(noinst_LIBRARIES) +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = 
f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +LTLIBRARIES = $(lib_LTLIBRARIES) +ARFLAGS = cru +AM_V_AR = $(am__v_AR_@AM_V@) +am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@) +am__v_AR_0 = @echo " AR " $@; +am__v_AR_1 = +_libs_libpcre2_fuzzsupport_16_a_AR = $(AR) $(ARFLAGS) +_libs_libpcre2_fuzzsupport_16_a_DEPENDENCIES = +am___libs_libpcre2_fuzzsupport_16_a_SOURCES_DIST = \ + src/pcre2_fuzzsupport.c +am__dirstamp = $(am__leading_dot)dirstamp +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@am__libs_libpcre2_fuzzsupport_16_a_OBJECTS = src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.$(OBJEXT) +_libs_libpcre2_fuzzsupport_16_a_OBJECTS = \ + $(am__libs_libpcre2_fuzzsupport_16_a_OBJECTS) +_libs_libpcre2_fuzzsupport_32_a_AR = $(AR) $(ARFLAGS) +_libs_libpcre2_fuzzsupport_32_a_DEPENDENCIES = +am___libs_libpcre2_fuzzsupport_32_a_SOURCES_DIST = \ + src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@am__libs_libpcre2_fuzzsupport_32_a_OBJECTS = src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.$(OBJEXT) +_libs_libpcre2_fuzzsupport_32_a_OBJECTS = \ + 
$(am__libs_libpcre2_fuzzsupport_32_a_OBJECTS) +_libs_libpcre2_fuzzsupport_a_AR = $(AR) $(ARFLAGS) +_libs_libpcre2_fuzzsupport_a_DEPENDENCIES = +am___libs_libpcre2_fuzzsupport_a_SOURCES_DIST = \ + src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__libs_libpcre2_fuzzsupport_a_OBJECTS = src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.$(OBJEXT) +_libs_libpcre2_fuzzsupport_a_OBJECTS = \ + $(am__libs_libpcre2_fuzzsupport_a_OBJECTS) +libpcre2_16_la_DEPENDENCIES = +am__libpcre2_16_la_SOURCES_DIST = src/pcre2_auto_possess.c \ + src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_config.c \ + src/pcre2_context.c src/pcre2_convert.c src/pcre2_dfa_match.c \ + src/pcre2_error.c src/pcre2_extuni.c src/pcre2_find_bracket.c \ + src/pcre2_internal.h src/pcre2_intmodedep.h \ + src/pcre2_jit_compile.c src/pcre2_jit_neon_inc.h \ + src/pcre2_jit_simd_inc.h src/pcre2_maketables.c \ + src/pcre2_match.c src/pcre2_match_data.c src/pcre2_newline.c \ + src/pcre2_ord2utf.c src/pcre2_pattern_info.c \ + src/pcre2_script_run.c src/pcre2_serialize.c \ + src/pcre2_string_utils.c src/pcre2_study.c \ + src/pcre2_substitute.c src/pcre2_substring.c \ + src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \ + src/pcre2_valid_utf.c src/pcre2_xclass.c +am__objects_1 = src/libpcre2_16_la-pcre2_auto_possess.lo \ + src/libpcre2_16_la-pcre2_chkdint.lo \ + src/libpcre2_16_la-pcre2_compile.lo \ + src/libpcre2_16_la-pcre2_config.lo \ + src/libpcre2_16_la-pcre2_context.lo \ + src/libpcre2_16_la-pcre2_convert.lo \ + src/libpcre2_16_la-pcre2_dfa_match.lo \ + src/libpcre2_16_la-pcre2_error.lo \ + src/libpcre2_16_la-pcre2_extuni.lo \ + src/libpcre2_16_la-pcre2_find_bracket.lo \ + src/libpcre2_16_la-pcre2_jit_compile.lo \ + src/libpcre2_16_la-pcre2_maketables.lo \ + src/libpcre2_16_la-pcre2_match.lo \ + src/libpcre2_16_la-pcre2_match_data.lo \ + src/libpcre2_16_la-pcre2_newline.lo \ + src/libpcre2_16_la-pcre2_ord2utf.lo \ + src/libpcre2_16_la-pcre2_pattern_info.lo \ + 
src/libpcre2_16_la-pcre2_script_run.lo \ + src/libpcre2_16_la-pcre2_serialize.lo \ + src/libpcre2_16_la-pcre2_string_utils.lo \ + src/libpcre2_16_la-pcre2_study.lo \ + src/libpcre2_16_la-pcre2_substitute.lo \ + src/libpcre2_16_la-pcre2_substring.lo \ + src/libpcre2_16_la-pcre2_tables.lo \ + src/libpcre2_16_la-pcre2_ucd.lo \ + src/libpcre2_16_la-pcre2_valid_utf.lo \ + src/libpcre2_16_la-pcre2_xclass.lo +@WITH_PCRE2_16_TRUE@am_libpcre2_16_la_OBJECTS = $(am__objects_1) +am__objects_2 = src/libpcre2_16_la-pcre2_chartables.lo +@WITH_PCRE2_16_TRUE@nodist_libpcre2_16_la_OBJECTS = $(am__objects_2) +libpcre2_16_la_OBJECTS = $(am_libpcre2_16_la_OBJECTS) \ + $(nodist_libpcre2_16_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libpcre2_16_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libpcre2_16_la_CFLAGS) $(CFLAGS) $(libpcre2_16_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@WITH_PCRE2_16_TRUE@am_libpcre2_16_la_rpath = -rpath $(libdir) +libpcre2_32_la_DEPENDENCIES = +am__libpcre2_32_la_SOURCES_DIST = src/pcre2_auto_possess.c \ + src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_config.c \ + src/pcre2_context.c src/pcre2_convert.c src/pcre2_dfa_match.c \ + src/pcre2_error.c src/pcre2_extuni.c src/pcre2_find_bracket.c \ + src/pcre2_internal.h src/pcre2_intmodedep.h \ + src/pcre2_jit_compile.c src/pcre2_jit_neon_inc.h \ + src/pcre2_jit_simd_inc.h src/pcre2_maketables.c \ + src/pcre2_match.c src/pcre2_match_data.c src/pcre2_newline.c \ + src/pcre2_ord2utf.c src/pcre2_pattern_info.c \ + src/pcre2_script_run.c src/pcre2_serialize.c \ + src/pcre2_string_utils.c src/pcre2_study.c \ + src/pcre2_substitute.c src/pcre2_substring.c \ + src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \ + src/pcre2_valid_utf.c src/pcre2_xclass.c +am__objects_3 = src/libpcre2_32_la-pcre2_auto_possess.lo \ + src/libpcre2_32_la-pcre2_chkdint.lo \ + src/libpcre2_32_la-pcre2_compile.lo \ + 
src/libpcre2_32_la-pcre2_config.lo \ + src/libpcre2_32_la-pcre2_context.lo \ + src/libpcre2_32_la-pcre2_convert.lo \ + src/libpcre2_32_la-pcre2_dfa_match.lo \ + src/libpcre2_32_la-pcre2_error.lo \ + src/libpcre2_32_la-pcre2_extuni.lo \ + src/libpcre2_32_la-pcre2_find_bracket.lo \ + src/libpcre2_32_la-pcre2_jit_compile.lo \ + src/libpcre2_32_la-pcre2_maketables.lo \ + src/libpcre2_32_la-pcre2_match.lo \ + src/libpcre2_32_la-pcre2_match_data.lo \ + src/libpcre2_32_la-pcre2_newline.lo \ + src/libpcre2_32_la-pcre2_ord2utf.lo \ + src/libpcre2_32_la-pcre2_pattern_info.lo \ + src/libpcre2_32_la-pcre2_script_run.lo \ + src/libpcre2_32_la-pcre2_serialize.lo \ + src/libpcre2_32_la-pcre2_string_utils.lo \ + src/libpcre2_32_la-pcre2_study.lo \ + src/libpcre2_32_la-pcre2_substitute.lo \ + src/libpcre2_32_la-pcre2_substring.lo \ + src/libpcre2_32_la-pcre2_tables.lo \ + src/libpcre2_32_la-pcre2_ucd.lo \ + src/libpcre2_32_la-pcre2_valid_utf.lo \ + src/libpcre2_32_la-pcre2_xclass.lo +@WITH_PCRE2_32_TRUE@am_libpcre2_32_la_OBJECTS = $(am__objects_3) +am__objects_4 = src/libpcre2_32_la-pcre2_chartables.lo +@WITH_PCRE2_32_TRUE@nodist_libpcre2_32_la_OBJECTS = $(am__objects_4) +libpcre2_32_la_OBJECTS = $(am_libpcre2_32_la_OBJECTS) \ + $(nodist_libpcre2_32_la_OBJECTS) +libpcre2_32_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libpcre2_32_la_CFLAGS) $(CFLAGS) $(libpcre2_32_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@WITH_PCRE2_32_TRUE@am_libpcre2_32_la_rpath = -rpath $(libdir) +libpcre2_8_la_DEPENDENCIES = +am__libpcre2_8_la_SOURCES_DIST = src/pcre2_auto_possess.c \ + src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_config.c \ + src/pcre2_context.c src/pcre2_convert.c src/pcre2_dfa_match.c \ + src/pcre2_error.c src/pcre2_extuni.c src/pcre2_find_bracket.c \ + src/pcre2_internal.h src/pcre2_intmodedep.h \ + src/pcre2_jit_compile.c src/pcre2_jit_neon_inc.h \ + src/pcre2_jit_simd_inc.h src/pcre2_maketables.c \ + src/pcre2_match.c 
src/pcre2_match_data.c src/pcre2_newline.c \ + src/pcre2_ord2utf.c src/pcre2_pattern_info.c \ + src/pcre2_script_run.c src/pcre2_serialize.c \ + src/pcre2_string_utils.c src/pcre2_study.c \ + src/pcre2_substitute.c src/pcre2_substring.c \ + src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \ + src/pcre2_valid_utf.c src/pcre2_xclass.c +am__objects_5 = src/libpcre2_8_la-pcre2_auto_possess.lo \ + src/libpcre2_8_la-pcre2_chkdint.lo \ + src/libpcre2_8_la-pcre2_compile.lo \ + src/libpcre2_8_la-pcre2_config.lo \ + src/libpcre2_8_la-pcre2_context.lo \ + src/libpcre2_8_la-pcre2_convert.lo \ + src/libpcre2_8_la-pcre2_dfa_match.lo \ + src/libpcre2_8_la-pcre2_error.lo \ + src/libpcre2_8_la-pcre2_extuni.lo \ + src/libpcre2_8_la-pcre2_find_bracket.lo \ + src/libpcre2_8_la-pcre2_jit_compile.lo \ + src/libpcre2_8_la-pcre2_maketables.lo \ + src/libpcre2_8_la-pcre2_match.lo \ + src/libpcre2_8_la-pcre2_match_data.lo \ + src/libpcre2_8_la-pcre2_newline.lo \ + src/libpcre2_8_la-pcre2_ord2utf.lo \ + src/libpcre2_8_la-pcre2_pattern_info.lo \ + src/libpcre2_8_la-pcre2_script_run.lo \ + src/libpcre2_8_la-pcre2_serialize.lo \ + src/libpcre2_8_la-pcre2_string_utils.lo \ + src/libpcre2_8_la-pcre2_study.lo \ + src/libpcre2_8_la-pcre2_substitute.lo \ + src/libpcre2_8_la-pcre2_substring.lo \ + src/libpcre2_8_la-pcre2_tables.lo \ + src/libpcre2_8_la-pcre2_ucd.lo \ + src/libpcre2_8_la-pcre2_valid_utf.lo \ + src/libpcre2_8_la-pcre2_xclass.lo +@WITH_PCRE2_8_TRUE@am_libpcre2_8_la_OBJECTS = $(am__objects_5) +am__objects_6 = src/libpcre2_8_la-pcre2_chartables.lo +@WITH_PCRE2_8_TRUE@nodist_libpcre2_8_la_OBJECTS = $(am__objects_6) +libpcre2_8_la_OBJECTS = $(am_libpcre2_8_la_OBJECTS) \ + $(nodist_libpcre2_8_la_OBJECTS) +libpcre2_8_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(libpcre2_8_la_CFLAGS) \ + $(CFLAGS) $(libpcre2_8_la_LDFLAGS) $(LDFLAGS) -o $@ +@WITH_PCRE2_8_TRUE@am_libpcre2_8_la_rpath = -rpath $(libdir) 
+@WITH_PCRE2_8_TRUE@libpcre2_posix_la_DEPENDENCIES = libpcre2-8.la +am__libpcre2_posix_la_SOURCES_DIST = src/pcre2posix.c +@WITH_PCRE2_8_TRUE@am_libpcre2_posix_la_OBJECTS = \ +@WITH_PCRE2_8_TRUE@ src/libpcre2_posix_la-pcre2posix.lo +libpcre2_posix_la_OBJECTS = $(am_libpcre2_posix_la_OBJECTS) +libpcre2_posix_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libpcre2_posix_la_CFLAGS) $(CFLAGS) \ + $(libpcre2_posix_la_LDFLAGS) $(LDFLAGS) -o $@ +@WITH_PCRE2_8_TRUE@am_libpcre2_posix_la_rpath = -rpath $(libdir) +am__pcre2_dftables_SOURCES_DIST = src/pcre2_dftables.c +@WITH_REBUILD_CHARTABLES_TRUE@am_pcre2_dftables_OBJECTS = \ +@WITH_REBUILD_CHARTABLES_TRUE@ src/pcre2_dftables.$(OBJEXT) +pcre2_dftables_OBJECTS = $(am_pcre2_dftables_OBJECTS) +pcre2_dftables_LDADD = $(LDADD) +am__pcre2_jit_test_SOURCES_DIST = src/pcre2_jit_test.c +@WITH_JIT_TRUE@am_pcre2_jit_test_OBJECTS = \ +@WITH_JIT_TRUE@ src/pcre2_jit_test-pcre2_jit_test.$(OBJEXT) +pcre2_jit_test_OBJECTS = $(am_pcre2_jit_test_OBJECTS) +am__DEPENDENCIES_1 = +@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__DEPENDENCIES_2 = \ +@WITH_GCOV_TRUE@@WITH_JIT_TRUE@ $(am__DEPENDENCIES_1) +@WITH_JIT_TRUE@pcre2_jit_test_DEPENDENCIES = $(am__append_32) \ +@WITH_JIT_TRUE@ $(am__append_33) $(am__append_34) \ +@WITH_JIT_TRUE@ $(am__DEPENDENCIES_2) +pcre2_jit_test_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(pcre2_jit_test_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o \ + $@ +am__pcre2fuzzcheck_16_SOURCES_DIST = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@am_pcre2fuzzcheck_16_OBJECTS = src/pcre2fuzzcheck_16-pcre2_fuzzsupport.$(OBJEXT) +pcre2fuzzcheck_16_OBJECTS = $(am_pcre2fuzzcheck_16_OBJECTS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_16_TRUE@am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@pcre2fuzzcheck_16_DEPENDENCIES = \ 
+@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ libpcre2-16.la \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ $(am__DEPENDENCIES_3) +pcre2fuzzcheck_16_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(pcre2fuzzcheck_16_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +am__pcre2fuzzcheck_32_SOURCES_DIST = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@am_pcre2fuzzcheck_32_OBJECTS = src/pcre2fuzzcheck_32-pcre2_fuzzsupport.$(OBJEXT) +pcre2fuzzcheck_32_OBJECTS = $(am_pcre2fuzzcheck_32_OBJECTS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_32_TRUE@am__DEPENDENCIES_4 = $(am__DEPENDENCIES_1) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@pcre2fuzzcheck_32_DEPENDENCIES = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ libpcre2-32.la \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ $(am__DEPENDENCIES_4) +pcre2fuzzcheck_32_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(pcre2fuzzcheck_32_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +am__pcre2fuzzcheck_8_SOURCES_DIST = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am_pcre2fuzzcheck_8_OBJECTS = src/pcre2fuzzcheck_8-pcre2_fuzzsupport.$(OBJEXT) +pcre2fuzzcheck_8_OBJECTS = $(am_pcre2fuzzcheck_8_OBJECTS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__DEPENDENCIES_5 = $(am__DEPENDENCIES_1) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_8_DEPENDENCIES = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ libpcre2-8.la \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ $(am__DEPENDENCIES_5) +pcre2fuzzcheck_8_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(pcre2fuzzcheck_8_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +am__pcre2grep_SOURCES_DIST = src/pcre2grep.c +@WITH_PCRE2_8_TRUE@am_pcre2grep_OBJECTS = \ +@WITH_PCRE2_8_TRUE@ src/pcre2grep-pcre2grep.$(OBJEXT) +pcre2grep_OBJECTS = 
$(am_pcre2grep_OBJECTS) +@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__DEPENDENCIES_6 = \ +@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@ $(am__DEPENDENCIES_1) +@WITH_PCRE2_8_TRUE@pcre2grep_DEPENDENCIES = $(am__DEPENDENCIES_1) \ +@WITH_PCRE2_8_TRUE@ $(am__DEPENDENCIES_1) libpcre2-8.la \ +@WITH_PCRE2_8_TRUE@ $(am__DEPENDENCIES_6) +pcre2grep_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(pcre2grep_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am__pcre2posix_test_SOURCES_DIST = src/pcre2posix_test.c +@WITH_PCRE2_8_TRUE@am_pcre2posix_test_OBJECTS = src/pcre2posix_test-pcre2posix_test.$(OBJEXT) +pcre2posix_test_OBJECTS = $(am_pcre2posix_test_OBJECTS) +@WITH_PCRE2_8_TRUE@pcre2posix_test_DEPENDENCIES = libpcre2-posix.la \ +@WITH_PCRE2_8_TRUE@ libpcre2-8.la +pcre2posix_test_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(pcre2posix_test_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +am_pcre2test_OBJECTS = src/pcre2test-pcre2test.$(OBJEXT) +pcre2test_OBJECTS = $(am_pcre2test_OBJECTS) +@WITH_GCOV_TRUE@am__DEPENDENCIES_7 = $(am__DEPENDENCIES_1) +pcre2test_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__append_37) \ + $(am__append_38) $(am__append_39) $(am__DEPENDENCIES_7) +pcre2test_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(pcre2test_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am__dist_noinst_SCRIPTS_DIST = RunTest RunGrepTest +SCRIPTS = $(bin_SCRIPTS) $(dist_noinst_SCRIPTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles 
= depfiles +am__depfiles_remade = src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Po \ + src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Po \ + src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo \ + 
src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo \ + 
src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo \ + src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo \ + src/$(DEPDIR)/pcre2_dftables.Po \ + src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po \ + src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Po \ + src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Po \ + src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Po \ + src/$(DEPDIR)/pcre2grep-pcre2grep.Po \ + src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Po \ + src/$(DEPDIR)/pcre2test-pcre2test.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(_libs_libpcre2_fuzzsupport_16_a_SOURCES) \ + $(_libs_libpcre2_fuzzsupport_32_a_SOURCES) \ + $(_libs_libpcre2_fuzzsupport_a_SOURCES) \ + 
$(libpcre2_16_la_SOURCES) $(nodist_libpcre2_16_la_SOURCES) \ + $(libpcre2_32_la_SOURCES) $(nodist_libpcre2_32_la_SOURCES) \ + $(libpcre2_8_la_SOURCES) $(nodist_libpcre2_8_la_SOURCES) \ + $(libpcre2_posix_la_SOURCES) $(pcre2_dftables_SOURCES) \ + $(pcre2_jit_test_SOURCES) $(pcre2fuzzcheck_16_SOURCES) \ + $(pcre2fuzzcheck_32_SOURCES) $(pcre2fuzzcheck_8_SOURCES) \ + $(pcre2grep_SOURCES) $(pcre2posix_test_SOURCES) \ + $(pcre2test_SOURCES) +DIST_SOURCES = $(am___libs_libpcre2_fuzzsupport_16_a_SOURCES_DIST) \ + $(am___libs_libpcre2_fuzzsupport_32_a_SOURCES_DIST) \ + $(am___libs_libpcre2_fuzzsupport_a_SOURCES_DIST) \ + $(am__libpcre2_16_la_SOURCES_DIST) \ + $(am__libpcre2_32_la_SOURCES_DIST) \ + $(am__libpcre2_8_la_SOURCES_DIST) \ + $(am__libpcre2_posix_la_SOURCES_DIST) \ + $(am__pcre2_dftables_SOURCES_DIST) \ + $(am__pcre2_jit_test_SOURCES_DIST) \ + $(am__pcre2fuzzcheck_16_SOURCES_DIST) \ + $(am__pcre2fuzzcheck_32_SOURCES_DIST) \ + $(am__pcre2fuzzcheck_8_SOURCES_DIST) \ + $(am__pcre2grep_SOURCES_DIST) \ + $(am__pcre2posix_test_SOURCES_DIST) $(pcre2test_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +man1dir = $(mandir)/man1 +man3dir = $(mandir)/man3 +NROFF = nroff +MANS = $(dist_man_MANS) +DATA = $(dist_doc_DATA) $(dist_html_DATA) $(pkgconfig_DATA) +HEADERS = $(include_HEADERS) $(nodist_include_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +AM_RECURSIVE_TARGETS = cscope check recheck +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__DIST_COMMON = $(dist_man_MANS) $(srcdir)/Makefile.in \ + $(srcdir)/libpcre2-16.pc.in $(srcdir)/libpcre2-32.pc.in \ + $(srcdir)/libpcre2-8.pc.in $(srcdir)/libpcre2-posix.pc.in \ + $(srcdir)/pcre2-config.in $(top_srcdir)/src/config.h.in \ + $(top_srcdir)/src/pcre2.h.in AUTHORS COPYING ChangeLog INSTALL \ + NEWS README ar-lib compile config.guess config.sub depcomp \ + install-sh ltmain.sh missing test-driver +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + if test -d "$(distdir)"; then \ + find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \ + && rm -rf "$(distdir)" \ + || { sleep 5 && rm -rf "$(distdir)"; }; \ + else :; fi +am__post_remove_distdir = $(am__remove_distdir) +DIST_ARCHIVES = $(distdir).tar.gz $(distdir).tar.bz2 $(distdir).zip +GZIP_ENV = --best +DIST_TARGETS = dist-bzip2 dist-gzip dist-zip +# Exists only to be overridden by the user if desired. +AM_DISTCHECK_DVI_TARGET = dvi +distuninstallcheck_listfiles = find . 
-type f -print +am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ + | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' +distcleancheck_listfiles = find . -type f -print +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CET_CFLAGS = @CET_CFLAGS@ +CFLAGS = @CFLAGS@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DISTCHECK_CONFIGURE_FLAGS = @DISTCHECK_CONFIGURE_FLAGS@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +EXTRA_LIBPCRE2_16_LDFLAGS = @EXTRA_LIBPCRE2_16_LDFLAGS@ +EXTRA_LIBPCRE2_32_LDFLAGS = @EXTRA_LIBPCRE2_32_LDFLAGS@ +EXTRA_LIBPCRE2_8_LDFLAGS = @EXTRA_LIBPCRE2_8_LDFLAGS@ +EXTRA_LIBPCRE2_POSIX_LDFLAGS = @EXTRA_LIBPCRE2_POSIX_LDFLAGS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +GCOV_CFLAGS = @GCOV_CFLAGS@ +GCOV_CXXFLAGS = @GCOV_CXXFLAGS@ +GCOV_LIBS = @GCOV_LIBS@ +GENHTML = @GENHTML@ +GREP = @GREP@ +HAVE_VISIBILITY = @HAVE_VISIBILITY@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LCOV = @LCOV@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBBZ2 = @LIBBZ2@ +LIBOBJS = @LIBOBJS@ +LIBREADLINE = @LIBREADLINE@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIBZ = @LIBZ@ +LIB_POSTFIX = @LIB_POSTFIX@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ 
+PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PCRE2POSIX_CFLAG = @PCRE2POSIX_CFLAG@ +PCRE2_DATE = @PCRE2_DATE@ +PCRE2_MAJOR = @PCRE2_MAJOR@ +PCRE2_MINOR = @PCRE2_MINOR@ +PCRE2_PRERELEASE = @PCRE2_PRERELEASE@ +PCRE2_STATIC_CFLAG = @PCRE2_STATIC_CFLAG@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SHTOOL = @SHTOOL@ +STRIP = @STRIP@ +VALGRIND_CFLAGS = @VALGRIND_CFLAGS@ +VALGRIND_LIBS = @VALGRIND_LIBS@ +VERSION = @VERSION@ +VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ +VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +ax_pthread_config = @ax_pthread_config@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +enable_pcre2_16 = @enable_pcre2_16@ +enable_pcre2_32 = @enable_pcre2_32@ +enable_pcre2_8 = @enable_pcre2_8@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ 
+oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = subdir-objects +ACLOCAL_AMFLAGS = -I m4 +AM_CPPFLAGS = "-I$(srcdir)/src" +dist_doc_DATA = \ + AUTHORS \ + COPYING \ + ChangeLog \ + LICENCE \ + NEWS \ + README \ + doc/pcre2.txt \ + doc/pcre2-config.txt \ + doc/pcre2grep.txt \ + doc/pcre2test.txt + +dist_html_DATA = \ + doc/html/NON-AUTOTOOLS-BUILD.txt \ + doc/html/README.txt \ + doc/html/index.html \ + doc/html/pcre2-config.html \ + doc/html/pcre2.html \ + doc/html/pcre2_callout_enumerate.html \ + doc/html/pcre2_code_copy.html \ + doc/html/pcre2_code_copy_with_tables.html \ + doc/html/pcre2_code_free.html \ + doc/html/pcre2_compile.html \ + doc/html/pcre2_compile_context_copy.html \ + doc/html/pcre2_compile_context_create.html \ + doc/html/pcre2_compile_context_free.html \ + doc/html/pcre2_config.html \ + doc/html/pcre2_convert_context_copy.html \ + doc/html/pcre2_convert_context_create.html \ + doc/html/pcre2_convert_context_free.html \ + doc/html/pcre2_converted_pattern_free.html \ + doc/html/pcre2_dfa_match.html \ + doc/html/pcre2_general_context_copy.html \ + doc/html/pcre2_general_context_create.html \ + doc/html/pcre2_general_context_free.html \ + doc/html/pcre2_get_error_message.html \ + doc/html/pcre2_get_mark.html \ + doc/html/pcre2_get_match_data_heapframes_size.html \ + doc/html/pcre2_get_match_data_size.html \ + doc/html/pcre2_get_ovector_count.html \ + doc/html/pcre2_get_ovector_pointer.html \ + doc/html/pcre2_get_startchar.html \ + doc/html/pcre2_jit_compile.html \ + doc/html/pcre2_jit_free_unused_memory.html \ + doc/html/pcre2_jit_match.html \ + doc/html/pcre2_jit_stack_assign.html \ + 
doc/html/pcre2_jit_stack_create.html \ + doc/html/pcre2_jit_stack_free.html \ + doc/html/pcre2_maketables.html \ + doc/html/pcre2_maketables_free.html \ + doc/html/pcre2_match.html \ + doc/html/pcre2_match_context_copy.html \ + doc/html/pcre2_match_context_create.html \ + doc/html/pcre2_match_context_free.html \ + doc/html/pcre2_match_data_create.html \ + doc/html/pcre2_match_data_create_from_pattern.html \ + doc/html/pcre2_match_data_free.html \ + doc/html/pcre2_pattern_convert.html \ + doc/html/pcre2_pattern_info.html \ + doc/html/pcre2_serialize_decode.html \ + doc/html/pcre2_serialize_encode.html \ + doc/html/pcre2_serialize_free.html \ + doc/html/pcre2_serialize_get_number_of_codes.html \ + doc/html/pcre2_set_bsr.html \ + doc/html/pcre2_set_callout.html \ + doc/html/pcre2_set_character_tables.html \ + doc/html/pcre2_set_compile_extra_options.html \ + doc/html/pcre2_set_compile_recursion_guard.html \ + doc/html/pcre2_set_depth_limit.html \ + doc/html/pcre2_set_glob_escape.html \ + doc/html/pcre2_set_glob_separator.html \ + doc/html/pcre2_set_heap_limit.html \ + doc/html/pcre2_set_match_limit.html \ + doc/html/pcre2_set_max_pattern_compiled_length.html \ + doc/html/pcre2_set_max_pattern_length.html \ + doc/html/pcre2_set_max_varlookbehind.html \ + doc/html/pcre2_set_offset_limit.html \ + doc/html/pcre2_set_newline.html \ + doc/html/pcre2_set_parens_nest_limit.html \ + doc/html/pcre2_set_recursion_limit.html \ + doc/html/pcre2_set_recursion_memory_management.html \ + doc/html/pcre2_set_substitute_callout.html \ + doc/html/pcre2_substitute.html \ + doc/html/pcre2_substring_copy_byname.html \ + doc/html/pcre2_substring_copy_bynumber.html \ + doc/html/pcre2_substring_free.html \ + doc/html/pcre2_substring_get_byname.html \ + doc/html/pcre2_substring_get_bynumber.html \ + doc/html/pcre2_substring_length_byname.html \ + doc/html/pcre2_substring_length_bynumber.html \ + doc/html/pcre2_substring_list_free.html \ + doc/html/pcre2_substring_list_get.html \ + 
doc/html/pcre2_substring_nametable_scan.html \ + doc/html/pcre2_substring_number_from_name.html \ + doc/html/pcre2api.html \ + doc/html/pcre2build.html \ + doc/html/pcre2callout.html \ + doc/html/pcre2compat.html \ + doc/html/pcre2convert.html \ + doc/html/pcre2demo.html \ + doc/html/pcre2grep.html \ + doc/html/pcre2jit.html \ + doc/html/pcre2limits.html \ + doc/html/pcre2matching.html \ + doc/html/pcre2partial.html \ + doc/html/pcre2pattern.html \ + doc/html/pcre2perform.html \ + doc/html/pcre2posix.html \ + doc/html/pcre2sample.html \ + doc/html/pcre2serialize.html \ + doc/html/pcre2syntax.html \ + doc/html/pcre2test.html \ + doc/html/pcre2unicode.html + +dist_man_MANS = \ + doc/pcre2-config.1 \ + doc/pcre2.3 \ + doc/pcre2_callout_enumerate.3 \ + doc/pcre2_code_copy.3 \ + doc/pcre2_code_copy_with_tables.3 \ + doc/pcre2_code_free.3 \ + doc/pcre2_compile.3 \ + doc/pcre2_compile_context_copy.3 \ + doc/pcre2_compile_context_create.3 \ + doc/pcre2_compile_context_free.3 \ + doc/pcre2_config.3 \ + doc/pcre2_convert_context_copy.3 \ + doc/pcre2_convert_context_create.3 \ + doc/pcre2_convert_context_free.3 \ + doc/pcre2_converted_pattern_free.3 \ + doc/pcre2_dfa_match.3 \ + doc/pcre2_general_context_copy.3 \ + doc/pcre2_general_context_create.3 \ + doc/pcre2_general_context_free.3 \ + doc/pcre2_get_error_message.3 \ + doc/pcre2_get_mark.3 \ + doc/pcre2_get_match_data_heapframes_size.3 \ + doc/pcre2_get_match_data_size.3 \ + doc/pcre2_get_ovector_count.3 \ + doc/pcre2_get_ovector_pointer.3 \ + doc/pcre2_get_startchar.3 \ + doc/pcre2_jit_compile.3 \ + doc/pcre2_jit_free_unused_memory.3 \ + doc/pcre2_jit_match.3 \ + doc/pcre2_jit_stack_assign.3 \ + doc/pcre2_jit_stack_create.3 \ + doc/pcre2_jit_stack_free.3 \ + doc/pcre2_maketables.3 \ + doc/pcre2_maketables_free.3 \ + doc/pcre2_match.3 \ + doc/pcre2_match_context_copy.3 \ + doc/pcre2_match_context_create.3 \ + doc/pcre2_match_context_free.3 \ + doc/pcre2_match_data_create.3 \ + doc/pcre2_match_data_create_from_pattern.3 \ 
+ doc/pcre2_match_data_free.3 \ + doc/pcre2_pattern_convert.3 \ + doc/pcre2_pattern_info.3 \ + doc/pcre2_serialize_decode.3 \ + doc/pcre2_serialize_encode.3 \ + doc/pcre2_serialize_free.3 \ + doc/pcre2_serialize_get_number_of_codes.3 \ + doc/pcre2_set_bsr.3 \ + doc/pcre2_set_callout.3 \ + doc/pcre2_set_character_tables.3 \ + doc/pcre2_set_compile_extra_options.3 \ + doc/pcre2_set_compile_recursion_guard.3 \ + doc/pcre2_set_depth_limit.3 \ + doc/pcre2_set_glob_escape.3 \ + doc/pcre2_set_glob_separator.3 \ + doc/pcre2_set_heap_limit.3 \ + doc/pcre2_set_match_limit.3 \ + doc/pcre2_set_max_pattern_compiled_length.3 \ + doc/pcre2_set_max_pattern_length.3 \ + doc/pcre2_set_max_varlookbehind.3 \ + doc/pcre2_set_offset_limit.3 \ + doc/pcre2_set_newline.3 \ + doc/pcre2_set_parens_nest_limit.3 \ + doc/pcre2_set_recursion_limit.3 \ + doc/pcre2_set_recursion_memory_management.3 \ + doc/pcre2_set_substitute_callout.3 \ + doc/pcre2_substitute.3 \ + doc/pcre2_substring_copy_byname.3 \ + doc/pcre2_substring_copy_bynumber.3 \ + doc/pcre2_substring_free.3 \ + doc/pcre2_substring_get_byname.3 \ + doc/pcre2_substring_get_bynumber.3 \ + doc/pcre2_substring_length_byname.3 \ + doc/pcre2_substring_length_bynumber.3 \ + doc/pcre2_substring_list_free.3 \ + doc/pcre2_substring_list_get.3 \ + doc/pcre2_substring_nametable_scan.3 \ + doc/pcre2_substring_number_from_name.3 \ + doc/pcre2api.3 \ + doc/pcre2build.3 \ + doc/pcre2callout.3 \ + doc/pcre2compat.3 \ + doc/pcre2convert.3 \ + doc/pcre2demo.3 \ + doc/pcre2grep.1 \ + doc/pcre2jit.3 \ + doc/pcre2limits.3 \ + doc/pcre2matching.3 \ + doc/pcre2partial.3 \ + doc/pcre2pattern.3 \ + doc/pcre2perform.3 \ + doc/pcre2posix.3 \ + doc/pcre2sample.3 \ + doc/pcre2serialize.3 \ + doc/pcre2syntax.3 \ + doc/pcre2test.1 \ + doc/pcre2unicode.3 + + +# The Libtool libraries to install. We'll add to this later. 
+lib_LTLIBRARIES = $(am__append_2) $(am__append_3) $(am__append_4) \ + $(am__append_11) +check_SCRIPTS = +dist_noinst_SCRIPTS = RunTest $(am__append_45) + +# Additional files to delete on 'make clean', 'make distclean', +# and 'make maintainer-clean'. It turns out that the default is to delete only +# those binaries that *this* configuration has created. If the configuration +# has been changed, some binaries may not get automatically deleted. Therefore +# we list them here. + +# RunTest and RunGrepTest should clean up after themselves, but just in case +# they don't, add their working files to CLEANFILES. +CLEANFILES = pcre2_dftables pcre2_jit_test pcre2fuzzcheck-8 \ + pcre2fuzzcheck-16 pcre2fuzzcheck-32 pcre2demo \ + src/pcre2_chartables.c testSinput test3input test3output \ + test3outputA test3outputB testtry teststdout teststderr \ + teststderrgrep testtemp1grep testtemp2grep testtrygrep \ + testNinputgrep +DISTCLEANFILES = src/config.h.in~ $(am__append_49) +MAINTAINERCLEANFILES = src/pcre2.h.generic src/config.h.generic + +# Additional files to bundle with the distribution, over and above what +# the Autotools include by default. + +# These files contain additional m4 macros that are used by autoconf. + +# These files contain maintenance information + +# These are support files for building under VMS + +# These files are used in the preparation of a release + +# These files are usable versions of pcre2.h and config.h that are distributed +# for the benefit of people who are building PCRE2 manually, without the +# Autotools support. + +# The pcre2_ucptables.c file is #included by pcre2_tables.c + +# The pcre2_chartables.c.dist file is the default version of +# pcre2_chartables.c, used unless --enable-rebuild-chartables is specified. + +# The JIT compiler lives in a separate directory, but its files are #included +# when pcre2_jit_compile.c is processed, so they must be distributed. + +# Some of the JIT sources are also in separate files that are #included. 
+ +# PCRE2 demonstration program. Not built automatically. The point is that the +# users should build it themselves. So just distribute the source. +EXTRA_DIST = m4/ax_pthread.m4 m4/pcre2_visibility.m4 \ + NON-AUTOTOOLS-BUILD HACKING vms/configure.com \ + vms/openvms_readme.txt vms/pcre2.h_patch vms/stdint.h \ + PrepareRelease CheckMan CleanTxt Detrail 132html \ + doc/index.html.src src/pcre2.h.generic src/config.h.generic \ + src/pcre2_ucptables.c src/pcre2_chartables.c.dist \ + src/sljit/sljitConfig.h src/sljit/sljitConfigCPU.h \ + src/sljit/sljitConfigInternal.h src/sljit/sljitLir.c \ + src/sljit/sljitLir.h src/sljit/sljitNativeARM_32.c \ + src/sljit/sljitNativeARM_64.c src/sljit/sljitNativeARM_T2_32.c \ + src/sljit/sljitNativeLOONGARCH_64.c \ + src/sljit/sljitNativeMIPS_32.c src/sljit/sljitNativeMIPS_64.c \ + src/sljit/sljitNativeMIPS_common.c \ + src/sljit/sljitNativePPC_32.c src/sljit/sljitNativePPC_64.c \ + src/sljit/sljitNativePPC_common.c \ + src/sljit/sljitNativeRISCV_32.c \ + src/sljit/sljitNativeRISCV_64.c \ + src/sljit/sljitNativeRISCV_common.c \ + src/sljit/sljitNativeS390X.c src/sljit/sljitNativeX86_32.c \ + src/sljit/sljitNativeX86_64.c \ + src/sljit/sljitNativeX86_common.c src/sljit/sljitSerialize.c \ + src/sljit/sljitUtils.c \ + src/sljit/allocator_src/sljitExecAllocatorApple.c \ + src/sljit/allocator_src/sljitExecAllocatorCore.c \ + src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c \ + src/sljit/allocator_src/sljitExecAllocatorPosix.c \ + src/sljit/allocator_src/sljitExecAllocatorWindows.c \ + src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c \ + src/sljit/allocator_src/sljitProtExecAllocatorPosix.c \ + src/sljit/allocator_src/sljitWXExecAllocatorPosix.c \ + src/sljit/allocator_src/sljitWXExecAllocatorWindows.c \ + src/pcre2_jit_match.c src/pcre2_jit_misc.c \ + src/pcre2_printint.c RunTest.bat $(am__append_44) \ + testdata/grepbinary testdata/grepfilelist testdata/grepinput \ + testdata/grepinput3 testdata/grepinput8 \ + 
testdata/grepinputC.bz2 testdata/grepinputC.gz \ + testdata/grepinputM testdata/grepinputv testdata/grepinputx \ + testdata/greplist testdata/grepnot.bz2 testdata/grepoutput \ + testdata/grepoutput8 testdata/grepoutputC \ + testdata/grepoutputCN testdata/grepoutputCNU \ + testdata/grepoutputCU testdata/grepoutputCbz2 \ + testdata/grepoutputCgz testdata/grepoutputN \ + testdata/grepoutputUN testdata/greppatN4 testdata/testbtables \ + testdata/testinput1 testdata/testinput2 testdata/testinput3 \ + testdata/testinput4 testdata/testinput5 testdata/testinput6 \ + testdata/testinput7 testdata/testinput8 testdata/testinput9 \ + testdata/testinput10 testdata/testinput11 testdata/testinput12 \ + testdata/testinput13 testdata/testinput14 testdata/testinput15 \ + testdata/testinput16 testdata/testinput17 testdata/testinput18 \ + testdata/testinput19 testdata/testinput20 testdata/testinput21 \ + testdata/testinput22 testdata/testinput23 testdata/testinput24 \ + testdata/testinput25 testdata/testinput26 \ + testdata/testinputEBC testdata/testinputheap \ + testdata/testoutput1 testdata/testoutput2 testdata/testoutput3 \ + testdata/testoutput3A testdata/testoutput3B \ + testdata/testoutput4 testdata/testoutput5 testdata/testoutput6 \ + testdata/testoutput7 testdata/testoutput8-16-2 \ + testdata/testoutput8-16-3 testdata/testoutput8-16-4 \ + testdata/testoutput8-32-2 testdata/testoutput8-32-3 \ + testdata/testoutput8-32-4 testdata/testoutput8-8-2 \ + testdata/testoutput8-8-3 testdata/testoutput8-8-4 \ + testdata/testoutput9 testdata/testoutput10 \ + testdata/testoutput11-16 testdata/testoutput11-32 \ + testdata/testoutput12-16 testdata/testoutput12-32 \ + testdata/testoutput13 testdata/testoutput14-16 \ + testdata/testoutput14-32 testdata/testoutput14-8 \ + testdata/testoutput15 testdata/testoutput16 \ + testdata/testoutput17 testdata/testoutput18 \ + testdata/testoutput19 testdata/testoutput20 \ + testdata/testoutput21 testdata/testoutput22-16 \ + testdata/testoutput22-32 
testdata/testoutput22-8 \ + testdata/testoutput23 testdata/testoutput24 \ + testdata/testoutput25 testdata/testoutput26 \ + testdata/testoutputEBC testdata/testoutputheap-16 \ + testdata/testoutputheap-32 testdata/testoutputheap-8 \ + testdata/valgrind-jit.supp testdata/wintestinput3 \ + testdata/wintestoutput3 perltest.sh src/pcre2demo.c \ + cmake/COPYING-CMAKE-SCRIPTS cmake/FindEditline.cmake \ + cmake/FindPackageHandleStandardArgs.cmake \ + cmake/FindReadline.cmake cmake/pcre2-config-version.cmake.in \ + cmake/pcre2-config.cmake.in CMakeLists.txt config-cmake.h.in + +# These are the header files we'll install. We do not distribute pcre2.h +# because it is generated from pcre2.h.in. +nodist_include_HEADERS = src/pcre2.h +include_HEADERS = src/pcre2posix.h + +# This is the "config" script. +bin_SCRIPTS = pcre2-config +@WITH_REBUILD_CHARTABLES_TRUE@pcre2_dftables_SOURCES = src/pcre2_dftables.c +BUILT_SOURCES = src/pcre2_chartables.c +NODIST_SOURCES = src/pcre2_chartables.c +COMMON_SOURCES = \ + src/pcre2_auto_possess.c \ + src/pcre2_chkdint.c \ + src/pcre2_compile.c \ + src/pcre2_config.c \ + src/pcre2_context.c \ + src/pcre2_convert.c \ + src/pcre2_dfa_match.c \ + src/pcre2_error.c \ + src/pcre2_extuni.c \ + src/pcre2_find_bracket.c \ + src/pcre2_internal.h \ + src/pcre2_intmodedep.h \ + src/pcre2_jit_compile.c \ + src/pcre2_jit_neon_inc.h \ + src/pcre2_jit_simd_inc.h \ + src/pcre2_maketables.c \ + src/pcre2_match.c \ + src/pcre2_match_data.c \ + src/pcre2_newline.c \ + src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c \ + src/pcre2_script_run.c \ + src/pcre2_serialize.c \ + src/pcre2_string_utils.c \ + src/pcre2_study.c \ + src/pcre2_substitute.c \ + src/pcre2_substring.c \ + src/pcre2_tables.c \ + src/pcre2_ucd.c \ + src/pcre2_ucp.h \ + src/pcre2_valid_utf.c \ + src/pcre2_xclass.c + +@WITH_PCRE2_8_TRUE@libpcre2_8_la_SOURCES = \ +@WITH_PCRE2_8_TRUE@ $(COMMON_SOURCES) + +@WITH_PCRE2_8_TRUE@nodist_libpcre2_8_la_SOURCES = \ +@WITH_PCRE2_8_TRUE@ $(NODIST_SOURCES) + 
+@WITH_PCRE2_8_TRUE@libpcre2_8_la_CFLAGS = -DPCRE2_CODE_UNIT_WIDTH=8 \ +@WITH_PCRE2_8_TRUE@ $(VISIBILITY_CFLAGS) $(CET_CFLAGS) \ +@WITH_PCRE2_8_TRUE@ $(AM_CFLAGS) $(am__append_5) \ +@WITH_PCRE2_8_TRUE@ $(am__append_8) +@WITH_PCRE2_8_TRUE@libpcre2_8_la_LIBADD = +@WITH_PCRE2_16_TRUE@libpcre2_16_la_SOURCES = \ +@WITH_PCRE2_16_TRUE@ $(COMMON_SOURCES) + +@WITH_PCRE2_16_TRUE@nodist_libpcre2_16_la_SOURCES = \ +@WITH_PCRE2_16_TRUE@ $(NODIST_SOURCES) + +@WITH_PCRE2_16_TRUE@libpcre2_16_la_CFLAGS = \ +@WITH_PCRE2_16_TRUE@ -DPCRE2_CODE_UNIT_WIDTH=16 \ +@WITH_PCRE2_16_TRUE@ $(VISIBILITY_CFLAGS) $(CET_CFLAGS) \ +@WITH_PCRE2_16_TRUE@ $(AM_CFLAGS) $(am__append_6) \ +@WITH_PCRE2_16_TRUE@ $(am__append_9) +@WITH_PCRE2_16_TRUE@libpcre2_16_la_LIBADD = +@WITH_PCRE2_32_TRUE@libpcre2_32_la_SOURCES = \ +@WITH_PCRE2_32_TRUE@ $(COMMON_SOURCES) + +@WITH_PCRE2_32_TRUE@nodist_libpcre2_32_la_SOURCES = \ +@WITH_PCRE2_32_TRUE@ $(NODIST_SOURCES) + +@WITH_PCRE2_32_TRUE@libpcre2_32_la_CFLAGS = \ +@WITH_PCRE2_32_TRUE@ -DPCRE2_CODE_UNIT_WIDTH=32 \ +@WITH_PCRE2_32_TRUE@ $(VISIBILITY_CFLAGS) $(CET_CFLAGS) \ +@WITH_PCRE2_32_TRUE@ $(AM_CFLAGS) $(am__append_7) \ +@WITH_PCRE2_32_TRUE@ $(am__append_10) +@WITH_PCRE2_32_TRUE@libpcre2_32_la_LIBADD = +@WITH_PCRE2_8_TRUE@libpcre2_8_la_LDFLAGS = $(EXTRA_LIBPCRE2_8_LDFLAGS) +@WITH_PCRE2_16_TRUE@libpcre2_16_la_LDFLAGS = $(EXTRA_LIBPCRE2_16_LDFLAGS) +@WITH_PCRE2_32_TRUE@libpcre2_32_la_LDFLAGS = $(EXTRA_LIBPCRE2_32_LDFLAGS) +@WITH_PCRE2_8_TRUE@libpcre2_posix_la_SOURCES = src/pcre2posix.c +@WITH_PCRE2_8_TRUE@libpcre2_posix_la_CFLAGS = \ +@WITH_PCRE2_8_TRUE@ -DPCRE2_CODE_UNIT_WIDTH=8 \ +@WITH_PCRE2_8_TRUE@ @PCRE2POSIX_CFLAG@ $(VISIBILITY_CFLAGS) \ +@WITH_PCRE2_8_TRUE@ $(AM_CFLAGS) $(am__append_12) +@WITH_PCRE2_8_TRUE@libpcre2_posix_la_LDFLAGS = $(EXTRA_LIBPCRE2_POSIX_LDFLAGS) +@WITH_PCRE2_8_TRUE@libpcre2_posix_la_LIBADD = libpcre2-8.la +@WITH_PCRE2_8_TRUE@pcre2grep_SOURCES = src/pcre2grep.c +@WITH_PCRE2_8_TRUE@pcre2grep_CFLAGS = $(AM_CFLAGS) $(am__append_14) 
+@WITH_PCRE2_8_TRUE@pcre2grep_LDADD = $(LIBZ) $(LIBBZ2) libpcre2-8.la \ +@WITH_PCRE2_8_TRUE@ $(am__append_15) +@WITH_FUZZ_SUPPORT_TRUE@noinst_LIBRARIES = $(am__append_16) \ +@WITH_FUZZ_SUPPORT_TRUE@ $(am__append_20) $(am__append_24) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@_libs_libpcre2_fuzzsupport_a_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@_libs_libpcre2_fuzzsupport_a_CFLAGS = $(AM_CFLAGS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@_libs_libpcre2_fuzzsupport_a_LIBADD = +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_8_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_8_CFLAGS = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ -DSTANDALONE \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ $(AM_CFLAGS) \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ $(am__append_18) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_8_LDADD = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ libpcre2-8.la \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ $(am__append_19) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@_libs_libpcre2_fuzzsupport_16_a_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@_libs_libpcre2_fuzzsupport_16_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16 +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@_libs_libpcre2_fuzzsupport_16_a_LIBADD = +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@pcre2fuzzcheck_16_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@pcre2fuzzcheck_16_CFLAGS = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ -DSTANDALONE \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ $(AM_CFLAGS) \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ -DPCRE2_CODE_UNIT_WIDTH=16 \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ $(am__append_22) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@pcre2fuzzcheck_16_LDADD = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ libpcre2-16.la \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ 
$(am__append_23) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@_libs_libpcre2_fuzzsupport_32_a_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@_libs_libpcre2_fuzzsupport_32_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32 +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@_libs_libpcre2_fuzzsupport_32_a_LIBADD = +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@pcre2fuzzcheck_32_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@pcre2fuzzcheck_32_CFLAGS = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ -DSTANDALONE \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ $(AM_CFLAGS) \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ -DPCRE2_CODE_UNIT_WIDTH=32 \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ $(am__append_26) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@pcre2fuzzcheck_32_LDADD = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ libpcre2-32.la \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ $(am__append_27) +@WITH_PCRE2_8_TRUE@pcre2posix_test_SOURCES = src/pcre2posix_test.c +@WITH_PCRE2_8_TRUE@pcre2posix_test_CFLAGS = $(AM_CFLAGS) @PCRE2POSIX_CFLAG@ +@WITH_PCRE2_8_TRUE@pcre2posix_test_LDADD = libpcre2-posix.la libpcre2-8.la +@WITH_JIT_TRUE@pcre2_jit_test_SOURCES = src/pcre2_jit_test.c +@WITH_JIT_TRUE@pcre2_jit_test_CFLAGS = $(AM_CFLAGS) $(am__append_35) +@WITH_JIT_TRUE@pcre2_jit_test_LDADD = $(am__append_32) \ +@WITH_JIT_TRUE@ $(am__append_33) $(am__append_34) \ +@WITH_JIT_TRUE@ $(am__append_36) +pcre2test_SOURCES = src/pcre2test.c +pcre2test_CFLAGS = $(AM_CFLAGS) $(am__append_40) $(am__append_41) +pcre2test_LDADD = $(LIBREADLINE) $(am__append_37) $(am__append_38) \ + $(am__append_39) $(am__append_42) + +# We have .pc files for pkg-config users. 
+pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = $(am__append_46) $(am__append_47) $(am__append_48) + +# gcov/lcov code coverage reporting +# +# Coverage reporting targets: +# +# coverage: Create a coverage report from 'make check' +# coverage-baseline: Capture baseline coverage information +# coverage-reset: This zeros the coverage counters only +# coverage-report: This creates the coverage report only +# coverage-clean-report: This removes the generated coverage report +# without cleaning the coverage data itself +# coverage-clean-data: This removes the captured coverage data without +# removing the coverage files created at compile time (*.gcno) +# coverage-clean: This cleans all coverage data including the generated +# coverage report. +@WITH_GCOV_TRUE@COVERAGE_TEST_NAME = $(PACKAGE) +@WITH_GCOV_TRUE@COVERAGE_NAME = $(PACKAGE)-$(VERSION) +@WITH_GCOV_TRUE@COVERAGE_OUTPUT_FILE = $(COVERAGE_NAME)-coverage.info +@WITH_GCOV_TRUE@COVERAGE_OUTPUT_DIR = $(COVERAGE_NAME)-coverage +@WITH_GCOV_TRUE@COVERAGE_LCOV_EXTRA_FLAGS = +@WITH_GCOV_TRUE@COVERAGE_GENHTML_EXTRA_FLAGS = +@WITH_GCOV_TRUE@coverage_quiet = $(coverage_quiet_$(V)) +@WITH_GCOV_TRUE@coverage_quiet_ = $(coverage_quiet_$(AM_DEFAULT_VERBOSITY)) +@WITH_GCOV_TRUE@coverage_quiet_0 = --quiet +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .log .o .obj .test .test$(EXEEXT) .trs +am--refresh: Makefile + @: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --gnu'; \ + $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: $(am__configure_deps) + $(am__cd) $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) +$(am__aclocal_m4_deps): + +src/config.h: src/stamp-h1 + @test -f $@ || rm -f src/stamp-h1 + @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) src/stamp-h1 + +src/stamp-h1: $(top_srcdir)/src/config.h.in $(top_builddir)/config.status + @rm -f src/stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status src/config.h +$(top_srcdir)/src/config.h.in: $(am__configure_deps) + ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) + rm -f src/stamp-h1 + touch $@ + +distclean-hdr: + -rm -f src/config.h src/stamp-h1 +libpcre2-8.pc: $(top_builddir)/config.status $(srcdir)/libpcre2-8.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +libpcre2-16.pc: $(top_builddir)/config.status $(srcdir)/libpcre2-16.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +libpcre2-32.pc: $(top_builddir)/config.status $(srcdir)/libpcre2-32.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +libpcre2-posix.pc: $(top_builddir)/config.status $(srcdir)/libpcre2-posix.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +pcre2-config: $(top_builddir)/config.status $(srcdir)/pcre2-config.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +src/pcre2.h: $(top_builddir)/config.status $(top_srcdir)/src/pcre2.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " 
$(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo 
"$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstLIBRARIES: + -test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES) + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +src/$(am__dirstamp): + @$(MKDIR_P) src + @: > src/$(am__dirstamp) +src/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) src/$(DEPDIR) + @: > src/$(DEPDIR)/$(am__dirstamp) +src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.$(OBJEXT): \ + src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) +.libs/$(am__dirstamp): + @$(MKDIR_P) .libs + @: > .libs/$(am__dirstamp) + +.libs/libpcre2-fuzzsupport-16.a: $(_libs_libpcre2_fuzzsupport_16_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_16_a_DEPENDENCIES) 
$(EXTRA__libs_libpcre2_fuzzsupport_16_a_DEPENDENCIES) .libs/$(am__dirstamp) + $(AM_V_at)-rm -f .libs/libpcre2-fuzzsupport-16.a + $(AM_V_AR)$(_libs_libpcre2_fuzzsupport_16_a_AR) .libs/libpcre2-fuzzsupport-16.a $(_libs_libpcre2_fuzzsupport_16_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_16_a_LIBADD) + $(AM_V_at)$(RANLIB) .libs/libpcre2-fuzzsupport-16.a +src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.$(OBJEXT): \ + src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) + +.libs/libpcre2-fuzzsupport-32.a: $(_libs_libpcre2_fuzzsupport_32_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_32_a_DEPENDENCIES) $(EXTRA__libs_libpcre2_fuzzsupport_32_a_DEPENDENCIES) .libs/$(am__dirstamp) + $(AM_V_at)-rm -f .libs/libpcre2-fuzzsupport-32.a + $(AM_V_AR)$(_libs_libpcre2_fuzzsupport_32_a_AR) .libs/libpcre2-fuzzsupport-32.a $(_libs_libpcre2_fuzzsupport_32_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_32_a_LIBADD) + $(AM_V_at)$(RANLIB) .libs/libpcre2-fuzzsupport-32.a +src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.$(OBJEXT): \ + src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) + +.libs/libpcre2-fuzzsupport.a: $(_libs_libpcre2_fuzzsupport_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_a_DEPENDENCIES) $(EXTRA__libs_libpcre2_fuzzsupport_a_DEPENDENCIES) .libs/$(am__dirstamp) + $(AM_V_at)-rm -f .libs/libpcre2-fuzzsupport.a + $(AM_V_AR)$(_libs_libpcre2_fuzzsupport_a_AR) .libs/libpcre2-fuzzsupport.a $(_libs_libpcre2_fuzzsupport_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_a_LIBADD) + $(AM_V_at)$(RANLIB) .libs/libpcre2-fuzzsupport.a +src/libpcre2_16_la-pcre2_auto_possess.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_chkdint.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_compile.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_config.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_context.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) 
+src/libpcre2_16_la-pcre2_convert.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_dfa_match.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_error.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_extuni.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_find_bracket.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_jit_compile.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_maketables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_match.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_match_data.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_newline.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_ord2utf.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_pattern_info.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_script_run.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_serialize.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_string_utils.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_study.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_substitute.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_substring.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_tables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_ucd.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_valid_utf.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_xclass.lo: 
src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_chartables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +libpcre2-16.la: $(libpcre2_16_la_OBJECTS) $(libpcre2_16_la_DEPENDENCIES) $(EXTRA_libpcre2_16_la_DEPENDENCIES) + $(AM_V_CCLD)$(libpcre2_16_la_LINK) $(am_libpcre2_16_la_rpath) $(libpcre2_16_la_OBJECTS) $(libpcre2_16_la_LIBADD) $(LIBS) +src/libpcre2_32_la-pcre2_auto_possess.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_chkdint.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_compile.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_config.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_context.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_convert.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_dfa_match.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_error.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_extuni.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_find_bracket.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_jit_compile.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_maketables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_match.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_match_data.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_newline.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_ord2utf.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_pattern_info.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) 
+src/libpcre2_32_la-pcre2_script_run.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_serialize.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_string_utils.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_study.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_substitute.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_substring.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_tables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_ucd.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_valid_utf.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_xclass.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_chartables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +libpcre2-32.la: $(libpcre2_32_la_OBJECTS) $(libpcre2_32_la_DEPENDENCIES) $(EXTRA_libpcre2_32_la_DEPENDENCIES) + $(AM_V_CCLD)$(libpcre2_32_la_LINK) $(am_libpcre2_32_la_rpath) $(libpcre2_32_la_OBJECTS) $(libpcre2_32_la_LIBADD) $(LIBS) +src/libpcre2_8_la-pcre2_auto_possess.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_chkdint.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_compile.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_config.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_context.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_convert.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_dfa_match.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_error.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) 
+src/libpcre2_8_la-pcre2_extuni.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_find_bracket.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_jit_compile.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_maketables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_match.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_match_data.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_newline.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_ord2utf.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_pattern_info.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_script_run.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_serialize.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_string_utils.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_study.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_substitute.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_substring.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_tables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_ucd.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_valid_utf.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_xclass.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_chartables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +libpcre2-8.la: $(libpcre2_8_la_OBJECTS) $(libpcre2_8_la_DEPENDENCIES) $(EXTRA_libpcre2_8_la_DEPENDENCIES) + $(AM_V_CCLD)$(libpcre2_8_la_LINK) 
$(am_libpcre2_8_la_rpath) $(libpcre2_8_la_OBJECTS) $(libpcre2_8_la_LIBADD) $(LIBS) +src/libpcre2_posix_la-pcre2posix.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +libpcre2-posix.la: $(libpcre2_posix_la_OBJECTS) $(libpcre2_posix_la_DEPENDENCIES) $(EXTRA_libpcre2_posix_la_DEPENDENCIES) + $(AM_V_CCLD)$(libpcre2_posix_la_LINK) $(am_libpcre2_posix_la_rpath) $(libpcre2_posix_la_OBJECTS) $(libpcre2_posix_la_LIBADD) $(LIBS) +src/pcre2_dftables.$(OBJEXT): src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +pcre2_dftables$(EXEEXT): $(pcre2_dftables_OBJECTS) $(pcre2_dftables_DEPENDENCIES) $(EXTRA_pcre2_dftables_DEPENDENCIES) + @rm -f pcre2_dftables$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(pcre2_dftables_OBJECTS) $(pcre2_dftables_LDADD) $(LIBS) +src/pcre2_jit_test-pcre2_jit_test.$(OBJEXT): src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +pcre2_jit_test$(EXEEXT): $(pcre2_jit_test_OBJECTS) $(pcre2_jit_test_DEPENDENCIES) $(EXTRA_pcre2_jit_test_DEPENDENCIES) + @rm -f pcre2_jit_test$(EXEEXT) + $(AM_V_CCLD)$(pcre2_jit_test_LINK) $(pcre2_jit_test_OBJECTS) $(pcre2_jit_test_LDADD) $(LIBS) +src/pcre2fuzzcheck_16-pcre2_fuzzsupport.$(OBJEXT): \ + src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) + +pcre2fuzzcheck-16$(EXEEXT): $(pcre2fuzzcheck_16_OBJECTS) $(pcre2fuzzcheck_16_DEPENDENCIES) $(EXTRA_pcre2fuzzcheck_16_DEPENDENCIES) + @rm -f pcre2fuzzcheck-16$(EXEEXT) + $(AM_V_CCLD)$(pcre2fuzzcheck_16_LINK) $(pcre2fuzzcheck_16_OBJECTS) $(pcre2fuzzcheck_16_LDADD) $(LIBS) +src/pcre2fuzzcheck_32-pcre2_fuzzsupport.$(OBJEXT): \ + src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) + +pcre2fuzzcheck-32$(EXEEXT): $(pcre2fuzzcheck_32_OBJECTS) $(pcre2fuzzcheck_32_DEPENDENCIES) $(EXTRA_pcre2fuzzcheck_32_DEPENDENCIES) + @rm -f pcre2fuzzcheck-32$(EXEEXT) + $(AM_V_CCLD)$(pcre2fuzzcheck_32_LINK) $(pcre2fuzzcheck_32_OBJECTS) $(pcre2fuzzcheck_32_LDADD) $(LIBS) +src/pcre2fuzzcheck_8-pcre2_fuzzsupport.$(OBJEXT): src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +pcre2fuzzcheck-8$(EXEEXT): 
$(pcre2fuzzcheck_8_OBJECTS) $(pcre2fuzzcheck_8_DEPENDENCIES) $(EXTRA_pcre2fuzzcheck_8_DEPENDENCIES) + @rm -f pcre2fuzzcheck-8$(EXEEXT) + $(AM_V_CCLD)$(pcre2fuzzcheck_8_LINK) $(pcre2fuzzcheck_8_OBJECTS) $(pcre2fuzzcheck_8_LDADD) $(LIBS) +src/pcre2grep-pcre2grep.$(OBJEXT): src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +pcre2grep$(EXEEXT): $(pcre2grep_OBJECTS) $(pcre2grep_DEPENDENCIES) $(EXTRA_pcre2grep_DEPENDENCIES) + @rm -f pcre2grep$(EXEEXT) + $(AM_V_CCLD)$(pcre2grep_LINK) $(pcre2grep_OBJECTS) $(pcre2grep_LDADD) $(LIBS) +src/pcre2posix_test-pcre2posix_test.$(OBJEXT): src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +pcre2posix_test$(EXEEXT): $(pcre2posix_test_OBJECTS) $(pcre2posix_test_DEPENDENCIES) $(EXTRA_pcre2posix_test_DEPENDENCIES) + @rm -f pcre2posix_test$(EXEEXT) + $(AM_V_CCLD)$(pcre2posix_test_LINK) $(pcre2posix_test_OBJECTS) $(pcre2posix_test_LDADD) $(LIBS) +src/pcre2test-pcre2test.$(OBJEXT): src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +pcre2test$(EXEEXT): $(pcre2test_OBJECTS) $(pcre2test_DEPENDENCIES) $(EXTRA_pcre2test_DEPENDENCIES) + @rm -f pcre2test$(EXEEXT) + $(AM_V_CCLD)$(pcre2test_LINK) $(pcre2test_OBJECTS) $(pcre2test_LDADD) $(LIBS) +install-binSCRIPTS: $(bin_SCRIPTS) + @$(NORMAL_INSTALL) + @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n' \ + -e 'h;s|.*|.|' \ + -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) { files[d] = files[d] " " $$1; \ + if (++n[d] == $(am__install_max)) { \ + print "f", d, files[d]; n[d] = 0; files[d] = "" } } \ + else { print "f", d "/" $$4, 
$$1 } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binSCRIPTS: + @$(NORMAL_UNINSTALL) + @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 's,.*/,,;$(transform)'`; \ + dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f src/*.$(OBJEXT) + -rm -f src/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2_dftables.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2grep-pcre2grep.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2test-pcre2test.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_16_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_16_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_16_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' 
object='src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_16_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + +src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_32_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_32_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_32_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_32_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + +src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + +src/libpcre2_16_la-pcre2_auto_possess.lo: src/pcre2_auto_possess.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_auto_possess.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Tpo -c -o src/libpcre2_16_la-pcre2_auto_possess.lo `test -f 'src/pcre2_auto_possess.c' || echo '$(srcdir)/'`src/pcre2_auto_possess.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_auto_possess.c' object='src/libpcre2_16_la-pcre2_auto_possess.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_auto_possess.lo `test -f 'src/pcre2_auto_possess.c' || echo '$(srcdir)/'`src/pcre2_auto_possess.c + +src/libpcre2_16_la-pcre2_chkdint.lo: src/pcre2_chkdint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_chkdint.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Tpo -c -o src/libpcre2_16_la-pcre2_chkdint.lo `test -f 'src/pcre2_chkdint.c' || echo '$(srcdir)/'`src/pcre2_chkdint.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_chkdint.c' object='src/libpcre2_16_la-pcre2_chkdint.lo' libtool=yes @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_chkdint.lo `test -f 'src/pcre2_chkdint.c' || echo '$(srcdir)/'`src/pcre2_chkdint.c + +src/libpcre2_16_la-pcre2_compile.lo: src/pcre2_compile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Tpo -c -o src/libpcre2_16_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile.c' object='src/libpcre2_16_la-pcre2_compile.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c + +src/libpcre2_16_la-pcre2_config.lo: src/pcre2_config.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_config.lo -MD -MP -MF 
src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Tpo -c -o src/libpcre2_16_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_config.c' object='src/libpcre2_16_la-pcre2_config.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c + +src/libpcre2_16_la-pcre2_context.lo: src/pcre2_context.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_context.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Tpo -c -o src/libpcre2_16_la-pcre2_context.lo `test -f 'src/pcre2_context.c' || echo '$(srcdir)/'`src/pcre2_context.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_context.c' object='src/libpcre2_16_la-pcre2_context.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o 
src/libpcre2_16_la-pcre2_context.lo `test -f 'src/pcre2_context.c' || echo '$(srcdir)/'`src/pcre2_context.c + +src/libpcre2_16_la-pcre2_convert.lo: src/pcre2_convert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_convert.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Tpo -c -o src/libpcre2_16_la-pcre2_convert.lo `test -f 'src/pcre2_convert.c' || echo '$(srcdir)/'`src/pcre2_convert.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_convert.c' object='src/libpcre2_16_la-pcre2_convert.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_convert.lo `test -f 'src/pcre2_convert.c' || echo '$(srcdir)/'`src/pcre2_convert.c + +src/libpcre2_16_la-pcre2_dfa_match.lo: src/pcre2_dfa_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_dfa_match.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Tpo -c -o src/libpcre2_16_la-pcre2_dfa_match.lo `test -f 'src/pcre2_dfa_match.c' || echo '$(srcdir)/'`src/pcre2_dfa_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_dfa_match.c' object='src/libpcre2_16_la-pcre2_dfa_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_dfa_match.lo `test -f 'src/pcre2_dfa_match.c' || echo '$(srcdir)/'`src/pcre2_dfa_match.c + +src/libpcre2_16_la-pcre2_error.lo: src/pcre2_error.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_error.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Tpo -c -o src/libpcre2_16_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_error.c' object='src/libpcre2_16_la-pcre2_error.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c + +src/libpcre2_16_la-pcre2_extuni.lo: src/pcre2_extuni.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_extuni.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Tpo -c -o src/libpcre2_16_la-pcre2_extuni.lo `test -f 'src/pcre2_extuni.c' || echo '$(srcdir)/'`src/pcre2_extuni.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_extuni.c' object='src/libpcre2_16_la-pcre2_extuni.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_extuni.lo `test -f 'src/pcre2_extuni.c' || echo '$(srcdir)/'`src/pcre2_extuni.c + +src/libpcre2_16_la-pcre2_find_bracket.lo: src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_find_bracket.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Tpo -c -o src/libpcre2_16_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_find_bracket.c' object='src/libpcre2_16_la-pcre2_find_bracket.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c + +src/libpcre2_16_la-pcre2_jit_compile.lo: src/pcre2_jit_compile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_jit_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Tpo -c -o src/libpcre2_16_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_jit_compile.c' object='src/libpcre2_16_la-pcre2_jit_compile.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c + +src/libpcre2_16_la-pcre2_maketables.lo: src/pcre2_maketables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_maketables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Tpo -c -o src/libpcre2_16_la-pcre2_maketables.lo 
`test -f 'src/pcre2_maketables.c' || echo '$(srcdir)/'`src/pcre2_maketables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_maketables.c' object='src/libpcre2_16_la-pcre2_maketables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_maketables.lo `test -f 'src/pcre2_maketables.c' || echo '$(srcdir)/'`src/pcre2_maketables.c + +src/libpcre2_16_la-pcre2_match.lo: src/pcre2_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_match.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Tpo -c -o src/libpcre2_16_la-pcre2_match.lo `test -f 'src/pcre2_match.c' || echo '$(srcdir)/'`src/pcre2_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_match.c' object='src/libpcre2_16_la-pcre2_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_match.lo `test -f 'src/pcre2_match.c' || echo 
'$(srcdir)/'`src/pcre2_match.c + +src/libpcre2_16_la-pcre2_match_data.lo: src/pcre2_match_data.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_match_data.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Tpo -c -o src/libpcre2_16_la-pcre2_match_data.lo `test -f 'src/pcre2_match_data.c' || echo '$(srcdir)/'`src/pcre2_match_data.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_match_data.c' object='src/libpcre2_16_la-pcre2_match_data.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_match_data.lo `test -f 'src/pcre2_match_data.c' || echo '$(srcdir)/'`src/pcre2_match_data.c + +src/libpcre2_16_la-pcre2_newline.lo: src/pcre2_newline.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_newline.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Tpo -c -o src/libpcre2_16_la-pcre2_newline.lo `test -f 'src/pcre2_newline.c' || echo '$(srcdir)/'`src/pcre2_newline.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_newline.c' 
object='src/libpcre2_16_la-pcre2_newline.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_newline.lo `test -f 'src/pcre2_newline.c' || echo '$(srcdir)/'`src/pcre2_newline.c + +src/libpcre2_16_la-pcre2_ord2utf.lo: src/pcre2_ord2utf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_ord2utf.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Tpo -c -o src/libpcre2_16_la-pcre2_ord2utf.lo `test -f 'src/pcre2_ord2utf.c' || echo '$(srcdir)/'`src/pcre2_ord2utf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_ord2utf.c' object='src/libpcre2_16_la-pcre2_ord2utf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_ord2utf.lo `test -f 'src/pcre2_ord2utf.c' || echo '$(srcdir)/'`src/pcre2_ord2utf.c + +src/libpcre2_16_la-pcre2_pattern_info.lo: src/pcre2_pattern_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_pattern_info.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Tpo -c -o src/libpcre2_16_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_pattern_info.c' object='src/libpcre2_16_la-pcre2_pattern_info.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c + +src/libpcre2_16_la-pcre2_script_run.lo: src/pcre2_script_run.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_script_run.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Tpo -c -o src/libpcre2_16_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_script_run.c' object='src/libpcre2_16_la-pcre2_script_run.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) 
$(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c + +src/libpcre2_16_la-pcre2_serialize.lo: src/pcre2_serialize.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_serialize.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Tpo -c -o src/libpcre2_16_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_serialize.c' object='src/libpcre2_16_la-pcre2_serialize.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c + +src/libpcre2_16_la-pcre2_string_utils.lo: src/pcre2_string_utils.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_string_utils.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Tpo -c -o src/libpcre2_16_la-pcre2_string_utils.lo `test 
-f 'src/pcre2_string_utils.c' || echo '$(srcdir)/'`src/pcre2_string_utils.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_string_utils.c' object='src/libpcre2_16_la-pcre2_string_utils.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_string_utils.lo `test -f 'src/pcre2_string_utils.c' || echo '$(srcdir)/'`src/pcre2_string_utils.c + +src/libpcre2_16_la-pcre2_study.lo: src/pcre2_study.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_study.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Tpo -c -o src/libpcre2_16_la-pcre2_study.lo `test -f 'src/pcre2_study.c' || echo '$(srcdir)/'`src/pcre2_study.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_study.c' object='src/libpcre2_16_la-pcre2_study.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_study.lo `test -f 'src/pcre2_study.c' || echo 
'$(srcdir)/'`src/pcre2_study.c + +src/libpcre2_16_la-pcre2_substitute.lo: src/pcre2_substitute.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_substitute.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Tpo -c -o src/libpcre2_16_la-pcre2_substitute.lo `test -f 'src/pcre2_substitute.c' || echo '$(srcdir)/'`src/pcre2_substitute.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_substitute.c' object='src/libpcre2_16_la-pcre2_substitute.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_substitute.lo `test -f 'src/pcre2_substitute.c' || echo '$(srcdir)/'`src/pcre2_substitute.c + +src/libpcre2_16_la-pcre2_substring.lo: src/pcre2_substring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_substring.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Tpo -c -o src/libpcre2_16_la-pcre2_substring.lo `test -f 'src/pcre2_substring.c' || echo '$(srcdir)/'`src/pcre2_substring.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='src/pcre2_substring.c' object='src/libpcre2_16_la-pcre2_substring.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_substring.lo `test -f 'src/pcre2_substring.c' || echo '$(srcdir)/'`src/pcre2_substring.c + +src/libpcre2_16_la-pcre2_tables.lo: src/pcre2_tables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_tables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Tpo -c -o src/libpcre2_16_la-pcre2_tables.lo `test -f 'src/pcre2_tables.c' || echo '$(srcdir)/'`src/pcre2_tables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_tables.c' object='src/libpcre2_16_la-pcre2_tables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_tables.lo `test -f 'src/pcre2_tables.c' || echo '$(srcdir)/'`src/pcre2_tables.c + +src/libpcre2_16_la-pcre2_ucd.lo: src/pcre2_ucd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_ucd.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Tpo -c -o src/libpcre2_16_la-pcre2_ucd.lo `test -f 'src/pcre2_ucd.c' || echo '$(srcdir)/'`src/pcre2_ucd.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_ucd.c' object='src/libpcre2_16_la-pcre2_ucd.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_ucd.lo `test -f 'src/pcre2_ucd.c' || echo '$(srcdir)/'`src/pcre2_ucd.c + +src/libpcre2_16_la-pcre2_valid_utf.lo: src/pcre2_valid_utf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_valid_utf.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Tpo -c -o src/libpcre2_16_la-pcre2_valid_utf.lo `test -f 'src/pcre2_valid_utf.c' || echo '$(srcdir)/'`src/pcre2_valid_utf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_valid_utf.c' object='src/libpcre2_16_la-pcre2_valid_utf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_valid_utf.lo `test -f 'src/pcre2_valid_utf.c' || echo '$(srcdir)/'`src/pcre2_valid_utf.c + +src/libpcre2_16_la-pcre2_xclass.lo: src/pcre2_xclass.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_xclass.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Tpo -c -o src/libpcre2_16_la-pcre2_xclass.lo `test -f 'src/pcre2_xclass.c' || echo '$(srcdir)/'`src/pcre2_xclass.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_xclass.c' object='src/libpcre2_16_la-pcre2_xclass.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_xclass.lo `test -f 'src/pcre2_xclass.c' || echo '$(srcdir)/'`src/pcre2_xclass.c + +src/libpcre2_16_la-pcre2_chartables.lo: src/pcre2_chartables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_chartables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Tpo -c -o src/libpcre2_16_la-pcre2_chartables.lo `test -f 'src/pcre2_chartables.c' || echo '$(srcdir)/'`src/pcre2_chartables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Tpo 
src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_chartables.c' object='src/libpcre2_16_la-pcre2_chartables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_chartables.lo `test -f 'src/pcre2_chartables.c' || echo '$(srcdir)/'`src/pcre2_chartables.c + +src/libpcre2_32_la-pcre2_auto_possess.lo: src/pcre2_auto_possess.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_auto_possess.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Tpo -c -o src/libpcre2_32_la-pcre2_auto_possess.lo `test -f 'src/pcre2_auto_possess.c' || echo '$(srcdir)/'`src/pcre2_auto_possess.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_auto_possess.c' object='src/libpcre2_32_la-pcre2_auto_possess.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_auto_possess.lo `test -f 'src/pcre2_auto_possess.c' || echo '$(srcdir)/'`src/pcre2_auto_possess.c + +src/libpcre2_32_la-pcre2_chkdint.lo: 
src/pcre2_chkdint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_chkdint.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Tpo -c -o src/libpcre2_32_la-pcre2_chkdint.lo `test -f 'src/pcre2_chkdint.c' || echo '$(srcdir)/'`src/pcre2_chkdint.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_chkdint.c' object='src/libpcre2_32_la-pcre2_chkdint.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_chkdint.lo `test -f 'src/pcre2_chkdint.c' || echo '$(srcdir)/'`src/pcre2_chkdint.c + +src/libpcre2_32_la-pcre2_compile.lo: src/pcre2_compile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Tpo -c -o src/libpcre2_32_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile.c' object='src/libpcre2_32_la-pcre2_compile.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c + +src/libpcre2_32_la-pcre2_config.lo: src/pcre2_config.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_config.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Tpo -c -o src/libpcre2_32_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_config.c' object='src/libpcre2_32_la-pcre2_config.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c + +src/libpcre2_32_la-pcre2_context.lo: src/pcre2_context.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_context.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Tpo -c -o 
src/libpcre2_32_la-pcre2_context.lo `test -f 'src/pcre2_context.c' || echo '$(srcdir)/'`src/pcre2_context.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_context.c' object='src/libpcre2_32_la-pcre2_context.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_context.lo `test -f 'src/pcre2_context.c' || echo '$(srcdir)/'`src/pcre2_context.c + +src/libpcre2_32_la-pcre2_convert.lo: src/pcre2_convert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_convert.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Tpo -c -o src/libpcre2_32_la-pcre2_convert.lo `test -f 'src/pcre2_convert.c' || echo '$(srcdir)/'`src/pcre2_convert.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_convert.c' object='src/libpcre2_32_la-pcre2_convert.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_convert.lo `test -f 
'src/pcre2_convert.c' || echo '$(srcdir)/'`src/pcre2_convert.c + +src/libpcre2_32_la-pcre2_dfa_match.lo: src/pcre2_dfa_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_dfa_match.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Tpo -c -o src/libpcre2_32_la-pcre2_dfa_match.lo `test -f 'src/pcre2_dfa_match.c' || echo '$(srcdir)/'`src/pcre2_dfa_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_dfa_match.c' object='src/libpcre2_32_la-pcre2_dfa_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_dfa_match.lo `test -f 'src/pcre2_dfa_match.c' || echo '$(srcdir)/'`src/pcre2_dfa_match.c + +src/libpcre2_32_la-pcre2_error.lo: src/pcre2_error.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_error.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Tpo -c -o src/libpcre2_32_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_error.c' 
object='src/libpcre2_32_la-pcre2_error.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c + +src/libpcre2_32_la-pcre2_extuni.lo: src/pcre2_extuni.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_extuni.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Tpo -c -o src/libpcre2_32_la-pcre2_extuni.lo `test -f 'src/pcre2_extuni.c' || echo '$(srcdir)/'`src/pcre2_extuni.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_extuni.c' object='src/libpcre2_32_la-pcre2_extuni.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_extuni.lo `test -f 'src/pcre2_extuni.c' || echo '$(srcdir)/'`src/pcre2_extuni.c + +src/libpcre2_32_la-pcre2_find_bracket.lo: src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) 
$(CFLAGS) -MT src/libpcre2_32_la-pcre2_find_bracket.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Tpo -c -o src/libpcre2_32_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_find_bracket.c' object='src/libpcre2_32_la-pcre2_find_bracket.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c + +src/libpcre2_32_la-pcre2_jit_compile.lo: src/pcre2_jit_compile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_jit_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Tpo -c -o src/libpcre2_32_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_jit_compile.c' object='src/libpcre2_32_la-pcre2_jit_compile.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c + +src/libpcre2_32_la-pcre2_maketables.lo: src/pcre2_maketables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_maketables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Tpo -c -o src/libpcre2_32_la-pcre2_maketables.lo `test -f 'src/pcre2_maketables.c' || echo '$(srcdir)/'`src/pcre2_maketables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_maketables.c' object='src/libpcre2_32_la-pcre2_maketables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_maketables.lo `test -f 'src/pcre2_maketables.c' || echo '$(srcdir)/'`src/pcre2_maketables.c + +src/libpcre2_32_la-pcre2_match.lo: src/pcre2_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_match.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Tpo -c -o src/libpcre2_32_la-pcre2_match.lo `test -f 'src/pcre2_match.c' || echo 
'$(srcdir)/'`src/pcre2_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_match.c' object='src/libpcre2_32_la-pcre2_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_match.lo `test -f 'src/pcre2_match.c' || echo '$(srcdir)/'`src/pcre2_match.c + +src/libpcre2_32_la-pcre2_match_data.lo: src/pcre2_match_data.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_match_data.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Tpo -c -o src/libpcre2_32_la-pcre2_match_data.lo `test -f 'src/pcre2_match_data.c' || echo '$(srcdir)/'`src/pcre2_match_data.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_match_data.c' object='src/libpcre2_32_la-pcre2_match_data.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_match_data.lo `test -f 'src/pcre2_match_data.c' || echo 
'$(srcdir)/'`src/pcre2_match_data.c + +src/libpcre2_32_la-pcre2_newline.lo: src/pcre2_newline.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_newline.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Tpo -c -o src/libpcre2_32_la-pcre2_newline.lo `test -f 'src/pcre2_newline.c' || echo '$(srcdir)/'`src/pcre2_newline.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_newline.c' object='src/libpcre2_32_la-pcre2_newline.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_newline.lo `test -f 'src/pcre2_newline.c' || echo '$(srcdir)/'`src/pcre2_newline.c + +src/libpcre2_32_la-pcre2_ord2utf.lo: src/pcre2_ord2utf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_ord2utf.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Tpo -c -o src/libpcre2_32_la-pcre2_ord2utf.lo `test -f 'src/pcre2_ord2utf.c' || echo '$(srcdir)/'`src/pcre2_ord2utf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_ord2utf.c' 
object='src/libpcre2_32_la-pcre2_ord2utf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_ord2utf.lo `test -f 'src/pcre2_ord2utf.c' || echo '$(srcdir)/'`src/pcre2_ord2utf.c + +src/libpcre2_32_la-pcre2_pattern_info.lo: src/pcre2_pattern_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_pattern_info.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Tpo -c -o src/libpcre2_32_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_pattern_info.c' object='src/libpcre2_32_la-pcre2_pattern_info.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c + +src/libpcre2_32_la-pcre2_script_run.lo: src/pcre2_script_run.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_script_run.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Tpo -c -o src/libpcre2_32_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_script_run.c' object='src/libpcre2_32_la-pcre2_script_run.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c + +src/libpcre2_32_la-pcre2_serialize.lo: src/pcre2_serialize.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_serialize.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Tpo -c -o src/libpcre2_32_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_serialize.c' object='src/libpcre2_32_la-pcre2_serialize.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c + +src/libpcre2_32_la-pcre2_string_utils.lo: src/pcre2_string_utils.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_string_utils.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Tpo -c -o src/libpcre2_32_la-pcre2_string_utils.lo `test -f 'src/pcre2_string_utils.c' || echo '$(srcdir)/'`src/pcre2_string_utils.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_string_utils.c' object='src/libpcre2_32_la-pcre2_string_utils.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_string_utils.lo `test -f 'src/pcre2_string_utils.c' || echo '$(srcdir)/'`src/pcre2_string_utils.c + +src/libpcre2_32_la-pcre2_study.lo: src/pcre2_study.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_study.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Tpo -c -o 
src/libpcre2_32_la-pcre2_study.lo `test -f 'src/pcre2_study.c' || echo '$(srcdir)/'`src/pcre2_study.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_study.c' object='src/libpcre2_32_la-pcre2_study.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_study.lo `test -f 'src/pcre2_study.c' || echo '$(srcdir)/'`src/pcre2_study.c + +src/libpcre2_32_la-pcre2_substitute.lo: src/pcre2_substitute.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_substitute.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Tpo -c -o src/libpcre2_32_la-pcre2_substitute.lo `test -f 'src/pcre2_substitute.c' || echo '$(srcdir)/'`src/pcre2_substitute.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_substitute.c' object='src/libpcre2_32_la-pcre2_substitute.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_substitute.lo `test 
-f 'src/pcre2_substitute.c' || echo '$(srcdir)/'`src/pcre2_substitute.c + +src/libpcre2_32_la-pcre2_substring.lo: src/pcre2_substring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_substring.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Tpo -c -o src/libpcre2_32_la-pcre2_substring.lo `test -f 'src/pcre2_substring.c' || echo '$(srcdir)/'`src/pcre2_substring.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_substring.c' object='src/libpcre2_32_la-pcre2_substring.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_substring.lo `test -f 'src/pcre2_substring.c' || echo '$(srcdir)/'`src/pcre2_substring.c + +src/libpcre2_32_la-pcre2_tables.lo: src/pcre2_tables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_tables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Tpo -c -o src/libpcre2_32_la-pcre2_tables.lo `test -f 'src/pcre2_tables.c' || echo '$(srcdir)/'`src/pcre2_tables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='src/pcre2_tables.c' object='src/libpcre2_32_la-pcre2_tables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_tables.lo `test -f 'src/pcre2_tables.c' || echo '$(srcdir)/'`src/pcre2_tables.c + +src/libpcre2_32_la-pcre2_ucd.lo: src/pcre2_ucd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_ucd.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Tpo -c -o src/libpcre2_32_la-pcre2_ucd.lo `test -f 'src/pcre2_ucd.c' || echo '$(srcdir)/'`src/pcre2_ucd.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_ucd.c' object='src/libpcre2_32_la-pcre2_ucd.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_ucd.lo `test -f 'src/pcre2_ucd.c' || echo '$(srcdir)/'`src/pcre2_ucd.c + +src/libpcre2_32_la-pcre2_valid_utf.lo: src/pcre2_valid_utf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT 
src/libpcre2_32_la-pcre2_valid_utf.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Tpo -c -o src/libpcre2_32_la-pcre2_valid_utf.lo `test -f 'src/pcre2_valid_utf.c' || echo '$(srcdir)/'`src/pcre2_valid_utf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_valid_utf.c' object='src/libpcre2_32_la-pcre2_valid_utf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_valid_utf.lo `test -f 'src/pcre2_valid_utf.c' || echo '$(srcdir)/'`src/pcre2_valid_utf.c + +src/libpcre2_32_la-pcre2_xclass.lo: src/pcre2_xclass.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_xclass.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Tpo -c -o src/libpcre2_32_la-pcre2_xclass.lo `test -f 'src/pcre2_xclass.c' || echo '$(srcdir)/'`src/pcre2_xclass.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_xclass.c' object='src/libpcre2_32_la-pcre2_xclass.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_xclass.lo `test -f 'src/pcre2_xclass.c' || echo '$(srcdir)/'`src/pcre2_xclass.c + +src/libpcre2_32_la-pcre2_chartables.lo: src/pcre2_chartables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_chartables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Tpo -c -o src/libpcre2_32_la-pcre2_chartables.lo `test -f 'src/pcre2_chartables.c' || echo '$(srcdir)/'`src/pcre2_chartables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_chartables.c' object='src/libpcre2_32_la-pcre2_chartables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_chartables.lo `test -f 'src/pcre2_chartables.c' || echo '$(srcdir)/'`src/pcre2_chartables.c + +src/libpcre2_8_la-pcre2_auto_possess.lo: src/pcre2_auto_possess.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_auto_possess.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Tpo -c -o src/libpcre2_8_la-pcre2_auto_possess.lo `test -f 'src/pcre2_auto_possess.c' || echo '$(srcdir)/'`src/pcre2_auto_possess.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_auto_possess.c' object='src/libpcre2_8_la-pcre2_auto_possess.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_auto_possess.lo `test -f 'src/pcre2_auto_possess.c' || echo '$(srcdir)/'`src/pcre2_auto_possess.c + +src/libpcre2_8_la-pcre2_chkdint.lo: src/pcre2_chkdint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_chkdint.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Tpo -c -o src/libpcre2_8_la-pcre2_chkdint.lo `test -f 'src/pcre2_chkdint.c' || echo '$(srcdir)/'`src/pcre2_chkdint.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_chkdint.c' object='src/libpcre2_8_la-pcre2_chkdint.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_chkdint.lo `test -f 'src/pcre2_chkdint.c' || echo '$(srcdir)/'`src/pcre2_chkdint.c + +src/libpcre2_8_la-pcre2_compile.lo: src/pcre2_compile.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Tpo -c -o src/libpcre2_8_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile.c' object='src/libpcre2_8_la-pcre2_compile.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c + +src/libpcre2_8_la-pcre2_config.lo: src/pcre2_config.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_config.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Tpo -c -o src/libpcre2_8_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_config.c' object='src/libpcre2_8_la-pcre2_config.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c + +src/libpcre2_8_la-pcre2_context.lo: src/pcre2_context.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_context.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Tpo -c -o src/libpcre2_8_la-pcre2_context.lo `test -f 'src/pcre2_context.c' || echo '$(srcdir)/'`src/pcre2_context.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_context.c' object='src/libpcre2_8_la-pcre2_context.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_context.lo `test -f 'src/pcre2_context.c' || echo '$(srcdir)/'`src/pcre2_context.c + +src/libpcre2_8_la-pcre2_convert.lo: src/pcre2_convert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_convert.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Tpo -c -o src/libpcre2_8_la-pcre2_convert.lo `test -f 
'src/pcre2_convert.c' || echo '$(srcdir)/'`src/pcre2_convert.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_convert.c' object='src/libpcre2_8_la-pcre2_convert.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_convert.lo `test -f 'src/pcre2_convert.c' || echo '$(srcdir)/'`src/pcre2_convert.c + +src/libpcre2_8_la-pcre2_dfa_match.lo: src/pcre2_dfa_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_dfa_match.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Tpo -c -o src/libpcre2_8_la-pcre2_dfa_match.lo `test -f 'src/pcre2_dfa_match.c' || echo '$(srcdir)/'`src/pcre2_dfa_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_dfa_match.c' object='src/libpcre2_8_la-pcre2_dfa_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_dfa_match.lo `test -f 'src/pcre2_dfa_match.c' || echo 
'$(srcdir)/'`src/pcre2_dfa_match.c + +src/libpcre2_8_la-pcre2_error.lo: src/pcre2_error.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_error.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Tpo -c -o src/libpcre2_8_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_error.c' object='src/libpcre2_8_la-pcre2_error.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c + +src/libpcre2_8_la-pcre2_extuni.lo: src/pcre2_extuni.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_extuni.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Tpo -c -o src/libpcre2_8_la-pcre2_extuni.lo `test -f 'src/pcre2_extuni.c' || echo '$(srcdir)/'`src/pcre2_extuni.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_extuni.c' object='src/libpcre2_8_la-pcre2_extuni.lo' libtool=yes @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_extuni.lo `test -f 'src/pcre2_extuni.c' || echo '$(srcdir)/'`src/pcre2_extuni.c + +src/libpcre2_8_la-pcre2_find_bracket.lo: src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_find_bracket.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Tpo -c -o src/libpcre2_8_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_find_bracket.c' object='src/libpcre2_8_la-pcre2_find_bracket.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c + +src/libpcre2_8_la-pcre2_jit_compile.lo: src/pcre2_jit_compile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) 
-MT src/libpcre2_8_la-pcre2_jit_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Tpo -c -o src/libpcre2_8_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_jit_compile.c' object='src/libpcre2_8_la-pcre2_jit_compile.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c + +src/libpcre2_8_la-pcre2_maketables.lo: src/pcre2_maketables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_maketables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Tpo -c -o src/libpcre2_8_la-pcre2_maketables.lo `test -f 'src/pcre2_maketables.c' || echo '$(srcdir)/'`src/pcre2_maketables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_maketables.c' object='src/libpcre2_8_la-pcre2_maketables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile 
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_maketables.lo `test -f 'src/pcre2_maketables.c' || echo '$(srcdir)/'`src/pcre2_maketables.c + +src/libpcre2_8_la-pcre2_match.lo: src/pcre2_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_match.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Tpo -c -o src/libpcre2_8_la-pcre2_match.lo `test -f 'src/pcre2_match.c' || echo '$(srcdir)/'`src/pcre2_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_match.c' object='src/libpcre2_8_la-pcre2_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_match.lo `test -f 'src/pcre2_match.c' || echo '$(srcdir)/'`src/pcre2_match.c + +src/libpcre2_8_la-pcre2_match_data.lo: src/pcre2_match_data.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_match_data.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Tpo -c -o src/libpcre2_8_la-pcre2_match_data.lo `test -f 'src/pcre2_match_data.c' || echo '$(srcdir)/'`src/pcre2_match_data.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_match_data.c' object='src/libpcre2_8_la-pcre2_match_data.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_match_data.lo `test -f 'src/pcre2_match_data.c' || echo '$(srcdir)/'`src/pcre2_match_data.c + +src/libpcre2_8_la-pcre2_newline.lo: src/pcre2_newline.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_newline.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Tpo -c -o src/libpcre2_8_la-pcre2_newline.lo `test -f 'src/pcre2_newline.c' || echo '$(srcdir)/'`src/pcre2_newline.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_newline.c' object='src/libpcre2_8_la-pcre2_newline.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_newline.lo `test -f 'src/pcre2_newline.c' || echo '$(srcdir)/'`src/pcre2_newline.c + +src/libpcre2_8_la-pcre2_ord2utf.lo: src/pcre2_ord2utf.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_ord2utf.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Tpo -c -o src/libpcre2_8_la-pcre2_ord2utf.lo `test -f 'src/pcre2_ord2utf.c' || echo '$(srcdir)/'`src/pcre2_ord2utf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_ord2utf.c' object='src/libpcre2_8_la-pcre2_ord2utf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_ord2utf.lo `test -f 'src/pcre2_ord2utf.c' || echo '$(srcdir)/'`src/pcre2_ord2utf.c + +src/libpcre2_8_la-pcre2_pattern_info.lo: src/pcre2_pattern_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_pattern_info.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Tpo -c -o src/libpcre2_8_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_pattern_info.c' object='src/libpcre2_8_la-pcre2_pattern_info.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c + +src/libpcre2_8_la-pcre2_script_run.lo: src/pcre2_script_run.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_script_run.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Tpo -c -o src/libpcre2_8_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_script_run.c' object='src/libpcre2_8_la-pcre2_script_run.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c + +src/libpcre2_8_la-pcre2_serialize.lo: src/pcre2_serialize.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_serialize.lo -MD -MP 
-MF src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Tpo -c -o src/libpcre2_8_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_serialize.c' object='src/libpcre2_8_la-pcre2_serialize.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c + +src/libpcre2_8_la-pcre2_string_utils.lo: src/pcre2_string_utils.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_string_utils.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Tpo -c -o src/libpcre2_8_la-pcre2_string_utils.lo `test -f 'src/pcre2_string_utils.c' || echo '$(srcdir)/'`src/pcre2_string_utils.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_string_utils.c' object='src/libpcre2_8_la-pcre2_string_utils.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_string_utils.lo `test -f 'src/pcre2_string_utils.c' || echo '$(srcdir)/'`src/pcre2_string_utils.c + +src/libpcre2_8_la-pcre2_study.lo: src/pcre2_study.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_study.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Tpo -c -o src/libpcre2_8_la-pcre2_study.lo `test -f 'src/pcre2_study.c' || echo '$(srcdir)/'`src/pcre2_study.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_study.c' object='src/libpcre2_8_la-pcre2_study.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_study.lo `test -f 'src/pcre2_study.c' || echo '$(srcdir)/'`src/pcre2_study.c + +src/libpcre2_8_la-pcre2_substitute.lo: src/pcre2_substitute.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_substitute.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Tpo -c -o src/libpcre2_8_la-pcre2_substitute.lo `test -f 'src/pcre2_substitute.c' || echo '$(srcdir)/'`src/pcre2_substitute.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Tpo 
src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_substitute.c' object='src/libpcre2_8_la-pcre2_substitute.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_substitute.lo `test -f 'src/pcre2_substitute.c' || echo '$(srcdir)/'`src/pcre2_substitute.c + +src/libpcre2_8_la-pcre2_substring.lo: src/pcre2_substring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_substring.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Tpo -c -o src/libpcre2_8_la-pcre2_substring.lo `test -f 'src/pcre2_substring.c' || echo '$(srcdir)/'`src/pcre2_substring.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_substring.c' object='src/libpcre2_8_la-pcre2_substring.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_substring.lo `test -f 'src/pcre2_substring.c' || echo '$(srcdir)/'`src/pcre2_substring.c + +src/libpcre2_8_la-pcre2_tables.lo: src/pcre2_tables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) 
--tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_tables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Tpo -c -o src/libpcre2_8_la-pcre2_tables.lo `test -f 'src/pcre2_tables.c' || echo '$(srcdir)/'`src/pcre2_tables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_tables.c' object='src/libpcre2_8_la-pcre2_tables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_tables.lo `test -f 'src/pcre2_tables.c' || echo '$(srcdir)/'`src/pcre2_tables.c + +src/libpcre2_8_la-pcre2_ucd.lo: src/pcre2_ucd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_ucd.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Tpo -c -o src/libpcre2_8_la-pcre2_ucd.lo `test -f 'src/pcre2_ucd.c' || echo '$(srcdir)/'`src/pcre2_ucd.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_ucd.c' object='src/libpcre2_8_la-pcre2_ucd.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_ucd.lo `test -f 'src/pcre2_ucd.c' || echo '$(srcdir)/'`src/pcre2_ucd.c + +src/libpcre2_8_la-pcre2_valid_utf.lo: src/pcre2_valid_utf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_valid_utf.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Tpo -c -o src/libpcre2_8_la-pcre2_valid_utf.lo `test -f 'src/pcre2_valid_utf.c' || echo '$(srcdir)/'`src/pcre2_valid_utf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_valid_utf.c' object='src/libpcre2_8_la-pcre2_valid_utf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_valid_utf.lo `test -f 'src/pcre2_valid_utf.c' || echo '$(srcdir)/'`src/pcre2_valid_utf.c + +src/libpcre2_8_la-pcre2_xclass.lo: src/pcre2_xclass.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_xclass.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Tpo -c -o src/libpcre2_8_la-pcre2_xclass.lo `test -f 'src/pcre2_xclass.c' || echo '$(srcdir)/'`src/pcre2_xclass.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_xclass.c' object='src/libpcre2_8_la-pcre2_xclass.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_xclass.lo `test -f 'src/pcre2_xclass.c' || echo '$(srcdir)/'`src/pcre2_xclass.c + +src/libpcre2_8_la-pcre2_chartables.lo: src/pcre2_chartables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_chartables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Tpo -c -o src/libpcre2_8_la-pcre2_chartables.lo `test -f 'src/pcre2_chartables.c' || echo '$(srcdir)/'`src/pcre2_chartables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_chartables.c' object='src/libpcre2_8_la-pcre2_chartables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_chartables.lo `test -f 'src/pcre2_chartables.c' || echo '$(srcdir)/'`src/pcre2_chartables.c + +src/libpcre2_posix_la-pcre2posix.lo: src/pcre2posix.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_posix_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_posix_la-pcre2posix.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Tpo -c -o src/libpcre2_posix_la-pcre2posix.lo `test -f 'src/pcre2posix.c' || echo '$(srcdir)/'`src/pcre2posix.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Tpo src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2posix.c' object='src/libpcre2_posix_la-pcre2posix.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_posix_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_posix_la-pcre2posix.lo `test -f 'src/pcre2posix.c' || echo '$(srcdir)/'`src/pcre2posix.c + +src/pcre2_jit_test-pcre2_jit_test.o: src/pcre2_jit_test.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2_jit_test_CFLAGS) $(CFLAGS) -MT src/pcre2_jit_test-pcre2_jit_test.o -MD -MP -MF src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Tpo -c -o src/pcre2_jit_test-pcre2_jit_test.o `test -f 'src/pcre2_jit_test.c' || echo '$(srcdir)/'`src/pcre2_jit_test.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Tpo src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_jit_test.c' object='src/pcre2_jit_test-pcre2_jit_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2_jit_test_CFLAGS) $(CFLAGS) -c -o src/pcre2_jit_test-pcre2_jit_test.o `test -f 'src/pcre2_jit_test.c' || echo '$(srcdir)/'`src/pcre2_jit_test.c + +src/pcre2_jit_test-pcre2_jit_test.obj: src/pcre2_jit_test.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2_jit_test_CFLAGS) $(CFLAGS) -MT src/pcre2_jit_test-pcre2_jit_test.obj -MD -MP -MF src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Tpo -c -o src/pcre2_jit_test-pcre2_jit_test.obj `if test -f 'src/pcre2_jit_test.c'; then $(CYGPATH_W) 'src/pcre2_jit_test.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_jit_test.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Tpo src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_jit_test.c' object='src/pcre2_jit_test-pcre2_jit_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2_jit_test_CFLAGS) $(CFLAGS) -c -o src/pcre2_jit_test-pcre2_jit_test.obj `if test -f 'src/pcre2_jit_test.c'; then $(CYGPATH_W) 'src/pcre2_jit_test.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_jit_test.c'; fi` + +src/pcre2fuzzcheck_16-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_16_CFLAGS) $(CFLAGS) -MT src/pcre2fuzzcheck_16-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck_16-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck_16-pcre2_fuzzsupport.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_16_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck_16-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/pcre2fuzzcheck_16-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_16_CFLAGS) $(CFLAGS) -MT src/pcre2fuzzcheck_16-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck_16-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck_16-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_16_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck_16-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + +src/pcre2fuzzcheck_32-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(pcre2fuzzcheck_32_CFLAGS) $(CFLAGS) -MT src/pcre2fuzzcheck_32-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck_32-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck_32-pcre2_fuzzsupport.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_32_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck_32-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/pcre2fuzzcheck_32-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_32_CFLAGS) $(CFLAGS) -MT src/pcre2fuzzcheck_32-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck_32-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck_32-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(pcre2fuzzcheck_32_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck_32-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + +src/pcre2fuzzcheck_8-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_8_CFLAGS) $(CFLAGS) -MT src/pcre2fuzzcheck_8-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck_8-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck_8-pcre2_fuzzsupport.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_8_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck_8-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/pcre2fuzzcheck_8-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_8_CFLAGS) $(CFLAGS) -MT src/pcre2fuzzcheck_8-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck_8-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Tpo 
src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck_8-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_8_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck_8-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + +src/pcre2grep-pcre2grep.o: src/pcre2grep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2grep_CFLAGS) $(CFLAGS) -MT src/pcre2grep-pcre2grep.o -MD -MP -MF src/$(DEPDIR)/pcre2grep-pcre2grep.Tpo -c -o src/pcre2grep-pcre2grep.o `test -f 'src/pcre2grep.c' || echo '$(srcdir)/'`src/pcre2grep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2grep-pcre2grep.Tpo src/$(DEPDIR)/pcre2grep-pcre2grep.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2grep.c' object='src/pcre2grep-pcre2grep.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2grep_CFLAGS) $(CFLAGS) -c -o src/pcre2grep-pcre2grep.o `test -f 'src/pcre2grep.c' || echo '$(srcdir)/'`src/pcre2grep.c + +src/pcre2grep-pcre2grep.obj: src/pcre2grep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2grep_CFLAGS) $(CFLAGS) -MT src/pcre2grep-pcre2grep.obj -MD -MP -MF src/$(DEPDIR)/pcre2grep-pcre2grep.Tpo -c -o src/pcre2grep-pcre2grep.obj `if test -f 'src/pcre2grep.c'; then $(CYGPATH_W) 'src/pcre2grep.c'; else $(CYGPATH_W) 
'$(srcdir)/src/pcre2grep.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2grep-pcre2grep.Tpo src/$(DEPDIR)/pcre2grep-pcre2grep.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2grep.c' object='src/pcre2grep-pcre2grep.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2grep_CFLAGS) $(CFLAGS) -c -o src/pcre2grep-pcre2grep.obj `if test -f 'src/pcre2grep.c'; then $(CYGPATH_W) 'src/pcre2grep.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2grep.c'; fi` + +src/pcre2posix_test-pcre2posix_test.o: src/pcre2posix_test.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2posix_test_CFLAGS) $(CFLAGS) -MT src/pcre2posix_test-pcre2posix_test.o -MD -MP -MF src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Tpo -c -o src/pcre2posix_test-pcre2posix_test.o `test -f 'src/pcre2posix_test.c' || echo '$(srcdir)/'`src/pcre2posix_test.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Tpo src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2posix_test.c' object='src/pcre2posix_test-pcre2posix_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2posix_test_CFLAGS) $(CFLAGS) -c -o src/pcre2posix_test-pcre2posix_test.o `test -f 'src/pcre2posix_test.c' || echo '$(srcdir)/'`src/pcre2posix_test.c + +src/pcre2posix_test-pcre2posix_test.obj: src/pcre2posix_test.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2posix_test_CFLAGS) $(CFLAGS) -MT 
src/pcre2posix_test-pcre2posix_test.obj -MD -MP -MF src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Tpo -c -o src/pcre2posix_test-pcre2posix_test.obj `if test -f 'src/pcre2posix_test.c'; then $(CYGPATH_W) 'src/pcre2posix_test.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2posix_test.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Tpo src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2posix_test.c' object='src/pcre2posix_test-pcre2posix_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2posix_test_CFLAGS) $(CFLAGS) -c -o src/pcre2posix_test-pcre2posix_test.obj `if test -f 'src/pcre2posix_test.c'; then $(CYGPATH_W) 'src/pcre2posix_test.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2posix_test.c'; fi` + +src/pcre2test-pcre2test.o: src/pcre2test.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2test_CFLAGS) $(CFLAGS) -MT src/pcre2test-pcre2test.o -MD -MP -MF src/$(DEPDIR)/pcre2test-pcre2test.Tpo -c -o src/pcre2test-pcre2test.o `test -f 'src/pcre2test.c' || echo '$(srcdir)/'`src/pcre2test.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2test-pcre2test.Tpo src/$(DEPDIR)/pcre2test-pcre2test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2test.c' object='src/pcre2test-pcre2test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2test_CFLAGS) $(CFLAGS) -c -o src/pcre2test-pcre2test.o `test -f 'src/pcre2test.c' || echo '$(srcdir)/'`src/pcre2test.c + +src/pcre2test-pcre2test.obj: src/pcre2test.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2test_CFLAGS) $(CFLAGS) -MT src/pcre2test-pcre2test.obj -MD -MP -MF src/$(DEPDIR)/pcre2test-pcre2test.Tpo -c -o src/pcre2test-pcre2test.obj `if test -f 'src/pcre2test.c'; then $(CYGPATH_W) 'src/pcre2test.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2test.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2test-pcre2test.Tpo src/$(DEPDIR)/pcre2test-pcre2test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2test.c' object='src/pcre2test-pcre2test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2test_CFLAGS) $(CFLAGS) -c -o src/pcre2test-pcre2test.obj `if test -f 'src/pcre2test.c'; then $(CYGPATH_W) 'src/pcre2test.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2test.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf src/.libs src/_libs + +distclean-libtool: + -rm -f libtool config.lt +install-man1: $(dist_man_MANS) + @$(NORMAL_INSTALL) + @list1=''; \ + list2='$(dist_man_MANS)'; \ + test -n "$(man1dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.1[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) 
'$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ + done; } + +uninstall-man1: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man1dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.1[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) +install-man3: $(dist_man_MANS) + @$(NORMAL_INSTALL) + @list1=''; \ + list2='$(dist_man_MANS)'; \ + test -n "$(man3dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man3dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man3dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.3[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^3][0-9a-z]*$$,3,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man3dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man3dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man3dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man3dir)" || exit $$?; }; \ + done; } + 
+uninstall-man3: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man3dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.3[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^3][0-9a-z]*$$,3,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man3dir)'; $(am__uninstall_files_from_dir) +install-dist_docDATA: $(dist_doc_DATA) + @$(NORMAL_INSTALL) + @list='$(dist_doc_DATA)'; test -n "$(docdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(docdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(docdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(docdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(docdir)" || exit $$?; \ + done + +uninstall-dist_docDATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_doc_DATA)'; test -n "$(docdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(docdir)'; $(am__uninstall_files_from_dir) +install-dist_htmlDATA: $(dist_html_DATA) + @$(NORMAL_INSTALL) + @list='$(dist_html_DATA)'; test -n "$(htmldir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(htmldir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(htmldir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(htmldir)" || exit $$?; \ + done + +uninstall-dist_htmlDATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_html_DATA)'; test -n "$(htmldir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(htmldir)'; $(am__uninstall_files_from_dir) +install-pkgconfigDATA: 
$(pkgconfig_DATA) + @$(NORMAL_INSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ + done + +uninstall-pkgconfigDATA: + @$(NORMAL_UNINSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) +install-nodist_includeHEADERS: $(nodist_include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo 
"$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + done + +uninstall-nodist_includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscope: cscope.files + test ! 
-s cscope.files \ + || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) +clean-cscope: + -rm -f cscope.files +cscope.files: clean-cscope cscopelist +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + -rm -f cscope.out cscope.in.out cscope.po.out cscope.files + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_SCRIPTS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f 
$(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_SCRIPTS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +pcre2posix_test.log: pcre2posix_test$(EXEEXT) + @p='pcre2posix_test$(EXEEXT)'; \ + b='pcre2posix_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +pcre2_jit_test.log: pcre2_jit_test$(EXEEXT) + @p='pcre2_jit_test$(EXEEXT)'; \ + b='pcre2_jit_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +RunTest.log: RunTest + @p='RunTest'; \ + b='RunTest'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +RunGrepTest.log: RunGrepTest + @p='RunGrepTest'; \ + b='RunGrepTest'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + 
$(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + $(am__remove_distdir) + test -d "$(distdir)" || mkdir "$(distdir)" + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + -test -n "$(am__skip_mode_fix)" \ + || find "$(distdir)" -type d ! -perm -755 \ + -exec chmod u+rwx,go+rx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r "$(distdir)" +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz + $(am__post_remove_distdir) +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 + $(am__post_remove_distdir) + +dist-lzip: distdir + tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz + $(am__post_remove_distdir) + +dist-xz: distdir + tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz + $(am__post_remove_distdir) + +dist-zstd: distdir + tardir=$(distdir) && $(am__tar) | zstd -c $${ZSTD_CLEVEL-$${ZSTD_OPT--19}} >$(distdir).tar.zst + $(am__post_remove_distdir) + +dist-tarZ: distdir + @echo WARNING: "Support for distribution archives compressed with" \ + "legacy program 'compress' is deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__post_remove_distdir) + +dist-shar: distdir + @echo WARNING: "Support for shar distribution archives is" \ + "deprecated." 
>&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz + $(am__post_remove_distdir) +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__post_remove_distdir) + +dist dist-all: + $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' + $(am__post_remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. +distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.lz*) \ + lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ + *.tar.xz*) \ + xz -dc $(distdir).tar.xz | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + *.tar.zst*) \ + zstd -dc $(distdir).tar.zst | $(am__untar) ;;\ + esac + chmod -R a-w $(distdir) + chmod u+w $(distdir) + mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst + chmod a-w $(distdir) + test -d $(distdir)/_build || exit 0; \ + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && am__cwd=`pwd` \ + && $(am__cd) $(distdir)/_build/sub \ + && ../../configure \ + $(AM_DISTCHECK_CONFIGURE_FLAGS) \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + --srcdir=../.. 
--prefix="$$dc_install_base" \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) $(AM_DISTCHECK_DVI_TARGET) \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../.. && umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ + && cd "$$am__cwd" \ + || exit 1 + $(am__post_remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' +distuninstallcheck: + @test -n '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: trying to run $@ with an empty' \ + '$$(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + $(am__cd) '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +distcleancheck: distclean + @if test '$(srcdir)' = . 
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_SCRIPTS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(PROGRAMS) $(LIBRARIES) $(LTLIBRARIES) $(SCRIPTS) \ + $(MANS) $(DATA) $(HEADERS) +install-binPROGRAMS: install-libLTLIBRARIES + +installdirs: + for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(man3dir)" "$(DESTDIR)$(docdir)" "$(DESTDIR)$(htmldir)" "$(DESTDIR)$(pkgconfigdir)" "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f .libs/$(am__dirstamp) + -rm -f src/$(DEPDIR)/$(am__dirstamp) + -rm -f src/$(am__dirstamp) + -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) + -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) +@WITH_GCOV_FALSE@clean-local: +clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-libLTLIBRARIES \ + clean-libtool clean-local clean-noinstLIBRARIES \ + clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo 
+ -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo + -rm -f 
src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo + -rm -f src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo + -rm -f src/$(DEPDIR)/pcre2_dftables.Po + -rm -f src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po + 
-rm -f src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/pcre2grep-pcre2grep.Po + -rm -f src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Po + -rm -f src/$(DEPDIR)/pcre2test-pcre2test.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-hdr distclean-libtool distclean-local distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-dist_docDATA install-dist_htmlDATA \ + install-includeHEADERS install-man \ + install-nodist_includeHEADERS install-pkgconfigDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-binPROGRAMS install-binSCRIPTS \ + install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: install-man1 install-man3 + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo + -rm -f 
src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo + 
-rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo + -rm -f 
src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo + -rm -f src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo + -rm -f src/$(DEPDIR)/pcre2_dftables.Po + -rm -f src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po + -rm -f src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/pcre2grep-pcre2grep.Po + -rm -f src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Po + -rm -f src/$(DEPDIR)/pcre2test-pcre2test.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS uninstall-binSCRIPTS \ + uninstall-dist_docDATA uninstall-dist_htmlDATA \ + uninstall-includeHEADERS uninstall-libLTLIBRARIES \ + uninstall-man uninstall-nodist_includeHEADERS \ + uninstall-pkgconfigDATA + +uninstall-man: uninstall-man1 uninstall-man3 + +.MAKE: all check check-am install install-am install-exec \ + install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles am--refresh check \ + check-TESTS check-am clean clean-binPROGRAMS clean-cscope \ + clean-generic clean-libLTLIBRARIES clean-libtool clean-local \ + clean-noinstLIBRARIES clean-noinstPROGRAMS cscope \ + cscopelist-am ctags ctags-am dist dist-all dist-bzip2 \ + dist-gzip dist-lzip dist-shar dist-tarZ dist-xz dist-zip \ + dist-zstd distcheck distclean distclean-compile \ + distclean-generic distclean-hdr distclean-libtool \ + distclean-local distclean-tags distcleancheck distdir \ + distuninstallcheck dvi dvi-am html html-am info info-am \ + install install-am install-binPROGRAMS 
install-binSCRIPTS \ + install-data install-data-am install-dist_docDATA \ + install-dist_htmlDATA install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am \ + install-includeHEADERS install-info install-info-am \ + install-libLTLIBRARIES install-man install-man1 install-man3 \ + install-nodist_includeHEADERS install-pdf install-pdf-am \ + install-pkgconfigDATA install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + recheck tags tags-am uninstall uninstall-am \ + uninstall-binPROGRAMS uninstall-binSCRIPTS \ + uninstall-dist_docDATA uninstall-dist_htmlDATA \ + uninstall-includeHEADERS uninstall-libLTLIBRARIES \ + uninstall-man uninstall-man1 uninstall-man3 \ + uninstall-nodist_includeHEADERS uninstall-pkgconfigDATA + +.PRECIOUS: Makefile + + +# The only difference between pcre2.h.in and pcre2.h is the setting of the PCRE +# version number. Therefore, we can create the generic version just by copying. + +src/pcre2.h.generic: src/pcre2.h.in configure.ac + rm -f $@ + cp -p src/pcre2.h $@ + +# It is more complicated for config.h.generic. We need the version that results +# from a default configuration so as to get all the default values for PCRE +# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by +# doing a configure in a temporary directory. However, some trickery is needed, +# because the source directory may already be configured. If you just try +# running configure in a new directory, it complains. For this reason, we move +# config.status out of the way while doing the default configuration. The +# resulting config.h is munged by perl to put #ifdefs round any #defines for +# macros with values, and to #undef all boolean macros such as HAVE_xxx and +# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings. 
+ +src/config.h.generic: configure.ac + rm -rf $@ _generic + mkdir _generic + cs=$(srcdir)/config.status; test ! -f $$cs || mv -f $$cs $$cs.aside + cd _generic && $(abs_top_srcdir)/configure || : + cs=$(srcdir)/config.status; test ! -f $$cs.aside || mv -f $$cs.aside $$cs + test -f _generic/src/config.h + perl -n \ + -e 'BEGIN{$$blank=0;}' \ + -e 'if(/(.+?)\s*__attribute__ \(\(visibility/){print"$$1\n";$$blank=0;next;}' \ + -e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \ + -e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \ + -e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \ + -e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \ + _generic/src/config.h >$@ + rm -rf _generic +@WITH_REBUILD_CHARTABLES_TRUE@src/pcre2_chartables.c: pcre2_dftables$(EXEEXT) +@WITH_REBUILD_CHARTABLES_TRUE@ rm -f $@ +@WITH_REBUILD_CHARTABLES_TRUE@ ./pcre2_dftables$(EXEEXT) $@ +@WITH_REBUILD_CHARTABLES_FALSE@src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist +@WITH_REBUILD_CHARTABLES_FALSE@ rm -f $@ +@WITH_REBUILD_CHARTABLES_FALSE@ $(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c + +@WITH_GCOV_TRUE@coverage-check: all +@WITH_GCOV_TRUE@ -$(MAKE) $(AM_MAKEFLAGS) -k check + +@WITH_GCOV_TRUE@coverage-baseline: +@WITH_GCOV_TRUE@ $(LCOV) $(coverage_quiet) \ +@WITH_GCOV_TRUE@ --directory $(top_builddir) \ +@WITH_GCOV_TRUE@ --output-file "$(COVERAGE_OUTPUT_FILE)" \ +@WITH_GCOV_TRUE@ --capture \ +@WITH_GCOV_TRUE@ --initial + +@WITH_GCOV_TRUE@coverage-report: +@WITH_GCOV_TRUE@ $(LCOV) $(coverage_quiet) \ +@WITH_GCOV_TRUE@ --directory $(top_builddir) \ +@WITH_GCOV_TRUE@ --capture \ +@WITH_GCOV_TRUE@ --output-file "$(COVERAGE_OUTPUT_FILE).tmp" \ +@WITH_GCOV_TRUE@ --test-name "$(COVERAGE_TEST_NAME)" \ +@WITH_GCOV_TRUE@ --no-checksum \ +@WITH_GCOV_TRUE@ --compat-libtool \ +@WITH_GCOV_TRUE@ 
$(COVERAGE_LCOV_EXTRA_FLAGS) +@WITH_GCOV_TRUE@ $(LCOV) $(coverage_quiet) \ +@WITH_GCOV_TRUE@ --directory $(top_builddir) \ +@WITH_GCOV_TRUE@ --output-file "$(COVERAGE_OUTPUT_FILE)" \ +@WITH_GCOV_TRUE@ --remove "$(COVERAGE_OUTPUT_FILE).tmp" \ +@WITH_GCOV_TRUE@ "/tmp/*" \ +@WITH_GCOV_TRUE@ "/usr/include/*" \ +@WITH_GCOV_TRUE@ "$(includedir)/*" +@WITH_GCOV_TRUE@ -@rm -f "$(COVERAGE_OUTPUT_FILE).tmp" +@WITH_GCOV_TRUE@ LANG=C $(GENHTML) $(coverage_quiet) \ +@WITH_GCOV_TRUE@ --prefix $(top_builddir) \ +@WITH_GCOV_TRUE@ --output-directory "$(COVERAGE_OUTPUT_DIR)" \ +@WITH_GCOV_TRUE@ --title "$(PACKAGE) $(VERSION) Code Coverage Report" \ +@WITH_GCOV_TRUE@ --show-details "$(COVERAGE_OUTPUT_FILE)" \ +@WITH_GCOV_TRUE@ --legend \ +@WITH_GCOV_TRUE@ $(COVERAGE_GENHTML_EXTRA_FLAGS) +@WITH_GCOV_TRUE@ @echo "Code coverage report written to file://$(abs_builddir)/$(COVERAGE_OUTPUT_DIR)/index.html" + +@WITH_GCOV_TRUE@coverage-reset: +@WITH_GCOV_TRUE@ -$(LCOV) $(coverage_quiet) --zerocounters --directory $(top_builddir) + +@WITH_GCOV_TRUE@coverage-clean-report: +@WITH_GCOV_TRUE@ -rm -f "$(COVERAGE_OUTPUT_FILE)" "$(COVERAGE_OUTPUT_FILE).tmp" +@WITH_GCOV_TRUE@ -rm -rf "$(COVERAGE_OUTPUT_DIR)" + +@WITH_GCOV_TRUE@coverage-clean-data: +@WITH_GCOV_TRUE@ -find $(top_builddir) -name "*.gcda" -delete + +@WITH_GCOV_TRUE@coverage-clean: coverage-reset coverage-clean-report coverage-clean-data +@WITH_GCOV_TRUE@ -find $(top_builddir) -name "*.gcno" -delete + +@WITH_GCOV_TRUE@coverage-distclean: coverage-clean + +@WITH_GCOV_TRUE@coverage: coverage-reset coverage-baseline coverage-check coverage-report +@WITH_GCOV_TRUE@clean-local: coverage-clean +@WITH_GCOV_TRUE@distclean-local: coverage-distclean + +@WITH_GCOV_TRUE@.PHONY: coverage coverage-baseline coverage-check coverage-report coverage-reset coverage-clean-report coverage-clean-data coverage-clean coverage-distclean + +# Without coverage support, still arrange for 'make distclean' to get rid of +# any coverage files that may have been left from 
a different configuration. + +@WITH_GCOV_FALSE@coverage: +@WITH_GCOV_FALSE@ @echo "Configuring with --enable-coverage is required to generate code coverage report." + +@WITH_GCOV_FALSE@distclean-local: +@WITH_GCOV_FALSE@ rm -rf $(PACKAGE)-$(VERSION)-coverage* + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..5f8dde3 --- /dev/null +++ b/NEWS @@ -0,0 +1,492 @@ +News about PCRE2 releases +------------------------- + + +Version 10.44 07-June-2024 +-------------------------- + +This is mostly a bug-fix and tidying release. There is one new function, to set +a maximum size for a compiled pattern. The maximum name length for groups is +increased to 128. Some auxiliary files for building under VMS are added. + + +Version 10.43 16-February-2024 +------------------------------ + +There are quite a lot of changes in this release (see ChangeLog and git log for +a list). Those that are not bugfixes or code tidies are: + +* The JIT code no longer supports ARMv5 architecture. + +* A new function pcre2_get_match_data_heapframes_size() for finer heap control. + +* New option flags to restrict the interaction between ASCII and non-ASCII + characters for caseless matching and \d and friends. There are also new + pattern constructs to control these flags from within a pattern. + +* Upgrade to Unicode 15.0.0. + +* Treat a NULL pattern with zero length as an empty string. + +* Added support for limited-length variable-length lookbehind assertions, with + a default maximum length of 255 characters (same as Perl) but with a function + to adjust the limit. + +* Support for LoongArch in JIT. + +* Perl changed the meaning of (for example) {,3} which did not used to be + recognized as a quantifier. Now it means {0,3} and PCRE2 has also changed. + Note that {,} is still not a quantifier. 
+ +* Following Perl, allow spaces and tabs after { and before } in all Perl- + compatible items that use braces, and also around commas in quantifiers. The + one exception in PCRE2 is \u{...}, which is from ECMAScript, not Perl, and + PCRE2 follows ECMAScript usage. + +* Changed the meaning of \w and its synonyms and derivatives (\b and \B) in UCP + mode to follow Perl. It now matches characters whose general categories are L + or N or whose particular categories are Mn (non-spacing mark) or Pc + (connector punctuation). + +* Changed the default meaning of [:xdigit:] in UCP mode to follow Perl. It now + matches the "fullwidth" versions of hex digits. PCRE2_EXTRA_ASCII_DIGIT can + be used to keep it ASCII only. + +* Make PCRE2_UCP the default in UTF mode in pcre2grep and add --no-ucp, + --case-restrict and --posix-digit. + +* Add --group-separator and --no-group-separator to pcre2grep. + + +Version 10.42 11-December-2022 +------------------------------ + +This is an unexpectedly early release to fix a problem that was introduced in +10.41. ChangeLog number 19 (GitHub #139) added the default definition of +PCRE2_CALL_CONVENTION to pcre2posix.c instead of pcre2posix.h, which meant that +programs including pcre2posix.h but not pcre2.h couldn't compile. A new test +that checks this case has been added. + +A couple of other minor issues are also fixed, and a patch for an intermittent +JIT fault is also included. See ChangeLog and the Git log. + + +Version 10.41 06-December-2022 +------------------------------ + +This is another mainly bug-fixing and code-tidying release. There is one +significant upgrade to pcre2grep: it now behaves like GNU grep when matching +more than one pattern and a later pattern matches at an earlier point in the +subject when the matched substrings are being identified by colour or by +offsets. + + +Version 10.40 15-April-2022 +--------------------------- + +This is mostly a bug-fixing and code-tidying release.
However, there are some +extensions to Unicode property handling: + +* Added support for Bidi_Class and a number of binary Unicode properties, +including Bidi_Control. + +* A number of changes to script matching for \p and \P: + + (a) Script extensions for a character are now coded as a bitmap instead of + a list of script numbers, which should be faster and does not need a + loop. + + (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms + sc and scx). + + (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being + the same as \p{scx:scriptname} because this change happened in Perl at + release 5.26. + + (d) The standard Unicode 4-letter abbreviations for script names are now + recognized. + + (e) In accordance with Unicode and Perl's "loose matching" rules, spaces, + hyphens, and underscores are ignored in property names, which are then + matched independent of case. + +As always, see ChangeLog for a list of all changes (also the Git log). + + +Version 10.39 29-October-2021 +----------------------------- + +This release is happening soon after 10.38 because the bug fix is important. + +1. Fix incorrect detection of alternatives in first character search in JIT. + +2. Update to Unicode 14.0.0. + +3. Some code cleanups (see ChangeLog). + + +Version 10.38 01-October-2021 +----------------------------- + +As well as some bug fixes and tidies (as always, see ChangeLog for details), +the documentation is updated to list the new URLs, following the move of the +source repository to GitHub and the mailing list to Google Groups. + +* The CMake build system can now build both static and shared libraries in one +go. + +* Following Perl's lead, \K is now locked out in lookaround assertions by +default, but an option is provided to re-enable the previous behaviour. + + +Version 10.37 26-May-2021 +------------------------- + +A few more bug fixes and tidies. 
The only change of real note is the removal of +the actual POSIX names regcomp etc. from the POSIX wrapper library because +these have caused issues for some applications (see 10.33 #2 below). + + +Version 10.36 04-December-2020 +------------------------------ + +Again, mainly bug fixes and tidies. The only enhancements are the addition of +GNU grep's -m (aka --max-count) option to pcre2grep, and also unifying the +handling of substitution strings for both -O and callouts in pcre2grep, with +the addition of $x{...} and $o{...} to allow for characters whose code points +are greater than 255 in Unicode mode. + +NOTE: there is an outstanding issue with JIT support for MacOS on arm64 +hardware. For details, please see Bugzilla issue #2618. + + +Version 10.35 15-April-2020 +--------------------------- + +Bugfixes, tidies, and a few new enhancements. + +1. Capturing groups that contain recursive backreferences to themselves are no +longer automatically atomic, because the restriction is no longer necessary +as a result of the 10.30 restructuring. + +2. Several new options for pcre2_substitute(). + +3. When Unicode is supported and PCRE2_UCP is set without PCRE2_UTF, Unicode +character properties are used for upper/lower case computations on characters +whose code points are greater than 127. + +4. The character tables (for low-valued characters) can now more easily be +saved and restored in binary. + +5. Updated to Unicode 13.0.0. + + +Version 10.34 21-November-2019 +------------------------------ + +Another release with a few enhancements as well as bugfixes and tidies. The +main new features are: + +1. There is now some support for matching in invalid UTF strings. + +2. Non-atomic positive lookarounds are implemented in the pcre2_match() +interpreter, but not in JIT. + +3. Added two new functions: pcre2_get_match_data_size() and +pcre2_maketables_free(). + +4. Upgraded to Unicode 12.1.0. 
+ + +Version 10.33 16-April-2019 +--------------------------- + +Yet more bugfixes, tidies, and a few enhancements, summarized here (see +ChangeLog for the full list): + +1. Callouts from pcre2_substitute() are now available. + +2. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper +functions that use the standard POSIX names. However, in pcre2posix.h the POSIX +names are defined as macros. This should help avoid linking with the wrong +library in some environments, while still exporting the POSIX names for +pre-existing programs that use them. + +3. Some new options: + + (a) PCRE2_EXTRA_ESCAPED_CR_IS_LF makes \r behave as \n. + + (b) PCRE2_EXTRA_ALT_BSUX enables support for ECMAScript 6's \u{hh...} + construct. + + (c) PCRE2_COPY_MATCHED_SUBJECT causes a copy of a matched subject to be + made, instead of just remembering a pointer. + +4. Some new Perl features: + + (a) Perl 5.28's experimental alphabetic names for atomic groups and + lookaround assertions, for example, (*pla:...) and (*atomic:...). + + (b) The new Perl "script run" features (*script_run:...) and + (*atomic_script_run:...) aka (*sr:...) and (*asr:...). + + (c) When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in + capture group names. + +5. --disable-percent-zt disables the use of %zu and %td in formatting strings +in pcre2test. They were already automatically disabled for VC and older C +compilers. + +6. Some changes related to callouts in pcre2grep: + + (a) Support for running an external program under VMS has been added, in + addition to Windows and fork() support. + + (b) --disable-pcre2grep-callout-fork restricts the callout support in + pcre2grep to the inbuilt echo facility. + + +Version 10.32 10-September-2018 +------------------------------- + +This is another mainly bugfix and tidying release with a few minor +enhancements. These are the main ones: + +1.
pcre2grep now supports the inclusion of binary zeros in patterns that are +read from files via the -f option. + +2. ./configure now supports --enable-jit=auto, which automatically enables JIT +if the hardware supports it. + +3. In pcre2_dfa_match(), internal recursive calls no longer use the stack for +local workspace and local ovectors. Instead, an initial block of stack is +reserved, but if this is insufficient, heap memory is used. The heap limit +parameter now applies to pcre2_dfa_match(). + +4. Updated to Unicode version 11.0.0. + +5. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported. + +6. Added support for \N{U+dddd}, but only in Unicode mode. + +7. Added support for (?^) to unset all imnsx options. + + +Version 10.31 12-February-2018 +------------------------------ + +This is mainly a bugfix and tidying release (see ChangeLog for full details). +However, there are some minor enhancements. + +1. New pcre2_config() options: PCRE2_CONFIG_NEVER_BACKSLASH_C and +PCRE2_CONFIG_COMPILED_WIDTHS. + +2. New pcre2_pattern_info() option PCRE2_INFO_EXTRAOPTIONS to retrieve the +extra compile time options. + +3. There are now public names for all the pcre2_compile() error numbers. + +4. Added PCRE2_CALLOUT_STARTMATCH and PCRE2_CALLOUT_BACKTRACK bits to a new +field callout_flags in callout blocks. + + +Version 10.30 14-August-2017 +---------------------------- + +The full list of changes that includes bugfixes and tidies is, as always, in +ChangeLog. These are the most important new features: + +1. The main interpreter, pcre2_match(), has been refactored into a new version +that does not use recursive function calls (and therefore the system stack) for +remembering backtracking positions. This makes --disable-stack-for-recursion a +NOOP. The new implementation allows backtracking into recursive group calls in +patterns, making it more compatible with Perl, and also fixes some other +previously hard-to-do issues. 
For patterns that have a lot of backtracking, the +heap is now used, and there is an explicit limit on the amount, settable by +pcre2_set_heap_limit() or (*LIMIT_HEAP=xxx). The "recursion limit" is retained, +but is renamed as "depth limit" (though the old names remain for +compatibility). + +There is also a change in the way callouts from pcre2_match() are handled. The +offset_vector field in the callout block is no longer a pointer to the +actual ovector that was passed to the matching function in the match data +block. Instead it points to an internal ovector of a size large enough to hold +all possible captured substrings in the pattern. + +2. The new option PCRE2_ENDANCHORED insists that a pattern match must end at +the end of the subject. + +3. The new option PCRE2_EXTENDED_MORE implements Perl's /xx feature, and +pcre2test is upgraded to support it. Setting within the pattern by (?xx) is +also supported. + +4. (?n) can be used to set PCRE2_NO_AUTO_CAPTURE, because Perl now has this. + +5. Additional compile options in the compile context are now available, and the +first two are: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES and +PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL. + +6. The newline type PCRE2_NEWLINE_NUL is now available. + +7. The match limit value now also applies to pcre2_dfa_match() as there are +patterns that can use up a lot of resources without necessarily recursing very +deeply. + +8. The option REG_PEND (a GNU extension) is now available for the POSIX +wrapper. Also there is a new option PCRE2_LITERAL which is used to support +REG_NOSPEC. + +9. PCRE2_EXTRA_MATCH_LINE and PCRE2_EXTRA_MATCH_WORD are implemented for the +benefit of pcre2grep, and pcre2grep's -F, -w, and -x options are re-implemented +using PCRE2_LITERAL, PCRE2_EXTRA_MATCH_WORD, and PCRE2_EXTRA_MATCH_LINE. This +is tidier and also fixes some bugs. + +10. The Unicode tables are upgraded from Unicode 8.0.0 to Unicode 10.0.0. + +11. 
There are some experimental functions for converting foreign patterns +(globs and POSIX patterns) into PCRE2 patterns. + + +Version 10.23 14-February-2017 +------------------------------ + +1. ChangeLog has the details of a lot of bug fixes and tidies. + +2. There has been a major re-factoring of the pcre2_compile.c file. Most syntax +checking is now done in the pre-pass that identifies capturing groups. This has +reduced the amount of duplication and made the code tidier. While doing this, +some minor bugs and Perl incompatibilities were fixed (see ChangeLog for +details.) + +3. Back references are now permitted in lookbehind assertions when there are +no duplicated group numbers (that is, (?| has not been used), and, if the +reference is by name, there is only one group of that name. The referenced +group must, of course, be of fixed length. + +4. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back +reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does +not recognize this syntax. + +5. pcre2grep now automatically expands its buffer up to a maximum set by +--max-buffer-size. + +6. The -t option (grand total) has been added to pcre2grep. + +7. A new function called pcre2_code_copy_with_tables() exists to copy a +compiled pattern along with a private copy of the character tables that it +uses. + +8. A user supplied a number of patches to upgrade pcre2grep under Windows and +tidy the code. + +9. Several updates have been made to pcre2test and test scripts (see +ChangeLog). + + +Version 10.22 29-July-2016 +-------------------------- + +1. ChangeLog has the details of a number of bug fixes. + +2. The POSIX wrapper function regcomp() did not previously support back references +and subroutine calls if called with the REG_NOSUB option. It now does. + +3. A new function, pcre2_code_copy(), is added, to make a copy of a compiled +pattern. + +4. Support for string callouts is added to pcre2grep. + +5. Added the PCRE2_NO_JIT option to pcre2_match().
+ +6. The pcre2_get_error_message() function now returns with a negative error +code if the error number it is given is unknown. + +7. Several updates have been made to pcre2test and test scripts (see +ChangeLog). + + +Version 10.21 12-January-2016 +----------------------------- + +1. Many bugs have been fixed. A large number of them were provoked only by very +strange pattern input, and were discovered by fuzzers. Some others were +discovered by code auditing. See ChangeLog for details. + +2. The Unicode tables have been updated to Unicode version 8.0.0. + +3. For Perl compatibility in EBCDIC environments, ranges such as a-z in a +class, where both values are literal letters in the same case, omit the +non-letter EBCDIC code points within the range. + +4. There have been a number of enhancements to the pcre2_substitute() function, +giving more flexibility to replacement facilities. It is now also possible to +cause the function to return the needed buffer size if the one given is too +small. + +5. The PCRE2_ALT_VERBNAMES option causes the "name" parts of special verbs such +as (*THEN:name) to be processed for backslashes and to take note of +PCRE2_EXTENDED. + +6. PCRE2_INFO_HASBACKSLASHC makes it possible for a client to find out if a +pattern uses \C, and --never-backslash-C makes it possible to compile a version +of PCRE2 in which the use of \C is always forbidden. + +7. A limit to the length of pattern that can be handled can now be set by +calling pcre2_set_max_pattern_length(). + +8. When matching an unanchored pattern, a match can be required to begin within +a given number of code units after the start of the subject by calling +pcre2_set_offset_limit(). + +9. The pcre2test program has been extended to test new facilities, and it can +now run the tests when LF on its own is not a valid newline sequence. + +10. The RunTest script has also been updated to enable more tests to be run. + +11. There have been some minor performance enhancements.
+ + +Version 10.20 30-June-2015 +-------------------------- + +1. Callouts with string arguments and the pcre2_callout_enumerate() function +have been implemented. + +2. The PCRE2_NEVER_BACKSLASH_C option, which locks out the use of \C, is added. + +3. The PCRE2_ALT_CIRCUMFLEX option lets ^ match after a newline at the end of a +subject in multiline mode. + +4. The way named subpatterns are handled has been refactored. The previous +approach had several bugs. + +5. The handling of \c in EBCDIC environments has been changed to conform to the +perlebcdic document. This is an incompatible change. + +6. Bugs have been mended, many of them discovered by fuzzers. + + +Version 10.10 06-March-2015 +--------------------------- + +1. Serialization and de-serialization functions have been added to the API, +making it possible to save and restore sets of compiled patterns, though +restoration must be done in the same environment that was used for compilation. + +2. The (*NO_JIT) feature has been added; this makes it possible for a pattern +creator to specify that JIT is not to be used. + +3. A number of bugs have been fixed. In particular, bugs that caused building +on Windows using CMake to fail have been mended. + + +Version 10.00 05-January-2015 +----------------------------- + +Version 10.00 is the first release of PCRE2, a revised API for the PCRE +library. Changes prior to 10.00 are logged in the ChangeLog file for the old +API, up to item 20 for release 8.36. New programs are recommended to use the +new library. Programs that use the original (PCRE1) API will need changing +before linking with the new library. 
+ +**** diff --git a/NON-AUTOTOOLS-BUILD b/NON-AUTOTOOLS-BUILD new file mode 100644 index 0000000..851976a --- /dev/null +++ b/NON-AUTOTOOLS-BUILD @@ -0,0 +1,430 @@ +Building PCRE2 without using autotools +-------------------------------------- + +This document contains the following sections: + + General + Generic instructions for the PCRE2 C libraries + Stack size in Windows environments + Linking programs in Windows environments + Calling conventions in Windows environments + Comments about Win32 builds + Building PCRE2 on Windows with CMake + Building PCRE2 on Windows with Visual Studio + Testing with RunTest.bat + Building PCRE2 on native z/OS and z/VM + Building PCRE2 under VMS + + +GENERAL + +The source of the PCRE2 libraries consists entirely of code written in Standard +C, and so should compile successfully on any system that has a Standard C +compiler and library. + +The PCRE2 distribution includes a "configure" file for use by the +configure/make (autotools) build system, as found in many Unix-like +environments. The README file contains information about the options for +"configure". + +There is also support for CMake, which some users prefer, especially in Windows +environments, though it can also be run in Unix-like environments. See the +section entitled "Building PCRE2 on Windows with CMake" below. + +Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs +under the names src/config.h.generic and src/pcre2.h.generic. These are +provided for those who build PCRE2 without using "configure" or CMake. If you +use "configure" or CMake, the .generic versions are not used. + + +GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARIES + +There are three possible PCRE2 libraries, each handling data with a specific +code unit width: 8, 16, or 32 bits. You can build any combination of them. The +following are generic instructions for building a PCRE2 C library "by hand". 
If +you are going to use CMake, this section does not apply to you; you can skip +ahead to the CMake section. Note that the settings concerned with 8-bit, +16-bit, and 32-bit code units relate to the type of data string that PCRE2 +processes. They are NOT referring to the underlying operating system bit width. +You do not have to do anything special to compile in a 64-bit environment, for +example. + + (1) Copy or rename the file src/config.h.generic as src/config.h, and edit the + macro settings that it contains to whatever is appropriate for your + environment. In particular, you can alter the definition of the NEWLINE + macro to specify what character(s) you want to be interpreted as line + terminators by default. You need to #define at least one of + SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, or SUPPORT_PCRE2_32, depending on which + libraries you are going to build. You must set all that apply. + + When you subsequently compile any of the PCRE2 modules, you must specify + -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the + sources. + + An alternative approach is not to edit src/config.h, but to use -D on the + compiler command line to make any changes that you need to the + configuration options. In this case -DHAVE_CONFIG_H must not be set. + + NOTE: There have been occasions when the way in which certain parameters + in src/config.h are used has changed between releases. (In the + configure/make world, this is handled automatically.) When upgrading to a + new release, you are strongly advised to review src/config.h.generic + before re-using what you had previously. + + Note also that the src/config.h.generic file is created from a config.h + that was generated by Autotools, which automatically includes settings of + a number of macros that are not actually used by PCRE2 (for example, + HAVE_DLFCN_H). + + (2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h. 
+ + (3) EITHER: + Copy or rename file src/pcre2_chartables.c.dist as + src/pcre2_chartables.c. + + OR: + Compile src/pcre2_dftables.c as a stand-alone program (using + -DHAVE_CONFIG_H if you have set up src/config.h), and then run it with + the single argument "src/pcre2_chartables.c". This generates a set of + standard character tables and writes them to that file. The tables are + generated using the default C locale for your system. If you want to use + a locale that is specified by LC_xxx environment variables, add the -L + option to the pcre2_dftables command. You must use this method if you + are building on a system that uses EBCDIC code. + + The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can + specify alternative tables at run time. + + (4) For a library that supports 8-bit code units in the character strings that + it processes, compile the following source files from the src directory, + setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also set + -DHAVE_CONFIG_H if you have set up src/config.h with your configuration, + or else use other -D settings to change the configuration as required. + + pcre2_auto_possess.c + pcre2_chkdint.c + pcre2_chartables.c + pcre2_compile.c + pcre2_config.c + pcre2_context.c + pcre2_convert.c + pcre2_dfa_match.c + pcre2_error.c + pcre2_extuni.c + pcre2_find_bracket.c + pcre2_jit_compile.c + pcre2_maketables.c + pcre2_match.c + pcre2_match_data.c + pcre2_newline.c + pcre2_ord2utf.c + pcre2_pattern_info.c + pcre2_script_run.c + pcre2_serialize.c + pcre2_string_utils.c + pcre2_study.c + pcre2_substitute.c + pcre2_substring.c + pcre2_tables.c + pcre2_ucd.c + pcre2_valid_utf.c + pcre2_xclass.c + + Make sure that you include -I. in the compiler command (or equivalent for + an unusual compiler) so that all included PCRE2 header files are first + sought in the src directory under the current directory. Otherwise you run + the risk of picking up a previously-installed file from somewhere else. 
+ + Note that you must compile pcre2_jit_compile.c, even if you have not + defined SUPPORT_JIT in src/config.h, because when JIT support is not + configured, dummy functions are compiled. When JIT support IS configured, + pcre2_jit_compile.c #includes other files from the sljit subdirectory, + all of whose names begin with "sljit". It also #includes + src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile + those yourself. + + Note also that the pcre2_fuzzsupport.c file contains special code that is + useful to those who want to run fuzzing tests on the PCRE2 library. Unless + you are doing that, you can ignore it. + + (5) Now link all the compiled code into an object library in whichever form + your system keeps such libraries. This is the PCRE2 C 8-bit library, + typically called something like libpcre2-8. If your system has static and + shared libraries, you may have to do this once for each type. + + (6) If you want to build a library that supports 16-bit or 32-bit code units, + set 16 or 32 as the value of -DPCRE2_CODE_UNIT_WIDTH when obeying step 4 + above. If you want to build more than one PCRE2 library, repeat steps 4 + and 5 as necessary. + + (7) If you want to build the POSIX wrapper functions (which apply only to the + 8-bit library), ensure that you have the src/pcre2posix.h file and then + compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix + library. If targeting a DLL in Windows, make sure to include + -DPCRE2POSIX_SHARED with your compiler flags. + + (8) The pcre2test program can be linked with any combination of the 8-bit, + 16-bit and 32-bit libraries (depending on what you specified in + src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if + necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the + appropriate library/ies. If you compiled an 8-bit library, pcre2test also + needs the pcre2posix wrapper library.
+ + (9) Run pcre2test on the testinput files in the testdata directory, and check + that the output matches the corresponding testoutput files. There are + comments about what each test does in the section entitled "Testing PCRE2" + in the README file. If you compiled more than one of the 8-bit, 16-bit and + 32-bit libraries, you need to run pcre2test with the -16 option to do + 16-bit tests and with the -32 option to do 32-bit tests. + + Some tests are relevant only when certain build-time options are selected. + For example, test 4 is for Unicode support, and will not run if you have + built PCRE2 without it. See the comments at the start of each testinput + file. If you have a suitable Unix-like shell, the RunTest script will run + the appropriate tests for you. The command "RunTest list" will output a + list of all the tests. + + Note that the supplied files are in Unix format, with just LF characters + as line terminators. You may need to edit them to change this if your + system uses a different convention. + +(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested + by running pcre2test with the -jit option. This is done automatically by + the RunTest script. You might also like to build and run the freestanding + JIT test program, src/pcre2_jit_test.c. + +(11) The pcre2test program tests the POSIX wrapper library, but there is also a + freestanding test program in src/pcre2posix_test.c. It must be linked with + both the pcre2posix library and the 8-bit PCRE2 library. + +(12) If you want to use the pcre2grep command, compile and link + src/pcre2grep.c; it uses only the 8-bit PCRE2 library (it does not need + the pcre2posix library). If you have built the PCRE2 library with JIT + support by defining SUPPORT_JIT in src/config.h, you can also define + SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless + it is run with --no-jit). 
If you define SUPPORT_PCRE2GREP_JIT without + defining SUPPORT_JIT, pcre2grep does not try to make use of JIT. + + +STACK SIZE IN WINDOWS ENVIRONMENTS + +Prior to release 10.30 the default system stack size of 1MiB in some Windows +environments caused issues with some tests. This should no longer be the case +for 10.30 and later releases. + + +LINKING PROGRAMS IN WINDOWS ENVIRONMENTS + +If you want to statically link a program against a PCRE2 library in the form of +a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h. + + +CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS + +It is possible to compile programs to use different calling conventions using +MSVC. Search the web for "calling conventions" for more information. To make it +easier to change the calling convention for the exported functions in a +PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external +definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is +not set, it defaults to empty; the default calling convention is then used +(which is what is wanted most of the time). + + +COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE") + +There are two ways of building PCRE2 using the "configure, make, make install" +paradigm on Windows systems: using MinGW or using Cygwin. These are not at all +the same thing; they are completely different from each other. There is also +support for building using CMake, which some users find a more straightforward +way of building PCRE2 under Windows. + +The MinGW home page (http://www.mingw.org/) says this: + + MinGW: A collection of freely available and freely distributable Windows + specific header files and import libraries combined with GNU toolsets that + allow one to produce native Windows programs that do not rely on any + 3rd-party C runtime DLLs. + +The Cygwin home page (http://www.cygwin.com/) says this: + + Cygwin is a Linux-like environment for Windows. 
It consists of two parts: + + . A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing + substantial Linux API functionality + + . A collection of tools which provide Linux look and feel. + +On both MinGW and Cygwin, PCRE2 should build correctly using: + + ./configure && make && make install + +This should create two libraries called libpcre2-8 and libpcre2-posix. These +are independent libraries: when you link with libpcre2-posix you must also link +with libpcre2-8, which contains the basic functions. + +Using Cygwin's compiler generates libraries and executables that depend on +cygwin1.dll. If a library that is generated this way is distributed, +cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL +licence, this forces not only PCRE2 to be under the GPL, but also the entire +application. A distributor who wants to keep their own code proprietary must +purchase an appropriate Cygwin licence. + +MinGW has no such restrictions. The MinGW compiler generates a library or +executable that can run standalone on Windows without any third party dll or +licensing issues. + +But there is more complication: + +If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is +to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a +front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's +gcc and MinGW's gcc). So, a user can: + +. Build native binaries by using MinGW or by getting Cygwin and using + -mno-cygwin. + +. Build binaries that depend on cygwin1.dll by using Cygwin with the normal + compiler flags. + +The test files that are supplied with PCRE2 are in UNIX format, with LF +characters as line terminators. Unless your PCRE2 library uses a default +newline option that includes LF as a valid newline, it may be necessary to +change the line terminators in the test files to get some of the tests to work. 
+ + +BUILDING PCRE2 ON WINDOWS WITH CMAKE + +CMake is an alternative configuration facility that can be used instead of +"configure". CMake creates project files (make files, solution files, etc.) +tailored to numerous development environments, including Visual Studio, +Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no +spaces in the names for your CMake installation and your PCRE2 source and build +directories. + +The following instructions were contributed by a PCRE1 user, but they should +also work for PCRE2. If they are not followed exactly, errors may occur. In the +event that errors do occur, it is recommended that you delete the CMake cache +before attempting to repeat the CMake build process. In the CMake GUI, the +cache can be deleted by selecting "File > Delete Cache". + +1. Install the latest CMake version available from http://www.cmake.org/, and + ensure that cmake\bin is on your path. + +2. Unzip (retaining folder structure) the PCRE2 source tree into a source + directory such as C:\pcre2. You should ensure your local date and time + is not earlier than the file dates in your source dir if the release is + very new. + +3. Create a new, empty build directory, preferably a subdirectory of the + source dir. For example, C:\pcre2\pcre2-xx\build. + +4. Run cmake-gui from the Shell environment of your build tool, for example, + Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try + to start Cmake from the Windows Start menu, as this can lead to errors. + +5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and + build directories, respectively. + +6. Hit the "Configure" button. + +7. Select the particular IDE / build tool that you are using (Visual + Studio, MSYS makefiles, MinGW makefiles, etc.) + +8. The GUI will then list several configuration options. This is where + you can disable Unicode support or select other PCRE2 optional features. + +9. Hit "Configure" again. 
The adjacent "Generate" button should now be + active. + +10. Hit "Generate". + +11. The build directory should now contain a usable build system, be it a + solution file for Visual Studio, makefiles for MinGW, etc. Exit from + cmake-gui and use the generated build system with your compiler or IDE. + E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2 + solution, select the desired configuration (Debug, or Release, etc.) and + build the ALL_BUILD project. + +12. If during configuration with cmake-gui you've elected to build the test + programs, you can execute them by building the test project. E.g., for + MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The + most recent build configuration is targeted by the tests. A summary of + test results is presented. Complete test output is subsequently + available for review in Testing\Temporary under your build dir. + + +BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO + +The code currently cannot be compiled without an inttypes.h header, which is +available only with Visual Studio 2013 or newer. However, this portable and +permissively-licensed implementation of the stdint.h header could be used as an +alternative: + + http://www.azillionmonkeys.com/qed/pstdint.h + +Just rename it and drop it into the top level of the build tree. + + +TESTING WITH RUNTEST.BAT + +If configured with CMake, building the test project ("make test" or building +ALL_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending +on your configuration options, possibly other test programs) in the build +directory. The pcre2_test.bat script runs RunTest.bat with correct source and +exe paths. + +For manual testing with RunTest.bat, provided the build dir is a subdirectory +of the source directory: Open command shell window. Chdir to the location +of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with +"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate. 
+ +To run only a particular test with RunTest.bat, provide a test number argument. + +Otherwise: + +1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe + have been created. + +2. Edit RunTest.bat to identify the full or relative location of + the pcre2 source (in which the testdata folder resides), e.g.: + + set srcdir=C:\pcre2\pcre2-10.00 + +3. In a Windows command environment, chdir to the location of your bat and + exe programs. + +4. Run RunTest.bat. Test outputs will automatically be compared to expected + results, and discrepancies will be identified in the console output. + +To independently test the just-in-time compiler, run pcre2_jit_test.exe. + + +BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM + +z/OS and z/VM are operating systems for mainframe computers, produced by IBM. +The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and +applications can be supported through UNIX System Services, and in such an +environment it should be possible to build PCRE2 in the same way as in other +systems, with the EBCDIC related configuration settings, but it is not known if +anybody has tried this. + +In native z/OS (without UNIX System Services) and in z/VM, special ports are +required. For details, please see file 939 on this web site: + + http://www.cbttape.org + +Everything in that location, source and executable, is in EBCDIC and native +z/OS file formats. The port provides an API for LE languages such as COBOL and +for the z/OS and z/VM versions of the Rexx languages. + + +BUILDING PCRE2 UNDER VMS + +Alexey Chuphin has contributed some auxiliary files for building PCRE2 under +OpenVMS. They are in the "vms" directory in the distribution tarball. Please +read the file called vms/openvms_readme.txt. The pcre2test and pcre2grep +programs contain some VMS-specific code.
+ +=========================== +Last Updated: 16 April 2024 +=========================== diff --git a/PrepareRelease b/PrepareRelease new file mode 100755 index 0000000..0dc6e30 --- /dev/null +++ b/PrepareRelease @@ -0,0 +1,257 @@ +#!/bin/sh + +# Script to prepare the files for building a PCRE2 release. It does some +# processing of the documentation, detrails files, and creates pcre2.h.generic +# and config.h.generic (for use by builders who can't run ./configure). + +# You must run this script before running "make dist". If its first argument +# is "doc", it stops after preparing the documentation. There are no other +# arguments. The script makes use of the following files: + +# 132html A Perl script that converts a .1 or .3 man page into HTML. It +# "knows" the relevant troff constructs that are used in the PCRE2 +# man pages. + +# CheckMan A Perl script that checks man pages for typos in the mark up. + +# CleanTxt A Perl script that cleans up the output of "nroff -man" by +# removing backspaces and other redundant text so as to produce +# a readable .txt file. + +# Detrail A Perl script that removes trailing spaces from files. + +# doc/index.html.src +# A file that is copied as index.html into the doc/html directory +# when the HTML documentation is built. It works like this so that +# doc/html can be deleted and re-created from scratch. + +# README & NON-AUTOTOOLS-BUILD +# These files are copied into the doc/html directory, with .txt +# extensions so that they can be hyperlinked from the HTML +# documentation, because some people just go to the HTML without +# looking for text files. + + +# First, sort out the documentation. Remove pcre2demo.3 first because it won't +# pass the markup check (it is created below, using markup that none of the +# other pages use). + +cd doc +echo Processing documentation + +/bin/rm -f pcre2demo.3 + +# Check the remaining man pages + +perl ../CheckMan *.1 *.3 +if [ $?
!= 0 ] ; then exit 1; fi + +# Make Text form of the documentation. It needs some mangling to make it +# tidy for online reading. Concatenate all the .3 stuff, but omit the +# individual function pages. + +cat <<End >pcre2.txt +----------------------------------------------------------------------------- +This file contains a concatenation of the PCRE2 man pages, converted to plain +text format for ease of searching with a text editor, or for use on systems +that do not have a man page processor. The small individual files that give +synopses of each function in the library have not been included. Neither has +the pcre2demo program. There are separate text files for the pcre2grep and +pcre2test commands. +----------------------------------------------------------------------------- + + +End + +echo "Making pcre2.txt" +for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \ + pcre2limits pcre2matching pcre2partial pcre2pattern pcre2perform \ + pcre2posix pcre2sample pcre2serialize pcre2syntax \ + pcre2unicode ; do + echo " Processing $file.3" + nroff -c -man $file.3 >$file.rawtxt + perl ../CleanTxt <$file.rawtxt >>pcre2.txt + /bin/rm $file.rawtxt + echo "------------------------------------------------------------------------------" >>pcre2.txt + if [ "$file" != "pcre2sample" ] ; then + echo " " >>pcre2.txt + echo " " >>pcre2.txt + fi +done + +# The three commands +for file in pcre2test pcre2grep pcre2-config ; do + echo Making $file.txt + nroff -c -man $file.1 >$file.rawtxt + perl ../CleanTxt <$file.rawtxt >$file.txt + /bin/rm $file.rawtxt +done + + +# Make pcre2demo.3 from the pcre2demo.c source file + +echo "Making pcre2demo.3" +perl <<"END" >pcre2demo.3 + use Time::Piece; + open(VH, "<", "../src/config.h.generic") || die "Failed to open src/config.h.generic\n"; + open(IN, "../src/pcre2demo.c") || die "Failed to open src/pcre2demo.c\n"; + open(OUT, ">pcre2demo.3") || die "Failed to open pcre2demo.3\n"; + my $version; + while (<VH>) + { + chomp; + if (
/^#define PACKAGE_STRING "([^"]+)"/ ) { $version = $1 ; last } + } + my $t = localtime; + print OUT ".TH PCRE2DEMO 3 \"", $t->strftime('%e %B %Y'), '" "', $version, "\"\n" . + ".\\\"AUTOMATICALLY GENERATED BY PrepareRelease - do not EDIT!\n" . + ".SH NAME\n" . + "PCRE2DEMO - A demonstration C program for PCRE2\n" . + ".SH \"SOURCE CODE\"\n" . + ".rs\n" . + ".sp\n" . + ".\\\" Start example.\n" . + ".de EX\n" . + ". do ds mF \\\\n[.fam]\n" . + ". nr mE \\\\n(.f\n" . + ". nf\n" . + ". nh\n" . + ". do fam C\n" . + ". ft CW\n" . + "..\n" . + ".\n" . + ".\n" . + ".\\\" End example.\n" . + ".de EE\n" . + ". do fam \\\\*(mF\n" . + ". ft \\\\n(mE\n" . + ". fi\n" . + ". hy \\\\n(HY\n" . + "..\n" . + ".\n" . + ".RS -7\n" . + ".EX\n" ; + while (<IN>) + { + s/\\/\\e/g; + print OUT; + } + print OUT ".EE\n"; + close(IN); + close(OUT); +END +if [ $? != 0 ] ; then exit 1; fi + + +# Make HTML form of the documentation. + +echo "Making HTML documentation" +/bin/rm html/* +cp index.html.src html/index.html +cp ../README html/README.txt +cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt + +for file in *.1 ; do + base=`basename $file .1` + echo " Making $base.html" + perl ../132html -toc $base <$file >html/$base.html +done + +# Exclude table of contents for function summaries. It seems that expr +# forces an anchored regex. Also exclude them for small pages that have +# only one section. + +for file in *.3 ; do + base=`basename $file .3` + toc=-toc + if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi + if [ "$base" = "pcre2sample" ] || \ + [ "$base" = "pcre2compat" ] || \ + [ "$base" = "pcre2demo" ] || \ + [ "$base" = "pcre2limits" ] || \ + [ "$base" = "pcre2unicode" ] ; then + toc="" + fi + echo " Making $base.html" + perl ../132html $toc $base <$file >html/$base.html + if [ $? != 0 ] ; then exit 1; fi +done + +# End of documentation processing; stop if only documentation required. + +cd ..
+echo Documentation done +if [ "$1" = "doc" ] ; then exit; fi + +# These files are detrailed; do not detrail the test data because there may be +# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF +# line endings and the detrail script removes all trailing white space. The +# configure files are also omitted from the detrailing. + +files="\ + Makefile.am \ + configure.ac \ + README \ + LICENCE \ + COPYING \ + AUTHORS \ + NEWS \ + NON-AUTOTOOLS-BUILD \ + INSTALL \ + 132html \ + CleanTxt \ + Detrail \ + ChangeLog \ + CMakeLists.txt \ + RunGrepTest \ + RunTest \ + pcre2-config.in \ + perltest.sh \ + libpcre2-8.pc.in \ + libpcre2-16.pc.in \ + libpcre2-32.pc.in \ + libpcre2-posix.pc.in \ + src/pcre2_dftables.c \ + src/pcre2.h.in \ + src/pcre2_auto_possess.c \ + src/pcre2_compile.c \ + src/pcre2_config.c \ + src/pcre2_context.c \ + src/pcre2_convert.c \ + src/pcre2_dfa_match.c \ + src/pcre2_error.c \ + src/pcre2_extuni.c \ + src/pcre2_find_bracket.c \ + src/pcre2_internal.h \ + src/pcre2_intmodedep.h \ + src/pcre2_jit_compile.c \ + src/pcre2_jit_match.c \ + src/pcre2_jit_misc.c \ + src/pcre2_jit_test.c \ + src/pcre2_maketables.c \ + src/pcre2_match.c \ + src/pcre2_match_data.c \ + src/pcre2_newline.c \ + src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c \ + src/pcre2_printint.c \ + src/pcre2_string_utils.c \ + src/pcre2_study.c \ + src/pcre2_substring.c \ + src/pcre2_tables.c \ + src/pcre2_ucd.c \ + src/pcre2_ucp.h \ + src/pcre2_valid_utf.c \ + src/pcre2_xclass.c \ + src/pcre2demo.c \ + src/pcre2grep.c \ + src/pcre2posix.c \ + src/pcre2posix.h \ + src/pcre2test.c" + +echo Detrailing +perl ./Detrail $files doc/p* doc/html/* + +echo Done + +#End diff --git a/README b/README new file mode 100644 index 0000000..dab5e94 --- /dev/null +++ b/README @@ -0,0 +1,956 @@ +README file for PCRE2 (Perl-compatible regular expression library) +------------------------------------------------------------------ + +PCRE2 is a re-working of the original PCRE1
library to provide an entirely new +API. Since its initial release in 2015, there has been further development of +the code and it now differs from PCRE1 in more than just the API. There are new +features, and the internals have been improved. The original PCRE1 library is +now obsolete and no longer maintained. The latest release of PCRE2 is available +in .tar.gz, .tar.bz2, or .zip form from this GitHub repository: + +https://github.com/PCRE2Project/pcre2/releases + +There is a mailing list for discussion about the development of PCRE2 at +pcre2-dev@googlegroups.com. You can subscribe by sending an email to +pcre2-dev+subscribe@googlegroups.com. + +You can access the archives and also subscribe or manage your subscription +here: + +https://groups.google.com/g/pcre2-dev + +Please read the NEWS file if you are upgrading from a previous release. The +contents of this README file are: + + The PCRE2 APIs + Documentation for PCRE2 + Building PCRE2 on non-Unix-like systems + Building PCRE2 without using autotools + Building PCRE2 using autotools + Retrieving configuration information + Shared libraries + Cross-compiling using autotools + Making new tarballs + Testing PCRE2 + Character tables + File manifest + + +The PCRE2 APIs +-------------- + +PCRE2 is written in C, and it has its own API. There are three sets of +functions, one for the 8-bit library, which processes strings of bytes, one for +the 16-bit library, which processes strings of 16-bit values, and one for the +32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there +are no C++ wrappers. + +The distribution does contain a set of C wrapper functions for the 8-bit +library that are based on the POSIX regular expression API (see the pcre2posix +man page). These are built into a library called libpcre2-posix. Note that this +just provides a POSIX calling interface to PCRE2; the regular expressions +themselves still follow Perl syntax and semantics. 
The POSIX API is restricted, +and does not give full access to all of PCRE2's facilities. + +The header file for the POSIX-style functions is called pcre2posix.h. The +official POSIX name is regex.h, but I did not want to risk possible problems +with existing files of that name by distributing it that way. To use PCRE2 with +an existing program that uses the POSIX API, pcre2posix.h will have to be +renamed or pointed at by a link (or the program modified, of course). See the +pcre2posix documentation for more details. + + +Documentation for PCRE2 +----------------------- + +If you install PCRE2 in the normal way on a Unix-like system, you will end up +with a set of man pages whose names all start with "pcre2". The one that is +just called "pcre2" lists all the others. In addition to these man pages, the +PCRE2 documentation is supplied in two other forms: + + 1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and + doc/pcre2test.txt in the source distribution. The first of these is a + concatenation of the text forms of all the section 3 man pages except the + listing of pcre2demo.c and those that summarize individual functions. The + other two are the text forms of the section 1 man pages for the pcre2grep + and pcre2test commands. These text forms are provided for ease of scanning + with text editors or similar tools. They are installed in + <prefix>/share/doc/pcre2, where <prefix> is the installation prefix + (defaulting to /usr/local). + + 2. A set of files containing all the documentation in HTML form, hyperlinked + in various ways, and rooted in a file called index.html, is distributed in + doc/html and installed in <prefix>/share/doc/pcre2/html. + + +Building PCRE2 on non-Unix-like systems +--------------------------------------- + +For a non-Unix-like system, please read the file NON-AUTOTOOLS-BUILD, though if +your system supports the use of "configure" and "make" you may be able to build +PCRE2 using autotools in the same way as for many Unix-like systems. 
+ +PCRE2 can also be configured using CMake, which can be run in various ways +(command line, GUI, etc). This creates Makefiles, solution files, etc. The file +NON-AUTOTOOLS-BUILD has information about CMake. + +PCRE2 has been compiled on many different operating systems. It should be +straightforward to build PCRE2 on any system that has a Standard C compiler and +library, because it uses only Standard C functions. + + +Building PCRE2 without using autotools +-------------------------------------- + +The use of autotools (in particular, libtool) is problematic in some +environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD +file for ways of building PCRE2 without using autotools. + + +Building PCRE2 using autotools +------------------------------ + +The following instructions assume the use of the widely used "configure; make; +make install" (autotools) process. + +If you have downloaded and unpacked a PCRE2 release tarball, run the +"configure" command from the PCRE2 directory, with your current directory set +to the directory where you want the files to be created. This command is a +standard GNU "autoconf" configuration script, for which generic instructions +are supplied in the file INSTALL. + +The files in the GitHub repository do not contain "configure". If you have +downloaded the PCRE2 source files from GitHub, before you can run "configure" +you must run the shell script called autogen.sh. This runs a number of +autotools to create a "configure" script (you must of course have the autotools +commands installed in order to do this). + +Most commonly, people build PCRE2 within its own distribution directory, and in +this case, on many systems, just running "./configure" is sufficient. However, +the usual methods of changing standard defaults are available. 
For example: + +CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local + +This command specifies that the C compiler should be run with the flags '-O2 +-Wall' instead of the default, and that "make install" should install PCRE2 +under /opt/local instead of the default /usr/local. + +If you want to build in a different directory, just run "configure" with that +directory as current. For example, suppose you have unpacked the PCRE2 source +into /source/pcre2/pcre2-xxx, but you want to build it in +/build/pcre2/pcre2-xxx: + +cd /build/pcre2/pcre2-xxx +/source/pcre2/pcre2-xxx/configure + +PCRE2 is written in C and is normally compiled as a C library. However, it is +possible to build it as a C++ library, though the provided building apparatus +does not have any features to support this. + +There are some optional features that can be included or omitted from the PCRE2 +library. They are also documented in the pcre2build man page. + +. By default, both shared and static libraries are built. You can change this + by adding one of these options to the "configure" command: + + --disable-shared + --disable-static + + Setting --disable-shared ensures that PCRE2 libraries are built as static + libraries. The binaries that are then created as part of the build process + (for example, pcre2test and pcre2grep) are linked statically with one or more + PCRE2 libraries, but may also be dynamically linked with other libraries such + as libc. If you want these binaries to be fully statically linked, you can + set LDFLAGS like this: + + LDFLAGS=--static ./configure --disable-shared + + Note the two hyphens in --static. Of course, this works only if static + versions of all the relevant libraries are available for linking. See also + "Shared libraries" below. + +. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to + the "configure" command, the 16-bit library is also built. 
If you add + --enable-pcre2-32 to the "configure" command, the 32-bit library is also + built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8 + to disable building the 8-bit library. + +. If you want to include support for just-in-time (JIT) compiling, which can + give large performance improvements on certain platforms, add --enable-jit to + the "configure" command. This support is available only for certain hardware + architectures. If you try to enable it on an unsupported architecture, there + will be a compile time error. If in doubt, use --enable-jit=auto, which + enables JIT only if the current hardware is supported. + +. If you are enabling JIT under SELinux environment you may also want to add + --enable-jit-sealloc, which enables the use of an executable memory allocator + that is compatible with SELinux. Warning: this allocator is experimental! + It does not support fork() operation and may crash when no disk space is + available. This option has no effect if JIT is disabled. + +. If you do not want to make use of the default support for UTF-8 Unicode + character strings in the 8-bit library, UTF-16 Unicode character strings in + the 16-bit library, or UTF-32 Unicode character strings in the 32-bit + library, you can add --disable-unicode to the "configure" command. This + reduces the size of the libraries. It is not possible to configure one + library with Unicode support, and another without, in the same configuration. + It is also not possible to use --enable-ebcdic (see below) with Unicode + support, so if this option is set, you must also use --disable-unicode. + + When Unicode support is available, the use of a UTF encoding still has to be + enabled by setting the PCRE2_UTF option at run time or starting a pattern + with (*UTF). When PCRE2 is compiled with Unicode support, its input can only + either be ASCII or UTF-8/16/32, even when running on EBCDIC platforms. 
+ + As well as supporting UTF strings, Unicode support includes support for the + \P, \p, and \X sequences that recognize Unicode character properties. + However, only a subset of Unicode properties are supported; see the + pcre2pattern man page for details. Escape sequences such as \d and \w in + patterns do not by default make use of Unicode properties, but can be made to + do so by setting the PCRE2_UCP option or starting a pattern with (*UCP). + +. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any + of the preceding, or any of the Unicode newline sequences, or the NUL (zero) + character as indicating the end of a line. Whatever you specify at build time + is the default; the caller of PCRE2 can change the selection at run time. The + default newline indicator is a single LF character (the Unix standard). You + can specify the default newline indicator by adding --enable-newline-is-cr, + --enable-newline-is-lf, --enable-newline-is-crlf, + --enable-newline-is-anycrlf, --enable-newline-is-any, or + --enable-newline-is-nul to the "configure" command, respectively. + +. By default, the sequence \R in a pattern matches any Unicode line ending + sequence. This is independent of the option specifying what PCRE2 considers + to be the end of a line (see above). However, the caller of PCRE2 can + restrict \R to match only CR, LF, or CRLF. You can make this the default by + adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R"). + +. In a pattern, the escape sequence \C matches a single code unit, even in a + UTF mode. This can be dangerous because it breaks up multi-code-unit + characters. You can build PCRE2 with the use of \C permanently locked out by + adding --enable-never-backslash-C (note the upper case C) to the "configure" + command. When \C is allowed by the library, individual applications can lock + it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option. + +. 
PCRE2 has a counter that limits the depth of nesting of parentheses in a + pattern. This limits the amount of system stack that a pattern uses when it + is compiled. The default is 250, but you can change it by setting, for + example, + + --with-parens-nest-limit=500 + +. PCRE2 has a counter that can be set to limit the amount of computing resource + it uses when matching a pattern. If the limit is exceeded during a match, the + match fails. The default is ten million. You can change the default by + setting, for example, + + --with-match-limit=500000 + + on the "configure" command. This is just the default; individual calls to + pcre2_match() or pcre2_dfa_match() can supply their own value. There is more + discussion in the pcre2api man page (search for pcre2_set_match_limit). + +. There is a separate counter that limits the depth of nested backtracking + (pcre2_match()) or nested function calls (pcre2_dfa_match()) during a + matching process, which indirectly limits the amount of heap memory that is + used, and in the case of pcre2_dfa_match() the amount of stack as well. This + counter also has a default of ten million, which is essentially "unlimited". + You can change the default by setting, for example, + + --with-match-limit-depth=5000 + + There is more discussion in the pcre2api man page (search for + pcre2_set_depth_limit). + +. You can also set an explicit limit on the amount of heap memory used by + the pcre2_match() and pcre2_dfa_match() interpreters: + + --with-heap-limit=500 + + The units are kibibytes (units of 1024 bytes). This limit does not apply when + the JIT optimization (which has its own memory control features) is used. + There is more discussion on the pcre2api man page (search for + pcre2_set_heap_limit). + +. In the 8-bit library, the default maximum compiled pattern size is around + 64 kibibytes. You can increase this by adding --with-link-size=3 to the + "configure" command. 
PCRE2 then uses three bytes instead of two for offsets + to different parts of the compiled pattern. In the 16-bit library, + --with-link-size=3 is the same as --with-link-size=4, which (in both + libraries) uses four-byte offsets. Increasing the internal link size reduces + performance in the 8-bit and 16-bit libraries. In the 32-bit library, the + link size setting is ignored, as 4-byte offsets are always used. + +. Lookbehind assertions in which one or more branches can match a variable + number of characters are supported only if there is a maximum matching length + for each top-level branch. There is a limit to this maximum that defaults to + 255 characters. You can alter this default by a setting such as + + --with-max-varlookbehind=100 + + The limit can be changed at runtime by calling pcre2_set_max_varlookbehind(). + Lookbehind assertions in which every branch matches a fixed number of + characters (not necessarily all the same) are not constrained by this limit. + +. For speed, PCRE2 uses four tables for manipulating and identifying characters + whose code point values are less than 256. By default, it uses a set of + tables for ASCII encoding that is part of the distribution. If you specify + + --enable-rebuild-chartables + + a program called pcre2_dftables is compiled and run in the default C locale + when you obey "make". It builds a source file called pcre2_chartables.c. If + you do not specify this option, pcre2_chartables.c is created as a copy of + pcre2_chartables.c.dist. See "Character tables" below for further + information. + +. It is possible to compile PCRE2 for use on systems that use EBCDIC as their + character code (as opposed to ASCII/Unicode) by specifying + + --enable-ebcdic --disable-unicode + + This automatically implies --enable-rebuild-chartables (see above). However, + when PCRE2 is built this way, it always operates in EBCDIC. It cannot support + both EBCDIC and UTF-8/16/32. 
There is a second option, --enable-ebcdic-nl25, + which specifies that the code value for the EBCDIC NL character is 0x25 + instead of the default 0x15. + +. If you specify --enable-debug, additional debugging code is included in the + build. This option is intended for use by the PCRE2 maintainers. + +. In environments where valgrind is installed, if you specify + + --enable-valgrind + + PCRE2 will use valgrind annotations to mark certain memory regions as + unaddressable. This allows it to detect invalid memory accesses, and is + mostly useful for debugging PCRE2 itself. + +. In environments where the gcc compiler is used and lcov is installed, if you + specify + + --enable-coverage + + the build process implements a code coverage report for the test suite. The + report is generated by running "make coverage". If ccache is installed on + your system, it must be disabled when building PCRE2 for coverage reporting. + You can do this by setting the environment variable CCACHE_DISABLE=1 before + running "make" to build PCRE2. There is more information about coverage + reporting in the "pcre2build" documentation. + +. When JIT support is enabled, pcre2grep automatically makes use of it, unless + you add --disable-pcre2grep-jit to the "configure" command. + +. There is support for calling external programs during matching in the + pcre2grep command, using PCRE2's callout facility with string arguments. This + support can be disabled by adding --disable-pcre2grep-callout to the + "configure" command. There are two kinds of callout: one that generates + output from inbuilt code, and another that calls an external program. The + latter has special support for Windows and VMS; otherwise it assumes the + existence of the fork() function. This facility can be disabled by adding + --disable-pcre2grep-callout-fork to the "configure" command. + +. The pcre2grep program currently supports only 8-bit data files, and so + requires the 8-bit PCRE2 library. 
It is possible to compile pcre2grep to use + libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by + specifying one or both of + + --enable-pcre2grep-libz + --enable-pcre2grep-libbz2 + + Of course, the relevant libraries must be installed on your system. + +. The default starting size (in bytes) of the internal buffer used by pcre2grep + can be set by, for example: + + --with-pcre2grep-bufsize=51200 + + The value must be a plain integer. The default is 20480. The amount of memory + used by pcre2grep is actually three times this number, to allow for "before" + and "after" lines. If very long lines are encountered, the buffer is + automatically enlarged, up to a fixed maximum size. + +. The default maximum size of pcre2grep's internal buffer can be set by, for + example: + + --with-pcre2grep-max-bufsize=2097152 + + The default is either 1048576 or the value of --with-pcre2grep-bufsize, + whichever is the larger. + +. It is possible to compile pcre2test so that it links with the libreadline + or libedit libraries, by specifying, respectively, + + --enable-pcre2test-libreadline or --enable-pcre2test-libedit + + If this is done, when pcre2test's input is from a terminal, it reads it using + the readline() function. This provides line-editing and history facilities. + Note that libreadline is GPL-licenced, so if you distribute a binary of + pcre2test linked in this way, there may be licensing issues. These can be + avoided by linking with libedit (which has a BSD licence) instead. + + Enabling libreadline causes the -lreadline option to be added to the + pcre2test build. In many operating environments with a system-installed + readline library this is sufficient. However, in some environments (e.g. if + an unmodified distribution version of readline is in use), it may be + necessary to specify something like LIBS="-lncurses" as well. 
This is + because, to quote the readline INSTALL, "Readline uses the termcap functions, + but does not link with the termcap or curses library itself, allowing + applications which link with readline the option to choose an appropriate + library." If you get error messages about missing functions tgetstr, tgetent, + tputs, tgetflag, or tgoto, this is the problem, and linking with the ncurses + library should fix it. + +. The C99 standard defines formatting modifiers z and t for size_t and + ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in + environments other than Microsoft Visual Studio versions earlier than 2013 + when __STDC_VERSION__ is defined and has a value greater than or equal to + 199901L (indicating C99). However, there is at least one environment that + claims to be C99 but does not support these modifiers. If + --disable-percent-zt is specified, no use is made of the z or t modifiers. + Instead of %td or %zu, %lu is used, with a cast for size_t values. + +. There is a special option called --enable-fuzz-support for use by people who + want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit + library. If set, it causes an extra library called libpcre2-fuzzsupport.a to + be built, but not installed. This contains a single function called + LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the + length of the string. When called, this function tries to compile the string + as a pattern, and if that succeeds, to match it. This is done both with no + options and with some random options bits that are generated from the string. + Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to + be created. This is normally run under valgrind or used when PCRE2 is + compiled with address sanitizing enabled. It calls the fuzzing function and + outputs information about what it is doing. 
The input strings are specified + by arguments: if an argument starts with "=" the rest of it is a literal + input string. Otherwise, it is assumed to be a file name, and the contents + of the file are the test string. + +. Releases before 10.30 could be compiled with --disable-stack-for-recursion, + which caused pcre2_match() to use individual blocks on the heap for + backtracking instead of recursive function calls (which use the stack). This + is now obsolete because pcre2_match() was refactored always to use the heap + (in a much more efficient way than before). This option is retained for + backwards compatibility, but has no effect other than to output a warning. + +The "configure" script builds the following files for the basic C library: + +. Makefile the makefile that builds the library +. src/config.h build-time configuration options for the library +. src/pcre2.h the public PCRE2 header file +. pcre2-config script that shows the building settings such as CFLAGS + that were set for "configure" +. libpcre2-8.pc ) +. libpcre2-16.pc ) data for the pkg-config command +. libpcre2-32.pc ) +. libpcre2-posix.pc ) +. libtool script that builds shared and/or static libraries + +Versions of config.h and pcre2.h are distributed in the src directory of PCRE2 +tarballs under the names config.h.generic and pcre2.h.generic. These are +provided for those who have to build PCRE2 without using "configure" or CMake. +If you use "configure" or CMake, the .generic versions are not used. + +The "configure" script also creates config.status, which is an executable +script that can be run to recreate the configuration, and config.log, which +contains compiler output from tests that "configure" runs. + +Once "configure" has run, you can run "make". This builds whichever of the +libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test +program called pcre2test. 
If you enabled JIT support with --enable-jit, another +test program called pcre2_jit_test is built as well. If the 8-bit library is +built, libpcre2-posix, pcre2posix_test, and the pcre2grep command are also +built. Running "make" with the -j option may speed up compilation on +multiprocessor systems. + +The command "make check" runs all the appropriate tests. Details of the PCRE2 +tests are given below in a separate section of this document. The -j option of +"make" can also be used when running the tests. + +You can use "make install" to install PCRE2 into live directories on your +system. The following are installed (file names are all relative to the +<prefix> that is set when "configure" is run): + + Commands (bin): + pcre2test + pcre2grep (if 8-bit support is enabled) + pcre2-config + + Libraries (lib): + libpcre2-8 (if 8-bit support is enabled) + libpcre2-16 (if 16-bit support is enabled) + libpcre2-32 (if 32-bit support is enabled) + libpcre2-posix (if 8-bit support is enabled) + + Configuration information (lib/pkgconfig): + libpcre2-8.pc + libpcre2-16.pc + libpcre2-32.pc + libpcre2-posix.pc + + Header files (include): + pcre2.h + pcre2posix.h + + Man pages (share/man/man{1,3}): + pcre2grep.1 + pcre2test.1 + pcre2-config.1 + pcre2.3 + pcre2*.3 (lots more pages, all starting "pcre2") + + HTML documentation (share/doc/pcre2/html): + index.html + *.html (lots more pages, hyperlinked from index.html) + + Text file documentation (share/doc/pcre2): + AUTHORS + COPYING + ChangeLog + LICENCE + NEWS + README + pcre2.txt (a concatenation of the man(3) pages) + pcre2test.txt the pcre2test man page + pcre2grep.txt the pcre2grep man page + pcre2-config.txt the pcre2-config man page + +If you want to remove PCRE2 from your system, you can run "make uninstall". +This removes all the files that "make install" installed. However, it does not +remove any directories, because these are often shared with other programs. 
+ + +Retrieving configuration information +------------------------------------ + +Running "make install" installs the command pcre2-config, which can be used to +recall information about the PCRE2 configuration and installation. For example: + + pcre2-config --version + +prints the version number, and + + pcre2-config --libs8 + +outputs information about where the 8-bit library is installed. This command +can be included in makefiles for programs that use PCRE2, saving the programmer +from having to remember too many details. Run pcre2-config with no arguments to +obtain a list of possible arguments. + +The pkg-config command is another system for saving and retrieving information +about installed libraries. Instead of separate commands for each library, a +single command is used. For example: + + pkg-config --libs libpcre2-16 + +The data is held in *.pc files that are installed in a directory called +<prefix>/lib/pkgconfig. + + +Shared libraries +---------------- + +The default distribution builds PCRE2 as shared libraries and static libraries, +as long as the operating system supports shared libraries. Shared library +support relies on the "libtool" script which is built as part of the +"configure" process. + +The libtool script is used to compile and link both shared and static +libraries. They are placed in a subdirectory called .libs when they are newly +built. The programs pcre2test and pcre2grep are built to use these uninstalled +libraries (by means of wrapper scripts in the case of shared libraries). When +you use "make install" to install shared libraries, pcre2grep and pcre2test are +automatically re-built to use the newly installed shared libraries before being +installed themselves. However, the versions left in the build directory still +use the uninstalled libraries. + +To build PCRE2 using static libraries only you must use --disable-shared when +configuring it. 
For example: + +./configure --prefix=/usr/gnu --disable-shared + +Then run "make" in the usual way. Similarly, you can use --disable-static to +build only shared libraries. Note, however, that when you build only static +libraries, binary programs such as pcre2test and pcre2grep may still be +dynamically linked with other libraries (for example, libc) unless you set +LDFLAGS to --static when running "configure". + + +Cross-compiling using autotools +------------------------------- + +You can specify CC and CFLAGS in the normal way to the "configure" command, in +order to cross-compile PCRE2 for some other host. However, you should NOT +specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c +source file is compiled and run on the local host, in order to generate the +inbuilt character tables (the pcre2_chartables.c file). This will probably not +work, because pcre2_dftables.c needs to be compiled with the local compiler, +not the cross compiler. + +When --enable-rebuild-chartables is not specified, pcre2_chartables.c is +created by making a copy of pcre2_chartables.c.dist, which is a default set of +tables that assumes ASCII code. Cross-compiling with the default tables should +not be a problem. + +If you need to modify the character tables when cross-compiling, you should +move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by +hand and run it on the local host to make a new version of +pcre2_chartables.c.dist. See the pcre2build section "Creating character tables +at build time" for more details. + + +Making new tarballs +------------------- + +The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and +zip formats. The command "make distcheck" does the same, but then does a trial +build of the new distribution to ensure that it works. + +If you have modified any of the man page sources in the doc directory, you +should first run the PrepareRelease script before making a distribution. 
This +script creates the .txt and HTML forms of the documentation from the man pages. + + +Testing PCRE2 +------------- + +To test the basic PCRE2 library on a Unix-like system, run the RunTest script. +There is another script called RunGrepTest that tests the pcre2grep command. +When the 8-bit library is built, a test program for the POSIX wrapper, called +pcre2posix_test, is compiled, and when JIT support is enabled, a test program +called pcre2_jit_test is built. The scripts and the program tests are all run +when you obey "make check". For other environments, see the instructions in +NON-AUTOTOOLS-BUILD. + +The RunTest script runs the pcre2test test program (which is documented in its +own man page) on each of the relevant testinput files in the testdata +directory, and compares the output with the contents of the corresponding +testoutput files. RunTest uses a file called testtry to hold the main output +from pcre2test. Other files whose names begin with "test" are used as working +files in some tests. + +Some tests are relevant only when certain build-time options were selected. For +example, the tests for UTF-8/16/32 features are run only when Unicode support +is available. RunTest outputs a comment when it skips a test. + +Many (but not all) of the tests that are not skipped are run twice if JIT +support is available. On the second run, JIT compilation is forced. This +testing can be suppressed by putting "-nojit" on the RunTest command line. + +The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit +libraries that are enabled. If you want to run just one set of tests, call +RunTest with either the -8, -16 or -32 option. + +If valgrind is installed, you can run the tests under it by putting "-valgrind" +on the RunTest command line. 
To run pcre2test on just one or more specific test +files, give their numbers as arguments to RunTest, for example: + + RunTest 2 7 11 + +You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the +end), or a number preceded by ~ to exclude a test. For example: + + RunTest 3-15 ~10 + +This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests +except test 13. Whatever order the arguments are in, the tests are always run +in numerical order. + +You can also call RunTest with the single argument "list" to cause it to output +a list of tests. + +The test sequence starts with "test 0", which is a special test that has no +input file, and whose output is not checked. This is because it will be +different on different hardware and with different configurations. The test +exists in order to exercise some of pcre2test's code that would not otherwise +be run. + +Tests 1 and 2 can always be run, as they expect only plain text strings (not +UTF) and make no use of Unicode properties. The first test file can be fed +directly into the perltest.sh script to check that Perl gives the same results. +The only difference you should see is in the first few lines, where the Perl +version is given instead of the PCRE2 version. The second set of tests checks +auxiliary functions, error detection, and run-time flags that are specific to +PCRE2. It also uses the debugging flags to check some of the internals of +pcre2_compile(). + +If you build PCRE2 with a locale setting that is not the standard C locale, the +character tables may be different (see next paragraph). In some cases, this may +cause failures in the second set of tests. For example, in a locale where the +isprint() function yields TRUE for characters in the range 128-255, the use of +[:ascii:] inside a character class defines a different set of characters, and +this shows up in this test as a difference in the compiled code, which is being +listed for checking.
For example, where the comparison test output contains +[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other +cases. This is not a bug in PCRE2. + +Test 3 checks pcre2_maketables(), the facility for building a set of character +tables for a specific locale and using them instead of the default tables. The +script uses the "locale" command to check for the availability of the "fr_FR", +"french", or "fr" locale, and uses the first one that it finds. If the "locale" +command fails, or if its output doesn't include "fr_FR", "french", or "fr" in +the list of available locales, the third test cannot be run, and a comment is +output to say why. If running this test produces an error like this: + + ** Failed to set locale "fr_FR" + +it means that the given locale is not available on your system, despite being +listed by "locale". This does not mean that PCRE2 is broken. There are three +alternative output files for the third test, because three different versions +of the French locale have been encountered. The test passes if its output +matches any one of them. + +Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible +with the perltest.sh script, and test 5 checking PCRE2-specific things. + +Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in +non-UTF mode and UTF-mode with Unicode property support, respectively. + +Test 8 checks some internal offsets and code size features, but it is run only +when Unicode support is enabled. The output is different in 8-bit, 16-bit, and +32-bit modes and for different link sizes, so there are different output files +for each mode and link size. + +Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in +16-bit and 32-bit modes. These are tests that generate different output in +8-bit mode. Each pair are for general cases and Unicode support, respectively. 
+ +Test 13 checks the handling of non-UTF characters greater than 255 by +pcre2_dfa_match() in 16-bit and 32-bit modes. + +Test 14 contains some special UTF and UCP tests that give different output for +different code unit widths. + +Test 15 contains a number of tests that must not be run with JIT. They check, +among other non-JIT things, the match-limiting features of the interpretive +matcher. + +Test 16 is run only when JIT support is not available. It checks that an +attempt to use JIT has the expected behaviour. + +Test 17 is run only when JIT support is available. It checks JIT complete and +partial modes, match-limiting under JIT, and other JIT-specific features. + +Tests 18 and 19 are run only in 8-bit mode. They check the POSIX interface to +the 8-bit library, without and with Unicode support, respectively. + +Test 20 checks the serialization functions by writing a set of compiled +patterns to a file, and then reloading and checking them. + +Tests 21 and 22 test \C support when the use of \C is not locked out, without +and with UTF support, respectively. Test 23 tests \C when it is locked out. + +Tests 24 and 25 test the experimental pattern conversion functions, without and +with UTF support, respectively. + +Test 26 checks Unicode property support using tests that are generated +automatically from the Unicode data tables. + + +Character tables +---------------- + +For speed, PCRE2 uses four tables for manipulating and identifying characters +whose code point values are less than 256. By default, a set of tables that is +built into the library is used. The pcre2_maketables() function can be called +by an application to create a new set of tables in the current locale. These are +passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a +compile context. + +The source file called pcre2_chartables.c contains the default set of tables.
+By default, this is created as a copy of pcre2_chartables.c.dist, which +contains tables for ASCII coding. However, if --enable-rebuild-chartables is +specified for ./configure, a new version of pcre2_chartables.c is built by the +program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C +character handling functions such as isalnum(), isalpha(), isupper(), +islower(), etc. to build the table sources. This means that the default C +locale that is set for your system will control the contents of these default +tables. You can change the default tables by editing pcre2_chartables.c and +then re-building PCRE2. If you do this, you should take care to ensure that the +file does not get automatically re-generated. The best way to do this is to +move pcre2_chartables.c.dist out of the way and replace it with your customized +tables. + +When the pcre2_dftables program is run as a result of specifying +--enable-rebuild-chartables, it uses the default C locale that is set on your +system. It does not pay attention to the LC_xxx environment variables. In other +words, it uses the system's default locale rather than whatever the compiling +user happens to have set. If you really do want to build a source set of +character tables in a locale that is specified by the LC_xxx variables, you can +run the pcre2_dftables program by hand with the -L option. For example: + + ./pcre2_dftables -L pcre2_chartables.c.special + +The second argument names the file where the source code for the tables is +written. The first two 256-byte tables provide lower casing and case flipping +functions, respectively. The next table consists of a number of 32-byte bit +maps which identify certain character classes such as digits, "word" +characters, white space, etc. These are used when building 32-byte bit maps +that represent character classes for code points less than 256. 
The final +256-byte table has bits indicating various character types, as follows: + + 1 white space character + 2 letter + 4 lower case letter + 8 decimal digit + 16 alphanumeric or '_' + +You can also specify -b (with or without -L) when running pcre2_dftables. This +causes the tables to be written in binary instead of as source code. A set of +binary tables can be loaded into memory by an application and passed to +pcre2_compile() in the same way as tables created dynamically by calling +pcre2_maketables(). The tables are just a string of bytes, independent of +hardware characteristics such as endianness. This means they can be bundled +with an application that runs in different environments, to ensure consistent +behaviour. + +See also the pcre2build section "Creating character tables at build time". + + +File manifest +------------- + +The distribution should contain the files listed below. + +(A) Source files for the PCRE2 library functions and their headers are found in + the src directory: + + src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c + when --enable-rebuild-chartables is specified + + src/pcre2_chartables.c.dist a default set of character tables that assume + ASCII coding; unless --enable-rebuild-chartables is + specified, used by copying to pcre2_chartables.c + + src/pcre2posix.c ) + src/pcre2_auto_possess.c ) + src/pcre2_chkdint.c ) + src/pcre2_compile.c ) + src/pcre2_config.c ) + src/pcre2_context.c ) + src/pcre2_convert.c ) + src/pcre2_dfa_match.c ) + src/pcre2_error.c ) + src/pcre2_extuni.c ) + src/pcre2_find_bracket.c ) + src/pcre2_jit_compile.c ) + src/pcre2_jit_match.c ) sources for the functions in the library, + src/pcre2_jit_misc.c ) and some internal functions that they use + src/pcre2_maketables.c ) + src/pcre2_match.c ) + src/pcre2_match_data.c ) + src/pcre2_newline.c ) + src/pcre2_ord2utf.c ) + src/pcre2_pattern_info.c ) + src/pcre2_script_run.c ) + src/pcre2_serialize.c ) + src/pcre2_string_utils.c ) + 
src/pcre2_study.c ) + src/pcre2_substitute.c ) + src/pcre2_substring.c ) + src/pcre2_tables.c ) + src/pcre2_ucd.c ) + src/pcre2_ucptables.c ) + src/pcre2_valid_utf.c ) + src/pcre2_xclass.c ) + + src/pcre2_printint.c debugging function that is used by pcre2test, + src/pcre2_fuzzsupport.c function for (optional) fuzzing support + + src/config.h.in template for config.h, when built by "configure" + src/pcre2.h.in template for pcre2.h when built by "configure" + src/pcre2posix.h header for the external POSIX wrapper API + src/pcre2_internal.h header for internal use + src/pcre2_intmodedep.h a mode-specific internal header + src/pcre2_jit_neon_inc.h header used by JIT + src/pcre2_jit_simd_inc.h header used by JIT + src/pcre2_ucp.h header for Unicode property handling + + sljit/* source files for the JIT compiler + +(B) Source files for programs that use PCRE2: + + src/pcre2demo.c simple demonstration of coding calls to PCRE2 + src/pcre2grep.c source of a grep utility that uses PCRE2 + src/pcre2test.c comprehensive test program + src/pcre2_jit_test.c JIT test program + src/pcre2posix_test.c POSIX wrapper API test program + +(C) Auxiliary files: + + 132html script to turn "man" pages into HTML + AUTHORS information about the author of PCRE2 + ChangeLog log of changes to the code + CleanTxt script to clean nroff output for txt man pages + Detrail script to remove trailing spaces + HACKING some notes about the internals of PCRE2 + INSTALL generic installation instructions + LICENCE conditions for the use of PCRE2 + COPYING the same, using GNU's standard name + Makefile.in ) template for Unix Makefile, which is built by + ) "configure" + Makefile.am ) the automake input that was used to create + ) Makefile.in + NEWS important changes in this release + NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools + PrepareRelease script to make preparations for "make dist" + README this file + RunTest a Unix shell script for running tests + RunGrepTest a Unix shell 
script for pcre2grep tests + aclocal.m4 m4 macros (generated by "aclocal") + config.guess ) files used by libtool, + config.sub ) used only when building a shared library + configure a configuring shell script (built by autoconf) + configure.ac ) the autoconf input that was used to build + ) "configure" and config.h + depcomp ) script to find program dependencies, generated by + ) automake + doc/*.3 man page sources for PCRE2 + doc/*.1 man page sources for pcre2grep and pcre2test + doc/index.html.src the base HTML page + doc/html/* HTML documentation + doc/pcre2.txt plain text version of the man pages + doc/pcre2test.txt plain text documentation of test program + install-sh a shell script for installing files + libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config + libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config + libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config + libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config + ltmain.sh file used to build a libtool script + missing ) common stub for a few missing GNU programs while + ) installing, generated by automake + mkinstalldirs script for making install directories + perltest.sh Script for running a Perl test program + pcre2-config.in source of script which retains PCRE2 information + testdata/testinput* test data for main library tests + testdata/testoutput* expected test results + testdata/grep* input and output for pcre2grep tests + testdata/* other supporting test files + +(D) Auxiliary files for cmake support + + cmake/COPYING-CMAKE-SCRIPTS + cmake/FindPackageHandleStandardArgs.cmake + cmake/FindEditline.cmake + cmake/FindReadline.cmake + CMakeLists.txt + config-cmake.h.in + +(E) Auxiliary files for building PCRE2 "by hand" + + src/pcre2.h.generic ) a version of the public PCRE2 header file + ) for use in non-"configure" environments + src/config.h.generic ) a version of config.h for use in non-"configure" + ) environments + +(F) Auxiliary files for building PCRE2 
under OpenVMS + + vms/configure.com ) + vms/openvms_readme.txt ) These files were contributed by a PCRE2 user. + vms/pcre2.h_patch ) + vms/stdint.h ) + +Philip Hazel +Email local part: Philip.Hazel +Email domain: gmail.com +Last updated: 15 April 2024 diff --git a/RunGrepTest b/RunGrepTest new file mode 100755 index 0000000..c382187 --- /dev/null +++ b/RunGrepTest @@ -0,0 +1,1067 @@ +#! /bin/sh + +# Run pcre2grep tests. The assumption is that the PCRE2 tests check the library +# itself. What we are checking here is the file handling and options that are +# supported by pcre2grep. This script must be run in the build directory. + +# CODING CONVENTIONS: +# * Put printf arguments in single, not double quotes to avoid unwanted +# escaping. +# * Use \0 for binary zero in printf, not \x0, for the benefit of older +# versions (and use octal for other special values). + +# Set the C locale, so that sort(1) behaves predictably. + +LC_ALL=C +export LC_ALL + +# Remove any non-default colouring and aliases that the caller may have set. + +unset PCRE2GREP_COLOUR PCRE2GREP_COLOR PCREGREP_COLOUR PCREGREP_COLOR +unset GREP_COLOR GREP_COLORS +unset cp ls mv rm + +# Remember the current (build) directory, set the program to be tested, and +# valgrind settings when requested. + +builddir=`pwd` +pcre2grep=$builddir/pcre2grep +pcre2test=$builddir/pcre2test + +if [ ! -x $pcre2grep ] ; then + echo "** $pcre2grep does not exist or is not executable." + exit 1 +fi + +if [ ! -x $pcre2test ] ; then + echo "** $pcre2test does not exist or is not executable." + exit 1 +fi + +valgrind= +while [ $# -gt 0 ] ; do + case $1 in + valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all-non-file";; + *) echo "RunGrepTest: Unknown argument $1"; exit 1;; + esac + shift +done + +vjs= +pcre2grep_version=`$pcre2grep -V` +if [ "$valgrind" = "" ] ; then + echo "Testing $pcre2grep_version" +else + echo "Testing $pcre2grep_version using valgrind" + $pcre2test -C jit >/dev/null + if [ $? 
-ne 0 ]; then + vjs="--suppressions=./testdata/valgrind-jit.supp" + fi +fi + +# Set up a suitable "diff" command for comparison. Some systems have a diff +# that lacks a -u option. Try to deal with this; better do the test for the -b +# option as well. + +cf="diff" +diff -b /dev/null /dev/null 2>/dev/null && cf="diff -b" +diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u" +diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub" + +# Add a -a (always treat as text) if available. This was added in an attempt +# to get more detail from an Alpine Linux test failure on GitHub. + +$cf -a /dev/null /dev/null 2>/dev/null && cf="$cf -a" + +# Some tests involve NUL characters. It seems impossible to handle them easily +# in many operating systems. An earlier version of this script used sed to +# translate NUL into the string ZERO, but this didn't work on Solaris (aka +# SunOS), where the version of sed explicitly doesn't like them, and also MacOS +# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine, +# even when using GNU sed. A user suggested using tr instead, which +# necessitates translating to a single character. However, on (some versions +# of?) Solaris, the normal "tr" cannot handle binary zeros, but if +# /usr/xpg4/bin/tr is available, it can do so, so test for that. + +if [ -x /usr/xpg4/bin/tr ] ; then + tr=/usr/xpg4/bin/tr +else + tr=tr +fi + +# If this test is being run from "make check", $srcdir will be set. If not, set +# it to the current or parent directory, whichever one contains the test data. +# Subsequently, we run most of the pcre2grep tests in the source directory so +# that the file names in the output are always the same. + +if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then + if [ -d "./testdata" ] ; then + srcdir=. + elif [ -d "../testdata" ] ; then + srcdir=.. 
+ else + echo "Cannot find the testdata directory" + exit 1 + fi +fi + +# Check for the availability of UTF-8 support + +$pcre2test -C unicode >/dev/null +utf8=$? + +# Check default newline convention. If it does not include LF, force LF. + +nl=`$pcre2test -C newline` +if [ "$nl" != "LF" -a "$nl" != "ANY" -a "$nl" != "ANYCRLF" ]; then + pcre2grep="$pcre2grep -N LF" + echo "Default newline setting forced to LF" +fi + +# ------ Function to run pcre2grep with special arguments ($1) and check that its exit code is $2 ------- + +checkspecial() + { + $valgrind $pcre2grep $1 >>testtrygrep 2>&1 + if [ $? -ne $2 ] ; then + echo "** pcre2grep $1 failed - check testtrygrep" + exit 1 + fi + } + +# ------ Normal tests ------ + +echo "Testing pcre2grep main features" + +echo "---------------------------- Test 1 ------------------------------" >testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 2 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep '^PATTERN' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 3 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -in PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 4 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -ic PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 5 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?"
>>testtrygrep + +echo "---------------------------- Test 6 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 7 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 8 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 9 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 10 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 11 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -vn pattern ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 12 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -ix pattern ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 13 -----------------------------" >>testtrygrep +echo seventeen >testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -f./testdata/greplist -f $builddir/testtemp1grep ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 14 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 15 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep 'abc^*' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 16 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep abc ./testdata/grepinput ./testdata/nonexistfile) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 17 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -M 'the\noutput' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 18 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 19 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mix 'Pattern' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 20 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 21 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 22 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nB3 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 23 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -C3 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 24 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A9 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 25 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nB9 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 26 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A9 -B9 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 27 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A10 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 28 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nB10 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 29 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -C12 -B10 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 30 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 31 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 32 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 33 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 34 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -s 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 35 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 36 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include='grepinput[^C]' --exclude 'grepinput$' --exclude=grepinput8 --exclude=grepinputM --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 37 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep '^(a+)*\d' ./testdata/grepinput) >>testtrygrep 2>teststderrgrep +echo "RC=$?" >>testtrygrep +echo "======== STDERR ========" >>testtrygrep +cat teststderrgrep >>testtrygrep + +echo "---------------------------- Test 38 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep '>\x00<' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 39 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A1 'before the binary zero' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 40 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -B1 'after the binary zero' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 41 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 42 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 43 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -on 'before|zero|after' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 44 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -on -e before -ezero -e after ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 45 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 46 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -e 'unopened)' -e abc ./testdata/grepinput) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e xyz -e '[unclosed' ./testdata/grepinput) >>testtrygrep 2>&1 +(cd $srcdir; $valgrind $vjs $pcre2grep --regex=123 -eabc -e xyz -e '[unclosed' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 47 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Fx "AB.VE +elephant" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 48 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -F "AB.VE +elephant" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 49 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -F -e DATA -e "AB.VE +elephant" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 50 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 51 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mv "brown\sfox" ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 52 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --colour=always jumps ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 53 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 54 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 55 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 56 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -c --exclude=grepinputC lazy ./testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 57 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -c -l --exclude=grepinputC lazy ./testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 58 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --regex=PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 59 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --regexp=PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 60 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --regex PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 61 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --regexp PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 62 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $pcre2grep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 63 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $pcre2grep --recursion-limit=1K --no-jit -M 'This is a file(.|\R)*file.' 
./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 64 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 65 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 66 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 67 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 68 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 69 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -vn --colour=always pattern ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 70 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always -M -n "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep +echo "RC=$?" 
>>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -M -n "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 71 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 72 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 73 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 74 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o "^01|02|^03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 75 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 76 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 77 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o "^01|^02|03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 78 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 79 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 80 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o "\b01|\b02" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 81 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 82 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 83 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --buffer-size=10 --max-buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 84 -----------------------------" >>testtrygrep +echo testdata/grepinput3 >testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1grep "fox|complete|t7") >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 85 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 86 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 87 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep "cat" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 88 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -v "cat" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 89 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -I "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 90 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 91 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -a "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 92 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --binary-files=text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 93 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 94 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 95 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 96 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' --exclude=grepinput[MC] 'fox' ./test* | sort) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 97 -----------------------------" >>testtrygrep +echo "grepinput$" >testtemp1grep +echo "grepinput8" >>testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude=grepinput[MC] --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 98 -----------------------------" >>testtrygrep +echo "grepinput$" >testtemp1grep +echo "grepinput8" >>testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --exclude=grepinput[MC] --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 99 -----------------------------" >>testtrygrep +echo "grepinput$" >testtemp1grep +echo "grepinput8" >testtemp2grep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude=grepinput[MC] --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 100 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 101 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 102 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -n "^$" ./testdata/grepinput3) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 103 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 104 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -n --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 105 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --colour=always "ipsum|" ./testdata/grepinput3) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 106 -----------------------------" >>testtrygrep +(cd $srcdir; echo "a" | $valgrind $vjs $pcre2grep -M "|a" ) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 107 -----------------------------" >>testtrygrep +echo "a" >testtemp1grep +echo "aaaaa" >>testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets --allow-lookaround-bsk '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 108 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -lq PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 109 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -cq --exclude=grepinputC lazy ./testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 110 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --om-separator / -Mo0 -o1 -o2 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 111 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 112 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --file-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 113 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --total-count --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 114 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -tc --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 115 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -tlc --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 116 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --exclude=grepinput[MC] -th 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 117 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -tch --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 118 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -tL --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 119 -----------------------------" >>testtrygrep +printf '123\n456\n789\n---abc\ndef\nxyz\n---\n' >testNinputgrep +$valgrind $vjs $pcre2grep -Mo '(\n|[^-])*---' testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 120 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$0:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -m 1 -O '$0:$a$b$e$f$r$t$v' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -HO '${X}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -HO 'XX$' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -O '$x{12345678}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -O '$x{123Z' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --output '$x{1234}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 121 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -F '\E and (regex)' testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 122 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -w 'cat|dog' testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 123 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -w 'dog|cat' testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 124 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mn --colour=always 'start[\s]+end' testdata/grepinputM) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mn --colour=always -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mn 'start[\s]+end' testdata/grepinputM) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mn -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 125 -----------------------------" >>testtrygrep +printf 'abcd\n' >testNinputgrep +$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?<=\K.)' testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=.\K)' testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?<=\K[ac])' testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=[ac]\K)' testNinputgrep >>testtrygrep +echo "RC=$?" 
>>testtrygrep +GREP_COLORS='ms=1;20' $valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=[ac]\K)' testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 126 -----------------------------" >>testtrygrep +printf 'Next line pattern has binary zero\nABC\0XYZ\n' >testtemp1grep +printf 'ABC\0XYZ\nABCDEF\nDEFABC\n' >testtemp2grep +$valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep +echo "RC=$?" >>testtrygrep +printf 'Next line pattern is erroneous.\n^abc)(xy' >testtemp1grep +$valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 127 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 128 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -m1M -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 129 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -m 2 'fox' testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 130 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o -m2 'fox' testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 131 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -oc -m2 'fox' testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 132 -----------------------------" >>testtrygrep +(cd $srcdir; exec 3<testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; head -1 <&3; exec 3<&-) >>testtrygrep 2>&1 +echo "RC=$?"
>>testtrygrep + +echo "---------------------------- Test 133 -----------------------------" >>testtrygrep +(cd $srcdir; exec 3<testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; exec 3<&-) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 134 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --max-count=1 -nH -O '=$x{41}$x423$o{103}$o1045=' 'fox' -) <$srcdir/testdata/grepinputv >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 135 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -lZ 'word' ./testdata/grepinputv ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A 1 -B 1 -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -MHZn 'start[\s]+end' testdata/grepinputM) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 136 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -m1MK -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --max-count=1MK -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 137 -----------------------------" >>testtrygrep +printf 'Last line\nhas no newline' >testtemp1grep +$valgrind $vjs $pcre2grep -A1 Last testtemp1grep >>testtrygrep +echo "RC=$?"
>>testtrygrep + +echo "---------------------------- Test 138 -----------------------------" >>testtrygrep +printf 'AbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\n' >testtemp1grep +$valgrind $vjs $pcre2grep --no-jit --heap-limit=0 b testtemp1grep >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 139 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --line-buffered 'fox' testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 140 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --buffer-size=10 -A1 'brown' testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 141 -----------------------------" >>testtrygrep +printf "$srcdir/testdata/grepinputv\n-\n" >testtemp1grep +printf 'This is a line from stdin.' >testtemp2grep +$valgrind $vjs $pcre2grep --file-list testtemp1grep "line from stdin" <testtemp2grep >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 142 -----------------------------" >>testtrygrep +printf "/does/not/exist\n" >testtemp1grep +printf 'This is a line from stdin.' >testtemp2grep +$valgrind $vjs $pcre2grep --file-list testtemp1grep "line from stdin" >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 143 -----------------------------" >>testtrygrep +printf 'fox|cat' >testtemp1grep +$valgrind $vjs $pcre2grep -f - $srcdir/testdata/grepinputv <testtemp1grep >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 144 -----------------------------" >>testtrygrep +$valgrind $vjs $pcre2grep -f /non/exist $srcdir/testdata/grepinputv >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 145 -----------------------------" >>testtrygrep +printf '*meta*\rdog.'
>testtemp1grep +$valgrind $vjs $pcre2grep -Ncr -F -f testtemp1grep $srcdir/testdata/grepinputv >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 146 -----------------------------" >>testtrygrep +printf 'A123B' >testtemp1grep +$valgrind $vjs $pcre2grep -H -e '123|fox' - <testtemp1grep >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep -h -e '123|fox' - $srcdir/testdata/grepinputv <testtemp1grep >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep - $srcdir/testdata/grepinputv <testtemp1grep >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 147 -----------------------------" >>testtrygrep +$valgrind $vjs $pcre2grep -e '123|fox' -- -nonfile >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 148 -----------------------------" >>testtrygrep +$valgrind $vjs $pcre2grep --nonexist >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep -n-n-bad >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --context >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --only-matching --output=xx >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --colour=badvalue >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --newline=badvalue >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep -d badvalue >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep -D badvalue >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --buffer-size=0 >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --exclude '(badpat' abc /dev/null >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --exclude-from /non/exist abc /dev/null >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --include-from /non/exist abc /dev/null >>testtrygrep 2>&1 +echo "RC=$?"
>>testtrygrep +$valgrind $vjs $pcre2grep --file-list=/non/exist abc /dev/null >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 149 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --binary-files=binary "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --binary-files=wrong "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +# This test runs the code that tests locale support. However, on some systems +# (e.g. Alpine Linux) there is no locale support and running this test just +# generates a "no match" result. Therefore, we test for locale support, and if +# it is found missing, we pretend that the test has run as expected so that the +# output matches. + +echo "---------------------------- Test 150 -----------------------------" >>testtrygrep +which locale >/dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "pcre2grep: Failed to set locale badlocale (obtained from LC_CTYPE)" >>testtrygrep + echo "RC=2" >>testtrygrep +else + + (cd $srcdir; unset LC_ALL; env LC_CTYPE=badlocale $valgrind $vjs $pcre2grep abc /dev/null) >>testtrygrep 2>&1 + echo "RC=$?" >>testtrygrep +fi + +echo "---------------------------- Test 151 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --colour=always -e this -e The -e 'The wo' testdata/grepinputv) >>testtrygrep + +echo "---------------------------- Test 152 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --group-separator='++' 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 153 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --no-group-separator 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + + +# Now compare the results. 
+ +$cf $srcdir/testdata/grepoutput testtrygrep +if [ $? != 0 ] ; then exit 1; fi + + +# These tests require UTF-8 support + +if [ $utf8 -ne 0 ] ; then + echo "Testing pcre2grep UTF-8 features" + + echo "---------------------------- Test U1 ------------------------------" >testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U2 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U3 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -u --newline=any --allow-lookaround-bsk '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U4 ------------------------------" >>testtrygrep + printf 'A\341\200\200\200CD\342\200\200Z\n' >testtemp1grep + (cd $srcdir; $valgrind $vjs $pcre2grep -u -o '....' $builddir/testtemp1grep) >>testtrygrep 2>&1 + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U5 ------------------------------" >>testtrygrep + printf 'A\341\200\200\200CD\342\200\200Z\n' >testtemp1grep + (cd $srcdir; $valgrind $vjs $pcre2grep -U -o '....' $builddir/testtemp1grep) >>testtrygrep + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U6 -----------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -u -m1 -O '=$x{1d3}$o{744}=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1 + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U7 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -ui --colour=always 'k+|\babc\b' ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" 
>>testtrygrep + + echo "---------------------------- Test U8 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -UiEP --colour=always 'k+|\babc\b' ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U9 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -u --colour=always 'A\d' ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U10 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -u --posix-digit --colour=always 'A\d' ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" >>testtrygrep + + $cf $srcdir/testdata/grepoutput8 testtrygrep + if [ $? != 0 ] ; then exit 1; fi + +else + echo "Skipping pcre2grep UTF-8 tests: no UTF-8 support in PCRE2 library" +fi + + +# We go to some contortions to try to ensure that the tests for the various +# newline settings will work in environments where the normal newline sequence +# is not \n. Do not use exported files, whose line endings might be changed. +# Instead, create an input file using printf so that its contents are exactly +# what we want. Note the messy fudge to get printf to write a string that +# starts with a hyphen. These tests are run in the build directory. 
+ +echo "Testing pcre2grep newline settings" +printf 'abc\rdef\r\nghi\njkl' >testNinputgrep + +printf '%c--------------------------- Test N1 ------------------------------\r\n' - >testtrygrep +$valgrind $vjs $pcre2grep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +$valgrind $vjs $pcre2grep -B1 -n -N CR "^def" testNinputgrep >>testtrygrep + +printf '%c--------------------------- Test N2 ------------------------------\r\n' - >>testtrygrep +$valgrind $vjs $pcre2grep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +$valgrind $vjs $pcre2grep -B1 -n -N CRLF "^ghi" testNinputgrep >>testtrygrep + +printf '%c--------------------------- Test N3 ------------------------------\r\n' - >>testtrygrep +pattern=`printf 'def\rjkl'` +$valgrind $vjs $pcre2grep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep + +printf '%c--------------------------- Test N4 ------------------------------\r\n' - >>testtrygrep +$valgrind $vjs $pcre2grep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep + +printf '%c--------------------------- Test N5 ------------------------------\r\n' - >>testtrygrep +$valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +$valgrind $vjs $pcre2grep -B1 -n --newline=any "^def" testNinputgrep >>testtrygrep + +printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep +$valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +$valgrind $vjs $pcre2grep -B1 -n --newline=anycrlf "^jkl" testNinputgrep >>testtrygrep + +printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep +printf 'xyz\0abc\0def' >testNinputgrep +$valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep +$valgrind $vjs $pcre2grep -B1 -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep +echo "" >>testtrygrep + 
+$cf $srcdir/testdata/grepoutputN testtrygrep +if [ $? != 0 ] ; then exit 1; fi + + +# These newline tests need UTF support. + +if [ $utf8 -ne 0 ] ; then + echo "Testing pcre2grep newline settings with UTF-8 features" + + printf '%c--------------------------- Test UN1 ------------------------------\r\n' - >testtrygrep + printf 'abc\341\210\264def\nxyz' >testNinputgrep + $valgrind $vjs $pcre2grep -nau --newline=anycrlf "^(abc|def)" testNinputgrep >>testtrygrep + echo "" >>testtrygrep + + $cf $srcdir/testdata/grepoutputUN testtrygrep + if [ $? != 0 ] ; then exit 1; fi +else + echo "Skipping pcre2grep newline UTF-8 tests: no UTF-8 support in PCRE2 library" +fi + + +# If pcre2grep supports script callouts, run some tests on them. It is possible +# to restrict these callouts to the non-fork case, either for security, or for +# environments that do not support fork(). This is handled by comparing to a +# different output. + +if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scripts in patterns are supported'; then + echo "Testing pcre2grep script callouts" + $valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep + $valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep + $valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep + $valgrind $vjs $pcre2grep '(T)(?C"/bin/echo|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep + $valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep + $valgrind $vjs $pcre2grep -m1 '(T)(?C"|$0:$1:$x{41}$o{101}$n")' $srcdir/testdata/grepinputv >>testtrygrep + + if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Non-fork callout scripts in patterns are supported'; then + nonfork=1 + $cf $srcdir/testdata/grepoutputCN testtrygrep + else + nonfork=0 + $cf 
$srcdir/testdata/grepoutputC testtrygrep + fi + if [ $? != 0 ] ; then exit 1; fi + + # These callout tests need UTF support. + + if [ $utf8 -ne 0 ] ; then + echo "Testing pcre2grep script callout with UTF-8 features" + $valgrind $vjs $pcre2grep -u '(T)(?C"|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >testtrygrep + $valgrind $vjs $pcre2grep -u '(T)(?C"/bin/echo|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >>testtrygrep + + if [ $nonfork = 1 ] ; then + $cf $srcdir/testdata/grepoutputCNU testtrygrep + else + $cf $srcdir/testdata/grepoutputCU testtrygrep + fi + if [ $? != 0 ] ; then exit 1; fi + fi +else + echo "Script callouts are not supported" +fi + + +# Test reading .gz and .bz2 files when supported. + +if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q '\.gz are read using zlib'; then + echo "Testing reading .gz file" + $valgrind $vjs $pcre2grep 'one|two' $srcdir/testdata/grepinputC.gz >testtrygrep + echo "RC=$?" >>testtrygrep + $cf $srcdir/testdata/grepoutputCgz testtrygrep + if [ $? != 0 ] ; then exit 1; fi +fi + +if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q '\.bz2 are read using bzlib2'; then + echo "Testing reading .bz2 file" + $valgrind $vjs $pcre2grep 'one|two' $srcdir/testdata/grepinputC.bz2 >testtrygrep + echo "RC=$?" >>testtrygrep + $valgrind $vjs $pcre2grep 'one|two' $srcdir/testdata/grepnot.bz2 >>testtrygrep + echo "RC=$?" >>testtrygrep + $cf $srcdir/testdata/grepoutputCbz2 testtrygrep + if [ $? != 0 ] ; then exit 1; fi +fi + + +# Finally, some tests to exercise code that is not tested above, just to be +# sure that it runs OK. Doing this improves the coverage statistics. The output +# is not checked. + +echo "Testing miscellaneous pcre2grep arguments (unchecked)" +echo '' >testtrygrep +checkspecial '-xxxxx' 2 +checkspecial '--help' 0 +checkspecial '--line-buffered --colour=auto abc /dev/null' 1 +checkspecial '--line-buffered --color abc /dev/null' 1 +checkspecial '-dskip abc .' 
1 +checkspecial '-Dread -Dskip abc /dev/null' 1 + +# Clean up local working files +rm -f testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep + +exit 0 + +# End diff --git a/RunGrepTest.bat b/RunGrepTest.bat new file mode 100644 index 0000000..4a095a3 --- /dev/null +++ b/RunGrepTest.bat @@ -0,0 +1,699 @@ +@echo off + +:: Run pcre2grep tests. The assumption is that the PCRE2 tests check the library +:: itself. What we are checking here is the file handling and options that are +:: supported by pcre2grep. This script must be run in the build directory. +:: (jmh: I've only tested in the main directory, using my own builds.) + +setlocal enabledelayedexpansion + +:: Remove any non-default colouring that the caller may have set. + +set PCRE2GREP_COLOUR= +set PCRE2GREP_COLOR= +set PCREGREP_COLOUR= +set PCREGREP_COLOR= +set GREP_COLORS= +set GREP_COLOR= + +:: Remember the current (build) directory and set the program to be tested. + +set builddir="%CD%" +set pcre2grep=%builddir%\pcre2grep.exe +set pcre2test=%builddir%\pcre2test.exe + +if NOT exist %pcre2grep% ( + echo ** %pcre2grep% does not exist. + exit /b 1 +) + +if NOT exist %pcre2test% ( + echo ** %pcre2test% does not exist. + exit /b 1 +) + +for /f "delims=" %%a in ('"%pcre2grep%" -V') do set pcre2grep_version=%%a +echo Testing %pcre2grep_version% + +:: Set up a suitable "diff" command for comparison. Some systems have a diff +:: that lacks a -u option. Try to deal with this; better do the test for the -b +:: option as well. Use FC if there's no diff, taking care to ignore equality. + +set cf= +set cfout= +diff -b nul nul 2>nul && set cf=diff -b +diff -u nul nul 2>nul && set cf=diff -u +diff -ub nul nul 2>nul && set cf=diff -ub +if NOT defined cf ( + set cf=fc /n + set "cfout=>testcf || (type testcf & cmd /c exit /b 1)" +) + +:: Set srcdir to the current or parent directory, whichever one contains the +:: test data. 
Subsequently, we run most of the pcre2grep tests in the source +:: directory so that the file names in the output are always the same. + +if NOT defined srcdir set srcdir=. +if NOT exist %srcdir%\testdata\ ( + if exist testdata\ ( + set srcdir=. + ) else if exist ..\testdata\ ( + set srcdir=.. + ) else if exist ..\..\testdata\ ( + set srcdir=..\.. + ) else ( + echo Cannot find the testdata directory + exit /b 1 + ) +) + +:: Check for the availability of UTF-8 support + +%pcre2test% -C unicode >nul +set utf8=%ERRORLEVEL% + +:: Check default newline convention. If it does not include LF, force LF. + +for /f %%a in ('"%pcre2test%" -C newline') do set nl=%%a +if NOT "%nl%" == "LF" if NOT "%nl%" == "ANY" if NOT "%nl%" == "ANYCRLF" ( + set pcre2grep=%pcre2grep% -N LF + echo Default newline setting forced to LF +) + +:: Create a simple printf via cscript/JScript (an actual printf may translate +:: LF to CRLF, which this one does not). + +echo WScript.StdOut.Write(WScript.Arguments(0).replace(/\\r/g, "\r").replace(/\\n/g, "\n")) >printf.js +set printf=cscript //nologo printf.js + +:: ------ Normal tests ------ + +echo Testing pcre2grep main features + +echo ---------------------------- Test 1 ------------------------------>testtrygrep +(pushd %srcdir% & %pcre2grep% PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 2 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% "^PATTERN" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 3 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -in PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 4 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -ic PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + 
+echo ---------------------------- Test 5 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -in PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 6 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -inh PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 7 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -il PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 8 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -l PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 9 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -q PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 10 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 11 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -vn pattern ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 12 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -ix pattern ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 13 ----------------------------->>testtrygrep +echo seventeen >testtemp1grep +(pushd %srcdir% & %pcre2grep% -f./testdata/greplist -f 
%builddir%\testtemp1grep ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 14 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -w pat ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 15 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "abc^*" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 16 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% abc ./testdata/grepinput ./testdata/nonexistfile & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 17 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -M "the\noutput" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 18 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mn "(the\noutput|dog\.\n--)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 19 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mix "Pattern" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 20 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mixn "complete pair\nof lines" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 21 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nA3 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 22 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nB3 
"four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 23 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -C3 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 24 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -A9 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 25 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nB9 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 26 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -A9 -B9 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 27 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -A10 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 28 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nB10 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 29 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -C12 -B10 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 30 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -inB3 "pattern" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 31 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -inA3 "pattern" ./testdata/grepinput ./testdata/grepinputx & popd) 
>>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 32 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L "fox" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 33 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "fox" ./testdata/grepnonexist & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 34 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -s "fox" ./testdata/grepnonexist & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 35 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include=grepinputx --include grepinput8 --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 36 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude "grepinput$" --exclude=grepinput8 --exclude-dir="^\." 
"fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 37 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "^(a+)*\d" ./testdata/grepinput & popd) >>testtrygrep 2>teststderrgrep +echo RC=^%ERRORLEVEL%>>testtrygrep +echo ======== STDERR ========>>testtrygrep +type teststderrgrep >>testtrygrep + +echo ---------------------------- Test 38 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% ">\x00<" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 39 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -A1 "before the binary zero" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 40 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -B1 "after the binary zero" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 41 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -B1 -o "\w+ the binary zero" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 42 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -B1 -onH "\w+ the binary zero" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 43 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -on "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 44 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -on -e before -ezero -e after ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- 
Test 45 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -on -f ./testdata/greplist -e binary ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 46 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -eabc -e "(unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 47 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Fx AB.VE^ + +elephant ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 48 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -F AB.VE^ + +elephant ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 49 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -F -e DATA -e AB.VE^ + +elephant ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 50 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% "^(abc|def|ghi|jkl)" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 51 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mv "brown\sfox" ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 52 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% --colour=always jumps ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 53 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% --file-offsets "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + 
+echo ---------------------------- Test 54 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% --line-offsets "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 55 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -f./testdata/greplist --color=always ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 56 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -c lazy ./testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 57 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -c -l lazy ./testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 58 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --regex=PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 59 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --regexp=PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 60 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --regex PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 61 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --regexp PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 62 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --match-limit=1000 --no-jit -M "This is a file(.|\R)*file." 
./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 63 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --recursion-limit=1000 --no-jit -M "This is a file(.|\R)*file." ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 64 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o1 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 65 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o2 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 66 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o3 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 67 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o12 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 68 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% --only-matching=2 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 69 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -vn --colour=always pattern ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 70 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo 
---------------------------- Test 71 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 72 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 73 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 74 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 75 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 76 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 77 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 78 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 79 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep +echo 
RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 80 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 81 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 82 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --colour=always "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 83 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --buffer-size=10 --max-buffer-size=100 "^a" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 84 ----------------------------->>testtrygrep +echo testdata/grepinput3 >testtemp1grep +(pushd %srcdir% & %pcre2grep% --file-list ./testdata/grepfilelist --file-list %builddir%\testtemp1grep "fox|complete|t7" & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 85 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 86 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 87 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "cat" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 88 
----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -v "cat" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 89 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -I "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 90 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --binary-files=without-match "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 91 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -a "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 92 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --binary-files=text "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 93 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --text "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 94 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include=grepinputx --include grepinput8 "fox" ./testdata/grepinput* | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 95 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete" & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 96 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include-dir=testdata --exclude "^^(?^!grepinput)" "fox" ./test* | sort & popd) 
>>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 97 ----------------------------->>testtrygrep +echo grepinput$>testtemp1grep +echo grepinput8>>testtemp1grep +(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 98 ----------------------------->>testtrygrep +echo grepinput$>testtemp1grep +echo grepinput8>>testtemp1grep +(pushd %srcdir% & %pcre2grep% -L -r --exclude=grepinput3 --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 99 ----------------------------->>testtrygrep +echo grepinput$>testtemp1grep +echo grepinput8>testtemp2grep +(pushd %srcdir% & %pcre2grep% -L -r --include grepinput --exclude-from %builddir%\testtemp1grep --exclude-from=%builddir%\testtemp2grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 100 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Ho2 --only-matching=1 -o3 "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 101 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator="|" "(\w+) binary (\w+)(\.)?" 
./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 102 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -n "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 103 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --only-matching "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 104 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -n --only-matching "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 105 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --colour=always "ipsum|" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 106 ----------------------------->>testtrygrep +(pushd %srcdir% & echo a| %pcre2grep% -M "|a" & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 107 ----------------------------->>testtrygrep +echo a>testtemp1grep +echo aaaaa>>testtemp1grep +(pushd %srcdir% & %pcre2grep% --line-offsets "(?<=\Ka)" %builddir%\testtemp1grep & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 108 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -lq PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 109 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -cq lazy ./testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 110 ----------------------------->>testtrygrep +(pushd 
%srcdir% & %pcre2grep% --om-separator / -Mo0 -o1 -o2 "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 111 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --line-offsets -M "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 112 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --file-offsets -M "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 113 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --total-count "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 114 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -tc "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 115 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -tlc "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 116 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -th "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 117 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -tch "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 118 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -tL "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 119 ----------------------------->>testtrygrep +%printf% 
"123\n456\n789\n---abc\ndef\nxyz\n---\n" >testNinputgrep +%pcre2grep% -Mo "(\n|[^-])*---" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 120 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -HO "$0:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +:: Now compare the results. + +%cf% %srcdir%\testdata\grepoutput testtrygrep %cfout% +if ERRORLEVEL 1 exit /b 1 + + +:: These tests require UTF-8 support + +if %utf8% neq 0 ( + echo Testing pcre2grep UTF-8 features + + echo ---------------------------- Test U1 ------------------------------>testtrygrep + (pushd %srcdir% & %pcre2grep% -n -u --newline=any "^X" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^%ERRORLEVEL%>>testtrygrep + + echo ---------------------------- Test U2 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -n -u -C 3 --newline=any "Match" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^%ERRORLEVEL%>>testtrygrep + + echo ---------------------------- Test U3 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% --line-offsets -u --newline=any "(?<=\K\x{17f})" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^%ERRORLEVEL%>>testtrygrep + + %cf% %srcdir%\testdata\grepoutput8 testtrygrep %cfout% + if ERRORLEVEL 1 exit /b 1 + +) else ( + echo Skipping pcre2grep UTF-8 tests: no UTF-8 support in PCRE2 library +) + + +:: We go to some contortions to try to ensure that the tests for the various +:: newline settings will work in environments where the normal newline sequence +:: is not \n. Do not use exported files, whose line endings might be changed. +:: Instead, create an input file so that its contents are exactly what we want. +:: These tests are run in the build directory. 
+ +echo Testing pcre2grep newline settings +%printf% "abc\rdef\r\nghi\njkl" >testNinputgrep + +echo ---------------------------- Test N1 ------------------------------>testtrygrep +%pcre2grep% -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep + +echo ---------------------------- Test N2 ------------------------------>>testtrygrep +%pcre2grep% -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep + +echo ---------------------------- Test N3 ------------------------------>>testtrygrep +for /f %%a in ('%printf% "def\rjkl"') do set pattern=%%a +%pcre2grep% -n --newline=cr -F "!pattern!" testNinputgrep >>testtrygrep + +echo ---------------------------- Test N4 ------------------------------>>testtrygrep +%pcre2grep% -n --newline=crlf -F -f %srcdir%/testdata/greppatN4 testNinputgrep >>testtrygrep + +echo ---------------------------- Test N5 ------------------------------>>testtrygrep +%pcre2grep% -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep + +echo ---------------------------- Test N6 ------------------------------>>testtrygrep +%pcre2grep% -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep + +%cf% %srcdir%\testdata\grepoutputN testtrygrep %cfout% +if ERRORLEVEL 1 exit /b 1 + +:: If pcre2grep supports script callouts, run some tests on them. 
+ +%pcre2grep% --help | %pcre2grep% -q "callout scripts in patterns are supported" +if %ERRORLEVEL% equ 0 ( + echo Testing pcre2grep script callouts + %pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep + %pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep + %pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%/testdata/grepinputv >>testtrygrep + %pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%/testdata/grepinputv >>testtrygrep + %pcre2grep% --help | %pcre2grep% -q "Non-script callout scripts in patterns are supported" + if not ERRORLEVEL 1 ( + %cf% %srcdir%\testdata\grepoutputCN testtrygrep %cfout% + ) else ( + %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout% + ) + if ERRORLEVEL 1 exit /b 1 +) else ( + echo Script callouts are not supported +) + +:: Finally, some tests to exercise code that is not tested above, just to be +:: sure that it runs OK. Doing this improves the coverage statistics. The output +:: is not checked. + +echo Testing miscellaneous pcre2grep arguments (unchecked) +%printf% "" >testtrygrep +call :checkspecial "-xxxxx" 2 || exit /b 1 +call :checkspecial "--help" 0 || exit /b 1 +call :checkspecial "--line-buffered --colour=auto abc nul" 1 || exit /b 1 + +:: Clean up local working files +del testcf printf.js testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep + +exit /b 0 + +:: ------ Function to run and check a special pcre2grep arguments test ------- + +:checkspecial + %pcre2grep% %~1 >>testtrygrep 2>&1 + if %ERRORLEVEL% neq %2 ( + echo ** pcre2grep %~1 failed - check testtrygrep + exit /b 1 + ) + exit /b 0 + +:: End diff --git a/RunTest b/RunTest new file mode 100755 index 0000000..d426a59 --- /dev/null +++ b/RunTest @@ -0,0 +1,916 @@ +#! /bin/sh + +############################################################################### +# Run the PCRE2 tests using the pcre2test program. 
The appropriate tests are +# selected, depending on which build-time options were used. +# +# When JIT support is available, all appropriate tests are run with and without +# JIT, unless "-nojit" is given on the command line. There are also two tests +# for JIT-specific features, one to be run when JIT support is available +# (unless "-nojit" is specified), and one when it is not. +# +# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also +# possible to select which to test by giving "-8", "-16" or "-32" on the +# command line. +# +# As well as "-nojit", "-8", "-16", and "-32", arguments for this script are +# individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the +# end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10" +# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests +# except test 10. Whatever order the arguments are in, these tests are always +# run in numerical order. +# +# If no specific tests are selected (which is the case when this script is run +# via 'make check') the default is to run all the numbered tests. +# +# There may also be named (as well as numbered) tests for special purposes. At +# present there is just one, called "heap". This test's output contains the +# sizes of heap frames and frame vectors, which depend on the environment. It +# is therefore not run unless explicitly requested. +# +# Inappropriate tests are automatically skipped (with a comment to say so). For +# example, if JIT support is not compiled, test 16 is skipped, whereas if JIT +# support is compiled, test 15 is skipped. +# +# Other arguments can be one of the words "-valgrind", "-valgrind-log", or +# "-sim" followed by an argument to run cross-compiled executables under a +# simulator, for example: +# +# RunTest 3 -sim "qemu-arm -s 8388608" +# +# For backwards compatibility, -nojit, -valgrind, -valgrind-log, and -sim may +# be given without the leading "-" character. 
+# +# When PCRE2 is compiled by clang with -fsanitize arguments, some tests need +# very much more stack than normal. In environments where the stack can be +# set at runtime, -bigstack sets a gigantic stack. +# +# There are two special cases where only one argument is allowed: +# +# If the first and only argument is "ebcdic", the script runs the special +# EBCDIC test that can be useful for checking certain EBCDIC features, even +# when run in an ASCII environment. PCRE2 must be built with EBCDIC support for +# this test to be run. +# +# If the script is obeyed as "RunTest list", a list of available tests is +# output, but none of them are run. +############################################################################### + +# Define test titles in variables so that they can be output as a list. Some +# of them are modified (e.g. with -8 or -16) when used in the actual tests. + +title0="Test 0: Unchecked pcre2test argument tests (to improve coverage)" +title1="Test 1: Main non-UTF, non-UCP functionality (compatible with Perl >= 5.10)" +title2="Test 2: API, errors, internals and non-Perl stuff" +title3="Test 3: Locale-specific features" +title4A="Test 4: UTF" +title4B=" and Unicode property support (compatible with Perl >= 5.10)" +title5A="Test 5: API, internals, and non-Perl stuff for UTF" +title5B=" and UCP support" +title6="Test 6: DFA matching main non-UTF, non-UCP functionality" +title7A="Test 7: DFA matching with UTF" +title7B=" and Unicode property support" +title8="Test 8: Internal offsets and code size tests" +title9="Test 9: Specials for the basic 8-bit library" +title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support" +title11="Test 11: Specials for the basic 16-bit and 32-bit libraries" +title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support" +title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries" +title14="Test 14: DFA specials for UTF and UCP support" +title15="Test 15: Non-JIT 
limits and other non-JIT tests" +title16="Test 16: JIT-specific features when JIT is not available" +title17="Test 17: JIT-specific features when JIT is available" +title18="Test 18: Tests of the POSIX interface, excluding UTF/UCP" +title19="Test 19: Tests of the POSIX interface with UTF/UCP" +title20="Test 20: Serialization and code copy tests" +title21="Test 21: \C tests without UTF (supported for DFA matching)" +title22="Test 22: \C tests with UTF (not supported for DFA matching)" +title23="Test 23: \C disabled test" +title24="Test 24: Non-UTF pattern conversion tests" +title25="Test 25: UTF pattern conversion tests" +title26="Test 26: Auto-generated unicode property tests" +maxtest=26 +titleheap="Test 'heap': Environment-specific heap tests" + +if [ $# -eq 1 -a "$1" = "list" ]; then + echo $title0 + echo $title1 + echo $title2 "(not UTF or UCP)" + echo $title3 + echo $title4A $title4B + echo $title5A $title5B + echo $title6 + echo $title7A $title7B + echo $title8 + echo $title9 + echo $title10 + echo $title11 + echo $title12 + echo $title13 + echo $title14 + echo $title15 + echo $title16 + echo $title17 + echo $title18 + echo $title19 + echo $title20 + echo $title21 + echo $title22 + echo $title23 + echo $title24 + echo $title25 + echo $title26 + echo "" + echo $titleheap + echo "" + echo "Numbered tests are automatically run if nothing selected." + echo "Named tests must be explicitly selected." + exit 0 +fi + +# Set up a suitable "diff" command for comparison. Some systems +# have a diff that lacks a -u option. Try to deal with this. 
+ +cf="diff" +diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u" + +# Find the test data + +if [ -n "$srcdir" -a -d "$srcdir" ] ; then + testdata="$srcdir/testdata" +elif [ -d "./testdata" ] ; then + testdata=./testdata +elif [ -d "../testdata" ] ; then + testdata=../testdata +else + echo "Cannot find the testdata directory" + exit 1 +fi + + +# ------ Function to check results of a test ------- + +# This function is called with three parameters: +# +# $1 the value of $? after a call to pcre2test +# $2 the suffix of the output file to compare with +# $3 the $opt value (empty, -jit, or -dfa) +# +# Note: must define using name(), not "function name", for Solaris. + +checkresult() + { + if [ $1 -ne 0 ] ; then + echo "** pcre2test failed - check testtry" + exit 1 + fi + case "$3" in + -jit) with=" with JIT";; + -dfa) with=" with DFA";; + *) with="";; + esac + $cf $testdata/testoutput$2 testtry + if [ $? != 0 ] ; then + echo "" + echo "** Test $2 failed$with" + exit 1 + fi + echo " OK$with" + } + + +# ------ Function to run and check a special pcre2test arguments test ------- + +checkspecial() + { + $valgrind $vjs ./pcre2test $1 >>testtry + if [ $? -ne 0 ] ; then + echo "** pcre2test $1 failed - check testtry" + exit 1 + fi + } + + +# ------ Special EBCDIC Test ------- + +if [ $# -eq 1 -a "$1" = "ebcdic" ]; then + $valgrind ./pcre2test -C ebcdic >/dev/null + ebcdic=$? + if [ $ebcdic -ne 1 ] ; then + echo "Cannot run EBCDIC tests: EBCDIC support not compiled" + exit 1 + fi + for opt in "" "-dfa"; do + ./pcre2test -q $opt $testdata/testinputEBC >testtry + checkresult $? EBC "$opt" + done +exit 0 +fi + + +# ------ Normal Tests ------ + +# Default values + +arg8= +arg16= +arg32= +nojit= +bigstack= +sim= +skip= +valgrind= +vjs= + +# This is in case the caller has set aliases (as I do - PH) +unset cp ls mv rm + +# Process options and select which tests to run; for those that are explicitly +# requested, check that the necessary optional facilities are available. 
+ +do0=no +do1=no +do2=no +do3=no +do4=no +do5=no +do6=no +do7=no +do8=no +do9=no +do10=no +do11=no +do12=no +do13=no +do14=no +do15=no +do16=no +do17=no +do18=no +do19=no +do20=no +do21=no +do22=no +do23=no +do24=no +do25=no +do26=no +doheap=no + +while [ $# -gt 0 ] ; do + case $1 in + 0) do0=yes;; + 1) do1=yes;; + 2) do2=yes;; + 3) do3=yes;; + 4) do4=yes;; + 5) do5=yes;; + 6) do6=yes;; + 7) do7=yes;; + 8) do8=yes;; + 9) do9=yes;; + 10) do10=yes;; + 11) do11=yes;; + 12) do12=yes;; + 13) do13=yes;; + 14) do14=yes;; + 15) do15=yes;; + 16) do16=yes;; + 17) do17=yes;; + 18) do18=yes;; + 19) do19=yes;; + 20) do20=yes;; + 21) do21=yes;; + 22) do22=yes;; + 23) do23=yes;; + 24) do24=yes;; + 25) do25=yes;; + 26) do26=yes;; + heap) doheap=yes;; + -8) arg8=yes;; + -16) arg16=yes;; + -32) arg32=yes;; + bigstack|-bigstack) bigstack=yes;; + nojit|-nojit) nojit=yes;; + sim|-sim) shift; sim=$1;; + valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all-non-file";; + valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all-non-file --log-file=report.%p ";; + ~*) + if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then + skip="$skip `expr "$1" : '~\([0-9]*\)*$'`" + else + echo "Unknown option or test selector '$1'"; exit 1 + fi + ;; + *-*) + if expr "$1" : '[0-9][0-9]*-[0-9]*$' >/dev/null; then + tf=`expr "$1" : '\([0-9]*\)'` + tt=`expr "$1" : '.*-\([0-9]*\)'` + if [ "$tt" = "" ] ; then tt=$maxtest; fi + if expr \( "$tt" ">" "$maxtest" \) >/dev/null; then + echo "Invalid test range '$1'"; exit 1 + fi + while expr "$tf" "<=" "$tt" >/dev/null; do + eval do${tf}=yes + tf=`expr $tf + 1` + done + else + echo "Invalid test range '$1'"; exit 1 + fi + ;; + *) echo "Unknown option or test selector '$1'"; exit 1;; + esac + shift +done + +# Find which optional facilities are available. + +$sim ./pcre2test -C linksize >/dev/null +link_size=$? 
+if [ $link_size -lt 2 ] ; then + echo "RunTest: Failed to find internal link size" + exit 1 +fi +if [ $link_size -gt 4 ] ; then + echo "RunTest: Failed to find internal link size" + exit 1 +fi + +# If it is possible to set the system stack size and -bigstack was given, +# set up a large stack. + +$sim ./pcre2test -S 64 /dev/null /dev/null +support_setstack=$? +if [ $support_setstack -eq 0 -a "$bigstack" != "" ] ; then + setstack="-S 64" +else + setstack="" +fi + +# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only +# one need be. + +$sim ./pcre2test -C pcre2-8 >/dev/null +support8=$? +$sim ./pcre2test -C pcre2-16 >/dev/null +support16=$? +$sim ./pcre2test -C pcre2-32 >/dev/null +support32=$? + +# \C may be disabled + +$sim ./pcre2test -C backslash-C >/dev/null +supportBSC=$? + +# Initialize all bitsizes skipped + +test8=skip +test16=skip +test32=skip + +# If no bitsize arguments, select all that are available + +if [ "$arg8$arg16$arg32" = "" ] ; then + if [ $support8 -ne 0 ] ; then + test8=-8 + fi + if [ $support16 -ne 0 ] ; then + test16=-16 + fi + if [ $support32 -ne 0 ] ; then + test32=-32 + fi + +# Otherwise, select requested bit sizes + +else + if [ "$arg8" = yes ] ; then + if [ $support8 -eq 0 ] ; then + echo "Cannot run 8-bit library tests: 8-bit library not compiled" + exit 1 + fi + test8=-8 + fi + if [ "$arg16" = yes ] ; then + if [ $support16 -eq 0 ] ; then + echo "Cannot run 16-bit library tests: 16-bit library not compiled" + exit 1 + fi + test16=-16 + fi + if [ "$arg32" = yes ] ; then + if [ $support32 -eq 0 ] ; then + echo "Cannot run 32-bit library tests: 32-bit library not compiled" + exit 1 + fi + test32=-32 + fi +fi + +# UTF support is implied by Unicode support, and it always applies to all bit +# sizes if both are supported; we can't have UTF-8 support without UTF-16 or +# UTF-32 support. + +$sim ./pcre2test -C unicode >/dev/null +utf=$? 
+ +# When JIT is used with valgrind, we need to set up valgrind suppressions as +# otherwise there are a lot of false positive valgrind reports when the +# the hardware supports SSE2. + +jitopt= +$sim ./pcre2test -C jit >/dev/null +jit=$? +if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then + jitopt=-jit + if [ "$valgrind" != "" ] ; then + vjs="--suppressions=$testdata/valgrind-jit.supp" + fi +fi + +# If no specific tests were requested, select all the numbered tests. Those +# that are not relevant will be automatically skipped. + +if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \ + $do4 = no -a $do5 = no -a $do6 = no -a $do7 = no -a \ + $do8 = no -a $do9 = no -a $do10 = no -a $do11 = no -a \ + $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \ + $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \ + $do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \ + $do24 = no -a $do25 = no -a $do26 = no -a $doheap = no \ + ]; then + do0=yes + do1=yes + do2=yes + do3=yes + do4=yes + do5=yes + do6=yes + do7=yes + do8=yes + do9=yes + do10=yes + do11=yes + do12=yes + do13=yes + do14=yes + do15=yes + do16=yes + do17=yes + do18=yes + do19=yes + do20=yes + do21=yes + do22=yes + do23=yes + do24=yes + do25=yes + do26=yes +fi + +# Handle any explicit skips at this stage, so that an argument list may consist +# only of explicit skips. + +for i in $skip; do eval do$i=no; done + +# Show which release and which test data + +echo "" +echo PCRE2 C library tests using test data from $testdata +$sim ./pcre2test /dev/null +echo "" + +for bmode in "$test8" "$test16" "$test32"; do + case "$bmode" in + skip) continue;; + -16) if [ "$test8$test32" != "skipskip" ] ; then echo ""; fi + bits=16; echo "---- Testing 16-bit library ----"; echo "";; + -32) if [ "$test8$test16" != "skipskip" ] ; then echo ""; fi + bits=32; echo "---- Testing 32-bit library ----"; echo "";; + -8) bits=8; echo "---- Testing 8-bit library ----"; echo "";; + esac + + # Test 0 is a special test. 
Its output is not checked, because it will + # be different on different hardware and with different configurations. + # Running this test just exercises the code. + + if [ $do0 = yes ] ; then + echo $title0 + echo '/abc/jit,memory,framesize' >testSinput + echo ' abc' >>testSinput + echo '' >testtry + checkspecial '-C' + checkspecial '--help' + if [ $support_setstack -eq 0 ] ; then + checkspecial '-S 1 -t 10 testSinput' + fi + echo " OK" + fi + + # Primary non-UTF test, compatible with JIT and all versions of Perl >= 5.8 + + if [ $do1 = yes ] ; then + echo $title1 + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput1 testtry + checkresult $? 1 "$opt" + done + fi + + # PCRE2 tests that are not Perl-compatible: API, errors, internals. We copy + # the testbtables file to the current directory for use by this test. + + if [ $do2 = yes ] ; then + echo $title2 "(excluding UTF-$bits)" + cp $testdata/testbtables . + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry + saverc=$? + if [ $saverc = 0 ] ; then + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -70,-62,-2,-1,0,100,101,191,300 >>testtry + checkresult $? 2 "$opt" + else + checkresult $saverc 2 "$opt" + fi + done + fi + + # Locale-specific tests, provided that either the "fr_FR", "fr_CA", "french" + # or "fr" locale is available. The first two are Unix-like standards; the + # last two are for Windows. Unfortunately, different versions of the French + # locale give different outputs for some items. This test passes if the + # output matches any one of the alternative output files. + + if [ $do3 = yes ] ; then + locale= + + # In some environments locales that are listed by the "locale -a" + # command do not seem to work with setlocale(). Therefore, we do + # a preliminary test to see if pcre2test can set one before going + # on to use it. 
+ + for loc in 'fr_FR' 'french' 'fr' 'fr_CA'; do + locale -a | grep "^$loc\$" >/dev/null + if [ $? -eq 0 ] ; then + echo "/a/locale=$loc" | \ + $sim $valgrind ./pcre2test -q $bmode | \ + grep "Failed to set locale" >/dev/null + if [ $? -ne 0 ] ; then + locale=$loc + if [ "$locale" = "fr_FR" ] ; then + infile=$testdata/testinput3 + outfile=$testdata/testoutput3 + outfile2=$testdata/testoutput3A + outfile3=$testdata/testoutput3B + else + infile=test3input + outfile=test3output + outfile2=test3outputA + outfile3=test3outputB + sed "s/fr_FR/$loc/" $testdata/testinput3 >test3input + sed "s/fr_FR/$loc/" $testdata/testoutput3 >test3output + sed "s/fr_FR/$loc/" $testdata/testoutput3A >test3outputA + sed "s/fr_FR/$loc/" $testdata/testoutput3B >test3outputB + fi + break + fi + fi + done + + if [ "$locale" != "" ] ; then + echo $title3 "(using '$locale' locale)" + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $infile testtry + if [ $? = 0 ] ; then + case "$opt" in + -jit) with=" with JIT";; + *) with="";; + esac + if $cf $outfile testtry >teststdout || \ + $cf $outfile2 testtry >teststdout || \ + $cf $outfile3 testtry >teststdout + then + echo " OK$with" + else + echo "** Locale test did not run successfully$with. The output did not match" + echo " $outfile, $outfile2 or $outfile3." + echo " This may mean that there is a problem with the locale settings rather" + echo " than a bug in PCRE2." + exit 1 + fi + else exit 1 + fi + done + else + echo "Cannot test locale-specific features - none of the 'fr_FR', 'fr_CA'," + echo "'fr' or 'french' locales can be set, or the \"locale\" command is" + echo "not available to check for them." 
+ echo " " + fi + fi + + # Tests for UTF and Unicode property support + + if [ $do4 = yes ] ; then + echo ${title4A}-${bits}${title4B} + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput4 testtry + checkresult $? 4 "$opt" + done + fi + fi + + if [ $do5 = yes ] ; then + echo ${title5A}-${bits}$title5B + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput5 testtry + checkresult $? 5 "$opt" + done + fi + fi + + # Tests for DFA matching support + + if [ $do6 = yes ] ; then + echo $title6 + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput6 testtry + checkresult $? 6 "" + fi + + if [ $do7 = yes ] ; then + echo ${title7A}-${bits}$title7B + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput7 testtry + checkresult $? 7 "" + fi + fi + + # Test of internal offsets and code sizes. This test is run only when there + # is UTF/UCP support. The actual tests are mostly the same as in some of the + # above, but in this test we inspect some offsets and sizes. This is a + # doublecheck for the maintainer, just in case something changes unexpectedly. + # The output from this test is different in 8-bit, 16-bit, and 32-bit modes + # and for different link sizes, so there are different output files for each + # mode and link size. + + if [ $do8 = yes ] ; then + echo $title8 + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput8 testtry + checkresult $? 
8-$bits-$link_size "" + fi + fi + + # Tests for 8-bit-specific features + + if [ "$do9" = yes ] ; then + echo $title9 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput9 testtry + checkresult $? 9 "$opt" + done + fi + fi + + # Tests for UTF-8 and UCP 8-bit-specific features + + if [ "$do10" = yes ] ; then + echo $title10 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput10 testtry + checkresult $? 10 "$opt" + done + fi + fi + + # Tests for 16-bit and 32-bit features. Output is different for the two widths. + + if [ $do11 = yes ] ; then + echo $title11 + if [ "$bits" = "8" ] ; then + echo " Skipped when running 8-bit tests" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput11 testtry + checkresult $? 11-$bits "$opt" + done + fi + fi + + # Tests for 16-bit and 32-bit features with UTF-16/32 and UCP support. Output + # is different for the two widths. + + if [ $do12 = yes ] ; then + echo $title12 + if [ "$bits" = "8" ] ; then + echo " Skipped when running 8-bit tests" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput12 testtry + checkresult $? 
12-$bits "$opt" + done + fi + fi + + # Tests for 16/32-bit-specific features in DFA non-UTF modes + + if [ $do13 = yes ] ; then + echo $title13 + if [ "$bits" = "8" ] ; then + echo " Skipped when running 8-bit tests" + else + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput13 testtry + checkresult $? 13 "" + fi + fi + + # Tests for DFA UTF and UCP features. Output is different for the different widths. + + if [ $do14 = yes ] ; then + echo $title14 + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput14 testtry + checkresult $? 14-$bits "" + fi + fi + + # Test non-JIT match and recursion limits + + if [ $do15 = yes ] ; then + echo $title15 + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput15 testtry + checkresult $? 15 "" + fi + + # Test JIT-specific features when JIT is not available + + if [ $do16 = yes ] ; then + echo $title16 + if [ $jit -ne 0 ] ; then + echo " Skipped because JIT is available" + else + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput16 testtry + checkresult $? 16 "" + fi + fi + + # Test JIT-specific features when JIT is available + + if [ $do17 = yes ] ; then + echo $title17 + if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then + echo " Skipped because JIT is not available or nojit was specified" + else + $sim $valgrind $vjs ./pcre2test -q $setstack $bmode $testdata/testinput17 testtry + checkresult $? 17 "" + fi + fi + + # Tests for the POSIX interface without UTF/UCP (8-bit only) + + if [ $do18 = yes ] ; then + echo $title18 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + else + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput18 testtry + checkresult $? 
18 "" + fi + fi + + # Tests for the POSIX interface with UTF/UCP (8-bit only) + + if [ $do19 = yes ] ; then + echo $title19 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput19 testtry + checkresult $? 19 "" + fi + fi + + # Serialization tests + + if [ $do20 = yes ] ; then + echo $title20 + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput20 testtry + checkresult $? 20 "" + fi + + # \C tests without UTF - DFA matching is supported + + if [ "$do21" = yes ] ; then + echo $title21 + if [ $supportBSC -eq 0 ] ; then + echo " Skipped because \C is disabled" + else + for opt in "" $jitopt -dfa; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput21 testtry + checkresult $? 21 "$opt" + done + fi + fi + + # \C tests with UTF - DFA matching is not supported for \C in UTF mode + + if [ "$do22" = yes ] ; then + echo $title22 + if [ $supportBSC -eq 0 ] ; then + echo " Skipped because \C is disabled" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput22 testtry + checkresult $? 22-$bits "$opt" + done + fi + fi + + # Test when \C is disabled + + if [ "$do23" = yes ] ; then + echo $title23 + if [ $supportBSC -ne 0 ] ; then + echo " Skipped because \C is not disabled" + else + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput23 testtry + checkresult $? 23 "" + fi + fi + + # Non-UTF pattern conversion tests + + if [ "$do24" = yes ] ; then + echo $title24 + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput24 testtry + checkresult $? 
24 "" + fi + + # UTF pattern conversion tests + + if [ "$do25" = yes ] ; then + echo $title25 + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinput25 testtry + checkresult $? 25 "" + fi + fi + + # Auto-generated unicode property tests + + if [ $do26 = yes ] ; then + echo $title26 + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry + checkresult $? 26 "$opt" + done + fi + fi + + # Manually selected heap tests - output may vary in different environments, + # which is why that are not automatically run. + + if [ $doheap = yes ] ; then + echo $titleheap + $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinputheap testtry + checkresult $? heap-$bits "" + fi + +# End of loop for 8/16/32-bit tests +done + +# Clean up local working files +rm -f testbtables testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry + +# End diff --git a/RunTest.bat b/RunTest.bat new file mode 100644 index 0000000..9f203fe --- /dev/null +++ b/RunTest.bat @@ -0,0 +1,528 @@ +@echo off +@rem +@rem MS Windows batch file to run pcre2test on testfiles with the correct +@rem options. This file must use CRLF linebreaks to function properly, +@rem and requires both pcre2test and pcre2grep. +@rem +@rem ------------------------ HISTORY ---------------------------------- +@rem This file was originally contributed to PCRE1 by Ralf Junker, and touched +@rem up by Daniel Richard G. Tests 10-12 added by Philip H. +@rem Philip H also changed test 3 to use "wintest" files. +@rem +@rem Updated by Tom Fortmann to support explicit test numbers on the command +@rem line. Added argument validation and added error reporting. 
+@rem +@rem Sheri Pierce added logic to skip feature dependent tests +@rem tests 4 5 7 10 12 14 19 and 22 require Unicode support +@rem 8 requires Unicode and link size 2 +@rem 16 requires absence of jit support +@rem 17 requires presence of jit support +@rem Sheri P also added override tests for study and jit testing +@rem Zoltan Herczeg added libpcre16 support +@rem Zoltan Herczeg added libpcre32 support +@rem ------------------------------------------------------------------- +@rem +@rem The file was converted for PCRE2 by PH, February 2015. +@rem Updated for new test 14 (moving others up a number), August 2015. +@rem Tidied and updated for new tests 21, 22, 23 by PH, October 2015. +@rem PH added missing "set type" for test 22, April 2016. +@rem PH added copy command for new testbtables file, November 2020 +@rem PH caused it to show comparison output when comparison failed, July 2023 +@rem PH updated unknown error number in test + + +setlocal enabledelayedexpansion +if [%srcdir%]==[] ( +if exist testdata\ set srcdir=.) +if [%srcdir%]==[] ( +if exist ..\testdata\ set srcdir=..) +if [%srcdir%]==[] ( +if exist ..\..\testdata\ set srcdir=..\..) +if NOT exist %srcdir%\testdata\ ( +echo Error: distribution testdata folder not found! +call :conferror +exit /b 1 +goto :eof +) + +if [%pcre2test%]==[] set pcre2test=.\pcre2test.exe + +echo source dir is %srcdir% +echo pcre2test=%pcre2test% + +if NOT exist %pcre2test% ( +echo Error: %pcre2test% not found! +echo.
+call :conferror +exit /b 1 +) + +%pcre2test% -C linksize >NUL +set link_size=%ERRORLEVEL% +%pcre2test% -C pcre2-8 >NUL +set support8=%ERRORLEVEL% +%pcre2test% -C pcre2-16 >NUL +set support16=%ERRORLEVEL% +%pcre2test% -C pcre2-32 >NUL +set support32=%ERRORLEVEL% +%pcre2test% -C unicode >NUL +set unicode=%ERRORLEVEL% +%pcre2test% -C jit >NUL +set jit=%ERRORLEVEL% +%pcre2test% -C backslash-C >NUL +set supportBSC=%ERRORLEVEL% + +if %support8% EQU 1 ( +if not exist testout8 md testout8 +if not exist testoutjit8 md testoutjit8 +) + +if %support16% EQU 1 ( +if not exist testout16 md testout16 +if not exist testoutjit16 md testoutjit16 +) + +if %support32% EQU 1 ( +if not exist testout32 md testout32 +if not exist testoutjit32 md testoutjit32 +) + +set do1=no +set do2=no +set do3=no +set do4=no +set do5=no +set do6=no +set do7=no +set do8=no +set do9=no +set do10=no +set do11=no +set do12=no +set do13=no +set do14=no +set do15=no +set do16=no +set do17=no +set do18=no +set do19=no +set do20=no +set do21=no +set do22=no +set do23=no +set all=yes + +for %%a in (%*) do ( + set valid=no + for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) do if %%v == %%a set valid=yes + if "!valid!" == "yes" ( + set do%%a=yes + set all=no +) else ( + echo Invalid test number - %%a! + echo Usage %0 [ test_number ] ... + echo Where test_number is one or more optional test numbers 1 through 23, default is all tests. + exit /b 1 +) +) +set failed="no" + +if "%all%" == "yes" ( + set do1=yes + set do2=yes + set do3=yes + set do4=yes + set do5=yes + set do6=yes + set do7=yes + set do8=yes + set do9=yes + set do10=no + set do11=yes + set do12=no + set do13=yes + set do14=yes + set do15=yes + set do16=yes + set do17=yes + set do18=yes + set do19=yes + set do20=yes + set do21=yes + set do22=yes + set do23=yes +) + +@echo RunTest.bat's pcre2test output is written to newly created subfolders +@echo named testout{8,16,32} and testoutjit{8,16,32}. +@echo.
+ +set mode= +set bits=8 + +:nextMode +if "%mode%" == "" ( + if %support8% EQU 0 goto modeSkip + echo. + echo ---- Testing 8-bit library ---- + echo. +) +if "%mode%" == "-16" ( + if %support16% EQU 0 goto modeSkip + echo. + echo ---- Testing 16-bit library ---- + echo. +) +if "%mode%" == "-32" ( + if %support32% EQU 0 goto modeSkip + echo. + echo ---- Testing 32-bit library ---- + echo. +) +if "%do1%" == "yes" call :do1 +if "%do2%" == "yes" call :do2 +if "%do3%" == "yes" call :do3 +if "%do4%" == "yes" call :do4 +if "%do5%" == "yes" call :do5 +if "%do6%" == "yes" call :do6 +if "%do7%" == "yes" call :do7 +if "%do8%" == "yes" call :do8 +if "%do9%" == "yes" call :do9 +if "%do10%" == "yes" call :do10 +if "%do11%" == "yes" call :do11 +if "%do12%" == "yes" call :do12 +if "%do13%" == "yes" call :do13 +if "%do14%" == "yes" call :do14 +if "%do15%" == "yes" call :do15 +if "%do16%" == "yes" call :do16 +if "%do17%" == "yes" call :do17 +if "%do18%" == "yes" call :do18 +if "%do19%" == "yes" call :do19 +if "%do20%" == "yes" call :do20 +if "%do21%" == "yes" call :do21 +if "%do22%" == "yes" call :do22 +if "%do23%" == "yes" call :do23 +:modeSkip +if "%mode%" == "" ( + set mode=-16 + set bits=16 + goto nextMode +) +if "%mode%" == "-16" ( + set mode=-32 + set bits=32 + goto nextMode +) + +@rem If mode is -32, testing is finished +if %failed% == "yes" ( +echo In above output, one or more of the various tests failed! +exit /b 1 +) +echo All OK +goto :eof + +:runsub +@rem Function to execute pcre2test and compare the output +@rem Arguments are as follows: +@rem +@rem 1 = test number +@rem 2 = outputdir +@rem 3 = test name use double quotes +@rem 4 - 9 = pcre2test options + +if [%1] == [] ( + echo Missing test number argument! + exit /b 1 +) + +if [%2] == [] ( + echo Missing outputdir! + exit /b 1 +) + +if [%3] == [] ( + echo Missing test name argument! 
+ exit /b 1 +) + +if %1 == 8 ( + set outnum=8-%bits%-%link_size% +) else ( + set outnum=%1 +) +set testinput=testinput%1 +set testoutput=testoutput%outnum% +if exist %srcdir%\testdata\win%testinput% ( + set testinput=wintestinput%1 + set testoutput=wintestoutput%outnum% +) + +echo Test %1: %3 +%pcre2test% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% >%2%bits%\%testoutput% +if errorlevel 1 ( + echo. failed executing command-line: + echo. %pcre2test% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% ^>%2%bits%\%testoutput% + set failed="yes" + goto :eof +) else if [%1]==[2] ( + %pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -70,-62,-2,-1,0,100,101,191,300 >>%2%bits%\%testoutput% +) + +set type= +if [%1]==[11] ( + set type=-%bits% +) +if [%1]==[12] ( + set type=-%bits% +) +if [%1]==[14] ( + set type=-%bits% +) +if [%1]==[22] ( + set type=-%bits% +) + +fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput% >NUL + +if errorlevel 1 ( + echo. failed comparison: fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput% + if [%1]==[3] ( + echo. + echo ** Test 3 failure usually means french locale is not + echo ** available on the system, rather than a bug or problem with PCRE2. + echo. + goto :eof +) + fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput% + + set failed="yes" + goto :eof +) + +echo. Passed. 
+goto :eof + +:do1 +call :runsub 1 testout "Main non-UTF, non-UCP functionality (Compatible with Perl >= 5.10)" -q +if %jit% EQU 1 call :runsub 1 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do2 + copy /y %srcdir%\testdata\testbtables testbtables + call :runsub 2 testout "API, errors, internals, and non-Perl stuff" -q + if %jit% EQU 1 call :runsub 2 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do3 + call :runsub 3 testout "Locale-specific features" -q + if %jit% EQU 1 call :runsub 3 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do4 +if %unicode% EQU 0 ( + echo Test 4 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 4 testout "UTF-%bits% and Unicode property support - (Compatible with Perl >= 5.10)" -q + if %jit% EQU 1 call :runsub 4 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do5 +if %unicode% EQU 0 ( + echo Test 5 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 5 testout "API, internals, and non-Perl stuff for UTF-%bits% and UCP" -q + if %jit% EQU 1 call :runsub 5 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do6 + call :runsub 6 testout "DFA matching main non-UTF, non-UCP functionality" -q +goto :eof + +:do7 +if %unicode% EQU 0 ( + echo Test 7 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 7 testout "DFA matching with UTF-%bits% and Unicode property support" -q + goto :eof + +:do8 +if NOT %link_size% EQU 2 ( + echo Test 8 Skipped because link size is not 2. + goto :eof +) +if %unicode% EQU 0 ( + echo Test 8 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 8 testout "Internal offsets and code size tests" -q +goto :eof + +:do9 +if NOT %bits% EQU 8 ( + echo Test 9 Skipped when running 16/32-bit tests. 
+ goto :eof +) + call :runsub 9 testout "Specials for the basic 8-bit library" -q + if %jit% EQU 1 call :runsub 9 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do10 +if NOT %bits% EQU 8 ( + echo Test 10 Skipped when running 16/32-bit tests. + goto :eof +) +if %unicode% EQU 0 ( + echo Test 10 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 10 testout "Specials for the 8-bit library with Unicode support" -q + if %jit% EQU 1 call :runsub 10 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do11 +if %bits% EQU 8 ( + echo Test 11 Skipped when running 8-bit tests. + goto :eof +) + call :runsub 11 testout "Specials for the basic 16/32-bit library" -q + if %jit% EQU 1 call :runsub 11 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do12 +if %bits% EQU 8 ( + echo Test 12 Skipped when running 8-bit tests. + goto :eof +) +if %unicode% EQU 0 ( + echo Test 12 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 12 testout "Specials for the 16/32-bit library with Unicode support" -q + if %jit% EQU 1 call :runsub 12 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do13 +if %bits% EQU 8 ( + echo Test 13 Skipped when running 8-bit tests. + goto :eof +) + call :runsub 13 testout "DFA specials for the basic 16/32-bit library" -q +goto :eof + +:do14 +if %unicode% EQU 0 ( + echo Test 14 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 14 testout "DFA specials for UTF and UCP support" -q + goto :eof + +:do15 +call :runsub 15 testout "Non-JIT limits and other non_JIT tests" -q +goto :eof + +:do16 +if %jit% EQU 1 ( + echo Test 16 Skipped due to presence of JIT support. + goto :eof +) + call :runsub 16 testout "JIT-specific features when JIT is not available" -q +goto :eof + +:do17 +if %jit% EQU 0 ( + echo Test 17 Skipped due to absence of JIT support. 
+ goto :eof +) + call :runsub 17 testout "JIT-specific features when JIT is available" -q +goto :eof + +:do18 +if %bits% EQU 16 ( + echo Test 18 Skipped when running 16-bit tests. + goto :eof +) +if %bits% EQU 32 ( + echo Test 18 Skipped when running 32-bit tests. + goto :eof +) + call :runsub 18 testout "POSIX interface, excluding UTF-8 and UCP" -q +goto :eof + +:do19 +if %bits% EQU 16 ( + echo Test 19 Skipped when running 16-bit tests. + goto :eof +) +if %bits% EQU 32 ( + echo Test 19 Skipped when running 32-bit tests. + goto :eof +) +if %unicode% EQU 0 ( + echo Test 19 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 19 testout "POSIX interface with UTF-8 and UCP" -q +goto :eof + +:do20 +call :runsub 20 testout "Serialization tests" -q +goto :eof + +:do21 +if %supportBSC% EQU 0 ( + echo Test 21 Skipped due to absence of backslash-C support. + goto :eof +) + call :runsub 21 testout "Backslash-C tests without UTF" -q + call :runsub 21 testout "Backslash-C tests without UTF (DFA)" -q -dfa + if %jit% EQU 1 call :runsub 21 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do22 +if %supportBSC% EQU 0 ( + echo Test 22 Skipped due to absence of backslash-C support. + goto :eof +) +if %unicode% EQU 0 ( + echo Test 22 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 22 testout "Backslash-C tests with UTF" -q + if %jit% EQU 1 call :runsub 22 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do23 +if %supportBSC% EQU 1 ( + echo Test 23 Skipped due to presence of backslash-C support. + goto :eof +) + call :runsub 23 testout "Backslash-C disabled test" -q +goto :eof + +:conferror +@echo. +@echo Either your build is incomplete or you have a configuration error. +@echo. +@echo If configured with cmake and executed via "make test" or the MSVC "RUN_TESTS" +@echo project, pcre2_test.bat defines variables and automatically calls RunTest.bat. 
+@echo For manual testing of all available features, after configuring with cmake +@echo and building, you can run the built pcre2_test.bat. For best results with +@echo cmake builds and tests avoid directories with full path names that include +@echo spaces for source or build. +@echo. +@echo Otherwise, if the build dir is in a subdir of the source dir, testdata needed +@echo for input and verification should be found automatically when (from the +@echo location of the the built exes) you call RunTest.bat. By default RunTest.bat +@echo runs all tests compatible with the linked pcre2 library but it can be given +@echo a test number as an argument. +@echo. +@echo If the build dir is not under the source dir you can either copy your exes +@echo to the source folder or copy RunTest.bat and the testdata folder to the +@echo location of your built exes and then run RunTest.bat. +@echo. +goto :eof diff --git a/aclocal.m4 b/aclocal.m4 new file mode 100644 index 0000000..db0861a --- /dev/null +++ b/aclocal.m4 @@ -0,0 +1,1561 @@ +# generated automatically by aclocal 1.16.5 -*- Autoconf -*- + +# Copyright (C) 1996-2021 Free Software Foundation, Inc. + +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.72],, +[m4_warning([this file was generated for autoconf 2.72. +You have another version of autoconf. It may work, but is not guaranteed to. 
+If you have problems, you may need to regenerate the build system entirely. +To do so, use the procedure documented by the package, typically 'autoreconf'.])]) + +# pkg.m4 - Macros to locate and use pkg-config. -*- Autoconf -*- +# serial 12 (pkg-config-0.29.2) + +dnl Copyright © 2004 Scott James Remnant . +dnl Copyright © 2012-2015 Dan Nicholson +dnl +dnl This program is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation; either version 2 of the License, or +dnl (at your option) any later version. +dnl +dnl This program is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +dnl 02111-1307, USA. +dnl +dnl As a special exception to the GNU General Public License, if you +dnl distribute this file as part of a program that contains a +dnl configuration script generated by Autoconf, you may include it under +dnl the same distribution terms that you use for the rest of that +dnl program. + +dnl PKG_PREREQ(MIN-VERSION) +dnl ----------------------- +dnl Since: 0.29 +dnl +dnl Verify that the version of the pkg-config macros are at least +dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's +dnl installed version of pkg-config, this checks the developer's version +dnl of pkg.m4 when generating configure. 
+dnl +dnl To ensure that this macro is defined, also add: +dnl m4_ifndef([PKG_PREREQ], +dnl [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])]) +dnl +dnl See the "Since" comment for each macro you use to see what version +dnl of the macros you require. +m4_defun([PKG_PREREQ], +[m4_define([PKG_MACROS_VERSION], [0.29.2]) +m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1, + [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])]) +])dnl PKG_PREREQ + +dnl PKG_PROG_PKG_CONFIG([MIN-VERSION]) +dnl ---------------------------------- +dnl Since: 0.16 +dnl +dnl Search for the pkg-config tool and set the PKG_CONFIG variable to +dnl first found in the path. Checks that the version of pkg-config found +dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is +dnl used since that's the first version where most current features of +dnl pkg-config existed. +AC_DEFUN([PKG_PROG_PKG_CONFIG], +[m4_pattern_forbid([^_?PKG_[A-Z_]+$]) +m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$]) +m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$]) +AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility]) +AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path]) +AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path]) + +if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then + AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) +fi +if test -n "$PKG_CONFIG"; then + _pkg_min_version=m4_default([$1], [0.9.0]) + AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) + if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + PKG_CONFIG="" + fi +fi[]dnl +])dnl PKG_PROG_PKG_CONFIG + +dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------------------------------- +dnl Since: 0.18 +dnl 
+dnl Check to see whether a particular set of modules exists. Similar to +dnl PKG_CHECK_MODULES(), but does not set variables or print errors. +dnl +dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +dnl only at the first occurrence in configure.ac, so if the first place +dnl it's called might be skipped (such as if it is within an "if", you +dnl have to call PKG_CHECK_EXISTS manually +AC_DEFUN([PKG_CHECK_EXISTS], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +if test -n "$PKG_CONFIG" && \ + AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then + m4_default([$2], [:]) +m4_ifvaln([$3], [else + $3])dnl +fi]) + +dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) +dnl --------------------------------------------- +dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting +dnl pkg_failed based on the result. +m4_define([_PKG_CONFIG], +[if test -n "$$1"; then + pkg_cv_[]$1="$$1" + elif test -n "$PKG_CONFIG"; then + PKG_CHECK_EXISTS([$3], + [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes ], + [pkg_failed=yes]) + else + pkg_failed=untried +fi[]dnl +])dnl _PKG_CONFIG + +dnl _PKG_SHORT_ERRORS_SUPPORTED +dnl --------------------------- +dnl Internal check to see if pkg-config supports short errors. 
+AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi[]dnl +])dnl _PKG_SHORT_ERRORS_SUPPORTED + + +dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl -------------------------------------------------------------- +dnl Since: 0.4.0 +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES might not happen, you should be sure to include an +dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac +AC_DEFUN([PKG_CHECK_MODULES], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl +AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl + +pkg_failed=no +AC_MSG_CHECKING([for $2]) + +_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) +_PKG_CONFIG([$1][_LIBS], [libs], [$2]) + +m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS +and $1[]_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details.]) + +if test $pkg_failed = yes; then + AC_MSG_RESULT([no]) + _PKG_SHORT_ERRORS_SUPPORTED + if test $_pkg_short_errors_supported = yes; then + $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` + else + $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD + + m4_default([$4], [AC_MSG_ERROR( +[Package requirements ($2) were not met: + +$$1_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. + +_PKG_TEXT])[]dnl + ]) +elif test $pkg_failed = untried; then + AC_MSG_RESULT([no]) + m4_default([$4], [AC_MSG_FAILURE( +[The pkg-config script could not be found or is too old. 
Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +_PKG_TEXT + +To get pkg-config, see .])[]dnl + ]) +else + $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS + $1[]_LIBS=$pkg_cv_[]$1[]_LIBS + AC_MSG_RESULT([yes]) + $3 +fi[]dnl +])dnl PKG_CHECK_MODULES + + +dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl --------------------------------------------------------------------- +dnl Since: 0.29 +dnl +dnl Checks for existence of MODULES and gathers its build flags with +dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags +dnl and VARIABLE-PREFIX_LIBS from --libs. +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to +dnl include an explicit call to PKG_PROG_PKG_CONFIG in your +dnl configure.ac. +AC_DEFUN([PKG_CHECK_MODULES_STATIC], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +_save_PKG_CONFIG=$PKG_CONFIG +PKG_CONFIG="$PKG_CONFIG --static" +PKG_CHECK_MODULES($@) +PKG_CONFIG=$_save_PKG_CONFIG[]dnl +])dnl PKG_CHECK_MODULES_STATIC + + +dnl PKG_INSTALLDIR([DIRECTORY]) +dnl ------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable pkgconfigdir as the location where a module +dnl should install pkg-config .pc files. By default the directory is +dnl $libdir/pkgconfig, but the default can be changed by passing +dnl DIRECTORY. The user can override through the --with-pkgconfigdir +dnl parameter. 
+AC_DEFUN([PKG_INSTALLDIR], +[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])]) +m4_pushdef([pkg_description], + [pkg-config installation directory @<:@]pkg_default[@:>@]) +AC_ARG_WITH([pkgconfigdir], + [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],, + [with_pkgconfigdir=]pkg_default) +AC_SUBST([pkgconfigdir], [$with_pkgconfigdir]) +m4_popdef([pkg_default]) +m4_popdef([pkg_description]) +])dnl PKG_INSTALLDIR + + +dnl PKG_NOARCH_INSTALLDIR([DIRECTORY]) +dnl -------------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable noarch_pkgconfigdir as the location where a +dnl module should install arch-independent pkg-config .pc files. By +dnl default the directory is $datadir/pkgconfig, but the default can be +dnl changed by passing DIRECTORY. The user can override through the +dnl --with-noarch-pkgconfigdir parameter. +AC_DEFUN([PKG_NOARCH_INSTALLDIR], +[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])]) +m4_pushdef([pkg_description], + [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@]) +AC_ARG_WITH([noarch-pkgconfigdir], + [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],, + [with_noarch_pkgconfigdir=]pkg_default) +AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir]) +m4_popdef([pkg_default]) +m4_popdef([pkg_description]) +])dnl PKG_NOARCH_INSTALLDIR + + +dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, +dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------- +dnl Since: 0.28 +dnl +dnl Retrieves the value of the pkg-config variable for the given module. 
+AC_DEFUN([PKG_CHECK_VAR], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl + +_PKG_CONFIG([$1], [variable="][$3]["], [$2]) +AS_VAR_COPY([$1], [pkg_cv_][$1]) + +AS_VAR_IF([$1], [""], [$5], [$4])dnl +])dnl PKG_CHECK_VAR + +dnl PKG_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND], +dnl [DESCRIPTION], [DEFAULT]) +dnl ------------------------------------------ +dnl +dnl Prepare a "--with-" configure option using the lowercase +dnl [VARIABLE-PREFIX] name, merging the behaviour of AC_ARG_WITH and +dnl PKG_CHECK_MODULES in a single macro. +AC_DEFUN([PKG_WITH_MODULES], +[ +m4_pushdef([with_arg], m4_tolower([$1])) + +m4_pushdef([description], + [m4_default([$5], [build with ]with_arg[ support])]) + +m4_pushdef([def_arg], [m4_default([$6], [auto])]) +m4_pushdef([def_action_if_found], [AS_TR_SH([with_]with_arg)=yes]) +m4_pushdef([def_action_if_not_found], [AS_TR_SH([with_]with_arg)=no]) + +m4_case(def_arg, + [yes],[m4_pushdef([with_without], [--without-]with_arg)], + [m4_pushdef([with_without],[--with-]with_arg)]) + +AC_ARG_WITH(with_arg, + AS_HELP_STRING(with_without, description[ @<:@default=]def_arg[@:>@]),, + [AS_TR_SH([with_]with_arg)=def_arg]) + +AS_CASE([$AS_TR_SH([with_]with_arg)], + [yes],[PKG_CHECK_MODULES([$1],[$2],$3,$4)], + [auto],[PKG_CHECK_MODULES([$1],[$2], + [m4_n([def_action_if_found]) $3], + [m4_n([def_action_if_not_found]) $4])]) + +m4_popdef([with_arg]) +m4_popdef([description]) +m4_popdef([def_arg]) + +])dnl PKG_WITH_MODULES + +dnl PKG_HAVE_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [DESCRIPTION], [DEFAULT]) +dnl ----------------------------------------------- +dnl +dnl Convenience macro to trigger AM_CONDITIONAL after PKG_WITH_MODULES +dnl check._[VARIABLE-PREFIX] is exported as make variable. 
+AC_DEFUN([PKG_HAVE_WITH_MODULES], +[ +PKG_WITH_MODULES([$1],[$2],,,[$3],[$4]) + +AM_CONDITIONAL([HAVE_][$1], + [test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"]) +])dnl PKG_HAVE_WITH_MODULES + +dnl PKG_HAVE_DEFINE_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [DESCRIPTION], [DEFAULT]) +dnl ------------------------------------------------------ +dnl +dnl Convenience macro to run AM_CONDITIONAL and AC_DEFINE after +dnl PKG_WITH_MODULES check. HAVE_[VARIABLE-PREFIX] is exported as make +dnl and preprocessor variable. +AC_DEFUN([PKG_HAVE_DEFINE_WITH_MODULES], +[ +PKG_HAVE_WITH_MODULES([$1],[$2],[$3],[$4]) + +AS_IF([test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"], + [AC_DEFINE([HAVE_][$1], 1, [Enable ]m4_tolower([$1])[ support])]) +])dnl PKG_HAVE_DEFINE_WITH_MODULES + +# Copyright (C) 2002-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_AUTOMAKE_VERSION(VERSION) +# ---------------------------- +# Automake X.Y traces this macro to ensure aclocal.m4 has been +# generated from the m4 files accompanying Automake X.Y. +# (This private macro should not be called outside this file.) +AC_DEFUN([AM_AUTOMAKE_VERSION], +[am__api_version='1.16' +dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to +dnl require some minimum version. Point them to the right macro. +m4_if([$1], [1.16.5], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl +]) + +# _AM_AUTOCONF_VERSION(VERSION) +# ----------------------------- +# aclocal traces this macro to find the Autoconf version. +# This is a private macro too. Using m4_define simplifies +# the logic in aclocal, which can simply ignore this definition. 
+m4_define([_AM_AUTOCONF_VERSION], []) + +# AM_SET_CURRENT_AUTOMAKE_VERSION +# ------------------------------- +# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. +# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. +AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +[AM_AUTOMAKE_VERSION([1.16.5])dnl +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) + +# Copyright (C) 2011-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_AR([ACT-IF-FAIL]) +# ------------------------- +# Try to determine the archiver interface, and trigger the ar-lib wrapper +# if it is needed. If the detection of archiver interface fails, run +# ACT-IF-FAIL (default is to abort configure with a proper error message). +AC_DEFUN([AM_PROG_AR], +[AC_BEFORE([$0], [LT_INIT])dnl +AC_BEFORE([$0], [AC_PROG_LIBTOOL])dnl +AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([ar-lib])dnl +AC_CHECK_TOOLS([AR], [ar lib "link -lib"], [false]) +: ${AR=ar} + +AC_CACHE_CHECK([the archiver ($AR) interface], [am_cv_ar_interface], + [AC_LANG_PUSH([C]) + am_cv_ar_interface=ar + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[int some_variable = 0;]])], + [am_ar_try='$AR cru libconftest.a conftest.$ac_objext >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([am_ar_try]) + if test "$ac_status" -eq 0; then + am_cv_ar_interface=ar + else + am_ar_try='$AR -NOLOGO -OUT:conftest.lib conftest.$ac_objext >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([am_ar_try]) + if test "$ac_status" -eq 0; then + am_cv_ar_interface=lib + else + am_cv_ar_interface=unknown + fi + fi + rm -f conftest.lib libconftest.a + ]) + AC_LANG_POP([C])]) + +case $am_cv_ar_interface in +ar) + ;; +lib) + # Microsoft lib, so override with the ar-lib wrapper script. 
+ # FIXME: It is wrong to rewrite AR. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__AR in this case, + # and then we could set am__AR="$am_aux_dir/ar-lib \$(AR)" or something + # similar. + AR="$am_aux_dir/ar-lib $AR" + ;; +unknown) + m4_default([$1], + [AC_MSG_ERROR([could not determine $AR interface])]) + ;; +esac +AC_SUBST([AR])dnl +]) + +# AM_AUX_DIR_EXPAND -*- Autoconf -*- + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets +# $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to +# '$srcdir', '$srcdir/..', or '$srcdir/../..'. +# +# Of course, Automake must honor this variable whenever it calls a +# tool from the auxiliary directory. The problem is that $srcdir (and +# therefore $ac_aux_dir as well) can be either absolute or relative, +# depending on how configure is run. This is pretty annoying, since +# it makes $ac_aux_dir quite unusable in subdirectories: in the top +# source directory, any form will work fine, but in subdirectories a +# relative path needs to be adjusted first. +# +# $ac_aux_dir/missing +# fails when called from a subdirectory if $ac_aux_dir is relative +# $top_srcdir/$ac_aux_dir/missing +# fails if $ac_aux_dir is absolute, +# fails when called from a subdirectory in a VPATH build with +# a relative $ac_aux_dir +# +# The reason of the latter failure is that $top_srcdir and $ac_aux_dir +# are both prefixed by $srcdir. In an in-source build this is usually +# harmless because $srcdir is '.', but things will broke when you +# start a VPATH build or use an absolute $srcdir. 
+# +# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, +# iff we strip the leading $srcdir from $ac_aux_dir. That would be: +# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` +# and then we would define $MISSING as +# MISSING="\${SHELL} $am_aux_dir/missing" +# This will work as long as MISSING is not called from configure, because +# unfortunately $(top_srcdir) has no meaning in configure. +# However there are other variables, like CC, which are often used in +# configure, and could therefore not use this "fixed" $ac_aux_dir. +# +# Another solution, used here, is to always expand $ac_aux_dir to an +# absolute PATH. The drawback is that using absolute paths prevent a +# configured tree to be moved without reconfiguration. + +AC_DEFUN([AM_AUX_DIR_EXPAND], +[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` +]) + +# AM_CONDITIONAL -*- Autoconf -*- + +# Copyright (C) 1997-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_CONDITIONAL(NAME, SHELL-CONDITION) +# ------------------------------------- +# Define a conditional. +AC_DEFUN([AM_CONDITIONAL], +[AC_PREREQ([2.52])dnl + m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], + [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl +AC_SUBST([$1_TRUE])dnl +AC_SUBST([$1_FALSE])dnl +_AM_SUBST_NOTMAKE([$1_TRUE])dnl +_AM_SUBST_NOTMAKE([$1_FALSE])dnl +m4_define([_AM_COND_VALUE_$1], [$2])dnl +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi +AC_CONFIG_COMMANDS_PRE( +[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then + AC_MSG_ERROR([[conditional "$1" was never defined. 
+Usually this means the macro was only invoked conditionally.]]) +fi])]) + +# Copyright (C) 1999-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be +# written in clear, in which case automake, when reading aclocal.m4, +# will think it sees a *use*, and therefore will trigger all its +# C support machinery. Also note that it means that autoscan, seeing +# CC etc. in the Makefile, will ask for an AC_PROG_CC use... + + +# _AM_DEPENDENCIES(NAME) +# ---------------------- +# See how the compiler implements dependency checking. +# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GCJ". +# We try a few techniques and use that to set a single cache variable. +# +# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was +# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular +# dependency, and given that the user is not expected to run this macro, +# just rely on AC_PROG_CC. +AC_DEFUN([_AM_DEPENDENCIES], +[AC_REQUIRE([AM_SET_DEPDIR])dnl +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl +AC_REQUIRE([AM_MAKE_INCLUDE])dnl +AC_REQUIRE([AM_DEP_TRACK])dnl + +m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], + [$1], [CXX], [depcc="$CXX" am_compiler_list=], + [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], + [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], + [$1], [UPC], [depcc="$UPC" am_compiler_list=], + [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], + [depcc="$$1" am_compiler_list=]) + +AC_CACHE_CHECK([dependency style of $depcc], + [am_cv_$1_dependencies_compiler_type], +[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove.
For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_$1_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` + fi + am__universal=false + m4_case([$1], [CC], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac], + [CXX], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac]) + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. 
It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_$1_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_$1_dependencies_compiler_type=none +fi +]) +AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) +AM_CONDITIONAL([am__fastdep$1], [ + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) +]) + + +# AM_SET_DEPDIR +# ------------- +# Choose a directory name for dependency files. +# This macro is AC_REQUIREd in _AM_DEPENDENCIES. +AC_DEFUN([AM_SET_DEPDIR], +[AC_REQUIRE([AM_SET_LEADING_DOT])dnl +AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl +]) + + +# AM_DEP_TRACK +# ------------ +AC_DEFUN([AM_DEP_TRACK], +[AC_ARG_ENABLE([dependency-tracking], [dnl +AS_HELP_STRING( + [--enable-dependency-tracking], + [do not reject slow dependency extractors]) +AS_HELP_STRING( + [--disable-dependency-tracking], + [speeds up one-time build])]) +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi +AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) +AC_SUBST([AMDEPBACKSLASH])dnl +_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl +AC_SUBST([am__nodep])dnl +_AM_SUBST_NOTMAKE([am__nodep])dnl +]) + +# Generate code to set up dependency tracking. -*- Autoconf -*- + +# Copyright (C) 1999-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_OUTPUT_DEPENDENCY_COMMANDS +# ------------------------------ +AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], +[{ + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. 
+ AS_CASE([$CONFIG_FILES], + [*\'*], [eval set x "$CONFIG_FILES"], + [*], [set x $CONFIG_FILES]) + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all seds we know of understand at least 4000. + sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`AS_DIRNAME(["$am_mf"])` + am_filepart=`AS_BASENAME(["$am_mf"])` + AM_RUN_LOG([cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles]) || am_rc=$? + done + if test $am_rc -ne 0; then + AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. If GNU make was not used, consider + re-running the configure script with MAKE="gmake" (or whatever is + necessary). You can also try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking).]) + fi + AS_UNSET([am_dirpart]) + AS_UNSET([am_filepart]) + AS_UNSET([am_mf]) + AS_UNSET([am_rc]) + rm -f conftest-deps.mk +} +])# _AM_OUTPUT_DEPENDENCY_COMMANDS + + +# AM_OUTPUT_DEPENDENCY_COMMANDS +# ----------------------------- +# This macro should only be invoked once -- use via AC_REQUIRE. +# +# This code is only required when automatic dependency tracking is enabled. +# This creates each '.Po' and '.Plo' makefile fragment that we'll need in +# order to bootstrap the dependency handling code.
+AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], +[AC_CONFIG_COMMANDS([depfiles], + [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], + [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])]) + +# Do all the work for Automake. -*- Autoconf -*- + +# Copyright (C) 1996-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This macro actually does too much. Some checks are only needed if +# your package does certain things. But this isn't really a big deal. + +dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O. +m4_define([AC_PROG_CC], +m4_defn([AC_PROG_CC]) +[_AM_PROG_CC_C_O +]) + +# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) +# AM_INIT_AUTOMAKE([OPTIONS]) +# ----------------------------------------------- +# The call with PACKAGE and VERSION arguments is the old style +# call (pre autoconf-2.50), which is being phased out. PACKAGE +# and VERSION should now be passed to AC_INIT and removed from +# the call to AM_INIT_AUTOMAKE. +# We support both call styles for the transition. After +# the next Automake release, Autoconf can make the AC_INIT +# arguments mandatory, and then we can depend on a new Autoconf +# release and drop the old call support. +AC_DEFUN([AM_INIT_AUTOMAKE], +[AC_PREREQ([2.65])dnl +m4_ifdef([_$0_ALREADY_INIT], + [m4_fatal([$0 expanded multiple times +]m4_defn([_$0_ALREADY_INIT]))], + [m4_define([_$0_ALREADY_INIT], m4_expansion_stack)])dnl +dnl Autoconf wants to disallow AM_ names. We explicitly allow +dnl the ones we care about. +m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl +AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl +AC_REQUIRE([AC_PROG_INSTALL])dnl +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." 
+ AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi +AC_SUBST([CYGPATH_W]) + +# Define the identity of the package. +dnl Distinguish between old-style and new-style calls. +m4_ifval([$2], +[AC_DIAGNOSE([obsolete], + [$0: two- and three-arguments forms are deprecated.]) +m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl + AC_SUBST([PACKAGE], [$1])dnl + AC_SUBST([VERSION], [$2])], +[_AM_SET_OPTIONS([$1])dnl +dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. +m4_if( + m4_ifset([AC_PACKAGE_NAME], [ok]):m4_ifset([AC_PACKAGE_VERSION], [ok]), + [ok:ok],, + [m4_fatal([AC_INIT should be called with package and version arguments])])dnl + AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl + AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl + +_AM_IF_OPTION([no-define],, +[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) + AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl + +# Some tools Automake needs. +AC_REQUIRE([AM_SANITY_CHECK])dnl +AC_REQUIRE([AC_ARG_PROGRAM])dnl +AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) +AM_MISSING_PROG([AUTOCONF], [autoconf]) +AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) +AM_MISSING_PROG([AUTOHEADER], [autoheader]) +AM_MISSING_PROG([MAKEINFO], [makeinfo]) +AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl +AC_REQUIRE([AC_PROG_MKDIR_P])dnl +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html> +# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html> +AC_SUBST([mkdir_p], ['$(MKDIR_P)']) +# We need awk for the "check" target (and possibly the TAP driver).
The +# system "awk" is bad on some platforms. +AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([AC_PROG_MAKE_SET])dnl +AC_REQUIRE([AM_SET_LEADING_DOT])dnl +_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], + [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], + [_AM_PROG_TAR([v7])])]) +_AM_IF_OPTION([no-dependencies],, +[AC_PROVIDE_IFELSE([AC_PROG_CC], + [_AM_DEPENDENCIES([CC])], + [m4_define([AC_PROG_CC], + m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [_AM_DEPENDENCIES([CXX])], + [m4_define([AC_PROG_CXX], + m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJC], + [_AM_DEPENDENCIES([OBJC])], + [m4_define([AC_PROG_OBJC], + m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], + [_AM_DEPENDENCIES([OBJCXX])], + [m4_define([AC_PROG_OBJCXX], + m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl +]) +# Variables for tags utilities; see am/tags.am +if test -z "$CTAGS"; then + CTAGS=ctags +fi +AC_SUBST([CTAGS]) +if test -z "$ETAGS"; then + ETAGS=etags +fi +AC_SUBST([ETAGS]) +if test -z "$CSCOPE"; then + CSCOPE=cscope +fi +AC_SUBST([CSCOPE]) + +AC_REQUIRE([AM_SILENT_RULES])dnl +dnl The testsuite driver may need to know about EXEEXT, so add the +dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This +dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. +AC_CONFIG_COMMANDS_PRE(dnl +[m4_provide_if([_AM_COMPILER_EXEEXT], + [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. 
+# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: <https://austingroupbugs.net/view.php?id=542> + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: <https://www.gnu.org/software/coreutils/>. + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) + fi +fi +dnl The trailing newline in this macro's definition is deliberate, for +dnl backward compatibility and to allow trailing 'dnl'-style comments +dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841. +]) + +dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not +dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further +dnl mangled by Autoconf and run in a shell conditional statement. +m4_define([_AC_COMPILER_EXEEXT], +m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) + +# When config.status generates a header, we must update the stamp-h file.
+# This file resides in the same directory as the config header +# that is generated. The stamp files are numbered to have different names. + +# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the +# loop where config.status creates the headers, so we can generate +# our stamp files there. +AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], +[# Compute $1's index in $config_headers. +_am_arg=$1 +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_SH +# ------------------ +# Define $install_sh. +AC_DEFUN([AM_PROG_INSTALL_SH], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi +AC_SUBST([install_sh])]) + +# Copyright (C) 2003-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# Check whether the underlying file-system supports filenames +# with a leading dot. For instance MS-DOS doesn't. +AC_DEFUN([AM_SET_LEADING_DOT], +[rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null +AC_SUBST([am__leading_dot])]) + +# Check to see how 'make' treats includes. -*- Autoconf -*- + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MAKE_INCLUDE() +# ----------------- +# Check whether make has an 'include' directive that can support all +# the idioms we need for our automatic dependency tracking code. +AC_DEFUN([AM_MAKE_INCLUDE], +[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive]) +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. +echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out]) + AS_CASE([$?:`cat confinc.out 2>/dev/null`], + ['0:this is the am__doit target'], + [AS_CASE([$s], + [BSD], [am__include='.include' am__quote='"'], + [am__include='include' am__quote=''])]) + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +AC_MSG_RESULT([${_am_result}]) +AC_SUBST([am__include]) +AC_SUBST([am__quote])]) + +# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- + +# Copyright (C) 1997-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MISSING_PROG(NAME, PROGRAM) +# ------------------------------ +AC_DEFUN([AM_MISSING_PROG], +[AC_REQUIRE([AM_MISSING_HAS_RUN]) +$1=${$1-"${am_missing_run}$2"} +AC_SUBST($1)]) + +# AM_MISSING_HAS_RUN +# ------------------ +# Define MISSING if not defined so far and test if it is modern enough.
+# If it is, set am_missing_run to use it, otherwise, to nothing. +AC_DEFUN([AM_MISSING_HAS_RUN], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([missing])dnl +if test x"${MISSING+set}" != xset; then + MISSING="\${SHELL} '$am_aux_dir/missing'" +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + AC_MSG_WARN(['missing' script is too old or missing]) +fi +]) + +# Helper functions for option handling. -*- Autoconf -*- + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_MANGLE_OPTION(NAME) +# ----------------------- +AC_DEFUN([_AM_MANGLE_OPTION], +[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) + +# _AM_SET_OPTION(NAME) +# -------------------- +# Set option NAME. Presently that only means defining a flag for this option. +AC_DEFUN([_AM_SET_OPTION], +[m4_define(_AM_MANGLE_OPTION([$1]), [1])]) + +# _AM_SET_OPTIONS(OPTIONS) +# ------------------------ +# OPTIONS is a space-separated list of Automake options. +AC_DEFUN([_AM_SET_OPTIONS], +[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) + +# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) +# ------------------------------------------- +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +AC_DEFUN([_AM_IF_OPTION], +[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) + +# Copyright (C) 1999-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_CC_C_O +# --------------- +# Like AC_PROG_CC_C_O, but changed for automake. We rewrite AC_PROG_CC +# to automatically call this. 
+AC_DEFUN([_AM_PROG_CC_C_O], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([compile])dnl +AC_LANG_PUSH([C])dnl +AC_CACHE_CHECK( + [whether $CC understands -c and -o together], + [am_cv_prog_cc_c_o], + [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])]) + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i]) +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +AC_LANG_POP([C])]) + +# For backward compatibility. +AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])]) + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_RUN_LOG(COMMAND) +# ------------------- +# Run COMMAND, save the exit status in ac_status, and log it. +# (This has been adapted from Autoconf's _AC_RUN_LOG macro.) +AC_DEFUN([AM_RUN_LOG], +[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD + ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + (exit $ac_status); }]) + +# Check to make sure that the build environment is sane. -*- Autoconf -*- + +# Copyright (C) 1996-2021 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SANITY_CHECK +# --------------- +AC_DEFUN([AM_SANITY_CHECK], +[AC_MSG_CHECKING([whether build environment is sane]) +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[[\\\"\#\$\&\'\`$am_lf]]*) + AC_MSG_ERROR([unsafe absolute working directory name]);; +esac +case $srcdir in + *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) + AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$[*]" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$[*]" != "X $srcdir/configure conftest.file" \ + && test "$[*]" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken + alias in your environment]) + fi + if test "$[2]" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$[2]" = conftest.file + ) +then + # Ok. + : +else + AC_MSG_ERROR([newly created file is older than distributed files! 
+Check your system clock]) +fi +AC_MSG_RESULT([yes]) +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi +AC_CONFIG_COMMANDS_PRE( + [AC_MSG_CHECKING([that generated files are newer than configure]) + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + AC_MSG_RESULT([done])]) +rm -f conftest.file +]) + +# Copyright (C) 2009-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SILENT_RULES([DEFAULT]) +# -------------------------- +# Enable less verbose build rules; with the default set to DEFAULT +# ("yes" being less verbose, "no" or empty being verbose). +AC_DEFUN([AM_SILENT_RULES], +[AC_ARG_ENABLE([silent-rules], [dnl +AS_HELP_STRING( + [--enable-silent-rules], + [less verbose build output (undo: "make V=1")]) +AS_HELP_STRING( + [--disable-silent-rules], + [verbose build output (undo: "make V=0")])dnl +]) +case $enable_silent_rules in @%:@ ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);; +esac +dnl +dnl A few 'make' implementations (e.g., NonStop OS and NextStep) +dnl do not support nested variable expansions. +dnl See automake bug#9928 and bug#10237. 
+am_make=${MAKE-make} +AC_CACHE_CHECK([whether $am_make supports nested variables], + [am_cv_make_support_nested_variables], + [if AS_ECHO([['TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi]) +if test $am_cv_make_support_nested_variables = yes; then + dnl Using '$V' instead of '$(V)' breaks IRIX make. + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AC_SUBST([AM_V])dnl +AM_SUBST_NOTMAKE([AM_V])dnl +AC_SUBST([AM_DEFAULT_V])dnl +AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl +AC_SUBST([AM_DEFAULT_VERBOSITY])dnl +AM_BACKSLASH='\' +AC_SUBST([AM_BACKSLASH])dnl +_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl +]) + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_STRIP +# --------------------- +# One issue with vendor 'install' (even GNU) is that you can't +# specify the program used to strip binaries. This is especially +# annoying in cross-compiling environments, where the build's strip +# is unlikely to handle the host's binaries. +# Fortunately install-sh will honor a STRIPPROG variable, so we +# always use install-sh in "make install-strip", and initialize +# STRIPPROG with the value of the STRIP variable (set by the user). +AC_DEFUN([AM_PROG_INSTALL_STRIP], +[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +# Installed binaries are usually stripped using 'strip' when the user +# runs "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program.
+dnl Don't test for $cross_compiling = yes, because it might be 'maybe'. +if test "$cross_compiling" != no; then + AC_CHECK_TOOL([STRIP], [strip], :) +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" +AC_SUBST([INSTALL_STRIP_PROGRAM])]) + +# Copyright (C) 2006-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_SUBST_NOTMAKE(VARIABLE) +# --------------------------- +# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. +# This macro is traced by Automake. +AC_DEFUN([_AM_SUBST_NOTMAKE]) + +# AM_SUBST_NOTMAKE(VARIABLE) +# -------------------------- +# Public sister of _AM_SUBST_NOTMAKE. +AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) + +# Check how to create a tarball. -*- Autoconf -*- + +# Copyright (C) 2004-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_TAR(FORMAT) +# -------------------- +# Check how to create a tarball in format FORMAT. +# FORMAT should be one of 'v7', 'ustar', or 'pax'. +# +# Substitute a variable $(am__tar) that is a command +# writing to stdout a FORMAT-tarball containing the directory +# $tardir. +# tardir=directory && $(am__tar) > result.tar +# +# Substitute a variable $(am__untar) that extracts such +# a tarball read from stdin. +# $(am__untar) < result.tar +# +AC_DEFUN([_AM_PROG_TAR], +[# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AC_SUBST([AMTAR], ['$${TAR-tar}']) + +# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' + +m4_if([$1], [v7], + [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'], + + [m4_case([$1], + [ustar], + [# The POSIX 1988 'ustar' format is defined with fixed-size fields. + # There is notably a 21 bits limit for the UID and the GID. In fact, + # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343 + # and bug#13588). + am_max_uid=2097151 # 2^21 - 1 + am_max_gid=$am_max_uid + # The $UID and $GID variables are not portable, so we need to resort + # to the POSIX-mandated id(1) utility. Errors in the 'id' calls + # below are definitely unexpected, so allow the users to see them + # (that is, avoid stderr redirection). + am_uid=`id -u || echo unknown` + am_gid=`id -g || echo unknown` + AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format]) + if test $am_uid -le $am_max_uid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi + AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format]) + if test $am_gid -le $am_max_gid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi], + + [pax], + [], + + [m4_fatal([Unknown tar format])]) + + AC_MSG_CHECKING([how to create a $1 tar archive]) + + # Go ahead even if we have the value already cached. We do so because we + # need to set the values for the 'am__tar' and 'am__untar' variables. + _am_tools=${am_cv_prog_tar_$1-$_am_tools} + + for _am_tool in $_am_tools; do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; do + AM_RUN_LOG([$_am_tar --version]) && break + done + am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. 
+ (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x $1 -w "$$tardir"' + am__tar_='pax -L -x $1 -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H $1 -L' + am__tar_='find "$tardir" -print | cpio -o -H $1 -L' + am__untar='cpio -i -H $1 -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. + test -n "${am_cv_prog_tar_$1}" && break + + # tar/untar a dummy directory, and stop if the command works. + rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) + rm -rf conftest.dir + if test -s conftest.tar; then + AM_RUN_LOG([$am__untar /dev/null 2>&1 && break + fi + done + rm -rf conftest.dir + + AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) + AC_MSG_RESULT([$am_cv_prog_tar_$1])]) + +AC_SUBST([am__tar]) +AC_SUBST([am__untar]) +]) # _AM_PROG_TAR + +m4_include([m4/ax_pthread.m4]) +m4_include([m4/libtool.m4]) +m4_include([m4/ltoptions.m4]) +m4_include([m4/ltsugar.m4]) +m4_include([m4/ltversion.m4]) +m4_include([m4/lt~obsolete.m4]) +m4_include([m4/pcre2_visibility.m4]) diff --git a/ar-lib b/ar-lib new file mode 100755 index 0000000..c349042 --- /dev/null +++ b/ar-lib @@ -0,0 +1,271 @@ +#! /bin/sh +# Wrapper for Microsoft lib.exe + +me=ar-lib +scriptversion=2019-07-04.01; # UTC + +# Copyright (C) 2010-2021 Free Software Foundation, Inc. +# Written by Peter Rosin . +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + + +# func_error message +func_error () +{ + echo "$me: $1" 1>&2 + exit 1 +} + +file_conv= + +# func_file_conv build_file +# Convert a $build file to $host form and store it in $file +# Currently only supports Windows hosts. +func_file_conv () +{ + file=$1 + case $file in + / | /[!/]*) # absolute file, and not a UNC file + if test -z "$file_conv"; then + # lazily determine how to convert abs files + case `uname -s` in + MINGW*) + file_conv=mingw + ;; + CYGWIN* | MSYS*) + file_conv=cygwin + ;; + *) + file_conv=wine + ;; + esac + fi + case $file_conv in + mingw) + file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` + ;; + cygwin | msys) + file=`cygpath -m "$file" || echo "$file"` + ;; + wine) + file=`winepath -w "$file" || echo "$file"` + ;; + esac + ;; + esac +} + +# func_at_file at_file operation archive +# Iterate over all members in AT_FILE performing OPERATION on ARCHIVE +# for each of them. +# When interpreting the content of the @FILE, do NOT use func_file_conv, +# since the user would need to supply preconverted file names to +# binutils ar, at least for MinGW. 
+func_at_file () +{ + operation=$2 + archive=$3 + at_file_contents=`cat "$1"` + eval set x "$at_file_contents" + shift + + for member + do + $AR -NOLOGO $operation:"$member" "$archive" || exit $? + done +} + +case $1 in + '') + func_error "no command. Try '$0 --help' for more information." + ;; + -h | --h*) + cat <_FOUND variable. +# The package is found if all variables listed are TRUE. +# Example: +# +# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR) +# +# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and +# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE. +# If it is not found and REQUIRED was used, it fails with FATAL_ERROR, +# independent whether QUIET was used or not. +# If it is found, the location is reported using the VAR1 argument, so +# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out. +# If the second argument is DEFAULT_MSG, the message in the failure case will +# be "Could NOT find LibXml2", if you don't like this message you can specify +# your own custom failure message there. 
+ +MACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 ) + + IF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") + IF (${_NAME}_FIND_REQUIRED) + SET(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}") + ELSE (${_NAME}_FIND_REQUIRED) + SET(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}") + ENDIF (${_NAME}_FIND_REQUIRED) + ELSE("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") + SET(_FAIL_MESSAGE "${_FAIL_MSG}") + ENDIF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") + + STRING(TOUPPER ${_NAME} _NAME_UPPER) + + SET(${_NAME_UPPER}_FOUND TRUE) + IF(NOT ${_VAR1}) + SET(${_NAME_UPPER}_FOUND FALSE) + ENDIF(NOT ${_VAR1}) + + FOREACH(_CURRENT_VAR ${ARGN}) + IF(NOT ${_CURRENT_VAR}) + SET(${_NAME_UPPER}_FOUND FALSE) + ENDIF(NOT ${_CURRENT_VAR}) + ENDFOREACH(_CURRENT_VAR) + + IF (${_NAME_UPPER}_FOUND) + IF (NOT ${_NAME}_FIND_QUIETLY) + MESSAGE(STATUS "Found ${_NAME}: ${${_VAR1}}") + ENDIF (NOT ${_NAME}_FIND_QUIETLY) + ELSE (${_NAME_UPPER}_FOUND) + IF (${_NAME}_FIND_REQUIRED) + MESSAGE(FATAL_ERROR "${_FAIL_MESSAGE}") + ELSE (${_NAME}_FIND_REQUIRED) + IF (NOT ${_NAME}_FIND_QUIETLY) + MESSAGE(STATUS "${_FAIL_MESSAGE}") + ENDIF (NOT ${_NAME}_FIND_QUIETLY) + ENDIF (${_NAME}_FIND_REQUIRED) + ENDIF (${_NAME_UPPER}_FOUND) +ENDMACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS) diff --git a/cmake/FindReadline.cmake b/cmake/FindReadline.cmake new file mode 100644 index 0000000..1d4cc55 --- /dev/null +++ b/cmake/FindReadline.cmake @@ -0,0 +1,29 @@ +# from http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/FindReadline.cmake +# http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/COPYING-CMAKE-SCRIPTS +# --> BSD licensed +# +# GNU Readline library finder +if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY) + set(READLINE_FOUND TRUE) +else(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY) + FIND_PATH(READLINE_INCLUDE_DIR readline/readline.h + /usr/include/readline + ) + +# 2008-04-22 The next clause used to read like this: +# +# FIND_LIBRARY(READLINE_LIBRARY NAMES readline) +# 
FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses ) +# include(FindPackageHandleStandardArgs) +# FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY ) +# +# I was advised to modify it such that it will find an ncurses library if +# required, but not if one was explicitly given, that is, it allows the +# default to be overridden. PH + + FIND_LIBRARY(READLINE_LIBRARY NAMES readline) + include(FindPackageHandleStandardArgs) + FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY ) + + MARK_AS_ADVANCED(READLINE_INCLUDE_DIR READLINE_LIBRARY) +endif(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY) diff --git a/cmake/pcre2-config-version.cmake.in b/cmake/pcre2-config-version.cmake.in new file mode 100644 index 0000000..dac149e --- /dev/null +++ b/cmake/pcre2-config-version.cmake.in @@ -0,0 +1,15 @@ +set(PACKAGE_VERSION_MAJOR @PCRE2_MAJOR@) +set(PACKAGE_VERSION_MINOR @PCRE2_MINOR@) +set(PACKAGE_VERSION_PATCH 0) +set(PACKAGE_VERSION @PCRE2_MAJOR@.@PCRE2_MINOR@.0) + +# Check whether the requested PACKAGE_FIND_VERSION is compatible +if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR + PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR) + set(PACKAGE_VERSION_COMPATIBLE FALSE) +else() + set(PACKAGE_VERSION_COMPATIBLE TRUE) + if(PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + endif() +endif() diff --git a/cmake/pcre2-config.cmake.in b/cmake/pcre2-config.cmake.in new file mode 100644 index 0000000..84eebad --- /dev/null +++ b/cmake/pcre2-config.cmake.in @@ -0,0 +1,148 @@ +# pcre2-config.cmake +# ---------------- +# +# Finds the PCRE2 library, specify the starting search path in PCRE2_ROOT. +# +# Static vs. shared +# ----------------- +# To make use of the static library instead of the shared one, one needs +# to set the variable PCRE2_USE_STATIC_LIBS to ON before calling find_package. 
+# Example: +# set(PCRE2_USE_STATIC_LIBS ON) +# find_package(PCRE2 CONFIG COMPONENTS 8BIT) +# +# This will define the following variables: +# +# PCRE2_FOUND - True if the system has the PCRE2 library. +# PCRE2_VERSION - The version of the PCRE2 library which was found. +# +# and the following imported targets: +# +# PCRE2::8BIT - The 8 bit PCRE2 library. +# PCRE2::16BIT - The 16 bit PCRE2 library. +# PCRE2::32BIT - The 32 bit PCRE2 library. +# PCRE2::POSIX - The POSIX PCRE2 library. + +set(PCRE2_NON_STANDARD_LIB_PREFIX @NON_STANDARD_LIB_PREFIX@) +set(PCRE2_NON_STANDARD_LIB_SUFFIX @NON_STANDARD_LIB_SUFFIX@) +set(PCRE2_8BIT_NAME pcre2-8) +set(PCRE2_16BIT_NAME pcre2-16) +set(PCRE2_32BIT_NAME pcre2-32) +set(PCRE2_POSIX_NAME pcre2-posix) +find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h DOC "PCRE2 include directory") +if (PCRE2_USE_STATIC_LIBS) + if (MSVC) + set(PCRE2_8BIT_NAME pcre2-8-static) + set(PCRE2_16BIT_NAME pcre2-16-static) + set(PCRE2_32BIT_NAME pcre2-32-static) + set(PCRE2_POSIX_NAME pcre2-posix-static) + endif () + + set(PCRE2_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX}) + set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) +else () + set(PCRE2_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX}) + if (MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX) + set(PCRE2_PREFIX "") + endif () + + set(PCRE2_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) + if (MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX) + set(PCRE2_SUFFIX "-0.dll") + elseif(MSVC) + set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif () +endif () +find_library(PCRE2_8BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "8 bit PCRE2 library") +find_library(PCRE2_16BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX} DOC "16 bit PCRE2 library") +find_library(PCRE2_32BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX} DOC "32 bit PCRE2 library") 
+find_library(PCRE2_POSIX_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX} DOC "8 bit POSIX PCRE2 library") +unset(PCRE2_NON_STANDARD_LIB_PREFIX) +unset(PCRE2_NON_STANDARD_LIB_SUFFIX) +unset(PCRE2_8BIT_NAME) +unset(PCRE2_16BIT_NAME) +unset(PCRE2_32BIT_NAME) +unset(PCRE2_POSIX_NAME) + +# Set version +if (PCRE2_INCLUDE_DIR) + set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0") +endif () + +# Which components have been found. +if (PCRE2_8BIT_LIBRARY) + set(PCRE2_8BIT_FOUND TRUE) +endif () +if (PCRE2_16BIT_LIBRARY) + set(PCRE2_16BIT_FOUND TRUE) +endif () +if (PCRE2_32BIT_LIBRARY) + set(PCRE2_32BIT_FOUND TRUE) +endif () +if (PCRE2_POSIX_LIBRARY) + set(PCRE2_POSIX_FOUND TRUE) +endif () + +# Check if at least one component has been specified. +list(LENGTH PCRE2_FIND_COMPONENTS PCRE2_NCOMPONENTS) +if (PCRE2_NCOMPONENTS LESS 1) + message(FATAL_ERROR "No components have been specified. This is not allowed. Please, specify at least one component.") +endif () +unset(PCRE2_NCOMPONENTS) + +# When POSIX component has been specified make sure that also 8BIT component is specified. +set(PCRE2_8BIT_COMPONENT FALSE) +set(PCRE2_POSIX_COMPONENT FALSE) +foreach(component ${PCRE2_FIND_COMPONENTS}) + if (component STREQUAL "8BIT") + set(PCRE2_8BIT_COMPONENT TRUE) + elseif (component STREQUAL "POSIX") + set(PCRE2_POSIX_COMPONENT TRUE) + endif () +endforeach() + +if (PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT) + message(FATAL_ERROR "The component POSIX is specified while the 8BIT one is not. This is not allowed. 
Please, also specify the 8BIT component.") +endif() +unset(PCRE2_8BIT_COMPONENT) +unset(PCRE2_POSIX_COMPONENT) + +include(FindPackageHandleStandardArgs) +set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}") +find_package_handle_standard_args(PCRE2 + FOUND_VAR PCRE2_FOUND + REQUIRED_VARS PCRE2_INCLUDE_DIR + HANDLE_COMPONENTS + VERSION_VAR PCRE2_VERSION + CONFIG_MODE +) + +set(PCRE2_LIBRARIES) +if (PCRE2_FOUND) + foreach(component ${PCRE2_FIND_COMPONENTS}) + if (PCRE2_USE_STATIC_LIBS) + add_library(PCRE2::${component} STATIC IMPORTED) + target_compile_definitions(PCRE2::${component} INTERFACE PCRE2_STATIC) + else () + add_library(PCRE2::${component} SHARED IMPORTED) + endif () + set_target_properties(PCRE2::${component} PROPERTIES + IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}" + IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}" + ) + if (component STREQUAL "POSIX") + set_target_properties(PCRE2::${component} PROPERTIES + INTERFACE_LINK_LIBRARIES "PCRE2::8BIT" + LINK_LIBRARIES "PCRE2::8BIT" + ) + endif () + + set(PCRE2_LIBRARIES ${PCRE2_LIBRARIES} ${PCRE2_${component}_LIBRARY}) + mark_as_advanced(PCRE2_${component}_LIBRARY) + endforeach() +endif () + +mark_as_advanced( + PCRE2_INCLUDE_DIR +) diff --git a/compile b/compile new file mode 100755 index 0000000..df363c8 --- /dev/null +++ b/compile @@ -0,0 +1,348 @@ +#! /bin/sh +# Wrapper for compilers which do not understand '-c -o'. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1999-2021 Free Software Foundation, Inc. +# Written by Tom Tromey . +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +nl=' +' + +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent tools from complaining about whitespace usage. +IFS=" "" $nl" + +file_conv= + +# func_file_conv build_file lazy +# Convert a $build file to $host form and store it in $file +# Currently only supports Windows hosts. If the determined conversion +# type is listed in (the comma separated) LAZY, no conversion will +# take place. 
+func_file_conv () +{ + file=$1 + case $file in + / | /[!/]*) # absolute file, and not a UNC file + if test -z "$file_conv"; then + # lazily determine how to convert abs files + case `uname -s` in + MINGW*) + file_conv=mingw + ;; + CYGWIN* | MSYS*) + file_conv=cygwin + ;; + *) + file_conv=wine + ;; + esac + fi + case $file_conv/,$2, in + *,$file_conv,*) + ;; + mingw/*) + file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` + ;; + cygwin/* | msys/*) + file=`cygpath -m "$file" || echo "$file"` + ;; + wine/*) + file=`winepath -w "$file" || echo "$file"` + ;; + esac + ;; + esac +} + +# func_cl_dashL linkdir +# Make cl look for libraries in LINKDIR +func_cl_dashL () +{ + func_file_conv "$1" + if test -z "$lib_path"; then + lib_path=$file + else + lib_path="$lib_path;$file" + fi + linker_opts="$linker_opts -LIBPATH:$file" +} + +# func_cl_dashl library +# Do a library search-path lookup for cl +func_cl_dashl () +{ + lib=$1 + found=no + save_IFS=$IFS + IFS=';' + for dir in $lib_path $LIB + do + IFS=$save_IFS + if $shared && test -f "$dir/$lib.dll.lib"; then + found=yes + lib=$dir/$lib.dll.lib + break + fi + if test -f "$dir/$lib.lib"; then + found=yes + lib=$dir/$lib.lib + break + fi + if test -f "$dir/lib$lib.a"; then + found=yes + lib=$dir/lib$lib.a + break + fi + done + IFS=$save_IFS + + if test "$found" != yes; then + lib=$lib.lib + fi +} + +# func_cl_wrapper cl arg... +# Adjust compile command to suit cl +func_cl_wrapper () +{ + # Assume a capable shell + lib_path= + shared=: + linker_opts= + for arg + do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. 
+ eat=1 + case $2 in + *.o | *.[oO][bB][jJ]) + func_file_conv "$2" + set x "$@" -Fo"$file" + shift + ;; + *) + func_file_conv "$2" + set x "$@" -Fe"$file" + shift + ;; + esac + ;; + -I) + eat=1 + func_file_conv "$2" mingw + set x "$@" -I"$file" + shift + ;; + -I*) + func_file_conv "${1#-I}" mingw + set x "$@" -I"$file" + shift + ;; + -l) + eat=1 + func_cl_dashl "$2" + set x "$@" "$lib" + shift + ;; + -l*) + func_cl_dashl "${1#-l}" + set x "$@" "$lib" + shift + ;; + -L) + eat=1 + func_cl_dashL "$2" + ;; + -L*) + func_cl_dashL "${1#-L}" + ;; + -static) + shared=false + ;; + -Wl,*) + arg=${1#-Wl,} + save_ifs="$IFS"; IFS=',' + for flag in $arg; do + IFS="$save_ifs" + linker_opts="$linker_opts $flag" + done + IFS="$save_ifs" + ;; + -Xlinker) + eat=1 + linker_opts="$linker_opts $2" + ;; + -*) + set x "$@" "$1" + shift + ;; + *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) + func_file_conv "$1" + set x "$@" -Tp"$file" + shift + ;; + *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) + func_file_conv "$1" mingw + set x "$@" "$file" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift + done + if test -n "$linker_opts"; then + linker_opts="-link$linker_opts" + fi + exec "$@" $linker_opts + exit 1 +} + +eat= + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: compile [--help] [--version] PROGRAM [ARGS] + +Wrapper for compilers which do not understand '-c -o'. +Remove '-o dest.o' from ARGS, run PROGRAM with the remaining +arguments, and rename the output as expected. + +If you are trying to build a whole package this is not the +right script to run: please start by reading the file 'INSTALL'. + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "compile $scriptversion" + exit $? + ;; + cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \ + icl | *[/\\]icl | icl.exe | *[/\\]icl.exe ) + func_cl_wrapper "$@" # Doesn't return... 
+ ;; +esac + +ofile= +cfile= + +for arg +do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + # So we strip '-o arg' only if arg is an object. + eat=1 + case $2 in + *.o | *.obj) + ofile=$2 + ;; + *) + set x "$@" -o "$2" + shift + ;; + esac + ;; + *.c) + cfile=$1 + set x "$@" "$1" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift +done + +if test -z "$ofile" || test -z "$cfile"; then + # If no '-o' option was seen then we might have been invoked from a + # pattern rule where we don't need one. That is ok -- this is a + # normal compilation that the losing compiler can handle. If no + # '.c' file was seen then we are probably linking. That is also + # ok. + exec "$@" +fi + +# Name of file we expect compiler to create. +cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` + +# Create the lock directory. +# Note: use '[/\\:.-]' here to ensure that we don't use the same name +# that we are using for the .o file. Also, base the name on the expected +# object file name, since that is what matters with a parallel build. +lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d +while true; do + if mkdir "$lockdir" >/dev/null 2>&1; then + break + fi + sleep 1 +done +# FIXME: race condition here if user kills between mkdir and trap. +trap "rmdir '$lockdir'; exit 1" 1 2 15 + +# Run the compile. +"$@" +ret=$? 
+ +if test -f "$cofile"; then + test "$cofile" = "$ofile" || mv "$cofile" "$ofile" +elif test -f "${cofile}bj"; then + test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" +fi + +rmdir "$lockdir" +exit $ret + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/config-cmake.h.in b/config-cmake.h.in new file mode 100644 index 0000000..6539d77 --- /dev/null +++ b/config-cmake.h.in @@ -0,0 +1,56 @@ +/* config.h for CMake builds */ + +#cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1 +#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1 +#cmakedefine HAVE_DIRENT_H 1 +#cmakedefine HAVE_SYS_STAT_H 1 +#cmakedefine HAVE_SYS_TYPES_H 1 +#cmakedefine HAVE_UNISTD_H 1 +#cmakedefine HAVE_WINDOWS_H 1 + +#cmakedefine HAVE_BCOPY 1 +#cmakedefine HAVE_MEMFD_CREATE 1 +#cmakedefine HAVE_MEMMOVE 1 +#cmakedefine HAVE_SECURE_GETENV 1 +#cmakedefine HAVE_STRERROR 1 + +#cmakedefine SUPPORT_PCRE2_8 1 +#cmakedefine SUPPORT_PCRE2_16 1 +#cmakedefine SUPPORT_PCRE2_32 1 +#cmakedefine PCRE2_DEBUG 1 +#cmakedefine DISABLE_PERCENT_ZT 1 + +#cmakedefine SUPPORT_LIBBZ2 1 +#cmakedefine SUPPORT_LIBEDIT 1 +#cmakedefine SUPPORT_LIBREADLINE 1 +#cmakedefine SUPPORT_LIBZ 1 + +#cmakedefine SUPPORT_JIT 1 +#cmakedefine SLJIT_PROT_EXECUTABLE_ALLOCATOR 1 +#cmakedefine SUPPORT_PCRE2GREP_JIT 1 +#cmakedefine SUPPORT_PCRE2GREP_CALLOUT 1 +#cmakedefine SUPPORT_PCRE2GREP_CALLOUT_FORK 1 +#cmakedefine SUPPORT_UNICODE 1 +#cmakedefine SUPPORT_VALGRIND 1 + +#cmakedefine BSR_ANYCRLF 1 +#cmakedefine EBCDIC 1 +#cmakedefine EBCDIC_NL25 1 +#cmakedefine HEAP_MATCH_RECURSE 1 +#cmakedefine NEVER_BACKSLASH_C 1 + +#define PCRE2_EXPORT @PCRE2_EXPORT@ +#define LINK_SIZE @PCRE2_LINK_SIZE@ +#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@ +#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@ +#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@ +#define 
MAX_VARLOOKBEHIND @PCRE2_MAX_VARLOOKBEHIND@ +#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@ +#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@ +#define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@ +#define PCRE2GREP_MAX_BUFSIZE @PCRE2GREP_MAX_BUFSIZE@ + +#define MAX_NAME_SIZE 128 +#define MAX_NAME_COUNT 10000 + +/* end config.h for CMake builds */ diff --git a/config.guess b/config.guess new file mode 100755 index 0000000..e81d3ae --- /dev/null +++ b/config.guess @@ -0,0 +1,1748 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright 1992-2021 Free Software Foundation, Inc. + +# shellcheck disable=SC2006,SC2268 # see below for rationale + +timestamp='2021-06-03' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). +# +# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. +# +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess +# +# Please send patches to . 
+ + +# The "shellcheck disable" line above the timestamp inhibits complaints +# about features and limitations of the classic Bourne shell that were +# superseded or lifted in POSIX. However, this script identifies a wide +# variety of pre-POSIX systems that do not have POSIX shells at all, and +# even some reasonably current systems (Solaris 10 as case-in-point) still +# have a pre-POSIX /bin/sh. + + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright 1992-2021 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +# Just in case it came from the environment. +GUESS= + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. 
+ +# Portable tmp directory creation inspired by the Autoconf team. + +tmp= +# shellcheck disable=SC2172 +trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15 + +set_cc_for_build() { + # prevent multiple calls if $tmp is already set + test "$tmp" && return 0 + : "${TMPDIR=/tmp}" + # shellcheck disable=SC2039,SC3028 + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } + dummy=$tmp/dummy + case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in + ,,) echo "int x;" > "$dummy.c" + for driver in cc gcc c89 c99 ; do + if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then + CC_FOR_BUILD=$driver + break + fi + done + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; + esac +} + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if test -f /.attbin/uname ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +case $UNAME_SYSTEM in +Linux|GNU|GNU/*) + LIBC=unknown + + set_cc_for_build + cat <<-EOF > "$dummy.c" + #include + #if defined(__UCLIBC__) + LIBC=uclibc + #elif defined(__dietlibc__) + LIBC=dietlibc + #elif defined(__GLIBC__) + LIBC=gnu + #else + #include + /* First heuristic to detect musl libc. 
*/ + #ifdef __DEFINED_va_list + LIBC=musl + #endif + #endif + EOF + cc_set_libc=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` + eval "$cc_set_libc" + + # Second heuristic to detect musl libc. + if [ "$LIBC" = unknown ] && + command -v ldd >/dev/null && + ldd --version 2>&1 | grep -q ^musl; then + LIBC=musl + fi + + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. + if [ "$LIBC" = unknown ]; then + LIBC=gnu + fi + ;; +esac + +# Note: order is significant - the case branches are not exclusive. + +case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + /sbin/sysctl -n hw.machine_arch 2>/dev/null || \ + /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \ + echo unknown)` + case $UNAME_MACHINE_ARCH in + aarch64eb) machine=aarch64_be-unknown ;; + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + earmv*) + arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'` + machine=${arch}${endian}-unknown + ;; + *) machine=$UNAME_MACHINE_ARCH-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently (or will in the future) and ABI. 
+ case $UNAME_MACHINE_ARCH in + earm*) + os=netbsdelf + ;; + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # Determine ABI tags. + case $UNAME_MACHINE_ARCH in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"` + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case $UNAME_VERSION in + Debian*) + release='-gnu' + ;; + *) + release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
+ GUESS=$machine-${os}${release}${abi-} + ;; + *:Bitrig:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-bitrig$UNAME_RELEASE + ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-openbsd$UNAME_RELEASE + ;; + *:SecBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/SecBSD.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-secbsd$UNAME_RELEASE + ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-libertybsd$UNAME_RELEASE + ;; + *:MidnightBSD:*:*) + GUESS=$UNAME_MACHINE-unknown-midnightbsd$UNAME_RELEASE + ;; + *:ekkoBSD:*:*) + GUESS=$UNAME_MACHINE-unknown-ekkobsd$UNAME_RELEASE + ;; + *:SolidBSD:*:*) + GUESS=$UNAME_MACHINE-unknown-solidbsd$UNAME_RELEASE + ;; + *:OS108:*:*) + GUESS=$UNAME_MACHINE-unknown-os108_$UNAME_RELEASE + ;; + macppc:MirBSD:*:*) + GUESS=powerpc-unknown-mirbsd$UNAME_RELEASE + ;; + *:MirBSD:*:*) + GUESS=$UNAME_MACHINE-unknown-mirbsd$UNAME_RELEASE + ;; + *:Sortix:*:*) + GUESS=$UNAME_MACHINE-unknown-sortix + ;; + *:Twizzler:*:*) + GUESS=$UNAME_MACHINE-unknown-twizzler + ;; + *:Redox:*:*) + GUESS=$UNAME_MACHINE-unknown-redox + ;; + mips:OSF1:*.*) + GUESS=mips-dec-osf1 + ;; + alpha:OSF1:*:*) + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + trap '' 0 + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. 
+ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case $ALPHA_CPU_TYPE in + "EV4 (21064)") + UNAME_MACHINE=alpha ;; + "EV4.5 (21064)") + UNAME_MACHINE=alpha ;; + "LCA4 (21066/21068)") + UNAME_MACHINE=alpha ;; + "EV5 (21164)") + UNAME_MACHINE=alphaev5 ;; + "EV5.6 (21164A)") + UNAME_MACHINE=alphaev56 ;; + "EV5.6 (21164PC)") + UNAME_MACHINE=alphapca56 ;; + "EV5.7 (21164PC)") + UNAME_MACHINE=alphapca57 ;; + "EV6 (21264)") + UNAME_MACHINE=alphaev6 ;; + "EV6.7 (21264A)") + UNAME_MACHINE=alphaev67 ;; + "EV6.8CB (21264C)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8AL (21264B)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8CX (21264D)") + UNAME_MACHINE=alphaev68 ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE=alphaev69 ;; + "EV7 (21364)") + UNAME_MACHINE=alphaev7 ;; + "EV7.9 (21364A)") + UNAME_MACHINE=alphaev79 ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + OSF_REL=`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + GUESS=$UNAME_MACHINE-dec-osf$OSF_REL + ;; + Amiga*:UNIX_System_V:4.0:*) + GUESS=m68k-unknown-sysv4 + ;; + *:[Aa]miga[Oo][Ss]:*:*) + GUESS=$UNAME_MACHINE-unknown-amigaos + ;; + *:[Mm]orph[Oo][Ss]:*:*) + GUESS=$UNAME_MACHINE-unknown-morphos + ;; + *:OS/390:*:*) + GUESS=i370-ibm-openedition + ;; + *:z/VM:*:*) + GUESS=s390-ibm-zvmoe + ;; + *:OS400:*:*) + GUESS=powerpc-ibm-os400 + ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + GUESS=arm-acorn-riscix$UNAME_RELEASE + ;; + arm*:riscos:*:*|arm*:RISCOS:*:*) + GUESS=arm-unknown-riscos + ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + GUESS=hppa1.1-hitachi-hiuxmpp + ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
+ case `(/bin/universe) 2>/dev/null` in + att) GUESS=pyramid-pyramid-sysv3 ;; + *) GUESS=pyramid-pyramid-bsd ;; + esac + ;; + NILE*:*:*:dcosx) + GUESS=pyramid-pyramid-svr4 + ;; + DRS?6000:unix:4.0:6*) + GUESS=sparc-icl-nx6 + ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) GUESS=sparc-icl-nx7 ;; + esac + ;; + s390x:SunOS:*:*) + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=$UNAME_MACHINE-ibm-solaris2$SUN_REL + ;; + sun4H:SunOS:5.*:*) + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-hal-solaris2$SUN_REL + ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-sun-solaris2$SUN_REL + ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + GUESS=i386-pc-auroraux$UNAME_RELEASE + ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + set_cc_for_build + SUN_ARCH=i386 + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH=x86_64 + fi + fi + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=$SUN_ARCH-pc-solaris2$SUN_REL + ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-sun-solaris3$SUN_REL + ;; + sun4*:SunOS:*:*) + case `/usr/bin/arch -k` in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. 
+ SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/'` + GUESS=sparc-sun-sunos$SUN_REL + ;; + sun3*:SunOS:*:*) + GUESS=m68k-sun-sunos$UNAME_RELEASE + ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 + case `/bin/arch` in + sun3) + GUESS=m68k-sun-sunos$UNAME_RELEASE + ;; + sun4) + GUESS=sparc-sun-sunos$UNAME_RELEASE + ;; + esac + ;; + aushp:SunOS:*:*) + GUESS=sparc-auspex-sunos$UNAME_RELEASE + ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. + atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + GUESS=m68k-milan-mint$UNAME_RELEASE + ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + GUESS=m68k-hades-mint$UNAME_RELEASE + ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + GUESS=m68k-unknown-mint$UNAME_RELEASE + ;; + m68k:machten:*:*) + GUESS=m68k-apple-machten$UNAME_RELEASE + ;; + powerpc:machten:*:*) + GUESS=powerpc-apple-machten$UNAME_RELEASE + ;; + RISC*:Mach:*:*) + GUESS=mips-dec-mach_bsd4.3 + ;; + RISC*:ULTRIX:*:*) + GUESS=mips-dec-ultrix$UNAME_RELEASE + ;; + VAX*:ULTRIX*:*:*) + GUESS=vax-dec-ultrix$UNAME_RELEASE + ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + GUESS=clipper-intergraph-clix$UNAME_RELEASE + ;; + mips:*:*:UMIPS | 
mips:*:*:RISCos) + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && + dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`"$dummy" "$dummyarg"` && + { echo "$SYSTEM_NAME"; exit; } + GUESS=mips-mips-riscos$UNAME_RELEASE + ;; + Motorola:PowerMAX_OS:*:*) + GUESS=powerpc-motorola-powermax + ;; + Motorola:*:4.3:PL8-*) + GUESS=powerpc-harris-powermax + ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + GUESS=powerpc-harris-powermax + ;; + Night_Hawk:Power_UNIX:*:*) + GUESS=powerpc-harris-powerunix + ;; + m88k:CX/UX:7*:*) + GUESS=m88k-harris-cxux7 + ;; + m88k:*:4*:R4*) + GUESS=m88k-motorola-sysv4 + ;; + m88k:*:3*:R3*) + GUESS=m88k-motorola-sysv3 + ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110 + then + if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \ + test "$TARGET_BINARY_INTERFACE"x = x + then + GUESS=m88k-dg-dgux$UNAME_RELEASE + else + GUESS=m88k-dg-dguxbcs$UNAME_RELEASE + fi + else + GUESS=i586-dg-dgux$UNAME_RELEASE + fi + ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + GUESS=m88k-dolphin-sysv3 + ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + GUESS=m88k-motorola-sysv3 + ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + GUESS=m88k-tektronix-sysv3 + ;; + Tek43[0-9][0-9]:UTek:*:*) # 
Tektronix 4300 system running UTek (BSD) + GUESS=m68k-tektronix-bsd + ;; + *:IRIX*:*:*) + IRIX_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/g'` + GUESS=mips-sgi-irix$IRIX_REL + ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. + GUESS=romp-ibm-aix # uname -m gives an 8 hex-code CPU id + ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + GUESS=i386-ibm-aix + ;; + ia64:AIX:*:*) + if test -x /usr/bin/oslevel ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=$UNAME_VERSION.$UNAME_RELEASE + fi + GUESS=$UNAME_MACHINE-ibm-aix$IBM_REV + ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` + then + GUESS=$SYSTEM_NAME + else + GUESS=rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + GUESS=rs6000-ibm-aix3.2.4 + else + GUESS=rs6000-ibm-aix3.2 + fi + ;; + *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if test -x /usr/bin/lslpp ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | \ + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + else + IBM_REV=$UNAME_VERSION.$UNAME_RELEASE + fi + GUESS=$IBM_ARCH-ibm-aix$IBM_REV + ;; + *:AIX:*:*) + GUESS=rs6000-ibm-aix + ;; + ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) + GUESS=romp-ibm-bsd4.4 + ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + GUESS=romp-ibm-bsd$UNAME_RELEASE # 4.3 with uname added to + ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + GUESS=rs6000-bull-bosx + ;; + DPX/2?00:B.O.S.:*:*) + GUESS=m68k-bull-sysv3 + ;; + 9000/[34]??:4.3bsd:1.*:*) + GUESS=m68k-hp-bsd + ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + 
GUESS=m68k-hp-bsd4.4 + ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` + case $UNAME_MACHINE in + 9000/31?) HP_ARCH=m68000 ;; + 9000/[34]??) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if test -x /usr/bin/getconf; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case $sc_cpu_version in + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case $sc_kernel_bits in + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 + esac ;; + esac + fi + if test "$HP_ARCH" = ""; then + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if test "$HP_ARCH" = hppa2.0w + then + set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ + then + HP_ARCH=hppa2.0w + else + HP_ARCH=hppa64 + fi + fi + GUESS=$HP_ARCH-hp-hpux$HPUX_REV + ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` + GUESS=ia64-hp-hpux$HPUX_REV + ;; + 3050*:HI-UX:*:*) + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` && + { echo "$SYSTEM_NAME"; exit; } + GUESS=unknown-hitachi-hiuxwe2 + ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) + GUESS=hppa1.1-hp-bsd + ;; + 9000/8??:4.3bsd:*:*) + GUESS=hppa1.0-hp-bsd + ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + GUESS=hppa1.0-hp-mpeix + ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) + GUESS=hppa1.1-hp-osf + ;; + hp8??:OSF1:*:*) + GUESS=hppa1.0-hp-osf + ;; + i*86:OSF1:*:*) + if test -x /usr/sbin/sysversion ; then + GUESS=$UNAME_MACHINE-unknown-osf1mk + else + GUESS=$UNAME_MACHINE-unknown-osf1 + fi + ;; + parisc*:Lites*:*:*) + GUESS=hppa1.1-hp-lites + ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + GUESS=c1-convex-bsd + ;; + C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc 
+ then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + GUESS=c34-convex-bsd + ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + GUESS=c38-convex-bsd + ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + GUESS=c4-convex-bsd + ;; + CRAY*Y-MP:*:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=ymp-cray-unicos$CRAY_REL + ;; + CRAY*[A-Z]90:*:*:*) + echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=t90-cray-unicos$CRAY_REL + ;; + CRAY*T3E:*:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=alphaev5-cray-unicosmk$CRAY_REL + ;; + CRAY*SV1:*:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=sv1-cray-unicos$CRAY_REL + ;; + *:UNICOS/mp:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=craynv-cray-unicosmp$CRAY_REL + ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'` + GUESS=${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} + ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + GUESS=sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} + ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + GUESS=$UNAME_MACHINE-pc-bsdi$UNAME_RELEASE + ;; + sparc*:BSD/OS:*:*) + GUESS=sparc-unknown-bsdi$UNAME_RELEASE + ;; + *:BSD/OS:*:*) + 
GUESS=$UNAME_MACHINE-unknown-bsdi$UNAME_RELEASE + ;; + arm:FreeBSD:*:*) + UNAME_PROCESSOR=`uname -p` + set_cc_for_build + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabi + else + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabihf + fi + ;; + *:FreeBSD:*:*) + UNAME_PROCESSOR=`/usr/bin/uname -p` + case $UNAME_PROCESSOR in + amd64) + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; + esac + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL + ;; + i*:CYGWIN*:*) + GUESS=$UNAME_MACHINE-pc-cygwin + ;; + *:MINGW64*:*) + GUESS=$UNAME_MACHINE-pc-mingw64 + ;; + *:MINGW*:*) + GUESS=$UNAME_MACHINE-pc-mingw32 + ;; + *:MSYS*:*) + GUESS=$UNAME_MACHINE-pc-msys + ;; + i*:PW*:*) + GUESS=$UNAME_MACHINE-pc-pw32 + ;; + *:Interix*:*) + case $UNAME_MACHINE in + x86) + GUESS=i586-pc-interix$UNAME_RELEASE + ;; + authenticamd | genuineintel | EM64T) + GUESS=x86_64-unknown-interix$UNAME_RELEASE + ;; + IA64) + GUESS=ia64-unknown-interix$UNAME_RELEASE + ;; + esac ;; + i*:UWIN*:*) + GUESS=$UNAME_MACHINE-pc-uwin + ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + GUESS=x86_64-pc-cygwin + ;; + prep*:SunOS:5.*:*) + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=powerpcle-unknown-solaris2$SUN_REL + ;; + *:GNU:*:*) + # the GNU system + GNU_ARCH=`echo "$UNAME_MACHINE" | sed -e 's,[-/].*$,,'` + GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's,/.*$,,'` + GUESS=$GNU_ARCH-unknown-$LIBC$GNU_REL + ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + GNU_SYS=`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"` + GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_MACHINE-unknown-$GNU_SYS$GNU_REL-$LIBC + ;; + *:Minix:*:*) + GUESS=$UNAME_MACHINE-unknown-minix 
+ ;; + aarch64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + arc:Linux:*:* | arceb:Linux:*:* | arc32:Linux:*:* | arc64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + arm*:Linux:*:*) + set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabi + else + GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabihf + fi + fi + ;; + avr32*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + cris:Linux:*:*) + GUESS=$UNAME_MACHINE-axis-linux-$LIBC + ;; + crisv32:Linux:*:*) + GUESS=$UNAME_MACHINE-axis-linux-$LIBC + ;; + e2k:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + frv:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + hexagon:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + i*86:Linux:*:*) + GUESS=$UNAME_MACHINE-pc-linux-$LIBC + ;; + ia64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + k1om:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + m32r*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + m68*:Linux:*:*) + 
GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + mips:Linux:*:* | mips64:Linux:*:*) + set_cc_for_build + IS_GLIBC=0 + test x"${LIBC}" = xgnu && IS_GLIBC=1 + sed 's/^ //' << EOF > "$dummy.c" + #undef CPU + #undef mips + #undef mipsel + #undef mips64 + #undef mips64el + #if ${IS_GLIBC} && defined(_ABI64) + LIBCABI=gnuabi64 + #else + #if ${IS_GLIBC} && defined(_ABIN32) + LIBCABI=gnuabin32 + #else + LIBCABI=${LIBC} + #endif + #endif + + #if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa64r6 + #else + #if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa32r6 + #else + #if defined(__mips64) + CPU=mips64 + #else + CPU=mips + #endif + #endif + #endif + + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + MIPS_ENDIAN=el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + MIPS_ENDIAN= + #else + MIPS_ENDIAN= + #endif + #endif +EOF + cc_set_vars=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'` + eval "$cc_set_vars" + test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; } + ;; + mips64el:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + openrisc*:Linux:*:*) + GUESS=or1k-unknown-linux-$LIBC + ;; + or32:Linux:*:* | or1k*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + padre:Linux:*:*) + GUESS=sparc-unknown-linux-$LIBC + ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + GUESS=hppa64-unknown-linux-$LIBC + ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) GUESS=hppa1.1-unknown-linux-$LIBC ;; + PA8*) GUESS=hppa2.0-unknown-linux-$LIBC ;; + *) GUESS=hppa-unknown-linux-$LIBC ;; + esac + ;; + ppc64:Linux:*:*) + GUESS=powerpc64-unknown-linux-$LIBC + ;; + ppc:Linux:*:*) + GUESS=powerpc-unknown-linux-$LIBC + ;; + ppc64le:Linux:*:*) + 
GUESS=powerpc64le-unknown-linux-$LIBC + ;; + ppcle:Linux:*:*) + GUESS=powerpcle-unknown-linux-$LIBC + ;; + riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + s390:Linux:*:* | s390x:Linux:*:*) + GUESS=$UNAME_MACHINE-ibm-linux-$LIBC + ;; + sh64*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + sh*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + tile*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + vax:Linux:*:*) + GUESS=$UNAME_MACHINE-dec-linux-$LIBC + ;; + x86_64:Linux:*:*) + set_cc_for_build + LIBCABI=$LIBC + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_X32 >/dev/null + then + LIBCABI=${LIBC}x32 + fi + fi + GUESS=$UNAME_MACHINE-pc-linux-$LIBCABI + ;; + xtensa*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + GUESS=i386-sequent-sysv4 + ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + GUESS=$UNAME_MACHINE-pc-sysv4.2uw$UNAME_VERSION + ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. 
+ GUESS=$UNAME_MACHINE-pc-os2-emx + ;; + i*86:XTS-300:*:STOP) + GUESS=$UNAME_MACHINE-unknown-stop + ;; + i*86:atheos:*:*) + GUESS=$UNAME_MACHINE-unknown-atheos + ;; + i*86:syllable:*:*) + GUESS=$UNAME_MACHINE-pc-syllable + ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + GUESS=i386-unknown-lynxos$UNAME_RELEASE + ;; + i*86:*DOS:*:*) + GUESS=$UNAME_MACHINE-pc-msdosdjgpp + ;; + i*86:*:4.*:*) + UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + GUESS=$UNAME_MACHINE-univel-sysv$UNAME_REL + else + GUESS=$UNAME_MACHINE-pc-sysv$UNAME_REL + fi + ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + GUESS=$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + GUESS=$UNAME_MACHINE-pc-sco$UNAME_REL + else + GUESS=$UNAME_MACHINE-pc-sysv32 + fi + ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. + # Note: whatever this is, it MUST be the same as what config.sub + # prints for the "djgpp" host, or else GDB configure will decide that + # this is a cross-build. 
+ GUESS=i586-pc-msdosdjgpp + ;; + Intel:Mach:3*:*) + GUESS=i386-pc-mach3 + ;; + paragon:*:*:*) + GUESS=i860-intel-osf1 + ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + GUESS=i860-stardent-sysv$UNAME_RELEASE # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. + GUESS=i860-unknown-sysv$UNAME_RELEASE # Unknown i860-SVR4 + fi + ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + GUESS=m68010-convergent-sysv + ;; + mc68k:UNIX:SYSTEM5:3.51m) + GUESS=m68k-convergent-sysv + ;; + M680?0:D-NIX:5.3:*) + GUESS=m68k-diab-dnix + ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + GUESS=m68k-unknown-lynxos$UNAME_RELEASE + ;; + mc68030:UNIX_System_V:4.*:*) + GUESS=m68k-atari-sysv4 + ;; + TSUNAMI:LynxOS:2.*:*) + 
GUESS=sparc-unknown-lynxos$UNAME_RELEASE + ;; + rs6000:LynxOS:2.*:*) + GUESS=rs6000-unknown-lynxos$UNAME_RELEASE + ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + GUESS=powerpc-unknown-lynxos$UNAME_RELEASE + ;; + SM[BE]S:UNIX_SV:*:*) + GUESS=mips-dde-sysv$UNAME_RELEASE + ;; + RM*:ReliantUNIX-*:*:*) + GUESS=mips-sni-sysv4 + ;; + RM*:SINIX-*:*:*) + GUESS=mips-sni-sysv4 + ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + GUESS=$UNAME_MACHINE-sni-sysv4 + else + GUESS=ns32k-sni-sysv + fi + ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + GUESS=i586-unisys-sysv4 + ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + GUESS=hppa1.1-stratus-sysv4 + ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + GUESS=i860-stratus-sysv4 + ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + GUESS=$UNAME_MACHINE-stratus-vos + ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + GUESS=hppa1.1-stratus-vos + ;; + mc68*:A/UX:*:*) + GUESS=m68k-apple-aux$UNAME_RELEASE + ;; + news*:NEWS-OS:6*:*) + GUESS=mips-sony-newsos6 + ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if test -d /usr/nec; then + GUESS=mips-nec-sysv$UNAME_RELEASE + else + GUESS=mips-unknown-sysv$UNAME_RELEASE + fi + ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + GUESS=powerpc-be-beos + ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + GUESS=powerpc-apple-beos + ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + GUESS=i586-pc-beos + ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
+ GUESS=i586-pc-haiku + ;; + x86_64:Haiku:*:*) + GUESS=x86_64-unknown-haiku + ;; + SX-4:SUPER-UX:*:*) + GUESS=sx4-nec-superux$UNAME_RELEASE + ;; + SX-5:SUPER-UX:*:*) + GUESS=sx5-nec-superux$UNAME_RELEASE + ;; + SX-6:SUPER-UX:*:*) + GUESS=sx6-nec-superux$UNAME_RELEASE + ;; + SX-7:SUPER-UX:*:*) + GUESS=sx7-nec-superux$UNAME_RELEASE + ;; + SX-8:SUPER-UX:*:*) + GUESS=sx8-nec-superux$UNAME_RELEASE + ;; + SX-8R:SUPER-UX:*:*) + GUESS=sx8r-nec-superux$UNAME_RELEASE + ;; + SX-ACE:SUPER-UX:*:*) + GUESS=sxace-nec-superux$UNAME_RELEASE + ;; + Power*:Rhapsody:*:*) + GUESS=powerpc-apple-rhapsody$UNAME_RELEASE + ;; + *:Rhapsody:*:*) + GUESS=$UNAME_MACHINE-apple-rhapsody$UNAME_RELEASE + ;; + arm64:Darwin:*:*) + GUESS=aarch64-apple-darwin$UNAME_RELEASE + ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` + case $UNAME_PROCESSOR in + unknown) UNAME_PROCESSOR=powerpc ;; + esac + if command -v xcode-select > /dev/null 2> /dev/null && \ + ! xcode-select --print-path > /dev/null 2> /dev/null ; then + # Avoid executing cc if there is no toolchain installed as + # cc will be a stub that puts up a graphical alert + # prompting the user to install developer tools. 
+ CC_FOR_BUILD=no_compiler_found + else + set_cc_for_build + fi + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc + fi + elif test "$UNAME_PROCESSOR" = i386 ; then + # uname -m returns i386 or x86_64 + UNAME_PROCESSOR=$UNAME_MACHINE + fi + GUESS=$UNAME_PROCESSOR-apple-darwin$UNAME_RELEASE + ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = x86; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + GUESS=$UNAME_PROCESSOR-$UNAME_MACHINE-nto-qnx$UNAME_RELEASE + ;; + *:QNX:*:4*) + GUESS=i386-pc-qnx + ;; + NEO-*:NONSTOP_KERNEL:*:*) + GUESS=neo-tandem-nsk$UNAME_RELEASE + ;; + NSE-*:NONSTOP_KERNEL:*:*) + GUESS=nse-tandem-nsk$UNAME_RELEASE + ;; + NSR-*:NONSTOP_KERNEL:*:*) + GUESS=nsr-tandem-nsk$UNAME_RELEASE + ;; + NSV-*:NONSTOP_KERNEL:*:*) + GUESS=nsv-tandem-nsk$UNAME_RELEASE + ;; + NSX-*:NONSTOP_KERNEL:*:*) + GUESS=nsx-tandem-nsk$UNAME_RELEASE + ;; + *:NonStop-UX:*:*) + GUESS=mips-compaq-nonstopux + ;; + BS2000:POSIX*:*:*) + GUESS=bs2000-siemens-sysv + ;; + DS/*:UNIX_System_V:*:*) + GUESS=$UNAME_MACHINE-$UNAME_SYSTEM-$UNAME_RELEASE + ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "${cputype-}" = 386; then + UNAME_MACHINE=i386 + elif test "x${cputype-}" != x; then + UNAME_MACHINE=$cputype + fi + GUESS=$UNAME_MACHINE-unknown-plan9 + ;; + *:TOPS-10:*:*) + GUESS=pdp10-unknown-tops10 + ;; + *:TENEX:*:*) + GUESS=pdp10-unknown-tenex + ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + GUESS=pdp10-dec-tops20 + ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + GUESS=pdp10-xkl-tops20 + ;; + *:TOPS-20:*:*) + GUESS=pdp10-unknown-tops20 + ;; + *:ITS:*:*) + GUESS=pdp10-unknown-its + ;; + SEI:*:*:SEIUX) + GUESS=mips-sei-seiux$UNAME_RELEASE + ;; + *:DragonFly:*:*) + DRAGONFLY_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_MACHINE-unknown-dragonfly$DRAGONFLY_REL + ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case $UNAME_MACHINE in + A*) GUESS=alpha-dec-vms ;; + I*) GUESS=ia64-dec-vms ;; + V*) GUESS=vax-dec-vms ;; + esac ;; + *:XENIX:*:SysV) + GUESS=i386-pc-xenix + ;; + i*86:skyos:*:*) + SKYOS_REL=`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'` + GUESS=$UNAME_MACHINE-pc-skyos$SKYOS_REL + ;; + i*86:rdos:*:*) + GUESS=$UNAME_MACHINE-pc-rdos + ;; + *:AROS:*:*) + GUESS=$UNAME_MACHINE-unknown-aros + ;; + x86_64:VMkernel:*:*) + GUESS=$UNAME_MACHINE-unknown-esx + ;; + amd64:Isilon\ OneFS:*:*) + GUESS=x86_64-unknown-onefs + ;; + *:Unleashed:*:*) + GUESS=$UNAME_MACHINE-unknown-unleashed$UNAME_RELEASE + ;; +esac + +# Do we have a guess based on uname results? +if test "x$GUESS" != x; then + echo "$GUESS" + exit +fi + +# No uname command or uname output not recognized. 
+set_cc_for_build +cat > "$dummy.c" < +#include +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#include +#if defined(_SIZE_T_) || defined(SIGLOST) +#include +#endif +#endif +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... */ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); +#endif + +#if defined (vax) +#if !defined (ultrix) +#include +#if defined (BSD) +#if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +#else +#if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#endif +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#else +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname un; + uname (&un); + printf ("vax-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("vax-dec-ultrix\n"); exit (0); +#endif +#endif +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname *un; + uname (&un); + printf ("mips-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("mips-dec-ultrix\n"); exit (0); +#endif +#endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=`"$dummy"` && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. +test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; } + +echo "$0: unable to guess system type" >&2 + +case $UNAME_MACHINE:$UNAME_SYSTEM in + mips:Linux | mips64:Linux) + # If we got here on MIPS GNU/Linux, output extra information. 
+ cat >&2 <&2 <&2 </dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = "$UNAME_MACHINE" +UNAME_RELEASE = "$UNAME_RELEASE" +UNAME_SYSTEM = "$UNAME_SYSTEM" +UNAME_VERSION = "$UNAME_VERSION" +EOF +fi + +exit 1 + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/config.sub b/config.sub new file mode 100755 index 0000000..d74fb6d --- /dev/null +++ b/config.sub @@ -0,0 +1,1884 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright 1992-2021 Free Software Foundation, Inc. + +# shellcheck disable=SC2006,SC2268 # see below for rationale + +timestamp='2021-08-14' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . 
+# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). + + +# Please send patches to <config-patches@gnu.org>. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/cgit/config.git/plain/config.sub + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +# The "shellcheck disable" line above the timestamp inhibits complaints +# about features and limitations of the classic Bourne shell that were +# superseded or lifted in POSIX. However, this script identifies a wide +# variety of pre-POSIX systems that do not have POSIX shells at all, and +# even some reasonably current systems (Solaris 10 as case-in-point) still +# have a pre-POSIX /bin/sh. 
+ +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS + +Canonicalize a configuration name. + +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.sub ($timestamp) + +Copyright 1992-2021 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + + *local*) + # First pass through any local machine types. 
+ echo "$1" + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Split fields of configuration type +# shellcheck disable=SC2162 +saved_IFS=$IFS +IFS="-" read field1 field2 field3 field4 <&2 + exit 1 + ;; + *-*-*-*) + basic_machine=$field1-$field2 + basic_os=$field3-$field4 + ;; + *-*-*) + # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two + # parts + maybe_os=$field2-$field3 + case $maybe_os in + nto-qnx* | linux-* | uclinux-uclibc* \ + | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ + | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ + | storm-chaos* | os2-emx* | rtmk-nova*) + basic_machine=$field1 + basic_os=$maybe_os + ;; + android-linux) + basic_machine=$field1-unknown + basic_os=linux-android + ;; + *) + basic_machine=$field1-$field2 + basic_os=$field3 + ;; + esac + ;; + *-*) + # A lone config we happen to match not fitting any pattern + case $field1-$field2 in + decstation-3100) + basic_machine=mips-dec + basic_os= + ;; + *-*) + # Second component is usually, but not always the OS + case $field2 in + # Prevent following clause from handling this valid os + sun*os*) + basic_machine=$field1 + basic_os=$field2 + ;; + zephyr*) + basic_machine=$field1-unknown + basic_os=$field2 + ;; + # Manufacturers + dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \ + | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ + | unicom* | ibm* | next | hp | isi* | apollo | altos* \ + | convergent* | ncr* | news | 32* | 3600* | 3100* \ + | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ + | ultra | tti* | harris | dolphin | highlevel | gould \ + | cbm | ns | masscomp | apple | axis | knuth | cray \ + | microblaze* | sim | cisco \ + | oki | wec | wrs | winbond) + basic_machine=$field1-$field2 + basic_os= + ;; + *) + basic_machine=$field1 + basic_os=$field2 + ;; + esac + ;; + esac + ;; + 
*) + # Convert single-component short-hands not valid as part of + # multi-component configurations. + case $field1 in + 386bsd) + basic_machine=i386-pc + basic_os=bsd + ;; + a29khif) + basic_machine=a29k-amd + basic_os=udi + ;; + adobe68k) + basic_machine=m68010-adobe + basic_os=scout + ;; + alliant) + basic_machine=fx80-alliant + basic_os= + ;; + altos | altos3068) + basic_machine=m68k-altos + basic_os= + ;; + am29k) + basic_machine=a29k-none + basic_os=bsd + ;; + amdahl) + basic_machine=580-amdahl + basic_os=sysv + ;; + amiga) + basic_machine=m68k-unknown + basic_os= + ;; + amigaos | amigados) + basic_machine=m68k-unknown + basic_os=amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + basic_os=sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + basic_os=sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + basic_os=bsd + ;; + aros) + basic_machine=i386-pc + basic_os=aros + ;; + aux) + basic_machine=m68k-apple + basic_os=aux + ;; + balance) + basic_machine=ns32k-sequent + basic_os=dynix + ;; + blackfin) + basic_machine=bfin-unknown + basic_os=linux + ;; + cegcc) + basic_machine=arm-unknown + basic_os=cegcc + ;; + convex-c1) + basic_machine=c1-convex + basic_os=bsd + ;; + convex-c2) + basic_machine=c2-convex + basic_os=bsd + ;; + convex-c32) + basic_machine=c32-convex + basic_os=bsd + ;; + convex-c34) + basic_machine=c34-convex + basic_os=bsd + ;; + convex-c38) + basic_machine=c38-convex + basic_os=bsd + ;; + cray) + basic_machine=j90-cray + basic_os=unicos + ;; + crds | unos) + basic_machine=m68k-crds + basic_os= + ;; + da30) + basic_machine=m68k-da30 + basic_os= + ;; + decstation | pmax | pmin | dec3100 | decstatn) + basic_machine=mips-dec + basic_os= + ;; + delta88) + basic_machine=m88k-motorola + basic_os=sysv3 + ;; + dicos) + basic_machine=i686-pc + basic_os=dicos + ;; + djgpp) + basic_machine=i586-pc + basic_os=msdosdjgpp + ;; + ebmon29k) + basic_machine=a29k-amd + basic_os=ebmon + ;; + es1800 | OSE68k | ose68k | ose | OSE) + 
basic_machine=m68k-ericsson + basic_os=ose + ;; + gmicro) + basic_machine=tron-gmicro + basic_os=sysv + ;; + go32) + basic_machine=i386-pc + basic_os=go32 + ;; + h8300hms) + basic_machine=h8300-hitachi + basic_os=hms + ;; + h8300xray) + basic_machine=h8300-hitachi + basic_os=xray + ;; + h8500hms) + basic_machine=h8500-hitachi + basic_os=hms + ;; + harris) + basic_machine=m88k-harris + basic_os=sysv3 + ;; + hp300 | hp300hpux) + basic_machine=m68k-hp + basic_os=hpux + ;; + hp300bsd) + basic_machine=m68k-hp + basic_os=bsd + ;; + hppaosf) + basic_machine=hppa1.1-hp + basic_os=osf + ;; + hppro) + basic_machine=hppa1.1-hp + basic_os=proelf + ;; + i386mach) + basic_machine=i386-mach + basic_os=mach + ;; + isi68 | isi) + basic_machine=m68k-isi + basic_os=sysv + ;; + m68knommu) + basic_machine=m68k-unknown + basic_os=linux + ;; + magnum | m3230) + basic_machine=mips-mips + basic_os=sysv + ;; + merlin) + basic_machine=ns32k-utek + basic_os=sysv + ;; + mingw64) + basic_machine=x86_64-pc + basic_os=mingw64 + ;; + mingw32) + basic_machine=i686-pc + basic_os=mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + basic_os=mingw32ce + ;; + monitor) + basic_machine=m68k-rom68k + basic_os=coff + ;; + morphos) + basic_machine=powerpc-unknown + basic_os=morphos + ;; + moxiebox) + basic_machine=moxie-unknown + basic_os=moxiebox + ;; + msdos) + basic_machine=i386-pc + basic_os=msdos + ;; + msys) + basic_machine=i686-pc + basic_os=msys + ;; + mvs) + basic_machine=i370-ibm + basic_os=mvs + ;; + nacl) + basic_machine=le32-unknown + basic_os=nacl + ;; + ncr3000) + basic_machine=i486-ncr + basic_os=sysv4 + ;; + netbsd386) + basic_machine=i386-pc + basic_os=netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + basic_os=linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + basic_os=newsos + ;; + news1000) + basic_machine=m68030-sony + basic_os=newsos + ;; + necv70) + basic_machine=v70-nec + basic_os=sysv + ;; + nh3000) + basic_machine=m68k-harris + basic_os=cxux + ;; 
+ nh[45]000) + basic_machine=m88k-harris + basic_os=cxux + ;; + nindy960) + basic_machine=i960-intel + basic_os=nindy + ;; + mon960) + basic_machine=i960-intel + basic_os=mon960 + ;; + nonstopux) + basic_machine=mips-compaq + basic_os=nonstopux + ;; + os400) + basic_machine=powerpc-ibm + basic_os=os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + basic_os=ose + ;; + os68k) + basic_machine=m68k-none + basic_os=os68k + ;; + paragon) + basic_machine=i860-intel + basic_os=osf + ;; + parisc) + basic_machine=hppa-unknown + basic_os=linux + ;; + psp) + basic_machine=mipsallegrexel-sony + basic_os=psp + ;; + pw32) + basic_machine=i586-unknown + basic_os=pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + basic_os=rdos + ;; + rdos32) + basic_machine=i386-pc + basic_os=rdos + ;; + rom68k) + basic_machine=m68k-rom68k + basic_os=coff + ;; + sa29200) + basic_machine=a29k-amd + basic_os=udi + ;; + sei) + basic_machine=mips-sei + basic_os=seiux + ;; + sequent) + basic_machine=i386-sequent + basic_os= + ;; + sps7) + basic_machine=m68k-bull + basic_os=sysv2 + ;; + st2000) + basic_machine=m68k-tandem + basic_os= + ;; + stratus) + basic_machine=i860-stratus + basic_os=sysv4 + ;; + sun2) + basic_machine=m68000-sun + basic_os= + ;; + sun2os3) + basic_machine=m68000-sun + basic_os=sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + basic_os=sunos4 + ;; + sun3) + basic_machine=m68k-sun + basic_os= + ;; + sun3os3) + basic_machine=m68k-sun + basic_os=sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + basic_os=sunos4 + ;; + sun4) + basic_machine=sparc-sun + basic_os= + ;; + sun4os3) + basic_machine=sparc-sun + basic_os=sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + basic_os=sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + basic_os=solaris2 + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + basic_os= + ;; + sv1) + basic_machine=sv1-cray + basic_os=unicos + ;; + symmetry) + basic_machine=i386-sequent + basic_os=dynix + ;; + t3e) + 
basic_machine=alphaev5-cray + basic_os=unicos + ;; + t90) + basic_machine=t90-cray + basic_os=unicos + ;; + toad1) + basic_machine=pdp10-xkl + basic_os=tops20 + ;; + tpf) + basic_machine=s390x-ibm + basic_os=tpf + ;; + udi29k) + basic_machine=a29k-amd + basic_os=udi + ;; + ultra3) + basic_machine=a29k-nyu + basic_os=sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + basic_os=none + ;; + vaxv) + basic_machine=vax-dec + basic_os=sysv + ;; + vms) + basic_machine=vax-dec + basic_os=vms + ;; + vsta) + basic_machine=i386-pc + basic_os=vsta + ;; + vxworks960) + basic_machine=i960-wrs + basic_os=vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + basic_os=vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + basic_os=vxworks + ;; + xbox) + basic_machine=i686-pc + basic_os=mingw32 + ;; + ymp) + basic_machine=ymp-cray + basic_os=unicos + ;; + *) + basic_machine=$1 + basic_os= + ;; + esac + ;; +esac + +# Decode 1-component or ad-hoc basic machines +case $basic_machine in + # Here we handle the default manufacturer of certain CPU types. It is in + # some cases the only manufacturer, in others, it is the most popular. + w89k) + cpu=hppa1.1 + vendor=winbond + ;; + op50n) + cpu=hppa1.1 + vendor=oki + ;; + op60c) + cpu=hppa1.1 + vendor=oki + ;; + ibm*) + cpu=i370 + vendor=ibm + ;; + orion105) + cpu=clipper + vendor=highlevel + ;; + mac | mpw | mac-mpw) + cpu=m68k + vendor=apple + ;; + pmac | pmac-mpw) + cpu=powerpc + vendor=apple + ;; + + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + cpu=m68000 + vendor=att + ;; + 3b*) + cpu=we32k + vendor=att + ;; + bluegene*) + cpu=powerpc + vendor=ibm + basic_os=cnk + ;; + decsystem10* | dec10*) + cpu=pdp10 + vendor=dec + basic_os=tops10 + ;; + decsystem20* | dec20*) + cpu=pdp10 + vendor=dec + basic_os=tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + cpu=m68k + vendor=motorola + ;; + dpx2*) + cpu=m68k + vendor=bull + basic_os=sysv3 + ;; + encore | umax | mmax) + cpu=ns32k + vendor=encore + ;; + elxsi) + cpu=elxsi + vendor=elxsi + basic_os=${basic_os:-bsd} + ;; + fx2800) + cpu=i860 + vendor=alliant + ;; + genix) + cpu=ns32k + vendor=ns + ;; + h3050r* | hiux*) + cpu=hppa1.1 + vendor=hitachi + basic_os=hiuxwe2 + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + cpu=m68000 + vendor=hp + ;; + hp9k3[2-9][0-9]) + cpu=m68k + vendor=hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + cpu=hppa1.1 + vendor=hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + i*86v32) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + basic_os=sysv32 + ;; + i*86v4*) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + basic_os=sysv4 + ;; + i*86v) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + basic_os=sysv + ;; + i*86sol2) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + basic_os=solaris2 + ;; + j90 | j90-cray) + cpu=j90 + vendor=cray + basic_os=${basic_os:-unicos} + ;; + iris | iris4d) + cpu=mips + vendor=sgi + case $basic_os in + irix*) + ;; + *) + basic_os=irix4 + ;; 
+ esac + ;; + miniframe) + cpu=m68000 + vendor=convergent + ;; + *mint | mint[0-9]* | *MiNT | *MiNT[0-9]*) + cpu=m68k + vendor=atari + basic_os=mint + ;; + news-3600 | risc-news) + cpu=mips + vendor=sony + basic_os=newsos + ;; + next | m*-next) + cpu=m68k + vendor=next + case $basic_os in + openstep*) + ;; + nextstep*) + ;; + ns2*) + basic_os=nextstep2 + ;; + *) + basic_os=nextstep3 + ;; + esac + ;; + np1) + cpu=np1 + vendor=gould + ;; + op50n-* | op60c-*) + cpu=hppa1.1 + vendor=oki + basic_os=proelf + ;; + pa-hitachi) + cpu=hppa1.1 + vendor=hitachi + basic_os=hiuxwe2 + ;; + pbd) + cpu=sparc + vendor=tti + ;; + pbb) + cpu=m68k + vendor=tti + ;; + pc532) + cpu=ns32k + vendor=pc532 + ;; + pn) + cpu=pn + vendor=gould + ;; + power) + cpu=power + vendor=ibm + ;; + ps2) + cpu=i386 + vendor=ibm + ;; + rm[46]00) + cpu=mips + vendor=siemens + ;; + rtpc | rtpc-*) + cpu=romp + vendor=ibm + ;; + sde) + cpu=mipsisa32 + vendor=sde + basic_os=${basic_os:-elf} + ;; + simso-wrs) + cpu=sparclite + vendor=wrs + basic_os=vxworks + ;; + tower | tower-32) + cpu=m68k + vendor=ncr + ;; + vpp*|vx|vx-*) + cpu=f301 + vendor=fujitsu + ;; + w65) + cpu=w65 + vendor=wdc + ;; + w89k-*) + cpu=hppa1.1 + vendor=winbond + basic_os=proelf + ;; + none) + cpu=none + vendor=none + ;; + leon|leon[3-9]) + cpu=sparc + vendor=$basic_machine + ;; + leon-*|leon[3-9]-*) + cpu=sparc + vendor=`echo "$basic_machine" | sed 's/-.*//'` + ;; + + *-*) + # shellcheck disable=SC2162 + saved_IFS=$IFS + IFS="-" read cpu vendor <&2 + exit 1 + ;; + esac + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $vendor in + digital*) + vendor=dec + ;; + commodore*) + vendor=cbm + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if test x$basic_os != x +then + +# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just +# set os. 
+case $basic_os in + gnu/linux*) + kernel=linux + os=`echo "$basic_os" | sed -e 's|gnu/linux|gnu|'` + ;; + os2-emx) + kernel=os2 + os=`echo "$basic_os" | sed -e 's|os2-emx|emx|'` + ;; + nto-qnx*) + kernel=nto + os=`echo "$basic_os" | sed -e 's|nto-qnx|qnx|'` + ;; + *-*) + # shellcheck disable=SC2162 + saved_IFS=$IFS + IFS="-" read kernel os <&2 + exit 1 + ;; +esac + +# As a final step for OS-related things, validate the OS-kernel combination +# (given a valid OS), if there is a kernel. +case $kernel-$os in + linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* \ + | linux-musl* | linux-relibc* | linux-uclibc* ) + ;; + uclinux-uclibc* ) + ;; + -dietlibc* | -newlib* | -musl* | -relibc* | -uclibc* ) + # These are just libc implementations, not actual OSes, and thus + # require a kernel. + echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 + exit 1 + ;; + kfreebsd*-gnu* | kopensolaris*-gnu*) + ;; + vxworks-simlinux | vxworks-simwindows | vxworks-spe) + ;; + nto-qnx*) + ;; + os2-emx) + ;; + *-eabi* | *-gnueabi*) + ;; + -*) + # Blank kernel with real OS is always fine. + ;; + *-*) + echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 + exit 1 + ;; +esac + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+case $vendor in + unknown) + case $cpu-$os in + *-riscix*) + vendor=acorn + ;; + *-sunos*) + vendor=sun + ;; + *-cnk* | *-aix*) + vendor=ibm + ;; + *-beos*) + vendor=be + ;; + *-hpux*) + vendor=hp + ;; + *-mpeix*) + vendor=hp + ;; + *-hiux*) + vendor=hitachi + ;; + *-unos*) + vendor=crds + ;; + *-dgux*) + vendor=dg + ;; + *-luna*) + vendor=omron + ;; + *-genix*) + vendor=ns + ;; + *-clix*) + vendor=intergraph + ;; + *-mvs* | *-opened*) + vendor=ibm + ;; + *-os400*) + vendor=ibm + ;; + s390-* | s390x-*) + vendor=ibm + ;; + *-ptx*) + vendor=sequent + ;; + *-tpf*) + vendor=ibm + ;; + *-vxsim* | *-vxworks* | *-windiss*) + vendor=wrs + ;; + *-aux*) + vendor=apple + ;; + *-hms*) + vendor=hitachi + ;; + *-mpw* | *-macos*) + vendor=apple + ;; + *-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*) + vendor=atari + ;; + *-vos*) + vendor=stratus + ;; + esac + ;; +esac + +echo "$cpu-$vendor-${kernel:+$kernel-}$os" +exit + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/configure b/configure new file mode 100755 index 0000000..fa7e664 --- /dev/null +++ b/configure @@ -0,0 +1,19255 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.72 for PCRE2 10.44. +# +# +# Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation, +# Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else case e in #( + e) case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac ;; +esac +fi + + + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. +# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. +as_nl=' +' +export as_nl +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi + +# The user is always right. +if ${PATH_SEPARATOR+false} :; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# Find who we are. Look in the path if we contain no directory separator. 
+as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as 'sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed 'exec'. +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else case e in #( + e) case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ) +then : + +else case e in #( + e) exitcode=1; echo positional parameters were not saved. ;; +esac +fi +test x\$exitcode = x0 || exit 1 +blah=\$(echo \$(echo blah)) +test x\"\$blah\" = xblah || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 + + test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\ + || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null +then : + as_have_required=yes +else case e in #( + e) as_have_required=no ;; 
+esac +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null +then : + +else case e in #( + e) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. + as_shell=$as_dir$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + as_run=a "$as_shell" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$as_shell as_have_required=yes + if as_run=a "$as_shell" -c "$as_bourne_compatible""$as_suggested" 2>/dev/null +then : + break 2 +fi +fi + done;; + esac + as_found=false +done +IFS=$as_save_IFS +if $as_found +then : + +else case e in #( + e) if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi ;; +esac +fi + + + if test "x$CONFIG_SHELL" != x +then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed 'exec'. 
+printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno +then : + printf "%s\n" "$0: This script requires a shell more modern than all" + printf "%s\n" "$0: the shells that I found on your system." + if test ${ZSH_VERSION+y} ; then + printf "%s\n" "$0: In particular, zsh $ZSH_VERSION has bugs and should" + printf "%s\n" "$0: be upgraded to zsh 4.3.4 or later." + else + printf "%s\n" "$0: Please tell bug-autoconf@gnu.org about your system, +$0: including any error possibly output before this +$0: message. Then install a modern shell, or manually run +$0: the script under such a shell if you do have one." + fi + exit 1 +fi ;; +esac +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else case e in #( + e) as_fn_append () + { + eval $1=\$$1\$2 + } ;; +esac +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. 
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else case e in #( + e) as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } ;; +esac +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + printf "%s\n" "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. 
+as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + t clear + :clear + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { printf "%s\n" "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. 
New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both 'ln -s file dir' and 'ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; 'ln -s' creates a wrapper executable. + # In both cases, we have to default to 'cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_sed_cpp="y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" +as_tr_cpp="eval sed '$as_sed_cpp'" # deprecated + +# Sed expression to map a string onto a valid variable name. +as_sed_sh="y%*+%pp%;s%[^_$as_cr_alnum]%_%g" +as_tr_sh="eval sed '$as_sed_sh'" # deprecated + +SHELL=${CONFIG_SHELL-/bin/sh} + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. 
+PACKAGE_NAME='PCRE2' +PACKAGE_TARNAME='pcre2' +PACKAGE_VERSION='10.44' +PACKAGE_STRING='PCRE2 10.44' +PACKAGE_BUGREPORT='' +PACKAGE_URL='' + +ac_unique_file="src/pcre2.h.in" +# Factoring default headers for most tests. +ac_includes_default="\ +#include +#ifdef HAVE_STDIO_H +# include +#endif +#ifdef HAVE_STDLIB_H +# include +#endif +#ifdef HAVE_STRING_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_header_c_list= +enable_year2038=no +ac_subst_vars='am__EXEEXT_FALSE +am__EXEEXT_TRUE +LTLIBOBJS +LIBOBJS +LIB_POSTFIX +CET_CFLAGS +WITH_GCOV_FALSE +WITH_GCOV_TRUE +GCOV_LIBS +GCOV_CXXFLAGS +GCOV_CFLAGS +GENHTML +LCOV +SHTOOL +VALGRIND_LIBS +VALGRIND_CFLAGS +PKG_CONFIG_LIBDIR +PKG_CONFIG_PATH +PKG_CONFIG +LIBBZ2 +LIBZ +DISTCHECK_CONFIGURE_FLAGS +EXTRA_LIBPCRE2_POSIX_LDFLAGS +EXTRA_LIBPCRE2_32_LDFLAGS +EXTRA_LIBPCRE2_16_LDFLAGS +EXTRA_LIBPCRE2_8_LDFLAGS +PTHREAD_CFLAGS +PTHREAD_LIBS +PTHREAD_CC +ax_pthread_config +PCRE2POSIX_CFLAG +PCRE2_STATIC_CFLAG +LIBREADLINE +WITH_DIFF_FUZZ_SUPPORT_FALSE +WITH_DIFF_FUZZ_SUPPORT_TRUE +WITH_FUZZ_SUPPORT_FALSE +WITH_FUZZ_SUPPORT_TRUE +WITH_VALGRIND_FALSE +WITH_VALGRIND_TRUE +WITH_UNICODE_FALSE +WITH_UNICODE_TRUE +WITH_JIT_FALSE +WITH_JIT_TRUE +WITH_REBUILD_CHARTABLES_FALSE +WITH_REBUILD_CHARTABLES_TRUE +WITH_DEBUG_FALSE +WITH_DEBUG_TRUE +WITH_PCRE2_32_FALSE +WITH_PCRE2_32_TRUE +WITH_PCRE2_16_FALSE +WITH_PCRE2_16_TRUE +WITH_PCRE2_8_FALSE +WITH_PCRE2_8_TRUE +enable_pcre2_32 +enable_pcre2_16 +enable_pcre2_8 +PCRE2_DATE +PCRE2_PRERELEASE +PCRE2_MINOR +PCRE2_MAJOR +HAVE_VISIBILITY +VISIBILITY_CXXFLAGS +VISIBILITY_CFLAGS +LT_SYS_LIBRARY_PATH +OTOOL64 +OTOOL +LIPO +NMEDIT +DSYMUTIL +MANIFEST_TOOL +RANLIB +FILECMD +LN_S +NM +ac_ct_DUMPBIN +DUMPBIN +LD +FGREP +EGREP +GREP +SED +host_os +host_vendor +host_cpu +host 
+build_os +build_vendor +build_cpu +build +LIBTOOL +OBJDUMP +DLLTOOL +AS +ac_ct_AR +AR +am__fastdepCC_FALSE +am__fastdepCC_TRUE +CCDEPMODE +am__nodep +AMDEPBACKSLASH +AMDEP_FALSE +AMDEP_TRUE +am__include +DEPDIR +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +AM_BACKSLASH +AM_DEFAULT_VERBOSITY +AM_DEFAULT_V +AM_V +CSCOPE +ETAGS +CTAGS +am__untar +am__tar +AMTAR +am__leading_dot +SET_MAKE +AWK +mkdir_p +MKDIR_P +INSTALL_STRIP_PROGRAM +STRIP +install_sh +MAKEINFO +AUTOHEADER +AUTOMAKE +AUTOCONF +ACLOCAL +VERSION +PACKAGE +CYGPATH_W +am__isrc +INSTALL_DATA +INSTALL_SCRIPT +INSTALL_PROGRAM +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +runstatedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL +am__quote' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +enable_silent_rules +enable_dependency_tracking +enable_shared +enable_static +with_pic +enable_fast_install +with_aix_soname +with_gnu_ld +with_sysroot +enable_libtool_lock +enable_largefile +enable_pcre8 +enable_pcre16 +enable_pcre32 +enable_pcre2_8 +enable_pcre2_16 +enable_pcre2_32 +enable_debug +enable_jit +enable_jit_sealloc +enable_pcre2grep_jit +enable_pcre2grep_callout +enable_pcre2grep_callout_fork +enable_rebuild_chartables +enable_unicode +enable_newline_is_cr +enable_newline_is_lf +enable_newline_is_crlf +enable_newline_is_anycrlf +enable_newline_is_any +enable_newline_is_nul +enable_bsr_anycrlf +enable_never_backslash_C +enable_ebcdic +enable_ebcdic_nl25 +enable_pcre2grep_libz +enable_pcre2grep_libbz2 +with_pcre2grep_bufsize +with_pcre2grep_max_bufsize +enable_pcre2test_libedit +enable_pcre2test_libreadline +with_link_size +with_max_varlookbehind 
+with_parens_nest_limit +with_heap_limit +with_match_limit +with_match_limit_depth +with_match_limit_recursion +enable_valgrind +enable_coverage +enable_fuzz_support +enable_diff_fuzz_support +enable_stack_for_recursion +enable_percent_zt +enable_year2038 +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +LT_SYS_LIBRARY_PATH +PKG_CONFIG +PKG_CONFIG_PATH +PKG_CONFIG_LIBDIR +VALGRIND_CFLAGS +VALGRIND_LIBS +LCOV +GENHTML' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +runstatedir='${localstatedir}/run' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. 
+ if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: '$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: '$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -runstatedir | --runstatedir | --runstatedi | --runstated \ + | --runstate | --runstat | --runsta | --runst | --runs \ + | --run | --ru | --r) + ac_prev=runstatedir ;; + -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ + | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ + | --run=* | --ru=* | --r=*) + runstatedir=$ac_optarg ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | 
--shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: '$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid package name: '$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. + with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: '$ac_option' +Try '$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: '$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + printf "%s\n" "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + printf "%s\n" "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? 
"missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. +for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir runstatedir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: '$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. 
+if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but 'cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +'configure' configures PCRE2 10.44 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... 
+ +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print 'checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for '--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or '..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, 'make install' will install all the files in +'$ac_default_prefix/bin', '$ac_default_prefix/lib' etc. You can specify +an installation prefix other than '$ac_default_prefix' using '--prefix', +for instance '--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/pcre2] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names + +System types: + --build=BUILD configure for building on BUILD [guessed] + --host=HOST cross-compile to build programs to run on HOST [BUILD] +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of PCRE2 10.44:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized --enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-silent-rules less verbose build 
output (undo: "make V=1") + --disable-silent-rules verbose build output (undo: "make V=0") + --enable-dependency-tracking + do not reject slow dependency extractors + --disable-dependency-tracking + speeds up one-time build + --enable-shared[=PKGS] build shared libraries [default=yes] + --enable-static[=PKGS] build static libraries [default=yes] + --enable-fast-install[=PKGS] + optimize for fast installation [default=yes] + --disable-libtool-lock avoid locking (might break parallel builds) + --disable-largefile omit support for large files + + --disable-pcre2-8 disable 8 bit character support + --enable-pcre2-16 enable 16 bit character support + --enable-pcre2-32 enable 32 bit character support + --enable-debug enable debugging code + --enable-jit enable Just-In-Time compiling support + --enable-jit-sealloc enable SELinux compatible execmem allocator in JIT + (experimental) + --disable-pcre2grep-jit disable JIT support in pcre2grep + --disable-pcre2grep-callout + disable callout script support in pcre2grep + --disable-pcre2grep-callout-fork + disable callout script fork support in pcre2grep + --enable-rebuild-chartables + rebuild character tables in current locale + --disable-unicode disable Unicode support + --enable-newline-is-cr use CR as newline character + --enable-newline-is-lf use LF as newline character (default) + --enable-newline-is-crlf + use CRLF as newline sequence + --enable-newline-is-anycrlf + use CR, LF, or CRLF as newline sequence + --enable-newline-is-any use any valid Unicode newline sequence + --enable-newline-is-nul use NUL (binary zero) as newline character + --enable-bsr-anycrlf \R matches only CR, LF, CRLF by default + --enable-never-backslash-C + use of \C causes an error + --enable-ebcdic assume EBCDIC coding rather than ASCII; incompatible + with --enable-utf; use only in (uncommon) EBCDIC + environments; it implies --enable-rebuild-chartables + --enable-ebcdic-nl25 set EBCDIC code for NL to 0x25 instead of 0x15; it + implies 
--enable-ebcdic + --enable-pcre2grep-libz link pcre2grep with libz to handle .gz files + --enable-pcre2grep-libbz2 + link pcre2grep with libbz2 to handle .bz2 files + --enable-pcre2test-libedit + link pcre2test with libedit + --enable-pcre2test-libreadline + link pcre2test with libreadline + --enable-valgrind enable valgrind support + --enable-coverage enable code coverage reports using gcov + --enable-fuzz-support enable fuzzer support + --enable-diff-fuzz-support + enable differential fuzzer support + --disable-percent-zt disable the use of z and t formatting modifiers + --enable-year2038 support timestamps after 2038 + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use + both] + --with-aix-soname=aix|svr4|both + shared library versioning (aka "SONAME") variant to + provide on AIX, [default=aix]. + --with-gnu-ld assume the C compiler uses GNU ld [default=no] + --with-sysroot[=DIR] Search for dependent libraries within DIR (or the + compiler's sysroot if not specified). + --with-pcre2grep-bufsize=N + pcre2grep initial buffer size (default=20480, + minimum=8192) + --with-pcre2grep-max-bufsize=N + pcre2grep maximum buffer size (default=1048576, + minimum=8192) + --with-link-size=N internal link size (2, 3, or 4 allowed; default=2) + --with-max-varlookbehind=N + maximum length of variable lookbehind (default=255) + --with-parens-nest-limit=N + nested parentheses limit (default=250) + --with-heap-limit=N default limit on heap memory (kibibytes, + default=20000000) + --with-match-limit=N default limit on internal looping (default=10000000) + --with-match-limit-depth=N + default limit on match tree depth + (default=MATCH_LIMIT) + + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. 
-L<lib dir> if you have libraries in a + nonstandard directory <lib dir> + LIBS libraries to pass to the linker, e.g. -l<library> + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if + you have headers in a nonstandard directory <include dir> + LT_SYS_LIBRARY_PATH + User-defined run-time library search path. + PKG_CONFIG path to pkg-config utility + PKG_CONFIG_PATH + directories to add to pkg-config's search path + PKG_CONFIG_LIBDIR + path overriding pkg-config's built-in search path + VALGRIND_CFLAGS + C compiler flags for VALGRIND, overriding pkg-config + VALGRIND_LIBS + linker flags for VALGRIND, overriding pkg-config + LCOV the ltp lcov program + GENHTML the ltp genhtml program + +Use these variables to override the choices made by 'configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to the package provider. +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name.
+ ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for configure.gnu first; this name is used for a wrapper for + # Metaconfig's "Configure" on case-insensitive file systems. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + printf "%s\n" "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +PCRE2 configure 10.44 +generated by GNU Autoconf 2.72 + +Copyright (C) 2023 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? 
+ if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext +then : + ac_retval=0 +else case e in #( + e) printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 ;; +esac +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$3=yes" +else case e in #( + e) eval "$3=no" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_c_find_intX_t LINENO BITS VAR +# ----------------------------------- +# Finds a signed integer type with width BITS, setting cache variable VAR +# accordingly. 
+ac_fn_c_find_intX_t () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for int$2_t" >&5 +printf %s "checking for int$2_t... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) eval "$3=no" + # Order is important - never check a type that is potentially smaller + # than half of the expected target width. + for ac_type in int$2_t 'int' 'long int' \ + 'long long int' 'short int' 'signed char'; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default + enum { N = $2 / 2 - 1 }; +int +main (void) +{ +static int test_array [1 - 2 * !(0 < ($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 1))]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default + enum { N = $2 / 2 - 1 }; +int +main (void) +{ +static int test_array [1 - 2 * !(($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 1) + < ($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 2))]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else case e in #( + e) case $ac_type in #( + int$2_t) : + eval "$3=yes" ;; #( + *) : + eval "$3=\$ac_type" ;; +esac ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + if eval test \"x\$"$3"\" = x"no" +then : + +else case e in #( + e) break ;; +esac +fi + done ;; +esac +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_find_intX_t + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether 
this succeeded. +ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + } +then : + ac_retval=0 +else case e in #( + e) printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 ;; +esac +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +/* Define $2 to an innocuous variant, in case <limits.h> declares $2. + For example, HP-UX 11i <limits.h> declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (void); below. */ + +#include <limits.h> +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (void); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main (void) +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + eval "$3=yes" +else case e in #( + e) eval "$3=no" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext ;; +esac +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func + +# ac_fn_c_check_type LINENO TYPE VAR INCLUDES +# ------------------------------------------- +# Tests whether TYPE exists after having included INCLUDES, setting cache +# variable VAR accordingly. +ac_fn_c_check_type () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) eval "$3=no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h.
*/ +$4 +int +main (void) +{ +if (sizeof ($2)) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main (void) +{ +if (sizeof (($2))) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else case e in #( + e) eval "$3=yes" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_type +ac_configure_args_raw= +for ac_arg +do + case $ac_arg in + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append ac_configure_args_raw " '$ac_arg'" +done + +case $ac_configure_args_raw in + *$as_nl*) + ac_safe_unquote= ;; + *) + ac_unsafe_z='|&;<>()$`\\"*?[ '' ' # This string ends in space, tab. + ac_unsafe_a="$ac_unsafe_z#~" + ac_safe_unquote="s/ '\\([^$ac_unsafe_a][^$ac_unsafe_z]*\\)'/ \\1/g" + ac_configure_args_raw=` printf "%s\n" "$ac_configure_args_raw" | sed "$ac_safe_unquote"`;; +esac + +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by PCRE2 $as_me 10.44, which was +generated by GNU Autoconf 2.72. Invocation command line was + + $ $0$ac_configure_args_raw + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + printf "%s\n" "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Sanitize IFS. + IFS=" "" $as_nl" + # Save into config.log some information that might help in debugging. + { + echo + + printf "%s\n" "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + printf "%s\n" "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + printf "%s\n" "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + printf "%s\n" "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + printf "%s\n" "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + printf "%s\n" "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + printf "%s\n" "$as_me: caught signal $ac_signal" + printf "%s\n" "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +printf "%s\n" "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +printf "%s\n" "#define PACKAGE_NAME \"$PACKAGE_NAME\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_TARNAME \"$PACKAGE_TARNAME\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_VERSION \"$PACKAGE_VERSION\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_STRING \"$PACKAGE_STRING\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_BUGREPORT \"$PACKAGE_BUGREPORT\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_URL \"$PACKAGE_URL\"" >>confdefs.h + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +if test -n "$CONFIG_SITE"; then + ac_site_files="$CONFIG_SITE" +elif test "x$prefix" != xNONE; then + ac_site_files="$prefix/share/config.site $prefix/etc/config.site" +else + ac_site_files="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" +fi + +for ac_site_file in $ac_site_files +do + case $ac_site_file in #( + */*) : + ;; #( + *) : + ac_site_file=./$ac_site_file ;; +esac + if test -f "$ac_site_file" && test -r "$ac_site_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . 
"$ac_site_file" \ + || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See 'config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +printf "%s\n" "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +printf "%s\n" "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Test code for whether the C compiler supports C89 (global declarations) +ac_c_conftest_c89_globals=' +/* Does the compiler advertise C89 conformance? + Do not test the value of __STDC__, because some compilers set it to 0 + while being otherwise adequately conformant. */ +#if !defined __STDC__ +# error "Compiler does not advertise C89 conformance" +#endif + +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7 src/conf.sh. */ +struct buf { int x; }; +struct buf * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (char **p, int i) +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* C89 style stringification. */ +#define noexpand_stringify(a) #a +const char *stringified = noexpand_stringify(arbitrary+token=sequence); + +/* C89 style token pasting. Exercises some of the corner cases that + e.g. old MSVC gets wrong, but not very hard. 
*/ +#define noexpand_concat(a,b) a##b +#define expand_concat(a,b) noexpand_concat(a,b) +extern int vA; +extern int vbee; +#define aye A +#define bee B +int *pvA = &expand_concat(v,aye); +int *pvbee = &noexpand_concat(v,bee); + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not \xHH hex character constants. + These do not provoke an error unfortunately, instead are silently treated + as an "x". The following induces an error, until -std is added to get + proper ANSI mode. Curiously \x00 != x always comes out true, for an + array size at least. It is necessary to write \x00 == 0 to get something + that is true only with -std. */ +int osf4_cc_array ['\''\x00'\'' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) '\''x'\'' +int xlc6_cc_array[FOO(a) == '\''x'\'' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, int *(*)(struct buf *, struct stat *, int), + int, int);' + +# Test code for whether the C compiler supports C89 (body of main). +ac_c_conftest_c89_main=' +ok |= (argc == 0 || f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]); +' + +# Test code for whether the C compiler supports C99 (global declarations) +ac_c_conftest_c99_globals=' +/* Does the compiler advertise C99 conformance? */ +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L +# error "Compiler does not advertise C99 conformance" +#endif + +// See if C++-style comments work. + +#include <stdbool.h> +extern int puts (const char *); +extern int printf (const char *, ...); +extern int dprintf (int, const char *, ...); +extern void *malloc (size_t); +extern void free (void *); + +// Check varargs macros. These examples are taken from C99 6.10.3.5. +// dprintf is used instead of fprintf to avoid needing to declare +// FILE and stderr. +#define debug(...)
dprintf (2, __VA_ARGS__) +#define showlist(...) puts (#__VA_ARGS__) +#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) +static void +test_varargs_macros (void) +{ + int x = 1234; + int y = 5678; + debug ("Flag"); + debug ("X = %d\n", x); + showlist (The first, second, and third items.); + report (x>y, "x is %d but y is %d", x, y); +} + +// Check long long types. +#define BIG64 18446744073709551615ull +#define BIG32 4294967295ul +#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) +#if !BIG_OK + #error "your preprocessor is broken" +#endif +#if BIG_OK +#else + #error "your preprocessor is broken" +#endif +static long long int bignum = -9223372036854775807LL; +static unsigned long long int ubignum = BIG64; + +struct incomplete_array +{ + int datasize; + double data[]; +}; + +struct named_init { + int number; + const wchar_t *name; + double average; +}; + +typedef const char *ccp; + +static inline int +test_restrict (ccp restrict text) +{ + // Iterate through items via the restricted pointer. + // Also check for declarations in for loops. + for (unsigned int i = 0; *(text+i) != '\''\0'\''; ++i) + continue; + return 0; +} + +// Check varargs and va_copy. +static bool +test_varargs (const char *format, ...) +{ + va_list args; + va_start (args, format); + va_list args_copy; + va_copy (args_copy, args); + + const char *str = ""; + int number = 0; + float fnumber = 0; + + while (*format) + { + switch (*format++) + { + case '\''s'\'': // string + str = va_arg (args_copy, const char *); + break; + case '\''d'\'': // int + number = va_arg (args_copy, int); + break; + case '\''f'\'': // float + fnumber = va_arg (args_copy, double); + break; + default: + break; + } + } + va_end (args_copy); + va_end (args); + + return *str && number && fnumber; +} +' + +# Test code for whether the C compiler supports C99 (body of main). +ac_c_conftest_c99_main=' + // Check bool. + _Bool success = false; + success |= (argc != 0); + + // Check restrict. 
+ if (test_restrict ("String literal") == 0) + success = true; + char *restrict newvar = "Another string"; + + // Check varargs. + success &= test_varargs ("s, d'\'' f .", "string", 65, 34.234); + test_varargs_macros (); + + // Check flexible array members. + struct incomplete_array *ia = + malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); + ia->datasize = 10; + for (int i = 0; i < ia->datasize; ++i) + ia->data[i] = i * 1.234; + // Work around memory leak warnings. + free (ia); + + // Check named initializers. + struct named_init ni = { + .number = 34, + .name = L"Test wide string", + .average = 543.34343, + }; + + ni.number = 58; + + int dynamic_array[ni.number]; + dynamic_array[0] = argv[0][0]; + dynamic_array[ni.number - 1] = 543; + + // work around unused variable warnings + ok |= (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == '\''x'\'' + || dynamic_array[ni.number - 1] != 543); +' + +# Test code for whether the C compiler supports C11 (global declarations) +ac_c_conftest_c11_globals=' +/* Does the compiler advertise C11 conformance? */ +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L +# error "Compiler does not advertise C11 conformance" +#endif + +// Check _Alignas. +char _Alignas (double) aligned_as_double; +char _Alignas (0) no_special_alignment; +extern char aligned_as_int; +char _Alignas (0) _Alignas (int) aligned_as_int; + +// Check _Alignof. +enum +{ + int_alignment = _Alignof (int), + int_array_alignment = _Alignof (int[100]), + char_alignment = _Alignof (char) +}; +_Static_assert (0 < -_Alignof (int), "_Alignof is signed"); + +// Check _Noreturn. +int _Noreturn does_not_return (void) { for (;;) continue; } + +// Check _Static_assert. +struct test_static_assert +{ + int x; + _Static_assert (sizeof (int) <= sizeof (long int), + "_Static_assert does not work in struct"); + long int y; +}; + +// Check UTF-8 literals. +#define u8 syntax error! 
+char const utf8_literal[] = u8"happens to be ASCII" "another string"; + +// Check duplicate typedefs. +typedef long *long_ptr; +typedef long int *long_ptr; +typedef long_ptr long_ptr; + +// Anonymous structures and unions -- taken from C11 6.7.2.1 Example 1. +struct anonymous +{ + union { + struct { int i; int j; }; + struct { int k; long int l; } w; + }; + int m; +} v1; +' + +# Test code for whether the C compiler supports C11 (body of main). +ac_c_conftest_c11_main=' + _Static_assert ((offsetof (struct anonymous, i) + == offsetof (struct anonymous, w.k)), + "Anonymous union alignment botch"); + v1.i = 2; + v1.w.k = 5; + ok |= v1.i != 5; +' + +# Test code for whether the C compiler supports C11 (complete). +ac_c_conftest_c11_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} +${ac_c_conftest_c11_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + ${ac_c_conftest_c11_main} + return ok; +} +" + +# Test code for whether the C compiler supports C99 (complete). +ac_c_conftest_c99_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + return ok; +} +" + +# Test code for whether the C compiler supports C89 (complete). 
+ac_c_conftest_c89_program="${ac_c_conftest_c89_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + return ok; +} +" + +as_fn_append ac_header_c_list " stdio.h stdio_h HAVE_STDIO_H" +as_fn_append ac_header_c_list " stdlib.h stdlib_h HAVE_STDLIB_H" +as_fn_append ac_header_c_list " string.h string_h HAVE_STRING_H" +as_fn_append ac_header_c_list " inttypes.h inttypes_h HAVE_INTTYPES_H" +as_fn_append ac_header_c_list " stdint.h stdint_h HAVE_STDINT_H" +as_fn_append ac_header_c_list " strings.h strings_h HAVE_STRINGS_H" +as_fn_append ac_header_c_list " sys/stat.h sys_stat_h HAVE_SYS_STAT_H" +as_fn_append ac_header_c_list " sys/types.h sys_types_h HAVE_SYS_TYPES_H" +as_fn_append ac_header_c_list " unistd.h unistd_h HAVE_UNISTD_H" +as_fn_append ac_header_c_list " wchar.h wchar_h HAVE_WCHAR_H" +as_fn_append ac_header_c_list " minix/config.h minix_config_h HAVE_MINIX_CONFIG_H" + +# Auxiliary files required by this configure script. +ac_aux_files="config.guess config.sub ltmain.sh ar-lib compile missing install-sh" + +# Locations in which to look for auxiliary files. +ac_aux_dir_candidates="${srcdir}${PATH_SEPARATOR}${srcdir}/..${PATH_SEPARATOR}${srcdir}/../.." + +# Search for a directory containing all of the required auxiliary files, +# $ac_aux_files, from the $PATH-style list $ac_aux_dir_candidates. +# If we don't find one directory that contains all the files we need, +# we report the set of missing files from the *first* directory in +# $ac_aux_dir_candidates and give up. 
+ac_missing_aux_files="" +ac_first_candidate=: +printf "%s\n" "$as_me:${as_lineno-$LINENO}: looking for aux files: $ac_aux_files" >&5 +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in $ac_aux_dir_candidates +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + as_found=: + + printf "%s\n" "$as_me:${as_lineno-$LINENO}: trying $as_dir" >&5 + ac_aux_dir_found=yes + ac_install_sh= + for ac_aux in $ac_aux_files + do + # As a special case, if "install-sh" is required, that requirement + # can be satisfied by any of "install-sh", "install.sh", or "shtool", + # and $ac_install_sh is set appropriately for whichever one is found. + if test x"$ac_aux" = x"install-sh" + then + if test -f "${as_dir}install-sh"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install-sh found" >&5 + ac_install_sh="${as_dir}install-sh -c" + elif test -f "${as_dir}install.sh"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install.sh found" >&5 + ac_install_sh="${as_dir}install.sh -c" + elif test -f "${as_dir}shtool"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}shtool found" >&5 + ac_install_sh="${as_dir}shtool install -c" + else + ac_aux_dir_found=no + if $ac_first_candidate; then + ac_missing_aux_files="${ac_missing_aux_files} install-sh" + else + break + fi + fi + else + if test -f "${as_dir}${ac_aux}"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}${ac_aux} found" >&5 + else + ac_aux_dir_found=no + if $ac_first_candidate; then + ac_missing_aux_files="${ac_missing_aux_files} ${ac_aux}" + else + break + fi + fi + fi + done + if test "$ac_aux_dir_found" = yes; then + ac_aux_dir="$as_dir" + break + fi + ac_first_candidate=false + + as_found=false +done +IFS=$as_save_IFS +if $as_found +then : + +else case e in #( + e) as_fn_error $? 
"cannot find required auxiliary files:$ac_missing_aux_files" "$LINENO" 5 ;; +esac +fi + + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +if test -f "${ac_aux_dir}config.guess"; then + ac_config_guess="$SHELL ${ac_aux_dir}config.guess" +fi +if test -f "${ac_aux_dir}config.sub"; then + ac_config_sub="$SHELL ${ac_aux_dir}config.sub" +fi +if test -f "$ac_aux_dir/configure"; then + ac_configure="$SHELL ${ac_aux_dir}configure" +fi + +# Check that the precious variables saved in the cache have kept the same +# value. +ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' was set to '$ac_old_val' in the previous run" >&5 +printf "%s\n" "$as_me: error: '$ac_var' was set to '$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' was not set in the previous run" >&5 +printf "%s\n" "$as_me: error: '$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. 
+ ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' has changed since the previous run:" >&5 +printf "%s\n" "$as_me: error: '$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in '$ac_var' since the previous run:" >&5 +printf "%s\n" "$as_me: warning: ignoring whitespace changes in '$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: '$ac_old_val'" >&5 +printf "%s\n" "$as_me: former value: '$ac_old_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: '$ac_new_val'" >&5 +printf "%s\n" "$as_me: current value: '$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`printf "%s\n" "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +printf "%s\n" "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run '${MAKE-make} distclean' and/or 'rm $cache_file' + and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. 
## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +am__api_version='1.16' + + + + # Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +# Reject install programs that cannot install multiple files. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +printf %s "checking for a BSD-compatible install... " >&6; } +if test -z "$INSTALL"; then +if test ${ac_cv_path_install+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + # Account for fact that we put trailing slashes in our PATH walk. +case $as_dir in #(( + ./ | /[cC]/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. 
+ for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + rm -rf conftest.one conftest.two conftest.dir + echo one > conftest.one + echo two > conftest.two + mkdir conftest.dir + if "$as_dir$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir/" && + test -s conftest.one && test -s conftest.two && + test -s conftest.dir/conftest.one && + test -s conftest.dir/conftest.two + then + ac_cv_path_install="$as_dir$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + fi + done + done + ;; +esac + + done +IFS=$as_save_IFS + +rm -rf conftest.one conftest.two conftest.dir + ;; +esac +fi + if test ${ac_cv_path_install+y}; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +printf "%s\n" "$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 +printf %s "checking whether build environment is sane... 
" >&6; } +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[\\\"\#\$\&\'\`$am_lf]*) + as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; +esac +case $srcdir in + *[\\\"\#\$\&\'\`$am_lf\ \ ]*) + as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$*" != "X $srcdir/configure conftest.file" \ + && test "$*" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + as_fn_error $? "ls -t appears to fail. Make sure there is not a broken + alias in your environment" "$LINENO" 5 + fi + if test "$2" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$2" = conftest.file + ) +then + # Ok. + : +else + as_fn_error $? "newly created file is older than distributed files! +Check your system clock" "$LINENO" 5 +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. 
+am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi + +rm -f conftest.file + +test "$program_prefix" != NONE && + program_transform_name="s&^&$program_prefix&;$program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s&\$&$program_suffix&;$program_transform_name" +# Double any \ or $. +# By default was 's,x,x', remove it if useless. +ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' +program_transform_name=`printf "%s\n" "$program_transform_name" | sed "$ac_script"` + + +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` + + + if test x"${MISSING+set}" != xset; then + MISSING="\${SHELL} '$am_aux_dir/missing'" +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 +printf "%s\n" "$as_me: WARNING: 'missing' script is too old or missing" >&2;} +fi + +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi + +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. +if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_STRIP+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +printf "%s\n" "$STRIP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_STRIP+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +printf "%s\n" "$ac_ct_STRIP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a race-free mkdir -p" >&5 +printf %s "checking for a race-free mkdir -p... 
" >&6; } +if test -z "$MKDIR_P"; then + if test ${ac_cv_path_mkdir+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in mkdir gmkdir; do + for ac_exec_ext in '' $ac_executable_extensions; do + as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext" || continue + case `"$as_dir$ac_prog$ac_exec_ext" --version 2>&1` in #( + 'mkdir ('*'coreutils) '* | \ + *'BusyBox '* | \ + 'mkdir (fileutils) '4.1*) + ac_cv_path_mkdir=$as_dir$ac_prog$ac_exec_ext + break 3;; + esac + done + done + done +IFS=$as_save_IFS + ;; +esac +fi + + test -d ./--version && rmdir ./--version + if test ${ac_cv_path_mkdir+y}; then + MKDIR_P="$ac_cv_path_mkdir -p" + else + # As a last resort, use plain mkdir -p, + # in the hope it doesn't have the bugs of ancient mkdir. + MKDIR_P='mkdir -p' + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 +printf "%s\n" "$MKDIR_P" >&6; } + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_AWK+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +printf "%s\n" "$AWK" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$AWK" && break +done + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +printf %s "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } +set x ${MAKE-make} +ac_make=`printf "%s\n" "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if eval test \${ac_cv_prog_make_${ac_make}_set+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat >conftest.make <<\_ACEOF +SHELL = /bin/sh +all: + @echo '@@@%%%=$(MAKE)=@@@%%%' +_ACEOF +# GNU make sometimes prints "make[1]: Entering ...", which would confuse us. +case `${MAKE-make} -f conftest.make 2>/dev/null` in + *@@@%%%=?*=@@@%%%*) + eval ac_cv_prog_make_${ac_make}_set=yes;; + *) + eval ac_cv_prog_make_${ac_make}_set=no;; +esac +rm -f conftest.make ;; +esac +fi +if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + SET_MAKE= +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + SET_MAKE="MAKE=${MAKE-make}" +fi + +rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null + +# Check whether --enable-silent-rules was given. 
+if test ${enable_silent_rules+y} +then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=1;; +esac +am_make=${MAKE-make} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +printf %s "checking whether $am_make supports nested variables... " >&6; } +if test ${am_cv_make_support_nested_variables+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if printf "%s\n" 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +printf "%s\n" "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + am__isrc=' -I$(srcdir)' + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5 + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='pcre2' + VERSION='10.44' + + +printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h + + +printf "%s\n" "#define VERSION \"$VERSION\"" >>confdefs.h + +# Some tools Automake needs. 
+ +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# +# +mkdir_p='$(MKDIR_P)' + +# We need awk for the "check" target (and possibly the TAP driver). The +# system "awk" is bad on some platforms. +# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AMTAR='$${TAR-tar}' + + +# We'll loop over all known methods to create a tar archive until one works. +_am_tools='gnutar pax cpio none' + +am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -' + + + + + +# Variables for tags utilities; see am/tags.am +if test -z "$CTAGS"; then + CTAGS=ctags +fi + +if test -z "$ETAGS"; then + ETAGS=etags +fi + +if test -z "$CSCOPE"; then + CSCOPE=cscope +fi + + + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. +# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. 
This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: . + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5 + fi +fi + +# Check whether --enable-silent-rules was given. +if test ${enable_silent_rules+y} +then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=0;; +esac +am_make=${MAKE-make} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +printf %s "checking whether $am_make supports nested variables... 
" >&6; } +if test ${am_cv_make_support_nested_variables+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if printf "%s\n" 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +printf "%s\n" "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + +ac_config_headers="$ac_config_headers src/config.h" + + +# This was added at the suggestion of libtoolize (03-Jan-10) + + +# The default CFLAGS in Autoconf are "-g -O2" for gcc and just "-g" for any +# other compiler. There doesn't seem to be a standard way of getting rid of the +# -g (which I don't think is needed for a production library). This fudge seems +# to achieve the necessary. First, we remember the externally set values of +# CFLAGS. Then call the AC_PROG_CC macro to find the compiler - if CFLAGS is +# not set, it will be set to Autoconf's defaults. Afterwards, if the original +# values were not set, remove the -g from the Autoconf defaults. + +remember_set_CFLAGS="$CFLAGS" + + + + + + + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. 
+ # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. + shift + ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@" + fi +fi +fi ;; +esac +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. 
+set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}clang", so it can be a program name with args. +set dummy ${ac_tool_prefix}clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "clang", so it can be a program name with args. +set dummy clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +fi + + +test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See 'config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion -version; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +printf %s "checking whether the C compiler works... " >&6; } +ac_link_default=`printf "%s\n" "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + # Autoconf-2.13 could set the ac_cv_exeext variable to 'no'. +# So ignore a value of 'no', otherwise this would lead to 'EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test ${ac_cv_exeext+y} && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an '-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else case e in #( + e) ac_file='' ;; +esac +fi +if test -z "$ac_file" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See 'config.log' for more details" "$LINENO" 5; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +printf %s "checking for C compiler default output file name... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +printf "%s\n" "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +printf %s "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + # If both 'conftest.exe' and 'conftest' are 'present' (well, observable) +# catch 'conftest.exe'. For instance with Cygwin, 'ls conftest' will +# work properly (i.e., refer to 'conftest.exe'), while it won't with +# 'rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else case e in #( + e) { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See 'config.log' for more details" "$LINENO" 5; } ;; +esac +fi +rm -f conftest conftest$ac_cv_exeext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +printf "%s\n" "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <stdio.h> +int +main (void) +{ +FILE *f = fopen ("conftest.out", "w"); + if (!f) + return 1; + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +printf %s "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error 77 "cannot run C compiled programs. +If you meant to cross compile, use '--host'. +See 'config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +printf "%s\n" "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext \ + conftest.o conftest.obj conftest.out +ac_clean_files=$ac_clean_files_save +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +printf %s "checking for suffix of object files... " >&6; } +if test ${ac_cv_objext+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else case e in #( + e) printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See 'config.log' for more details" "$LINENO" 5; } ;; +esac +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +printf "%s\n" "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C" >&5 +printf %s "checking whether the compiler supports GNU C... " >&6; } +if test ${ac_cv_c_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_compiler_gnu=yes +else case e in #( + e) ac_compiler_gnu=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+y} +ac_save_CFLAGS=$CFLAGS +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +printf %s "checking whether $CC accepts -g... " >&6; } +if test ${ac_cv_prog_cc_g+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_g=yes +else case e in #( + e) CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else case e in #( + e) ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +printf "%s\n" "$ac_cv_prog_cc_g" >&6; } +if test $ac_test_CFLAGS; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +ac_prog_cc_stdc=no +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C11 features" >&5 +printf %s "checking for $CC option to enable C11 features... " >&6; } +if test ${ac_cv_prog_cc_c11+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_cv_prog_cc_c11=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c11_program +_ACEOF +for ac_arg in '' -std=gnu11 +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c11=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c11" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC ;; +esac +fi + +if test "x$ac_cv_prog_cc_c11" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else case e in #( + e) if test "x$ac_cv_prog_cc_c11" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 +printf "%s\n" "$ac_cv_prog_cc_c11" >&6; } + CC="$CC $ac_cv_prog_cc_c11" ;; +esac +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 + ac_prog_cc_stdc=c11 ;; +esac +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C99 features" >&5 +printf %s "checking for $CC option to enable C99 features... " >&6; } +if test ${ac_cv_prog_cc_c99+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_cv_prog_cc_c99=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c99_program +_ACEOF +for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 -AC99 -D_STDC_C99= +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c99=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c99" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC ;; +esac +fi + +if test "x$ac_cv_prog_cc_c99" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else case e in #( + e) if test "x$ac_cv_prog_cc_c99" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +printf "%s\n" "$ac_cv_prog_cc_c99" >&6; } + CC="$CC $ac_cv_prog_cc_c99" ;; +esac +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 + ac_prog_cc_stdc=c99 ;; +esac +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C89 features" >&5 +printf %s "checking for $CC option to enable C89 features... " >&6; } +if test ${ac_cv_prog_cc_c89+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c89_program +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC ;; +esac +fi + +if test "x$ac_cv_prog_cc_c89" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else case e in #( + e) if test "x$ac_cv_prog_cc_c89" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +printf "%s\n" "$ac_cv_prog_cc_c89" >&6; } + CC="$CC $ac_cv_prog_cc_c89" ;; +esac +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 + ac_prog_cc_stdc=c89 ;; +esac +fi +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +printf %s "checking whether $CC understands -c and -o together... " >&6; } +if test ${am_cv_prog_cc_c_o+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF + # Make sure it works both with $CC and with simple cc. 
+ # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 + ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +printf "%s\n" "$am_cv_prog_cc_c_o" >&6; } +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +DEPDIR="${am__leading_dot}deps" + +ac_config_commands="$ac_config_commands depfiles" + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5 +printf %s "checking whether ${MAKE-make} supports the include directive... " >&6; } +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. 
+echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5 + (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + case $?:`cat confinc.out 2>/dev/null` in #( + '0:this is the am__doit target') : + case $s in #( + BSD) : + am__include='.include' am__quote='"' ;; #( + *) : + am__include='include' am__quote='' ;; +esac ;; #( + *) : + ;; +esac + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5 +printf "%s\n" "${_am_result}" >&6; } + +# Check whether --enable-dependency-tracking was given. +if test ${enable_dependency_tracking+y} +then : + enableval=$enable_dependency_tracking; +fi + +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi + if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' +else + AMDEP_TRUE='#' + AMDEP_FALSE= +fi + + + +depcc="$CC" am_compiler_list= + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +printf %s "checking dependency style of $depcc... " >&6; } +if test ${am_cv_CC_dependencies_compiler_type+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. 
+ cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. 
+ if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +printf "%s\n" "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + + +ac_header= ac_cache= +for ac_item in $ac_header_c_list +do + if test $ac_cache; then + ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" + if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then + printf "%s\n" "#define $ac_item 1" >> confdefs.h + fi + ac_header= ac_cache= + elif test $ac_header; then + ac_cache=$ac_item + else + ac_header=$ac_item + fi +done + + + + + + + + +if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes +then : + +printf "%s\n" "#define STDC_HEADERS 1" >>confdefs.h + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether it is safe to define __EXTENSIONS__" >&5 +printf %s "checking whether it is safe to define __EXTENSIONS__... " >&6; } +if test ${ac_cv_safe_to_define___extensions__+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +# define __EXTENSIONS__ 1 + $ac_includes_default +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_safe_to_define___extensions__=yes +else case e in #( + e) ac_cv_safe_to_define___extensions__=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_safe_to_define___extensions__" >&5 +printf "%s\n" "$ac_cv_safe_to_define___extensions__" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether _XOPEN_SOURCE should be defined" >&5 +printf %s "checking whether _XOPEN_SOURCE should be defined... " >&6; } +if test ${ac_cv_should_define__xopen_source+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_cv_should_define__xopen_source=no + if test $ac_cv_header_wchar_h = yes +then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include + mbstate_t x; +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #define _XOPEN_SOURCE 500 + #include + mbstate_t x; +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_should_define__xopen_source=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_should_define__xopen_source" >&5 +printf "%s\n" "$ac_cv_should_define__xopen_source" >&6; } + + printf "%s\n" "#define _ALL_SOURCE 1" >>confdefs.h + + printf "%s\n" "#define _DARWIN_C_SOURCE 1" >>confdefs.h + + printf "%s\n" "#define _GNU_SOURCE 1" >>confdefs.h + + printf "%s\n" "#define _HPUX_ALT_XOPEN_SOCKET_API 1" >>confdefs.h + + printf "%s\n" "#define _NETBSD_SOURCE 1" >>confdefs.h + + printf "%s\n" "#define _OPENBSD_SOURCE 1" >>confdefs.h + + printf "%s\n" "#define _POSIX_PTHREAD_SEMANTICS 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_IEC_60559_ATTRIBS_EXT__ 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_IEC_60559_BFP_EXT__ 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_IEC_60559_DFP_EXT__ 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_IEC_60559_EXT__ 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_IEC_60559_TYPES_EXT__ 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_LIB_EXT2__ 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_MATH_SPEC_FUNCS__ 1" >>confdefs.h + + printf "%s\n" "#define _TANDEM_SOURCE 1" >>confdefs.h + + if test $ac_cv_header_minix_config_h = yes +then : + MINIX=yes + printf "%s\n" "#define _MINIX 1" >>confdefs.h + + printf "%s\n" "#define _POSIX_SOURCE 1" >>confdefs.h + + printf "%s\n" "#define _POSIX_1_SOURCE 2" >>confdefs.h + +else case e in #( + e) MINIX= ;; +esac +fi + if test $ac_cv_safe_to_define___extensions__ = yes +then : + printf "%s\n" "#define __EXTENSIONS__ 1" 
>>confdefs.h + +fi + if test $ac_cv_should_define__xopen_source = yes +then : + printf "%s\n" "#define _XOPEN_SOURCE 500" >>confdefs.h + +fi + + +if test "x$remember_set_CFLAGS" = "x" +then + if test "$CFLAGS" = "-g -O2" + then + CFLAGS="-O2" + elif test "$CFLAGS" = "-g" + then + CFLAGS="" + fi +fi + +# This is a new thing required to stop a warning from automake 1.12 + + if test -n "$ac_tool_prefix"; then + for ac_prog in ar lib "link -lib" + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_AR+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$AR"; then + ac_cv_prog_AR="$AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_AR="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +AR=$ac_cv_prog_AR +if test -n "$AR"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 +printf "%s\n" "$AR" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$AR" && break + done +fi +if test -z "$AR"; then + ac_ct_AR=$AR + for ac_prog in ar lib "link -lib" +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_ac_ct_AR+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_AR"; then + ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AR="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_AR=$ac_cv_prog_ac_ct_AR +if test -n "$ac_ct_AR"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 +printf "%s\n" "$ac_ct_AR" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_AR" && break +done + + if test "x$ac_ct_AR" = x; then + AR="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AR=$ac_ct_AR + fi +fi + +: ${AR=ar} + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the archiver ($AR) interface" >&5 +printf %s "checking the archiver ($AR) interface... " >&6; } +if test ${am_cv_ar_interface+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + am_cv_ar_interface=ar + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int some_variable = 0; +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + am_ar_try='$AR cru libconftest.a conftest.$ac_objext >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$am_ar_try\""; } >&5 + (eval $am_ar_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -eq 0; then + am_cv_ar_interface=ar + else + am_ar_try='$AR -NOLOGO -OUT:conftest.lib conftest.$ac_objext >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$am_ar_try\""; } >&5 + (eval $am_ar_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -eq 0; then + am_cv_ar_interface=lib + else + am_cv_ar_interface=unknown + fi + fi + rm -f conftest.lib libconftest.a + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_ar_interface" >&5 +printf "%s\n" "$am_cv_ar_interface" >&6; } + +case $am_cv_ar_interface in +ar) + ;; +lib) + # Microsoft lib, so override with the ar-lib wrapper script. + # FIXME: It is wrong to rewrite AR. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__AR in this case, + # and then we could set am__AR="$am_aux_dir/ar-lib \$(AR)" or something + # similar. + AR="$am_aux_dir/ar-lib $AR" + ;; +unknown) + as_fn_error $? 
"could not determine $AR interface" "$LINENO" 5 + ;; +esac + + +# Check for a 64-bit integer type +ac_fn_c_find_intX_t "$LINENO" "64" "ac_cv_c_int64_t" +case $ac_cv_c_int64_t in #( + no|yes) ;; #( + *) + +printf "%s\n" "#define int64_t $ac_cv_c_int64_t" >>confdefs.h +;; +esac + + + +case `pwd` in + *\ * | *\ *) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5 +printf "%s\n" "$as_me: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&2;} ;; +esac + + + +macro_version='2.5.0.1-38c1-dirty' +macro_revision='2.5.0.1' + + + + + + + + + + + + + + +ltmain=$ac_aux_dir/ltmain.sh + + + + # Make sure we can run config.sub. +$SHELL "${ac_aux_dir}config.sub" sun4 >/dev/null 2>&1 || + as_fn_error $? "cannot run $SHELL ${ac_aux_dir}config.sub" "$LINENO" 5 + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 +printf %s "checking build system type... " >&6; } +if test ${ac_cv_build+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_build_alias=$build_alias +test "x$ac_build_alias" = x && + ac_build_alias=`$SHELL "${ac_aux_dir}config.guess"` +test "x$ac_build_alias" = x && + as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 +ac_cv_build=`$SHELL "${ac_aux_dir}config.sub" $ac_build_alias` || + as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $ac_build_alias failed" "$LINENO" 5 + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 +printf "%s\n" "$ac_cv_build" >&6; } +case $ac_cv_build in +*-*-*) ;; +*) as_fn_error $? 
"invalid value of canonical build" "$LINENO" 5;; +esac +build=$ac_cv_build +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_build +shift +build_cpu=$1 +build_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +build_os=$* +IFS=$ac_save_IFS +case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 +printf %s "checking host system type... " >&6; } +if test ${ac_cv_host+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test "x$host_alias" = x; then + ac_cv_host=$ac_cv_build +else + ac_cv_host=`$SHELL "${ac_aux_dir}config.sub" $host_alias` || + as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $host_alias failed" "$LINENO" 5 +fi + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 +printf "%s\n" "$ac_cv_host" >&6; } +case $ac_cv_host in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;; +esac +host=$ac_cv_host +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_host +shift +host_cpu=$1 +host_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +host_os=$* +IFS=$ac_save_IFS +case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac + + +# Backslashify metacharacters that are still active within +# double-quoted strings. +sed_quote_subst='s/\(["`$\\]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\(["`\\]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. 
+delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' + +ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to print strings" >&5 +printf %s "checking how to print strings... " >&6; } +# Test print first, because it will be a builtin if present. +if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "" +} + +case $ECHO in + printf*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: printf" >&5 +printf "%s\n" "printf" >&6; } ;; + print*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: print -r" >&5 +printf "%s\n" "print -r" >&6; } ;; + *) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: cat" >&5 +printf "%s\n" "cat" >&6; } ;; +esac + + + + + + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 +printf %s "checking for a sed that does not truncate output... 
" >&6; } +if test ${ac_cv_path_SED+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ + for ac_i in 1 2 3 4 5 6 7; do + ac_script="$ac_script$as_nl$ac_script" + done + echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed + { ac_script=; unset ac_script;} + if test -z "$SED"; then + ac_path_SED_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in sed gsed + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_SED="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_SED" || continue +# Check for GNU ac_path_SED and select it if it is found. + # Check for GNU $ac_path_SED +case `"$ac_path_SED" --version 2>&1` in #( +*GNU*) + ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; +#( +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" '' >> "conftest.nl" + "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_SED_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_SED="$ac_path_SED" + ac_path_SED_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_SED_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_SED"; then + as_fn_error $? 
"no acceptable sed could be found in \$PATH" "$LINENO" 5 + fi +else + ac_cv_path_SED=$SED +fi + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 +printf "%s\n" "$ac_cv_path_SED" >&6; } + SED="$ac_cv_path_SED" + rm -f conftest.sed + +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" + + + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +printf %s "checking for grep that handles long lines and -e... " >&6; } +if test ${ac_cv_path_GREP+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in grep ggrep + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. 
+ # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in #( +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +#( +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +printf "%s\n" "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +printf %s "checking for egrep... 
" >&6; } +if test ${ac_cv_path_EGREP+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in egrep + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in #( +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +#( +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? 
"no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +printf "%s\n" "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + EGREP_TRADITIONAL=$EGREP + ac_cv_path_EGREP_TRADITIONAL=$EGREP + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5 +printf %s "checking for fgrep... " >&6; } +if test ${ac_cv_path_FGREP+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1 + then ac_cv_path_FGREP="$GREP -F" + else + if test -z "$FGREP"; then + ac_path_FGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in fgrep + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_FGREP="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_FGREP" || continue +# Check for GNU ac_path_FGREP and select it if it is found. 
+ # Check for GNU $ac_path_FGREP +case `"$ac_path_FGREP" --version 2>&1` in #( +*GNU*) + ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;; +#( +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" 'FGREP' >> "conftest.nl" + "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_FGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_FGREP="$ac_path_FGREP" + ac_path_FGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_FGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_FGREP"; then + as_fn_error $? "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_FGREP=$FGREP +fi + + fi ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5 +printf "%s\n" "$ac_cv_path_FGREP" >&6; } + FGREP="$ac_cv_path_FGREP" + + +test -z "$GREP" && GREP=grep + + + + + + + + + + + + + + + + + + + +# Check whether --with-gnu-ld was given. +if test ${with_gnu_ld+y} +then : + withval=$with_gnu_ld; test no = "$withval" || with_gnu_ld=yes +else case e in #( + e) with_gnu_ld=no ;; +esac +fi + +ac_prog=ld +if test yes = "$GCC"; then + # Check if gcc -print-prog-name=ld gives a path. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 +printf %s "checking for ld used by $CC... 
" >&6; } + case $host in + *-*-mingw* | *-*-windows*) + # gcc leaves a trailing carriage return, which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [\\/]* | ?:[\\/]*) + re_direlt='/[^/][^/]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD=$ac_prog + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test yes = "$with_gnu_ld"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 +printf %s "checking for GNU ld... " >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 +printf %s "checking for non-GNU ld... " >&6; } +fi +if test ${lt_cv_path_LD+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -z "$LD"; then + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD=$ac_dir/$ac_prog + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + case `"$lt_cv_path_LD" -v 2>&1 &5 +printf "%s\n" "$LD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi +test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 +printf %s "checking if the linker ($LD) is GNU ld... 
" >&6; } +if test ${lt_cv_prog_gnu_ld+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) # I'd rather use --version here, but apparently some GNU lds only accept -v. +case `$LD -v 2>&1 &5 +printf "%s\n" "$lt_cv_prog_gnu_ld" >&6; } +with_gnu_ld=$lt_cv_prog_gnu_ld + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5 +printf %s "checking for BSD- or MS-compatible name lister (nm)... " >&6; } +if test ${lt_cv_path_NM+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$NM"; then + # Let the user override the test. + lt_cv_path_NM=$NM +else + lt_nm_to_check=${ac_tool_prefix}nm + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + tmp_nm=$ac_dir/$lt_tmp_nm + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext"; then + # Check to see if the nm accepts a BSD-compat flag. 
+ # Adding the 'sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + # MSYS converts /dev/null to NUL, MinGW nm treats NUL as empty + case $build_os in + mingw* | windows*) lt_bad_file=conftest.nm/nofile ;; + *) lt_bad_file=/dev/null ;; + esac + case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in + *$lt_bad_file* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break 2 + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | $SED '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break 2 + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS=$lt_save_ifs + done + : ${lt_cv_path_NM=no} +fi ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5 +printf "%s\n" "$lt_cv_path_NM" >&6; } +if test no != "$lt_cv_path_NM"; then + NM=$lt_cv_path_NM +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. + else + if test -n "$ac_tool_prefix"; then + for ac_prog in dumpbin "link -dump" + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_DUMPBIN+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$DUMPBIN"; then + ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +DUMPBIN=$ac_cv_prog_DUMPBIN +if test -n "$DUMPBIN"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5 +printf "%s\n" "$DUMPBIN" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$DUMPBIN" && break + done +fi +if test -z "$DUMPBIN"; then + ac_ct_DUMPBIN=$DUMPBIN + for ac_prog in dumpbin "link -dump" +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_DUMPBIN+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_DUMPBIN"; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN +if test -n "$ac_ct_DUMPBIN"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5 +printf "%s\n" "$ac_ct_DUMPBIN" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_DUMPBIN" && break +done + + if test "x$ac_ct_DUMPBIN" = x; then + DUMPBIN=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DUMPBIN=$ac_ct_DUMPBIN + fi +fi + + case `$DUMPBIN -symbols -headers /dev/null 2>&1 | $SED '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols -headers" + ;; + *) + DUMPBIN=: + ;; + esac + fi + + if test : != "$DUMPBIN"; then + NM=$DUMPBIN + fi +fi +test -z "$NM" && NM=nm + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5 +printf %s "checking the name lister ($NM) interface... 
" >&6; } +if test ${lt_cv_nm_interface+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: output\"" >&5) + cat conftest.out >&5 + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest* ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5 +printf "%s\n" "$lt_cv_nm_interface" >&6; } + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5 +printf %s "checking whether ln -s works... " >&6; } +LN_S=$as_ln_s +if test "$LN_S" = "ln -s"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5 +printf "%s\n" "no, using $LN_S" >&6; } +fi + +# find the maximum length of command line arguments +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5 +printf %s "checking the maximum length of command line arguments... " >&6; } +if test ${lt_cv_sys_max_cmd_len+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) i=0 + teststring=ABCD + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. 
+ lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. + # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw* | windows* | cegcc*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. + # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). + lt_cv_sys_max_cmd_len=8192; + ;; + + mint*) + # On MiNT this can take a long time and run out of memory. + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) + # This has been around since 386BSD, at least. Likely further. + if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + os2*) + # The test takes a long time on OS/2. + lt_cv_sys_max_cmd_len=8192 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. 
It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. + lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | $SED 's/.*[ ]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len" && \ + test undefined != "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + # Make teststring a little bigger before we do anything with it. + # a 1K string should be a reasonable start. + for i in 1 2 3 4 5 6 7 8; do + teststring=$teststring$teststring + done + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + while { test X`env echo "$teststring$teststring" 2>/dev/null` \ + = "X$teststring$teststring"; } >/dev/null 2>&1 && + test 17 != "$i" # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + # Only check the string length outside the loop. + lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` + teststring= + # Add a significant safety factor because C++ compilers can tack on + # massive amounts of additional arguments before passing them to the + # linker. It appears as though 1/2 is a usable value. 
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi + ;; + esac + ;; +esac +fi + +if test -n "$lt_cv_sys_max_cmd_len"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sys_max_cmd_len" >&5 +printf "%s\n" "$lt_cv_sys_max_cmd_len" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none" >&5 +printf "%s\n" "none" >&6; } +fi +max_cmd_len=$lt_cv_sys_max_cmd_len + + + + + + +: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} + +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi + + + + + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5 +printf %s "checking how to convert $build file names to $host format... 
" >&6; } +if test ${lt_cv_to_host_file_cmd+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $host in + *-*-mingw* ) + case $build in + *-*-mingw* | *-*-windows* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* | *-*-windows* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac + ;; +esac +fi + +to_host_file_cmd=$lt_cv_to_host_file_cmd +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" >&5 +printf "%s\n" "$lt_cv_to_host_file_cmd" >&6; } + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5 +printf %s "checking how to convert $build file names to toolchain format... " >&6; } +if test ${lt_cv_to_tool_file_cmd+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) #assume ordinary cross tools, or native build. 
+lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* | *-*-windows* ) + case $build in + *-*-mingw* | *-*-windows* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac + ;; +esac +fi + +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" >&5 +printf "%s\n" "$lt_cv_to_tool_file_cmd" >&6; } + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5 +printf %s "checking for $LD option to reload object files... " >&6; } +if test ${lt_cv_ld_reload_flag+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_ld_reload_flag='-r' ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5 +printf "%s\n" "$lt_cv_ld_reload_flag" >&6; } +reload_flag=$lt_cv_ld_reload_flag +case $reload_flag in +"" | " "*) ;; +*) reload_flag=" $reload_flag" ;; +esac +reload_cmds='$LD$reload_flag -o $output$reload_objs' +case $host_os in + cygwin* | mingw* | windows* | pw32* | cegcc*) + if test yes != "$GCC"; then + reload_cmds=false + fi + ;; + darwin*) + if test yes = "$GCC"; then + reload_cmds='$LTCC $LTCFLAGS -nostdlib $wl-r -o $output$reload_objs' + else + reload_cmds='$LD$reload_flag -o $output$reload_objs' + fi + ;; +esac + + + + + + + + + +# Extract the first word of "file", so it can be a program name with args. +set dummy file; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_FILECMD+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$FILECMD"; then + ac_cv_prog_FILECMD="$FILECMD" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_FILECMD=":" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +FILECMD=$ac_cv_prog_FILECMD +if test -n "$FILECMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $FILECMD" >&5 +printf "%s\n" "$FILECMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. +set dummy ${ac_tool_prefix}objdump; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_OBJDUMP+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$OBJDUMP"; then + ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +OBJDUMP=$ac_cv_prog_OBJDUMP +if test -n "$OBJDUMP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 +printf "%s\n" "$OBJDUMP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OBJDUMP"; then + ac_ct_OBJDUMP=$OBJDUMP + # Extract the first word of "objdump", so it can be a program name with args. +set dummy objdump; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_OBJDUMP+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_OBJDUMP"; then + ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OBJDUMP="objdump" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP +if test -n "$ac_ct_OBJDUMP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 +printf "%s\n" "$ac_ct_OBJDUMP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_OBJDUMP" = x; then + OBJDUMP="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OBJDUMP=$ac_ct_OBJDUMP + fi +else + OBJDUMP="$ac_cv_prog_OBJDUMP" +fi + +test -z "$OBJDUMP" && OBJDUMP=objdump + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5 +printf %s "checking how to recognize dependent libraries... " >&6; } +if test ${lt_cv_deplibs_check_method+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_file_magic_cmd='$MAGIC_CMD' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# 'unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. 
+# 'file_magic [[regex]]' -- check by looking for files in library path +# that responds to the $file_magic_cmd with a given extended regex. +# If you have 'file' or equivalent on your system and you're not sure +# whether 'pass_all' will *always* work, you probably want this one. + +case $host_os in +aix[4-9]*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi[45]*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='$FILECMD -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin*) + # func_win32_libid is a shell function defined in ltmain.sh + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + ;; + +mingw* | windows* | pw32*) + # Base MSYS/MinGW do not provide the 'file' command needed by + # func_win32_libid shell function, so use a weaker test based on 'objdump', + # unless we find 'file', for example because we are cross-compiling. + if ( file / ) >/dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly* | midnightbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. 
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=$FILECMD + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=$FILECMD + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]' + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[3-9]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=$FILECMD + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all + ;; +os2*) + lt_cv_deplibs_check_method=pass_all + ;; +esac + ;; +esac +fi 
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5 +printf "%s\n" "$lt_cv_deplibs_check_method" >&6; } + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | windows* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[\1]\/[\1]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + + + + + + + + + + + + + + + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. +set dummy ${ac_tool_prefix}dlltool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_DLLTOOL+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$DLLTOOL"; then + ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +DLLTOOL=$ac_cv_prog_DLLTOOL +if test -n "$DLLTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 +printf "%s\n" "$DLLTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DLLTOOL"; then + ac_ct_DLLTOOL=$DLLTOOL + # Extract the first word of "dlltool", so it can be a program name with args. +set dummy dlltool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_DLLTOOL+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_DLLTOOL"; then + ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DLLTOOL="dlltool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL +if test -n "$ac_ct_DLLTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 +printf "%s\n" "$ac_ct_DLLTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_DLLTOOL" = x; then + DLLTOOL="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DLLTOOL=$ac_ct_DLLTOOL + fi +else + DLLTOOL="$ac_cv_prog_DLLTOOL" +fi + +test -z "$DLLTOOL" && DLLTOOL=dlltool + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5 +printf %s "checking how to associate runtime and link libraries... 
" >&6; } +if test ${lt_cv_sharedlib_from_linklib_cmd+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_sharedlib_from_linklib_cmd='unknown' + +case $host_os in +cygwin* | mingw* | windows* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh; + # decide which one to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib + lt_cv_sharedlib_from_linklib_cmd=$ECHO + ;; +esac + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5 +printf "%s\n" "$lt_cv_sharedlib_from_linklib_cmd" >&6; } +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + + + + + + + +if test -n "$ac_tool_prefix"; then + for ac_prog in ar + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_AR+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$AR"; then + ac_cv_prog_AR="$AR" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_AR="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +AR=$ac_cv_prog_AR +if test -n "$AR"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 +printf "%s\n" "$AR" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$AR" && break + done +fi +if test -z "$AR"; then + ac_ct_AR=$AR + for ac_prog in ar +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_AR+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_AR"; then + ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AR="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_AR=$ac_cv_prog_ac_ct_AR +if test -n "$ac_ct_AR"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 +printf "%s\n" "$ac_ct_AR" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_AR" && break +done + + if test "x$ac_ct_AR" = x; then + AR="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AR=$ac_ct_AR + fi +fi + +: ${AR=ar} + + + + + + +# Use ARFLAGS variable as AR's operation code to sync the variable naming with +# Automake. If both AR_FLAGS and ARFLAGS are specified, AR_FLAGS should have +# higher priority because that's what people were doing historically (setting +# ARFLAGS for automake and AR_FLAGS for libtool). FIXME: Make the AR_FLAGS +# variable obsoleted/removed. + +test ${AR_FLAGS+y} || AR_FLAGS=${ARFLAGS-cr} +lt_ar_flags=$AR_FLAGS + + + + + + +# Make AR_FLAGS overridable by 'make ARFLAGS='. Don't try to run-time override +# by AR_FLAGS because that was never working and AR_FLAGS is about to die. + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5 +printf %s "checking for archiver @FILE support... 
" >&6; } +if test ${lt_cv_ar_at_file+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_ar_at_file=no + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + echo conftest.$ac_objext > conftest.lst + lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test 0 -eq "$ac_status"; then + # Ensure the archiver fails upon bogus file names. + rm -f conftest.$ac_objext libconftest.a + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test 0 -ne "$ac_status"; then + lt_cv_ar_at_file=@ + fi + fi + rm -f conftest.* libconftest.a + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5 +printf "%s\n" "$lt_cv_ar_at_file" >&6; } + +if test no = "$lt_cv_ar_at_file"; then + archiver_list_spec= +else + archiver_list_spec=$lt_cv_ar_at_file +fi + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_STRIP+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +printf "%s\n" "$STRIP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_STRIP+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +printf "%s\n" "$ac_ct_STRIP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +test -z "$STRIP" && STRIP=: + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_RANLIB+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +printf "%s\n" "$RANLIB" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_RANLIB+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +printf "%s\n" "$ac_ct_RANLIB" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + +test -z "$RANLIB" && RANLIB=: + + + + + + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" +fi + +case $host_os in + darwin*) + lock_old_archive_extraction=yes ;; + *) + lock_old_archive_extraction=no ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. 
+compiler=$CC + + +# Check for command to grab the raw symbol name followed by C symbol from nm. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5 +printf %s "checking command to parse $NM output from $compiler object... " >&6; } +if test ${lt_cv_sys_global_symbol_pipe+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[BCDEGRST]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([_A-Za-z][_A-Za-z0-9]*\)' + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[BCDT]' + ;; +cygwin* | mingw* | windows* | pw32* | cegcc*) + symcode='[ABCDGISTW]' + ;; +hpux*) + if test ia64 = "$host_cpu"; then + symcode='[ABCDEGRST]' + fi + ;; +irix* | nonstopux*) + symcode='[BCDEGRST]' + ;; +osf*) + symcode='[BCDEGQRST]' + ;; +solaris*) + symcode='[BCDRT]' + ;; +sco3.2v5*) + symcode='[DT]' + ;; +sysv4.2uw2*) + symcode='[DT]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[ABDT]' + ;; +sysv4) + symcode='[DFNSTU]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[ABCDGIRSTW]' ;; +esac + +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Gets list of data symbols to import. + lt_cv_sys_global_symbol_to_import="$SED -n -e 's/^I .* \(.*\)$/\1/p'" + # Adjust the below global symbol transforms to fixup imported variables. + lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'" + lt_c_name_hook=" -e 's/^I .* \(.*\)$/ {\"\1\", (void *) 0},/p'" + lt_c_name_lib_hook="\ + -e 's/^I .* \(lib.*\)$/ {\"\1\", (void *) 0},/p'\ + -e 's/^I .* \(.*\)$/ {\"lib\1\", (void *) 0},/p'" +else + # Disable hooks by default. 
+ lt_cv_sys_global_symbol_to_import= + lt_cdecl_hook= + lt_c_name_hook= + lt_c_name_lib_hook= +fi + +# Transform an extracted symbol line into a proper C declaration. +# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. +lt_cv_sys_global_symbol_to_cdecl="$SED -n"\ +$lt_cdecl_hook\ +" -e 's/^T .* \(.*\)$/extern int \1();/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="$SED -n"\ +$lt_c_name_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/p'" + +# Transform an extracted symbol line into symbol name with lib prefix and +# symbol address. +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="$SED -n"\ +$lt_c_name_lib_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(lib.*\)$/ {\"\1\", (void *) \&\1},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"lib\1\", (void *) \&\1},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw* | windows*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function, + # D for any global variable and I for any imported variable. + # Also find C++ and __fastcall symbols from MSVC++ or ICC, + # which start with @ or ?. 
+ lt_cv_sys_global_symbol_pipe="$AWK '"\ +" {last_section=section; section=\$ 3};"\ +" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" /^ *Symbol name *: /{split(\$ 0,sn,\":\"); si=substr(sn[2],2)};"\ +" /^ *Type *: code/{print \"T\",si,substr(si,length(prfx))};"\ +" /^ *Type *: data/{print \"I\",si,substr(si,length(prfx))};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=\"D\"}; \$ 0~/\(\).*\|/{f=\"T\"};"\ +" {split(\$ 0,a,/\||\r/); split(a[2],s)};"\ +" s[1]~/^[@?]/{print f,s[1],s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx" + else + lt_cv_sys_global_symbol_pipe="$SED -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | $SED '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + # Now try to grab the symbols. + nlist=conftest.nm + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist\""; } >&5 + (eval $NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s "$nlist"; then + # Try sorting and uniquifying the output. 
+ if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. + if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE +/* DATA imports from DLLs on WIN32 can't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT_DLSYM_CONST +#elif defined __osf__ +/* This system does not cope well with relocations in const data. */ +# define LT_DLSYM_CONST +#else +# define LT_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. + eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS=conftstm.$ac_objext + CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag" + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } && test -s conftest$ac_exeext; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&5 + fi + else + echo "cannot find nm_test_var in $nlist" >&5 + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5 + fi + else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test yes = "$pipe_works"; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done + ;; +esac +fi + +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: failed" >&5 +printf "%s\n" "failed" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ok" >&5 +printf "%s\n" "ok" >&6; } +fi + +# Response file support. +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5 +printf %s "checking for sysroot... " >&6; } + +# Check whether --with-sysroot was given. +if test ${with_sysroot+y} +then : + withval=$with_sysroot; +else case e in #( + e) with_sysroot=no ;; +esac +fi + + +lt_sysroot= +case $with_sysroot in #( + yes) + if test yes = "$GCC"; then + # Trim trailing / since we'll always append absolute paths and we want + # to avoid //, if only for less confusing output for the user. 
+ lt_sysroot=`$CC --print-sysroot 2>/dev/null | $SED 's:/\+$::'` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | $SED -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_sysroot" >&5 +printf "%s\n" "$with_sysroot" >&6; } + as_fn_error $? "The sysroot must be an absolute path." "$LINENO" 5 + ;; +esac + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5 +printf "%s\n" "${lt_sysroot:-no}" >&6; } + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a working dd" >&5 +printf %s "checking for a working dd... " >&6; } +if test ${ac_cv_path_lt_DD+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) printf 0123456789abcdef0123456789abcdef >conftest.i +cat conftest.i conftest.i >conftest2.i +: ${lt_DD:=$DD} +if test -z "$lt_DD"; then + ac_path_lt_DD_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in dd + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_lt_DD="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_lt_DD" || continue +if "$ac_path_lt_DD" bs=32 count=1 <conftest2.i >conftest.out 2>/dev/null; then + cmp -s conftest.i conftest.out \ + && ac_cv_path_lt_DD="$ac_path_lt_DD" ac_path_lt_DD_found=: +fi + $ac_path_lt_DD_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_lt_DD"; then + : + fi +else + ac_cv_path_lt_DD=$lt_DD +fi + +rm -f conftest.i conftest2.i conftest.out ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_lt_DD" >&5 +printf "%s\n" "$ac_cv_path_lt_DD" >&6; } + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to truncate binary pipes" >&5 +printf %s "checking how to truncate binary pipes... 
" >&6; } +if test ${lt_cv_truncate_bin+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) printf 0123456789abcdef0123456789abcdef >conftest.i +cat conftest.i conftest.i >conftest2.i +lt_cv_truncate_bin= +if "$ac_cv_path_lt_DD" bs=32 count=1 <conftest2.i >conftest.out 2>/dev/null; then + cmp -s conftest.i conftest.out \ + && lt_cv_truncate_bin="$ac_cv_path_lt_DD bs=4096 count=1" +fi +rm -f conftest.i conftest2.i conftest.out +test -z "$lt_cv_truncate_bin" && lt_cv_truncate_bin="$SED -e 4q" ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_truncate_bin" >&5 +printf "%s\n" "$lt_cv_truncate_bin" >&6; } + + + + + + + +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. +func_cc_basename () +{ + for cc_temp in $*""; do + case $cc_temp in + compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; + distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; + \-*) ;; + *) break;; + esac + done + func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +} + +# Check whether --enable-libtool-lock was given. +if test ${enable_libtool_lock+y} +then : + enableval=$enable_libtool_lock; +fi + +test no = "$enable_libtool_lock" || enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out what ABI is being produced by ac_compile, and set mode + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `$FILECMD conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE=32 + ;; + *ELF-64*) + HPUX_IA64_MODE=64 + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. 
+ echo '#line '$LINENO' "configure"' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + if test yes = "$lt_cv_prog_gnu_ld"; then + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +mips64*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo '#line '$LINENO' "configure"' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + emul=elf + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + emul="${emul}32" + ;; + *64-bit*) + emul="${emul}64" + ;; + esac + case `$FILECMD conftest.$ac_objext` in + *MSB*) + emul="${emul}btsmip" + ;; + *LSB*) + emul="${emul}ltsmip" + ;; + esac + case `$FILECMD conftest.$ac_objext` in + *N32*) + emul="${emul}n32" + ;; + esac + LD="${LD-ld} -m $emul" + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*|x86_64-gnu*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. Note that the listed cases only cover the + # situations where additional linker options are needed (such as when + # doing 32-bit compilation for a host where ld defaults to 64-bit, or + # vice versa); the common cases where no linker options are needed do + # not appear in the list. 
+ echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `$FILECMD conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*|x86_64-gnu*) + case `$FILECMD conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac + ;; + powerpc64le-*linux*) + LD="${LD-ld} -m elf32lppclinux" + ;; + powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*|x86_64-gnu*) + LD="${LD-ld} -m elf_x86_64" + ;; + powerpcle-*linux*) + LD="${LD-ld} -m elf64lppc" + ;; + powerpc-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS -belf" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5 +printf %s "checking whether the C compiler needs -belf... " >&6; } +if test ${lt_cv_cc_needs_belf+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + lt_cv_cc_needs_belf=yes +else case e in #( + e) lt_cv_cc_needs_belf=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5 +printf "%s\n" "$lt_cv_cc_needs_belf" >&6; } + if test yes != "$lt_cv_cc_needs_belf"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS=$SAVE_CFLAGS + fi + ;; +*-*solaris*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `$FILECMD conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) + case $host in + i?86-*-solaris*|x86_64-*-solaris*) + LD="${LD-ld} -m elf_x86_64" + ;; + sparc*-*-solaris*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + # GNU ld 2.21 introduced _sol2 emulations. Use them if available. + if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then + LD=${LD-ld}_sol2 + fi + ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; +esac + +need_locks=$enable_libtool_lock + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}mt", so it can be a program name with args. 
+set dummy ${ac_tool_prefix}mt; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_MANIFEST_TOOL+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$MANIFEST_TOOL"; then + ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL +if test -n "$MANIFEST_TOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5 +printf "%s\n" "$MANIFEST_TOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_MANIFEST_TOOL"; then + ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL + # Extract the first word of "mt", so it can be a program name with args. +set dummy mt; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_MANIFEST_TOOL+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_MANIFEST_TOOL"; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="mt" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL +if test -n "$ac_ct_MANIFEST_TOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" >&5 +printf "%s\n" "$ac_ct_MANIFEST_TOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_MANIFEST_TOOL" = x; then + MANIFEST_TOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL + fi +else + MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL" +fi + +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5 +printf %s "checking if $MANIFEST_TOOL is a manifest tool... " >&6; } +if test ${lt_cv_path_manifest_tool+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_path_manifest_tool=no + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5 + $MANIFEST_TOOL '-?' 
2>conftest.err > conftest.out + cat conftest.err >&5 + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_manifest_tool=yes + fi + rm -f conftest* ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_manifest_tool" >&5 +printf "%s\n" "$lt_cv_path_manifest_tool" >&6; } +if test yes != "$lt_cv_path_manifest_tool"; then + MANIFEST_TOOL=: +fi + + + + + + + case $host_os in + rhapsody* | darwin*) + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a program name with args. +set dummy ${ac_tool_prefix}dsymutil; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_DSYMUTIL+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$DSYMUTIL"; then + ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +DSYMUTIL=$ac_cv_prog_DSYMUTIL +if test -n "$DSYMUTIL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5 +printf "%s\n" "$DSYMUTIL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DSYMUTIL"; then + ac_ct_DSYMUTIL=$DSYMUTIL + # Extract the first word of "dsymutil", so it can be a program name with args. +set dummy dsymutil; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_ac_ct_DSYMUTIL+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_DSYMUTIL"; then + ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DSYMUTIL="dsymutil" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL +if test -n "$ac_ct_DSYMUTIL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5 +printf "%s\n" "$ac_ct_DSYMUTIL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_DSYMUTIL" = x; then + DSYMUTIL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DSYMUTIL=$ac_ct_DSYMUTIL + fi +else + DSYMUTIL="$ac_cv_prog_DSYMUTIL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a program name with args. +set dummy ${ac_tool_prefix}nmedit; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_NMEDIT+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$NMEDIT"; then + ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +NMEDIT=$ac_cv_prog_NMEDIT +if test -n "$NMEDIT"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5 +printf "%s\n" "$NMEDIT" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_NMEDIT"; then + ac_ct_NMEDIT=$NMEDIT + # Extract the first word of "nmedit", so it can be a program name with args. +set dummy nmedit; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_NMEDIT+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_NMEDIT"; then + ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_NMEDIT="nmedit" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT +if test -n "$ac_ct_NMEDIT"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5 +printf "%s\n" "$ac_ct_NMEDIT" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_NMEDIT" = x; then + NMEDIT=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + NMEDIT=$ac_ct_NMEDIT + fi +else + NMEDIT="$ac_cv_prog_NMEDIT" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program name with args. +set dummy ${ac_tool_prefix}lipo; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_LIPO+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$LIPO"; then + ac_cv_prog_LIPO="$LIPO" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_LIPO="${ac_tool_prefix}lipo" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +LIPO=$ac_cv_prog_LIPO +if test -n "$LIPO"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5 +printf "%s\n" "$LIPO" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_LIPO"; then + ac_ct_LIPO=$LIPO + # Extract the first word of "lipo", so it can be a program name with args. +set dummy lipo; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_LIPO+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_LIPO"; then + ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_LIPO="lipo" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO +if test -n "$ac_ct_LIPO"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5 +printf "%s\n" "$ac_ct_LIPO" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_LIPO" = x; then + LIPO=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + LIPO=$ac_ct_LIPO + fi +else + LIPO="$ac_cv_prog_LIPO" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_OTOOL+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$OTOOL"; then + ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL="${ac_tool_prefix}otool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +OTOOL=$ac_cv_prog_OTOOL +if test -n "$OTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5 +printf "%s\n" "$OTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL"; then + ac_ct_OTOOL=$OTOOL + # Extract the first word of "otool", so it can be a program name with args. +set dummy otool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_OTOOL+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_OTOOL"; then + ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OTOOL="otool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL +if test -n "$ac_ct_OTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5 +printf "%s\n" "$ac_ct_OTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_OTOOL" = x; then + OTOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL=$ac_ct_OTOOL + fi +else + OTOOL="$ac_cv_prog_OTOOL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool64", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool64; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_OTOOL64+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$OTOOL64"; then + ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +OTOOL64=$ac_cv_prog_OTOOL64 +if test -n "$OTOOL64"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5 +printf "%s\n" "$OTOOL64" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL64"; then + ac_ct_OTOOL64=$OTOOL64 + # Extract the first word of "otool64", so it can be a program name with args. +set dummy otool64; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_OTOOL64+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_OTOOL64"; then + ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OTOOL64="otool64" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64 +if test -n "$ac_ct_OTOOL64"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5 +printf "%s\n" "$ac_ct_OTOOL64" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_OTOOL64" = x; then + OTOOL64=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL64=$ac_ct_OTOOL64 + fi +else + OTOOL64="$ac_cv_prog_OTOOL64" +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5 +printf %s "checking for -single_module linker flag... " >&6; } +if test ${lt_cv_apple_cc_single_mod+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_apple_cc_single_mod=no + if test -z "$LT_MULTI_MODULE"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. 
+ rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? + # If there is a non-empty error log, and "single_module" + # appears in it, assume the flag caused a linker warning + if test -s conftest.err && $GREP single_module conftest.err; then + cat conftest.err >&5 + # Otherwise, if the output was created with a 0 exit code from + # the compiler, it worked. + elif test -f libconftest.dylib && test 0 = "$_lt_result"; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&5 + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5 +printf "%s\n" "$lt_cv_apple_cc_single_mod" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5 +printf %s "checking for -exported_symbols_list linker flag... " >&6; } +if test ${lt_cv_ld_exported_symbols_list+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + lt_cv_ld_exported_symbols_list=yes +else case e in #( + e) lt_cv_ld_exported_symbols_list=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5 +printf "%s\n" "$lt_cv_ld_exported_symbols_list" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5 +printf %s "checking for -force_load linker flag... " >&6; } +if test ${lt_cv_ld_force_load+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_ld_force_load=no + cat > conftest.c << _LT_EOF +int forced_loaded() { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5 + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5 + echo "$AR $AR_FLAGS libconftest.a conftest.o" >&5 + $AR $AR_FLAGS libconftest.a conftest.o 2>&5 + echo "$RANLIB libconftest.a" >&5 + $RANLIB libconftest.a 2>&5 + cat > conftest.c << _LT_EOF +int main() { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? 
+ if test -s conftest.err && $GREP force_load conftest.err; then + cat conftest.err >&5 + elif test -f conftest && test 0 = "$_lt_result" && $GREP forced_load conftest >/dev/null 2>&1; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&5 + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5 +printf "%s\n" "$lt_cv_ld_force_load" >&6; } + case $host_os in + rhapsody* | darwin1.[012]) + _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + darwin*) + case $MACOSX_DEPLOYMENT_TARGET,$host in + 10.[012],*|,*powerpc*-darwin[5-8]*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + *) + _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test yes = "$lt_cv_apple_cc_single_mod"; then + _lt_dar_single_mod='$single_module' + fi + if test yes = "$lt_cv_ld_exported_symbols_list"; then + _lt_dar_export_syms=' $wl-exported_symbols_list,$output_objdir/$libname-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/$libname-symbols.expsym $lib' + fi + if test : != "$DSYMUTIL" && test no = "$lt_cv_ld_force_load"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac + +# func_munge_path_list VARIABLE PATH +# ----------------------------------- +# VARIABLE is name of variable containing _space_ separated list of +# directories to be munged by the contents of PATH, which is string +# having a format: +# "DIR[:DIR]:" +# string "DIR[ DIR]" will be prepended to VARIABLE +# ":DIR[:DIR]" +# string "DIR[ DIR]" will be appended to VARIABLE +# "DIRP[:DIRP]::[DIRA:]DIRA" +# string "DIRP[ DIRP]" will be prepended to VARIABLE and string +# "DIRA[ DIRA]" will be appended to VARIABLE +# "DIR[:DIR]" +# VARIABLE will be replaced by "DIR[ DIR]" +func_munge_path_list 
() +{ + case x$2 in + x) + ;; + *:) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'` \$$1\" + ;; + x:*) + eval $1=\"\$$1 `$ECHO $2 | $SED 's/:/ /g'`\" + ;; + *::*) + eval $1=\"\$$1\ `$ECHO $2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" + eval $1=\"`$ECHO $2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \$$1\" + ;; + *) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'`\" + ;; + esac +} + +ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default +" +if test "x$ac_cv_header_dlfcn_h" = xyes +then : + printf "%s\n" "#define HAVE_DLFCN_H 1" >>confdefs.h + +fi + + + + + +# Set options +enable_win32_dll=yes + +case $host in +*-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-cegcc*) + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}as", so it can be a program name with args. +set dummy ${ac_tool_prefix}as; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_AS+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$AS"; then + ac_cv_prog_AS="$AS" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_AS="${ac_tool_prefix}as" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +AS=$ac_cv_prog_AS +if test -n "$AS"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AS" >&5 +printf "%s\n" "$AS" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_AS"; then + ac_ct_AS=$AS + # Extract the first word of "as", so it can be a program name with args. 
+set dummy as; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_AS+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_AS"; then + ac_cv_prog_ac_ct_AS="$ac_ct_AS" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AS="as" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_AS=$ac_cv_prog_ac_ct_AS +if test -n "$ac_ct_AS"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AS" >&5 +printf "%s\n" "$ac_ct_AS" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_AS" = x; then + AS="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AS=$ac_ct_AS + fi +else + AS="$ac_cv_prog_AS" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. +set dummy ${ac_tool_prefix}dlltool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_DLLTOOL+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$DLLTOOL"; then + ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +DLLTOOL=$ac_cv_prog_DLLTOOL +if test -n "$DLLTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 +printf "%s\n" "$DLLTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DLLTOOL"; then + ac_ct_DLLTOOL=$DLLTOOL + # Extract the first word of "dlltool", so it can be a program name with args. +set dummy dlltool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_DLLTOOL+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_DLLTOOL"; then + ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DLLTOOL="dlltool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL +if test -n "$ac_ct_DLLTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 +printf "%s\n" "$ac_ct_DLLTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_DLLTOOL" = x; then + DLLTOOL="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DLLTOOL=$ac_ct_DLLTOOL + fi +else + DLLTOOL="$ac_cv_prog_DLLTOOL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. +set dummy ${ac_tool_prefix}objdump; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_OBJDUMP+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$OBJDUMP"; then + ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +OBJDUMP=$ac_cv_prog_OBJDUMP +if test -n "$OBJDUMP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 +printf "%s\n" "$OBJDUMP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OBJDUMP"; then + ac_ct_OBJDUMP=$OBJDUMP + # Extract the first word of "objdump", so it can be a program name with args. +set dummy objdump; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_OBJDUMP+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_OBJDUMP"; then + ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OBJDUMP="objdump" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP +if test -n "$ac_ct_OBJDUMP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 +printf "%s\n" "$ac_ct_OBJDUMP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_OBJDUMP" = x; then + OBJDUMP="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OBJDUMP=$ac_ct_OBJDUMP + fi +else + OBJDUMP="$ac_cv_prog_OBJDUMP" +fi + + ;; +esac + +test -z "$AS" && AS=as + + + + + +test -z "$DLLTOOL" && DLLTOOL=dlltool + + + + + +test -z "$OBJDUMP" && OBJDUMP=objdump + + + + + + + + enable_dlopen=no + + + + # Check whether --enable-shared was given. +if test ${enable_shared+y} +then : + enableval=$enable_shared; p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else case e in #( + e) enable_shared=yes ;; +esac +fi + + + + + + + + + + # Check whether --enable-static was given. 
+if test ${enable_static+y} +then : + enableval=$enable_static; p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else case e in #( + e) enable_static=yes ;; +esac +fi + + + + + + + + + + +# Check whether --with-pic was given. +if test ${with_pic+y} +then : + withval=$with_pic; lt_p=${PACKAGE-default} + case $withval in + yes|no) pic_mode=$withval ;; + *) + pic_mode=default + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for lt_pkg in $withval; do + IFS=$lt_save_ifs + if test "X$lt_pkg" = "X$lt_p"; then + pic_mode=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else case e in #( + e) pic_mode=default ;; +esac +fi + + + + + + + + + # Check whether --enable-fast-install was given. +if test ${enable_fast_install+y} +then : + enableval=$enable_fast_install; p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else case e in #( + e) enable_fast_install=yes ;; +esac +fi + + + + + + + + + shared_archive_member_spec= +case $host,$enable_shared in +power*-*-aix[5-9]*,yes) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking which variant of shared library versioning to provide" >&5 +printf %s "checking which variant of shared library versioning to provide... " >&6; } + +# Check whether --with-aix-soname was given. 
+if test ${with_aix_soname+y} +then : + withval=$with_aix_soname; case $withval in + aix|svr4|both) + ;; + *) + as_fn_error $? "Unknown argument to --with-aix-soname" "$LINENO" 5 + ;; + esac + lt_cv_with_aix_soname=$with_aix_soname +else case e in #( + e) if test ${lt_cv_with_aix_soname+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_with_aix_soname=aix ;; +esac +fi + + with_aix_soname=$lt_cv_with_aix_soname ;; +esac +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_aix_soname" >&5 +printf "%s\n" "$with_aix_soname" >&6; } + if test aix != "$with_aix_soname"; then + # For the AIX way of multilib, we name the shared archive member + # based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o', + # and 'shr.imp' or 'shr_64.imp', respectively, for the Import File. + # Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag, + # the AIX toolchain works better with OBJECT_MODE set (default 32). + if test 64 = "${OBJECT_MODE-32}"; then + shared_archive_member_spec=shr_64 + else + shared_archive_member_spec=shr + fi + fi + ;; +*) + with_aix_soname=aix + ;; +esac + + + + + + + + + + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS=$ltmain + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +test -z "$LN_S" && LN_S="ln -s" + + + + + + + + + + + + + + +if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5 +printf %s "checking for objdir... " >&6; } +if test ${lt_cv_objdir+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. 
+ lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5 +printf "%s\n" "$lt_cv_objdir" >&6; } +objdir=$lt_cv_objdir + + + + + +printf "%s\n" "#define LT_OBJDIR \"$lt_cv_objdir/\"" >>confdefs.h + + + + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a '.a' archive for static linking (except MSVC and +# ICC, which need '.lib'). +libext=a + +with_gnu_ld=$lt_cv_prog_gnu_ld + +old_CC=$CC +old_CFLAGS=$CFLAGS + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +func_cc_basename $compiler +cc_basename=$func_cc_basename_result + + +# Only perform the check for file, if the check method requires it +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5 +printf %s "checking for ${ac_tool_prefix}file... " >&6; } +if test ${lt_cv_path_MAGIC_CMD+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $MAGIC_CMD in +[\\/*] | ?:[\\/]*) + lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD=$MAGIC_CMD + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. 
+ if test -f "$ac_dir/${ac_tool_prefix}file"; then + lt_cv_path_MAGIC_CMD=$ac_dir/"${ac_tool_prefix}file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD=$lt_cv_path_MAGIC_CMD + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS=$lt_save_ifs + MAGIC_CMD=$lt_save_MAGIC_CMD + ;; +esac ;; +esac +fi + +MAGIC_CMD=$lt_cv_path_MAGIC_CMD +if test -n "$MAGIC_CMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +printf "%s\n" "$MAGIC_CMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + + + +if test -z "$lt_cv_path_MAGIC_CMD"; then + if test -n "$ac_tool_prefix"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for file" >&5 +printf %s "checking for file... " >&6; } +if test ${lt_cv_path_MAGIC_CMD+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $MAGIC_CMD in +[\\/*] | ?:[\\/]*) + lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD=$MAGIC_CMD + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. 
+ if test -f "$ac_dir/file"; then + lt_cv_path_MAGIC_CMD=$ac_dir/"file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD=$lt_cv_path_MAGIC_CMD + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS=$lt_save_ifs + MAGIC_CMD=$lt_save_MAGIC_CMD + ;; +esac ;; +esac +fi + +MAGIC_CMD=$lt_cv_path_MAGIC_CMD +if test -n "$MAGIC_CMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +printf "%s\n" "$MAGIC_CMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + else + MAGIC_CMD=: + fi +fi + + fi + ;; +esac + +# Use C for the default configuration in the libtool script + +lt_save_CC=$CC +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Source file extension for C test sources. +ac_ext=c + +# Object file extension for compiled C test sources. 
+objext=o +objext=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="int some_variable = 0;" + +# Code to be used in simple link tests +lt_simple_link_test_code='int main(){return(0);}' + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + +# Save the default compiler, since it gets overwritten when the other +# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. +compiler_DEFAULT=$CC + +# save warnings/boilerplate of simple test code +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* + +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* + + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + +lt_prog_compiler_no_builtin_flag= + +if test yes = "$GCC"; then + case $cc_basename in + nvcc*) + lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;; + *) + lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;; + esac + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 +printf %s "checking if $compiler supports -fno-rtti -fno-exceptions... 
" >&6; } +if test ${lt_cv_prog_compiler_rtti_exceptions+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_prog_compiler_rtti_exceptions=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="-fno-rtti -fno-exceptions" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_rtti_exceptions=yes + fi + fi + $RM conftest* + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5 +printf "%s\n" "$lt_cv_prog_compiler_rtti_exceptions" >&6; } + +if test yes = "$lt_cv_prog_compiler_rtti_exceptions"; then + lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions" +else + : +fi + +fi + + + + + + + lt_prog_compiler_wl= +lt_prog_compiler_pic= +lt_prog_compiler_static= + + + if test yes = "$GCC"; then + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_static='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + fi + lt_prog_compiler_pic='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + lt_prog_compiler_pic='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. + lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | windows* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + lt_prog_compiler_pic='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static='$wl-static' + ;; + esac + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. 
+ # The "-static" flag exists, but is broken. + lt_prog_compiler_static= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + ;; + + interix[3-9]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + lt_prog_compiler_can_build_shared=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic=-Kconform_pic + fi + ;; + + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + lt_prog_compiler_wl='-Xlinker ' + if test -n "$lt_prog_compiler_pic"; then + lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. 
+ case $host_os in + aix*) + lt_prog_compiler_wl='-Wl,' + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + else + lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic='-fno-common' + case $cc_basename in + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl='-Wl,-Wl,,' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + esac + ;; + + mingw* | windows* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_prog_compiler_pic='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static='$wl-static' + ;; + esac + ;; + + hpux9* | hpux10* | hpux11*) + lt_prog_compiler_wl='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + lt_prog_compiler_static='$wl-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + lt_prog_compiler_wl='-Wl,' + # PIC (with -KPIC) is the default. + lt_prog_compiler_static='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + # old Intel for x86_64, which still supported -KPIC. + ecc*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-static' + ;; + *flang) + # Flang compiler. + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. 
+ icc* | ifort*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + # Lahey Fortran 8.1. + lf95*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='--shared' + lt_prog_compiler_static='--static' + ;; + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl='-Wl,-Wl,,' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fpic' + lt_prog_compiler_static='-Bstatic' + ;; + ccc*) + lt_prog_compiler_wl='-Wl,' + # All Alpha code is PIC. + lt_prog_compiler_static='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-qpic' + lt_prog_compiler_static='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='' + ;; + *Sun\ F* | *Sun*Fortran*) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='-Wl,' + ;; + *Intel*\ [CF]*Compiler*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + *Portland\ Group*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fpic' + lt_prog_compiler_static='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + lt_prog_compiler_pic='-KPIC' + 
lt_prog_compiler_static='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + lt_prog_compiler_wl='-Wl,' + # All OSF/1 code is PIC. + lt_prog_compiler_static='-non_shared' + ;; + + rdos*) + lt_prog_compiler_static='-non_shared' + ;; + + solaris*) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + lt_prog_compiler_wl='-Qoption ld ';; + *) + lt_prog_compiler_wl='-Wl,';; + esac + ;; + + sunos4*) + lt_prog_compiler_wl='-Qoption ld ' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic='-Kconform_pic' + lt_prog_compiler_static='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + unicos*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_can_build_shared=no + ;; + + uts4*) + lt_prog_compiler_pic='-pic' + lt_prog_compiler_static='-Bstatic' + ;; + + *) + lt_prog_compiler_can_build_shared=no + ;; + esac + fi + +case $host_os in + # For platforms that do not support PIC, -DPIC is meaningless: + *djgpp*) + lt_prog_compiler_pic= + ;; + *) + lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC" + ;; +esac + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 +printf %s "checking for $compiler option to produce PIC... 
" >&6; } +if test ${lt_cv_prog_compiler_pic+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_prog_compiler_pic=$lt_prog_compiler_pic ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5 +printf "%s\n" "$lt_cv_prog_compiler_pic" >&6; } +lt_prog_compiler_pic=$lt_cv_prog_compiler_pic + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$lt_prog_compiler_pic"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5 +printf %s "checking if $compiler PIC flag $lt_prog_compiler_pic works... " >&6; } +if test ${lt_cv_prog_compiler_pic_works+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_prog_compiler_pic_works=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$lt_prog_compiler_pic -DPIC" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_pic_works=yes + fi + fi + $RM conftest* + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5 +printf "%s\n" "$lt_cv_prog_compiler_pic_works" >&6; } + +if test yes = "$lt_cv_prog_compiler_pic_works"; then + case $lt_prog_compiler_pic in + "" | " "*) ;; + *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;; + esac +else + lt_prog_compiler_pic= + lt_prog_compiler_can_build_shared=no +fi + +fi + + + + + + + + + + + +# +# Check to make sure the static flag actually works. +# +wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 +printf %s "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } +if test ${lt_cv_prog_compiler_static_works+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_prog_compiler_static_works=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS $lt_tmp_static_flag" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_static_works=yes + fi + else + lt_cv_prog_compiler_static_works=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5 +printf "%s\n" "$lt_cv_prog_compiler_static_works" >&6; } + +if test yes = "$lt_cv_prog_compiler_static_works"; then + : +else + lt_prog_compiler_static= +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if test ${lt_cv_prog_compiler_c_o+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +printf "%s\n" "$lt_cv_prog_compiler_c_o" >&6; } + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if test ${lt_cv_prog_compiler_c_o+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +printf "%s\n" "$lt_cv_prog_compiler_c_o" >&6; } + + + + +hard_links=nottested +if test no = "$lt_cv_prog_compiler_c_o" && test no != "$need_locks"; then + # do not overwrite the value of need_locks provided by the user + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 +printf %s "checking if we can lock with hard links... 
" >&6; } + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 +printf "%s\n" "$hard_links" >&6; } + if test no = "$hard_links"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&5 +printf "%s\n" "$as_me: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&2;} + need_locks=warn + fi +else + need_locks=no +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +printf %s "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } + + runpath_var= + allow_undefined_flag= + always_export_symbols=no + archive_cmds= + archive_expsym_cmds= + compiler_needs_object=no + enable_shared_with_static_runtimes=no + export_dynamic_flag_spec= + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + hardcode_automatic=no + hardcode_direct=no + hardcode_direct_absolute=no + hardcode_libdir_flag_spec= + hardcode_libdir_separator= + hardcode_minus_L=no + hardcode_shlibpath_var=unsupported + inherit_rpath=no + link_all_deplibs=unknown + module_cmds= + module_expsym_cmds= + old_archive_from_new_cmds= + old_archive_from_expsyms_cmds= + thread_safe_flag_spec= + whole_archive_flag_spec= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + include_expsyms= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ' (' and ')$', so one must not match beginning or + # end of line. Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', + # as well as any symbol that contains 'd'. 
+ exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | windows* | pw32* | cegcc*) + # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. + if test yes != "$GCC"; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) + with_gnu_ld=yes + ;; + openbsd*) + with_gnu_ld=no + ;; + esac + + ld_shlibs=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test yes = "$with_gnu_ld"; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; + *\ \(GNU\ Binutils\)\ [3-9]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test yes = "$lt_use_gnu_ld_interface"; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='$wl' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. 
+ runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + export_dynamic_flag_spec='$wl--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + whole_archive_flag_spec= + fi + supports_anon_versioning=no + case `$LD -v | $SED -e 's/([^)]\+)\s\+//' 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[3-9]*) + # On AIX/PPC, the GNU linker is very broken + if test ia64 != "$host_cpu"; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. 
+ +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + ld_shlibs=no + fi + ;; + + cygwin* | mingw* | windows* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless, + # as there is no search path for DLLs. 
+ hardcode_libdir_flag_spec='-L$libdir' + export_dynamic_flag_spec='$wl--export-all-symbols' + allow_undefined_flag=unsupported + always_export_symbols=no + enable_shared_with_static_runtimes=yes + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... + archive_expsym_cmds='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + ld_shlibs=no + fi + ;; + + haiku*) + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + link_all_deplibs=yes + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + shrext_cmds=.dll + archive_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> 
$output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + old_archive_from_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes=yes + file_list_spec='@' + ;; + + interix[3-9]*) + hardcode_direct=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='$wl-rpath,$libdir' + export_dynamic_flag_spec='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
+ archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test linux-dietlibc = "$host_os"; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test no = "$tmp_diet" + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + whole_archive_flag_spec= + tmp_sharedflag='--shared' ;; + nagfor*) # NAGFOR 5.3 + tmp_sharedflag='-Wl,-shared' ;; + xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) + 
tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object=yes + ;; + esac + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) # Sun C 5.9 + whole_archive_flag_spec='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + tcc*) + export_dynamic_flag_spec='-rdynamic' + ;; + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script 
$output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + ld_shlibs=no + fi + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. 
+ +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + ;; + + sunos4*) + archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + + if test no = "$ld_shlibs"; then + runpath_var= + hardcode_libdir_flag_spec= + export_dynamic_flag_spec= + whole_archive_flag_spec= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + allow_undefined_flag=unsupported + always_export_symbols=yes + archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. 
+ hardcode_minus_L=yes + if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + hardcode_direct=unsupported + fi + ;; + + aix[4-9]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + export_symbols_cmds='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. 
+ # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) + for ld_flag in $LDFLAGS; do + if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then + aix_use_runtimelinking=yes + break + fi + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. + aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + archive_cmds='' + hardcode_direct=yes + hardcode_direct_absolute=yes + hardcode_libdir_separator=':' + link_all_deplibs=yes + file_list_spec='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # traditional, no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. 
+ hardcode_direct=no + hardcode_direct_absolute=no + ;; + esac + + if test yes = "$GCC"; then + case $host_os in aix4.[012]|aix4.[012].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + hardcode_direct=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L=yes + hardcode_libdir_flag_spec='-L$libdir' + hardcode_libdir_separator= + fi + ;; + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag="$shared_flag "'$wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + export_dynamic_flag_spec='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + always_export_symbols=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. 
+ allow_undefined_flag='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if test ${lt_cv_aix_libpath_+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=/usr/lib:/lib + fi + ;; +esac +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='$wl-blibpath:$libdir:'"$aix_libpath" + archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + hardcode_libdir_flag_spec='$wl-R $libdir:/usr/lib:/lib' + allow_undefined_flag="-z nodefs" + archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. 
+ if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if test ${lt_cv_aix_libpath_+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=/usr/lib:/lib + fi + ;; +esac +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + no_undefined_flag=' $wl-bernotok' + allow_undefined_flag=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. + whole_archive_flag_spec='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + whole_archive_flag_spec='$convenience' + fi + archive_cmds_need_lc=yes + archive_expsym_cmds='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([, ]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared libraries. 
+ archive_expsym_cmds="$archive_expsym_cmds"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + archive_expsym_cmds="$archive_expsym_cmds"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + archive_expsym_cmds="$archive_expsym_cmds"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + archive_expsym_cmds="$archive_expsym_cmds"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + 
bsdi[45]*) + export_dynamic_flag_spec=-rdynamic + ;; + + cygwin* | mingw* | windows* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + case $cc_basename in + cl* | icl*) + # Native MSVC or ICC + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + always_export_symbols=yes + file_list_spec='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds='$CC -Fe $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + archive_expsym_cmds='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -Fe $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. 
+ # _LT_TAGVAR(old_archive_from_new_cmds, )='true' + enable_shared_with_static_runtimes=yes + exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + old_postinstall_cmds='chmod 644 $oldlib' + postlink_cmds='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC and ICC wrapper + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + old_archive_from_new_cmds='true' + # FIXME: Should let the user specify the lib program. 
+ old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs' + enable_shared_with_static_runtimes=yes + ;; + esac + ;; + + darwin* | rhapsody*) + + + archive_cmds_need_lc=no + hardcode_direct=no + hardcode_automatic=yes + hardcode_shlibpath_var=unsupported + if test yes = "$lt_cv_ld_force_load"; then + whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + + else + whole_archive_flag_spec='' + fi + link_all_deplibs=yes + allow_undefined_flag=$_lt_dar_allow_undefined + case $cc_basename in + ifort*|nagfor*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test yes = "$_lt_dar_can_shared"; then + output_verbose_link_cmd=func_echo_all + archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" + module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" + archive_expsym_cmds="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + module_expsym_cmds="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + + else + ld_shlibs=no + fi + + ;; + + dgux*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. 
Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. + freebsd* | dragonfly* | midnightbsd*) + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + hpux9*) + if test yes = "$GCC"; then + archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + fi + hardcode_libdir_flag_spec='$wl+b $wl$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L=yes + export_dynamic_flag_spec='$wl-E' + ;; + + hpux10*) + if test yes,no = "$GCC,$with_gnu_ld"; then + archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec='$wl+b $wl$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='$wl-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + hardcode_minus_L=yes + fi + ;; + + hpux11*) + if test yes,no = "$GCC,$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + archive_cmds='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + archive_cmds='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5 +printf %s "checking if $CC understands -b... 
" >&6; } +if test ${lt_cv_prog_compiler__b+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_prog_compiler__b=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -b" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. + cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler__b=yes + fi + else + lt_cv_prog_compiler__b=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5 +printf "%s\n" "$lt_cv_prog_compiler__b" >&6; } + +if test yes = "$lt_cv_prog_compiler__b"; then + archive_cmds='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' +else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' +fi + + ;; + esac + fi + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec='$wl+b $wl$libdir' + hardcode_libdir_separator=: + + case $host_cpu in + hppa*64*|ia64*) + hardcode_direct=no + hardcode_shlibpath_var=no + ;; + *) + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='$wl-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test yes = "$GCC"; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 +printf %s "checking whether the $host_os linker accepts -exported_symbol... " >&6; } +if test ${lt_cv_irix_exported_symbol+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int foo (void) { return 0; } +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + lt_cv_irix_exported_symbol=yes +else case e in #( + e) lt_cv_irix_exported_symbol=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 +printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } + if test yes = "$lt_cv_irix_exported_symbol"; then + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' + fi + else + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + hardcode_libdir_separator=: + inherit_rpath=yes + link_all_deplibs=yes + ;; + + linux*) + case $cc_basename in + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + ld_shlibs=yes + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + newsos6) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs 
$linker_flags' + hardcode_direct=yes + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + hardcode_libdir_separator=: + hardcode_shlibpath_var=no + ;; + + *nto* | *qnx*) + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + hardcode_direct=yes + hardcode_shlibpath_var=no + hardcode_direct_absolute=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' + hardcode_libdir_flag_spec='$wl-rpath,$libdir' + export_dynamic_flag_spec='$wl-E' + else + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='$wl-rpath,$libdir' + fi + else + ld_shlibs=no + fi + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + shrext_cmds=.dll + archive_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> 
$output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + old_archive_from_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes=yes + file_list_spec='@' + ;; + + osf3*) + if test yes = "$GCC"; then + allow_undefined_flag=' $wl-expect_unresolved $wl\*' + archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + hardcode_libdir_separator=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test yes = "$GCC"; then + allow_undefined_flag=' $wl-expect_unresolved $wl\*' + archive_cmds='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag 
$wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + hardcode_libdir_flag_spec='-rpath $libdir' + fi + archive_cmds_need_lc='no' + hardcode_libdir_separator=: + ;; + + solaris*) + no_undefined_flag=' -z defs' + if test yes = "$GCC"; then + wlarc='$wl' + archive_cmds='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + archive_cmds='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='$wl' + archive_cmds='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_shlibpath_var=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. GCC discards it without '$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) 
+ if test yes = "$GCC"; then + whole_archive_flag_spec='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + else + whole_archive_flag_spec='-z allextract$convenience -z defaultextract' + fi + ;; + esac + link_all_deplibs=yes + ;; + + sunos4*) + if test sequent = "$host_vendor"; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. + archive_cmds='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + hardcode_libdir_flag_spec='-L$libdir' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + sysv4) + case $host_vendor in + sni) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. + archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' + reload_cmds='$CC -r -o $output$reload_objs' + hardcode_direct=no + ;; + motorola) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var=no + ;; + + sysv4.3*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + export_dynamic_flag_spec='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + ld_shlibs=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) + no_undefined_flag='$wl-z,text' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + archive_cmds='$CC -shared $wl-h,$soname -o 
$lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + no_undefined_flag='$wl-z,text' + allow_undefined_flag='$wl-z,nodefs' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='$wl-R,$libdir' + hardcode_libdir_separator=':' + link_all_deplibs=yes + export_dynamic_flag_spec='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + archive_cmds='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + *) + ld_shlibs=no + ;; + esac + + if test sni = "$host_vendor"; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + export_dynamic_flag_spec='$wl-Blargedynsym' + ;; + esac + fi + fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5 +printf "%s\n" "$ld_shlibs" >&6; } +test no = "$ld_shlibs" 
&& can_build_shared=no + +with_gnu_ld=$with_gnu_ld + + + + + + + + + + + + + + + +# +# Do we need to explicitly link libc? +# +case "x$archive_cmds_need_lc" in +x|xyes) + # Assume -lc should be added + archive_cmds_need_lc=yes + + if test yes,yes = "$GCC,$enable_shared"; then + case $archive_cmds in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 +printf %s "checking whether -lc should be explicitly linked in... " >&6; } +if test ${lt_cv_archive_cmds_need_lc+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) $RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_prog_compiler_wl + pic_flag=$lt_prog_compiler_pic + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$allow_undefined_flag + allow_undefined_flag= + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 + (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + then + lt_cv_archive_cmds_need_lc=no + else + lt_cv_archive_cmds_need_lc=yes + fi + allow_undefined_flag=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5 +printf "%s\n" "$lt_cv_archive_cmds_need_lc" >&6; } + archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc + ;; + esac + fi + ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 +printf %s "checking dynamic linker characteristics... " >&6; } + +if test yes = "$GCC"; then + case $host_os in + darwin*) lt_awk_arg='/^libraries:/,/LR/' ;; + *) lt_awk_arg='/^libraries:/' ;; + esac + case $host_os in + mingw* | windows* | cegcc*) lt_sed_strip_eq='s|=\([A-Za-z]:\)|\1|g' ;; + *) lt_sed_strip_eq='s|=/|/|g' ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` + case $lt_search_path_spec in + *\;*) + # if the path contains ";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary... 
+ lt_tmp_lt_search_path_spec= + lt_multi_os_dir=/`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + # ...but if some path component already ends with the multilib dir we assume + # that all is fine and trust -print-search-dirs as is (GCC 4.2? or newer). + case "$lt_multi_os_dir; $lt_search_path_spec " in + "/; "* | "/.; "* | "/./; "* | *"$lt_multi_os_dir "* | *"$lt_multi_os_dir/ "*) + lt_multi_os_dir= + ;; + esac + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path$lt_multi_os_dir" + elif test -n "$lt_multi_os_dir"; then + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS = " "; FS = "/|\n";} { + lt_foo = ""; + lt_count = 0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo = "/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[lt_foo]++; } + if (lt_freq[lt_foo] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. 
+ case $host_os in + mingw* | windows* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's|/\([A-Za-z]:\)|\1|g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=.so +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + + + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='$libname$release$shared_ext$major' + ;; + +aix[4-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test ia64 = "$host_cpu"; then + # AIX 5 supports IA64 + library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line '#! .'. This would cause the generated library to + # depend on '.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. 
+ case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # Using Import Files as archive members, it is possible to support + # filename-based versioning of shared library archives on AIX. While + # this would work for both with and without runtime linking, it will + # prevent static linking of such archives. So we do filename-based + # shared library versioning with .so extension only, which is used + # when both runtime linking and shared linking is enabled. + # Unfortunately, runtime linking may impact performance, so we do + # not want this to be the default eventually. Also, we use the + # versioned .so libs for executables only if there is the -brtl + # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. + # To allow for filename-based versioning support, we need to create + # libNAME.so.V as an archive file, containing: + # *) an Import File, referring to the versioned filename of the + # archive as well as the shared archive member, telling the + # bitwidth (32 or 64) of that shared object, and providing the + # list of exported symbols of that shared object, eventually + # decorated with the 'weak' keyword + # *) the shared object with the F_LOADONLY flag set, to really avoid + # it being seen by the linker. + # At run time we better use the real file rather than another symlink, + # but for link time we create the symlink libNAME.so -> libNAME.so.V + + case $with_aix_soname,$aix_use_runtimelinking in + # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. 
+ aix,yes) # traditional libtool + dynamic_linker='AIX unversionable lib.so' + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + aix,no) # traditional AIX only + dynamic_linker='AIX lib.a(lib.so.V)' + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + ;; + svr4,*) # full svr4 only + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # We do not specify a path in Import Files, so LIBPATH fires. + shlibpath_overrides_runpath=yes + ;; + *,yes) # both, prefer svr4 + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o), lib.a(lib.so.V)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # unpreferred sharedlib libNAME.a needs extra handling + postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' + postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' + # We do not specify a path in Import Files, so LIBPATH fires. 
+ shlibpath_overrides_runpath=yes + ;; + *,no) # both, prefer aix + dynamic_linker="AIX lib.a(lib.so.V), lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling + postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' + postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' + ;; + esac + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='$libname$shared_ext' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | windows* | pw32* | cegcc*) + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api" + ;; + mingw* | windows* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl* | *,icl*) + # Native MSVC or ICC + libname_spec='$name' + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + library_names_spec='$libname.dll.lib' + + case $build_os in + mingw* | windows*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec=$LIB + if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC and ICC wrapper + library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' + soname_spec='$libname$release$major$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib" + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly* | midnightbsd*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[23].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=no + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
+ library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + if test 32 = "$HPUX_IA64_MODE"; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + sys_lib_dlsearch_path_spec=/usr/lib/hpux32 + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + sys_lib_dlsearch_path_spec=/usr/lib/hpux64 + fi + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[3-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test yes = "$lt_cv_prog_gnu_ld"; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" + sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +linux*android*) + version_type=none # Android doesn't support versioned libraries. 
+ need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext' + finish_cmds= + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + dynamic_linker='Android linker' + # -rpath works at least for libraries that are not overridden by + # libraries installed in system locations. + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + ;; + +# This must be glibc/ELF. +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + if test ${lt_cv_shlibpath_overrides_runpath+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \ + LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\"" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null +then : + lt_cv_shlibpath_overrides_runpath=yes +fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + ;; +esac +fi + + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Ideally, we could use ldconfig to report *all* directories which are + # searched for libraries, however this is still not possible. Aside from not + # being certain /sbin/ldconfig is available, command + # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, + # even though it is searched at run-time. Try to do the best guess by + # appending ld.so.conf contents (and includes) to the search path. + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. 
+ dynamic_linker='GNU/Linux ld.so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd*) + version_type=sunos + sys_lib_dlsearch_path_spec=/usr/lib + need_lib_prefix=no + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + need_version=no + else + need_version=yes + fi + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +os2*) + libname_spec='$name' + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + # OS/2 can only load a DLL with a base name of 8 characters or less. 
+ soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; + v=$($ECHO $release$versuffix | tr -d .-); + n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); + $ECHO $n$v`$shared_ext' + library_names_spec='${libname}_dll.$libext' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=BEGINLIBPATH + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; $ECHO \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='$libname$release$shared_ext$versuffix 
$libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test yes = "$with_gnu_ld"; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec; then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' + soname_spec='$libname$shared_ext.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=sco + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test yes = "$with_gnu_ld"; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 +printf "%s\n" "$dynamic_linker" >&6; } +test no = "$dynamic_linker" && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test yes = "$GCC"; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then + sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec +fi + +if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then + sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec +fi + +# remember unaugmented sys_lib_dlsearch_path content for libtool script decls... +configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec + +# ... 
but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code +func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" + +# to be used as default LT_SYS_LIBRARY_PATH value in generated libtool +configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 +printf %s "checking how to hardcode library paths into programs... " >&6; } +hardcode_action= +if test -n "$hardcode_libdir_flag_spec" || + test -n "$runpath_var" || + test yes = "$hardcode_automatic"; then + + # We can hardcode non-existent directories. + if test no != "$hardcode_direct" && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, )" && + test no != "$hardcode_minus_L"; then + # Linking always hardcodes the temporary library directory. + hardcode_action=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. 
+ hardcode_action=unsupported +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5 +printf "%s\n" "$hardcode_action" >&6; } + +if test relink = "$hardcode_action" || + test yes = "$inherit_rpath"; then + # Fast installation is not supported + enable_fast_install=no +elif test yes = "$shlibpath_overrides_runpath" || + test no = "$enable_shared"; then + # Fast installation is not necessary + enable_fast_install=needless +fi + + + + + + + if test yes != "$enable_dlopen"; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen=load_add_on + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | windows* | pw32* | cegcc*) + lt_cv_dlopen=LoadLibrary + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +printf %s "checking for dlopen in -ldl... " >&6; } +if test ${ac_cv_lib_dl_dlopen+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (void); +int +main (void) +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dl_dlopen=yes +else case e in #( + e) ac_cv_lib_dl_dlopen=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +printf "%s\n" "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes +then : + lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl +else case e in #( + e) + lt_cv_dlopen=dyld + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; +esac +fi + + ;; + + tpf*) + # Don't try to run any link tests for TPF. We know it's impossible + # because TPF is a cross-compiler, and we know how we open DSOs. + lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + lt_cv_dlopen_self=no + ;; + + *) + ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load" +if test "x$ac_cv_func_shl_load" = xyes +then : + lt_cv_dlopen=shl_load +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5 +printf %s "checking for shl_load in -ldld... " >&6; } +if test ${ac_cv_lib_dld_shl_load+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char shl_load (void); +int +main (void) +{ +return shl_load (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dld_shl_load=yes +else case e in #( + e) ac_cv_lib_dld_shl_load=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5 +printf "%s\n" "$ac_cv_lib_dld_shl_load" >&6; } +if test "x$ac_cv_lib_dld_shl_load" = xyes +then : + lt_cv_dlopen=shl_load lt_cv_dlopen_libs=-ldld +else case e in #( + e) ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" +if test "x$ac_cv_func_dlopen" = xyes +then : + lt_cv_dlopen=dlopen +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +printf %s "checking for dlopen in -ldl... " >&6; } +if test ${ac_cv_lib_dl_dlopen+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (void); +int +main (void) +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dl_dlopen=yes +else case e in #( + e) ac_cv_lib_dl_dlopen=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +printf "%s\n" "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes +then : + lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5 +printf %s "checking for dlopen in -lsvld... " >&6; } +if test ${ac_cv_lib_svld_dlopen+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-lsvld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (void); +int +main (void) +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_svld_dlopen=yes +else case e in #( + e) ac_cv_lib_svld_dlopen=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5 +printf "%s\n" "$ac_cv_lib_svld_dlopen" >&6; } +if test "x$ac_cv_lib_svld_dlopen" = xyes +then : + lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-lsvld +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5 +printf %s "checking for dld_link in -ldld... " >&6; } +if test ${ac_cv_lib_dld_dld_link+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char dld_link (void); +int +main (void) +{ +return dld_link (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dld_dld_link=yes +else case e in #( + e) ac_cv_lib_dld_dld_link=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5 +printf "%s\n" "$ac_cv_lib_dld_dld_link" >&6; } +if test "x$ac_cv_lib_dld_dld_link" = xyes +then : + lt_cv_dlopen=dld_link lt_cv_dlopen_libs=-ldld +fi + + ;; +esac +fi + + ;; +esac +fi + + ;; +esac +fi + + ;; +esac +fi + + ;; +esac +fi + + ;; + esac + + if test no = "$lt_cv_dlopen"; then + enable_dlopen=no + else + enable_dlopen=yes + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS=$CPPFLAGS + test yes = "$ac_cv_header_dlfcn_h" && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS=$LDFLAGS + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS=$LIBS + LIBS="$lt_cv_dlopen_libs $LIBS" + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5 +printf %s "checking whether a program can dlopen itself... " >&6; } +if test ${lt_cv_dlopen_self+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test yes = "$cross_compiling"; then : + lt_cv_dlopen_self=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif + +#include <stdio.h> + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. 
*/ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisibility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. */ +#if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s "conftest$ac_exeext" 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$? 
+ case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self=no + fi +fi +rm -fr conftest* + + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5 +printf "%s\n" "$lt_cv_dlopen_self" >&6; } + + if test yes = "$lt_cv_dlopen_self"; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5 +printf %s "checking whether a statically linked program can dlopen itself... " >&6; } +if test ${lt_cv_dlopen_self_static+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test yes = "$cross_compiling"; then : + lt_cv_dlopen_self_static=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif + +#include <stdio.h> + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisibility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. 
*/ +#if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s "conftest$ac_exeext" 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$? + case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self_static=no + fi +fi +rm -fr conftest* + + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5 +printf "%s\n" "$lt_cv_dlopen_self_static" >&6; } + fi + + CPPFLAGS=$save_CPPFLAGS + LDFLAGS=$save_LDFLAGS + LIBS=$save_LIBS + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi + + + + + + + + + + + + + + + + + +striplib= +old_striplib= +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5 +printf %s "checking whether stripping libraries is possible... 
" >&6; } +if test -z "$STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +else + if $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + case $host_os in + darwin*) + # FIXME - insert some real tests, host_os isn't really good enough + striplib="$STRIP -x" + old_striplib="$STRIP -S" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + ;; + freebsd*) + if $STRIP -V 2>&1 | $GREP "elftoolchain" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + fi + ;; + *) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + ;; + esac + fi +fi + + + + + + + + + + + + + # Report what library types will actually be built + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 +printf %s "checking if libtool supports shared libraries... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 +printf "%s\n" "$can_build_shared" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 +printf %s "checking whether to build shared libraries... " >&6; } + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. 
+ case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + + aix[4-9]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 +printf "%s\n" "$enable_shared" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 +printf %s "checking whether to build static libraries... " >&6; } + # Make sure either enable_shared or enable_static is yes. + test yes = "$enable_shared" || enable_static=yes + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 +printf "%s\n" "$enable_static" >&6; } + + + + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +CC=$lt_save_CC + + + + + + + + + + + + + + + + ac_config_commands="$ac_config_commands libtool" + + + + +# Only expand once: + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5 +printf %s "checking whether ln -s works... " >&6; } +LN_S=$as_ln_s +if test "$LN_S" = "ln -s"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5 +printf "%s\n" "no, using $LN_S" >&6; } +fi + + +# Check whether --enable-largefile was given. 
+if test ${enable_largefile+y} +then : + enableval=$enable_largefile; +fi +if test "$enable_largefile,$enable_year2038" != no,no +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable large file support" >&5 +printf %s "checking for $CC option to enable large file support... " >&6; } +if test ${ac_cv_sys_largefile_opts+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_save_CC="$CC" + ac_opt_found=no + for ac_opt in "none needed" "-D_FILE_OFFSET_BITS=64" "-D_LARGE_FILES=1" "-n32"; do + if test x"$ac_opt" != x"none needed" +then : + CC="$ac_save_CC $ac_opt" +fi + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <sys/types.h> +#ifndef FTYPE +# define FTYPE off_t +#endif + /* Check that FTYPE can represent 2**63 - 1 correctly. + We can't simply define LARGE_FTYPE to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_FTYPE (((FTYPE) 1 << 31 << 31) - 1 + ((FTYPE) 1 << 31 << 31)) + int FTYPE_is_large[(LARGE_FTYPE % 2147483629 == 721 + && LARGE_FTYPE % 2147483647 == 1) + ? 1 : -1]; +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + if test x"$ac_opt" = x"none needed" +then : + # GNU/Linux s390x and alpha need _FILE_OFFSET_BITS=64 for wide ino_t. 
+ CC="$CC -DFTYPE=ino_t" + if ac_fn_c_try_compile "$LINENO" +then : + +else case e in #( + e) CC="$CC -D_FILE_OFFSET_BITS=64" + if ac_fn_c_try_compile "$LINENO" +then : + ac_opt='-D_FILE_OFFSET_BITS=64' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam +fi + ac_cv_sys_largefile_opts=$ac_opt + ac_opt_found=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + test $ac_opt_found = no || break + done + CC="$ac_save_CC" + + test $ac_opt_found = yes || ac_cv_sys_largefile_opts="support not detected" ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_largefile_opts" >&5 +printf "%s\n" "$ac_cv_sys_largefile_opts" >&6; } + +ac_have_largefile=yes +case $ac_cv_sys_largefile_opts in #( + "none needed") : + ;; #( + "supported through gnulib") : + ;; #( + "support not detected") : + ac_have_largefile=no ;; #( + "-D_FILE_OFFSET_BITS=64") : + +printf "%s\n" "#define _FILE_OFFSET_BITS 64" >>confdefs.h + ;; #( + "-D_LARGE_FILES=1") : + +printf "%s\n" "#define _LARGE_FILES 1" >>confdefs.h + ;; #( + "-n32") : + CC="$CC -n32" ;; #( + *) : + as_fn_error $? "internal error: bad value for \$ac_cv_sys_largefile_opts" "$LINENO" 5 ;; +esac + +if test "$enable_year2038" != no +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option for timestamps after 2038" >&5 +printf %s "checking for $CC option for timestamps after 2038... " >&6; } +if test ${ac_cv_sys_year2038_opts+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_save_CPPFLAGS="$CPPFLAGS" + ac_opt_found=no + for ac_opt in "none needed" "-D_TIME_BITS=64" "-D__MINGW_USE_VC2005_COMPAT" "-U_USE_32_BIT_TIME_T -D__MINGW_USE_VC2005_COMPAT"; do + if test x"$ac_opt" != x"none needed" +then : + CPPFLAGS="$ac_save_CPPFLAGS $ac_opt" +fi + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #include + /* Check that time_t can represent 2**32 - 1 correctly. */ + #define LARGE_TIME_T \\ + ((time_t) (((time_t) 1 << 30) - 1 + 3 * ((time_t) 1 << 30))) + int verify_time_t_range[(LARGE_TIME_T / 65537 == 65535 + && LARGE_TIME_T % 65537 == 0) + ? 1 : -1]; + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_sys_year2038_opts="$ac_opt" + ac_opt_found=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + test $ac_opt_found = no || break + done + CPPFLAGS="$ac_save_CPPFLAGS" + test $ac_opt_found = yes || ac_cv_sys_year2038_opts="support not detected" ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_year2038_opts" >&5 +printf "%s\n" "$ac_cv_sys_year2038_opts" >&6; } + +ac_have_year2038=yes +case $ac_cv_sys_year2038_opts in #( + "none needed") : + ;; #( + "support not detected") : + ac_have_year2038=no ;; #( + "-D_TIME_BITS=64") : + +printf "%s\n" "#define _TIME_BITS 64" >>confdefs.h + ;; #( + "-D__MINGW_USE_VC2005_COMPAT") : + +printf "%s\n" "#define __MINGW_USE_VC2005_COMPAT 1" >>confdefs.h + ;; #( + "-U_USE_32_BIT_TIME_T"*) : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "the 'time_t' type is currently forced to be 32-bit. It +will stop working after mid-January 2038. Remove +_USE_32BIT_TIME_T from the compiler flags. +See 'config.log' for more details" "$LINENO" 5; } ;; #( + *) : + as_fn_error $? "internal error: bad value for \$ac_cv_sys_year2038_opts" "$LINENO" 5 ;; +esac + +fi + +fi + +# Check for GCC visibility feature + + + + VISIBILITY_CFLAGS= + VISIBILITY_CXXFLAGS= + HAVE_VISIBILITY=0 + if test -n "$GCC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the -Werror option is usable" >&5 +printf %s "checking whether the -Werror option is usable... 
" >&6; } + if test ${pcre2_cv_cc_vis_werror+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + pcre2_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -Werror" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + pcre2_cv_cc_vis_werror=yes +else case e in #( + e) pcre2_cv_cc_vis_werror=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS="$pcre2_save_CFLAGS" ;; +esac +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cv_cc_vis_werror" >&5 +printf "%s\n" "$pcre2_cv_cc_vis_werror" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for simple visibility declarations" >&5 +printf %s "checking for simple visibility declarations... " >&6; } + if test ${pcre2_cv_cc_visibility+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + pcre2_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -fvisibility=hidden" + if test $pcre2_cv_cc_vis_werror = yes; then + CFLAGS="$CFLAGS -Werror" + fi + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +extern __attribute__((__visibility__("hidden"))) int hiddenvar; + extern __attribute__((__visibility__("default"))) int exportedvar; + extern __attribute__((__visibility__("hidden"))) int hiddenfunc (void); + extern __attribute__((__visibility__("default"))) int exportedfunc (void); + void dummyfunc (void) {} + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + pcre2_cv_cc_visibility=yes +else case e in #( + e) pcre2_cv_cc_visibility=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS="$pcre2_save_CFLAGS" ;; +esac +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cv_cc_visibility" >&5 +printf "%s\n" "$pcre2_cv_cc_visibility" >&6; } + if test $pcre2_cv_cc_visibility = yes; then + VISIBILITY_CFLAGS="-fvisibility=hidden" + VISIBILITY_CXXFLAGS="-fvisibility=hidden -fvisibility-inlines-hidden" + HAVE_VISIBILITY=1 + +printf "%s\n" "#define PCRE2_EXPORT __attribute__ ((visibility (\"default\")))" >>confdefs.h + + else + +printf "%s\n" "#define PCRE2_EXPORT /**/" >>confdefs.h + + fi + else + +printf "%s\n" "#define PCRE2_EXPORT /**/" >>confdefs.h + + fi + + + + +printf "%s\n" "#define HAVE_VISIBILITY $HAVE_VISIBILITY" >>confdefs.h + + + +# Check for the mul_overflow() builtin + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __builtin_mul_overflow()" >&5 +printf %s "checking for __builtin_mul_overflow()... " >&6; } +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #ifdef HAVE_SYS_TYPES_H + #include + #endif + #include + + int a, b; + size_t m; + +int +main (void) +{ +__builtin_mul_overflow(a, b, &m) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pcre2_cc_cv_builtin_mul_overflow=yes +else case e in #( + e) pcre2_cc_cv_builtin_mul_overflow=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_builtin_mul_overflow" >&5 +printf "%s\n" "$pcre2_cc_cv_builtin_mul_overflow" >&6; } +if test "$pcre2_cc_cv_builtin_mul_overflow" = yes; then + +printf "%s\n" "#define HAVE_BUILTIN_MUL_OVERFLOW 1" >>confdefs.h + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Check for Clang __attribute__((uninitialized)) feature + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __attribute__((uninitialized))" >&5 +printf %s "checking for __attribute__((uninitialized))... " >&6; } +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +tmp_CFLAGS=$CFLAGS +CFLAGS="$CFLAGS -Werror" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +char buf[128] __attribute__((uninitialized));(void)buf + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + pcre2_cc_cv_attribute_uninitialized=yes +else case e in #( + e) pcre2_cc_cv_attribute_uninitialized=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_attribute_uninitialized" >&5 +printf "%s\n" "$pcre2_cc_cv_attribute_uninitialized" >&6; } +if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then + +printf "%s\n" "#define HAVE_ATTRIBUTE_UNINITIALIZED 1" >>confdefs.h + +fi +CFLAGS=$tmp_CFLAGS +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Versioning + +PCRE2_MAJOR="10" +PCRE2_MINOR="44" +PCRE2_PRERELEASE="" +PCRE2_DATE="2024-06-07" + +if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09" +then + echo "***" + echo "*** Minor version number $PCRE2_MINOR must not be used. ***" + echo "*** Use only 00 to 07 or 10 onwards, to avoid octal issues. ***" + echo "***" + exit 1 +fi + + + + + + +# Set a more sensible default value for $(htmldir). +if test "x$htmldir" = 'x${docdir}' +then + htmldir='${docdir}/html' +fi + +# Force an error for PCRE1 size options +# Check whether --enable-pcre8 was given. +if test ${enable_pcre8+y} +then : + enableval=$enable_pcre8; +else case e in #( + e) enable_pcre8=no ;; +esac +fi + +# Check whether --enable-pcre16 was given. +if test ${enable_pcre16+y} +then : + enableval=$enable_pcre16; +else case e in #( + e) enable_pcre16=no ;; +esac +fi + +# Check whether --enable-pcre32 was given. 
+if test ${enable_pcre32+y} +then : + enableval=$enable_pcre32; +else case e in #( + e) enable_pcre32=no ;; +esac +fi + + +if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono" +then + echo "** ERROR: Use --[en|dis]able-pcre2-[8|16|32], not --[en|dis]able-pcre[8|16|32]" + exit 1 +fi + +# Handle --disable-pcre2-8 (enabled by default) +# Check whether --enable-pcre2-8 was given. +if test ${enable_pcre2_8+y} +then : + enableval=$enable_pcre2_8; +else case e in #( + e) enable_pcre2_8=unset ;; +esac +fi + + + +# Handle --enable-pcre2-16 (disabled by default) +# Check whether --enable-pcre2-16 was given. +if test ${enable_pcre2_16+y} +then : + enableval=$enable_pcre2_16; +else case e in #( + e) enable_pcre2_16=unset ;; +esac +fi + + + +# Handle --enable-pcre2-32 (disabled by default) +# Check whether --enable-pcre2-32 was given. +if test ${enable_pcre2_32+y} +then : + enableval=$enable_pcre2_32; +else case e in #( + e) enable_pcre2_32=unset ;; +esac +fi + + + +# Handle --enable-debug (disabled by default) +# Check whether --enable-debug was given. +if test ${enable_debug+y} +then : + enableval=$enable_debug; +else case e in #( + e) enable_debug=no ;; +esac +fi + + +# Handle --enable-jit (disabled by default) +# Check whether --enable-jit was given. +if test ${enable_jit+y} +then : + enableval=$enable_jit; +else case e in #( + e) enable_jit=no ;; +esac +fi + + +# This code enables JIT if the hardware supports it. +if test "$enable_jit" = "auto"; then + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + SAVE_CPPFLAGS=$CPPFLAGS + CPPFLAGS=-I$srcdir + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #define SLJIT_CONFIG_AUTO 1 + #include "src/sljit/sljitConfigCPU.h" + #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + #error unsupported + #endif +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + enable_jit=yes +else case e in #( + e) enable_jit=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CPPFLAGS=$SAVE_CPPFLAGS + echo checking for JIT support on this hardware... $enable_jit +fi + +# Handle --enable-jit-sealloc (disabled by default and only experimental) +case $host_os in + linux* | netbsd*) + # Check whether --enable-jit-sealloc was given. +if test ${enable_jit_sealloc+y} +then : + enableval=$enable_jit_sealloc; +else case e in #( + e) enable_jit_sealloc=no ;; +esac +fi + + ;; + *) + enable_jit_sealloc=unsupported + ;; +esac + +# Handle --disable-pcre2grep-jit (enabled by default) +# Check whether --enable-pcre2grep-jit was given. +if test ${enable_pcre2grep_jit+y} +then : + enableval=$enable_pcre2grep_jit; +else case e in #( + e) enable_pcre2grep_jit=yes ;; +esac +fi + + +# Handle --disable-pcre2grep-callout (enabled by default) +# Check whether --enable-pcre2grep-callout was given. +if test ${enable_pcre2grep_callout+y} +then : + enableval=$enable_pcre2grep_callout; +else case e in #( + e) enable_pcre2grep_callout=yes ;; +esac +fi + + +# Handle --disable-pcre2grep-callout-fork (enabled by default) +# Check whether --enable-pcre2grep-callout-fork was given. +if test ${enable_pcre2grep_callout_fork+y} +then : + enableval=$enable_pcre2grep_callout_fork; +else case e in #( + e) enable_pcre2grep_callout_fork=yes ;; +esac +fi + + +# Handle --enable-rebuild-chartables +# Check whether --enable-rebuild-chartables was given. +if test ${enable_rebuild_chartables+y} +then : + enableval=$enable_rebuild_chartables; +else case e in #( + e) enable_rebuild_chartables=no ;; +esac +fi + + +# Handle --disable-unicode (enabled by default) +# Check whether --enable-unicode was given. 
+if test ${enable_unicode+y} +then : + enableval=$enable_unicode; +else case e in #( + e) enable_unicode=unset ;; +esac +fi + + +# Handle newline options +ac_pcre2_newline=lf +# Check whether --enable-newline-is-cr was given. +if test ${enable_newline_is_cr+y} +then : + enableval=$enable_newline_is_cr; ac_pcre2_newline=cr +fi + +# Check whether --enable-newline-is-lf was given. +if test ${enable_newline_is_lf+y} +then : + enableval=$enable_newline_is_lf; ac_pcre2_newline=lf +fi + +# Check whether --enable-newline-is-crlf was given. +if test ${enable_newline_is_crlf+y} +then : + enableval=$enable_newline_is_crlf; ac_pcre2_newline=crlf +fi + +# Check whether --enable-newline-is-anycrlf was given. +if test ${enable_newline_is_anycrlf+y} +then : + enableval=$enable_newline_is_anycrlf; ac_pcre2_newline=anycrlf +fi + +# Check whether --enable-newline-is-any was given. +if test ${enable_newline_is_any+y} +then : + enableval=$enable_newline_is_any; ac_pcre2_newline=any +fi + +# Check whether --enable-newline-is-nul was given. +if test ${enable_newline_is_nul+y} +then : + enableval=$enable_newline_is_nul; ac_pcre2_newline=nul +fi + +enable_newline="$ac_pcre2_newline" + +# Handle --enable-bsr-anycrlf +# Check whether --enable-bsr-anycrlf was given. +if test ${enable_bsr_anycrlf+y} +then : + enableval=$enable_bsr_anycrlf; +else case e in #( + e) enable_bsr_anycrlf=no ;; +esac +fi + + +# Handle --enable-never-backslash-C +# Check whether --enable-never-backslash-C was given. +if test ${enable_never_backslash_C+y} +then : + enableval=$enable_never_backslash_C; +else case e in #( + e) enable_never_backslash_C=no ;; +esac +fi + + +# Handle --enable-ebcdic +# Check whether --enable-ebcdic was given. +if test ${enable_ebcdic+y} +then : + enableval=$enable_ebcdic; +else case e in #( + e) enable_ebcdic=no ;; +esac +fi + + +# Handle --enable-ebcdic-nl25 +# Check whether --enable-ebcdic-nl25 was given. 
+if test ${enable_ebcdic_nl25+y} +then : + enableval=$enable_ebcdic_nl25; +else case e in #( + e) enable_ebcdic_nl25=no ;; +esac +fi + + +# Handle --enable-pcre2grep-libz +# Check whether --enable-pcre2grep-libz was given. +if test ${enable_pcre2grep_libz+y} +then : + enableval=$enable_pcre2grep_libz; +else case e in #( + e) enable_pcre2grep_libz=no ;; +esac +fi + + +# Handle --enable-pcre2grep-libbz2 +# Check whether --enable-pcre2grep-libbz2 was given. +if test ${enable_pcre2grep_libbz2+y} +then : + enableval=$enable_pcre2grep_libbz2; +else case e in #( + e) enable_pcre2grep_libbz2=no ;; +esac +fi + + +# Handle --with-pcre2grep-bufsize=N + +# Check whether --with-pcre2grep-bufsize was given. +if test ${with_pcre2grep_bufsize+y} +then : + withval=$with_pcre2grep_bufsize; +else case e in #( + e) with_pcre2grep_bufsize=20480 ;; +esac +fi + + +# Handle --with-pcre2grep-max-bufsize=N + +# Check whether --with-pcre2grep-max-bufsize was given. +if test ${with_pcre2grep_max_bufsize+y} +then : + withval=$with_pcre2grep_max_bufsize; +else case e in #( + e) with_pcre2grep_max_bufsize=1048576 ;; +esac +fi + + +# Handle --enable-pcre2test-libedit +# Check whether --enable-pcre2test-libedit was given. +if test ${enable_pcre2test_libedit+y} +then : + enableval=$enable_pcre2test_libedit; +else case e in #( + e) enable_pcre2test_libedit=no ;; +esac +fi + + +# Handle --enable-pcre2test-libreadline +# Check whether --enable-pcre2test-libreadline was given. +if test ${enable_pcre2test_libreadline+y} +then : + enableval=$enable_pcre2test_libreadline; +else case e in #( + e) enable_pcre2test_libreadline=no ;; +esac +fi + + +# Handle --with-link-size=N + +# Check whether --with-link-size was given. +if test ${with_link_size+y} +then : + withval=$with_link_size; +else case e in #( + e) with_link_size=2 ;; +esac +fi + + +# Handle --with-max-varlookbehind=N + +# Check whether --with-max-varlookbehind was given. 
+if test ${with_max_varlookbehind+y} +then : + withval=$with_max_varlookbehind; +else case e in #( + e) with_max_varlookbehind=255 ;; +esac +fi + + +# Handle --with-parens-nest-limit=N + +# Check whether --with-parens-nest-limit was given. +if test ${with_parens_nest_limit+y} +then : + withval=$with_parens_nest_limit; +else case e in #( + e) with_parens_nest_limit=250 ;; +esac +fi + + +# Handle --with-heap-limit + +# Check whether --with-heap-limit was given. +if test ${with_heap_limit+y} +then : + withval=$with_heap_limit; +else case e in #( + e) with_heap_limit=20000000 ;; +esac +fi + + +# Handle --with-match-limit=N + +# Check whether --with-match-limit was given. +if test ${with_match_limit+y} +then : + withval=$with_match_limit; +else case e in #( + e) with_match_limit=10000000 ;; +esac +fi + + +# Handle --with-match-limit-depth=N +# Recognize old synonym --with-match-limit-recursion +# +# Note: In config.h, the default is to define MATCH_LIMIT_DEPTH symbolically as +# MATCH_LIMIT, which in turn is defined to be some numeric value (e.g. +# 10000000). MATCH_LIMIT_DEPTH can otherwise be set to some different numeric +# value (or even the same numeric value as MATCH_LIMIT, though no longer +# defined in terms of the latter). +# + +# Check whether --with-match-limit-depth was given. +if test ${with_match_limit_depth+y} +then : + withval=$with_match_limit_depth; +else case e in #( + e) with_match_limit_depth=MATCH_LIMIT ;; +esac +fi + + + +# Check whether --with-match-limit-recursion was given. +if test ${with_match_limit_recursion+y} +then : + withval=$with_match_limit_recursion; +else case e in #( + e) with_match_limit_recursion=UNSET ;; +esac +fi + + +# Handle --enable-valgrind +# Check whether --enable-valgrind was given. +if test ${enable_valgrind+y} +then : + enableval=$enable_valgrind; +else case e in #( + e) enable_valgrind=no ;; +esac +fi + + +# Enable code coverage reports using gcov +# Check whether --enable-coverage was given. 
+if test ${enable_coverage+y} +then : + enableval=$enable_coverage; +else case e in #( + e) enable_coverage=no ;; +esac +fi + + +# Handle --enable-fuzz-support +# Check whether --enable-fuzz_support was given. +if test ${enable_fuzz_support+y} +then : + enableval=$enable_fuzz_support; +else case e in #( + e) enable_fuzz_support=no ;; +esac +fi + + +# Handle --enable-diff-fuzz-support +# Check whether --enable-diff_fuzz_support was given. +if test ${enable_diff_fuzz_support+y} +then : + enableval=$enable_diff_fuzz_support; +else case e in #( + e) enable_diff_fuzz_support=no ;; +esac +fi + + +# Handle --disable-stack-for-recursion +# This option became obsolete at release 10.30. +# Check whether --enable-stack-for-recursion was given. +if test ${enable_stack_for_recursion+y} +then : + enableval=$enable_stack_for_recursion; +else case e in #( + e) enable_stack_for_recursion=yes ;; +esac +fi + + +# Original code +# AC_ARG_ENABLE(stack-for-recursion, +# AS_HELP_STRING([--disable-stack-for-recursion], +# [don't use stack recursion when matching]), +# , enable_stack_for_recursion=yes) + +# Handle --disable-percent_zt (set as "auto" by default) +# Check whether --enable-percent-zt was given. +if test ${enable_percent_zt+y} +then : + enableval=$enable_percent_zt; +else case e in #( + e) enable_percent_zt=auto ;; +esac +fi + + +# Set the default value for pcre2-8 +if test "x$enable_pcre2_8" = "xunset" +then + enable_pcre2_8=yes +fi + +# Set the default value for pcre2-16 +if test "x$enable_pcre2_16" = "xunset" +then + enable_pcre2_16=no +fi + +# Set the default value for pcre2-32 +if test "x$enable_pcre2_32" = "xunset" +then + enable_pcre2_32=no +fi + +# Make sure at least one library is selected +if test "x$enable_pcre2_8$enable_pcre2_16$enable_pcre2_32" = "xnonono" +then + as_fn_error $? "At least one of the 8, 16 or 32 bit libraries must be enabled" "$LINENO" 5 +fi + +# Unicode is enabled by default. 
+if test "x$enable_unicode" = "xunset" +then + enable_unicode=yes +fi + +# Convert the newline identifier into the appropriate integer value. These must +# agree with the PCRE2_NEWLINE_xxx values in pcre2.h. + +case "$enable_newline" in + cr) ac_pcre2_newline_value=1 ;; + lf) ac_pcre2_newline_value=2 ;; + crlf) ac_pcre2_newline_value=3 ;; + any) ac_pcre2_newline_value=4 ;; + anycrlf) ac_pcre2_newline_value=5 ;; + nul) ac_pcre2_newline_value=6 ;; + *) + as_fn_error $? "invalid argument \"$enable_newline\" to --enable-newline option" "$LINENO" 5 + ;; +esac + +# --enable-ebcdic-nl25 implies --enable-ebcdic +if test "x$enable_ebcdic_nl25" = "xyes"; then + enable_ebcdic=yes +fi + +# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled. +# Also check that UTF support is not requested, because PCRE2 cannot handle +# EBCDIC and UTF in the same build. To do so it would need to use different +# character constants depending on the mode. Also, EBCDIC cannot be used with +# 16-bit and 32-bit libraries. +# +if test "x$enable_ebcdic" = "xyes"; then + enable_rebuild_chartables=yes + if test "x$enable_unicode" = "xyes"; then + as_fn_error $? "support for EBCDIC and Unicode cannot be enabled at the same time" "$LINENO" 5 + fi + if test "x$enable_pcre2_16" = "xyes" -o "x$enable_pcre2_32" = "xyes"; then + as_fn_error $? "EBCDIC support is available only for the 8-bit library" "$LINENO" 5 + fi +fi + +# Check argument to --with-link-size +case "$with_link_size" in + 2|3|4) ;; + *) + as_fn_error $? "invalid argument \"$with_link_size\" to --with-link-size option" "$LINENO" 5 + ;; +esac + + + +# Checks for header files. 
+ac_fn_c_check_header_compile "$LINENO" "limits.h" "ac_cv_header_limits_h" "$ac_includes_default" +if test "x$ac_cv_header_limits_h" = xyes +then : + printf "%s\n" "#define HAVE_LIMITS_H 1" >>confdefs.h + +fi +ac_fn_c_check_header_compile "$LINENO" "sys/types.h" "ac_cv_header_sys_types_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_types_h" = xyes +then : + printf "%s\n" "#define HAVE_SYS_TYPES_H 1" >>confdefs.h + +fi +ac_fn_c_check_header_compile "$LINENO" "sys/stat.h" "ac_cv_header_sys_stat_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_stat_h" = xyes +then : + printf "%s\n" "#define HAVE_SYS_STAT_H 1" >>confdefs.h + +fi +ac_fn_c_check_header_compile "$LINENO" "dirent.h" "ac_cv_header_dirent_h" "$ac_includes_default" +if test "x$ac_cv_header_dirent_h" = xyes +then : + printf "%s\n" "#define HAVE_DIRENT_H 1" >>confdefs.h + +fi + + for ac_header in windows.h +do : + ac_fn_c_check_header_compile "$LINENO" "windows.h" "ac_cv_header_windows_h" "$ac_includes_default" +if test "x$ac_cv_header_windows_h" = xyes +then : + printf "%s\n" "#define HAVE_WINDOWS_H 1" >>confdefs.h + HAVE_WINDOWS_H=1 +fi + +done + for ac_header in sys/wait.h +do : + ac_fn_c_check_header_compile "$LINENO" "sys/wait.h" "ac_cv_header_sys_wait_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_wait_h" = xyes +then : + printf "%s\n" "#define HAVE_SYS_WAIT_H 1" >>confdefs.h + HAVE_SYS_WAIT_H=1 +fi + +done + +# Conditional compilation + if test "x$enable_pcre2_8" = "xyes"; then + WITH_PCRE2_8_TRUE= + WITH_PCRE2_8_FALSE='#' +else + WITH_PCRE2_8_TRUE='#' + WITH_PCRE2_8_FALSE= +fi + + if test "x$enable_pcre2_16" = "xyes"; then + WITH_PCRE2_16_TRUE= + WITH_PCRE2_16_FALSE='#' +else + WITH_PCRE2_16_TRUE='#' + WITH_PCRE2_16_FALSE= +fi + + if test "x$enable_pcre2_32" = "xyes"; then + WITH_PCRE2_32_TRUE= + WITH_PCRE2_32_FALSE='#' +else + WITH_PCRE2_32_TRUE='#' + WITH_PCRE2_32_FALSE= +fi + + if test "x$enable_debug" = "xyes"; then + WITH_DEBUG_TRUE= + WITH_DEBUG_FALSE='#' +else + 
WITH_DEBUG_TRUE='#' + WITH_DEBUG_FALSE= +fi + + if test "x$enable_rebuild_chartables" = "xyes"; then + WITH_REBUILD_CHARTABLES_TRUE= + WITH_REBUILD_CHARTABLES_FALSE='#' +else + WITH_REBUILD_CHARTABLES_TRUE='#' + WITH_REBUILD_CHARTABLES_FALSE= +fi + + if test "x$enable_jit" = "xyes"; then + WITH_JIT_TRUE= + WITH_JIT_FALSE='#' +else + WITH_JIT_TRUE='#' + WITH_JIT_FALSE= +fi + + if test "x$enable_unicode" = "xyes"; then + WITH_UNICODE_TRUE= + WITH_UNICODE_FALSE='#' +else + WITH_UNICODE_TRUE='#' + WITH_UNICODE_FALSE= +fi + + if test "x$enable_valgrind" = "xyes"; then + WITH_VALGRIND_TRUE= + WITH_VALGRIND_FALSE='#' +else + WITH_VALGRIND_TRUE='#' + WITH_VALGRIND_FALSE= +fi + + if test "x$enable_fuzz_support" = "xyes"; then + WITH_FUZZ_SUPPORT_TRUE= + WITH_FUZZ_SUPPORT_FALSE='#' +else + WITH_FUZZ_SUPPORT_TRUE='#' + WITH_FUZZ_SUPPORT_FALSE= +fi + + if test "x$enable_diff_fuzz_support" = "xyes"; then + WITH_DIFF_FUZZ_SUPPORT_TRUE= + WITH_DIFF_FUZZ_SUPPORT_FALSE='#' +else + WITH_DIFF_FUZZ_SUPPORT_TRUE='#' + WITH_DIFF_FUZZ_SUPPORT_FALSE= +fi + + +if test "$enable_fuzz_support" = "yes" -a "$enable_pcre2_8" = "no"; then + echo "** ERROR: Fuzzer support requires the 8-bit library" + exit 1 +fi + +if test "$enable_diff_fuzz_support" = "yes"; then + if test "$enable_fuzz_support" = "no"; then + echo "** ERROR: Differential fuzzing support requires fuzzing support" + exit 1 + fi + if test "$enable_jit" = "no"; then + echo "** ERROR: Differential fuzzing support requires Just-in-Time compilation support" + exit 1 + fi + +printf "%s\n" "#define SUPPORT_DIFF_FUZZ /**/" >>confdefs.h + +fi + +# Checks for typedefs, structures, and compiler characteristics. + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for an ANSI C-conforming const" >&5 +printf %s "checking for an ANSI C-conforming const... " >&6; } +if test ${ac_cv_c_const+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + +#ifndef __cplusplus + /* Ultrix mips cc rejects this sort of thing. */ + typedef int charset[2]; + const charset cs = { 0, 0 }; + /* SunOS 4.1.1 cc rejects this. */ + char const *const *pcpcc; + char **ppc; + /* NEC SVR4.0.2 mips cc rejects this. */ + struct point {int x, y;}; + static struct point const zero = {0,0}; + /* IBM XL C 1.02.0.0 rejects this. + It does not let you subtract one const X* pointer from another in + an arm of an if-expression whose if-part is not a constant + expression */ + const char *g = "string"; + pcpcc = &g + (g ? g-g : 0); + /* HPUX 7.0 cc rejects these. */ + ++pcpcc; + ppc = (char**) pcpcc; + pcpcc = (char const *const *) ppc; + { /* SCO 3.2v4 cc rejects this sort of thing. */ + char tx; + char *t = &tx; + char const *s = 0 ? (char *) 0 : (char const *) 0; + + *t++ = 0; + if (s) return 0; + } + { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */ + int x[] = {25, 17}; + const int *foo = &x[0]; + ++foo; + } + { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */ + typedef const int *iptr; + iptr p = 0; + ++p; + } + { /* IBM XL C 1.02.0.0 rejects this sort of thing, saying + "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. 
*/ + struct s { int j; const int *ap[3]; } bx; + struct s *b = &bx; b->j = 5; + } + { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ + const int foo = 10; + if (!foo) return 0; + } + return !cs[0] && !zero.x; +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_c_const=yes +else case e in #( + e) ac_cv_c_const=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_const" >&5 +printf "%s\n" "$ac_cv_c_const" >&6; } +if test $ac_cv_c_const = no; then + +printf "%s\n" "#define const /**/" >>confdefs.h + +fi + +ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" +if test "x$ac_cv_type_size_t" = xyes +then : + +else case e in #( + e) +printf "%s\n" "#define size_t unsigned int" >>confdefs.h + ;; +esac +fi + + +# Checks for library functions. + +ac_fn_c_check_func "$LINENO" "bcopy" "ac_cv_func_bcopy" +if test "x$ac_cv_func_bcopy" = xyes +then : + printf "%s\n" "#define HAVE_BCOPY 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "memfd_create" "ac_cv_func_memfd_create" +if test "x$ac_cv_func_memfd_create" = xyes +then : + printf "%s\n" "#define HAVE_MEMFD_CREATE 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "memmove" "ac_cv_func_memmove" +if test "x$ac_cv_func_memmove" = xyes +then : + printf "%s\n" "#define HAVE_MEMMOVE 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "mkostemp" "ac_cv_func_mkostemp" +if test "x$ac_cv_func_mkostemp" = xyes +then : + printf "%s\n" "#define HAVE_MKOSTEMP 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "secure_getenv" "ac_cv_func_secure_getenv" +if test "x$ac_cv_func_secure_getenv" = xyes +then : + printf "%s\n" "#define HAVE_SECURE_GETENV 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "strerror" "ac_cv_func_strerror" +if test "x$ac_cv_func_strerror" = xyes +then : + printf "%s\n" "#define HAVE_STRERROR 1" >>confdefs.h + +fi + +{ printf "%s\n" 
"$as_me:${as_lineno-$LINENO}: checking for realpath" >&5 +printf %s "checking for realpath... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include + +int +main (void) +{ + +char buffer[PATH_MAX]; +realpath(".", buffer); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +printf "%s\n" "#define HAVE_REALPATH 1" >>confdefs.h + + +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + +# Check for the availability of libz (aka zlib) + + for ac_header in zlib.h +do : + ac_fn_c_check_header_compile "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "$ac_includes_default" +if test "x$ac_cv_header_zlib_h" = xyes +then : + printf "%s\n" "#define HAVE_ZLIB_H 1" >>confdefs.h + HAVE_ZLIB_H=1 +fi + +done +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for gzopen in -lz" >&5 +printf %s "checking for gzopen in -lz... " >&6; } +if test ${ac_cv_lib_z_gzopen+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-lz $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char gzopen (void); +int +main (void) +{ +return gzopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_z_gzopen=yes +else case e in #( + e) ac_cv_lib_z_gzopen=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_gzopen" >&5 +printf "%s\n" "$ac_cv_lib_z_gzopen" >&6; } +if test "x$ac_cv_lib_z_gzopen" = xyes +then : + HAVE_LIBZ=1 +fi + + +# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB, +# as for libz. However, this had the following problem, diagnosed and fixed by +# a user: +# +# - libbz2 uses the Pascal calling convention (WINAPI) for the functions +# under Win32. +# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h", +# therefore missing the function definition. +# - The compiler thus generates a "C" signature for the test function. +# - The linker fails to find the "C" function. +# - PCRE2 fails to configure if asked to do so against libbz2. +# +# Solution: +# +# - Replace the AC_CHECK_LIB test with a custom test. + + for ac_header in bzlib.h +do : + ac_fn_c_check_header_compile "$LINENO" "bzlib.h" "ac_cv_header_bzlib_h" "$ac_includes_default" +if test "x$ac_cv_header_bzlib_h" = xyes +then : + printf "%s\n" "#define HAVE_BZLIB_H 1" >>confdefs.h + HAVE_BZLIB_H=1 +fi + +done +# Original test +# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1]) +# +# Custom test follows + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libbz2" >&5 +printf %s "checking for libbz2... " >&6; } +OLD_LIBS="$LIBS" +LIBS="$LIBS -lbz2" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#ifdef HAVE_BZLIB_H +#include +#endif +int +main (void) +{ +return (int)BZ2_bzopen("conftest", "rb"); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; };HAVE_LIBBZ2=1; break; +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS="$OLD_LIBS" + +# Check for the availabiity of libreadline + +if test "$enable_pcre2test_libreadline" = "yes"; then + for ac_header in readline/readline.h +do : + ac_fn_c_check_header_compile "$LINENO" "readline/readline.h" "ac_cv_header_readline_readline_h" "$ac_includes_default" +if test "x$ac_cv_header_readline_readline_h" = xyes +then : + printf "%s\n" "#define HAVE_READLINE_READLINE_H 1" >>confdefs.h + HAVE_READLINE_H=1 +fi + +done + for ac_header in readline/history.h +do : + ac_fn_c_check_header_compile "$LINENO" "readline/history.h" "ac_cv_header_readline_history_h" "$ac_includes_default" +if test "x$ac_cv_header_readline_history_h" = xyes +then : + printf "%s\n" "#define HAVE_READLINE_HISTORY_H 1" >>confdefs.h + HAVE_HISTORY_H=1 +fi + +done + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
+ The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char readline (void); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else case e in #( + e) ac_cv_lib_readline_readline=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-lreadline" +else case e in #( + e) unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -ltinfo $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char readline (void); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else case e in #( + e) ac_cv_lib_readline_readline=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-ltinfo" +else case e in #( + e) unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -lcurses $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char readline (void); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else case e in #( + e) ac_cv_lib_readline_readline=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-lcurses" +else case e in #( + e) unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -lncurses $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char readline (void); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else case e in #( + e) ac_cv_lib_readline_readline=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-lncurses" +else case e in #( + e) unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -lncursesw $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char readline (void); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else case e in #( + e) ac_cv_lib_readline_readline=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-lncursesw" +else case e in #( + e) unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -ltermcap $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char readline (void); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else case e in #( + e) ac_cv_lib_readline_readline=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-ltermcap" +else case e in #( + e) LIBREADLINE="" ;; +esac +fi + ;; +esac +fi + ;; +esac +fi + ;; +esac +fi + ;; +esac +fi + ;; +esac +fi + + + if test -n "$LIBREADLINE"; then + if test "$LIBREADLINE" != "-lreadline"; then + echo "-lreadline needs $LIBREADLINE" + LIBREADLINE="-lreadline $LIBREADLINE" + fi + fi +fi + +# Check for the availability of libedit. Different distributions put its +# headers in different places. Try to cover the most common ones. + +if test "$enable_pcre2test_libedit" = "yes"; then + for ac_header in editline/readline.h edit/readline/readline.h readline.h +do : + as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | sed "$as_sed_sh"` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes" +then : + cat >>confdefs.h <<_ACEOF +#define `printf "%s\n" "HAVE_$ac_header" | sed "$as_sed_cpp"` 1 +_ACEOF + + HAVE_LIBEDIT_HEADER=1 + break + +fi + +done + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -ledit" >&5 +printf %s "checking for readline in -ledit... " >&6; } +if test ${ac_cv_lib_edit_readline+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-ledit $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char readline (void); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_edit_readline=yes +else case e in #( + e) ac_cv_lib_edit_readline=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_edit_readline" >&5 +printf "%s\n" "$ac_cv_lib_edit_readline" >&6; } +if test "x$ac_cv_lib_edit_readline" = xyes +then : + LIBEDIT="-ledit" +fi + +fi + +PCRE2_STATIC_CFLAG="" +if test "x$enable_shared" = "xno" ; then + +printf "%s\n" "#define PCRE2_STATIC 1" >>confdefs.h + + PCRE2_STATIC_CFLAG="-DPCRE2_STATIC" +fi + + +PCRE2POSIX_CFLAG="" +if test "x$enable_shared" = "xyes" ; then + PCRE2POSIX_CFLAG="-DPCRE2POSIX_SHARED" +fi + + +# Here is where PCRE2-specific defines are handled + +if test "$enable_pcre2_8" = "yes"; then + +printf "%s\n" "#define SUPPORT_PCRE2_8 /**/" >>confdefs.h + +fi + +if test "$enable_pcre2_16" = "yes"; then + +printf "%s\n" "#define SUPPORT_PCRE2_16 /**/" >>confdefs.h + +fi + +if test "$enable_pcre2_32" = "yes"; then + +printf "%s\n" "#define SUPPORT_PCRE2_32 /**/" >>confdefs.h + +fi + +if test "$enable_debug" = "yes"; then + +printf "%s\n" "#define PCRE2_DEBUG /**/" >>confdefs.h + +fi + +if test "$enable_percent_zt" = "no"; then + +printf "%s\n" "#define DISABLE_PERCENT_ZT /**/" >>confdefs.h + +else + enable_percent_zt=auto +fi + +# Unless running under Windows, JIT support requires pthreads. 
+ +if test "$enable_jit" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ax_pthread_ok=no + +# We used to check for pthread.h first, but this fails if pthread.h +# requires special compiler flags (e.g. on True64 or Sequent). +# It gets checked for in the link test anyway. + +# First of all, check if the user has set any of the PTHREAD_LIBS, +# etcetera environment variables, and if threads linking works using +# them: +if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS" >&5 +printf %s "checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pthread_join (void); +int +main (void) +{ +return pthread_join (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ax_pthread_ok=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_ok" >&5 +printf "%s\n" "$ax_pthread_ok" >&6; } + if test x"$ax_pthread_ok" = xno; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + fi + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" +fi + +# We must check for the threads library under a number of different +# names; the ordering is very important because some systems +# (e.g. DEC) have both -lpthread and -lpthreads, where one of the +# libraries is broken (non-POSIX). + +# Create a list of thread flags to try. Items starting with a "-" are +# C compiler flags, and other items are library names, except for "none" +# which indicates that we try without any flags at all, and "pthread-config" +# which is a program returning the flags for the Pth emulation library. + +ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" + +# The ordering *is* (sometimes) important. 
Some notes on the +# individual items follow: + +# pthreads: AIX (must check this before -lpthread) +# none: in case threads are in libc; should be tried before -Kthread and +# other compiler flags to prevent continual compiler warnings +# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) +# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) +# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) +# -pthreads: Solaris/gcc +# -mthreads: Mingw32/gcc, Lynx/gcc +# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it +# doesn't hurt to check since this sometimes defines pthreads too; +# also defines -D_REENTRANT) +# ... -mt is also the pthreads flag for HP/aCC +# pthread: Linux, etcetera +# --thread-safe: KAI C++ +# pthread-config: use pthread-config program (for GNU Pth library) + +case ${host_os} in + solaris*) + + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based + # tests will erroneously succeed. (We need to link with -pthreads/-mt/ + # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather + # a function called by this macro, so we could check for that, but + # who knows whether they'll stub that too in a future libc.) So, + # we'll just look for -pthreads and -lpthread first: + + ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags" + ;; + + darwin*) + ax_pthread_flags="-pthread $ax_pthread_flags" + ;; +esac + +if test x"$ax_pthread_ok" = xno; then +for flag in $ax_pthread_flags; do + + case $flag in + none) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether pthreads work without any flags" >&5 +printf %s "checking whether pthreads work without any flags... 
" >&6; } + ;; + + -*) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether pthreads work with $flag" >&5 +printf %s "checking whether pthreads work with $flag... " >&6; } + PTHREAD_CFLAGS="$flag" + ;; + + pthread-config) + # Extract the first word of "pthread-config", so it can be a program name with args. +set dummy pthread-config; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ax_pthread_config+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ax_pthread_config"; then + ac_cv_prog_ax_pthread_config="$ax_pthread_config" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ax_pthread_config="yes" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_ax_pthread_config" && ac_cv_prog_ax_pthread_config="no" +fi ;; +esac +fi +ax_pthread_config=$ac_cv_prog_ax_pthread_config +if test -n "$ax_pthread_config"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_config" >&5 +printf "%s\n" "$ax_pthread_config" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test x"$ax_pthread_config" = xno; then continue; fi + PTHREAD_CFLAGS="`pthread-config --cflags`" + PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" + ;; + + *) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for the pthreads library -l$flag" >&5 +printf %s "checking for the pthreads library -l$flag... 
" >&6; } + PTHREAD_LIBS="-l$flag" + ;; + esac + + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Check for various functions. We must include pthread.h, + # since some functions may be macros. (On the Sequent, we + # need a special flag -Kthread to make this header compile.) + # We check for pthread_join because it is in -lpthread on IRIX + # while pthread_create is in libc. We check for pthread_attr_init + # due to DEC craziness with -lpthreads. We check for + # pthread_cleanup_push because it is one of the few pthread + # functions on Solaris that doesn't have a non-functional libc stub. + # We try pthread_create on general principles. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + static void routine(void *a) { a = 0; } + static void *start_routine(void *a) { return a; } +int +main (void) +{ +pthread_t th; pthread_attr_t attr; + pthread_create(&th, 0, start_routine, 0); + pthread_join(th, 0); + pthread_attr_init(&attr); + pthread_cleanup_push(routine, 0); + pthread_cleanup_pop(0) /* ; */ + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ax_pthread_ok=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_ok" >&5 +printf "%s\n" "$ax_pthread_ok" >&6; } + if test "x$ax_pthread_ok" = xyes; then + break; + fi + + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +done +fi + +# Various other checks: +if test "x$ax_pthread_ok" = xyes; then + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for joinable pthread attribute" >&5 +printf %s "checking for joinable pthread attribute... 
" >&6; } + attr_name=unknown + for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ +int attr = $attr; return attr /* ; */ + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + attr_name=$attr; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + done + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $attr_name" >&5 +printf "%s\n" "$attr_name" >&6; } + if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then + +printf "%s\n" "#define PTHREAD_CREATE_JOINABLE $attr_name" >>confdefs.h + + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if more special flags are required for pthreads" >&5 +printf %s "checking if more special flags are required for pthreads... " >&6; } + flag=no + case ${host_os} in + aix* | freebsd* | darwin*) flag="-D_THREAD_SAFE";; + osf* | hpux*) flag="-D_REENTRANT";; + solaris*) + if test "$GCC" = "yes"; then + flag="-D_REENTRANT" + else + flag="-mt -D_REENTRANT" + fi + ;; + esac + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${flag}" >&5 +printf "%s\n" "${flag}" >&6; } + if test "x$flag" != xno; then + PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for PTHREAD_PRIO_INHERIT" >&5 +printf %s "checking for PTHREAD_PRIO_INHERIT... " >&6; } +if test ${ax_cv_PTHREAD_PRIO_INHERIT+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #include +int +main (void) +{ +int i = PTHREAD_PRIO_INHERIT; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ax_cv_PTHREAD_PRIO_INHERIT=yes +else case e in #( + e) ax_cv_PTHREAD_PRIO_INHERIT=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_PRIO_INHERIT" >&5 +printf "%s\n" "$ax_cv_PTHREAD_PRIO_INHERIT" >&6; } + if test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" +then : + +printf "%s\n" "#define HAVE_PTHREAD_PRIO_INHERIT 1" >>confdefs.h + +fi + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + # More AIX lossage: must compile with xlc_r or cc_r + if test x"$GCC" != xyes; then + for ac_prog in xlc_r cc_r +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_PTHREAD_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$PTHREAD_CC"; then + ac_cv_prog_PTHREAD_CC="$PTHREAD_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_PTHREAD_CC="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +PTHREAD_CC=$ac_cv_prog_PTHREAD_CC +if test -n "$PTHREAD_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PTHREAD_CC" >&5 +printf "%s\n" "$PTHREAD_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$PTHREAD_CC" && break +done +test -n "$PTHREAD_CC" || PTHREAD_CC="${CC}" + + else + PTHREAD_CC=$CC + fi +else + PTHREAD_CC="$CC" +fi + + + + + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$ax_pthread_ok" = xyes; then + +printf "%s\n" "#define HAVE_PTHREAD 1" >>confdefs.h + + : +else + ax_pthread_ok=no + as_fn_error $? 
"JIT support requires pthreads" "$LINENO" 5 +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + CC="$PTHREAD_CC" + CFLAGS="$PTHREAD_CFLAGS $CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + fi + +printf "%s\n" "#define SUPPORT_JIT /**/" >>confdefs.h + +else + enable_pcre2grep_jit="no" +fi + +if test "$enable_jit_sealloc" = "yes"; then + +printf "%s\n" "#define SLJIT_PROT_EXECUTABLE_ALLOCATOR 1" >>confdefs.h + +fi + +if test "$enable_pcre2grep_jit" = "yes"; then + +printf "%s\n" "#define SUPPORT_PCRE2GREP_JIT /**/" >>confdefs.h + +fi + +if test "$enable_pcre2grep_callout" = "yes"; then + if test "$enable_pcre2grep_callout_fork" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + if test "$HAVE_SYS_WAIT_H" != "1"; then + as_fn_error $? "Callout script support needs sys/wait.h." "$LINENO" 5 + fi + fi + +printf "%s\n" "#define SUPPORT_PCRE2GREP_CALLOUT_FORK /**/" >>confdefs.h + + fi + +printf "%s\n" "#define SUPPORT_PCRE2GREP_CALLOUT /**/" >>confdefs.h + +else + enable_pcre2grep_callout_fork="no" +fi + +if test "$enable_unicode" = "yes"; then + +printf "%s\n" "#define SUPPORT_UNICODE /**/" >>confdefs.h + +fi + +if test "$enable_pcre2grep_libz" = "yes"; then + +printf "%s\n" "#define SUPPORT_LIBZ /**/" >>confdefs.h + +fi + +if test "$enable_pcre2grep_libbz2" = "yes"; then + +printf "%s\n" "#define SUPPORT_LIBBZ2 /**/" >>confdefs.h + +fi + +if test $with_pcre2grep_bufsize -lt 8192 ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192" >&5 +printf "%s\n" "$as_me: WARNING: $with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192" >&2;} + with_pcre2grep_bufsize="8192" +else + if test $? -gt 1 ; then + as_fn_error $? 
"Bad value for --with-pcre2grep-bufsize" "$LINENO" 5 + fi +fi + +if test $with_pcre2grep_max_bufsize -lt $with_pcre2grep_bufsize ; then + with_pcre2grep_max_bufsize="$with_pcre2grep_bufsize" +else + if test $? -gt 1 ; then + as_fn_error $? "Bad value for --with-pcre2grep-max-bufsize" "$LINENO" 5 + fi +fi + + +printf "%s\n" "#define PCRE2GREP_BUFSIZE $with_pcre2grep_bufsize" >>confdefs.h + + + +printf "%s\n" "#define PCRE2GREP_MAX_BUFSIZE $with_pcre2grep_max_bufsize" >>confdefs.h + + +if test "$enable_pcre2test_libedit" = "yes"; then + +printf "%s\n" "#define SUPPORT_LIBEDIT /**/" >>confdefs.h + + LIBREADLINE="$LIBEDIT" +elif test "$enable_pcre2test_libreadline" = "yes"; then + +printf "%s\n" "#define SUPPORT_LIBREADLINE /**/" >>confdefs.h + +fi + + +printf "%s\n" "#define NEWLINE_DEFAULT $ac_pcre2_newline_value" >>confdefs.h + + +if test "$enable_bsr_anycrlf" = "yes"; then + +printf "%s\n" "#define BSR_ANYCRLF /**/" >>confdefs.h + +fi + +if test "$enable_never_backslash_C" = "yes"; then + +printf "%s\n" "#define NEVER_BACKSLASH_C /**/" >>confdefs.h + +fi + + +printf "%s\n" "#define LINK_SIZE $with_link_size" >>confdefs.h + + + +printf "%s\n" "#define MAX_VARLOOKBEHIND $with_max_varlookbehind" >>confdefs.h + + + +printf "%s\n" "#define PARENS_NEST_LIMIT $with_parens_nest_limit" >>confdefs.h + + + +printf "%s\n" "#define MATCH_LIMIT $with_match_limit" >>confdefs.h + + +# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth + +if test "$with_match_limit_recursion" != "UNSET"; then +cat <>confdefs.h + + + +printf "%s\n" "#define HEAP_LIMIT $with_heap_limit" >>confdefs.h + + + +printf "%s\n" "#define MAX_NAME_SIZE 128" >>confdefs.h + + + +printf "%s\n" "#define MAX_NAME_COUNT 10000" >>confdefs.h + + + + +if test "$enable_ebcdic" = "yes"; then + +printf "%s\n" "#define EBCDIC /**/" >>confdefs.h + +fi + +if test "$enable_ebcdic_nl25" = "yes"; then + +printf "%s\n" "#define EBCDIC_NL25 /**/" >>confdefs.h + +fi + +if test "$enable_valgrind" = 
"yes"; then + +printf "%s\n" "#define SUPPORT_VALGRIND /**/" >>confdefs.h + +fi + +# Platform specific issues +NO_UNDEFINED= +EXPORT_ALL_SYMBOLS= +case $host_os in + cygwin* | mingw* ) + if test X"$enable_shared" = Xyes; then + NO_UNDEFINED="-no-undefined" + EXPORT_ALL_SYMBOLS="-Wl,--export-all-symbols" + fi + ;; +esac + +# The extra LDFLAGS for each particular library. The libpcre2*_version values +# are m4 variables, assigned above. + +EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \ + $NO_UNDEFINED -version-info 13:0:13" + +EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \ + $NO_UNDEFINED -version-info 13:0:13" + +EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \ + $NO_UNDEFINED -version-info 13:0:13" + +EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \ + $NO_UNDEFINED -version-info 3:5:0" + + + + + + +# When we run 'make distcheck', use these arguments. Turning off compiler +# optimization makes it run faster. +DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit" + + +# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is +# specified, the relevant library is available. 
+ +if test "$enable_pcre2grep_libz" = "yes"; then + if test "$HAVE_ZLIB_H" != "1"; then + echo "** Cannot --enable-pcre2grep-libz because zlib.h was not found" + exit 1 + fi + if test "$HAVE_LIBZ" != "1"; then + echo "** Cannot --enable-pcre2grep-libz because libz was not found" + exit 1 + fi + LIBZ="-lz" +fi + + +if test "$enable_pcre2grep_libbz2" = "yes"; then + if test "$HAVE_BZLIB_H" != "1"; then + echo "** Cannot --enable-pcre2grep-libbz2 because bzlib.h was not found" + exit 1 + fi + if test "$HAVE_LIBBZ2" != "1"; then + echo "** Cannot --enable-pcre2grep-libbz2 because libbz2 was not found" + exit 1 + fi + LIBBZ2="-lbz2" +fi + + +# Similarly for --enable-pcre2test-libedit and --enable-pcre2test-libreadline + +if test "$enable_pcre2test_libedit" = "yes"; then + if test "$enable_pcre2test_libreadline" = "yes"; then + echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-libreadline" + exit 1 + fi + if test -z "$HAVE_LIBEDIT_HEADER"; then + echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h," + echo "** edit/readline/readline.h nor a compatible header was found." + exit 1 + fi + if test -z "$LIBEDIT"; then + echo "** Cannot --enable-pcre2test-libedit because libedit library was not found." + exit 1 + fi +fi + +if test "$enable_pcre2test_libreadline" = "yes"; then + if test "$HAVE_READLINE_H" != "1"; then + echo "** Cannot --enable-pcre2test-libreadline because readline/readline.h was not found." + exit 1 + fi + if test "$HAVE_HISTORY_H" != "1"; then + echo "** Cannot --enable-pcre2test-libreadline because readline/history.h was not found." + exit 1 + fi + if test -z "$LIBREADLINE"; then + echo "** Cannot --enable-pcre2test-libreadline because readline library was not found."
+ exit 1 + fi +fi + +# Handle valgrind support + +if test "$enable_valgrind" = "yes"; then + + + + + + + +if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args. +set dummy ${ac_tool_prefix}pkg-config; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_PKG_CONFIG+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac ;; +esac +fi +PKG_CONFIG=$ac_cv_path_PKG_CONFIG +if test -n "$PKG_CONFIG"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5 +printf "%s\n" "$PKG_CONFIG" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_PKG_CONFIG"; then + ac_pt_PKG_CONFIG=$PKG_CONFIG + # Extract the first word of "pkg-config", so it can be a program name with args. +set dummy pkg-config; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_path_ac_pt_PKG_CONFIG+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $ac_pt_PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac ;; +esac +fi +ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG +if test -n "$ac_pt_PKG_CONFIG"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5 +printf "%s\n" "$ac_pt_PKG_CONFIG" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_pt_PKG_CONFIG" = x; then + PKG_CONFIG="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + PKG_CONFIG=$ac_pt_PKG_CONFIG + fi +else + PKG_CONFIG="$ac_cv_path_PKG_CONFIG" +fi + +fi +if test -n "$PKG_CONFIG"; then + _pkg_min_version=0.9.0 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5 +printf %s "checking pkg-config is at least version $_pkg_min_version... 
" >&6; } + if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + PKG_CONFIG="" + fi +fi + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for valgrind" >&5 +printf %s "checking for valgrind... " >&6; } + +if test -n "$VALGRIND_CFLAGS"; then + pkg_cv_VALGRIND_CFLAGS="$VALGRIND_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"valgrind\""; } >&5 + ($PKG_CONFIG --exists --print-errors "valgrind") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_VALGRIND_CFLAGS=`$PKG_CONFIG --cflags "valgrind" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$VALGRIND_LIBS"; then + pkg_cv_VALGRIND_LIBS="$VALGRIND_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"valgrind\""; } >&5 + ($PKG_CONFIG --exists --print-errors "valgrind") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_VALGRIND_LIBS=`$PKG_CONFIG --libs "valgrind" 2>/dev/null` + test "x$?" 
!= "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + VALGRIND_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "valgrind" 2>&1` + else + VALGRIND_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "valgrind" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$VALGRIND_PKG_ERRORS" >&5 + + as_fn_error $? "Package requirements (valgrind) were not met: + +$VALGRIND_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. + +Alternatively, you may set the environment variables VALGRIND_CFLAGS +and VALGRIND_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details." "$LINENO" 5 +elif test $pkg_failed = untried; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +Alternatively, you may set the environment variables VALGRIND_CFLAGS +and VALGRIND_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details. + +To get pkg-config, see <http://pkg-config.freedesktop.org/>. 
+See 'config.log' for more details" "$LINENO" 5; } +else + VALGRIND_CFLAGS=$pkg_cv_VALGRIND_CFLAGS + VALGRIND_LIBS=$pkg_cv_VALGRIND_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +fi +fi + +# Handle code coverage reporting support +if test "$enable_coverage" = "yes"; then + if test "x$GCC" != "xyes"; then + as_fn_error $? "Code coverage reports can only be generated when using GCC" "$LINENO" 5 + fi + + # ccache is incompatible with gcov + # Extract the first word of "shtool", so it can be a program name with args. +set dummy shtool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_SHTOOL+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $SHTOOL in + [\\/]* | ?:[\\/]*) + ac_cv_path_SHTOOL="$SHTOOL" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_SHTOOL="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_SHTOOL" && ac_cv_path_SHTOOL="false" + ;; +esac ;; +esac +fi +SHTOOL=$ac_cv_path_SHTOOL +if test -n "$SHTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $SHTOOL" >&5 +printf "%s\n" "$SHTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + case `$SHTOOL path $CC` in + *ccache*) cc_ccache=yes;; + *) cc_ccache=no;; + esac + + if test "$cc_ccache" = "yes"; then + if test -z "$CCACHE_DISABLE" -o "$CCACHE_DISABLE" != "1"; then + as_fn_error $? 
"must export CCACHE_DISABLE=1 to disable ccache for code coverage" "$LINENO" 5 + fi + fi + + + # Extract the first word of "lcov", so it can be a program name with args. +set dummy lcov; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_LCOV+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $LCOV in + [\\/]* | ?:[\\/]*) + ac_cv_path_LCOV="$LCOV" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_LCOV="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_LCOV" && ac_cv_path_LCOV="false" + ;; +esac ;; +esac +fi +LCOV=$ac_cv_path_LCOV +if test -n "$LCOV"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LCOV" >&5 +printf "%s\n" "$LCOV" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$LCOV" = "xfalse"; then + as_fn_error $? "lcov not found" "$LINENO" 5 + fi + + + # Extract the first word of "genhtml", so it can be a program name with args. +set dummy genhtml; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_GENHTML+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $GENHTML in + [\\/]* | ?:[\\/]*) + ac_cv_path_GENHTML="$GENHTML" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_GENHTML="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_GENHTML" && ac_cv_path_GENHTML="false" + ;; +esac ;; +esac +fi +GENHTML=$ac_cv_path_GENHTML +if test -n "$GENHTML"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $GENHTML" >&5 +printf "%s\n" "$GENHTML" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$GENHTML" = "xfalse"; then + as_fn_error $? "genhtml not found" "$LINENO" 5 + fi + + # Set flags needed for gcov + GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage" + GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage" + GCOV_LIBS="-lgcov" + + + +fi # enable_coverage + + if test "x$enable_coverage" = "xyes"; then + WITH_GCOV_TRUE= + WITH_GCOV_FALSE='#' +else + WITH_GCOV_TRUE='#' + WITH_GCOV_FALSE= +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Intel CET is enabled" >&5 +printf %s "checking whether Intel CET is enabled... " >&6; } +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +#ifndef __CET__ +# error CET is not enabled +#endif + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + pcre2_cc_cv_intel_cet_enabled=yes +else case e in #( + e) pcre2_cc_cv_intel_cet_enabled=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_intel_cet_enabled" >&5 +printf "%s\n" "$pcre2_cc_cv_intel_cet_enabled" >&6; } +if test "$pcre2_cc_cv_intel_cet_enabled" = yes; then + CET_CFLAGS="-mshstk" + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# LIB_POSTFIX is used by CMakeLists.txt for Windows debug builds. +# Pass empty LIB_POSTFIX to *.pc files and pcre2-config here. + + +# Produce these files, in addition to config.h. + +ac_config_files="$ac_config_files Makefile libpcre2-8.pc libpcre2-16.pc libpcre2-32.pc libpcre2-posix.pc pcre2-config src/pcre2.h" + + +# Make the generated script files executable. +ac_config_commands="$ac_config_commands script-chmod" + + +# Make sure that pcre2_chartables.c is removed in case the method for +# creating it was changed by reconfiguration. +ac_config_commands="$ac_config_commands delete-old-chartables" + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. 
+# +# 'ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* 'ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # 'set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # 'set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test ${\1+y} || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +printf "%s\n" "$as_me: updating cache $cache_file" >&6;} + if test ! 
-f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +printf "%s\n" "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`printf "%s\n" "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 +printf %s "checking that generated files are newer than configure... " >&6; } + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: done" >&5 +printf "%s\n" "done" >&6; } + if test -n "$EXEEXT"; then + am__EXEEXT_TRUE= + am__EXEEXT_FALSE='#' +else + am__EXEEXT_TRUE='#' + am__EXEEXT_FALSE= +fi + +if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then + as_fn_error $? "conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + as_fn_error $? 
"conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +# Check whether --enable-year2038 was given. +if test ${enable_year2038+y} +then : + enableval=$enable_year2038; +fi + +if test -z "${WITH_PCRE2_8_TRUE}" && test -z "${WITH_PCRE2_8_FALSE}"; then + as_fn_error $? "conditional \"WITH_PCRE2_8\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_PCRE2_16_TRUE}" && test -z "${WITH_PCRE2_16_FALSE}"; then + as_fn_error $? "conditional \"WITH_PCRE2_16\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_PCRE2_32_TRUE}" && test -z "${WITH_PCRE2_32_FALSE}"; then + as_fn_error $? "conditional \"WITH_PCRE2_32\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_DEBUG_TRUE}" && test -z "${WITH_DEBUG_FALSE}"; then + as_fn_error $? "conditional \"WITH_DEBUG\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_REBUILD_CHARTABLES_TRUE}" && test -z "${WITH_REBUILD_CHARTABLES_FALSE}"; then + as_fn_error $? "conditional \"WITH_REBUILD_CHARTABLES\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_JIT_TRUE}" && test -z "${WITH_JIT_FALSE}"; then + as_fn_error $? "conditional \"WITH_JIT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_UNICODE_TRUE}" && test -z "${WITH_UNICODE_FALSE}"; then + as_fn_error $? "conditional \"WITH_UNICODE\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_VALGRIND_TRUE}" && test -z "${WITH_VALGRIND_FALSE}"; then + as_fn_error $? "conditional \"WITH_VALGRIND\" was never defined. 
+Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_FUZZ_SUPPORT_TRUE}" && test -z "${WITH_FUZZ_SUPPORT_FALSE}"; then + as_fn_error $? "conditional \"WITH_FUZZ_SUPPORT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_DIFF_FUZZ_SUPPORT_TRUE}" && test -z "${WITH_DIFF_FUZZ_SUPPORT_FALSE}"; then + as_fn_error $? "conditional \"WITH_DIFF_FUZZ_SUPPORT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_GCOV_TRUE}" && test -z "${WITH_GCOV_FALSE}"; then + as_fn_error $? "conditional \"WITH_GCOV\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +printf "%s\n" "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else case e in #( + e) case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac ;; +esac +fi + + + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. +# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. +as_nl=' +' +export as_nl +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0</dev/null; fi +if (exec 3>&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi + +# The user is always right. +if ${PATH_SEPARATOR+false} :; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# Find who we are. Look in the path if we contain no directory separator. 
+as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as 'sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + printf "%s\n" "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. 
Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else case e in #( + e) as_fn_append () + { + eval $1=\$$1\$2 + } ;; +esac +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else case e in #( + e) as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } ;; +esac +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + +# Determine whether it's possible to make 'echo' print without a newline. 
+# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both 'ln -s file dir' and 'ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; 'ln -s' creates a wrapper executable. + # In both cases, we have to default to 'cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_sed_cpp="y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" +as_tr_cpp="eval sed '$as_sed_cpp'" # deprecated + +# Sed expression to map a string onto a valid variable name. +as_sed_sh="y%*+%pp%;s%[^_$as_cr_alnum]%_%g" +as_tr_sh="eval sed '$as_sed_sh'" # deprecated + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. 
instead of their +# values after options handling. +ac_log=" +This file was extended by PCRE2 $as_me 10.44, which was +generated by GNU Autoconf 2.72. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +'$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration commands: +$config_commands + +Report bugs to the package provider." 
+ +_ACEOF +ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"` +ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\''/g"` +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config='$ac_cs_config_escaped' +ac_cs_version="\\ +PCRE2 config.status 10.44 +configured by $0, generated by GNU Autoconf 2.72, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2023 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +MKDIR_P='$MKDIR_P' +AWK='$AWK' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + printf "%s\n" "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + printf "%s\n" "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? 
"missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? "ambiguous option: '$1' +Try '$0 --help' for more information.";; + --help | --hel | -h ) + printf "%s\n" "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: '$1' +Try '$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \printf "%s\n" "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + printf "%s\n" "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# +# INIT-COMMANDS +# +AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}" + + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. 
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`' +macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`' +AS='`$ECHO "$AS" | $SED "$delay_single_quote_subst"`' +DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`' +OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`' +enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`' +enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`' +pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`' +enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`' +shared_archive_member_spec='`$ECHO "$shared_archive_member_spec" | $SED "$delay_single_quote_subst"`' +SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`' +ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`' +PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`' +host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`' +host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`' +host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`' +build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`' +build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`' +build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`' +SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`' +Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`' +GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`' +EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`' +FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`' +LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`' +NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`' +LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`' 
+max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`' +ac_objext='`$ECHO "$ac_objext" | $SED "$delay_single_quote_subst"`' +exeext='`$ECHO "$exeext" | $SED "$delay_single_quote_subst"`' +lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`' +lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`' +lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`' +lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_quote_subst"`' +lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`' +reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`' +reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`' +FILECMD='`$ECHO "$FILECMD" | $SED "$delay_single_quote_subst"`' +deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`' +file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`' +file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`' +want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`' +sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`' +AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`' +lt_ar_flags='`$ECHO "$lt_ar_flags" | $SED "$delay_single_quote_subst"`' +AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`' +archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`' +STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`' +RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`' +old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED "$delay_single_quote_subst"`' +old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_cmds='`$ECHO "$old_archive_cmds" | $SED "$delay_single_quote_subst"`' +lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED 
"$delay_single_quote_subst"`' +CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`' +CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`' +compiler='`$ECHO "$compiler" | $SED "$delay_single_quote_subst"`' +GCC='`$ECHO "$GCC" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_import='`$ECHO "$lt_cv_sys_global_symbol_to_import" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED "$delay_single_quote_subst"`' +lt_cv_nm_interface='`$ECHO "$lt_cv_nm_interface" | $SED "$delay_single_quote_subst"`' +nm_file_list_spec='`$ECHO "$nm_file_list_spec" | $SED "$delay_single_quote_subst"`' +lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`' +lt_cv_truncate_bin='`$ECHO "$lt_cv_truncate_bin" | $SED "$delay_single_quote_subst"`' +objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`' +MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED "$delay_single_quote_subst"`' +lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED "$delay_single_quote_subst"`' +need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`' +MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`' 
+DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`' +NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`' +LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`' +OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`' +OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`' +libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`' +shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`' +extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED "$delay_single_quote_subst"`' +archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED "$delay_single_quote_subst"`' +enable_shared_with_static_runtimes='`$ECHO "$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`' +export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED "$delay_single_quote_subst"`' +whole_archive_flag_spec='`$ECHO "$whole_archive_flag_spec" | $SED "$delay_single_quote_subst"`' +compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED "$delay_single_quote_subst"`' +old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_from_expsyms_cmds='`$ECHO "$old_archive_from_expsyms_cmds" | $SED "$delay_single_quote_subst"`' +archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`' +archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED "$delay_single_quote_subst"`' +module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`' +module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED "$delay_single_quote_subst"`' +with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`' +allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`' +no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED 
"$delay_single_quote_subst"`' +hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`' +hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`' +hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED "$delay_single_quote_subst"`' +hardcode_shlibpath_var='`$ECHO "$hardcode_shlibpath_var" | $SED "$delay_single_quote_subst"`' +hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED "$delay_single_quote_subst"`' +inherit_rpath='`$ECHO "$inherit_rpath" | $SED "$delay_single_quote_subst"`' +link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED "$delay_single_quote_subst"`' +always_export_symbols='`$ECHO "$always_export_symbols" | $SED "$delay_single_quote_subst"`' +export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED "$delay_single_quote_subst"`' +exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`' +include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`' +prelink_cmds='`$ECHO "$prelink_cmds" | $SED "$delay_single_quote_subst"`' +postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`' +file_list_spec='`$ECHO "$file_list_spec" | $SED "$delay_single_quote_subst"`' +variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED "$delay_single_quote_subst"`' +need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`' +need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`' +version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`' +runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED "$delay_single_quote_subst"`' +libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`' +library_names_spec='`$ECHO "$library_names_spec" | $SED "$delay_single_quote_subst"`' +soname_spec='`$ECHO "$soname_spec" | 
$SED "$delay_single_quote_subst"`' +install_override_mode='`$ECHO "$install_override_mode" | $SED "$delay_single_quote_subst"`' +postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED "$delay_single_quote_subst"`' +postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +finish_cmds='`$ECHO "$finish_cmds" | $SED "$delay_single_quote_subst"`' +finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`' +hardcode_into_libs='`$ECHO "$hardcode_into_libs" | $SED "$delay_single_quote_subst"`' +sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED "$delay_single_quote_subst"`' +configure_time_dlsearch_path='`$ECHO "$configure_time_dlsearch_path" | $SED "$delay_single_quote_subst"`' +configure_time_lt_sys_library_path='`$ECHO "$configure_time_lt_sys_library_path" | $SED "$delay_single_quote_subst"`' +hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`' +enable_dlopen='`$ECHO "$enable_dlopen" | $SED "$delay_single_quote_subst"`' +enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED "$delay_single_quote_subst"`' +enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED "$delay_single_quote_subst"`' +old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`' +striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`' + +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' +compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + +# Quote evaled strings. 
+for var in AS \ +DLLTOOL \ +OBJDUMP \ +SHELL \ +ECHO \ +PATH_SEPARATOR \ +SED \ +GREP \ +EGREP \ +FGREP \ +LD \ +NM \ +LN_S \ +lt_SP2NL \ +lt_NL2SP \ +reload_flag \ +FILECMD \ +deplibs_check_method \ +file_magic_cmd \ +file_magic_glob \ +want_nocaseglob \ +sharedlib_from_linklib_cmd \ +AR \ +archiver_list_spec \ +STRIP \ +RANLIB \ +CC \ +CFLAGS \ +compiler \ +lt_cv_sys_global_symbol_pipe \ +lt_cv_sys_global_symbol_to_cdecl \ +lt_cv_sys_global_symbol_to_import \ +lt_cv_sys_global_symbol_to_c_name_address \ +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \ +lt_cv_nm_interface \ +nm_file_list_spec \ +lt_cv_truncate_bin \ +lt_prog_compiler_no_builtin_flag \ +lt_prog_compiler_pic \ +lt_prog_compiler_wl \ +lt_prog_compiler_static \ +lt_cv_prog_compiler_c_o \ +need_locks \ +MANIFEST_TOOL \ +DSYMUTIL \ +NMEDIT \ +LIPO \ +OTOOL \ +OTOOL64 \ +shrext_cmds \ +export_dynamic_flag_spec \ +whole_archive_flag_spec \ +compiler_needs_object \ +with_gnu_ld \ +allow_undefined_flag \ +no_undefined_flag \ +hardcode_libdir_flag_spec \ +hardcode_libdir_separator \ +exclude_expsyms \ +include_expsyms \ +file_list_spec \ +variables_saved_for_relink \ +libname_spec \ +library_names_spec \ +soname_spec \ +install_override_mode \ +finish_eval \ +old_striplib \ +striplib; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. 
+for var in reload_cmds \ +old_postinstall_cmds \ +old_postuninstall_cmds \ +old_archive_cmds \ +extract_expsyms_cmds \ +old_archive_from_new_cmds \ +old_archive_from_expsyms_cmds \ +archive_cmds \ +archive_expsym_cmds \ +module_cmds \ +module_expsym_cmds \ +export_symbols_cmds \ +prelink_cmds \ +postlink_cmds \ +postinstall_cmds \ +postuninstall_cmds \ +finish_cmds \ +sys_lib_search_path_spec \ +configure_time_dlsearch_path \ +configure_time_lt_sys_library_path; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +ac_aux_dir='$ac_aux_dir' + +# See if we are running on zsh, and set the options that allow our +# commands through without removal of \ escapes INIT. +if test -n "\${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi + + + PACKAGE='$PACKAGE' + VERSION='$VERSION' + RM='$RM' + ofile='$ofile' + + + + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. 
+for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "src/config.h") CONFIG_HEADERS="$CONFIG_HEADERS src/config.h" ;; + "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; + "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "libpcre2-8.pc") CONFIG_FILES="$CONFIG_FILES libpcre2-8.pc" ;; + "libpcre2-16.pc") CONFIG_FILES="$CONFIG_FILES libpcre2-16.pc" ;; + "libpcre2-32.pc") CONFIG_FILES="$CONFIG_FILES libpcre2-32.pc" ;; + "libpcre2-posix.pc") CONFIG_FILES="$CONFIG_FILES libpcre2-posix.pc" ;; + "pcre2-config") CONFIG_FILES="$CONFIG_FILES pcre2-config" ;; + "src/pcre2.h") CONFIG_FILES="$CONFIG_FILES src/pcre2.h" ;; + "script-chmod") CONFIG_COMMANDS="$CONFIG_COMMANDS script-chmod" ;; + "delete-old-chartables") CONFIG_COMMANDS="$CONFIG_COMMANDS delete-old-chartables" ;; + + *) as_fn_error $? "invalid argument: '$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files + test ${CONFIG_HEADERS+y} || CONFIG_HEADERS=$config_headers + test ${CONFIG_COMMANDS+y} || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to '$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! 
-d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with './config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with './config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script 'defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. 
+ +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? 
"invalid tag '$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain ':'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: '$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is 'configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + printf "%s\n" "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +printf "%s\n" "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`printf "%s\n" "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +printf "%s\n" X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac + ac_MKDIR_P=$MKDIR_P + case $MKDIR_P in + [\\/$]* | ?:[\\/]* ) ;; + */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when '$srcdir' = '.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +s&@MKDIR_P@&$ac_MKDIR_P&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable 'datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable 'datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + printf "%s\n" "/* $configure_input */" >&1 \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +printf "%s\n" "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + printf "%s\n" "/* $configure_input */" >&1 \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi +# Compute "$ac_file"'s index in $config_headers. 
+_am_arg="$ac_file" +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || +$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$_am_arg" : 'X\(//\)[^/]' \| \ + X"$_am_arg" : 'X\(//\)$' \| \ + X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$_am_arg" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'`/stamp-h$_am_stamp_count + ;; + + :C) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +printf "%s\n" "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "depfiles":C) test x"$AMDEP_TRUE" != x"" || { + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. + case $CONFIG_FILES in #( + *\'*) : + eval set x "$CONFIG_FILES" ;; #( + *) : + set x $CONFIG_FILES ;; #( + *) : + ;; +esac + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`printf "%s\n" "$am_mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. 
+ sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`$as_dirname -- "$am_mf" || +$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$am_mf" : 'X\(//\)[^/]' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$am_mf" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + am_filepart=`$as_basename -- "$am_mf" || +$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X/"$am_mf" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { echo "$as_me:$LINENO: cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles" >&5 + (cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } || am_rc=$? + done + if test $am_rc -ne 0; then + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. If GNU make was not used, consider + re-running the configure script with MAKE=\"gmake\" (or whatever is + necessary). You can also try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking). 
+See 'config.log' for more details" "$LINENO" 5; } + fi + { am_dirpart=; unset am_dirpart;} + { am_filepart=; unset am_filepart;} + { am_mf=; unset am_mf;} + { am_rc=; unset am_rc;} + rm -f conftest-deps.mk +} + ;; + "libtool":C) + + # See if we are running on zsh, and set the options that allow our + # commands through without removal of \ escapes. + if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST + fi + + cfgfile=${ofile}T + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL +# Generated automatically by $as_me ($PACKAGE) $VERSION +# NOTE: Changes made to this file will be lost: look at ltmain.sh. + +# Provide generalized library-building support services. +# Written by Gordon Matzigkeit, 1996 + +# Copyright (C) 2024 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program or library that is built +# using GNU Libtool, you may include this file under the same +# distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# The names of the tagged configurations supported by this script. 
+available_tags='' + +# Configured defaults for sys_lib_dlsearch_path munging. +: \${LT_SYS_LIBRARY_PATH="$configure_time_lt_sys_library_path"} + +# ### BEGIN LIBTOOL CONFIG + +# Which release of libtool.m4 was used? +macro_version=$macro_version +macro_revision=$macro_revision + +# Assembler program. +AS=$lt_AS + +# DLL creation program. +DLLTOOL=$lt_DLLTOOL + +# Object dumper program. +OBJDUMP=$lt_OBJDUMP + +# Whether or not to build shared libraries. +build_libtool_libs=$enable_shared + +# Whether or not to build static libraries. +build_old_libs=$enable_static + +# What type of objects to build. +pic_mode=$pic_mode + +# Whether or not to optimize for fast installation. +fast_install=$enable_fast_install + +# Shared archive member basename,for filename based shared library versioning on AIX. +shared_archive_member_spec=$shared_archive_member_spec + +# Shell to use when invoking shell scripts. +SHELL=$lt_SHELL + +# An echo program that protects backslashes. +ECHO=$lt_ECHO + +# The PATH separator for the build system. +PATH_SEPARATOR=$lt_PATH_SEPARATOR + +# The host system. +host_alias=$host_alias +host=$host +host_os=$host_os + +# The build system. +build_alias=$build_alias +build=$build +build_os=$build_os + +# A sed program that does not truncate output. +SED=$lt_SED + +# Sed that helps us avoid accidentally triggering echo(1) options like -n. +Xsed="\$SED -e 1s/^X//" + +# A grep program that handles long lines. +GREP=$lt_GREP + +# An ERE matcher. +EGREP=$lt_EGREP + +# A literal string matcher. +FGREP=$lt_FGREP + +# A BSD- or MS-compatible name lister. +NM=$lt_NM + +# Whether we need soft or hard links. +LN_S=$lt_LN_S + +# What is the maximum length of a command? +max_cmd_len=$max_cmd_len + +# Object file suffix (normally "o"). +objext=$ac_objext + +# Executable file suffix (normally ""). +exeext=$exeext + +# whether the shell understands "unset". +lt_unset=$lt_unset + +# turn spaces into newlines. +SP2NL=$lt_lt_SP2NL + +# turn newlines into spaces. 
+NL2SP=$lt_lt_NL2SP + +# convert \$build file names to \$host format. +to_host_file_cmd=$lt_cv_to_host_file_cmd + +# convert \$build files to toolchain format. +to_tool_file_cmd=$lt_cv_to_tool_file_cmd + +# A file(cmd) program that detects file types. +FILECMD=$lt_FILECMD + +# Method to check whether dependent libraries are shared objects. +deplibs_check_method=$lt_deplibs_check_method + +# Command to use when deplibs_check_method = "file_magic". +file_magic_cmd=$lt_file_magic_cmd + +# How to find potential files when deplibs_check_method = "file_magic". +file_magic_glob=$lt_file_magic_glob + +# Find potential files using nocaseglob when deplibs_check_method = "file_magic". +want_nocaseglob=$lt_want_nocaseglob + +# Command to associate shared and link libraries. +sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd + +# The archiver. +AR=$lt_AR + +# Flags to create an archive (by configure). +lt_ar_flags=$lt_ar_flags + +# Flags to create an archive. +AR_FLAGS=\${ARFLAGS-"\$lt_ar_flags"} + +# How to feed a file listing to the archiver. +archiver_list_spec=$lt_archiver_list_spec + +# A symbol stripping program. +STRIP=$lt_STRIP + +# Commands used to install an old-style archive. +RANLIB=$lt_RANLIB +old_postinstall_cmds=$lt_old_postinstall_cmds +old_postuninstall_cmds=$lt_old_postuninstall_cmds + +# Whether to use a lock for old archive extraction. +lock_old_archive_extraction=$lock_old_archive_extraction + +# A C compiler. +LTCC=$lt_CC + +# LTCC compiler flags. +LTCFLAGS=$lt_CFLAGS + +# Take the output of nm and produce a listing of raw symbols and C names. +global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe + +# Transform the output of nm in a proper C declaration. +global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl + +# Transform the output of nm into a list of symbols to manually relocate. +global_symbol_to_import=$lt_lt_cv_sys_global_symbol_to_import + +# Transform the output of nm in a C name address pair. 
+global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address + +# Transform the output of nm in a C name address pair when lib prefix is needed. +global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix + +# The name lister interface. +nm_interface=$lt_lt_cv_nm_interface + +# Specify filename containing input files for \$NM. +nm_file_list_spec=$lt_nm_file_list_spec + +# The root where to search for dependent libraries,and where our libraries should be installed. +lt_sysroot=$lt_sysroot + +# Command to truncate a binary pipe. +lt_truncate_bin=$lt_lt_cv_truncate_bin + +# The name of the directory that contains temporary libtool files. +objdir=$objdir + +# Used to examine libraries when file_magic_cmd begins with "file". +MAGIC_CMD=$MAGIC_CMD + +# Must we lock files when doing compilation? +need_locks=$lt_need_locks + +# Manifest tool. +MANIFEST_TOOL=$lt_MANIFEST_TOOL + +# Tool to manipulate archived DWARF debug symbol files on Mac OS X. +DSYMUTIL=$lt_DSYMUTIL + +# Tool to change global to local symbols on Mac OS X. +NMEDIT=$lt_NMEDIT + +# Tool to manipulate fat objects and archives on Mac OS X. +LIPO=$lt_LIPO + +# ldd/readelf like tool for Mach-O binaries on Mac OS X. +OTOOL=$lt_OTOOL + +# ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4. +OTOOL64=$lt_OTOOL64 + +# Old archive suffix (normally "a"). +libext=$libext + +# Shared library suffix (normally ".so"). +shrext_cmds=$lt_shrext_cmds + +# The commands to extract the exported symbol list from a shared archive. +extract_expsyms_cmds=$lt_extract_expsyms_cmds + +# Variables whose values should be saved in libtool wrapper scripts and +# restored at link time. +variables_saved_for_relink=$lt_variables_saved_for_relink + +# Do we need the "lib" prefix for modules? +need_lib_prefix=$need_lib_prefix + +# Do we need a version for libraries? +need_version=$need_version + +# Library versioning type. 
+version_type=$version_type + +# Shared library runtime path variable. +runpath_var=$runpath_var + +# Shared library path variable. +shlibpath_var=$shlibpath_var + +# Is shlibpath searched before the hard-coded library search path? +shlibpath_overrides_runpath=$shlibpath_overrides_runpath + +# Format of library name prefix. +libname_spec=$lt_libname_spec + +# List of archive names. First name is the real one, the rest are links. +# The last name is the one that the linker finds with -lNAME +library_names_spec=$lt_library_names_spec + +# The coded name of the library, if different from the real name. +soname_spec=$lt_soname_spec + +# Permission mode override for installation of shared libraries. +install_override_mode=$lt_install_override_mode + +# Command to use after installation of a shared archive. +postinstall_cmds=$lt_postinstall_cmds + +# Command to use after uninstallation of a shared archive. +postuninstall_cmds=$lt_postuninstall_cmds + +# Commands used to finish a libtool library installation in a directory. +finish_cmds=$lt_finish_cmds + +# As "finish_cmds", except a single script fragment to be evaled but +# not shown. +finish_eval=$lt_finish_eval + +# Whether we should hardcode library paths into libraries. +hardcode_into_libs=$hardcode_into_libs + +# Compile-time system search path for libraries. +sys_lib_search_path_spec=$lt_sys_lib_search_path_spec + +# Detected run-time system search path for libraries. +sys_lib_dlsearch_path_spec=$lt_configure_time_dlsearch_path + +# Explicit LT_SYS_LIBRARY_PATH set during ./configure time. +configure_time_lt_sys_library_path=$lt_configure_time_lt_sys_library_path + +# Whether dlopen is supported. +dlopen_support=$enable_dlopen + +# Whether dlopen of programs is supported. +dlopen_self=$enable_dlopen_self + +# Whether dlopen of statically linked programs is supported. +dlopen_self_static=$enable_dlopen_self_static + +# Commands to strip libraries. 
+old_striplib=$lt_old_striplib +striplib=$lt_striplib + + +# The linker used to build libraries. +LD=$lt_LD + +# How to create reloadable object files. +reload_flag=$lt_reload_flag +reload_cmds=$lt_reload_cmds + +# Commands used to build an old-style archive. +old_archive_cmds=$lt_old_archive_cmds + +# A language specific compiler. +CC=$lt_compiler + +# Is the compiler the GNU compiler? +with_gcc=$GCC + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag + +# Additional compiler flags for building library objects. +pic_flag=$lt_lt_prog_compiler_pic + +# How to pass a linker flag through the compiler. +wl=$lt_lt_prog_compiler_wl + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_lt_prog_compiler_static + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_lt_cv_prog_compiler_c_o + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$archive_cmds_need_lc + +# Whether or not to disallow shared libs when runtime libs are static. +allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec + +# Whether the compiler copes with passing no objects directly. +compiler_needs_object=$lt_compiler_needs_object + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds + +# Commands used to build a shared archive. +archive_cmds=$lt_archive_cmds +archive_expsym_cmds=$lt_archive_expsym_cmds + +# Commands used to build a loadable module if different from building +# a shared archive. 
+module_cmds=$lt_module_cmds +module_expsym_cmds=$lt_module_expsym_cmds + +# Whether we are building with GNU ld or not. +with_gnu_ld=$lt_with_gnu_ld + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag + +# Flag that enforces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec + +# Whether we need a single "-rpath" flag with a separated argument. +hardcode_libdir_separator=$lt_hardcode_libdir_separator + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary. +hardcode_direct=$hardcode_direct + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary and the resulting library dependency is +# "absolute",i.e. impossible to change by setting \$shlibpath_var if the +# library is relocated. +hardcode_direct_absolute=$hardcode_direct_absolute + +# Set to "yes" if using the -LDIR flag during linking hardcodes DIR +# into the resulting binary. +hardcode_minus_L=$hardcode_minus_L + +# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR +# into the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var + +# Set to "yes" if building a shared library automatically hardcodes DIR +# into the library and all subsequent libraries and executables linked +# against it. +hardcode_automatic=$hardcode_automatic + +# Set to yes if linker adds runtime paths of dependent libraries +# to runtime path list. +inherit_rpath=$inherit_rpath + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs + +# Set to "yes" if exported symbols are required. +always_export_symbols=$always_export_symbols + +# The commands to list exported symbols. 
+export_symbols_cmds=$lt_export_symbols_cmds + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms + +# Commands necessary for linking programs (against libraries) with templates. +prelink_cmds=$lt_prelink_cmds + +# Commands necessary for finishing linking programs. +postlink_cmds=$lt_postlink_cmds + +# Specify filename containing input files. +file_list_spec=$lt_file_list_spec + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action + +# ### END LIBTOOL CONFIG + +_LT_EOF + + cat <<'_LT_EOF' >> "$cfgfile" + +# ### BEGIN FUNCTIONS SHARED WITH CONFIGURE + +# func_munge_path_list VARIABLE PATH +# ----------------------------------- +# VARIABLE is name of variable containing _space_ separated list of +# directories to be munged by the contents of PATH, which is string +# having a format: +# "DIR[:DIR]:" +# string "DIR[ DIR]" will be prepended to VARIABLE +# ":DIR[:DIR]" +# string "DIR[ DIR]" will be appended to VARIABLE +# "DIRP[:DIRP]::[DIRA:]DIRA" +# string "DIRP[ DIRP]" will be prepended to VARIABLE and string +# "DIRA[ DIRA]" will be appended to VARIABLE +# "DIR[:DIR]" +# VARIABLE will be replaced by "DIR[ DIR]" +func_munge_path_list () +{ + case x$2 in + x) + ;; + *:) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'` \$$1\" + ;; + x:*) + eval $1=\"\$$1 `$ECHO $2 | $SED 's/:/ /g'`\" + ;; + *::*) + eval $1=\"\$$1\ `$ECHO $2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" + eval $1=\"`$ECHO $2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \$$1\" + ;; + *) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'`\" + ;; + esac +} + + +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. 
+func_cc_basename () +{ + for cc_temp in $*""; do + case $cc_temp in + compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; + distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; + \-*) ;; + *) break;; + esac + done + func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +} + + +# ### END FUNCTIONS SHARED WITH CONFIGURE + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + + +ltmain=$ac_aux_dir/ltmain.sh + + + # We use sed instead of cat because bash on DJGPP gets confused if + # if finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + $SED '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" + + ;; + "script-chmod":C) chmod a+x pcre2-config ;; + "delete-old-chartables":C) rm -f pcre2_chartables.c ;; + + esac +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. 
When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + + +# --disable-stack-for-recursion is obsolete and has no effect. + +if test "$enable_stack_for_recursion" = "no"; then +cat < + #endif + #include + + int a, b; + size_t m; + ]], [[__builtin_mul_overflow(a, b, &m)]])], + [pcre2_cc_cv_builtin_mul_overflow=yes], + [pcre2_cc_cv_builtin_mul_overflow=no]) +AC_MSG_RESULT([$pcre2_cc_cv_builtin_mul_overflow]) +if test "$pcre2_cc_cv_builtin_mul_overflow" = yes; then + AC_DEFINE([HAVE_BUILTIN_MUL_OVERFLOW], 1, + [Define this if your compiler provides __builtin_mul_overflow()]) +fi +AC_LANG_POP([C]) + +# Check for Clang __attribute__((uninitialized)) feature + +AC_MSG_CHECKING([for __attribute__((uninitialized))]) +AC_LANG_PUSH([C]) +tmp_CFLAGS=$CFLAGS +CFLAGS="$CFLAGS -Werror" +AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, + [[char buf[128] __attribute__((uninitialized));(void)buf]])], + [pcre2_cc_cv_attribute_uninitialized=yes], + [pcre2_cc_cv_attribute_uninitialized=no]) +AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized]) +if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then + AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler + supports __attribute__((uninitialized))]) +fi +CFLAGS=$tmp_CFLAGS +AC_LANG_POP([C]) + +# 
Versioning + +PCRE2_MAJOR="pcre2_major" +PCRE2_MINOR="pcre2_minor" +PCRE2_PRERELEASE="pcre2_prerelease" +PCRE2_DATE="pcre2_date" + +if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09" +then + echo "***" + echo "*** Minor version number $PCRE2_MINOR must not be used. ***" + echo "*** Use only 00 to 07 or 10 onwards, to avoid octal issues. ***" + echo "***" + exit 1 +fi + +AC_SUBST(PCRE2_MAJOR) +AC_SUBST(PCRE2_MINOR) +AC_SUBST(PCRE2_PRERELEASE) +AC_SUBST(PCRE2_DATE) + +# Set a more sensible default value for $(htmldir). +if test "x$htmldir" = 'x${docdir}' +then + htmldir='${docdir}/html' +fi + +# Force an error for PCRE1 size options +AC_ARG_ENABLE(pcre8,,,enable_pcre8=no) +AC_ARG_ENABLE(pcre16,,,enable_pcre16=no) +AC_ARG_ENABLE(pcre32,,,enable_pcre32=no) + +if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono" +then + echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]" + exit 1 +fi + +# Handle --disable-pcre2-8 (enabled by default) +AC_ARG_ENABLE(pcre2-8, + AS_HELP_STRING([--disable-pcre2-8], + [disable 8 bit character support]), + , enable_pcre2_8=unset) +AC_SUBST(enable_pcre2_8) + +# Handle --enable-pcre2-16 (disabled by default) +AC_ARG_ENABLE(pcre2-16, + AS_HELP_STRING([--enable-pcre2-16], + [enable 16 bit character support]), + , enable_pcre2_16=unset) +AC_SUBST(enable_pcre2_16) + +# Handle --enable-pcre2-32 (disabled by default) +AC_ARG_ENABLE(pcre2-32, + AS_HELP_STRING([--enable-pcre2-32], + [enable 32 bit character support]), + , enable_pcre2_32=unset) +AC_SUBST(enable_pcre2_32) + +# Handle --enable-debug (disabled by default) +AC_ARG_ENABLE(debug, + AS_HELP_STRING([--enable-debug], + [enable debugging code]), + , enable_debug=no) + +# Handle --enable-jit (disabled by default) +AC_ARG_ENABLE(jit, + AS_HELP_STRING([--enable-jit], + [enable Just-In-Time compiling support]), + , enable_jit=no) + +# This code enables JIT if the hardware supports it. 
+if test "$enable_jit" = "auto"; then + AC_LANG(C) + SAVE_CPPFLAGS=$CPPFLAGS + CPPFLAGS=-I$srcdir + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + #define SLJIT_CONFIG_AUTO 1 + #include "src/sljit/sljitConfigCPU.h" + #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + #error unsupported + #endif]])], enable_jit=yes, enable_jit=no) + CPPFLAGS=$SAVE_CPPFLAGS + echo checking for JIT support on this hardware... $enable_jit +fi + +# Handle --enable-jit-sealloc (disabled by default and only experimental) +case $host_os in + linux* | netbsd*) + AC_ARG_ENABLE(jit-sealloc, + AS_HELP_STRING([--enable-jit-sealloc], + [enable SELinux compatible execmem allocator in JIT (experimental)]), + ,enable_jit_sealloc=no) + ;; + *) + enable_jit_sealloc=unsupported + ;; +esac + +# Handle --disable-pcre2grep-jit (enabled by default) +AC_ARG_ENABLE(pcre2grep-jit, + AS_HELP_STRING([--disable-pcre2grep-jit], + [disable JIT support in pcre2grep]), + , enable_pcre2grep_jit=yes) + +# Handle --disable-pcre2grep-callout (enabled by default) +AC_ARG_ENABLE(pcre2grep-callout, + AS_HELP_STRING([--disable-pcre2grep-callout], + [disable callout script support in pcre2grep]), + , enable_pcre2grep_callout=yes) + +# Handle --disable-pcre2grep-callout-fork (enabled by default) +AC_ARG_ENABLE(pcre2grep-callout-fork, + AS_HELP_STRING([--disable-pcre2grep-callout-fork], + [disable callout script fork support in pcre2grep]), + , enable_pcre2grep_callout_fork=yes) + +# Handle --enable-rebuild-chartables +AC_ARG_ENABLE(rebuild-chartables, + AS_HELP_STRING([--enable-rebuild-chartables], + [rebuild character tables in current locale]), + , enable_rebuild_chartables=no) + +# Handle --disable-unicode (enabled by default) +AC_ARG_ENABLE(unicode, + AS_HELP_STRING([--disable-unicode], + [disable Unicode support]), + , enable_unicode=unset) + +# Handle newline options +ac_pcre2_newline=lf +AC_ARG_ENABLE(newline-is-cr, + AS_HELP_STRING([--enable-newline-is-cr], + [use CR as newline character]), + 
ac_pcre2_newline=cr) +AC_ARG_ENABLE(newline-is-lf, + AS_HELP_STRING([--enable-newline-is-lf], + [use LF as newline character (default)]), + ac_pcre2_newline=lf) +AC_ARG_ENABLE(newline-is-crlf, + AS_HELP_STRING([--enable-newline-is-crlf], + [use CRLF as newline sequence]), + ac_pcre2_newline=crlf) +AC_ARG_ENABLE(newline-is-anycrlf, + AS_HELP_STRING([--enable-newline-is-anycrlf], + [use CR, LF, or CRLF as newline sequence]), + ac_pcre2_newline=anycrlf) +AC_ARG_ENABLE(newline-is-any, + AS_HELP_STRING([--enable-newline-is-any], + [use any valid Unicode newline sequence]), + ac_pcre2_newline=any) +AC_ARG_ENABLE(newline-is-nul, + AS_HELP_STRING([--enable-newline-is-nul], + [use NUL (binary zero) as newline character]), + ac_pcre2_newline=nul) +enable_newline="$ac_pcre2_newline" + +# Handle --enable-bsr-anycrlf +AC_ARG_ENABLE(bsr-anycrlf, + AS_HELP_STRING([--enable-bsr-anycrlf], + [\R matches only CR, LF, CRLF by default]), + , enable_bsr_anycrlf=no) + +# Handle --enable-never-backslash-C +AC_ARG_ENABLE(never-backslash-C, + AS_HELP_STRING([--enable-never-backslash-C], + [use of \C causes an error]), + , enable_never_backslash_C=no) + +# Handle --enable-ebcdic +AC_ARG_ENABLE(ebcdic, + AS_HELP_STRING([--enable-ebcdic], + [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]), + , enable_ebcdic=no) + +# Handle --enable-ebcdic-nl25 +AC_ARG_ENABLE(ebcdic-nl25, + AS_HELP_STRING([--enable-ebcdic-nl25], + [set EBCDIC code for NL to 0x25 instead of 0x15; it implies --enable-ebcdic]), + , enable_ebcdic_nl25=no) + +# Handle --enable-pcre2grep-libz +AC_ARG_ENABLE(pcre2grep-libz, + AS_HELP_STRING([--enable-pcre2grep-libz], + [link pcre2grep with libz to handle .gz files]), + , enable_pcre2grep_libz=no) + +# Handle --enable-pcre2grep-libbz2 +AC_ARG_ENABLE(pcre2grep-libbz2, + AS_HELP_STRING([--enable-pcre2grep-libbz2], + [link pcre2grep with libbz2 to handle .bz2 files]), + , 
enable_pcre2grep_libbz2=no) + +# Handle --with-pcre2grep-bufsize=N +AC_ARG_WITH(pcre2grep-bufsize, + AS_HELP_STRING([--with-pcre2grep-bufsize=N], + [pcre2grep initial buffer size (default=20480, minimum=8192)]), + , with_pcre2grep_bufsize=20480) + +# Handle --with-pcre2grep-max-bufsize=N +AC_ARG_WITH(pcre2grep-max-bufsize, + AS_HELP_STRING([--with-pcre2grep-max-bufsize=N], + [pcre2grep maximum buffer size (default=1048576, minimum=8192)]), + , with_pcre2grep_max_bufsize=1048576) + +# Handle --enable-pcre2test-libedit +AC_ARG_ENABLE(pcre2test-libedit, + AS_HELP_STRING([--enable-pcre2test-libedit], + [link pcre2test with libedit]), + , enable_pcre2test_libedit=no) + +# Handle --enable-pcre2test-libreadline +AC_ARG_ENABLE(pcre2test-libreadline, + AS_HELP_STRING([--enable-pcre2test-libreadline], + [link pcre2test with libreadline]), + , enable_pcre2test_libreadline=no) + +# Handle --with-link-size=N +AC_ARG_WITH(link-size, + AS_HELP_STRING([--with-link-size=N], + [internal link size (2, 3, or 4 allowed; default=2)]), + , with_link_size=2) + +# Handle --with-max-varlookbehind=N +AC_ARG_WITH(max-varlookbehind, + AS_HELP_STRING([--with-max-varlookbehind=N], + [maximum length of variable lookbehind (default=255)]), + , with_max_varlookbehind=255) + +# Handle --with-parens-nest-limit=N +AC_ARG_WITH(parens-nest-limit, + AS_HELP_STRING([--with-parens-nest-limit=N], + [nested parentheses limit (default=250)]), + , with_parens_nest_limit=250) + +# Handle --with-heap-limit +AC_ARG_WITH(heap-limit, + AS_HELP_STRING([--with-heap-limit=N], + [default limit on heap memory (kibibytes, default=20000000)]), + , with_heap_limit=20000000) + +# Handle --with-match-limit=N +AC_ARG_WITH(match-limit, + AS_HELP_STRING([--with-match-limit=N], + [default limit on internal looping (default=10000000)]), + , with_match_limit=10000000) + +# Handle --with-match-limit-depth=N +# Recognize old synonym --with-match-limit-recursion +# +# Note: In config.h, the default is to define MATCH_LIMIT_DEPTH 
symbolically as +# MATCH_LIMIT, which in turn is defined to be some numeric value (e.g. +# 10000000). MATCH_LIMIT_DEPTH can otherwise be set to some different numeric +# value (or even the same numeric value as MATCH_LIMIT, though no longer +# defined in terms of the latter). +# +AC_ARG_WITH(match-limit-depth, + AS_HELP_STRING([--with-match-limit-depth=N], + [default limit on match tree depth (default=MATCH_LIMIT)]), + , with_match_limit_depth=MATCH_LIMIT) + +AC_ARG_WITH(match-limit-recursion,, + , with_match_limit_recursion=UNSET) + +# Handle --enable-valgrind +AC_ARG_ENABLE(valgrind, + AS_HELP_STRING([--enable-valgrind], + [enable valgrind support]), + , enable_valgrind=no) + +# Enable code coverage reports using gcov +AC_ARG_ENABLE(coverage, + AS_HELP_STRING([--enable-coverage], + [enable code coverage reports using gcov]), + , enable_coverage=no) + +# Handle --enable-fuzz-support +AC_ARG_ENABLE(fuzz_support, + AS_HELP_STRING([--enable-fuzz-support], + [enable fuzzer support]), + , enable_fuzz_support=no) + +# Handle --enable-diff-fuzz-support +AC_ARG_ENABLE(diff_fuzz_support, + AS_HELP_STRING([--enable-diff-fuzz-support], + [enable differential fuzzer support]), + , enable_diff_fuzz_support=no) + +# Handle --disable-stack-for-recursion +# This option became obsolete at release 10.30. 
+AC_ARG_ENABLE(stack-for-recursion,, + , enable_stack_for_recursion=yes) + +# Original code +# AC_ARG_ENABLE(stack-for-recursion, +# AS_HELP_STRING([--disable-stack-for-recursion], +# [don't use stack recursion when matching]), +# , enable_stack_for_recursion=yes) + +# Handle --disable-percent_zt (set as "auto" by default) +AC_ARG_ENABLE(percent-zt, + AS_HELP_STRING([--disable-percent-zt], + [disable the use of z and t formatting modifiers]), + , enable_percent_zt=auto) + +# Set the default value for pcre2-8 +if test "x$enable_pcre2_8" = "xunset" +then + enable_pcre2_8=yes +fi + +# Set the default value for pcre2-16 +if test "x$enable_pcre2_16" = "xunset" +then + enable_pcre2_16=no +fi + +# Set the default value for pcre2-32 +if test "x$enable_pcre2_32" = "xunset" +then + enable_pcre2_32=no +fi + +# Make sure at least one library is selected +if test "x$enable_pcre2_8$enable_pcre2_16$enable_pcre2_32" = "xnonono" +then + AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled]) +fi + +# Unicode is enabled by default. +if test "x$enable_unicode" = "xunset" +then + enable_unicode=yes +fi + +# Convert the newline identifier into the appropriate integer value. These must +# agree with the PCRE2_NEWLINE_xxx values in pcre2.h. + +case "$enable_newline" in + cr) ac_pcre2_newline_value=1 ;; + lf) ac_pcre2_newline_value=2 ;; + crlf) ac_pcre2_newline_value=3 ;; + any) ac_pcre2_newline_value=4 ;; + anycrlf) ac_pcre2_newline_value=5 ;; + nul) ac_pcre2_newline_value=6 ;; + *) + AC_MSG_ERROR([invalid argument "$enable_newline" to --enable-newline option]) + ;; +esac + +# --enable-ebcdic-nl25 implies --enable-ebcdic +if test "x$enable_ebcdic_nl25" = "xyes"; then + enable_ebcdic=yes +fi + +# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled. +# Also check that UTF support is not requested, because PCRE2 cannot handle +# EBCDIC and UTF in the same build. To do so it would need to use different +# character constants depending on the mode. 
Also, EBCDIC cannot be used with +# 16-bit and 32-bit libraries. +# +if test "x$enable_ebcdic" = "xyes"; then + enable_rebuild_chartables=yes + if test "x$enable_unicode" = "xyes"; then + AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time]) + fi + if test "x$enable_pcre2_16" = "xyes" -o "x$enable_pcre2_32" = "xyes"; then + AC_MSG_ERROR([EBCDIC support is available only for the 8-bit library]) + fi +fi + +# Check argument to --with-link-size +case "$with_link_size" in + 2|3|4) ;; + *) + AC_MSG_ERROR([invalid argument "$with_link_size" to --with-link-size option]) + ;; +esac + +AH_TOP([ +/* PCRE2 is written in Standard C, but there are a few non-standard things it +can cope with, allowing it to run on SunOS4 and other "close to standard" +systems. + +In environments that support the GNU autotools, config.h.in is converted into +config.h by the "configure" script. In environments that use CMake, +config-cmake.in is converted into config.h. If you are going to build PCRE2 "by +hand" without using "configure" or CMake, you should copy the distributed +config.h.generic to config.h, and edit the macro definitions to be the way you +need them. You must then add -DHAVE_CONFIG_H to all of your compile commands, +so that config.h is included at the start of every source. + +Alternatively, you can avoid editing by using -D on the compiler command line +to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H, +but if you do, default values will be taken from config.h for non-boolean +macros that are not defined on the command line. + +Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be +defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All +such macros are listed as a commented #undef in config.h.generic. Macros such +as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are +surrounded by #ifndef/#endif lines so that the value can be overridden by -D. 
+ +PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if +HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make +sure both macros are undefined; an emulation function will then be used. */]) + +# Checks for header files. +AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h) +AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1]) +AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1]) + +# Conditional compilation +AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes") +AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes") +AM_CONDITIONAL(WITH_PCRE2_32, test "x$enable_pcre2_32" = "xyes") +AM_CONDITIONAL(WITH_DEBUG, test "x$enable_debug" = "xyes") +AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes") +AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes") +AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes") +AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes") +AM_CONDITIONAL(WITH_FUZZ_SUPPORT, test "x$enable_fuzz_support" = "xyes") +AM_CONDITIONAL(WITH_DIFF_FUZZ_SUPPORT, test "x$enable_diff_fuzz_support" = "xyes") + +if test "$enable_fuzz_support" = "yes" -a "$enable_pcre2_8" = "no"; then + echo "** ERROR: Fuzzer support requires the 8-bit library" + exit 1 +fi + +if test "$enable_diff_fuzz_support" = "yes"; then + if test "$enable_fuzz_support" = "no"; then + echo "** ERROR: Differential fuzzing support requires fuzzing support" + exit 1 + fi + if test "$enable_jit" = "no"; then + echo "** ERROR: Differential fuzzing support requires Just-in-Time compilation support" + exit 1 + fi + AC_DEFINE([SUPPORT_DIFF_FUZZ], [], [ + Define to any value to enable differential fuzzing support.]) +fi + +# Checks for typedefs, structures, and compiler characteristics. + +AC_C_CONST +AC_TYPE_SIZE_T + +# Checks for library functions. 
+ +AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror) +# realpath is link-tested by hand: the probe needs stdlib.h for the +# declaration and limits.h for PATH_MAX. +AC_MSG_CHECKING([for realpath]) +AC_LINK_IFELSE([AC_LANG_PROGRAM([[ +#include <stdlib.h> +#include <limits.h> +]],[[ +char buffer[PATH_MAX]; +realpath(".", buffer); +]])], +[AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_REALPATH], 1, + [Define to 1 if you have the `realpath' function.]) +], +AC_MSG_RESULT([no])) + +# Check for the availability of libz (aka zlib) + +AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1]) +AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1]) + +# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB, +# as for libz. However, this had the following problem, diagnosed and fixed by +# a user: +# +# - libbz2 uses the Pascal calling convention (WINAPI) for the functions +# under Win32. +# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h", +# therefore missing the function definition. +# - The compiler thus generates a "C" signature for the test function. +# - The linker fails to find the "C" function. +# - PCRE2 fails to configure if asked to do so against libbz2. +# +# Solution: +# +# - Replace the AC_CHECK_LIB test with a custom test.
+ +AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1]) +# Original test +# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1]) +# +# Custom test follows +# LIBS is extended only for the duration of the link test and restored +# afterwards. The success action must not use break because this test +# is not expanded inside a shell loop. + +AC_MSG_CHECKING([for libbz2]) +OLD_LIBS="$LIBS" +LIBS="$LIBS -lbz2" +AC_LINK_IFELSE([AC_LANG_PROGRAM([[ +#ifdef HAVE_BZLIB_H +#include <bzlib.h> +#endif]], +[[return (int)BZ2_bzopen("conftest", "rb");]])], +[AC_MSG_RESULT([yes]); HAVE_LIBBZ2=1], +AC_MSG_RESULT([no])) +LIBS="$OLD_LIBS" + +# Check for the availability of libreadline + +if test "$enable_pcre2test_libreadline" = "yes"; then + AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1]) + AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1]) + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltinfo"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lcurses"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncurses"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncursesw"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltermcap"], + [LIBREADLINE=""], + [-ltermcap])], + [-lncursesw])], + [-lncurses])], + [-lcurses])], + [-ltinfo])]) + AC_SUBST(LIBREADLINE) + if test -n "$LIBREADLINE"; then + if test "$LIBREADLINE" != "-lreadline"; then + echo "-lreadline needs $LIBREADLINE" + LIBREADLINE="-lreadline $LIBREADLINE" + fi + fi +fi + +# Check for the availability of libedit. Different distributions put its +# headers in different places. Try to cover the most common ones.
+ +if test "$enable_pcre2test_libedit" = "yes"; then + AC_CHECK_HEADERS([editline/readline.h edit/readline/readline.h readline.h], [ + HAVE_LIBEDIT_HEADER=1 + break + ]) + AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"]) +fi + +PCRE2_STATIC_CFLAG="" +if test "x$enable_shared" = "xno" ; then + AC_DEFINE([PCRE2_STATIC], [1], [ + Define to any value if linking statically (TODO: make nice with Libtool)]) + PCRE2_STATIC_CFLAG="-DPCRE2_STATIC" +fi +AC_SUBST(PCRE2_STATIC_CFLAG) + +PCRE2POSIX_CFLAG="" +if test "x$enable_shared" = "xyes" ; then + PCRE2POSIX_CFLAG="-DPCRE2POSIX_SHARED" +fi +AC_SUBST(PCRE2POSIX_CFLAG) + +# Here is where PCRE2-specific defines are handled + +if test "$enable_pcre2_8" = "yes"; then + AC_DEFINE([SUPPORT_PCRE2_8], [], [ + Define to any value to enable the 8 bit PCRE2 library.]) +fi + +if test "$enable_pcre2_16" = "yes"; then + AC_DEFINE([SUPPORT_PCRE2_16], [], [ + Define to any value to enable the 16 bit PCRE2 library.]) +fi + +if test "$enable_pcre2_32" = "yes"; then + AC_DEFINE([SUPPORT_PCRE2_32], [], [ + Define to any value to enable the 32 bit PCRE2 library.]) +fi + +if test "$enable_debug" = "yes"; then + AC_DEFINE([PCRE2_DEBUG], [], [ + Define to any value to include debugging code.]) +fi + +if test "$enable_percent_zt" = "no"; then + AC_DEFINE([DISABLE_PERCENT_ZT], [], [ + Define to any value to disable the use of the z and t modifiers in + formatting settings such as %zu or %td (this is rarely needed).]) +else + enable_percent_zt=auto +fi + +# Unless running under Windows, JIT support requires pthreads. 
+ +if test "$enable_jit" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])]) + CC="$PTHREAD_CC" + CFLAGS="$PTHREAD_CFLAGS $CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + fi + AC_DEFINE([SUPPORT_JIT], [], [ + Define to any value to enable support for Just-In-Time compiling.]) +else + enable_pcre2grep_jit="no" +fi + +if test "$enable_jit_sealloc" = "yes"; then + AC_DEFINE([SLJIT_PROT_EXECUTABLE_ALLOCATOR], [1], [ + Define to any non-zero number to enable support for SELinux + compatible executable memory allocator in JIT. Note that this + will have no effect unless SUPPORT_JIT is also defined.]) +fi + +if test "$enable_pcre2grep_jit" = "yes"; then + AC_DEFINE([SUPPORT_PCRE2GREP_JIT], [], [ + Define to any value to enable JIT support in pcre2grep. Note that this will + have no effect unless SUPPORT_JIT is also defined.]) +fi + +if test "$enable_pcre2grep_callout" = "yes"; then + if test "$enable_pcre2grep_callout_fork" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + if test "$HAVE_SYS_WAIT_H" != "1"; then + AC_MSG_ERROR([Callout script support needs sys/wait.h.]) + fi + fi + AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [ + Define to any value to enable fork support in pcre2grep callout scripts. + This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also + defined.]) + fi + AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [ + Define to any value to enable callout script support in pcre2grep.]) +else + enable_pcre2grep_callout_fork="no" +fi + +if test "$enable_unicode" = "yes"; then + AC_DEFINE([SUPPORT_UNICODE], [], [ + Define to any value to enable support for Unicode and UTF encoding. + This will work even in an EBCDIC environment, but it is incompatible + with the EBCDIC macro. 
That is, PCRE2 can support *either* EBCDIC + code *or* ASCII/Unicode, but not both at once.]) +fi + +if test "$enable_pcre2grep_libz" = "yes"; then + AC_DEFINE([SUPPORT_LIBZ], [], [ + Define to any value to allow pcre2grep to be linked with libz, so that it is + able to handle .gz files.]) +fi + +if test "$enable_pcre2grep_libbz2" = "yes"; then + AC_DEFINE([SUPPORT_LIBBZ2], [], [ + Define to any value to allow pcre2grep to be linked with libbz2, so that it + is able to handle .bz2 files.]) +fi + +if test $with_pcre2grep_bufsize -lt 8192 ; then + AC_MSG_WARN([$with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192]) + with_pcre2grep_bufsize="8192" +else + if test $? -gt 1 ; then + AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize]) + fi +fi + +if test $with_pcre2grep_max_bufsize -lt $with_pcre2grep_bufsize ; then + with_pcre2grep_max_bufsize="$with_pcre2grep_bufsize" +else + if test $? -gt 1 ; then + AC_MSG_ERROR([Bad value for --with-pcre2grep-max-bufsize]) + fi +fi + +AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [ + The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by + pcre2grep to hold parts of the file it is searching. The buffer will be + expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing very + long lines. The actual amount of memory used by pcre2grep is three times this + number, because it allows for the buffering of "before" and "after" lines.]) + +AC_DEFINE_UNQUOTED([PCRE2GREP_MAX_BUFSIZE], [$with_pcre2grep_max_bufsize], [ + The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer + used by pcre2grep to hold parts of the file it is searching. 
The actual + amount of memory used by pcre2grep is three times this number, because it + allows for the buffering of "before" and "after" lines.]) + +if test "$enable_pcre2test_libedit" = "yes"; then + AC_DEFINE([SUPPORT_LIBEDIT], [], [ + Define to any value to allow pcre2test to be linked with libedit.]) + LIBREADLINE="$LIBEDIT" +elif test "$enable_pcre2test_libreadline" = "yes"; then + AC_DEFINE([SUPPORT_LIBREADLINE], [], [ + Define to any value to allow pcre2test to be linked with libreadline.]) +fi + +AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [ + The value of NEWLINE_DEFAULT determines the default newline character + sequence. PCRE2 client programs can override this by selecting other values + at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), + 5 (ANYCRLF), and 6 (NUL).]) + +if test "$enable_bsr_anycrlf" = "yes"; then + AC_DEFINE([BSR_ANYCRLF], [], [ + By default, the \R escape sequence matches any Unicode line ending + character or sequence of characters. If BSR_ANYCRLF is defined (to any + value), this is changed so that backslash-R matches only CR, LF, or CRLF. + The build-time default can be overridden by the user of PCRE2 at runtime.]) +fi + +if test "$enable_never_backslash_C" = "yes"; then + AC_DEFINE([NEVER_BACKSLASH_C], [], [ + Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns.]) +fi + +AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [ + The value of LINK_SIZE determines the number of bytes used to store + links as offsets within the compiled regex. The default is 2, which + allows for compiled patterns up to 65535 code units long. This covers the + vast majority of cases. However, PCRE2 can also be compiled to use 3 or 4 + bytes instead. 
This allows for longer patterns in extreme cases.]) + +AC_DEFINE_UNQUOTED([MAX_VARLOOKBEHIND], [$with_max_varlookbehind], [ + The value of MAX_VARLOOKBEHIND specifies the default maximum length, in + characters, for a variable-length lookbehind assertion.]) + +AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [ + The value of PARENS_NEST_LIMIT specifies the maximum depth of nested + parentheses (of any kind) in a pattern. This limits the amount of system + stack that is used while compiling a pattern.]) + +AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [ + The value of MATCH_LIMIT determines the default number of times the + pcre2_match() function can record a backtrack position during a single + matching attempt. The value is also used to limit a loop counter in + pcre2_dfa_match(). There is a runtime interface for setting a different + limit. The limit exists in order to catch runaway regular expressions that + take forever to determine that they do not match. The default is set very + large so that it does not accidentally catch legitimate cases.]) + +# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth + +if test "$with_match_limit_recursion" != "UNSET"; then +cat <. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva . + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. + +Environment variables: + depmode Dependency tracking mode. + source Source file read by 'PROGRAMS ARGS'. + object Object file output by 'PROGRAMS ARGS'. 
+ DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputting dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to <bug-automake@gnu.org>. +EOF + exit $? + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit $? + ;; +esac + +# Get the directory component of the given path, and save it in the +# global variable '$dir'. Note that this directory component will +# be either empty or ending with a '/' character. This is deliberate. +set_dir_from () +{ + case $1 in + */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; + *) dir=;; + esac +} + +# Get the suffix-stripped basename of the given path, and save it in the +# global variable '$base'. +set_base_from () +{ + base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` +} + +# If no dependency file was actually created by the compiler invocation, +# we still have to create a dummy depfile, to avoid errors with the +# Makefile "include basename.Plo" scheme. +make_dummy_depfile () +{ + echo "#dummy" > "$depfile" +} + +# Factor out some common post-processing of the generated depfile. +# Requires the auxiliary global variable '$tmpdepfile' to be set. +aix_post_process_depfile () +{ + # If the compiler actually managed to produce a dependency file, + # post-process it. + if test -f "$tmpdepfile"; then + # Each line is of the form 'foo.o: dependency.h'. + # Do two passes, one to just change these to + # $object: dependency.h + # and one to simply output + # dependency.h: + # which is needed to avoid the deleted-header problem. + { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" + sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" + } > "$depfile" + rm -f "$tmpdepfile" + else + make_dummy_depfile + fi +} + +# A tabulation character. +tab='	' +# A newline character. +nl=' +' +# Character ranges might be problematic outside the C locale. +# These definitions help.
+upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ +lower=abcdefghijklmnopqrstuvwxyz +digits=0123456789 +alpha=${upper}${lower} + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. +depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Avoid interferences from the environment. +gccflag= dashmflag= + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +cygpath_u="cygpath -u -f -" +if test "$depmode" = msvcmsys; then + # This is just like msvisualcpp but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvisualcpp +fi + +if test "$depmode" = msvc7msys; then + # This is just like msvc7 but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvc7 +fi + +if test "$depmode" = xlc; then + # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. + gccflag=-qmakedep=gcc,-MF + depmode=gcc +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! 
Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. +## Unfortunately, FreeBSD c89 acceptance of flags depends upon +## the command line argument order; so add the flags where they +## appear in depend2.am. Note that the slowdown incurred here +## affects only configure: in makefiles, %FASTDEP% shortcuts this. + for arg + do + case $arg in + -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; + *) set fnord "$@" "$arg" ;; + esac + shift # fnord + shift # $arg + done + "$@" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## Note that this doesn't just cater to obsolete pre-3.x GCC compilers, +## but also to in-use compilers like IBM xlc/xlC and the HP C compiler. +## (see the conditional assignment to $gccflag above). +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. +## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). Also, it might not be +## supported by the other compilers which use the 'gcc' depmode. +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The second -e expression handles DOS-style file names with drive + # letters. + sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the "deleted header file" problem.
+## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. +## Some versions of gcc put a space before the ':'. On the theory +## that the space means something, we add a space to the output as +## well. hp depmode also adds that space, but also prefixes the VPATH +## to the object. Take care to not repeat it in the output. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like '#:fec' to the end of the + # dependency line. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ + | tr "$nl" ' ' >> "$depfile" + echo >> "$depfile" + # The second pass generates a dummy entry for each header file. 
+ tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" + ;; + +xlc) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts '$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.u + tmpdepfile2=$base.u + tmpdepfile3=$dir.libs/$base.u + "$@" -Wc,-M + else + tmpdepfile1=$dir$base.u + tmpdepfile2=$dir$base.u + tmpdepfile3=$dir$base.u + "$@" -M + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + aix_post_process_depfile + ;; + +tcc) + # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 + # FIXME: That version still under development at the moment of writing. + # Make that this statement remains true also for stable, released + # versions. + # It will wrap lines (doesn't matter whether long or short) with a + # trailing '\', as in: + # + # foo.o : \ + # foo.c \ + # foo.h \ + # + # It will put a trailing '\' even on the last line, and will use leading + # spaces rather than leading tabs (at least since its commit 0394caf7 + # "Emit spaces for -MD"). + "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. 
+ # We have to change lines of the first kind to '$object: \'. + sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" + # And for each line of the second kind, we have to emit a 'dep.h:' + # dummy dependency, to avoid the deleted-header problem. + sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" + rm -f "$tmpdepfile" + ;; + +## The order of this option in the case statement is important, since the +## shell code in configure will try each of these formats in the order +## listed in this file. A plain '-MD' option would be understood by many +## compilers, so we must ensure this comes after the gcc and icc options. +pgcc) + # Portland's C compiler understands '-MD'. + # Will always output deps to 'file.d' where file is the root name of the + # source file under compilation, even if file resides in a subdirectory. + # The object file name does not affect the name of the '.d' file. + # pgcc 10.2 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using '\' : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + set_dir_from "$object" + # Use the source, not the object, to determine the base name, since + # that's sadly what pgcc will do too. + set_base_from "$source" + tmpdepfile=$base.d + + # For projects that build the same source file twice into different object + # files, the pgcc approach of using the *source* file root name can cause + # problems in parallel builds. Use a locking strategy to avoid stomping on + # the same $tmpdepfile. + lockdir=$base.d-lock + trap " + echo '$0: caught signal, cleaning up...' >&2 + rmdir '$lockdir' + exit 1 + " 1 2 13 15 + numtries=100 + i=$numtries + while test $i -gt 0; do + # mkdir is a portable test-and-set. + if mkdir "$lockdir" 2>/dev/null; then + # This process acquired the lock. + "$@" -MD + stat=$? + # Release the lock. + rmdir "$lockdir" + break + else + # If the lock is being held by a different process, wait + # until the winning process is done or we timeout. 
+ while test -d "$lockdir" && test $i -gt 0; do + sleep 1 + i=`expr $i - 1` + done + fi + i=`expr $i - 1` + done + trap - 1 2 13 15 + if test $i -le 0; then + echo "$0: failed to acquire lock after $numtries attempts" >&2 + echo "$0: check lockdir '$lockdir'" >&2 + exit 1 + fi + + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp2) + # The "hp" stanza above does not work with aCC (C++) and HP's ia64 + # compilers, which have integrated preprocessors. The correct option + # to use with these is +Maked; it writes dependencies to a file named + # 'foo.d', which lands next to the object file, wherever that + # happens to be. + # Much of this is similar to the tru64 case; see comments there. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir.libs/$base.d + "$@" -Wc,+Maked + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + "$@" +Maked + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" + # Add 'dependent.h:' lines. 
+ sed -ne '2,${ + s/^ *// + s/ \\*$// + s/$/:/ + p + }' "$tmpdepfile" >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" "$tmpdepfile2" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in 'foo.d' instead, so we check for that too. + # Subdirectories are respected. + set_dir_from "$object" + set_base_from "$object" + + if test "$libtool" = yes; then + # Libtool generates 2 separate objects for the 2 libraries. These + # two compilations output dependencies in $dir.libs/$base.o.d and + # in $dir$base.o.d. We have to check for both files, because + # one of the two compilations can be disabled. We should prefer + # $dir$base.o.d over $dir.libs/$base.o.d because the latter is + # automatically cleaned when .libs/ is deleted, while ignoring + # the former would cause a distcleancheck panic. + tmpdepfile1=$dir$base.o.d # libtool 1.5 + tmpdepfile2=$dir.libs/$base.o.d # Likewise. + tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 + "$@" -Wc,-MD + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + tmpdepfile3=$dir$base.d + "$@" -MD + fi + + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + # Same post-processing that is required for AIX mode. + aix_post_process_depfile + ;; + +msvc7) + if test "$libtool" = yes; then + showIncludes=-Wc,-showIncludes + else + showIncludes=-showIncludes + fi + "$@" $showIncludes > "$tmpdepfile" + stat=$? 
+ grep -v '^Note: including file: ' "$tmpdepfile" + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The first sed program below extracts the file names and escapes + # backslashes for cygpath. The second sed program outputs the file + # name when reading, but also accumulates all include files in the + # hold buffer in order to output them again at the end. This only + # works with sed implementations that can handle large buffers. + sed < "$tmpdepfile" -n ' +/^Note: including file: *\(.*\)/ { + s//\1/ + s/\\/\\\\/g + p +}' | $cygpath_u | sort -u | sed -n ' +s/ /\\ /g +s/\(.*\)/'"$tab"'\1 \\/p +s/.\(.*\) \\/\1:/ +H +$ { + s/.*/'"$tab"'/ + G + p +}' >> "$depfile" + echo >> "$depfile" # make sure the fragment doesn't end with a backslash + rm -f "$tmpdepfile" + ;; + +msvc7msys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for ':' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. 
+ "$@" $dashmflag | + sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this sed invocation + # correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no eat=no + for arg + do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + if test $eat = yes; then + eat=no + continue + fi + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -arch) + eat=yes ;; + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix=`echo "$object" | sed 's/^.*\././'` + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + # makedepend may prepend the VPATH from the source file name to the object. + # No need to regex-escape $object, excess matching of '.' is harmless. + sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process the last invocation + # correctly. Breaking it into two sed invocations is a workaround. 
+ sed '1,2d' "$tmpdepfile" \ + | tr ' ' "$nl" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E \ + | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + | sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + IFS=" " + for arg + do + case "$arg" in + -o) + shift + ;; + $object) + shift + ;; + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E 2>/dev/null | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" + echo "$tab" >> "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvcmsys) + # This case exists only to let depend.m4 do its work. 
It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/doc/html/NON-AUTOTOOLS-BUILD.txt b/doc/html/NON-AUTOTOOLS-BUILD.txt new file mode 100644 index 0000000..851976a --- /dev/null +++ b/doc/html/NON-AUTOTOOLS-BUILD.txt @@ -0,0 +1,430 @@ +Building PCRE2 without using autotools +-------------------------------------- + +This document contains the following sections: + + General + Generic instructions for the PCRE2 C libraries + Stack size in Windows environments + Linking programs in Windows environments + Calling conventions in Windows environments + Comments about Win32 builds + Building PCRE2 on Windows with CMake + Building PCRE2 on Windows with Visual Studio + Testing with RunTest.bat + Building PCRE2 on native z/OS and z/VM + Building PCRE2 under VMS + + +GENERAL + +The source of the PCRE2 libraries consists entirely of code written in Standard +C, and so should compile successfully on any system that has a Standard C +compiler and library. + +The PCRE2 distribution includes a "configure" file for use by the +configure/make (autotools) build system, as found in many Unix-like +environments. The README file contains information about the options for +"configure". + +There is also support for CMake, which some users prefer, especially in Windows +environments, though it can also be run in Unix-like environments. See the +section entitled "Building PCRE2 on Windows with CMake" below. 
+ +Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs +under the names src/config.h.generic and src/pcre2.h.generic. These are +provided for those who build PCRE2 without using "configure" or CMake. If you +use "configure" or CMake, the .generic versions are not used. + + +GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARIES + +There are three possible PCRE2 libraries, each handling data with a specific +code unit width: 8, 16, or 32 bits. You can build any combination of them. The +following are generic instructions for building a PCRE2 C library "by hand". If +you are going to use CMake, this section does not apply to you; you can skip +ahead to the CMake section. Note that the settings concerned with 8-bit, +16-bit, and 32-bit code units relate to the type of data string that PCRE2 +processes. They are NOT referring to the underlying operating system bit width. +You do not have to do anything special to compile in a 64-bit environment, for +example. + + (1) Copy or rename the file src/config.h.generic as src/config.h, and edit the + macro settings that it contains to whatever is appropriate for your + environment. In particular, you can alter the definition of the NEWLINE + macro to specify what character(s) you want to be interpreted as line + terminators by default. You need to #define at least one of + SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, or SUPPORT_PCRE2_32, depending on which + libraries you are going to build. You must set all that apply. + + When you subsequently compile any of the PCRE2 modules, you must specify + -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the + sources. + + An alternative approach is not to edit src/config.h, but to use -D on the + compiler command line to make any changes that you need to the + configuration options. In this case -DHAVE_CONFIG_H must not be set. 
+ + NOTE: There have been occasions when the way in which certain parameters + in src/config.h are used has changed between releases. (In the + configure/make world, this is handled automatically.) When upgrading to a + new release, you are strongly advised to review src/config.h.generic + before re-using what you had previously. + + Note also that the src/config.h.generic file is created from a config.h + that was generated by Autotools, which automatically includes settings of + a number of macros that are not actually used by PCRE2 (for example, + HAVE_DLFCN_H). + + (2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h. + + (3) EITHER: + Copy or rename file src/pcre2_chartables.c.dist as + src/pcre2_chartables.c. + + OR: + Compile src/pcre2_dftables.c as a stand-alone program (using + -DHAVE_CONFIG_H if you have set up src/config.h), and then run it with + the single argument "src/pcre2_chartables.c". This generates a set of + standard character tables and writes them to that file. The tables are + generated using the default C locale for your system. If you want to use + a locale that is specified by LC_xxx environment variables, add the -L + option to the pcre2_dftables command. You must use this method if you + are building on a system that uses EBCDIC code. + + The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can + specify alternative tables at run time. + + (4) For a library that supports 8-bit code units in the character strings that + it processes, compile the following source files from the src directory, + setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also set + -DHAVE_CONFIG_H if you have set up src/config.h with your configuration, + or else use other -D settings to change the configuration as required. 
+ + pcre2_auto_possess.c + pcre2_chkdint.c + pcre2_chartables.c + pcre2_compile.c + pcre2_config.c + pcre2_context.c + pcre2_convert.c + pcre2_dfa_match.c + pcre2_error.c + pcre2_extuni.c + pcre2_find_bracket.c + pcre2_jit_compile.c + pcre2_maketables.c + pcre2_match.c + pcre2_match_data.c + pcre2_newline.c + pcre2_ord2utf.c + pcre2_pattern_info.c + pcre2_script_run.c + pcre2_serialize.c + pcre2_string_utils.c + pcre2_study.c + pcre2_substitute.c + pcre2_substring.c + pcre2_tables.c + pcre2_ucd.c + pcre2_valid_utf.c + pcre2_xclass.c + + Make sure that you include -I. in the compiler command (or equivalent for + an unusual compiler) so that all included PCRE2 header files are first + sought in the src directory under the current directory. Otherwise you run + the risk of picking up a previously-installed file from somewhere else. + + Note that you must compile pcre2_jit_compile.c, even if you have not + defined SUPPORT_JIT in src/config.h, because when JIT support is not + configured, dummy functions are compiled. When JIT support IS configured, + pcre2_jit_compile.c #includes other files from the sljit subdirectory, + all of whose names begin with "sljit". It also #includes + src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile + those yourself. + + Note also that the pcre2_fuzzsupport.c file contains special code that is + useful to those who want to run fuzzing tests on the PCRE2 library. Unless + you are doing that, you can ignore it. + + (5) Now link all the compiled code into an object library in whichever form + your system keeps such libraries. This is the PCRE2 C 8-bit library, + typically called something like libpcre2-8. If your system has static and + shared libraries, you may have to do this once for each type. + + (6) If you want to build a library that supports 16-bit or 32-bit code units, + set 16 or 32 as the value of -DPCRE2_CODE_UNIT_WIDTH when obeying step 4 + above. 
If you want to build more than one PCRE2 library, repeat steps 4 + and 5 as necessary. + + (7) If you want to build the POSIX wrapper functions (which apply only to the + 8-bit library), ensure that you have the src/pcre2posix.h file and then + compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix + library. If targeting a DLL in Windows, make sure to include + -DPCRE2POSIX_SHARED with your compiler flags. + + (8) The pcre2test program can be linked with any combination of the 8-bit, + 16-bit and 32-bit libraries (depending on what you specified in + src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if + necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the + appropriate library/ies. If you compiled an 8-bit library, pcre2test also + needs the pcre2posix wrapper library. + + (9) Run pcre2test on the testinput files in the testdata directory, and check + that the output matches the corresponding testoutput files. There are + comments about what each test does in the section entitled "Testing PCRE2" + in the README file. If you compiled more than one of the 8-bit, 16-bit and + 32-bit libraries, you need to run pcre2test with the -16 option to do + 16-bit tests and with the -32 option to do 32-bit tests. + + Some tests are relevant only when certain build-time options are selected. + For example, test 4 is for Unicode support, and will not run if you have + built PCRE2 without it. See the comments at the start of each testinput + file. If you have a suitable Unix-like shell, the RunTest script will run + the appropriate tests for you. The command "RunTest list" will output a + list of all the tests. + + Note that the supplied files are in Unix format, with just LF characters + as line terminators. You may need to edit them to change this if your + system uses a different convention. + +(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested + by running pcre2test with the -jit option.
This is done automatically by + the RunTest script. You might also like to build and run the freestanding + JIT test program, src/pcre2_jit_test.c. + +(11) The pcre2test program tests the POSIX wrapper library, but there is also a + freestanding test program in src/pcre2posix_test.c. It must be linked with + both the pcre2posix library and the 8-bit PCRE2 library. + +(12) If you want to use the pcre2grep command, compile and link + src/pcre2grep.c; it uses only the 8-bit PCRE2 library (it does not need + the pcre2posix library). If you have built the PCRE2 library with JIT + support by defining SUPPORT_JIT in src/config.h, you can also define + SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless + it is run with --no-jit). If you define SUPPORT_PCRE2GREP_JIT without + defining SUPPORT_JIT, pcre2grep does not try to make use of JIT. + + +STACK SIZE IN WINDOWS ENVIRONMENTS + +Prior to release 10.30 the default system stack size of 1MiB in some Windows +environments caused issues with some tests. This should no longer be the case +for 10.30 and later releases. + + +LINKING PROGRAMS IN WINDOWS ENVIRONMENTS + +If you want to statically link a program against a PCRE2 library in the form of +a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h. + + +CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS + +It is possible to compile programs to use different calling conventions using +MSVC. Search the web for "calling conventions" for more information. To make it +easier to change the calling convention for the exported functions in a +PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external +definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is +not set, it defaults to empty; the default calling convention is then used +(which is what is wanted most of the time). 
+ + +COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE") + +There are two ways of building PCRE2 using the "configure, make, make install" +paradigm on Windows systems: using MinGW or using Cygwin. These are not at all +the same thing; they are completely different from each other. There is also +support for building using CMake, which some users find a more straightforward +way of building PCRE2 under Windows. + +The MinGW home page (http://www.mingw.org/) says this: + + MinGW: A collection of freely available and freely distributable Windows + specific header files and import libraries combined with GNU toolsets that + allow one to produce native Windows programs that do not rely on any + 3rd-party C runtime DLLs. + +The Cygwin home page (http://www.cygwin.com/) says this: + + Cygwin is a Linux-like environment for Windows. It consists of two parts: + + . A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing + substantial Linux API functionality + + . A collection of tools which provide Linux look and feel. + +On both MinGW and Cygwin, PCRE2 should build correctly using: + + ./configure && make && make install + +This should create two libraries called libpcre2-8 and libpcre2-posix. These +are independent libraries: when you link with libpcre2-posix you must also link +with libpcre2-8, which contains the basic functions. + +Using Cygwin's compiler generates libraries and executables that depend on +cygwin1.dll. If a library that is generated this way is distributed, +cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL +licence, this forces not only PCRE2 to be under the GPL, but also the entire +application. A distributor who wants to keep their own code proprietary must +purchase an appropriate Cygwin licence. + +MinGW has no such restrictions. The MinGW compiler generates a library or +executable that can run standalone on Windows without any third party dll or +licensing issues. 
+ +But there is more complication: + +If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is +to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a +front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's +gcc and MinGW's gcc). So, a user can: + +. Build native binaries by using MinGW or by getting Cygwin and using + -mno-cygwin. + +. Build binaries that depend on cygwin1.dll by using Cygwin with the normal + compiler flags. + +The test files that are supplied with PCRE2 are in UNIX format, with LF +characters as line terminators. Unless your PCRE2 library uses a default +newline option that includes LF as a valid newline, it may be necessary to +change the line terminators in the test files to get some of the tests to work. + + +BUILDING PCRE2 ON WINDOWS WITH CMAKE + +CMake is an alternative configuration facility that can be used instead of +"configure". CMake creates project files (make files, solution files, etc.) +tailored to numerous development environments, including Visual Studio, +Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no +spaces in the names for your CMake installation and your PCRE2 source and build +directories. + +The following instructions were contributed by a PCRE1 user, but they should +also work for PCRE2. If they are not followed exactly, errors may occur. In the +event that errors do occur, it is recommended that you delete the CMake cache +before attempting to repeat the CMake build process. In the CMake GUI, the +cache can be deleted by selecting "File > Delete Cache". + +1. Install the latest CMake version available from http://www.cmake.org/, and + ensure that cmake\bin is on your path. + +2. Unzip (retaining folder structure) the PCRE2 source tree into a source + directory such as C:\pcre2. You should ensure your local date and time + is not earlier than the file dates in your source dir if the release is + very new. + +3. 
Create a new, empty build directory, preferably a subdirectory of the + source dir. For example, C:\pcre2\pcre2-xx\build. + +4. Run cmake-gui from the Shell environment of your build tool, for example, + Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try + to start Cmake from the Windows Start menu, as this can lead to errors. + +5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and + build directories, respectively. + +6. Hit the "Configure" button. + +7. Select the particular IDE / build tool that you are using (Visual + Studio, MSYS makefiles, MinGW makefiles, etc.) + +8. The GUI will then list several configuration options. This is where + you can disable Unicode support or select other PCRE2 optional features. + +9. Hit "Configure" again. The adjacent "Generate" button should now be + active. + +10. Hit "Generate". + +11. The build directory should now contain a usable build system, be it a + solution file for Visual Studio, makefiles for MinGW, etc. Exit from + cmake-gui and use the generated build system with your compiler or IDE. + E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2 + solution, select the desired configuration (Debug, or Release, etc.) and + build the ALL_BUILD project. + +12. If during configuration with cmake-gui you've elected to build the test + programs, you can execute them by building the test project. E.g., for + MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The + most recent build configuration is targeted by the tests. A summary of + test results is presented. Complete test output is subsequently + available for review in Testing\Temporary under your build dir. + + +BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO + +The code currently cannot be compiled without an inttypes.h header, which is +available only with Visual Studio 2013 or newer. 
However, this portable and +permissively-licensed implementation of the stdint.h header could be used as an +alternative: + + http://www.azillionmonkeys.com/qed/pstdint.h + +Just rename it and drop it into the top level of the build tree. + + +TESTING WITH RUNTEST.BAT + +If configured with CMake, building the test project ("make test" or building +ALL_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending +on your configuration options, possibly other test programs) in the build +directory. The pcre2_test.bat script runs RunTest.bat with correct source and +exe paths. + +For manual testing with RunTest.bat, provided the build dir is a subdirectory +of the source directory: Open a command shell window. Chdir to the location +of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with +"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate. + +To run only a particular test with RunTest.Bat provide a test number argument. + +Otherwise: + +1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe + have been created. + +2. Edit RunTest.bat to identify the full or relative location of + the pcre2 source (in which the testdata folder resides), e.g.: + + set srcdir=C:\pcre2\pcre2-10.00 + +3. In a Windows command environment, chdir to the location of your bat and + exe programs. + +4. Run RunTest.bat. Test outputs will automatically be compared to expected + results, and discrepancies will be identified in the console output. + +To independently test the just-in-time compiler, run pcre2_jit_test.exe. + + +BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM + +z/OS and z/VM are operating systems for mainframe computers, produced by IBM. +The character code used is EBCDIC, not ASCII or Unicode.
In z/OS, UNIX APIs and +applications can be supported through UNIX System Services, and in such an +environment it should be possible to build PCRE2 in the same way as in other +systems, with the EBCDIC-related configuration settings, but it is not known if +anybody has tried this. + +In native z/OS (without UNIX System Services) and in z/VM, special ports are +required. For details, please see file 939 on this web site: + + http://www.cbttape.org + +Everything in that location, source and executable, is in EBCDIC and native +z/OS file formats. The port provides an API for LE languages such as COBOL and +for the z/OS and z/VM versions of the Rexx languages. + + +BUILDING PCRE2 UNDER VMS + +Alexey Chuphin has contributed some auxiliary files for building PCRE2 under +OpenVMS. They are in the "vms" directory in the distribution tarball. Please +read the file called vms/openvms_readme.txt. The pcre2test and pcre2grep +programs contain some VMS-specific code. + +=========================== +Last Updated: 16 April 2024 +=========================== diff --git a/doc/html/README.txt b/doc/html/README.txt new file mode 100644 index 0000000..dab5e94 --- /dev/null +++ b/doc/html/README.txt @@ -0,0 +1,956 @@ +README file for PCRE2 (Perl-compatible regular expression library) +------------------------------------------------------------------ + +PCRE2 is a re-working of the original PCRE1 library to provide an entirely new +API. Since its initial release in 2015, there has been further development of +the code and it now differs from PCRE1 in more than just the API. There are new +features, and the internals have been improved. The original PCRE1 library is +now obsolete and no longer maintained. The latest release of PCRE2 is available +in .tar.gz, .tar.bz2, or .zip form from this GitHub repository: + +https://github.com/PCRE2Project/pcre2/releases + +There is a mailing list for discussion about the development of PCRE2 at +pcre2-dev@googlegroups.com.
You can subscribe by sending an email to +pcre2-dev+subscribe@googlegroups.com. + +You can access the archives and also subscribe or manage your subscription +here: + +https://groups.google.com/g/pcre2-dev + +Please read the NEWS file if you are upgrading from a previous release. The +contents of this README file are: + + The PCRE2 APIs + Documentation for PCRE2 + Building PCRE2 on non-Unix-like systems + Building PCRE2 without using autotools + Building PCRE2 using autotools + Retrieving configuration information + Shared libraries + Cross-compiling using autotools + Making new tarballs + Testing PCRE2 + Character tables + File manifest + + +The PCRE2 APIs +-------------- + +PCRE2 is written in C, and it has its own API. There are three sets of +functions, one for the 8-bit library, which processes strings of bytes, one for +the 16-bit library, which processes strings of 16-bit values, and one for the +32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there +are no C++ wrappers. + +The distribution does contain a set of C wrapper functions for the 8-bit +library that are based on the POSIX regular expression API (see the pcre2posix +man page). These are built into a library called libpcre2-posix. Note that this +just provides a POSIX calling interface to PCRE2; the regular expressions +themselves still follow Perl syntax and semantics. The POSIX API is restricted, +and does not give full access to all of PCRE2's facilities. + +The header file for the POSIX-style functions is called pcre2posix.h. The +official POSIX name is regex.h, but I did not want to risk possible problems +with existing files of that name by distributing it that way. To use PCRE2 with +an existing program that uses the POSIX API, pcre2posix.h will have to be +renamed or pointed at by a link (or the program modified, of course). See the +pcre2posix documentation for more details. 
+ + +Documentation for PCRE2 +----------------------- + +If you install PCRE2 in the normal way on a Unix-like system, you will end up +with a set of man pages whose names all start with "pcre2". The one that is +just called "pcre2" lists all the others. In addition to these man pages, the +PCRE2 documentation is supplied in two other forms: + + 1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and + doc/pcre2test.txt in the source distribution. The first of these is a + concatenation of the text forms of all the section 3 man pages except the + listing of pcre2demo.c and those that summarize individual functions. The + other two are the text forms of the section 1 man pages for the pcre2grep + and pcre2test commands. These text forms are provided for ease of scanning + with text editors or similar tools. They are installed in + <prefix>/share/doc/pcre2, where <prefix> is the installation prefix + (defaulting to /usr/local). + + 2. A set of files containing all the documentation in HTML form, hyperlinked + in various ways, and rooted in a file called index.html, is distributed in + doc/html and installed in <prefix>/share/doc/pcre2/html. + + +Building PCRE2 on non-Unix-like systems +--------------------------------------- + +For a non-Unix-like system, please read the file NON-AUTOTOOLS-BUILD, though if +your system supports the use of "configure" and "make" you may be able to build +PCRE2 using autotools in the same way as for many Unix-like systems. + +PCRE2 can also be configured using CMake, which can be run in various ways +(command line, GUI, etc). This creates Makefiles, solution files, etc. The file +NON-AUTOTOOLS-BUILD has information about CMake. + +PCRE2 has been compiled on many different operating systems. It should be +straightforward to build PCRE2 on any system that has a Standard C compiler and +library, because it uses only Standard C functions.
+ + +Building PCRE2 without using autotools +-------------------------------------- + +The use of autotools (in particular, libtool) is problematic in some +environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD +file for ways of building PCRE2 without using autotools. + + +Building PCRE2 using autotools +------------------------------ + +The following instructions assume the use of the widely used "configure; make; +make install" (autotools) process. + +If you have downloaded and unpacked a PCRE2 release tarball, run the +"configure" command from the PCRE2 directory, with your current directory set +to the directory where you want the files to be created. This command is a +standard GNU "autoconf" configuration script, for which generic instructions +are supplied in the file INSTALL. + +The files in the GitHub repository do not contain "configure". If you have +downloaded the PCRE2 source files from GitHub, before you can run "configure" +you must run the shell script called autogen.sh. This runs a number of +autotools to create a "configure" script (you must of course have the autotools +commands installed in order to do this). + +Most commonly, people build PCRE2 within its own distribution directory, and in +this case, on many systems, just running "./configure" is sufficient. However, +the usual methods of changing standard defaults are available. For example: + +CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local + +This command specifies that the C compiler should be run with the flags '-O2 +-Wall' instead of the default, and that "make install" should install PCRE2 +under /opt/local instead of the default /usr/local. + +If you want to build in a different directory, just run "configure" with that +directory as current. 
For example, suppose you have unpacked the PCRE2 source +into /source/pcre2/pcre2-xxx, but you want to build it in +/build/pcre2/pcre2-xxx: + +cd /build/pcre2/pcre2-xxx +/source/pcre2/pcre2-xxx/configure + +PCRE2 is written in C and is normally compiled as a C library. However, it is +possible to build it as a C++ library, though the provided building apparatus +does not have any features to support this. + +There are some optional features that can be included or omitted from the PCRE2 +library. They are also documented in the pcre2build man page. + +. By default, both shared and static libraries are built. You can change this + by adding one of these options to the "configure" command: + + --disable-shared + --disable-static + + Setting --disable-shared ensures that PCRE2 libraries are built as static + libraries. The binaries that are then created as part of the build process + (for example, pcre2test and pcre2grep) are linked statically with one or more + PCRE2 libraries, but may also be dynamically linked with other libraries such + as libc. If you want these binaries to be fully statically linked, you can + set LDFLAGS like this: + + LDFLAGS=--static ./configure --disable-shared + + Note the two hyphens in --static. Of course, this works only if static + versions of all the relevant libraries are available for linking. See also + "Shared libraries" below. + +. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to + the "configure" command, the 16-bit library is also built. If you add + --enable-pcre2-32 to the "configure" command, the 32-bit library is also + built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8 + to disable building the 8-bit library. + +. If you want to include support for just-in-time (JIT) compiling, which can + give large performance improvements on certain platforms, add --enable-jit to + the "configure" command. This support is available only for certain hardware + architectures. 
If you try to enable it on an unsupported architecture, there + will be a compile time error. If in doubt, use --enable-jit=auto, which + enables JIT only if the current hardware is supported. + +. If you are enabling JIT under SELinux environment you may also want to add + --enable-jit-sealloc, which enables the use of an executable memory allocator + that is compatible with SELinux. Warning: this allocator is experimental! + It does not support fork() operation and may crash when no disk space is + available. This option has no effect if JIT is disabled. + +. If you do not want to make use of the default support for UTF-8 Unicode + character strings in the 8-bit library, UTF-16 Unicode character strings in + the 16-bit library, or UTF-32 Unicode character strings in the 32-bit + library, you can add --disable-unicode to the "configure" command. This + reduces the size of the libraries. It is not possible to configure one + library with Unicode support, and another without, in the same configuration. + It is also not possible to use --enable-ebcdic (see below) with Unicode + support, so if this option is set, you must also use --disable-unicode. + + When Unicode support is available, the use of a UTF encoding still has to be + enabled by setting the PCRE2_UTF option at run time or starting a pattern + with (*UTF). When PCRE2 is compiled with Unicode support, its input can only + either be ASCII or UTF-8/16/32, even when running on EBCDIC platforms. + + As well as supporting UTF strings, Unicode support includes support for the + \P, \p, and \X sequences that recognize Unicode character properties. + However, only a subset of Unicode properties are supported; see the + pcre2pattern man page for details. Escape sequences such as \d and \w in + patterns do not by default make use of Unicode properties, but can be made to + do so by setting the PCRE2_UCP option or starting a pattern with (*UCP). + +. 
You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any + of the preceding, or any of the Unicode newline sequences, or the NUL (zero) + character as indicating the end of a line. Whatever you specify at build time + is the default; the caller of PCRE2 can change the selection at run time. The + default newline indicator is a single LF character (the Unix standard). You + can specify the default newline indicator by adding --enable-newline-is-cr, + --enable-newline-is-lf, --enable-newline-is-crlf, + --enable-newline-is-anycrlf, --enable-newline-is-any, or + --enable-newline-is-nul to the "configure" command, respectively. + +. By default, the sequence \R in a pattern matches any Unicode line ending + sequence. This is independent of the option specifying what PCRE2 considers + to be the end of a line (see above). However, the caller of PCRE2 can + restrict \R to match only CR, LF, or CRLF. You can make this the default by + adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R"). + +. In a pattern, the escape sequence \C matches a single code unit, even in a + UTF mode. This can be dangerous because it breaks up multi-code-unit + characters. You can build PCRE2 with the use of \C permanently locked out by + adding --enable-never-backslash-C (note the upper case C) to the "configure" + command. When \C is allowed by the library, individual applications can lock + it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option. + +. PCRE2 has a counter that limits the depth of nesting of parentheses in a + pattern. This limits the amount of system stack that a pattern uses when it + is compiled. The default is 250, but you can change it by setting, for + example, + + --with-parens-nest-limit=500 + +. PCRE2 has a counter that can be set to limit the amount of computing resource + it uses when matching a pattern. If the limit is exceeded during a match, the + match fails. The default is ten million. 
You can change the default by + setting, for example, + + --with-match-limit=500000 + + on the "configure" command. This is just the default; individual calls to + pcre2_match() or pcre2_dfa_match() can supply their own value. There is more + discussion in the pcre2api man page (search for pcre2_set_match_limit). + +. There is a separate counter that limits the depth of nested backtracking + (pcre2_match()) or nested function calls (pcre2_dfa_match()) during a + matching process, which indirectly limits the amount of heap memory that is + used, and in the case of pcre2_dfa_match() the amount of stack as well. This + counter also has a default of ten million, which is essentially "unlimited". + You can change the default by setting, for example, + + --with-match-limit-depth=5000 + + There is more discussion in the pcre2api man page (search for + pcre2_set_depth_limit). + +. You can also set an explicit limit on the amount of heap memory used by + the pcre2_match() and pcre2_dfa_match() interpreters: + + --with-heap-limit=500 + + The units are kibibytes (units of 1024 bytes). This limit does not apply when + the JIT optimization (which has its own memory control features) is used. + There is more discussion on the pcre2api man page (search for + pcre2_set_heap_limit). + +. In the 8-bit library, the default maximum compiled pattern size is around + 64 kibibytes. You can increase this by adding --with-link-size=3 to the + "configure" command. PCRE2 then uses three bytes instead of two for offsets + to different parts of the compiled pattern. In the 16-bit library, + --with-link-size=3 is the same as --with-link-size=4, which (in both + libraries) uses four-byte offsets. Increasing the internal link size reduces + performance in the 8-bit and 16-bit libraries. In the 32-bit library, the + link size setting is ignored, as 4-byte offsets are always used. + +. 
Lookbehind assertions in which one or more branches can match a variable + number of characters are supported only if there is a maximum matching length + for each top-level branch. There is a limit to this maximum that defaults to + 255 characters. You can alter this default by a setting such as + + --with-max-varlookbehind=100 + + The limit can be changed at runtime by calling pcre2_set_max_varlookbehind(). + Lookbehind assertions in which every branch matches a fixed number of + characters (not necessarily all the same) are not constrained by this limit. + +. For speed, PCRE2 uses four tables for manipulating and identifying characters + whose code point values are less than 256. By default, it uses a set of + tables for ASCII encoding that is part of the distribution. If you specify + + --enable-rebuild-chartables + + a program called pcre2_dftables is compiled and run in the default C locale + when you obey "make". It builds a source file called pcre2_chartables.c. If + you do not specify this option, pcre2_chartables.c is created as a copy of + pcre2_chartables.c.dist. See "Character tables" below for further + information. + +. It is possible to compile PCRE2 for use on systems that use EBCDIC as their + character code (as opposed to ASCII/Unicode) by specifying + + --enable-ebcdic --disable-unicode + + This automatically implies --enable-rebuild-chartables (see above). However, + when PCRE2 is built this way, it always operates in EBCDIC. It cannot support + both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25, + which specifies that the code value for the EBCDIC NL character is 0x25 + instead of the default 0x15. + +. If you specify --enable-debug, additional debugging code is included in the + build. This option is intended for use by the PCRE2 maintainers. + +. In environments where valgrind is installed, if you specify + + --enable-valgrind + + PCRE2 will use valgrind annotations to mark certain memory regions as + unaddressable. 
This allows it to detect invalid memory accesses, and is + mostly useful for debugging PCRE2 itself. + +. In environments where the gcc compiler is used and lcov is installed, if you + specify + + --enable-coverage + + the build process implements a code coverage report for the test suite. The + report is generated by running "make coverage". If ccache is installed on + your system, it must be disabled when building PCRE2 for coverage reporting. + You can do this by setting the environment variable CCACHE_DISABLE=1 before + running "make" to build PCRE2. There is more information about coverage + reporting in the "pcre2build" documentation. + +. When JIT support is enabled, pcre2grep automatically makes use of it, unless + you add --disable-pcre2grep-jit to the "configure" command. + +. There is support for calling external programs during matching in the + pcre2grep command, using PCRE2's callout facility with string arguments. This + support can be disabled by adding --disable-pcre2grep-callout to the + "configure" command. There are two kinds of callout: one that generates + output from inbuilt code, and another that calls an external program. The + latter has special support for Windows and VMS; otherwise it assumes the + existence of the fork() function. This facility can be disabled by adding + --disable-pcre2grep-callout-fork to the "configure" command. + +. The pcre2grep program currently supports only 8-bit data files, and so + requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use + libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by + specifying one or both of + + --enable-pcre2grep-libz + --enable-pcre2grep-libbz2 + + Of course, the relevant libraries must be installed on your system. + +. The default starting size (in bytes) of the internal buffer used by pcre2grep + can be set by, for example: + + --with-pcre2grep-bufsize=51200 + + The value must be a plain integer. The default is 20480. 
The amount of memory + used by pcre2grep is actually three times this number, to allow for "before" + and "after" lines. If very long lines are encountered, the buffer is + automatically enlarged, up to a fixed maximum size. + +. The default maximum size of pcre2grep's internal buffer can be set by, for + example: + + --with-pcre2grep-max-bufsize=2097152 + + The default is either 1048576 or the value of --with-pcre2grep-bufsize, + whichever is the larger. + +. It is possible to compile pcre2test so that it links with the libreadline + or libedit libraries, by specifying, respectively, + + --enable-pcre2test-libreadline or --enable-pcre2test-libedit + + If this is done, when pcre2test's input is from a terminal, it reads it using + the readline() function. This provides line-editing and history facilities. + Note that libreadline is GPL-licenced, so if you distribute a binary of + pcre2test linked in this way, there may be licensing issues. These can be + avoided by linking with libedit (which has a BSD licence) instead. + + Enabling libreadline causes the -lreadline option to be added to the + pcre2test build. In many operating environments with a system-installed + readline library this is sufficient. However, in some environments (e.g. if + an unmodified distribution version of readline is in use), it may be + necessary to specify something like LIBS="-lncurses" as well. This is + because, to quote the readline INSTALL, "Readline uses the termcap functions, + but does not link with the termcap or curses library itself, allowing + applications which link with readline the option to choose an appropriate + library." If you get error messages about missing functions tgetstr, tgetent, + tputs, tgetflag, or tgoto, this is the problem, and linking with the ncurses + library should fix it. + +. The C99 standard defines formatting modifiers z and t for size_t and + ptrdiff_t values, respectively. 
By default, PCRE2 uses these modifiers in + environments other than Microsoft Visual Studio versions earlier than 2013 + when __STDC_VERSION__ is defined and has a value greater than or equal to + 199901L (indicating C99). However, there is at least one environment that + claims to be C99 but does not support these modifiers. If + --disable-percent-zt is specified, no use is made of the z or t modifiers. + Instead of %td or %zu, %lu is used, with a cast for size_t values. + +. There is a special option called --enable-fuzz-support for use by people who + want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit + library. If set, it causes an extra library called libpcre2-fuzzsupport.a to + be built, but not installed. This contains a single function called + LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the + length of the string. When called, this function tries to compile the string + as a pattern, and if that succeeds, to match it. This is done both with no + options and with some random options bits that are generated from the string. + Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to + be created. This is normally run under valgrind or used when PCRE2 is + compiled with address sanitizing enabled. It calls the fuzzing function and + outputs information about what it is doing. The input strings are specified + by arguments: if an argument starts with "=" the rest of it is a literal + input string. Otherwise, it is assumed to be a file name, and the contents + of the file are the test string. + +. Releases before 10.30 could be compiled with --disable-stack-for-recursion, + which caused pcre2_match() to use individual blocks on the heap for + backtracking instead of recursive function calls (which use the stack). This + is now obsolete because pcre2_match() was refactored always to use the heap + (in a much more efficient way than before). 
This option is retained for + backwards compatibility, but has no effect other than to output a warning. + +The "configure" script builds the following files for the basic C library: + +. Makefile the makefile that builds the library +. src/config.h build-time configuration options for the library +. src/pcre2.h the public PCRE2 header file +. pcre2-config script that shows the building settings such as CFLAGS + that were set for "configure" +. libpcre2-8.pc ) +. libpcre2-16.pc ) data for the pkg-config command +. libpcre2-32.pc ) +. libpcre2-posix.pc ) +. libtool script that builds shared and/or static libraries + +Versions of config.h and pcre2.h are distributed in the src directory of PCRE2 +tarballs under the names config.h.generic and pcre2.h.generic. These are +provided for those who have to build PCRE2 without using "configure" or CMake. +If you use "configure" or CMake, the .generic versions are not used. + +The "configure" script also creates config.status, which is an executable +script that can be run to recreate the configuration, and config.log, which +contains compiler output from tests that "configure" runs. + +Once "configure" has run, you can run "make". This builds whichever of the +libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test +program called pcre2test. If you enabled JIT support with --enable-jit, another +test program called pcre2_jit_test is built as well. If the 8-bit library is +built, libpcre2-posix, pcre2posix_test, and the pcre2grep command are also +built. Running "make" with the -j option may speed up compilation on +multiprocessor systems. + +The command "make check" runs all the appropriate tests. Details of the PCRE2 +tests are given below in a separate section of this document. The -j option of +"make" can also be used when running the tests. + +You can use "make install" to install PCRE2 into live directories on your +system. 
The following are installed (file names are all relative to the +<prefix> that is set when "configure" is run): + + Commands (bin): + pcre2test + pcre2grep (if 8-bit support is enabled) + pcre2-config + + Libraries (lib): + libpcre2-8 (if 8-bit support is enabled) + libpcre2-16 (if 16-bit support is enabled) + libpcre2-32 (if 32-bit support is enabled) + libpcre2-posix (if 8-bit support is enabled) + + Configuration information (lib/pkgconfig): + libpcre2-8.pc + libpcre2-16.pc + libpcre2-32.pc + libpcre2-posix.pc + + Header files (include): + pcre2.h + pcre2posix.h + + Man pages (share/man/man{1,3}): + pcre2grep.1 + pcre2test.1 + pcre2-config.1 + pcre2.3 + pcre2*.3 (lots more pages, all starting "pcre2") + + HTML documentation (share/doc/pcre2/html): + index.html + *.html (lots more pages, hyperlinked from index.html) + + Text file documentation (share/doc/pcre2): + AUTHORS + COPYING + ChangeLog + LICENCE + NEWS + README + pcre2.txt (a concatenation of the man(3) pages) + pcre2test.txt the pcre2test man page + pcre2grep.txt the pcre2grep man page + pcre2-config.txt the pcre2-config man page + +If you want to remove PCRE2 from your system, you can run "make uninstall". +This removes all the files that "make install" installed. However, it does not +remove any directories, because these are often shared with other programs. + + +Retrieving configuration information +------------------------------------ + +Running "make install" installs the command pcre2-config, which can be used to +recall information about the PCRE2 configuration and installation. For example: + + pcre2-config --version + +prints the version number, and + + pcre2-config --libs8 + +outputs information about where the 8-bit library is installed. This command +can be included in makefiles for programs that use PCRE2, saving the programmer +from having to remember too many details. Run pcre2-config with no arguments to +obtain a list of possible arguments.
+ +The pkg-config command is another system for saving and retrieving information +about installed libraries. Instead of separate commands for each library, a +single command is used. For example: + + pkg-config --libs libpcre2-16 + +The data is held in *.pc files that are installed in a directory called +<prefix>/lib/pkgconfig. + + +Shared libraries +---------------- + +The default distribution builds PCRE2 as shared libraries and static libraries, +as long as the operating system supports shared libraries. Shared library +support relies on the "libtool" script which is built as part of the +"configure" process. + +The libtool script is used to compile and link both shared and static +libraries. They are placed in a subdirectory called .libs when they are newly +built. The programs pcre2test and pcre2grep are built to use these uninstalled +libraries (by means of wrapper scripts in the case of shared libraries). When +you use "make install" to install shared libraries, pcre2grep and pcre2test are +automatically re-built to use the newly installed shared libraries before being +installed themselves. However, the versions left in the build directory still +use the uninstalled libraries. + +To build PCRE2 using static libraries only you must use --disable-shared when +configuring it. For example: + +./configure --prefix=/usr/gnu --disable-shared + +Then run "make" in the usual way. Similarly, you can use --disable-static to +build only shared libraries. Note, however, that when you build only static +libraries, binary programs such as pcre2test and pcre2grep may still be +dynamically linked with other libraries (for example, libc) unless you set +LDFLAGS to --static when running "configure". + + +Cross-compiling using autotools +------------------------------- + +You can specify CC and CFLAGS in the normal way to the "configure" command, in +order to cross-compile PCRE2 for some other host.
However, you should NOT +specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c +source file is compiled and run on the local host, in order to generate the +inbuilt character tables (the pcre2_chartables.c file). This will probably not +work, because pcre2_dftables.c needs to be compiled with the local compiler, +not the cross compiler. + +When --enable-rebuild-chartables is not specified, pcre2_chartables.c is +created by making a copy of pcre2_chartables.c.dist, which is a default set of +tables that assumes ASCII code. Cross-compiling with the default tables should +not be a problem. + +If you need to modify the character tables when cross-compiling, you should +move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by +hand and run it on the local host to make a new version of +pcre2_chartables.c.dist. See the pcre2build section "Creating character tables +at build time" for more details. + + +Making new tarballs +------------------- + +The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and +zip formats. The command "make distcheck" does the same, but then does a trial +build of the new distribution to ensure that it works. + +If you have modified any of the man page sources in the doc directory, you +should first run the PrepareRelease script before making a distribution. This +script creates the .txt and HTML forms of the documentation from the man pages. + + +Testing PCRE2 +------------- + +To test the basic PCRE2 library on a Unix-like system, run the RunTest script. +There is another script called RunGrepTest that tests the pcre2grep command. +When the 8-bit library is built, a test program for the POSIX wrapper, called +pcre2posix_test, is compiled, and when JIT support is enabled, a test program +called pcre2_jit_test is built. The scripts and the program tests are all run +when you obey "make check". For other environments, see the instructions in +NON-AUTOTOOLS-BUILD. 
+ +The RunTest script runs the pcre2test test program (which is documented in its +own man page) on each of the relevant testinput files in the testdata +directory, and compares the output with the contents of the corresponding +testoutput files. RunTest uses a file called testtry to hold the main output +from pcre2test. Other files whose names begin with "test" are used as working +files in some tests. + +Some tests are relevant only when certain build-time options were selected. For +example, the tests for UTF-8/16/32 features are run only when Unicode support +is available. RunTest outputs a comment when it skips a test. + +Many (but not all) of the tests that are not skipped are run twice if JIT +support is available. On the second run, JIT compilation is forced. This +testing can be suppressed by putting "-nojit" on the RunTest command line. + +The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit +libraries that are enabled. If you want to run just one set of tests, call +RunTest with either the -8, -16 or -32 option. + +If valgrind is installed, you can run the tests under it by putting "-valgrind" +on the RunTest command line. To run pcre2test on just one or more specific test +files, give their numbers as arguments to RunTest, for example: + + RunTest 2 7 11 + +You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the +end), or a number preceded by ~ to exclude a test. For example: + + RunTest 3-15 ~10 + +This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests +except test 13. Whatever order the arguments are in, the tests are always run +in numerical order. + +You can also call RunTest with the single argument "list" to cause it to output +a list of tests. + +The test sequence starts with "test 0", which is a special test that has no +input file, and whose output is not checked. This is because it will be +different on different hardware and with different configurations.
The test +exists in order to exercise some of pcre2test's code that would not otherwise +be run. + +Tests 1 and 2 can always be run, as they expect only plain text strings (not +UTF) and make no use of Unicode properties. The first test file can be fed +directly into the perltest.sh script to check that Perl gives the same results. +The only difference you should see is in the first few lines, where the Perl +version is given instead of the PCRE2 version. The second set of tests check +auxiliary functions, error detection, and run-time flags that are specific to +PCRE2. It also uses the debugging flags to check some of the internals of +pcre2_compile(). + +If you build PCRE2 with a locale setting that is not the standard C locale, the +character tables may be different (see next paragraph). In some cases, this may +cause failures in the second set of tests. For example, in a locale where the +isprint() function yields TRUE for characters in the range 128-255, the use of +[:isascii:] inside a character class defines a different set of characters, and +this shows up in this test as a difference in the compiled code, which is being +listed for checking. For example, where the comparison test output contains +[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other +cases. This is not a bug in PCRE2. + +Test 3 checks pcre2_maketables(), the facility for building a set of character +tables for a specific locale and using them instead of the default tables. The +script uses the "locale" command to check for the availability of the "fr_FR", +"french", or "fr" locale, and uses the first one that it finds. If the "locale" +command fails, or if its output doesn't include "fr_FR", "french", or "fr" in +the list of available locales, the third test cannot be run, and a comment is +output to say why. 
If running this test produces an error like this: + + ** Failed to set locale "fr_FR" + +it means that the given locale is not available on your system, despite being +listed by "locale". This does not mean that PCRE2 is broken. There are three +alternative output files for the third test, because three different versions +of the French locale have been encountered. The test passes if its output +matches any one of them. + +Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible +with the perltest.sh script, and test 5 checking PCRE2-specific things. + +Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in +non-UTF mode and UTF-mode with Unicode property support, respectively. + +Test 8 checks some internal offsets and code size features, but it is run only +when Unicode support is enabled. The output is different in 8-bit, 16-bit, and +32-bit modes and for different link sizes, so there are different output files +for each mode and link size. + +Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in +16-bit and 32-bit modes. These are tests that generate different output in +8-bit mode. Each pair are for general cases and Unicode support, respectively. + +Test 13 checks the handling of non-UTF characters greater than 255 by +pcre2_dfa_match() in 16-bit and 32-bit modes. + +Test 14 contains some special UTF and UCP tests that give different output for +different code unit widths. + +Test 15 contains a number of tests that must not be run with JIT. They check, +among other non-JIT things, the match-limiting features of the interpretive +matcher. + +Test 16 is run only when JIT support is not available. It checks that an +attempt to use JIT has the expected behaviour. + +Test 17 is run only when JIT support is available. It checks JIT complete and +partial modes, match-limiting under JIT, and other JIT-specific features. + +Tests 18 and 19 are run only in 8-bit mode. 
They check the POSIX interface to +the 8-bit library, without and with Unicode support, respectively. + +Test 20 checks the serialization functions by writing a set of compiled +patterns to a file, and then reloading and checking them. + +Tests 21 and 22 test \C support when the use of \C is not locked out, without +and with UTF support, respectively. Test 23 tests \C when it is locked out. + +Tests 24 and 25 test the experimental pattern conversion functions, without and +with UTF support, respectively. + +Test 26 checks Unicode property support using tests that are generated +automatically from the Unicode data tables. + + +Character tables +---------------- + +For speed, PCRE2 uses four tables for manipulating and identifying characters +whose code point values are less than 256. By default, a set of tables that is +built into the library is used. The pcre2_maketables() function can be called +by an application to create a new set of tables in the current locale. These are +passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a +compile context. + +The source file called pcre2_chartables.c contains the default set of tables. +By default, this is created as a copy of pcre2_chartables.c.dist, which +contains tables for ASCII coding. However, if --enable-rebuild-chartables is +specified for ./configure, a new version of pcre2_chartables.c is built by the +program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C +character handling functions such as isalnum(), isalpha(), isupper(), +islower(), etc. to build the table sources. This means that the default C +locale that is set for your system will control the contents of these default +tables. You can change the default tables by editing pcre2_chartables.c and +then re-building PCRE2. If you do this, you should take care to ensure that the +file does not get automatically re-generated.
The best way to do this is to +move pcre2_chartables.c.dist out of the way and replace it with your customized +tables. + +When the pcre2_dftables program is run as a result of specifying +--enable-rebuild-chartables, it uses the default C locale that is set on your +system. It does not pay attention to the LC_xxx environment variables. In other +words, it uses the system's default locale rather than whatever the compiling +user happens to have set. If you really do want to build a source set of +character tables in a locale that is specified by the LC_xxx variables, you can +run the pcre2_dftables program by hand with the -L option. For example: + + ./pcre2_dftables -L pcre2_chartables.c.special + +The second argument names the file where the source code for the tables is +written. The first two 256-byte tables provide lower casing and case flipping +functions, respectively. The next table consists of a number of 32-byte bit +maps which identify certain character classes such as digits, "word" +characters, white space, etc. These are used when building 32-byte bit maps +that represent character classes for code points less than 256. The final +256-byte table has bits indicating various character types, as follows: + + 1 white space character + 2 letter + 4 lower case letter + 8 decimal digit + 16 alphanumeric or '_' + +You can also specify -b (with or without -L) when running pcre2_dftables. This +causes the tables to be written in binary instead of as source code. A set of +binary tables can be loaded into memory by an application and passed to +pcre2_compile() in the same way as tables created dynamically by calling +pcre2_maketables(). The tables are just a string of bytes, independent of +hardware characteristics such as endianness. This means they can be bundled +with an application that runs in different environments, to ensure consistent +behaviour. + +See also the pcre2build section "Creating character tables at build time". 
+ + +File manifest +------------- + +The distribution should contain the files listed below. + +(A) Source files for the PCRE2 library functions and their headers are found in + the src directory: + + src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c + when --enable-rebuild-chartables is specified + + src/pcre2_chartables.c.dist a default set of character tables that assume + ASCII coding; unless --enable-rebuild-chartables is + specified, used by copying to pcre2_chartables.c + + src/pcre2posix.c ) + src/pcre2_auto_possess.c ) + src/pcre2_chkdint.c ) + src/pcre2_compile.c ) + src/pcre2_config.c ) + src/pcre2_context.c ) + src/pcre2_convert.c ) + src/pcre2_dfa_match.c ) + src/pcre2_error.c ) + src/pcre2_extuni.c ) + src/pcre2_find_bracket.c ) + src/pcre2_jit_compile.c ) + src/pcre2_jit_match.c ) sources for the functions in the library, + src/pcre2_jit_misc.c ) and some internal functions that they use + src/pcre2_maketables.c ) + src/pcre2_match.c ) + src/pcre2_match_data.c ) + src/pcre2_newline.c ) + src/pcre2_ord2utf.c ) + src/pcre2_pattern_info.c ) + src/pcre2_script_run.c ) + src/pcre2_serialize.c ) + src/pcre2_string_utils.c ) + src/pcre2_study.c ) + src/pcre2_substitute.c ) + src/pcre2_substring.c ) + src/pcre2_tables.c ) + src/pcre2_ucd.c ) + src/pcre2_ucptables.c ) + src/pcre2_valid_utf.c ) + src/pcre2_xclass.c ) + + src/pcre2_printint.c debugging function that is used by pcre2test, + src/pcre2_fuzzsupport.c function for (optional) fuzzing support + + src/config.h.in template for config.h, when built by "configure" + src/pcre2.h.in template for pcre2.h when built by "configure" + src/pcre2posix.h header for the external POSIX wrapper API + src/pcre2_internal.h header for internal use + src/pcre2_intmodedep.h a mode-specific internal header + src/pcre2_jit_neon_inc.h header used by JIT + src/pcre2_jit_simd_inc.h header used by JIT + src/pcre2_ucp.h header for Unicode property handling + + sljit/* source files for the JIT compiler + +(B) 
Source files for programs that use PCRE2: + + src/pcre2demo.c simple demonstration of coding calls to PCRE2 + src/pcre2grep.c source of a grep utility that uses PCRE2 + src/pcre2test.c comprehensive test program + src/pcre2_jit_test.c JIT test program + src/pcre2posix_test.c POSIX wrapper API test program + +(C) Auxiliary files: + + 132html script to turn "man" pages into HTML + AUTHORS information about the author of PCRE2 + ChangeLog log of changes to the code + CleanTxt script to clean nroff output for txt man pages + Detrail script to remove trailing spaces + HACKING some notes about the internals of PCRE2 + INSTALL generic installation instructions + LICENCE conditions for the use of PCRE2 + COPYING the same, using GNU's standard name + Makefile.in ) template for Unix Makefile, which is built by + ) "configure" + Makefile.am ) the automake input that was used to create + ) Makefile.in + NEWS important changes in this release + NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools + PrepareRelease script to make preparations for "make dist" + README this file + RunTest a Unix shell script for running tests + RunGrepTest a Unix shell script for pcre2grep tests + aclocal.m4 m4 macros (generated by "aclocal") + config.guess ) files used by libtool, + config.sub ) used only when building a shared library + configure a configuring shell script (built by autoconf) + configure.ac ) the autoconf input that was used to build + ) "configure" and config.h + depcomp ) script to find program dependencies, generated by + ) automake + doc/*.3 man page sources for PCRE2 + doc/*.1 man page sources for pcre2grep and pcre2test + doc/index.html.src the base HTML page + doc/html/* HTML documentation + doc/pcre2.txt plain text version of the man pages + doc/pcre2test.txt plain text documentation of test program + install-sh a shell script for installing files + libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config + libpcre2-16.pc.in template for libpcre2-16.pc 
for pkg-config + libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config + libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config + ltmain.sh file used to build a libtool script + missing ) common stub for a few missing GNU programs while + ) installing, generated by automake + mkinstalldirs script for making install directories + perltest.sh Script for running a Perl test program + pcre2-config.in source of script which retains PCRE2 information + testdata/testinput* test data for main library tests + testdata/testoutput* expected test results + testdata/grep* input and output for pcre2grep tests + testdata/* other supporting test files + +(D) Auxiliary files for cmake support + + cmake/COPYING-CMAKE-SCRIPTS + cmake/FindPackageHandleStandardArgs.cmake + cmake/FindEditline.cmake + cmake/FindReadline.cmake + CMakeLists.txt + config-cmake.h.in + +(E) Auxiliary files for building PCRE2 "by hand" + + src/pcre2.h.generic ) a version of the public PCRE2 header file + ) for use in non-"configure" environments + src/config.h.generic ) a version of config.h for use in non-"configure" + ) environments + +(F) Auxiliary files for building PCRE2 under OpenVMS + + vms/configure.com ) + vms/openvms_readme.txt ) These files were contributed by a PCRE2 user. + vms/pcre2.h_patch ) + vms/stdint.h ) + +Philip Hazel +Email local part: Philip.Hazel +Email domain: gmail.com +Last updated: 15 April 2024 diff --git a/doc/html/index.html b/doc/html/index.html new file mode 100644 index 0000000..e4dc786 --- /dev/null +++ b/doc/html/index.html @@ -0,0 +1,318 @@ + + + +PCRE2 specification + + +

Perl-compatible Regular Expressions (revised API: PCRE2)

+

+The HTML documentation for PCRE2 consists of a number of pages that are listed +below in alphabetical order. If you are new to PCRE2, please read the first one +first. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
pcre2  Introductory page
pcre2-config  Information about the installation configuration
pcre2api  PCRE2's native API
pcre2build  Building PCRE2
pcre2callout  The callout facility
pcre2compat  Compatibility with Perl
pcre2convert  Experimental foreign pattern conversion functions
pcre2demo  A demonstration C program that uses the PCRE2 library
pcre2grep  The pcre2grep command
pcre2jit  Discussion of the just-in-time optimization support
pcre2limits  Details of size and other limits
pcre2matching  Discussion of the two matching algorithms
pcre2partial  Using PCRE2 for partial matching
pcre2pattern  Specification of the regular expressions supported by PCRE2
pcre2perform  Some comments on performance
pcre2posix  The POSIX API to the PCRE2 8-bit library
pcre2sample  Discussion of the pcre2demo program
pcre2serialize  Serializing functions for saving precompiled patterns
pcre2syntax  Syntax quick-reference summary
pcre2test  The pcre2test command for testing PCRE2
pcre2unicode  Discussion of Unicode and UTF-8/UTF-16/UTF-32 support
+ +

+There are also individual pages that summarize the interface for each function +in the library. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
pcre2_callout_enumerate  Enumerate callouts in a compiled pattern
pcre2_code_copy  Copy a compiled pattern
pcre2_code_copy_with_tables  Copy a compiled pattern and its character tables
pcre2_code_free  Free a compiled pattern
pcre2_compile  Compile a regular expression pattern
pcre2_compile_context_copy  Copy a compile context
pcre2_compile_context_create  Create a compile context
pcre2_compile_context_free  Free a compile context
pcre2_config  Show build-time configuration options
pcre2_convert_context_copy  Copy a convert context
pcre2_convert_context_create  Create a convert context
pcre2_convert_context_free  Free a convert context
pcre2_converted_pattern_free  Free converted foreign pattern
pcre2_dfa_match  Match a compiled pattern to a subject string + (DFA algorithm; not Perl compatible)
pcre2_general_context_copy  Copy a general context
pcre2_general_context_create  Create a general context
pcre2_general_context_free  Free a general context
pcre2_get_error_message  Get textual error message for error number
pcre2_get_mark  Get a (*MARK) name
pcre2_get_match_data_size  Get the size of a match data block
pcre2_get_ovector_count  Get the ovector count
pcre2_get_ovector_pointer  Get a pointer to the ovector
pcre2_get_startchar  Get the starting character offset
pcre2_jit_compile  Process a compiled pattern with the JIT compiler
pcre2_jit_free_unused_memory  Free unused JIT memory
pcre2_jit_match  Fast path interface to JIT matching
pcre2_jit_stack_assign  Assign stack for JIT matching
pcre2_jit_stack_create  Create a stack for JIT matching
pcre2_jit_stack_free  Free a JIT matching stack
pcre2_maketables  Build character tables in current locale
pcre2_maketables_free  Free character tables
pcre2_match  Match a compiled pattern to a subject string + (Perl compatible)
pcre2_match_context_copy  Copy a match context
pcre2_match_context_create  Create a match context
pcre2_match_context_free  Free a match context
pcre2_match_data_create  Create a match data block
pcre2_match_data_create_from_pattern  Create a match data block getting size from pattern
pcre2_match_data_free  Free a match data block
pcre2_pattern_convert  Experimental foreign pattern converter
pcre2_pattern_info  Extract information about a pattern
pcre2_serialize_decode  Decode serialized compiled patterns
pcre2_serialize_encode  Serialize compiled patterns for save/restore
pcre2_serialize_free  Free serialized compiled patterns
pcre2_serialize_get_number_of_codes  Get number of serialized compiled patterns
pcre2_set_bsr  Set \R convention
pcre2_set_callout  Set up a callout function
pcre2_set_character_tables  Set character tables
pcre2_set_compile_extra_options  Set compile time extra options
pcre2_set_compile_recursion_guard  Set up a compile recursion guard function
pcre2_set_depth_limit  Set the match backtracking depth limit
pcre2_set_glob_escape  Set glob escape character
pcre2_set_glob_separator  Set glob separator character
pcre2_set_heap_limit  Set the match backtracking heap limit
pcre2_set_match_limit  Set the match limit
pcre2_set_max_pattern_compiled_length  Set the maximum length of a compiled pattern
pcre2_set_max_pattern_length  Set the maximum length of a pattern
pcre2_set_max_varlookbehind  Set the maximum match length for a variable-length lookbehind
pcre2_set_newline  Set the newline convention
pcre2_set_offset_limit  Set the offset limit
pcre2_set_parens_nest_limit  Set the parentheses nesting limit
pcre2_set_recursion_limit  Obsolete: use pcre2_set_depth_limit
pcre2_set_recursion_memory_management  Obsolete function that (from 10.30 onwards) does nothing
pcre2_substitute  Match a compiled pattern to a subject string and do + substitutions
pcre2_substring_copy_byname  Extract named substring into given buffer
pcre2_substring_copy_bynumber  Extract numbered substring into given buffer
pcre2_substring_free  Free extracted substring
pcre2_substring_get_byname  Extract named substring into new memory
pcre2_substring_get_bynumber  Extract numbered substring into new memory
pcre2_substring_length_byname  Find length of named substring
pcre2_substring_length_bynumber  Find length of numbered substring
pcre2_substring_list_free  Free list of extracted substrings
pcre2_substring_list_get  Extract all substrings into new memory
pcre2_substring_nametable_scan  Find table entries for given string name
pcre2_substring_number_from_name  Convert captured string name to number
+ + + diff --git a/doc/html/pcre2-config.html b/doc/html/pcre2-config.html new file mode 100644 index 0000000..b71d760 --- /dev/null +++ b/doc/html/pcre2-config.html @@ -0,0 +1,102 @@ + + +pcre2-config specification + + +

pcre2-config man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
SYNOPSIS
+

+pcre2-config [--prefix] [--exec-prefix] [--version] + [--libs8] [--libs16] [--libs32] [--libs-posix] + [--cflags] [--cflags-posix] +

+
DESCRIPTION
+

+pcre2-config returns the configuration of the installed PCRE2 libraries +and the options required to compile a program to use them. Some of the options +apply only to the 8-bit, or 16-bit, or 32-bit libraries, respectively, and are +not available for libraries that have not been built. If an unavailable option +is encountered, the "usage" information is output. +

+
OPTIONS
+

+--prefix +Writes the directory prefix used in the PCRE2 installation for architecture +independent files (/usr on many systems, /usr/local on some +systems) to the standard output. +

+

+--exec-prefix +Writes the directory prefix used in the PCRE2 installation for architecture +dependent files (normally the same as --prefix) to the standard output. +

+

+--version +Writes the version number of the installed PCRE2 libraries to the standard +output. +

+

+--libs8 +Writes to the standard output the command line options required to link +with the 8-bit PCRE2 library (-lpcre2-8 on many systems). +

+

+--libs16 +Writes to the standard output the command line options required to link +with the 16-bit PCRE2 library (-lpcre2-16 on many systems). +

+

+--libs32 +Writes to the standard output the command line options required to link +with the 32-bit PCRE2 library (-lpcre2-32 on many systems). +

+

+--libs-posix +Writes to the standard output the command line options required to link with +PCRE2's POSIX API wrapper library (-lpcre2-posix -lpcre2-8 on many +systems). +

+

+--cflags +Writes to the standard output the command line options required to compile +files that use PCRE2 (this may include some -I options, but is blank on +many systems). +

+

+--cflags-posix +Writes to the standard output the command line options required to compile +files that use PCRE2's POSIX API wrapper library (this may include some +-I options, but is blank on many systems). +

+
SEE ALSO
+

+pcre2(3) +

+
AUTHOR
+

+This manual page was originally written by Mark Baker for the Debian GNU/Linux +system. It has been subsequently revised as a generic PCRE2 man page. +

+
REVISION
+

+Last updated: 28 September 2014 +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2.html b/doc/html/pcre2.html new file mode 100644 index 0000000..4cb83dc --- /dev/null +++ b/doc/html/pcre2.html @@ -0,0 +1,214 @@ + + +pcre2 specification + + +

pcre2 man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
INTRODUCTION
+

+PCRE2 is the name used for a revised API for the PCRE library, which is a set +of functions, written in C, that implement regular expression pattern matching +using the same syntax and semantics as Perl, with just a few differences. After +nearly two decades, the limitations of the original API were making development +increasingly difficult. The new API is more extensible, and it was simplified +by abolishing the separate "study" optimizing function; in PCRE2, patterns are +automatically optimized where possible. Since forking from PCRE1, the code has +been extensively refactored and new features introduced. The old library is now +obsolete and is no longer maintained. +

+

+As well as Perl-style regular expression patterns, some features that appeared +in Python and the original PCRE before they appeared in Perl are available +using the Python syntax. There is also some support for one or two .NET and +Oniguruma syntax items, and there are options for requesting some minor changes +that give better ECMAScript (aka JavaScript) compatibility. +

+

+The source code for PCRE2 can be compiled to support strings of 8-bit, 16-bit, +or 32-bit code units, which means that up to three separate libraries may be +installed, one for each code unit size. The size of code unit is not related to +the bit size of the underlying hardware. In a 64-bit environment that also +supports 32-bit applications, versions of PCRE2 that are compiled in both +64-bit and 32-bit modes may be needed. +

+

+The original work to extend PCRE to 16-bit and 32-bit code units was done by +Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings +can be interpreted either as one character per code unit, or as UTF-encoded +Unicode, with support for Unicode general category properties. Unicode support +is optional at build time (but is the default). However, processing strings as +UTF code units must be enabled explicitly at run time. The version of Unicode +in use can be discovered by running +

+  pcre2test -C
+
+

+

+The three libraries contain identical sets of functions, with names ending in +_8, _16, or _32, respectively (for example, pcre2_compile_8()). However, +by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just +one code unit width can be written using generic names such as +pcre2_compile(), and the documentation is written assuming that this is +the case. +

+

+In addition to the Perl-compatible matching function, PCRE2 contains an +alternative function that matches the same compiled patterns in a different +way. In certain circumstances, the alternative function has some advantages. +For a discussion of the two matching algorithms, see the +pcre2matching +page. +

+

+Details of exactly which Perl regular expression features are and are not +supported by PCRE2 are given in separate documents. See the +pcre2pattern +and +pcre2compat +pages. There is a syntax summary in the +pcre2syntax +page. +

+

+Some features of PCRE2 can be included, excluded, or changed when the library +is built. The +pcre2_config() +function makes it possible for a client to discover which features are +available. The features themselves are described in the +pcre2build +page. Documentation about building PCRE2 for various operating systems can be +found in the +README +and +NON-AUTOTOOLS-BUILD +files in the source distribution. +

+

+The libraries contain a number of undocumented internal functions and data +tables that are used by more than one of the exported external functions, but +which are not intended for use by external callers. Their names all begin with +"_pcre2", which hopefully will not provoke any name clashes. In some +environments, it is possible to control which external symbols are exported +when a shared library is built, and in these cases the undocumented symbols are +not exported. +

+
SECURITY CONSIDERATIONS
+

+If you are using PCRE2 in a non-UTF application that permits users to supply +arbitrary patterns for compilation, you should be aware of a feature that +allows users to turn on UTF support from within a pattern. For example, an +8-bit pattern that begins with "(*UTF)" turns on UTF-8 mode, which interprets +patterns and subjects as strings of UTF-8 code units instead of individual +8-bit characters. This causes both the pattern and any data against which it is +matched to be checked for UTF-8 validity. If the data string is very long, such +a check might use sufficiently many resources as to cause your application to +lose performance. +

+

+One way of guarding against this possibility is to use the +pcre2_pattern_info() function to check the compiled pattern's options for +PCRE2_UTF. Alternatively, you can set the PCRE2_NEVER_UTF option when calling +pcre2_compile(). This causes a compile time error if the pattern contains +a UTF-setting sequence. +

+

+The use of Unicode properties for character types such as \d can also be +enabled from within the pattern, by specifying "(*UCP)". This feature can be +disallowed by setting the PCRE2_NEVER_UCP option. +

+

+If your application is one that supports UTF, be aware that validity checking +can take time. If the same data string is to be matched many times, you can use +the PCRE2_NO_UTF_CHECK option for the second and subsequent matches to avoid +running redundant checks. +

+

+The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead to +problems, because it may leave the current matching point in the middle of a +multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used by an +application to lock out the use of \C, causing a compile-time error if it is +encountered. It is also possible to build PCRE2 with the use of \C permanently +disabled. +

+

+Another way that performance can be hit is by running a pattern that has a very +large search tree against a string that will never match. Nested unlimited +repeats in a pattern are a common example. PCRE2 provides some protection +against this: see the pcre2_set_match_limit() function in the +pcre2api +page. There is a similar function called pcre2_set_depth_limit() that can +be used to restrict the amount of memory that is used. +

+
USER DOCUMENTATION
+

+The user documentation for PCRE2 comprises a number of different sections. In +the "man" format, each of these is a separate "man page". In the HTML format, +each is a separate page, linked from the index page. In the plain text format, +the descriptions of the pcre2grep and pcre2test programs are in +files called pcre2grep.txt and pcre2test.txt, respectively. The +remaining sections, except for the pcre2demo section (which is a program +listing), and the short pages for individual functions, are concatenated in +pcre2.txt, for ease of searching. The sections are as follows: +

+  pcre2              this document
+  pcre2-config       show PCRE2 installation configuration information
+  pcre2api           details of PCRE2's native C API
+  pcre2build         building PCRE2
+  pcre2callout       details of the pattern callout feature
+  pcre2compat        discussion of Perl compatibility
+  pcre2convert       details of pattern conversion functions
+  pcre2demo          a demonstration C program that uses PCRE2
+  pcre2grep          description of the pcre2grep command (8-bit only)
+  pcre2jit           discussion of just-in-time optimization support
+  pcre2limits        details of size and other limits
+  pcre2matching      discussion of the two matching algorithms
+  pcre2partial       details of the partial matching facility
+  pcre2pattern       syntax and semantics of supported regular expression patterns
+  pcre2perform       discussion of performance issues
+  pcre2posix         the POSIX-compatible C API for the 8-bit library
+  pcre2sample        discussion of the pcre2demo program
+  pcre2serialize     details of pattern serialization
+  pcre2syntax        quick syntax reference
+  pcre2test          description of the pcre2test command
+  pcre2unicode       discussion of Unicode and UTF support
+
+In the "man" and HTML formats, there is also a short page for each C library +function, listing its arguments and results. +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+

+Putting an actual email address here is a spam magnet. If you want to email me, +use my two names separated by a dot at gmail.com. +

+
REVISION
+

+Last updated: 27 August 2021 +
+Copyright © 1997-2021 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_callout_enumerate.html b/doc/html/pcre2_callout_enumerate.html new file mode 100644 index 0000000..505ea7b --- /dev/null +++ b/doc/html/pcre2_callout_enumerate.html @@ -0,0 +1,63 @@ + + +pcre2_callout_enumerate specification + + +

pcre2_callout_enumerate man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *callout_data); +

+
+DESCRIPTION +
+

+This function scans a compiled regular expression and calls the callback() +function for each callout within the pattern. The yield of the function is zero +for success and non-zero otherwise. The arguments are: +

+  code           Points to the compiled pattern
+  callback       The callback function
+  callout_data   User data that is passed to the callback
+
+The callback() function is passed a pointer to a data block containing +the following fields (not necessarily in this order): +
+  uint32_t   version                Block version number
+  uint32_t   callout_number         Number for numbered callouts
+  PCRE2_SIZE pattern_position       Offset to next item in pattern
+  PCRE2_SIZE next_item_length       Length of next item in pattern
+  PCRE2_SIZE callout_string_offset  Offset to string within pattern
+  PCRE2_SIZE callout_string_length  Length of callout string
+  PCRE2_SPTR callout_string         Points to callout string or is NULL
+
+The second argument passed to the callback() function is the callout data +that was passed to pcre2_callout_enumerate(). The callback() +function must return zero for success. Any other value causes the pattern scan +to stop, with the value being passed back as the result of +pcre2_callout_enumerate(). +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_code_copy.html b/doc/html/pcre2_code_copy.html new file mode 100644 index 0000000..667d7b7 --- /dev/null +++ b/doc/html/pcre2_code_copy.html @@ -0,0 +1,43 @@ + + +pcre2_code_copy specification + + +

pcre2_code_copy man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_code *pcre2_code_copy(const pcre2_code *code); +

+
+DESCRIPTION +
+

+This function makes a copy of the memory used for a compiled pattern, excluding +any memory used by the JIT compiler. Without a subsequent call to +pcre2_jit_compile(), the copy can be used only for non-JIT matching. The +pointer to the character tables is copied, not the tables themselves (see +pcre2_code_copy_with_tables()). The yield of the function is NULL if +code is NULL or if sufficient memory cannot be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_code_copy_with_tables.html b/doc/html/pcre2_code_copy_with_tables.html new file mode 100644 index 0000000..67b2e1f --- /dev/null +++ b/doc/html/pcre2_code_copy_with_tables.html @@ -0,0 +1,44 @@ + + +pcre2_code_copy_with_tables specification + + +

pcre2_code_copy_with_tables man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); +

+
+DESCRIPTION +
+

+This function makes a copy of the memory used for a compiled pattern, excluding +any memory used by the JIT compiler. Without a subsequent call to +pcre2_jit_compile(), the copy can be used only for non-JIT matching. +Unlike pcre2_code_copy(), a separate copy of the character tables is also +made, with the new code pointing to it. This memory will be automatically freed +when pcre2_code_free() is called. The yield of the function is NULL if +code is NULL or if sufficient memory cannot be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_code_free.html b/doc/html/pcre2_code_free.html new file mode 100644 index 0000000..ff302fc --- /dev/null +++ b/doc/html/pcre2_code_free.html @@ -0,0 +1,42 @@ + + +pcre2_code_free specification + + +

pcre2_code_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_code_free(pcre2_code *code); +

+
+DESCRIPTION +
+

+If code is NULL, this function does nothing. Otherwise, code must +point to a compiled pattern. This function frees its memory, including any +memory used by the JIT compiler. If the compiled pattern was created by a call +to pcre2_code_copy_with_tables(), the memory for the character tables is +also freed. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_compile.html b/doc/html/pcre2_compile.html new file mode 100644 index 0000000..f0080ea --- /dev/null +++ b/doc/html/pcre2_compile.html @@ -0,0 +1,119 @@ + + +pcre2_compile specification + + +

pcre2_compile man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, + pcre2_compile_context *ccontext); +

+
+DESCRIPTION +
+

+This function compiles a regular expression pattern into an internal form. Its +arguments are: +

+  pattern       A string containing the expression to be compiled
+  length        The length of the string or PCRE2_ZERO_TERMINATED
+  options       Primary option bits
+  errorcode     Where to put an error code
+  erroroffset   Where to put an error offset
+  ccontext      Pointer to a compile context or NULL
+
+The length of the pattern and any error offset that is returned are in code +units, not characters. A NULL pattern with zero length is treated as an empty +string. A compile context is needed only if you want to provide custom memory +allocation functions, or to provide an external function for system stack size +checking (see pcre2_set_compile_recursion_guard()), or to change one or +more of these parameters: +
+  What \R matches (Unicode newlines, or CR, LF, CRLF only);
+  PCRE2's character tables;
+  The newline character sequence;
+  The compile time nested parentheses limit;
+  The maximum pattern length (in code units) that is allowed;
+  The additional options bits.
+
+The primary option bits are: +
+  PCRE2_ANCHORED           Force pattern anchoring
+  PCRE2_ALLOW_EMPTY_CLASS  Allow empty classes
+  PCRE2_ALT_BSUX           Alternative handling of \u, \U, and \x
+  PCRE2_ALT_CIRCUMFLEX     Alternative handling of ^ in multiline mode
+  PCRE2_ALT_VERBNAMES      Process backslashes in verb names
+  PCRE2_AUTO_CALLOUT       Compile automatic callouts
+  PCRE2_CASELESS           Do caseless matching
+  PCRE2_DOLLAR_ENDONLY     $ not to match newline at end
+  PCRE2_DOTALL             . matches anything including NL
+  PCRE2_DUPNAMES           Allow duplicate names for subpatterns
+  PCRE2_ENDANCHORED        Pattern can match only at end of subject
+  PCRE2_EXTENDED           Ignore white space and # comments
+  PCRE2_FIRSTLINE          Force matching to be before newline
+  PCRE2_LITERAL            Pattern characters are all literal
+  PCRE2_MATCH_INVALID_UTF  Enable support for matching invalid UTF
+  PCRE2_MATCH_UNSET_BACKREF  Match unset backreferences
+  PCRE2_MULTILINE          ^ and $ match newlines within data
+  PCRE2_NEVER_BACKSLASH_C  Lock out the use of \C in patterns
+  PCRE2_NEVER_UCP          Lock out PCRE2_UCP, e.g. via (*UCP)
+  PCRE2_NEVER_UTF          Lock out PCRE2_UTF, e.g. via (*UTF)
+  PCRE2_NO_AUTO_CAPTURE    Disable numbered capturing paren-
+                            theses (named ones available)
+  PCRE2_NO_AUTO_POSSESS    Disable auto-possessification
+  PCRE2_NO_DOTSTAR_ANCHOR  Disable automatic anchoring for .*
+  PCRE2_NO_START_OPTIMIZE  Disable match-time start optimizations
+  PCRE2_NO_UTF_CHECK       Do not check the pattern for UTF validity
+                             (only relevant if PCRE2_UTF is set)
+  PCRE2_UCP                Use Unicode properties for \d, \w, etc.
+  PCRE2_UNGREEDY           Invert greediness of quantifiers
+  PCRE2_USE_OFFSET_LIMIT   Enable offset limit for unanchored matching
+  PCRE2_UTF                Treat pattern and subjects as UTF strings
+
+PCRE2 must be built with Unicode support (the default) in order to use +PCRE2_UTF, PCRE2_UCP and related options. +

+

+Additional options may be set in the compile context via the +pcre2_set_compile_extra_options +function. +

+

+If either of errorcode or erroroffset is NULL, the function returns +NULL immediately. Otherwise, the yield of this function is a pointer to a +private data structure that contains the compiled pattern, or NULL if an error +was detected. In the error case, a text error message can be obtained by +passing the value returned via the errorcode argument to the +pcre2_get_error_message() function. The offset (in code units) where the +error was encountered is returned via the erroroffset argument. +

+

+If there is no error, the value returned via errorcode yields the message +"no error" if passed to pcre2_get_error_message(), and the value returned +via erroroffset is zero. +

+

+There is a complete description of the PCRE2 native API, with more detail on +each option, in the +pcre2api +page, and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_compile_context_copy.html b/doc/html/pcre2_compile_context_copy.html new file mode 100644 index 0000000..9e9884b --- /dev/null +++ b/doc/html/pcre2_compile_context_copy.html @@ -0,0 +1,41 @@ + + +pcre2_compile_context_copy specification + + +

pcre2_compile_context_copy man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_compile_context *pcre2_compile_context_copy( + pcre2_compile_context *ccontext); +

+
+DESCRIPTION +
+

+This function makes a new copy of a compile context, using the memory +allocation function that was used for the original context. The result is NULL +if the memory cannot be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_compile_context_create.html b/doc/html/pcre2_compile_context_create.html new file mode 100644 index 0000000..5eacd4e --- /dev/null +++ b/doc/html/pcre2_compile_context_create.html @@ -0,0 +1,42 @@ + + +pcre2_compile_context_create specification + + +

pcre2_compile_context_create man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_compile_context *pcre2_compile_context_create( + pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function creates and initializes a new compile context. If its argument is +NULL, malloc() is used to get the necessary memory; otherwise the memory +allocation function within the general context is used. The result is NULL if +the memory could not be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_compile_context_free.html b/doc/html/pcre2_compile_context_free.html new file mode 100644 index 0000000..b4159b1 --- /dev/null +++ b/doc/html/pcre2_compile_context_free.html @@ -0,0 +1,41 @@ + + +pcre2_compile_context_free specification + + +

pcre2_compile_context_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_compile_context_free(pcre2_compile_context *ccontext); +

+
+DESCRIPTION +
+

+This function frees the memory occupied by a compile context, using the memory +freeing function from the general context with which it was created, or +free() if that was not set. If the argument is NULL, the function returns +immediately without doing anything. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_config.html b/doc/html/pcre2_config.html new file mode 100644 index 0000000..f05bd06 --- /dev/null +++ b/doc/html/pcre2_config.html @@ -0,0 +1,84 @@ + + +pcre2_config specification + + +

pcre2_config man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_config(uint32_t what, void *where); +

+
+DESCRIPTION +
+

+This function makes it possible for a client program to find out which optional +features are available in the version of the PCRE2 library it is using. The +arguments are as follows: +

+  what     A code specifying what information is required
+  where    Points to where to put the information
+
+If where is NULL, the function returns the amount of memory needed for +the requested information. When the information is a string, the value is in +code units; for other types of data it is in bytes. +

+

+If where is not NULL, for PCRE2_CONFIG_JITTARGET, +PCRE2_CONFIG_UNICODE_VERSION, and PCRE2_CONFIG_VERSION it must point to a +buffer that is large enough to hold the string. For all other codes it must +point to a uint32_t integer variable. The available codes are: +

+  PCRE2_CONFIG_BSR             Indicates what \R matches by default:
+                                 PCRE2_BSR_UNICODE
+                                 PCRE2_BSR_ANYCRLF
+  PCRE2_CONFIG_COMPILED_WIDTHS Which of 8/16/32 support was compiled
+  PCRE2_CONFIG_DEPTHLIMIT      Default backtracking depth limit
+  PCRE2_CONFIG_HEAPLIMIT       Default heap memory limit
+  PCRE2_CONFIG_JIT             Availability of just-in-time compiler support (1=yes 0=no)
+  PCRE2_CONFIG_JITTARGET       Information (a string) about the target architecture for the JIT compiler
+  PCRE2_CONFIG_LINKSIZE        Configured internal link size (2, 3, 4)
+  PCRE2_CONFIG_MATCHLIMIT      Default internal resource limit
+  PCRE2_CONFIG_NEVER_BACKSLASH_C  Whether or not \C is disabled
+  PCRE2_CONFIG_NEWLINE         Code for the default newline sequence:
+                                 PCRE2_NEWLINE_CR
+                                 PCRE2_NEWLINE_LF
+                                 PCRE2_NEWLINE_CRLF
+                                 PCRE2_NEWLINE_ANY
+                                 PCRE2_NEWLINE_ANYCRLF
+                                 PCRE2_NEWLINE_NUL
+  PCRE2_CONFIG_PARENSLIMIT     Default parentheses nesting limit
+  PCRE2_CONFIG_RECURSIONLIMIT  Obsolete: use PCRE2_CONFIG_DEPTHLIMIT
+  PCRE2_CONFIG_STACKRECURSE    Obsolete: always returns 0
+  PCRE2_CONFIG_UNICODE         Availability of Unicode support (1=yes 0=no)
+  PCRE2_CONFIG_UNICODE_VERSION The Unicode version (a string)
+  PCRE2_CONFIG_VERSION         The PCRE2 version (a string)
+
+The function yields a non-negative value on success or the negative value +PCRE2_ERROR_BADOPTION otherwise. This is also the result for the +PCRE2_CONFIG_JITTARGET code if JIT support is not available. When a string is +requested, the function returns the number of code units used, including the +terminating zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_convert_context_copy.html b/doc/html/pcre2_convert_context_copy.html new file mode 100644 index 0000000..3c44ac6 --- /dev/null +++ b/doc/html/pcre2_convert_context_copy.html @@ -0,0 +1,40 @@ + + +pcre2_convert_context_copy specification + + +

pcre2_convert_context_copy man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_convert_context *pcre2_convert_context_copy( + pcre2_convert_context *cvcontext); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It makes a new copy of a convert context, using the memory allocation function +that was used for the original context. The result is NULL if the memory cannot +be obtained. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_convert_context_create.html b/doc/html/pcre2_convert_context_create.html new file mode 100644 index 0000000..2564780 --- /dev/null +++ b/doc/html/pcre2_convert_context_create.html @@ -0,0 +1,41 @@ + + +pcre2_convert_context_create specification + + +

pcre2_convert_context_create man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_convert_context *pcre2_convert_context_create( + pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It creates and initializes a new convert context. If its argument is +NULL, malloc() is used to get the necessary memory; otherwise the memory +allocation function within the general context is used. The result is NULL if +the memory could not be obtained. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_convert_context_free.html b/doc/html/pcre2_convert_context_free.html new file mode 100644 index 0000000..e9b142b --- /dev/null +++ b/doc/html/pcre2_convert_context_free.html @@ -0,0 +1,40 @@ + + +pcre2_convert_context_free specification + + +

pcre2_convert_context_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_convert_context_free(pcre2_convert_context *cvcontext); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It frees the memory occupied by a convert context, using the memory +freeing function from the general context with which it was created, or +free() if that was not set. If the argument is NULL, the function returns +immediately without doing anything. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_converted_pattern_free.html b/doc/html/pcre2_converted_pattern_free.html new file mode 100644 index 0000000..01d28d7 --- /dev/null +++ b/doc/html/pcre2_converted_pattern_free.html @@ -0,0 +1,40 @@ + + +pcre2_converted_pattern_free specification + + +

pcre2_converted_pattern_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_converted_pattern_free(PCRE2_UCHAR *converted_pattern); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It frees the memory occupied by a converted pattern that was obtained by +calling pcre2_pattern_convert() with arguments that caused it to place +the converted pattern into newly obtained heap memory. If the argument is NULL, +the function returns immediately without doing anything. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_dfa_match.html b/doc/html/pcre2_dfa_match.html new file mode 100644 index 0000000..0ae428c --- /dev/null +++ b/doc/html/pcre2_dfa_match.html @@ -0,0 +1,86 @@ + + +pcre2_dfa_match specification + + +

pcre2_dfa_match man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, + int *workspace, PCRE2_SIZE wscount); +

+
+DESCRIPTION +
+

+This function matches a compiled regular expression against a given subject +string, using an alternative matching algorithm that scans the subject string +just once (except when processing lookaround assertions). This function is +not Perl-compatible (the Perl-compatible matching function is +pcre2_match()). The arguments for this function are: +

+  code         Points to the compiled pattern
+  subject      Points to the subject string
+  length       Length of the subject string
+  startoffset  Offset in the subject at which to start matching
+  options      Option bits
+  match_data   Points to a match data block, for results
+  mcontext     Points to a match context, or is NULL
+  workspace    Points to a vector of ints used as working space
+  wscount      Number of elements in the vector
+
+The size of the output vector needed to contain all the results depends on the +number of simultaneous matches, not on the number of parentheses in the +pattern. Using pcre2_match_data_create_from_pattern() to create the match +data block is therefore not advisable when using this function. +

+

+A match context is needed only if you want to set up a callout function or +specify the heap limit, the match limit, or the recursion depth limit. The +length and startoffset values are in code units, not characters. The +options are: +

+  PCRE2_ANCHORED          Match only at the first position
+  PCRE2_COPY_MATCHED_SUBJECT
+                          On success, make a private subject copy
+  PCRE2_ENDANCHORED       Pattern can match only at end of subject
+  PCRE2_NOTBOL            Subject is not the beginning of a line
+  PCRE2_NOTEOL            Subject is not the end of a line
+  PCRE2_NOTEMPTY          An empty string is not a valid match
+  PCRE2_NOTEMPTY_ATSTART  An empty string at the start of the subject is not a valid match
+  PCRE2_NO_UTF_CHECK      Do not check the subject for UTF validity (only relevant if PCRE2_UTF
+                           was set at compile time)
+  PCRE2_PARTIAL_HARD      Return PCRE2_ERROR_PARTIAL for a partial match even if there is a full match
+  PCRE2_PARTIAL_SOFT      Return PCRE2_ERROR_PARTIAL for a partial match if no full matches are found
+  PCRE2_DFA_RESTART       Restart after a partial match
+  PCRE2_DFA_SHORTEST      Return only the shortest match
+
+There are restrictions on what may appear in a pattern when using this matching +function. Details are given in the +pcre2matching +documentation. For details of partial matching, see the +pcre2partial +page. There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_general_context_copy.html b/doc/html/pcre2_general_context_copy.html new file mode 100644 index 0000000..0018534 --- /dev/null +++ b/doc/html/pcre2_general_context_copy.html @@ -0,0 +1,42 @@ + + +pcre2_general_context_copy specification + + +

pcre2_general_context_copy man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_general_context *pcre2_general_context_copy( + pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function makes a new copy of a general context, using the memory +allocation functions in the context, if set, to get the necessary memory. +Otherwise malloc() is used. The result is NULL if the memory cannot be +obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_general_context_create.html b/doc/html/pcre2_general_context_create.html new file mode 100644 index 0000000..a1a165d --- /dev/null +++ b/doc/html/pcre2_general_context_create.html @@ -0,0 +1,44 @@ + + +pcre2_general_context_create specification + + +

pcre2_general_context_create man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_general_context *pcre2_general_context_create( + void *(*private_malloc)(size_t, void *), + void (*private_free)(void *, void *), void *memory_data); +

+
+DESCRIPTION +
+

+This function creates and initializes a general context. The arguments define +custom memory management functions and a data value that is passed to them when +they are called. The private_malloc() function is used to get memory for +the context. If either of the first two arguments is NULL, the system memory +management function is used. The result is NULL if no memory could be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_general_context_free.html b/doc/html/pcre2_general_context_free.html new file mode 100644 index 0000000..9f335f5 --- /dev/null +++ b/doc/html/pcre2_general_context_free.html @@ -0,0 +1,40 @@ + + +pcre2_general_context_free specification + + +

pcre2_general_context_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_general_context_free(pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function frees the memory occupied by a general context, using the memory +freeing function within the context, if set. If the argument is NULL, the +function returns immediately without doing anything. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_get_error_message.html b/doc/html/pcre2_get_error_message.html new file mode 100644 index 0000000..7005760 --- /dev/null +++ b/doc/html/pcre2_get_error_message.html @@ -0,0 +1,51 @@ + + +pcre2_get_error_message specification + + +

pcre2_get_error_message man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, + PCRE2_SIZE bufflen); +

+
+DESCRIPTION +
+

+This function provides a textual error message for each PCRE2 error code. +Compilation errors are positive numbers; UTF formatting errors and matching +errors are negative numbers. The arguments are: +

+  errorcode   an error code (positive or negative)
+  buffer      where to put the message
+  bufflen     the length of the buffer (code units)
+
+The function returns the length of the message in code units, excluding the +trailing zero, or the negative error code PCRE2_ERROR_NOMEMORY if the buffer is +too small. In this case, the returned message is truncated (but still with a +trailing zero). If errorcode does not contain a recognized error code +number, the negative value PCRE2_ERROR_BADDATA is returned. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_get_mark.html b/doc/html/pcre2_get_mark.html new file mode 100644 index 0000000..88e6326 --- /dev/null +++ b/doc/html/pcre2_get_mark.html @@ -0,0 +1,47 @@ + + +pcre2_get_mark specification + + +

pcre2_get_mark man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+After a call of pcre2_match() that was passed the match block that is +this function's argument, this function returns a pointer to the last (*MARK), +(*PRUNE), or (*THEN) name that was encountered during the matching process. The +name is zero-terminated, and is within the compiled pattern. The length of the +name is in the preceding code unit. If no name is available, NULL is returned. +

+

+After a successful match, the name that is returned is the last one on the +matching path. After a failed match or a partial match, the last encountered +name is returned. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_get_match_data_heapframes_size.html b/doc/html/pcre2_get_match_data_heapframes_size.html new file mode 100644 index 0000000..3c705c6 --- /dev/null +++ b/doc/html/pcre2_get_match_data_heapframes_size.html @@ -0,0 +1,40 @@ + + +pcre2_get_match_data_heapframes_size specification + + +

pcre2_get_match_data_heapframes_size man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+PCRE2_SIZE pcre2_get_match_data_heapframes_size( + pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+This function returns the size, in bytes, of the heapframes data block that is +owned by its argument. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_get_match_data_size.html b/doc/html/pcre2_get_match_data_size.html new file mode 100644 index 0000000..113ecaa --- /dev/null +++ b/doc/html/pcre2_get_match_data_size.html @@ -0,0 +1,39 @@ + + +pcre2_get_match_data_size specification + + +

pcre2_get_match_data_size man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+This function returns the size, in bytes, of the match data block that is its +argument. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_get_ovector_count.html b/doc/html/pcre2_get_ovector_count.html new file mode 100644 index 0000000..05aacb6 --- /dev/null +++ b/doc/html/pcre2_get_ovector_count.html @@ -0,0 +1,39 @@ + + +pcre2_get_ovector_count specification + + +

pcre2_get_ovector_count man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+This function returns the number of pairs of offsets in the ovector that forms +part of the given match data block. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_get_ovector_pointer.html b/doc/html/pcre2_get_ovector_pointer.html new file mode 100644 index 0000000..ff6317e --- /dev/null +++ b/doc/html/pcre2_get_ovector_pointer.html @@ -0,0 +1,40 @@ + + +pcre2_get_ovector_pointer specification + + +

pcre2_get_ovector_pointer man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+This function returns a pointer to the vector of offsets that forms part of the +given match data block. The number of pairs can be found by calling +pcre2_get_ovector_count(). +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_get_startchar.html b/doc/html/pcre2_get_startchar.html new file mode 100644 index 0000000..d2c28b2 --- /dev/null +++ b/doc/html/pcre2_get_startchar.html @@ -0,0 +1,44 @@ + + +pcre2_get_startchar specification + + +

pcre2_get_startchar man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+After a successful call of pcre2_match() that was passed the match block +that is this function's argument, this function returns the code unit offset of +the character at which the successful match started. For a non-partial match, +this can be different to the value of ovector[0] if the pattern contains +the \K escape sequence. After a partial match, however, this value is always +the same as ovector[0] because \K does not affect the result of a +partial match. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_jit_compile.html b/doc/html/pcre2_jit_compile.html new file mode 100644 index 0000000..873d0dd --- /dev/null +++ b/doc/html/pcre2_jit_compile.html @@ -0,0 +1,63 @@ + + +pcre2_jit_compile specification + + +

pcre2_jit_compile man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_jit_compile(pcre2_code *code, uint32_t options); +

+
+DESCRIPTION +
+

+This function requests JIT compilation, which, if the just-in-time compiler is +available, further processes a compiled pattern into machine code that executes +much faster than the pcre2_match() interpretive matching function. Full +details are given in the +pcre2jit +documentation. +

+

+The first argument is a pointer that was returned by a successful call to +pcre2_compile(), and the second must contain one or more of the following +bits: +

+  PCRE2_JIT_COMPLETE      compile code for full matching
+  PCRE2_JIT_PARTIAL_SOFT  compile code for soft partial matching
+  PCRE2_JIT_PARTIAL_HARD  compile code for hard partial matching
+
+There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been +superseded by the pcre2_compile() option PCRE2_MATCH_INVALID_UTF. The old +option is deprecated and may be removed in the future. +

+

+The yield of the function is 0 for success, or a negative error code otherwise. +In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or +if an unknown bit is set in options. The function can also return +PCRE2_ERROR_NOMEMORY if JIT is unable to allocate executable memory for the +compiler, even if it was because of a system security restriction. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_jit_free_unused_memory.html b/doc/html/pcre2_jit_free_unused_memory.html new file mode 100644 index 0000000..7f37e58 --- /dev/null +++ b/doc/html/pcre2_jit_free_unused_memory.html @@ -0,0 +1,43 @@ + + +pcre2_jit_free_unused_memory specification + + +

pcre2_jit_free_unused_memory man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function frees unused JIT executable memory. The argument is a general +context, for custom memory management, or NULL for standard memory management. +JIT memory allocation retains some memory in order to improve future JIT +compilation speed. In low memory conditions, +pcre2_jit_free_unused_memory() can be used to cause this memory to be +freed. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_jit_match.html b/doc/html/pcre2_jit_match.html new file mode 100644 index 0000000..56144ff --- /dev/null +++ b/doc/html/pcre2_jit_match.html @@ -0,0 +1,70 @@ + + +pcre2_jit_match specification + + +

pcre2_jit_match man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); +

+
+DESCRIPTION +
+

+This function matches a compiled regular expression that has been successfully +processed by the JIT compiler against a given subject string, using a matching +algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and +it bypasses some of the sanity checks that pcre2_match() applies. +

+

+In UTF mode, the subject string is not checked for UTF validity. Unless +PCRE2_MATCH_INVALID_UTF was set when the pattern was compiled, passing an +invalid UTF string results in undefined behaviour. Your program may crash or +loop or give wrong results. In the absence of PCRE2_MATCH_INVALID_UTF you +should only call pcre2_jit_match() in UTF mode if you are sure the +subject is valid. +

+

+The arguments for pcre2_jit_match() are exactly the same as for +pcre2_match(), +except that the subject string must be specified with a length; +PCRE2_ZERO_TERMINATED is not supported. +

+

+The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, +PCRE2_NOTEMPTY_ATSTART, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Unsupported +options are ignored. +

+

+The return values are the same as for pcre2_match() plus +PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested +that was not compiled. For details of partial matching, see the +pcre2partial +page. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the JIT API in the +pcre2jit +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_jit_stack_assign.html b/doc/html/pcre2_jit_stack_assign.html new file mode 100644 index 0000000..4b3abb9 --- /dev/null +++ b/doc/html/pcre2_jit_stack_assign.html @@ -0,0 +1,75 @@ + + +pcre2_jit_stack_assign specification + + +

pcre2_jit_stack_assign man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_jit_stack_assign(pcre2_match_context *mcontext, + pcre2_jit_callback callback_function, void *callback_data); +

+
+DESCRIPTION +
+

+This function provides control over the memory used by JIT as a run-time stack +when pcre2_match() or pcre2_jit_match() is called with a pattern +that has been successfully processed by the JIT compiler. The information that +determines which stack is used is put into a match context that is subsequently +passed to a matching function. The arguments of this function are: +

+  mcontext       a pointer to a match context
+  callback       a callback function
+  callback_data  a JIT stack or a value to be passed to the callback
+
+

+

+If mcontext is NULL, the function returns immediately, without doing +anything. +

+

+If callback is NULL and callback_data is NULL, an internal 32KiB +block on the machine stack is used. +

+

+If callback is NULL and callback_data is not NULL, +callback_data must be a valid JIT stack, the result of calling +pcre2_jit_stack_create(). +

+

+If callback is not NULL, it is called with callback_data as an +argument at the start of matching, in order to set up a JIT stack. If the +result is NULL, the internal 32KiB stack is used; otherwise the return value +must be a valid JIT stack, the result of calling +pcre2_jit_stack_create(). +

+

+You may safely use the same JIT stack for multiple patterns, as long as they +are all matched in the same thread. In a multithread application, each thread +must use its own JIT stack. For more details, see the +pcre2jit +page. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_jit_stack_create.html b/doc/html/pcre2_jit_stack_create.html new file mode 100644 index 0000000..b9dc59d --- /dev/null +++ b/doc/html/pcre2_jit_stack_create.html @@ -0,0 +1,50 @@ + + +pcre2_jit_stack_create specification + + +

pcre2_jit_stack_create man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_jit_stack *pcre2_jit_stack_create(size_t startsize, + size_t maxsize, pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function is used to create a stack for use by the code compiled by the JIT +compiler. The first two arguments are a starting size for the stack, and a +maximum size to which it is allowed to grow. The final argument is a general +context, for memory allocation functions, or NULL for standard memory +allocation. The result can be passed to the JIT run-time code by calling +pcre2_jit_stack_assign() to associate the stack with a compiled pattern, +which can then be processed by pcre2_match() or pcre2_jit_match(). +A maximum stack size of 512KiB to 1MiB should be more than enough for any +pattern. If the stack couldn't be allocated or the values passed were not +reasonable, NULL will be returned. For more details, see the +pcre2jit +page. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_jit_stack_free.html b/doc/html/pcre2_jit_stack_free.html new file mode 100644 index 0000000..1d078d7 --- /dev/null +++ b/doc/html/pcre2_jit_stack_free.html @@ -0,0 +1,43 @@ + + +pcre2_jit_stack_free specification + + +

pcre2_jit_stack_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack); +

+
+DESCRIPTION +
+

+This function is used to free a JIT stack that was created by +pcre2_jit_stack_create() when it is no longer needed. If the argument is +NULL, the function returns immediately without doing anything. For more +details, see the +pcre2jit +page. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_maketables.html b/doc/html/pcre2_maketables.html new file mode 100644 index 0000000..1963654 --- /dev/null +++ b/doc/html/pcre2_maketables.html @@ -0,0 +1,48 @@ + + +pcre2_maketables specification + + +

pcre2_maketables man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+const uint8_t *pcre2_maketables(pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function builds a set of character tables for character code points that +are less than 256. These can be passed to pcre2_compile() in a compile +context in order to override the internal, built-in tables (which were either +defaulted or made by pcre2_maketables() when PCRE2 was compiled). See the +pcre2_set_character_tables() +page. You might want to do this if you are using a non-standard locale. +

+

+If the argument is NULL, malloc() is used to get memory for the tables. +Otherwise it must point to a general context, which can supply pointers to a +custom memory manager. The function yields a pointer to the tables. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_maketables_free.html b/doc/html/pcre2_maketables_free.html new file mode 100644 index 0000000..7316ab2 --- /dev/null +++ b/doc/html/pcre2_maketables_free.html @@ -0,0 +1,44 @@ + + +pcre2_maketables_free specification + + +

pcre2_maketables_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); +

+
+DESCRIPTION +
+

+This function discards a set of character tables that were created by a call +to +pcre2_maketables(). +

+

+The gcontext parameter should match what was used in that call to +account for any custom allocators that might be in use; if it is NULL +the system free() is used. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_match.html b/doc/html/pcre2_match.html new file mode 100644 index 0000000..5584ae3 --- /dev/null +++ b/doc/html/pcre2_match.html @@ -0,0 +1,87 @@ + + +pcre2_match specification + + +

pcre2_match man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); +

+
+DESCRIPTION +
+

+This function matches a compiled regular expression against a given subject +string, using a matching algorithm that is similar to Perl's. It returns +offsets to what it has matched and to captured substrings via the +match_data block, which can be processed by functions with names that +start with pcre2_get_ovector_...() or pcre2_substring_...(). The +return from pcre2_match() is one more than the highest numbered capturing +pair that has been set (for example, 1 if there are no captures), zero if the +vector of offsets is too small, or a negative error code for no match and other +errors. The function arguments are: +

+  code         Points to the compiled pattern
+  subject      Points to the subject string
+  length       Length of the subject string
+  startoffset  Offset in the subject at which to start matching
+  options      Option bits
+  match_data   Points to a match data block, for results
+  mcontext     Points to a match context, or is NULL
+
+A match context is needed only if you want to: +
+  Set up a callout function
+  Set a matching offset limit
+  Change the heap memory limit
+  Change the backtracking match limit
+  Change the backtracking depth limit
+  Set custom memory management specifically for the match
+
+The length and startoffset values are code units, not characters. +The length may be given as PCRE2_ZERO_TERMINATED for a subject that is +terminated by a binary zero code unit. The options are: +
+  PCRE2_ANCHORED          Match only at the first position
+  PCRE2_COPY_MATCHED_SUBJECT
+                          On success, make a private subject copy
+  PCRE2_DISABLE_RECURSELOOP_CHECK
+                          Only useful in rare cases; use with care
+  PCRE2_ENDANCHORED       Pattern can match only at end of subject
+  PCRE2_NOTBOL            Subject string is not the beginning of a line
+  PCRE2_NOTEOL            Subject string is not the end of a line
+  PCRE2_NOTEMPTY          An empty string is not a valid match
+  PCRE2_NOTEMPTY_ATSTART  An empty string at the start of the subject is not a valid match
+  PCRE2_NO_JIT            Do not use JIT matching
+  PCRE2_NO_UTF_CHECK      Do not check the subject for UTF validity (only relevant if PCRE2_UTF
+                           was set at compile time)
+  PCRE2_PARTIAL_HARD      Return PCRE2_ERROR_PARTIAL for a partial match even if there is a full match
+  PCRE2_PARTIAL_SOFT      Return PCRE2_ERROR_PARTIAL for a partial match if no full matches are found
+
+For details of partial matching, see the +pcre2partial +page. There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_match_context_copy.html b/doc/html/pcre2_match_context_copy.html new file mode 100644 index 0000000..4a719d6 --- /dev/null +++ b/doc/html/pcre2_match_context_copy.html @@ -0,0 +1,41 @@ + + +pcre2_match_context_copy specification + + +

pcre2_match_context_copy man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_match_context *pcre2_match_context_copy( + pcre2_match_context *mcontext); +

+
+DESCRIPTION +
+

+This function makes a new copy of a match context, using the memory +allocation function that was used for the original context. The result is NULL +if the memory cannot be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_match_context_create.html b/doc/html/pcre2_match_context_create.html new file mode 100644 index 0000000..f7f2735 --- /dev/null +++ b/doc/html/pcre2_match_context_create.html @@ -0,0 +1,42 @@ + + +pcre2_match_context_create specification + + +

pcre2_match_context_create man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_match_context *pcre2_match_context_create( + pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function creates and initializes a new match context. If its argument is +NULL, malloc() is used to get the necessary memory; otherwise the memory +allocation function within the general context is used. The result is NULL if +the memory could not be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_match_context_free.html b/doc/html/pcre2_match_context_free.html new file mode 100644 index 0000000..7f00ea9 --- /dev/null +++ b/doc/html/pcre2_match_context_free.html @@ -0,0 +1,41 @@ + + +pcre2_match_context_free specification + + +

pcre2_match_context_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_match_context_free(pcre2_match_context *mcontext); +

+
+DESCRIPTION +
+

+This function frees the memory occupied by a match context, using the memory +freeing function from the general context with which it was created, or +free() if that was not set. If the argument is NULL, the function returns +immediately without doing anything. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_match_data_create.html b/doc/html/pcre2_match_data_create.html new file mode 100644 index 0000000..c26c3b3 --- /dev/null +++ b/doc/html/pcre2_match_data_create.html @@ -0,0 +1,50 @@ + + +pcre2_match_data_create specification + + +

pcre2_match_data_create man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, + pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function creates a new match data block, which is used for holding the +result of a match. The first argument specifies the number of pairs of offsets +that are required. These form the "output vector" (ovector) within the match +data block, and are used to identify the matched string and any captured +substrings when matching with pcre2_match(), or a number of different +matches at the same point when used with pcre2_dfa_match(). There is +always one pair of offsets; if ovecsize is zero, it is treated as one. +

+

+The second argument points to a general context, for custom memory management, +or is NULL for system memory management. The result of the function is NULL if +the memory for the block could not be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_match_data_create_from_pattern.html b/doc/html/pcre2_match_data_create_from_pattern.html new file mode 100644 index 0000000..db58ab9 --- /dev/null +++ b/doc/html/pcre2_match_data_create_from_pattern.html @@ -0,0 +1,53 @@ + + +pcre2_match_data_create_from_pattern specification + + +

pcre2_match_data_create_from_pattern man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function creates a new match data block for holding the result of a match. +The first argument points to a compiled pattern. The number of capturing +parentheses within the pattern is used to compute the number of pairs of +offsets that are required in the match data block. These form the "output +vector" (ovector) within the match data block, and are used to identify the +matched string and any captured substrings when matching with +pcre2_match(). If you are using pcre2_dfa_match(), which uses the +output vector in a different way, you should use pcre2_match_data_create() +instead of this function. +

+

+The second argument points to a general context, for custom memory management, +or is NULL to use the same memory allocator as was used for the compiled +pattern. The result of the function is NULL if the memory for the block could +not be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_match_data_free.html b/doc/html/pcre2_match_data_free.html new file mode 100644 index 0000000..1c2520b --- /dev/null +++ b/doc/html/pcre2_match_data_free.html @@ -0,0 +1,48 @@ + + +pcre2_match_data_free specification + + +

pcre2_match_data_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_match_data_free(pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+If match_data is NULL, this function does nothing. Otherwise, +match_data must point to a match data block, which this function frees, +using the memory freeing function from the general context or compiled pattern +with which it was created, or free() if that was not set. If the match +data block was previously passed to pcre2_match(), it will have an +attached heapframe vector; this is also freed. +

+

+If the PCRE2_COPY_MATCHED_SUBJECT option was used for a successful match using this +match data block, the copy of the subject that was referenced within the block +is also freed. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_pattern_convert.html b/doc/html/pcre2_pattern_convert.html new file mode 100644 index 0000000..2fcd7cc --- /dev/null +++ b/doc/html/pcre2_pattern_convert.html @@ -0,0 +1,70 @@ + + +pcre2_pattern_convert specification + + +

pcre2_pattern_convert man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, PCRE2_UCHAR **buffer, + PCRE2_SIZE *blength, pcre2_convert_context *cvcontext); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It converts a foreign pattern (for example, a glob) into a PCRE2 regular +expression pattern. Its arguments are: +

+  pattern     The foreign pattern
+  length      The length of the input pattern or PCRE2_ZERO_TERMINATED
+  options     Option bits
+  buffer      Pointer to pointer to output buffer, or NULL
+  blength     Pointer to output length field
+  cvcontext   Pointer to a convert context or NULL
+
+The length of the converted pattern (excluding the terminating zero) is +returned via blength. If buffer is NULL, the function just returns +the output length. If buffer points to a NULL pointer, heap memory is +obtained for the converted pattern, using the allocator in the context if +present (or else malloc()), and the field pointed to by buffer is +updated. If buffer points to a non-NULL field, that must point to a +buffer whose size is in the variable pointed to by blength. This value is +updated. +

+

+The option bits are: +

+  PCRE2_CONVERT_UTF                     Input is UTF
+  PCRE2_CONVERT_NO_UTF_CHECK            Do not check UTF validity
+  PCRE2_CONVERT_POSIX_BASIC             Convert POSIX basic pattern
+  PCRE2_CONVERT_POSIX_EXTENDED          Convert POSIX extended pattern
+  PCRE2_CONVERT_GLOB                    ) Convert
+  PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR  )   various types
+  PCRE2_CONVERT_GLOB_NO_STARSTAR        )     of glob
+
+The return value from pcre2_pattern_convert() is zero on success or a +non-zero PCRE2 error code. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_pattern_info.html b/doc/html/pcre2_pattern_info.html new file mode 100644 index 0000000..eaaac6c --- /dev/null +++ b/doc/html/pcre2_pattern_info.html @@ -0,0 +1,109 @@ + + +pcre2_pattern_info specification + + +

pcre2_pattern_info man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_pattern_info(const pcre2_code *code, uint32_t what, + void *where); +

+
+DESCRIPTION +
+

+This function returns information about a compiled pattern. Its arguments are: +

+  code     Pointer to a compiled regular expression pattern
+  what     What information is required
+  where    Where to put the information
+
+The recognized values for the what argument, and the information they +request are as follows: +
+  PCRE2_INFO_ALLOPTIONS      Final options after compiling
+  PCRE2_INFO_ARGOPTIONS      Options passed to pcre2_compile()
+  PCRE2_INFO_BACKREFMAX      Number of highest backreference
+  PCRE2_INFO_BSR             What \R matches:
+                               PCRE2_BSR_UNICODE: Unicode line endings
+                               PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only
+  PCRE2_INFO_CAPTURECOUNT    Number of capturing subpatterns
+  PCRE2_INFO_DEPTHLIMIT      Backtracking depth limit if set, otherwise PCRE2_ERROR_UNSET
+  PCRE2_INFO_EXTRAOPTIONS    Extra options that were passed in the
+                               compile context
+  PCRE2_INFO_FIRSTBITMAP     Bitmap of first code units, or NULL
+  PCRE2_INFO_FIRSTCODETYPE   Type of start-of-match information
+                               0 nothing set
+                               1 first code unit is set
+                               2 start of string or after newline
+  PCRE2_INFO_FIRSTCODEUNIT   First code unit when type is 1
+  PCRE2_INFO_FRAMESIZE       Size of backtracking frame
+  PCRE2_INFO_HASBACKSLASHC   Return 1 if pattern contains \C
+  PCRE2_INFO_HASCRORLF       Return 1 if explicit CR or LF matches exist in the pattern
+  PCRE2_INFO_HEAPLIMIT       Heap memory limit if set, otherwise PCRE2_ERROR_UNSET
+  PCRE2_INFO_JCHANGED        Return 1 if (?J) or (?-J) was used
+  PCRE2_INFO_JITSIZE         Size of JIT compiled code, or 0
+  PCRE2_INFO_LASTCODETYPE    Type of must-be-present information
+                               0 nothing set
+                               1 code unit is set
+  PCRE2_INFO_LASTCODEUNIT    Last code unit when type is 1
+  PCRE2_INFO_MATCHEMPTY      1 if the pattern can match an empty string, 0 otherwise
+  PCRE2_INFO_MATCHLIMIT      Match limit if set, otherwise PCRE2_ERROR_UNSET
+  PCRE2_INFO_MAXLOOKBEHIND   Length (in characters) of the longest lookbehind assertion
+  PCRE2_INFO_MINLENGTH       Lower bound length of matching strings
+  PCRE2_INFO_NAMECOUNT       Number of named subpatterns
+  PCRE2_INFO_NAMEENTRYSIZE   Size of name table entries
+  PCRE2_INFO_NAMETABLE       Pointer to name table
+  PCRE2_INFO_NEWLINE         Code for the newline sequence:
+                               PCRE2_NEWLINE_CR
+                               PCRE2_NEWLINE_LF
+                               PCRE2_NEWLINE_CRLF
+                               PCRE2_NEWLINE_ANY
+                               PCRE2_NEWLINE_ANYCRLF
+                               PCRE2_NEWLINE_NUL
+  PCRE2_INFO_RECURSIONLIMIT  Obsolete synonym for PCRE2_INFO_DEPTHLIMIT
+  PCRE2_INFO_SIZE            Size of compiled pattern
+
+If where is NULL, the function returns the amount of memory needed for +the requested information, in bytes. Otherwise, the where argument must +point to an unsigned 32-bit integer (uint32_t variable), except for the +following what values, when it must point to a variable of the type +shown: +
+  PCRE2_INFO_FIRSTBITMAP     const uint8_t *
+  PCRE2_INFO_JITSIZE         size_t
+  PCRE2_INFO_NAMETABLE       PCRE2_SPTR
+  PCRE2_INFO_SIZE            size_t
+
+The yield of the function is zero on success or: +
+  PCRE2_ERROR_NULL           the argument code is NULL
+  PCRE2_ERROR_BADMAGIC       the "magic number" was not found
+  PCRE2_ERROR_BADOPTION      the value of what is invalid
+  PCRE2_ERROR_BADMODE        the pattern was compiled in the wrong mode
+  PCRE2_ERROR_UNSET          the requested information is not set
+
+

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_serialize_decode.html b/doc/html/pcre2_serialize_decode.html new file mode 100644 index 0000000..618ffa9 --- /dev/null +++ b/doc/html/pcre2_serialize_decode.html @@ -0,0 +1,65 @@ + + +pcre2_serialize_decode specification + + +

pcre2_serialize_decode man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int32_t pcre2_serialize_decode(pcre2_code **codes, + int32_t number_of_codes, const uint8_t *bytes, + pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function decodes a serialized set of compiled patterns back into a list of +individual patterns. This is possible only on a host that is running the same +version of PCRE2, with the same code unit width, and the host must also have +the same endianness, pointer width and PCRE2_SIZE type. The arguments for +pcre2_serialize_decode() are: +

+  codes            pointer to a vector in which to build the list
+  number_of_codes  number of slots in the vector
+  bytes            the serialized byte stream
+  gcontext         pointer to a general context or NULL
+
+The bytes argument must point to a block of data that was originally +created by pcre2_serialize_encode(), though it may have been saved on +disc or elsewhere in the meantime. If there are more codes in the serialized +data than slots in the list, only those compiled patterns that will fit are +decoded. The yield of the function is the number of decoded patterns, or one of +the following negative error codes: +
+  PCRE2_ERROR_BADDATA   number_of_codes is zero or less
+  PCRE2_ERROR_BADMAGIC  mismatch of id bytes in bytes
+  PCRE2_ERROR_BADMODE   mismatch of variable unit size or PCRE2 version
+  PCRE2_ERROR_NOMEMORY  memory allocation failed
+  PCRE2_ERROR_NULL      codes or bytes is NULL
+
+PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +on a system with different endianness. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the serialization functions in the +pcre2serialize +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_serialize_encode.html b/doc/html/pcre2_serialize_encode.html new file mode 100644 index 0000000..f153270 --- /dev/null +++ b/doc/html/pcre2_serialize_encode.html @@ -0,0 +1,66 @@ + + +pcre2_serialize_encode specification + + +

pcre2_serialize_encode man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, + PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function encodes a list of compiled patterns into a byte stream that can +be saved on disc or elsewhere. Note that this is not an abstract format like +Java or .NET. Conversion of the byte stream back into usable compiled patterns +can only happen on a host that is running the same version of PCRE2, with the +same code unit width, and the host must also have the same endianness, pointer +width and PCRE2_SIZE type. The arguments for pcre2_serialize_encode() +are: +

+  codes             pointer to a vector containing the list
+  number_of_codes   number of slots in the vector
+  serialized_bytes  set to point to the serialized byte stream
+  serialized_size   set to the number of bytes in the byte stream
+  gcontext          pointer to a general context or NULL
+
+The context argument is used to obtain memory for the byte stream. When the +serialized data is no longer needed, it must be freed by calling +pcre2_serialize_free(). The yield of the function is the number of +serialized patterns, or one of the following negative error codes: +
+  PCRE2_ERROR_BADDATA      number_of_codes is zero or less
+  PCRE2_ERROR_BADMAGIC     mismatch of id bytes in one of the patterns
+  PCRE2_ERROR_NOMEMORY     memory allocation failed
+  PCRE2_ERROR_MIXEDTABLES  the patterns do not all use the same tables
+  PCRE2_ERROR_NULL         an argument other than gcontext is NULL
+
+PCRE2_ERROR_BADMAGIC means either that a pattern's code has been corrupted, or +that a slot in the vector does not point to a compiled pattern. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the serialization functions in the +pcre2serialize +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_serialize_free.html b/doc/html/pcre2_serialize_free.html new file mode 100644 index 0000000..26b435b --- /dev/null +++ b/doc/html/pcre2_serialize_free.html @@ -0,0 +1,41 @@ + + +pcre2_serialize_free specification + + +

pcre2_serialize_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_serialize_free(uint8_t *bytes); +

+
+DESCRIPTION +
+

+This function frees the memory that was obtained by +pcre2_serialize_encode() to hold a serialized byte stream. The argument +must point to such a byte stream or be NULL, in which case the function returns +without doing anything. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the serialization functions in the +pcre2serialize +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_serialize_get_number_of_codes.html b/doc/html/pcre2_serialize_get_number_of_codes.html new file mode 100644 index 0000000..fdd2429 --- /dev/null +++ b/doc/html/pcre2_serialize_get_number_of_codes.html @@ -0,0 +1,49 @@ + + +pcre2_serialize_get_number_of_codes specification + + +

pcre2_serialize_get_number_of_codes man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int32_t pcre2_serialize_get_number_of_codes(const uint8_t *bytes); +

+
+DESCRIPTION +
+

+The bytes argument must point to a serialized byte stream that was +originally created by pcre2_serialize_encode() (though it may have been +saved on disc or elsewhere in the meantime). The function returns the number of +serialized patterns in the byte stream, or one of the following negative error +codes: +

+  PCRE2_ERROR_BADMAGIC  mismatch of id bytes in bytes
+  PCRE2_ERROR_BADMODE   mismatch of variable unit size or PCRE2 version
+  PCRE2_ERROR_NULL      the argument is NULL
+
+PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +on a system with different endianness. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the serialization functions in the +pcre2serialize +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_bsr.html b/doc/html/pcre2_set_bsr.html new file mode 100644 index 0000000..8a62f18 --- /dev/null +++ b/doc/html/pcre2_set_bsr.html @@ -0,0 +1,42 @@ + + +pcre2_set_bsr specification + + +

pcre2_set_bsr man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_bsr(pcre2_compile_context *ccontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function sets the convention for processing \R within a compile context. +The second argument must be one of PCRE2_BSR_ANYCRLF or PCRE2_BSR_UNICODE. The +result is zero for success or PCRE2_ERROR_BADDATA if the second argument is +invalid. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_callout.html b/doc/html/pcre2_set_callout.html new file mode 100644 index 0000000..4e7aca6 --- /dev/null +++ b/doc/html/pcre2_set_callout.html @@ -0,0 +1,43 @@ + + +pcre2_set_callout specification + + +

pcre2_set_callout man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_callout_block *), + void *callout_data); +

+
+DESCRIPTION +
+

+This function sets the callout fields in a match context (the first argument). +The second argument specifies a callout function, and the third argument is an +opaque data item that is passed to it. The result of this function is always +zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_character_tables.html b/doc/html/pcre2_set_character_tables.html new file mode 100644 index 0000000..8564eea --- /dev/null +++ b/doc/html/pcre2_set_character_tables.html @@ -0,0 +1,45 @@ + + +pcre2_set_character_tables specification + + +

pcre2_set_character_tables man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_character_tables(pcre2_compile_context *ccontext, + const uint8_t *tables); +

+
+DESCRIPTION +
+

+This function sets a pointer to custom character tables within a compile +context. The second argument must point to a set of PCRE2 character tables or +be NULL to request the default tables. The result is always zero. Character +tables can be created by calling pcre2_maketables() or by running the +pcre2_dftables maintenance command in binary mode (see the +pcre2build +documentation). +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_compile_extra_options.html b/doc/html/pcre2_set_compile_extra_options.html new file mode 100644 index 0000000..4924ed7 --- /dev/null +++ b/doc/html/pcre2_set_compile_extra_options.html @@ -0,0 +1,54 @@ + + +pcre2_set_compile_extra_options specification + + +

pcre2_set_compile_extra_options man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, + uint32_t extra_options); +

+
+DESCRIPTION +
+

+This function sets additional option bits for pcre2_compile() that are +housed in a compile context. It completely replaces all the bits. The extra +options are: +

+  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     Allow \K in lookarounds
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
+  PCRE2_EXTRA_ALT_BSUX                 Extended alternate \u, \U, and \x handling
+  PCRE2_EXTRA_ASCII_BSD                \d remains ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_BSS                \s remains ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_BSW                \w remains ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_DIGIT              [:digit:] and [:xdigit:] POSIX classes remain ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_POSIX              POSIX classes remain ASCII in UCP mode
+  PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL    Treat all invalid escapes as a literal following character
+  PCRE2_EXTRA_CASELESS_RESTRICT        Disable mixed ASCII/non-ASCII case folding
+  PCRE2_EXTRA_ESCAPED_CR_IS_LF         Interpret \r as \n
+  PCRE2_EXTRA_MATCH_LINE               Pattern matches whole lines
+  PCRE2_EXTRA_MATCH_WORD               Pattern matches "words"
+
+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_compile_recursion_guard.html b/doc/html/pcre2_set_compile_recursion_guard.html new file mode 100644 index 0000000..c09942c --- /dev/null +++ b/doc/html/pcre2_set_compile_recursion_guard.html @@ -0,0 +1,46 @@ + + +pcre2_set_compile_recursion_guard specification + + +

pcre2_set_compile_recursion_guard man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, + int (*guard_function)(uint32_t, void *), void *user_data); +

+
+DESCRIPTION +
+

+This function defines, within a compile context, a function that is called +whenever pcre2_compile() starts to compile a parenthesized part of a +pattern. The first argument to the function gives the current depth of +parenthesis nesting, and the second is user data that is supplied when the +function is set up. The guard function should return zero if all is well, or +non-zero to force an error. This feature is provided so that applications can +check the available system stack space, in order to avoid running out. The +result of pcre2_set_compile_recursion_guard() is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_depth_limit.html b/doc/html/pcre2_set_depth_limit.html new file mode 100644 index 0000000..a1cf706 --- /dev/null +++ b/doc/html/pcre2_set_depth_limit.html @@ -0,0 +1,40 @@ + + +pcre2_set_depth_limit specification + + +

pcre2_set_depth_limit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_depth_limit(pcre2_match_context *mcontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function sets the backtracking depth limit field in a match context. The +result is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_glob_escape.html b/doc/html/pcre2_set_glob_escape.html new file mode 100644 index 0000000..2b55627 --- /dev/null +++ b/doc/html/pcre2_set_glob_escape.html @@ -0,0 +1,43 @@ + + +pcre2_set_glob_escape specification + + +

pcre2_set_glob_escape man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_glob_escape(pcre2_convert_context *cvcontext, + uint32_t escape_char); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It sets the escape character that is used when converting globs. The second +argument must either be zero (meaning there is no escape character) or a +punctuation character whose code point is less than 256. The default is grave +accent if running under Windows, otherwise backslash. The result of the +function is zero for success or PCRE2_ERROR_BADDATA if the second argument is +invalid. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_glob_separator.html b/doc/html/pcre2_set_glob_separator.html new file mode 100644 index 0000000..283648e --- /dev/null +++ b/doc/html/pcre2_set_glob_separator.html @@ -0,0 +1,42 @@ + + +pcre2_set_glob_separator specification + + +

pcre2_set_glob_separator man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_glob_separator(pcre2_convert_context *cvcontext, + uint32_t separator_char); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It sets the component separator character that is used when converting globs. +The second argument must be one of the characters forward slash, backslash, or +dot. The default is backslash when running under Windows, otherwise forward +slash. The result of the function is zero for success or PCRE2_ERROR_BADDATA if +the second argument is invalid. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_heap_limit.html b/doc/html/pcre2_set_heap_limit.html new file mode 100644 index 0000000..3631ef6 --- /dev/null +++ b/doc/html/pcre2_set_heap_limit.html @@ -0,0 +1,40 @@ + + +pcre2_set_heap_limit specification + + +

pcre2_set_heap_limit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_heap_limit(pcre2_match_context *mcontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function sets the backtracking heap limit field in a match context. The +result is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_match_limit.html b/doc/html/pcre2_set_match_limit.html new file mode 100644 index 0000000..e840c74 --- /dev/null +++ b/doc/html/pcre2_set_match_limit.html @@ -0,0 +1,40 @@ + + +pcre2_set_match_limit specification + + +

pcre2_set_match_limit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_match_limit(pcre2_match_context *mcontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function sets the match limit field in a match context. The result is +always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_max_pattern_compiled_length.html b/doc/html/pcre2_set_max_pattern_compiled_length.html new file mode 100644 index 0000000..ab570cf --- /dev/null +++ b/doc/html/pcre2_set_max_pattern_compiled_length.html @@ -0,0 +1,44 @@ + + +pcre2_set_max_pattern_compiled_length specification + + +

pcre2_set_max_pattern_compiled_length man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_max_pattern_compiled_length( + pcre2_compile_context *ccontext, PCRE2_SIZE value); +

+
+DESCRIPTION +
+

+This function sets, in a compile context, the maximum size (in bytes) for the +memory needed to hold the compiled version of a pattern that is compiled with +this context. The result is always zero. If a pattern that is passed to +pcre2_compile() with this context needs more memory, an error is +generated. The default is the largest number that a PCRE2_SIZE variable can +hold, which is effectively unlimited. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_max_pattern_length.html b/doc/html/pcre2_set_max_pattern_length.html new file mode 100644 index 0000000..f6e422a --- /dev/null +++ b/doc/html/pcre2_set_max_pattern_length.html @@ -0,0 +1,43 @@ + + +pcre2_set_max_pattern_length specification + + +

pcre2_set_max_pattern_length man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); +

+
+DESCRIPTION +
+

+This function sets, in a compile context, the maximum text length (in code +units) of the pattern that can be compiled. The result is always zero. If a +longer pattern is passed to pcre2_compile() there is an immediate error +return. The default is effectively unlimited, being the largest value a +PCRE2_SIZE variable can hold. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_max_varlookbehind.html b/doc/html/pcre2_set_max_varlookbehind.html new file mode 100644 index 0000000..1c03def --- /dev/null +++ b/doc/html/pcre2_set_max_varlookbehind.html @@ -0,0 +1,42 @@ + + +pcre2_set_max_varlookbehind specification + + +

pcre2_set_max_varlookbehind man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function sets the maximum number of characters that can be matched by a +variable-length lookbehind assertion. The default is set when PCRE2 is built, +with the ultimate default being 255, the same as Perl. Lookbehind assertions +without a bounding length are not supported. The result is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_newline.html b/doc/html/pcre2_set_newline.html new file mode 100644 index 0000000..ba81300 --- /dev/null +++ b/doc/html/pcre2_set_newline.html @@ -0,0 +1,51 @@ + + +pcre2_set_newline specification + + +

pcre2_set_newline man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_newline(pcre2_compile_context *ccontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function sets the newline convention within a compile context. This +specifies which character(s) are recognized as newlines when compiling and +matching patterns. The second argument must be one of: +

+  PCRE2_NEWLINE_CR        Carriage return only
+  PCRE2_NEWLINE_LF        Linefeed only
+  PCRE2_NEWLINE_CRLF      CR followed by LF only
+  PCRE2_NEWLINE_ANYCRLF   Any of the above
+  PCRE2_NEWLINE_ANY       Any Unicode newline sequence
+  PCRE2_NEWLINE_NUL       The NUL character (binary zero)
+
+The result is zero for success or PCRE2_ERROR_BADDATA if the second argument is +invalid. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_offset_limit.html b/doc/html/pcre2_set_offset_limit.html new file mode 100644 index 0000000..6d9a85c --- /dev/null +++ b/doc/html/pcre2_set_offset_limit.html @@ -0,0 +1,40 @@ + + +pcre2_set_offset_limit specification + + +

pcre2_set_offset_limit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); +

+
+DESCRIPTION +
+

+This function sets the offset limit field in a match context. The result is +always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_parens_nest_limit.html b/doc/html/pcre2_set_parens_nest_limit.html new file mode 100644 index 0000000..95fd31c --- /dev/null +++ b/doc/html/pcre2_set_parens_nest_limit.html @@ -0,0 +1,40 @@ + + +pcre2_set_parens_nest_limit specification + + +

pcre2_set_parens_nest_limit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function sets, in a compile context, the maximum depth of nested +parentheses in a pattern. The result is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_recursion_limit.html b/doc/html/pcre2_set_recursion_limit.html new file mode 100644 index 0000000..9ff68c2 --- /dev/null +++ b/doc/html/pcre2_set_recursion_limit.html @@ -0,0 +1,40 @@ + + +pcre2_set_recursion_limit specification + + +

pcre2_set_recursion_limit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_recursion_limit(pcre2_match_context *mcontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function is obsolete and should not be used in new code. Use +pcre2_set_depth_limit() instead. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_recursion_memory_management.html b/doc/html/pcre2_set_recursion_memory_management.html new file mode 100644 index 0000000..37af73c --- /dev/null +++ b/doc/html/pcre2_set_recursion_memory_management.html @@ -0,0 +1,42 @@ + + +pcre2_set_recursion_memory_management specification + + +

pcre2_set_recursion_memory_management man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_recursion_memory_management( + pcre2_match_context *mcontext, + void *(*private_malloc)(size_t, void *), + void (*private_free)(void *, void *), void *memory_data); +

+
+DESCRIPTION +
+

+From release 10.30 onwards, this function is obsolete and does nothing. The +result is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_set_substitute_callout.html b/doc/html/pcre2_set_substitute_callout.html new file mode 100644 index 0000000..7ae3a39 --- /dev/null +++ b/doc/html/pcre2_set_substitute_callout.html @@ -0,0 +1,43 @@ + + +pcre2_set_substitute_callout specification + + +

pcre2_set_substitute_callout man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_substitute_callout_block *), + void *callout_data); +

+
+DESCRIPTION +
+

+This function sets the substitute callout fields in a match context (the first +argument). The second argument specifies a callout function, and the third +argument is an opaque data item that is passed to it. The result of this +function is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_substitute.html b/doc/html/pcre2_substitute.html new file mode 100644 index 0000000..abf0a70 --- /dev/null +++ b/doc/html/pcre2_substitute.html @@ -0,0 +1,111 @@ + + +pcre2_substitute specification + + +

pcre2_substitute man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, PCRE2_SPTR replacement, + PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer, + PCRE2_SIZE *outlengthptr); +

+
+DESCRIPTION +
+

+This function matches a compiled regular expression against a given subject +string, using a matching algorithm that is similar to Perl's. It then makes a +copy of the subject, substituting a replacement string for what was matched. +Its arguments are: +

+  code          Points to the compiled pattern
+  subject       Points to the subject string
+  length        Length of the subject string
+  startoffset   Offset in the subject at which to start matching
+  options       Option bits
+  match_data    Points to a match data block, or is NULL
+  mcontext      Points to a match context, or is NULL
+  replacement   Points to the replacement string
+  rlength       Length of the replacement string
+  outputbuffer  Points to the output buffer
+  outlengthptr  Points to the length of the output buffer
+
+A match data block is needed only if you want to inspect the data from the +final match that is returned in that block or if PCRE2_SUBSTITUTE_MATCHED is +set. A match context is needed only if you want to: +
+  Set up a callout function
+  Set a matching offset limit
+  Change the backtracking match limit
+  Change the backtracking depth limit
+  Set custom memory management in the match context
+
+The length, startoffset and rlength values are code units, +not characters, as are the contents of the variable pointed at by +outlengthptr. This variable must contain the length of the output buffer +when the function is called. If the function is successful, the value is +changed to the length of the new string, excluding the trailing zero that is +automatically added. +

+

+The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for +zero-terminated strings. The options are: +

+  PCRE2_ANCHORED                     Match only at the first position
+  PCRE2_ENDANCHORED                  Match only at end of subject
+  PCRE2_NOTBOL                       Subject is not the beginning of a line
+  PCRE2_NOTEOL                       Subject is not the end of a line
+  PCRE2_NOTEMPTY                     An empty string is not a valid match
+  PCRE2_NOTEMPTY_ATSTART             An empty string at the start of the subject is not a valid match
+  PCRE2_NO_JIT                       Do not use JIT matching
+  PCRE2_NO_UTF_CHECK                 Do not check for UTF validity in the subject or replacement
+                                      (only relevant if PCRE2_UTF was set at compile time)
+  PCRE2_SUBSTITUTE_EXTENDED          Do extended replacement processing
+  PCRE2_SUBSTITUTE_GLOBAL            Replace all occurrences in the subject
+  PCRE2_SUBSTITUTE_LITERAL           The replacement string is literal
+  PCRE2_SUBSTITUTE_MATCHED           Use pre-existing match data for first match
+  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH   If overflow, compute needed length
+  PCRE2_SUBSTITUTE_REPLACEMENT_ONLY  Return only replacement string(s)
+  PCRE2_SUBSTITUTE_UNKNOWN_UNSET     Treat unknown group as unset
+  PCRE2_SUBSTITUTE_UNSET_EMPTY       Simple unset insert = empty string
+
+If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED, +PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored. +

+

+If PCRE2_SUBSTITUTE_MATCHED is set, match_data must be non-NULL; its +contents must be the result of a call to pcre2_match() using the same +pattern and subject. +

+

+The function returns the number of substitutions, which may be zero if there +are no matches. The result may be greater than one only when +PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code +is returned. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_substring_copy_byname.html b/doc/html/pcre2_substring_copy_byname.html new file mode 100644 index 0000000..fd01805 --- /dev/null +++ b/doc/html/pcre2_substring_copy_byname.html @@ -0,0 +1,58 @@ + + +pcre2_substring_copy_byname specification + + +

pcre2_substring_copy_byname man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_copy_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen); +

+
+DESCRIPTION +
+

+This is a convenience function for extracting a captured substring, identified +by name, into a given buffer. The arguments are: +

+  match_data    The match data block for the match
+  name          Name of the required substring
+  buffer        Buffer to receive the string
+  bufflen       Length of buffer (code units)
+
+The bufflen variable is updated to contain the length of the extracted +string, excluding the trailing zero. The yield of the function is zero for +success or one of the following error numbers: +
+  PCRE2_ERROR_NOSUBSTRING   there are no groups of that name
+  PCRE2_ERROR_UNAVAILABLE   the ovector was too small for that group
+  PCRE2_ERROR_UNSET         the group did not participate in the match
+  PCRE2_ERROR_NOMEMORY      the buffer is not big enough
+
+If there is more than one group with the given name, the first one that is set +is returned. In this situation PCRE2_ERROR_UNSET means that no group with the +given name was set. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_substring_copy_bynumber.html b/doc/html/pcre2_substring_copy_bynumber.html new file mode 100644 index 0000000..83e1a27 --- /dev/null +++ b/doc/html/pcre2_substring_copy_bynumber.html @@ -0,0 +1,57 @@ + + +pcre2_substring_copy_bynumber specification + + +

pcre2_substring_copy_bynumber man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_copy_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR *buffer, + PCRE2_SIZE *bufflen); +

+
+DESCRIPTION +
+

+This is a convenience function for extracting a captured substring into a given +buffer. The arguments are: +

+  match_data    The match data block for the match
+  number        Number of the required substring
+  buffer        Buffer to receive the string
+  bufflen       Length of buffer
+
+The bufflen variable is updated with the length of the extracted string, +excluding the terminating zero. The yield of the function is zero for success +or one of the following error numbers: +
+  PCRE2_ERROR_NOSUBSTRING   there are no groups of that number
+  PCRE2_ERROR_UNAVAILABLE   the ovector was too small for that group
+  PCRE2_ERROR_UNSET         the group did not participate in the match
+  PCRE2_ERROR_NOMEMORY      the buffer is too small
+
+
+

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_substring_free.html b/doc/html/pcre2_substring_free.html new file mode 100644 index 0000000..e0d0fbd --- /dev/null +++ b/doc/html/pcre2_substring_free.html @@ -0,0 +1,41 @@ + + +pcre2_substring_free specification + + +

pcre2_substring_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_substring_free(PCRE2_UCHAR *buffer); +

+
+DESCRIPTION +
+

+This is a convenience function for freeing the memory obtained by a previous +call to pcre2_substring_get_byname() or +pcre2_substring_get_bynumber(). Its only argument is a pointer to the +string. If the argument is NULL, the function does nothing. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_substring_get_byname.html b/doc/html/pcre2_substring_get_byname.html new file mode 100644 index 0000000..a4b8771 --- /dev/null +++ b/doc/html/pcre2_substring_get_byname.html @@ -0,0 +1,60 @@ + + +pcre2_substring_get_byname specification + + +

pcre2_substring_get_byname man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_get_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen); +

+
+DESCRIPTION +
+

+This is a convenience function for extracting a captured substring by name into +newly acquired memory. The arguments are: +

+  match_data    The match data for the match
+  name          Name of the required substring
+  bufferptr     Where to put the string pointer
+  bufflen       Where to put the string length
+
+The memory in which the substring is placed is obtained by calling the same +memory allocation function that was used for the match data block. The +convenience function pcre2_substring_free() can be used to free it when +it is no longer needed. The yield of the function is zero for success or one of +the following error numbers: +
+  PCRE2_ERROR_NOSUBSTRING   there are no groups of that name
+  PCRE2_ERROR_UNAVAILABLE   the ovector was too small for that group
+  PCRE2_ERROR_UNSET         the group did not participate in the match
+  PCRE2_ERROR_NOMEMORY      memory could not be obtained
+
+If there is more than one group with the given name, the first one that is set +is returned. In this situation PCRE2_ERROR_UNSET means that no group with the +given name was set. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_substring_get_bynumber.html b/doc/html/pcre2_substring_get_bynumber.html new file mode 100644 index 0000000..391bc82 --- /dev/null +++ b/doc/html/pcre2_substring_get_bynumber.html @@ -0,0 +1,58 @@ + + +pcre2_substring_get_bynumber specification + + +

pcre2_substring_get_bynumber man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_get_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen); +

+
+DESCRIPTION +
+

+This is a convenience function for extracting a captured substring by number +into newly acquired memory. The arguments are: +

+  match_data    The match data for the match
+  number        Number of the required substring
+  bufferptr     Where to put the string pointer
+  bufflen       Where to put the string length
+
+The memory in which the substring is placed is obtained by calling the same +memory allocation function that was used for the match data block. The +convenience function pcre2_substring_free() can be used to free it when +it is no longer needed. The yield of the function is zero for success or one of +the following error numbers: +
+  PCRE2_ERROR_NOSUBSTRING   there are no groups of that number
+  PCRE2_ERROR_UNAVAILABLE   the ovector was too small for that group
+  PCRE2_ERROR_UNSET         the group did not participate in the match
+  PCRE2_ERROR_NOMEMORY      memory could not be obtained
+
+
+

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_substring_length_byname.html b/doc/html/pcre2_substring_length_byname.html new file mode 100644 index 0000000..213bc94 --- /dev/null +++ b/doc/html/pcre2_substring_length_byname.html @@ -0,0 +1,46 @@ + + +pcre2_substring_length_byname specification + + +

pcre2_substring_length_byname man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_length_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_SIZE *length); +

+
+DESCRIPTION +
+

+This function returns the length of a matched substring, identified by name. +The arguments are: +

+  match_data   The match data block for the match
+  name         The substring name
+  length       Where to return the length
+
+The yield is zero on success, or an error code if the substring is not found. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_substring_length_bynumber.html b/doc/html/pcre2_substring_length_bynumber.html new file mode 100644 index 0000000..db01cca --- /dev/null +++ b/doc/html/pcre2_substring_length_bynumber.html @@ -0,0 +1,48 @@ + + +pcre2_substring_length_bynumber specification + + +

pcre2_substring_length_bynumber man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_length_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_SIZE *length); +

+
+DESCRIPTION +
+

+This function returns the length of a matched substring, identified by number. +The arguments are: +

+  match_data   The match data block for the match
+  number       The substring number
+  length       Where to return the length, or NULL
+
+The third argument may be NULL if all you want to know is whether or not a +substring is set. The yield is zero on success, or a negative error code +otherwise. After a partial match, only substring 0 is available. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_substring_list_free.html b/doc/html/pcre2_substring_list_free.html new file mode 100644 index 0000000..dea8bc5 --- /dev/null +++ b/doc/html/pcre2_substring_list_free.html @@ -0,0 +1,41 @@ + + +pcre2_substring_list_free specification + + +

pcre2_substring_list_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_substring_list_free(PCRE2_UCHAR **list); +

+
+DESCRIPTION +
+

+This is a convenience function for freeing the store obtained by a previous +call to pcre2_substring_list_get(). Its only argument is a pointer to +the list of string pointers. If the argument is NULL, the function returns +immediately, without doing anything. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_substring_list_get.html b/doc/html/pcre2_substring_list_get.html new file mode 100644 index 0000000..fd43627 --- /dev/null +++ b/doc/html/pcre2_substring_list_get.html @@ -0,0 +1,56 @@ + + +pcre2_substring_list_get specification + + +

pcre2_substring_list_get man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_list_get(pcre2_match_data *match_data, + PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr); +

+
+DESCRIPTION +
+

+This is a convenience function for extracting all the captured substrings after +a pattern match. It builds a list of pointers to the strings, and (optionally) +a second list that contains their lengths (in code units), excluding a +terminating zero that is added to each of them. All this is done in a single +block of memory that is obtained using the same memory allocation function that +was used to get the match data block. The convenience function +pcre2_substring_list_free() can be used to free it when it is no longer +needed. The arguments are: +

+  match_data    The match data block
+  listptr       Where to put a pointer to the list
+  lengthsptr    Where to put a pointer to the lengths, or NULL
+
+A pointer to a list of pointers is put in the variable whose address is in +listptr. The list is terminated by a NULL pointer. If lengthsptr is +not NULL, a matching list of lengths is created, and its address is placed in +lengthsptr. The yield of the function is zero on success or +PCRE2_ERROR_NOMEMORY if sufficient memory could not be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_substring_nametable_scan.html b/doc/html/pcre2_substring_nametable_scan.html new file mode 100644 index 0000000..277affa --- /dev/null +++ b/doc/html/pcre2_substring_nametable_scan.html @@ -0,0 +1,53 @@ + + +pcre2_substring_nametable_scan specification + + +

pcre2_substring_nametable_scan man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_nametable_scan(const pcre2_code *code, + PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); +

+
+DESCRIPTION +
+

+This convenience function finds, for a compiled pattern, the first and last +entries for a given name in the table that translates capture group names into +numbers. +

+  code    Compiled regular expression
+  name    Name whose entries are required
+  first   Where to return a pointer to the first entry
+  last    Where to return a pointer to the last entry
+
+When the name is found in the table, if first is NULL, the function +returns a group number, but if there is more than one matching entry, it is not +defined which one. Otherwise, when both pointers have been set, the yield of +the function is the length of each entry in code units. If the name is not +found, PCRE2_ERROR_NOSUBSTRING is returned. +

+

+There is a complete description of the PCRE2 native API, including the format of +the table entries, in the +pcre2api +page, and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2_substring_number_from_name.html b/doc/html/pcre2_substring_number_from_name.html new file mode 100644 index 0000000..160fbda --- /dev/null +++ b/doc/html/pcre2_substring_number_from_name.html @@ -0,0 +1,50 @@ + + +pcre2_substring_number_from_name specification + + +

pcre2_substring_number_from_name man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_number_from_name(const pcre2_code *code, + PCRE2_SPTR name); +

+
+DESCRIPTION +
+

+This convenience function finds the number of a named substring capturing +parenthesis in a compiled pattern, provided that it is a unique name. The +function arguments are: +

+  code    Compiled regular expression
+  name    Name whose number is required
+
+The yield of the function is the number of the parenthesis if the name is +found, or PCRE2_ERROR_NOSUBSTRING if it is not found. When duplicate names are +allowed (PCRE2_DUPNAMES is set), if the name is not unique, +PCRE2_ERROR_NOUNIQUESUBSTRING is returned. You can obtain the list of numbers +with the same name by calling pcre2_substring_nametable_scan(). +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html new file mode 100644 index 0000000..6b60ee9 --- /dev/null +++ b/doc/html/pcre2api.html @@ -0,0 +1,4186 @@ + + +pcre2api specification + + +

pcre2api man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+

+#include <pcre2.h> +
+
+PCRE2 is a new API for PCRE, starting at release 10.0. This document contains a +description of all its native functions. See the +pcre2 +document for an overview of all the PCRE2 documentation. +

+
PCRE2 NATIVE API BASIC FUNCTIONS
+

+pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, + pcre2_compile_context *ccontext); +
+
+void pcre2_code_free(pcre2_code *code); +
+
+pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, + pcre2_general_context *gcontext); +
+
+pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext); +
+
+int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); +
+
+int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, + int *workspace, PCRE2_SIZE wscount); +
+
+void pcre2_match_data_free(pcre2_match_data *match_data); +

+
PCRE2 NATIVE API AUXILIARY MATCH FUNCTIONS
+

+PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data); +
+
+PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *match_data); +
+
+PCRE2_SIZE pcre2_get_match_data_heapframes_size( + pcre2_match_data *match_data); +
+
+uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data); +
+
+PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data); +
+
+PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data); +

+
PCRE2 NATIVE API GENERAL CONTEXT FUNCTIONS
+

+pcre2_general_context *pcre2_general_context_create( + void *(*private_malloc)(PCRE2_SIZE, void *), + void (*private_free)(void *, void *), void *memory_data); +
+
+pcre2_general_context *pcre2_general_context_copy( + pcre2_general_context *gcontext); +
+
+void pcre2_general_context_free(pcre2_general_context *gcontext); +

+
PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS
+

+pcre2_compile_context *pcre2_compile_context_create( + pcre2_general_context *gcontext); +
+
+pcre2_compile_context *pcre2_compile_context_copy( + pcre2_compile_context *ccontext); +
+
+void pcre2_compile_context_free(pcre2_compile_context *ccontext); +
+
+int pcre2_set_bsr(pcre2_compile_context *ccontext, + uint32_t value); +
+
+int pcre2_set_character_tables(pcre2_compile_context *ccontext, + const uint8_t *tables); +
+
+int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, + uint32_t extra_options); +
+
+int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); +
+
+int pcre2_set_max_pattern_compiled_length( + pcre2_compile_context *ccontext, PCRE2_SIZE value); +
+
+int pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, + uint32_t value); +
+
+int pcre2_set_newline(pcre2_compile_context *ccontext, + uint32_t value); +
+
+int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, + uint32_t value); +
+
+int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, + int (*guard_function)(uint32_t, void *), void *user_data); +

+
PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS
+

+pcre2_match_context *pcre2_match_context_create( + pcre2_general_context *gcontext); +
+
+pcre2_match_context *pcre2_match_context_copy( + pcre2_match_context *mcontext); +
+
+void pcre2_match_context_free(pcre2_match_context *mcontext); +
+
+int pcre2_set_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_callout_block *, void *), + void *callout_data); +
+
+int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_substitute_callout_block *, void *), + void *callout_data); +
+
+int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); +
+
+int pcre2_set_heap_limit(pcre2_match_context *mcontext, + uint32_t value); +
+
+int pcre2_set_match_limit(pcre2_match_context *mcontext, + uint32_t value); +
+
+int pcre2_set_depth_limit(pcre2_match_context *mcontext, + uint32_t value); +

+
PCRE2 NATIVE API STRING EXTRACTION FUNCTIONS
+

+int pcre2_substring_copy_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen); +
+
+int pcre2_substring_copy_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR *buffer, + PCRE2_SIZE *bufflen); +
+
+void pcre2_substring_free(PCRE2_UCHAR *buffer); +
+
+int pcre2_substring_get_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen); +
+
+int pcre2_substring_get_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR **bufferptr, + PCRE2_SIZE *bufflen); +
+
+int pcre2_substring_length_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_SIZE *length); +
+
+int pcre2_substring_length_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_SIZE *length); +
+
+int pcre2_substring_nametable_scan(const pcre2_code *code, + PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); +
+
+int pcre2_substring_number_from_name(const pcre2_code *code, + PCRE2_SPTR name); +
+
+void pcre2_substring_list_free(PCRE2_UCHAR **list); +
+
+int pcre2_substring_list_get(pcre2_match_data *match_data, + PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr); +

+
PCRE2 NATIVE API STRING SUBSTITUTION FUNCTION
+

+int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, PCRE2_SPTR replacement, + PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer, + PCRE2_SIZE *outlengthptr); +

+
PCRE2 NATIVE API JIT FUNCTIONS
+

+int pcre2_jit_compile(pcre2_code *code, uint32_t options); +
+
+int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); +
+
+void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); +
+
+pcre2_jit_stack *pcre2_jit_stack_create(size_t startsize, + size_t maxsize, pcre2_general_context *gcontext); +
+
+void pcre2_jit_stack_assign(pcre2_match_context *mcontext, + pcre2_jit_callback callback_function, void *callback_data); +
+
+void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack); +

+
PCRE2 NATIVE API SERIALIZATION FUNCTIONS
+

+int32_t pcre2_serialize_decode(pcre2_code **codes, + int32_t number_of_codes, const uint8_t *bytes, + pcre2_general_context *gcontext); +
+
+int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, + PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext); +
+
+void pcre2_serialize_free(uint8_t *bytes); +
+
+int32_t pcre2_serialize_get_number_of_codes(const uint8_t *bytes); +

+
PCRE2 NATIVE API AUXILIARY FUNCTIONS
+

+pcre2_code *pcre2_code_copy(const pcre2_code *code); +
+
+pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); +
+
+int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, + PCRE2_SIZE bufflen); +
+
+const uint8_t *pcre2_maketables(pcre2_general_context *gcontext); +
+
+void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); +
+
+int pcre2_pattern_info(const pcre2_code *code, uint32_t what, + void *where); +
+
+int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); +
+
+int pcre2_config(uint32_t what, void *where); +

+
PCRE2 NATIVE API OBSOLETE FUNCTIONS
+

+int pcre2_set_recursion_limit(pcre2_match_context *mcontext, + uint32_t value); +
+
+int pcre2_set_recursion_memory_management( + pcre2_match_context *mcontext, + void *(*private_malloc)(size_t, void *), + void (*private_free)(void *, void *), void *memory_data); +
+
+These functions became obsolete at release 10.30 and are retained only for +backward compatibility. They should not be used in new code. The first is +replaced by pcre2_set_depth_limit(); the second is no longer needed and +has no effect (it always returns zero). +

+
PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS
+

+pcre2_convert_context *pcre2_convert_context_create( + pcre2_general_context *gcontext); +
+
+pcre2_convert_context *pcre2_convert_context_copy( + pcre2_convert_context *cvcontext); +
+
+void pcre2_convert_context_free(pcre2_convert_context *cvcontext); +
+
+int pcre2_set_glob_escape(pcre2_convert_context *cvcontext, + uint32_t escape_char); +
+
+int pcre2_set_glob_separator(pcre2_convert_context *cvcontext, + uint32_t separator_char); +
+
+int pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, PCRE2_UCHAR **buffer, + PCRE2_SIZE *blength, pcre2_convert_context *cvcontext); +
+
+void pcre2_converted_pattern_free(PCRE2_UCHAR *converted_pattern); +
+
+These functions provide a way of converting non-PCRE2 patterns into +patterns that can be processed by pcre2_compile(). This facility is +experimental and may be changed in future releases. At present, "globs" and +POSIX basic and extended patterns can be converted. Details are given in the +pcre2convert +documentation. +

+
PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES
+

+There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit code +units, respectively. However, there is just one header file, pcre2.h. +This contains the function prototypes and other definitions for all three +libraries. One, two, or all three can be installed simultaneously. On Unix-like +systems the libraries are called libpcre2-8, libpcre2-16, and +libpcre2-32, and they can also co-exist with the original PCRE libraries. +Every PCRE2 function comes in three different forms, one for each library, for +example: +

+  pcre2_compile_8()
+  pcre2_compile_16()
+  pcre2_compile_32()
+
+There are also three different sets of data types: +
+  PCRE2_UCHAR8, PCRE2_UCHAR16, PCRE2_UCHAR32
+  PCRE2_SPTR8,  PCRE2_SPTR16,  PCRE2_SPTR32
+
+The UCHAR types define unsigned code units of the appropriate widths. +For example, PCRE2_UCHAR16 is usually defined as `uint16_t'. +The SPTR types are pointers to constants of the equivalent UCHAR types, +that is, they are pointers to vectors of unsigned code units. +

+

+Character strings are passed to a PCRE2 library as sequences of unsigned +integers in code units of the appropriate width. The length of a string may +be given as a number of code units, or the string may be specified as +zero-terminated. +

+

+Many applications use only one code unit width. For their convenience, macros +are defined whose names are the generic forms such as pcre2_compile() and +PCRE2_SPTR. These macros use the value of the macro PCRE2_CODE_UNIT_WIDTH to +generate the appropriate width-specific function and macro names. +PCRE2_CODE_UNIT_WIDTH is not defined by default. An application must define it +to be 8, 16, or 32 before including pcre2.h in order to make use of the +generic names. +

+

+Applications that use more than one code unit width can be linked with more +than one PCRE2 library, but must define PCRE2_CODE_UNIT_WIDTH to be 0 before +including pcre2.h, and then use the real function names. Any code that is +to be included in an environment where the value of PCRE2_CODE_UNIT_WIDTH is +unknown should also use the real function names. (Unfortunately, it is not +possible in C code to save and restore the value of a macro.) +

+

+If PCRE2_CODE_UNIT_WIDTH is not defined before including pcre2.h, a +compiler error occurs. +

+

+When using multiple libraries in an application, you must take care when +processing any particular pattern to use only functions from a single library. +For example, if you want to run a match using a pattern that was compiled with +pcre2_compile_16(), you must do so with pcre2_match_16(), not +pcre2_match_8() or pcre2_match_32(). +

+

+In the function summaries above, and in the rest of this document and other +PCRE2 documents, functions and data types are described using their generic +names, without the _8, _16, or _32 suffix. +

+
PCRE2 API OVERVIEW
+

+PCRE2 has its own native API, which is described in this document. There are +also some wrapper functions for the 8-bit library that correspond to the +POSIX regular expression API, but they do not give access to all the +functionality of PCRE2 and they are not thread-safe. They are described in the +pcre2posix +documentation. Both these APIs define a set of C function calls. +

+

+The native API C data types, function prototypes, option values, and error +codes are defined in the header file pcre2.h, which also contains +definitions of PCRE2_MAJOR and PCRE2_MINOR, the major and minor release numbers +for the library. Applications can use these to include support for different +releases of PCRE2. +

+

+In a Windows environment, if you want to statically link an application program +against a non-dll PCRE2 library, you must define PCRE2_STATIC before including +pcre2.h. +

+

+The functions pcre2_compile() and pcre2_match() are used for +compiling and matching regular expressions in a Perl-compatible manner. A +sample program that demonstrates the simplest way of using them is provided in +the file called pcre2demo.c in the PCRE2 source distribution. A listing +of this program is given in the +pcre2demo +documentation, and the +pcre2sample +documentation describes how to compile and run it. +

+

+The compiling and matching functions recognize various options that are passed +as bits in an options argument. There are also some more complicated parameters +such as custom memory management functions and resource limits that are passed +in "contexts" (which are just memory blocks, described below). Simple +applications do not need to make use of contexts. +

+

+Just-in-time (JIT) compiler support is an optional feature of PCRE2 that can be +built in appropriate hardware environments. It greatly speeds up the matching +performance of many patterns. Programs can request that it be used if +available by calling pcre2_jit_compile() after a pattern has been +successfully compiled by pcre2_compile(). This does nothing if JIT +support is not available. +

+

+More complicated programs might need to make use of the specialist functions +pcre2_jit_stack_create(), pcre2_jit_stack_free(), and +pcre2_jit_stack_assign() in order to control the JIT code's memory usage. +

+

+JIT matching is automatically used by pcre2_match() if it is available, +unless the PCRE2_NO_JIT option is set. There is also a direct interface for JIT +matching, which gives improved performance at the expense of less sanity +checking. The JIT-specific functions are discussed in the +pcre2jit +documentation. +

+

+A second matching function, pcre2_dfa_match(), which is not +Perl-compatible, is also provided. This uses a different algorithm for the +matching. The alternative algorithm finds all possible matches (at a given +point in the subject), and scans the subject just once (unless there are +lookaround assertions). However, this algorithm does not return captured +substrings. A description of the two matching algorithms and their advantages +and disadvantages is given in the +pcre2matching +documentation. There is no JIT support for pcre2_dfa_match(). +

+

+In addition to the main compiling and matching functions, there are convenience +functions for extracting captured substrings from a subject string that has +been matched by pcre2_match(). They are: +

+  pcre2_substring_copy_byname()
+  pcre2_substring_copy_bynumber()
+  pcre2_substring_get_byname()
+  pcre2_substring_get_bynumber()
+  pcre2_substring_list_get()
+  pcre2_substring_length_byname()
+  pcre2_substring_length_bynumber()
+  pcre2_substring_nametable_scan()
+  pcre2_substring_number_from_name()
+
+pcre2_substring_free() and pcre2_substring_list_free() are also +provided, to free memory used for extracted strings. If either of these +functions is called with a NULL argument, the function returns immediately +without doing anything. +

+

+The function pcre2_substitute() can be called to match a pattern and +return a copy of the subject string with substitutions for parts that were +matched. +

+

+Functions whose names begin with pcre2_serialize_ are used for saving +compiled patterns on disc or elsewhere, and reloading them later. +

+

+Finally, there are functions for finding out information about a compiled +pattern (pcre2_pattern_info()) and about the configuration with which +PCRE2 was built (pcre2_config()). +

+

+Functions with names ending with _free() are used for freeing memory +blocks of various sorts. In all cases, if one of these functions is called with +a NULL argument, it does nothing. +

+
STRING LENGTHS AND OFFSETS
+

+The PCRE2 API uses string lengths and offsets into strings of code units in +several places. These values are always of type PCRE2_SIZE, which is an +unsigned integer type, currently always defined as size_t. The largest +value that can be stored in such a type (that is ~(PCRE2_SIZE)0) is reserved +as a special indicator for zero-terminated strings and unset offsets. +Therefore, the longest string that can be handled is one less than this +maximum. Note that string lengths are always given in code units. Only in the +8-bit library is such a length the same as the number of bytes in the string. +

+
NEWLINES
+

+PCRE2 supports five different conventions for indicating line breaks in +strings: a single CR (carriage return) character, a single LF (linefeed) +character, the two-character sequence CRLF, any of the three preceding, or any +Unicode newline sequence. The Unicode newline sequences are the three just +mentioned, plus the single characters VT (vertical tab, U+000B), FF (form feed, +U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS +(paragraph separator, U+2029). +

+

+Each of the first three conventions is used by at least one operating system as +its standard newline sequence. When PCRE2 is built, a default can be specified. +If it is not, the default is set to LF, which is the Unix standard. However, +the newline convention can be changed by an application when calling +pcre2_compile(), or it can be specified by special text at the start of +the pattern itself; this overrides any other settings. See the +pcre2pattern +page for details of the special character sequences. +

+

+In the PCRE2 documentation the word "newline" is used to mean "the character or +pair of characters that indicate a line break". The choice of newline +convention affects the handling of the dot, circumflex, and dollar +metacharacters, the handling of #-comments in /x mode, and, when CRLF is a +recognized line ending sequence, the match position advancement for a +non-anchored pattern. There is more detail about this in the +section on pcre2_match() options +below. +

+

+The choice of newline convention does not affect the interpretation of +the \n or \r escape sequences, nor does it affect what \R matches; this has +its own separate convention. +

+
MULTITHREADING
+

+In a multithreaded application it is important to keep thread-specific data +separate from data that can be shared between threads. The PCRE2 library code +itself is thread-safe: it contains no static or global variables. The API is +designed to be fairly simple for non-threaded applications while at the same +time ensuring that multithreaded applications can use it. +

+

+There are several different blocks of data that are used to pass information +between the application and the PCRE2 libraries. +

+
+The compiled pattern +
+

+A pointer to the compiled form of a pattern is returned to the user when +pcre2_compile() is successful. The data in the compiled pattern is fixed, +and does not change when the pattern is matched. Therefore, it is thread-safe, +that is, the same compiled pattern can be used by more than one thread +simultaneously. For example, an application can compile all its patterns at the +start, before forking off multiple threads that use them. However, if the +just-in-time (JIT) optimization feature is being used, it needs separate memory +stack areas for each thread. See the +pcre2jit +documentation for more details. +

+

+In a more complicated situation, where patterns are compiled only when they are +first needed, but are still shared between threads, pointers to compiled +patterns must be protected from simultaneous writing by multiple threads. This +is somewhat tricky to do correctly. If you know that writing to a pointer is +atomic in your environment, you can use logic like this: +

+  Get a read-only (shared) lock (mutex) for pointer
+  if (pointer == NULL)
+    {
+    Get a write (unique) lock for pointer
+    if (pointer == NULL) pointer = pcre2_compile(...
+    }
+  Release the lock
+  Use pointer in pcre2_match()
+
+Of course, testing for compilation errors should also be included in the code. +

+

+The reason for checking the pointer a second time is as follows: Several +threads may have acquired the shared lock and tested the pointer for being +NULL, but only one of them will be given the write lock, with the rest kept +waiting. The winning thread will compile the pattern and store the result. +After this thread releases the write lock, another thread will get it, and if +it does not retest pointer for being NULL, will recompile the pattern and +overwrite the pointer, creating a memory leak and possibly causing other +issues. +

+

+In an environment where writing to a pointer may not be atomic, the above logic +is not sufficient. The thread that is doing the compiling may be descheduled +after writing only part of the pointer, which could cause other threads to use +an invalid value. Instead of checking the pointer itself, a separate "pointer +is valid" flag (that can be updated atomically) must be used: +

+  Get a read-only (shared) lock (mutex) for pointer
+  if (!pointer_is_valid)
+    {
+    Get a write (unique) lock for pointer
+    if (!pointer_is_valid)
+      {
+      pointer = pcre2_compile(...
+      pointer_is_valid = TRUE
+      }
+    }
+  Release the lock
+  Use pointer in pcre2_match()
+
+If JIT is being used, but the JIT compilation is not being done immediately +(perhaps waiting to see if the pattern is used often enough), similar logic is +required. JIT compilation updates a value within the compiled code block, so a +thread must gain unique write access to the pointer before calling +pcre2_jit_compile(). Alternatively, pcre2_code_copy() or +pcre2_code_copy_with_tables() can be used to obtain a private copy of the +compiled code before calling the JIT compiler. +

+
+Context blocks +
+

+The next main section below introduces the idea of "contexts" in which PCRE2 +functions are called. A context is nothing more than a collection of parameters +that control the way PCRE2 operates. Grouping a number of parameters together +in a context is a convenient way of passing them to a PCRE2 function without +using lots of arguments. The parameters that are stored in contexts are in some +sense "advanced features" of the API. Many straightforward applications will +not need to use contexts. +

+

+In a multithreaded application, if the parameters in a context are values that +are never changed, the same context can be used by all the threads. However, if +any thread needs to change any value in a context, it must make its own +thread-specific copy. +

+
+Match blocks +
+

+The matching functions need a block of memory for storing the results of a +match. This includes details of what was matched, as well as additional +information such as the name of a (*MARK) setting. Each thread must provide its +own copy of this memory. +

+
PCRE2 CONTEXTS
+

+Some PCRE2 functions have a lot of parameters, many of which are used only by +specialist applications, for example, those that use custom memory management +or non-standard character tables. To keep function argument lists at a +reasonable size, and at the same time to keep the API extensible, "uncommon" +parameters are passed to certain functions in a context instead of +directly. A context is just a block of memory that holds the parameter values. +Applications that do not need to adjust any of the context parameters can pass +NULL when a context pointer is required. +

+

+There are three different types of context: a general context that is relevant +for several PCRE2 operations, a compile-time context, and a match-time context. +

+
+The general context +
+

+At present, this context just contains pointers to (and data for) external +memory management functions that are called from several places in the PCRE2 +library. The context is named `general' rather than specifically `memory' +because in future other fields may be added. If you do not want to supply your +own custom memory management functions, you do not need to bother with a +general context. A general context is created by: +
+
+pcre2_general_context *pcre2_general_context_create( + void *(*private_malloc)(PCRE2_SIZE, void *), + void (*private_free)(void *, void *), void *memory_data); +
+
+The two function pointers specify custom memory management functions, whose +prototypes are: +

+  void *private_malloc(PCRE2_SIZE, void *);
+  void  private_free(void *, void *);
+
+Whenever code in PCRE2 calls these functions, the final argument is the value +of memory_data. Either of the first two arguments of the creation +function may be NULL, in which case the system memory management functions +malloc() and free() are used. (This is not currently useful, as +there are no other fields in a general context, but in future there might be.) +The private_malloc() function is used (if supplied) to obtain memory for +storing the context, and all three values are saved as part of the context. +

+

+Whenever PCRE2 creates a data block of any kind, the block contains a pointer +to the free() function that matches the malloc() function that was +used. When the time comes to free the block, this function is called. +

+

+A general context can be copied by calling: +
+
+pcre2_general_context *pcre2_general_context_copy( + pcre2_general_context *gcontext); +
+
+The memory used for a general context should be freed by calling: +
+
+void pcre2_general_context_free(pcre2_general_context *gcontext); +
+
+If this function is passed a NULL argument, it returns immediately without +doing anything. +

+
+The compile context +
+

+A compile context is required if you want to provide an external function for +stack checking during compilation or to change the default values of any of the +following compile-time parameters: +

+  What \R matches (Unicode newlines or CR, LF, CRLF only)
+  PCRE2's character tables
+  The newline character sequence
+  The compile time nested parentheses limit
+  The maximum length of the pattern string
+  The extra options bits (none set by default)
+
+A compile context is also required if you are using custom memory management. +If none of these apply, just pass NULL as the context argument of +pcre2_compile(). +

+

+A compile context is created, copied, and freed by the following functions: +
+
+pcre2_compile_context *pcre2_compile_context_create( + pcre2_general_context *gcontext); +
+
+pcre2_compile_context *pcre2_compile_context_copy( + pcre2_compile_context *ccontext); +
+
+void pcre2_compile_context_free(pcre2_compile_context *ccontext); +
+
+A compile context is created with default values for its parameters. These can +be changed by calling the following functions, which return 0 on success, or +PCRE2_ERROR_BADDATA if invalid data is detected. +
+
+int pcre2_set_bsr(pcre2_compile_context *ccontext, + uint32_t value); +
+
+The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF, +or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line +ending sequence. The value is used by the JIT compiler and by the two +interpreted matching functions, pcre2_match() and +pcre2_dfa_match(). +
+
+int pcre2_set_character_tables(pcre2_compile_context *ccontext, + const uint8_t *tables); +
+
+The value must be the result of a call to pcre2_maketables(), whose only +argument is a general context. This function builds a set of character tables +in the current locale. +
+
+int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, + uint32_t extra_options); +
+
+As PCRE2 has developed, almost all the 32 option bits that are available in +the options argument of pcre2_compile() have been used up. To avoid +running out, the compile context contains a set of extra option bits which are +used for some newer, assumed rarer, options. This function sets those bits. It +always sets all the bits (either on or off): the supplied value replaces the +existing setting as a whole; it is not combined with it. The available options +are defined in the section entitled "Extra +compile options" +below. +
+
+int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); +
+
+This sets a maximum length, in code units, for any pattern string that is +compiled with this context. If the pattern is longer, an error is generated. +This facility is provided so that applications that accept patterns from +external sources can limit their size. The default is the largest number that a +PCRE2_SIZE variable can hold, which is effectively unlimited. +
+
+int pcre2_set_max_pattern_compiled_length( + pcre2_compile_context *ccontext, PCRE2_SIZE value); +
+
+This sets a maximum size, in bytes, for the memory needed to hold the compiled +version of a pattern that is compiled with this context. If the pattern needs +more memory, an error is generated. This facility is provided so that +applications that accept patterns from external sources can limit the amount of +memory they use. The default is the largest number that a PCRE2_SIZE variable +can hold, which is effectively unlimited. +
+
+int pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, + uint32_t value); +
+
+This sets a maximum length for the number of characters matched by a +variable-length lookbehind assertion. The default is set when PCRE2 is built, +with the ultimate default being 255, the same as Perl. Lookbehind assertions +without a bounding length are not supported. +
+
+int pcre2_set_newline(pcre2_compile_context *ccontext, + uint32_t value); +
+
+This specifies which characters or character sequences are to be recognized as +newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only), +PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character +sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), +PCRE2_NEWLINE_ANY (any Unicode newline sequence), or PCRE2_NEWLINE_NUL (the +NUL character, that is a binary zero). +

+

+A pattern can override the value set in the compile context by starting with a +sequence such as (*CRLF). See the +pcre2pattern +page for details. +

+

+When a pattern is compiled with the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE +option, the newline convention affects the recognition of the end of internal +comments starting with #. The value is saved with the compiled pattern for +subsequent use by the JIT compiler and by the two interpreted matching +functions, pcre2_match() and pcre2_dfa_match(). +
+
+int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, + uint32_t value); +
+
+This parameter adjusts the limit, set when PCRE2 is built (default 250), on the +depth of parenthesis nesting in a pattern. This limit stops rogue patterns +using up too much system stack when being compiled. The limit applies to +parentheses of all kinds, not just capturing parentheses. +
+
+int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, + int (*guard_function)(uint32_t, void *), void *user_data); +
+
+There is at least one application that runs PCRE2 in threads with very limited +system stack, where running out of stack is to be avoided at all costs. The +parenthesis limit above cannot take account of how much stack is actually +available during compilation. For a finer control, you can supply a function +that is called whenever pcre2_compile() starts to compile a parenthesized +part of a pattern. This function can check the actual stack size (or anything +else that it wants to, of course). +

+

+The first argument to the guard function gives the current depth of +nesting, and the second is user data that is set up by the last argument of +pcre2_set_compile_recursion_guard(). The guard function should return +zero if all is well, or non-zero to force an error. +

+
+The match context +
+

+A match context is required if you want to: +

+  Set up a callout function
+  Set an offset limit for matching an unanchored pattern
+  Change the limit on the amount of heap used when matching
+  Change the backtracking match limit
+  Change the backtracking depth limit
+  Set custom memory management specifically for the match
+
+If none of these apply, just pass NULL as the context argument of +pcre2_match(), pcre2_dfa_match(), or pcre2_jit_match(). +

+

+A match context is created, copied, and freed by the following functions: +
+
+pcre2_match_context *pcre2_match_context_create( + pcre2_general_context *gcontext); +
+
+pcre2_match_context *pcre2_match_context_copy( + pcre2_match_context *mcontext); +
+
+void pcre2_match_context_free(pcre2_match_context *mcontext); +
+
+A match context is created with default values for its parameters. These can +be changed by calling the following functions, which return 0 on success, or +PCRE2_ERROR_BADDATA if invalid data is detected. +
+
+int pcre2_set_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_callout_block *, void *), + void *callout_data); +
+
+This sets up a callout function for PCRE2 to call at specified points +during a matching operation. Details are given in the +pcre2callout +documentation. +
+
+int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_substitute_callout_block *, void *), + void *callout_data); +
+
+This sets up a callout function for PCRE2 to call after each substitution +made by pcre2_substitute(). Details are given in the section entitled +"Creating a new string with substitutions" +below. +
+
+int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); +
+
+The offset_limit parameter limits how far an unanchored search can +advance in the subject string. The default value is PCRE2_UNSET. The +pcre2_match() and pcre2_dfa_match() functions return +PCRE2_ERROR_NOMATCH if a match with a starting point before or at the given +offset is not found. The pcre2_substitute() function makes no more +substitutions. +

+

+For example, if the pattern /abc/ is matched against "123abc" with an offset +limit less than 3, the result is PCRE2_ERROR_NOMATCH. A match can never be +found if the startoffset argument of pcre2_match(), +pcre2_dfa_match(), or pcre2_substitute() is greater than the offset +limit set in the match context. +

+

+When using this facility, you must set the PCRE2_USE_OFFSET_LIMIT option when +calling pcre2_compile() so that when JIT is in use, different code can be +compiled. If a match is started with a non-default offset limit when +PCRE2_USE_OFFSET_LIMIT is not set, an error is generated. +

+

+The offset limit facility can be used to track progress when searching large +subject strings or to limit the extent of global substitutions. See also the +PCRE2_FIRSTLINE option, which requires a match to start before or at the first +newline that follows the start of matching in the subject. If this is set with +an offset limit, a match must occur in the first line and also within the +offset limit. In other words, whichever limit comes first is used. +
+
+int pcre2_set_heap_limit(pcre2_match_context *mcontext, + uint32_t value); +
+
+The heap_limit parameter specifies, in units of kibibytes (1024 bytes), +the maximum amount of heap memory that pcre2_match() may use to hold +backtracking information when running an interpretive match. This limit also +applies to pcre2_dfa_match(), which may use the heap when processing +patterns with a lot of nested pattern recursion or lookarounds or atomic +groups. This limit does not apply to matching with the JIT optimization, which +has its own memory control arrangements (see the +pcre2jit +documentation for more details). If the limit is reached, the negative error +code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2 +is built; if it is not, the default is set very large and is essentially +unlimited. +

+

+A value for the heap limit may also be supplied by an item at the start of a +pattern of the form +

+  (*LIMIT_HEAP=ddd)
+
+where ddd is a decimal number. However, such a setting is ignored unless ddd is +less than the limit set by the caller of pcre2_match() or, if no such +limit is set, less than the default. +

+

+The pcre2_match() function always needs some heap memory, so setting a +value of zero guarantees a "heap limit exceeded" error. Details of how +pcre2_match() uses the heap are given in the +pcre2perform +documentation. +

+

+For pcre2_dfa_match(), a vector on the system stack is used when +processing pattern recursions, lookarounds, or atomic groups, and only if this +is not big enough is heap memory used. In this case, setting a value of zero +disables the use of the heap. +
+
+int pcre2_set_match_limit(pcre2_match_context *mcontext, + uint32_t value); +
+
+The match_limit parameter provides a means of preventing PCRE2 from using +up too many computing resources when processing patterns that are not going to +match, but which have a very large number of possibilities in their search +trees. The classic example is a pattern that uses nested unlimited repeats. +

+

+There is an internal counter in pcre2_match() that is incremented each +time round its main matching loop. If this value reaches the match limit, +pcre2_match() returns the negative value PCRE2_ERROR_MATCHLIMIT. This has +the effect of limiting the amount of backtracking that can take place. For +patterns that are not anchored, the count restarts from zero for each position +in the subject string. This limit also applies to pcre2_dfa_match(), +though the counting is done in a different way. +

+

+When pcre2_match() is called with a pattern that was successfully +processed by pcre2_jit_compile(), the way in which matching is executed +is entirely different. However, there is still the possibility of runaway +matching that goes on for a very long time, and so the match_limit value +is also used in this case (but in a different way) to limit how long the +matching can continue. +

+

+The default value for the limit can be set when PCRE2 is built; the default is +10 million, which handles all but the most extreme cases. A value for the match +limit may also be supplied by an item at the start of a pattern of the form +

+  (*LIMIT_MATCH=ddd)
+
+where ddd is a decimal number. However, such a setting is ignored unless ddd is +less than the limit set by the caller of pcre2_match() or +pcre2_dfa_match() or, if no such limit is set, less than the default. +
+
+int pcre2_set_depth_limit(pcre2_match_context *mcontext, + uint32_t value); +
+
+This parameter limits the depth of nested backtracking in pcre2_match(). +Each time a nested backtracking point is passed, a new memory frame is used +to remember the state of matching at that point. Thus, this parameter +indirectly limits the amount of memory that is used in a match. However, +because the size of each memory frame depends on the number of capturing +parentheses, the actual memory limit varies from pattern to pattern. This limit +was more useful in versions before 10.30, where function recursion was used for +backtracking. +

+

+The depth limit is not relevant, and is ignored, when matching is done using +JIT compiled code. However, it is supported by pcre2_dfa_match(), which +uses it to limit the depth of nested internal recursive function calls that +implement atomic groups, lookaround assertions, and pattern recursions. This +limits, indirectly, the amount of system stack that is used. It was more useful +in versions before 10.32, when stack memory was used for local workspace +vectors for recursive function calls. From version 10.32, only local variables +are allocated on the stack and as each call uses only a few hundred bytes, even +a small stack can support quite a lot of recursion. +

+

+If the depth of internal recursive function calls is great enough, local +workspace vectors are allocated on the heap from version 10.32 onwards, so the +depth limit also indirectly limits the amount of heap memory that is used. A +recursive pattern such as /(.(?2))((?1)|)/, when matched to a very long string +using pcre2_dfa_match(), can use a great deal of memory. However, it is +probably better to limit heap usage directly by calling +pcre2_set_heap_limit(). +

+

+The default value for the depth limit can be set when PCRE2 is built; if it is +not, the default is set to the same value as the default for the match limit. +If the limit is exceeded, pcre2_match() or pcre2_dfa_match() +returns PCRE2_ERROR_DEPTHLIMIT. A value for the depth limit may also be +supplied by an item at the start of a pattern of the form +

+  (*LIMIT_DEPTH=ddd)
+
+where ddd is a decimal number. However, such a setting is ignored unless ddd is +less than the limit set by the caller of pcre2_match() or +pcre2_dfa_match() or, if no such limit is set, less than the default. +

+
CHECKING BUILD-TIME OPTIONS
+

+int pcre2_config(uint32_t what, void *where); +

+

+The function pcre2_config() makes it possible for a PCRE2 client to find +the value of certain configuration parameters and to discover which optional +features have been compiled into the PCRE2 library. The +pcre2build +documentation has more details about these features. +

+

+The first argument for pcre2_config() specifies which information is +required. The second argument is a pointer to memory into which the information +is placed. If NULL is passed, the function returns the amount of memory that is +needed for the requested information. For calls that return numerical values, +the value is in bytes; when requesting these values, where should point +to appropriately aligned memory. For calls that return strings, the required +length is given in code units, not counting the terminating zero. +

+

+When requesting information, the returned value from pcre2_config() is +non-negative on success, or the negative error code PCRE2_ERROR_BADOPTION if +the value in the first argument is not recognized. The following information is +available: +

+  PCRE2_CONFIG_BSR
+
+The output is a uint32_t integer whose value indicates what character +sequences the \R escape sequence matches by default. A value of +PCRE2_BSR_UNICODE means that \R matches any Unicode line ending sequence; a +value of PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, or CRLF. The +default can be overridden when a pattern is compiled. +
+  PCRE2_CONFIG_COMPILED_WIDTHS
+
+The output is a uint32_t integer whose lower bits indicate which code unit +widths were selected when PCRE2 was built. The 1-bit indicates 8-bit support, +and the 2-bit and 4-bit indicate 16-bit and 32-bit support, respectively. +
+  PCRE2_CONFIG_DEPTHLIMIT
+
+The output is a uint32_t integer that gives the default limit for the depth of +nested backtracking in pcre2_match() or the depth of nested recursions, +lookarounds, and atomic groups in pcre2_dfa_match(). Further details are +given with pcre2_set_depth_limit() above. +
+  PCRE2_CONFIG_HEAPLIMIT
+
+The output is a uint32_t integer that gives, in kibibytes, the default limit +for the amount of heap memory used by pcre2_match() or +pcre2_dfa_match(). Further details are given with +pcre2_set_heap_limit() above. +
+  PCRE2_CONFIG_JIT
+
+The output is a uint32_t integer that is set to one if support for just-in-time +compiling is included in the library; otherwise it is set to zero. Note that +having the support in the library does not guarantee that JIT will be used for +any given match. See the +pcre2jit +documentation for more details. +
+  PCRE2_CONFIG_JITTARGET
+
+The where argument should point to a buffer that is at least 48 code +units long. (The exact length required can be found by calling +pcre2_config() with where set to NULL.) The buffer is filled with a +string that contains the name of the architecture for which the JIT compiler is +configured, for example "x86 32bit (little endian + unaligned)". If JIT support +is not available, PCRE2_ERROR_BADOPTION is returned, otherwise the number of +code units used is returned. This is the length of the string, plus one unit +for the terminating zero. +
+  PCRE2_CONFIG_LINKSIZE
+
+The output is a uint32_t integer that contains the number of bytes used for +internal linkage in compiled regular expressions. When PCRE2 is configured, the +value can be set to 2, 3, or 4, with the default being 2. This is the value +that is returned by pcre2_config(). However, when the 16-bit library is +compiled, a value of 3 is rounded up to 4, and when the 32-bit library is +compiled, internal linkages always use 4 bytes, so the configured value is not +relevant. +

+

+The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all +but the most massive patterns, since it allows the size of the compiled pattern +to be up to 65535 code units. Larger values allow larger regular expressions to +be compiled by those two libraries, but at the expense of slower matching. +

+  PCRE2_CONFIG_MATCHLIMIT
+
+The output is a uint32_t integer that gives the default match limit for +pcre2_match(). Further details are given with +pcre2_set_match_limit() above. +
+  PCRE2_CONFIG_NEWLINE
+
+The output is a uint32_t integer whose value specifies the default character +sequence that is recognized as meaning "newline". The values are: +
+  PCRE2_NEWLINE_CR       Carriage return (CR)
+  PCRE2_NEWLINE_LF       Linefeed (LF)
+  PCRE2_NEWLINE_CRLF     Carriage return, linefeed (CRLF)
+  PCRE2_NEWLINE_ANY      Any Unicode line ending
+  PCRE2_NEWLINE_ANYCRLF  Any of CR, LF, or CRLF
+  PCRE2_NEWLINE_NUL      The NUL character (binary zero)
+
+The default should normally correspond to the standard sequence for your +operating system. +
+  PCRE2_CONFIG_NEVER_BACKSLASH_C
+
+The output is a uint32_t integer that is set to one if the use of \C was +permanently disabled when PCRE2 was built; otherwise it is set to zero. +
+  PCRE2_CONFIG_PARENSLIMIT
+
+The output is a uint32_t integer that gives the maximum depth of nesting +of parentheses (of any kind) in a pattern. This limit is imposed to cap the +amount of system stack used when a pattern is compiled. It is specified when +PCRE2 is built; the default is 250. This limit does not take into account the +stack that may already be used by the calling application. For finer control +over compilation stack usage, see pcre2_set_compile_recursion_guard(). +
+  PCRE2_CONFIG_STACKRECURSE
+
+This parameter is obsolete and should not be used in new code. The output is a +uint32_t integer that is always set to zero. +
+  PCRE2_CONFIG_TABLES_LENGTH
+
+The output is a uint32_t integer that gives the length of PCRE2's character +processing tables in bytes. For details of these tables see the +section on locale support +below. +
+  PCRE2_CONFIG_UNICODE_VERSION
+
+The where argument should point to a buffer that is at least 24 code +units long. (The exact length required can be found by calling +pcre2_config() with where set to NULL.) If PCRE2 has been compiled +without Unicode support, the buffer is filled with the text "Unicode not +supported". Otherwise, the Unicode version string (for example, "8.0.0") is +inserted. The number of code units used is returned. This is the length of the +string plus one unit for the terminating zero. +
+  PCRE2_CONFIG_UNICODE
+
+The output is a uint32_t integer that is set to one if Unicode support is +available; otherwise it is set to zero. Unicode support implies UTF support. +
+  PCRE2_CONFIG_VERSION
+
+The where argument should point to a buffer that is at least 24 code +units long. (The exact length required can be found by calling +pcre2_config() with where set to NULL.) The buffer is filled with +the PCRE2 version string, zero-terminated. The number of code units used is +returned. This is the length of the string plus one unit for the terminating +zero. +

+
COMPILING A PATTERN
+

+pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, + pcre2_compile_context *ccontext); +
+
+void pcre2_code_free(pcre2_code *code); +
+
+pcre2_code *pcre2_code_copy(const pcre2_code *code); +
+
+pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); +

+

+The pcre2_compile() function compiles a pattern into an internal form. +The pattern is defined by a pointer to a string of code units and a length in +code units. If the pattern is zero-terminated, the length can be specified as +PCRE2_ZERO_TERMINATED. A NULL pattern pointer with a length of zero is treated +as an empty string (NULL with a non-zero length causes an error return). The +function returns a pointer to a block of memory that contains the compiled +pattern and related data, or NULL if an error occurred. +

+

+If the compile context argument ccontext is NULL, memory for the compiled +pattern is obtained by calling malloc(). Otherwise, it is obtained from +the same memory function that was used for the compile context. The caller must +free the memory by calling pcre2_code_free() when it is no longer needed. +If pcre2_code_free() is called with a NULL argument, it returns +immediately, without doing anything. +

+

+The function pcre2_code_copy() makes a copy of the compiled code in new +memory, using the same memory allocator as was used for the original. However, +if the code has been processed by the JIT compiler (see +below), +the JIT information cannot be copied (because it is position-dependent). +The new copy can initially be used only for non-JIT matching, though it can be +passed to pcre2_jit_compile() if required. If pcre2_code_copy() is +called with a NULL argument, it returns NULL. +

+

+The pcre2_code_copy() function provides a way for individual threads in a +multithreaded application to acquire a private copy of shared compiled code. +However, it does not make a copy of the character tables used by the compiled +pattern; the new pattern code points to the same tables as the original code. +(See +"Locale Support" +below for details of these character tables.) In many applications the same +tables are used throughout, so this behaviour is appropriate. Nevertheless, +there are occasions when a copy of a compiled pattern and the relevant tables +are needed. The pcre2_code_copy_with_tables() function provides this facility. +Copies of both the code and the tables are made, with the new code pointing to +the new tables. The memory for the new tables is automatically freed when +pcre2_code_free() is called for the new copy of the compiled code. If +pcre2_code_copy_with_tables() is called with a NULL argument, it returns +NULL. +

+

+NOTE: When one of the matching functions is called, pointers to the compiled +pattern and the subject string are set in the match data block so that they can +be referenced by the substring extraction functions after a successful match. +After running a match, you must not free a compiled pattern or a subject string +until after all operations on the +match data block +have taken place, unless, in the case of the subject string, you have used the +PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled +"Option bits for pcre2_match()" +below. +

+

+The options argument for pcre2_compile() contains various bit +settings that affect the compilation. It should be zero if none of them are +required. The available options are described below. Some of them (in +particular, those that are compatible with Perl, but some others as well) can +also be set and unset from within the pattern (see the detailed description in +the +pcre2pattern +documentation). +

+

+For those options that can be different in different parts of the pattern, the +contents of the options argument specifies their settings at the start of +compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and PCRE2_NO_UTF_CHECK +options can be set at the time of matching as well as at compile time. +

+

+Some additional options and less frequently required compile-time parameters +(for example, the newline setting) can be provided in a compile context (as +described +above). +

+

+If errorcode or erroroffset is NULL, pcre2_compile() returns +NULL immediately. Otherwise, the variables to which these point are set to an +error code and an offset (number of code units) within the pattern, +respectively, when pcre2_compile() returns NULL because a compilation +error has occurred. +

+

+There are nearly 100 positive error codes that pcre2_compile() may return +if it finds an error in the pattern. There are also some negative error codes +that are used for invalid UTF strings when validity checking is in force. These +are the same as given by pcre2_match() and pcre2_dfa_match(), and +are described in the +pcre2unicode +documentation. There is no separate documentation for the positive error codes, +because the textual error messages that are obtained by calling the +pcre2_get_error_message() function (see "Obtaining a textual error +message" +below) +should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined +for both positive and negative error codes in pcre2.h. When compilation +is successful errorcode is set to a value that returns the message "no +error" if passed to pcre2_get_error_message(). +

+

+The value returned in erroroffset is an indication of where in the +pattern an error occurred. When there is no error, zero is returned. A non-zero +value is not necessarily the furthest point in the pattern that was read. For +example, after the error "lookbehind assertion is not fixed length", the error +offset points to the start of the failing assertion. For an invalid UTF-8 or +UTF-16 string, the offset is that of the first code unit of the failing +character. +

+

+Some errors are not detected until the whole pattern has been scanned; in these +cases, the offset passed back is the length of the pattern. Note that the +offset is in code units, not characters, even in a UTF mode. It may sometimes +point into the middle of a UTF-8 or UTF-16 character. +

+

+This code fragment shows a typical straightforward call to +pcre2_compile(): +

+  pcre2_code *re;
+  PCRE2_SIZE erroffset;
+  int errorcode;
+  re = pcre2_compile(
+    "^A.*Z",                /* the pattern */
+    PCRE2_ZERO_TERMINATED,  /* the pattern is zero-terminated */
+    0,                      /* default options */
+    &errorcode,             /* for error code */
+    &erroffset,             /* for error offset */
+    NULL);                  /* no compile context */
+
+
+

+
+Main compile options +
+

+The following names for option bits are defined in the pcre2.h header +file: +

+  PCRE2_ANCHORED
+
+If this bit is set, the pattern is forced to be "anchored", that is, it is +constrained to match only at the first matching point in the string that is +being searched (the "subject string"). This effect can also be achieved by +appropriate constructs in the pattern itself, which is the only way to do it in +Perl. +
+  PCRE2_ALLOW_EMPTY_CLASS
+
+By default, for compatibility with Perl, a closing square bracket that +immediately follows an opening one is treated as a data character for the +class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which +therefore contains no characters and so can never match. +
+  PCRE2_ALT_BSUX
+
+This option requests alternative handling of three escape sequences, which +makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set: +

+

+(1) \U matches an upper case "U" character; by default \U causes a compile +time error (Perl uses \U to upper case subsequent characters). +

+

+(2) \u matches a lower case "u" character unless it is followed by four +hexadecimal digits, in which case the hexadecimal number defines the code point +to match. By default, \u causes a compile time error (Perl uses it to upper +case the following character). +

+

+(3) \x matches a lower case "x" character unless it is followed by two +hexadecimal digits, in which case the hexadecimal number defines the code point +to match. By default, as in Perl, a hexadecimal number is always expected after +\x, but it may have zero, one, or two digits (so, for example, \xz matches a +binary zero character followed by z). +

+

+ECMAscript 6 added additional functionality to \u. This can be accessed using +the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile options" +below). +Note that this alternative escape handling applies only to patterns. Neither of +these options affects the processing of replacement strings passed to +pcre2_substitute(). +

+  PCRE2_ALT_CIRCUMFLEX
+
+In multiline mode (when PCRE2_MULTILINE is set), the circumflex metacharacter +matches at the start of the subject (unless PCRE2_NOTBOL is set), and also +after any internal newline. However, it does not match after a newline at the +end of the subject, for compatibility with Perl. If you want a multiline +circumflex also to match after a terminating newline, you must set +PCRE2_ALT_CIRCUMFLEX. +
+  PCRE2_ALT_VERBNAMES
+
+By default, for compatibility with Perl, the name in any verb sequence such as +(*MARK:NAME) is any sequence of characters that does not include a closing +parenthesis. The name is not processed in any way, and it is not possible to +include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES +option is set, normal backslash processing is applied to verb names and only an +unescaped closing parenthesis terminates the name. A closing parenthesis can be +included in a name either as \) or between \Q and \E. If the PCRE2_EXTENDED +or PCRE2_EXTENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped +whitespace in verb names is skipped and #-comments are recognized, exactly as +in the rest of the pattern. +
+  PCRE2_AUTO_CALLOUT
+
+If this bit is set, pcre2_compile() automatically inserts callout items, +all with number 255, before each pattern item, except immediately before or +after an explicit callout in the pattern. For discussion of the callout +facility, see the +pcre2callout +documentation. +
+  PCRE2_CASELESS
+
+If this bit is set, letters in the pattern match both upper and lower case +letters in the subject. It is equivalent to Perl's /i option, and it can be +changed within a pattern by a (?i) option setting. If either PCRE2_UTF or +PCRE2_UCP is set, Unicode properties are used for all characters with more than +one other case, and for all characters whose code points are greater than +U+007F. Note that there are two ASCII characters, K and S, that, in addition to +their lower case ASCII equivalents, are case-equivalent with U+212A (Kelvin +sign) and U+017F (long S) respectively. If you do not want this case +equivalence, you can suppress it by setting PCRE2_EXTRA_CASELESS_RESTRICT. +

+

+For lower valued characters with only one other case, a lookup table is used +for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is used +for all code points less than 256, and higher code points (available only in +16-bit or 32-bit mode) are treated as not having another case. +

+  PCRE2_DOLLAR_ENDONLY
+
+If this bit is set, a dollar metacharacter in the pattern matches only at the +end of the subject string. Without this option, a dollar also matches +immediately before a newline at the end of the string (but not before any other +newlines). The PCRE2_DOLLAR_ENDONLY option is ignored if PCRE2_MULTILINE is +set. There is no equivalent to this option in Perl, and no way to set it within +a pattern. +
+  PCRE2_DOTALL
+
+If this bit is set, a dot metacharacter in the pattern matches any character, +including one that indicates a newline. However, it only ever matches one +character, even if newlines are coded as CRLF. Without this option, a dot does +not match when the current position in the subject is at a newline. This option +is equivalent to Perl's /s option, and it can be changed within a pattern by a +(?s) option setting. A negative class such as [^a] always matches newline +characters, and the \N escape sequence always matches a non-newline character, +independent of the setting of PCRE2_DOTALL. +
+  PCRE2_DUPNAMES
+
+If this bit is set, names used to identify capture groups need not be unique. +This can be helpful for certain types of pattern when it is known that only one +instance of the named group can ever be matched. There are more details of +named capture groups below; see also the +pcre2pattern +documentation. +
+  PCRE2_ENDANCHORED
+
+If this bit is set, the end of any pattern match must be right at the end of +the string being searched (the "subject string"). If the pattern match +succeeds by reaching (*ACCEPT), but does not reach the end of the subject, the +match fails at the current starting point. For unanchored patterns, a new match +is then tried at the next starting point. However, if the match succeeds by +reaching the end of the pattern, but not the end of the subject, backtracking +occurs and an alternative match may be found. Consider these two patterns: +
+  .(*ACCEPT)|..
+  .|..
+
+If matched against "abc" with PCRE2_ENDANCHORED set, the first matches "c" +whereas the second matches "bc". The effect of PCRE2_ENDANCHORED can also be +achieved by appropriate constructs in the pattern itself, which is the only way +to do it in Perl. +

+

+For DFA matching with pcre2_dfa_match(), PCRE2_ENDANCHORED applies only +to the first (that is, the longest) matched string. Other parallel matches, +which are necessarily substrings of the first one, must obviously end before +the end of the subject. +

+  PCRE2_EXTENDED
+
+If this bit is set, most white space characters in the pattern are totally +ignored except when escaped, inside a character class, or inside a \Q...\E +sequence. However, white space is not allowed within sequences such as (?> that +introduce various parenthesized groups, nor within numerical quantifiers such +as {1,3}. Ignorable white space is permitted between an item and a following +quantifier and between a quantifier and a following + that indicates +possessiveness. PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be +changed within a pattern by a (?x) option setting. +

+

+When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as +white space only those characters with code points less than 256 that are +flagged as white space in its low-character table. The table is normally +created by +pcre2_maketables(), +which uses the isspace() function to identify space characters. In most +ASCII environments, the relevant characters are those with code points 0x0009 +(tab), 0x000A (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D +(carriage return), and 0x0020 (space). +

+

+When PCRE2 is compiled with Unicode support, in addition to these characters, +five more Unicode "Pattern White Space" characters are recognized by +PCRE2_EXTENDED. These are U+0085 (next line), U+200E (left-to-right mark), +U+200F (right-to-left mark), U+2028 (line separator), and U+2029 (paragraph +separator). This set of characters is the same as recognized by Perl's /x +option. Note that the horizontal and vertical space characters that are matched +by the \h and \v escapes in patterns are a much bigger set. +

+

+As well as ignoring most white space, PCRE2_EXTENDED also causes characters +between an unescaped # outside a character class and the next newline, +inclusive, to be ignored, which makes it possible to include comments inside +complicated patterns. Note that the end of this type of comment is a literal +newline sequence in the pattern; escape sequences that happen to represent a +newline do not count. +

+

+Which characters are interpreted as newlines can be specified by a setting in +the compile context that is passed to pcre2_compile() or by a special +sequence at the start of the pattern, as described in the section entitled +"Newline conventions" +in the pcre2pattern documentation. A default is defined when PCRE2 is +built. +

+  PCRE2_EXTENDED_MORE
+
+This option has the effect of PCRE2_EXTENDED, but, in addition, unescaped space +and horizontal tab characters are ignored inside a character class. Note: only +these two characters are ignored, not the full set of pattern white space +characters that are ignored outside a character class. PCRE2_EXTENDED_MORE is +equivalent to Perl's /xx option, and it can be changed within a pattern by a +(?xx) option setting. +
+  PCRE2_FIRSTLINE
+
+If this option is set, the start of an unanchored pattern match must be before +or at the first newline in the subject string following the start of matching, +though the matched text may continue over the newline. If startoffset is +non-zero, the limiting newline is not necessarily the first newline in the +subject. For example, if the subject string is "abc\nxyz" (where \n +represents a single-character newline) a pattern match for "yz" succeeds with +PCRE2_FIRSTLINE if startoffset is greater than 3. See also +PCRE2_USE_OFFSET_LIMIT, which provides a more general limiting facility. If +PCRE2_FIRSTLINE is set with an offset limit, a match must occur in the first +line and also within the offset limit. In other words, whichever limit comes +first is used. This option has no effect for anchored patterns. +
+  PCRE2_LITERAL
+
+If this option is set, all meta-characters in the pattern are disabled, and it +is treated as a literal string. Matching literal strings with a regular +expression engine is not the most efficient way of doing it. If you are doing a +lot of literal matching and are worried about efficiency, you should consider +using other approaches. The only other main options that are allowed with +PCRE2_LITERAL are: PCRE2_ANCHORED, PCRE2_ENDANCHORED, PCRE2_AUTO_CALLOUT, +PCRE2_CASELESS, PCRE2_FIRSTLINE, PCRE2_MATCH_INVALID_UTF, +PCRE2_NO_START_OPTIMIZE, PCRE2_NO_UTF_CHECK, PCRE2_UTF, and +PCRE2_USE_OFFSET_LIMIT. The extra options PCRE2_EXTRA_MATCH_LINE and +PCRE2_EXTRA_MATCH_WORD are also supported. Any other options cause an error. +
+  PCRE2_MATCH_INVALID_UTF
+
+This option forces PCRE2_UTF (see below) and also enables support for matching +by pcre2_match() in subject strings that contain invalid UTF sequences. +Note, however, that the 16-bit and 32-bit PCRE2 libraries process strings as +sequences of uint16_t or uint32_t code points. They cannot find valid UTF +sequences within an arbitrary string of bytes unless such sequences are +suitably aligned. This facility is not supported for DFA matching. For details, +see the +pcre2unicode +documentation. +
+  PCRE2_MATCH_UNSET_BACKREF
+
+If this option is set, a backreference to an unset capture group matches an +empty string (by default this causes the current matching alternative to fail). +A pattern such as (\1)(a) succeeds when this option is set (assuming it can +find an "a" in the subject), whereas it fails by default, for Perl +compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka +JavaScript). +
+  PCRE2_MULTILINE
+
+By default, for the purposes of matching "start of line" and "end of line", +PCRE2 treats the subject string as consisting of a single line of characters, +even if it actually contains newlines. The "start of line" metacharacter (^) +matches only at the start of the string, and the "end of line" metacharacter +($) matches only at the end of the string, or before a terminating newline +(except when PCRE2_DOLLAR_ENDONLY is set). Note, however, that unless +PCRE2_DOTALL is set, the "any character" metacharacter (.) does not match at a +newline. This behaviour (for ^, $, and dot) is the same as Perl. +

+

+When PCRE2_MULTILINE is set, the "start of line" and "end of line" +constructs match immediately following or immediately before internal newlines +in the subject string, respectively, as well as at the very start and end. This +is equivalent to Perl's /m option, and it can be changed within a pattern by a +(?m) option setting. Note that the "start of line" metacharacter does not match +after a newline at the end of the subject, for compatibility with Perl. +However, you can change this by setting the PCRE2_ALT_CIRCUMFLEX option. If +there are no newlines in a subject string, or no occurrences of ^ or $ in a +pattern, setting PCRE2_MULTILINE has no effect. +

+  PCRE2_NEVER_BACKSLASH_C
+
+This option locks out the use of \C in the pattern that is being compiled. +This escape can cause unpredictable behaviour in UTF-8 or UTF-16 modes, because +it may leave the current matching point in the middle of a multi-code-unit +character. This option may be useful in applications that process patterns from +external sources. Note that there is also a build-time option that permanently +locks out the use of \C. +
+  PCRE2_NEVER_UCP
+
+This option locks out the use of Unicode properties for handling \B, \b, \D, +\d, \S, \s, \W, \w, and some of the POSIX character classes, as described +for the PCRE2_UCP option below. In particular, it prevents the creator of the +pattern from enabling this facility by starting the pattern with (*UCP). This +option may be useful in applications that process patterns from external +sources. The option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error. +
+  PCRE2_NEVER_UTF
+
+This option locks out interpretation of the pattern as UTF-8, UTF-16, or +UTF-32, depending on which library is in use. In particular, it prevents the +creator of the pattern from switching to UTF interpretation by starting the +pattern with (*UTF). This option may be useful in applications that process +patterns from external sources. The combination of PCRE2_UTF and +PCRE2_NEVER_UTF causes an error. +
+  PCRE2_NO_AUTO_CAPTURE
+
+If this option is set, it disables the use of numbered capturing parentheses in +the pattern. Any opening parenthesis that is not followed by ? behaves as if it +were followed by ?: but named parentheses can still be used for capturing (and +they acquire numbers in the usual way). This is the same as Perl's /n option. +Note that, when this option is set, references to capture groups +(backreferences or recursion/subroutine calls) may only refer to named groups, +though the reference can be by name or by number. +
+  PCRE2_NO_AUTO_POSSESS
+
+If this option is set, it disables "auto-possessification", which is an +optimization that, for example, turns a+b into a++b in order to avoid +backtracks into a+ that can never be successful. However, if callouts are in +use, auto-possessification means that some callouts are never taken. You can +set this option if you want the matching functions to do a full unoptimized +search and run all the callouts, but it is mainly provided for testing +purposes. +
+  PCRE2_NO_DOTSTAR_ANCHOR
+
+If this option is set, it disables an optimization that is applied when .* is +the first significant item in a top-level branch of a pattern, and all the +other branches also start with .* or with \A or \G or ^. The optimization is +automatically disabled for .* if it is inside an atomic group or a capture +group that is the subject of a backreference, or if the pattern contains +(*PRUNE) or (*SKIP). When the optimization is not disabled, such a pattern is +automatically anchored if PCRE2_DOTALL is set for all the .* items and +PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any match +must start either at the start of the subject or following a newline is +remembered. Like other optimizations, this can cause callouts to be skipped. +
+  PCRE2_NO_START_OPTIMIZE
+
+This is an option whose main effect is at matching time. It does not change +what pcre2_compile() generates, but it does affect the output of the JIT +compiler. +

+

+There are a number of optimizations that may occur at the start of a match, in +order to speed up the process. For example, if it is known that an unanchored +match must start with a specific code unit value, the matching code searches +the subject for that value, and fails immediately if it cannot find it, without +actually running the main matching function. This means that a special item +such as (*COMMIT) at the start of a pattern is not considered until after a +suitable starting point for the match has been found. Also, when callouts or +(*MARK) items are in use, these "start-up" optimizations can cause them to be +skipped if the pattern is never actually used. The start-up optimizations are +in effect a pre-scan of the subject that takes place before the pattern is run. +

+

+The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations, +possibly causing performance to suffer, but ensuring that in cases where the +result is "no match", the callouts do occur, and that items such as (*COMMIT) +and (*MARK) are considered at every possible starting position in the subject +string. +

+

+Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching operation. +Consider the pattern +

+  (*COMMIT)ABC
+
+When this is compiled, PCRE2 records the fact that a match must start with the +character "A". Suppose the subject string is "DEFABC". The start-up +optimization scans along the subject, finds "A" and runs the first match +attempt from there. The (*COMMIT) item means that the pattern must match the +current starting position, which in this case, it does. However, if the same +match is run with PCRE2_NO_START_OPTIMIZE set, the initial scan along the +subject string does not happen. The first match attempt is run starting from +"D" and when this fails, (*COMMIT) prevents any further matches being tried, so +the overall result is "no match". +

+

+Another start-up optimization makes use of a minimum length for a matching +subject, which is recorded when possible. Consider the pattern +

+  (*MARK:1)B(*MARK:2)(X|Y)
+
+The minimum length for a match is two characters. If the subject is "XXBB", the +"starting character" optimization skips "XX", then tries to match "BB", which +is long enough. In the process, (*MARK:2) is encountered and remembered. When +the match attempt fails, the next "B" is found, but there is only one character +left, so there are no more attempts, and "no match" is returned with the "last +mark seen" set to "2". If PCRE2_NO_START_OPTIMIZE is set, however, matches are tried +at every possible starting position, including at the end of the subject, where +(*MARK:1) is encountered, but there is no "B", so the "last mark seen" that is +returned is "1". In this case, the optimizations do not affect the overall +match result, which is still "no match", but they do affect the auxiliary +information that is returned. +
+  PCRE2_NO_UTF_CHECK
+
+When PCRE2_UTF is set, the validity of the pattern as a UTF string is +automatically checked. There are discussions about the validity of +UTF-8 strings, +UTF-16 strings, +and +UTF-32 strings +in the +pcre2unicode +document. If an invalid UTF sequence is found, pcre2_compile() returns a +negative error code. +

+

+If you know that your pattern is a valid UTF string, and you want to skip this +check for performance reasons, you can set the PCRE2_NO_UTF_CHECK option. When +it is set, the effect of passing an invalid UTF string as a pattern is +undefined. It may cause your program to crash or loop. +

+

+Note that this option can also be passed to pcre2_match() and +pcre2_dfa_match(), to suppress UTF validity checking of the subject +string. +

+

+Note also that setting PCRE2_NO_UTF_CHECK at compile time does not disable the +error that is given if an escape sequence for an invalid Unicode code point is +encountered in the pattern. In particular, the so-called "surrogate" code +points (0xd800 to 0xdfff) are invalid. If you want to allow escape sequences +such as \x{d800} you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra +option, as described in the section entitled "Extra compile options" +below. +However, this is possible only in UTF-8 and UTF-32 modes, because these values +are not representable in UTF-16. +

+  PCRE2_UCP
+
+This option has two effects. Firstly, it changes the way PCRE2 processes \B, +\b, \D, \d, \S, \s, \W, \w, and some of the POSIX character classes. By +default, only ASCII characters are recognized, but if PCRE2_UCP is set, Unicode +properties are used to classify characters. There are some PCRE2_EXTRA +options (see below) that add finer control to this behaviour. More details are +given in the section on +generic character types +in the +pcre2pattern +page. +

+

+The second effect of PCRE2_UCP is to force the use of Unicode properties for +upper/lower casing operations, even when PCRE2_UTF is not set. This makes it +possible to process strings in the 16-bit UCS-2 code. This option is available +only if PCRE2 has been compiled with Unicode support (which is the default). +The PCRE2_EXTRA_CASELESS_RESTRICT option (see below) restricts caseless +matching such that ASCII characters match only ASCII characters and non-ASCII +characters match only non-ASCII characters. +

+  PCRE2_UNGREEDY
+
+This option inverts the "greediness" of the quantifiers so that they are not +greedy by default, but become greedy if followed by "?". It is not compatible +with Perl. It can also be set by a (?U) option setting within the pattern. +
+  PCRE2_USE_OFFSET_LIMIT
+
+This option must be set for pcre2_compile() if +pcre2_set_offset_limit() is going to be used to set a non-default offset +limit in a match context for matches that use this pattern. An error is +generated if an offset limit is set without this option. For more details, see +the description of pcre2_set_offset_limit() in the +section +that describes match contexts. See also the PCRE2_FIRSTLINE +option above. +
+  PCRE2_UTF
+
+This option causes PCRE2 to regard both the pattern and the subject strings +that are subsequently processed as strings of UTF characters instead of +single-code-unit strings. It is available when PCRE2 is built to include +Unicode support (which is the default). If Unicode support is not available, +the use of this option provokes an error. Details of how PCRE2_UTF changes the +behaviour of PCRE2 are given in the +pcre2unicode +page. In particular, note that it changes the way PCRE2_CASELESS works. +

+
+Extra compile options +
+

+The option bits that can be set in a compile context by calling the +pcre2_set_compile_extra_options() function are as follows: +

+  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
+
+Since release 10.38 PCRE2 has forbidden the use of \K within lookaround +assertions, following Perl's lead. This option is provided to re-enable the +previous behaviour (act in positive lookarounds, ignore in negative ones) in +case anybody is relying on it. +
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
+
+This option applies when compiling a pattern in UTF-8 or UTF-32 mode. It is +forbidden in UTF-16 mode, and ignored in non-UTF modes. Unicode "surrogate" +code points in the range 0xd800 to 0xdfff are used in pairs in UTF-16 to encode +code points with values in the range 0x10000 to 0x10ffff. The surrogates cannot +therefore be represented in UTF-16. They can be represented in UTF-8 and +UTF-32, but are defined as invalid code points, and cause errors if encountered +in a UTF-8 or UTF-32 string that is being checked for validity by PCRE2. +

+

+These values also cause errors if encountered in escape sequences such as +\x{d912} within a pattern. However, it seems that some applications, when +using PCRE2 to check for unwanted characters in UTF-8 strings, explicitly test +for the surrogates using escape sequences. The PCRE2_NO_UTF_CHECK option does +not disable the error that occurs, because it applies only to the testing of +input strings for UTF validity. +

+

+If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surrogate code +point values in UTF-8 and UTF-32 patterns no longer provoke errors and are +incorporated in the compiled pattern. However, they can only match subject +characters if the matching function is called with PCRE2_NO_UTF_CHECK set. +

+  PCRE2_EXTRA_ALT_BSUX
+
+The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and \x in +the way that ECMAscript (aka JavaScript) does. Additional functionality was +defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of +PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} as a hexadecimal +character code, where hhh.. is any number of hexadecimal digits. +
+  PCRE2_EXTRA_ASCII_BSD
+
+This option forces \d to match only ASCII digits, even when PCRE2_UCP is set. +It can be changed within a pattern by means of the (?aD) option setting. +
+  PCRE2_EXTRA_ASCII_BSS
+
+This option forces \s to match only ASCII space characters, even when +PCRE2_UCP is set. It can be changed within a pattern by means of the (?aS) +option setting. +
+  PCRE2_EXTRA_ASCII_BSW
+
+This option forces \w to match only ASCII word characters, even when PCRE2_UCP +is set. It can be changed within a pattern by means of the (?aW) option +setting. +
+  PCRE2_EXTRA_ASCII_DIGIT
+
+This option forces the POSIX character classes [:digit:] and [:xdigit:] to +match only ASCII digits, even when PCRE2_UCP is set. It can be changed within +a pattern by means of the (?aT) option setting. +
+  PCRE2_EXTRA_ASCII_POSIX
+
+This option forces all the POSIX character classes, including [:digit:] and +[:xdigit:], to match only ASCII characters, even when PCRE2_UCP is set. It can +be changed within a pattern by means of the (?aP) option setting, but note that +this also sets PCRE2_EXTRA_ASCII_DIGIT in order to ensure that (?-aP) unsets +all ASCII restrictions for POSIX classes. +
+  PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
+
+This is a dangerous option. Use with care. By default, an unrecognized escape +such as \j or a malformed one such as \x{2z} causes a compile-time error when +detected by pcre2_compile(). Perl is somewhat inconsistent in handling +such items: for example, \j is treated as a literal "j", and non-hexadecimal +digits in \x{} are just ignored, though warnings are given in both cases if +Perl's warning switch is enabled. However, a malformed octal number after \o{ +always causes an error in Perl. +

+

+If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to +pcre2_compile(), all unrecognized or malformed escape sequences are +treated as single-character escapes. For example, \j is a literal "j" and +\x{2z} is treated as the literal string "x{2z}". Setting this option means +that typos in patterns may go undetected and have unexpected results. Also note +that a sequence such as [\N{] is interpreted as a malformed attempt at +[\N{...}] and so is treated as [N{] whereas [\N] gives an error because an +unqualified \N is a valid escape sequence but is not supported in a character +class. To reiterate: this is a dangerous option. Use with great care. +

+  PCRE2_EXTRA_CASELESS_RESTRICT
+
+When either PCRE2_UCP or PCRE2_UTF is set, caseless matching follows Unicode +rules, which allow for more than two cases per character. There are two +case-equivalent character sets that contain both ASCII and non-ASCII +characters. The ASCII letter S is case-equivalent to U+017f (long S) and the +ASCII letter K is case-equivalent to U+212a (Kelvin sign). This option disables +recognition of case-equivalences that cross the ASCII/non-ASCII boundary. In a +caseless match, both characters must either be ASCII or non-ASCII. The option +can be changed within a pattern by the (?r) option setting. +
+  PCRE2_EXTRA_ESCAPED_CR_IS_LF
+
+There are some legacy applications where the escape sequence \r in a pattern +is expected to match a newline. If this option is set, \r in a pattern is +converted to \n so that it matches a LF (linefeed) instead of a CR (carriage +return) character. The option does not affect a literal CR in the pattern, nor +does it affect CR specified as an explicit code point such as \x{0D}. +
+  PCRE2_EXTRA_MATCH_LINE
+
+This option is provided for use by the -x option of pcre2grep. It +causes the pattern only to match complete lines. This is achieved by +automatically inserting the code for "^(?:" at the start of the compiled +pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set, the matched +line may be in the middle of the subject string. This option can be used with +PCRE2_LITERAL. +
+  PCRE2_EXTRA_MATCH_WORD
+
+This option is provided for use by the -w option of pcre2grep. It +causes the pattern only to match strings that have a word boundary at the start +and the end. This is achieved by automatically inserting the code for "\b(?:" +at the start of the compiled pattern and ")\b" at the end. The option may be +used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is +also set. +

+
JUST-IN-TIME (JIT) COMPILATION
+

+int pcre2_jit_compile(pcre2_code *code, uint32_t options); +
+
+int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); +
+
+void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); +
+
+pcre2_jit_stack *pcre2_jit_stack_create(size_t startsize, + size_t maxsize, pcre2_general_context *gcontext); +
+
+void pcre2_jit_stack_assign(pcre2_match_context *mcontext, + pcre2_jit_callback callback_function, void *callback_data); +
+
+void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack); +

+

+These functions provide support for JIT compilation, which, if the just-in-time +compiler is available, further processes a compiled pattern into machine code +that executes much faster than the pcre2_match() interpretive matching +function. Full details are given in the +pcre2jit +documentation. +

+

+JIT compilation is a heavyweight optimization. It can take some time for +patterns to be analyzed, and for one-off matches and simple patterns the +benefit of faster execution might be offset by a much slower compilation time. +Most (but not all) patterns can be optimized by the JIT compiler. +

+
LOCALE SUPPORT
+

+const uint8_t *pcre2_maketables(pcre2_general_context *gcontext); +
+
+void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); +

+

+PCRE2 handles caseless matching, and determines whether characters are letters, +digits, or whatever, by reference to a set of tables, indexed by character code +point. However, this applies only to characters whose code points are less than +256. By default, higher-valued code points never match escapes such as \w or +\d. +

+

+When PCRE2 is built with Unicode support (the default), certain Unicode +character properties can be tested with \p and \P, or, alternatively, the +PCRE2_UCP option can be set when a pattern is compiled; this causes \w and +friends to use Unicode property support instead of the built-in tables. +PCRE2_UCP also causes upper/lower casing operations on characters with code +points greater than 127 to use Unicode properties. These effects apply even +when PCRE2_UTF is not set. There are, however, some PCRE2_EXTRA options (see +above) that can be used to modify or suppress them. +

+

+The use of locales with Unicode is discouraged. If you are handling characters +with code points greater than 127, you should either use Unicode support, or +use locales, but not try to mix the two. +

+

+PCRE2 contains a built-in set of character tables that are used by default. +These are sufficient for many applications. Normally, the internal tables +recognize only ASCII characters. However, when PCRE2 is built, it is possible +to cause the internal tables to be rebuilt in the default "C" locale of the +local system, which may cause them to be different. +

+

+The built-in tables can be overridden by tables supplied by the application +that calls PCRE2. These may be created in a different locale from the default. +As more and more applications change to using Unicode, the need for this locale +support is expected to die away. +

+

+External tables are built by calling the pcre2_maketables() function, in +the relevant locale. The only argument to this function is a general context, +which can be used to pass a custom memory allocator. If the argument is NULL, +the system malloc() is used. The result can be passed to +pcre2_compile() as often as necessary, by creating a compile context and +calling pcre2_set_character_tables() to set the tables pointer therein. +

+

+For example, to build and use tables that are appropriate for the French locale +(where accented characters with values greater than 127 are treated as +letters), the following code could be used: +

+  setlocale(LC_CTYPE, "fr_FR");
+  tables = pcre2_maketables(NULL);
+  ccontext = pcre2_compile_context_create(NULL);
+  pcre2_set_character_tables(ccontext, tables);
+  re = pcre2_compile(..., ccontext);
+
+The locale name "fr_FR" is used on Linux and other Unix-like systems; if you +are using Windows, the name for the French locale is "french". +

+

+The pointer that is passed (via the compile context) to pcre2_compile() +is saved with the compiled pattern, and the same tables are used by the +matching functions. Thus, for any single pattern, compilation and matching both +happen in the same locale, but different patterns can be processed in different +locales. +

+

+It is the caller's responsibility to ensure that the memory containing the +tables remains available while they are still in use. When they are no longer +needed, you can discard them using pcre2_maketables_free(), which should +be passed, as its first parameter, the same general context that was used to create the +tables. +

+
+Saving locale tables +
+

+The tables described above are just a sequence of binary bytes, which makes +them independent of hardware characteristics such as endianness or whether the +processor is 32-bit or 64-bit. A copy of the result of pcre2_maketables() +can therefore be saved in a file or elsewhere and re-used later, even in a +different program or on another computer. The size of the tables (number of +bytes) must be obtained by calling pcre2_config() with the +PCRE2_CONFIG_TABLES_LENGTH option because pcre2_maketables() does not +return this value. Note that the pcre2_dftables program, which is part of +the PCRE2 build system, can be used stand-alone to create a file that contains +a set of binary tables. See the +pcre2build +documentation for details. +

+
INFORMATION ABOUT A COMPILED PATTERN
+

+int pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where); +

+

+The pcre2_pattern_info() function returns general information about a +compiled pattern. For information about callouts, see the +next section. +The first argument for pcre2_pattern_info() is a pointer to the compiled +pattern. The second argument specifies which piece of information is required, +and the third argument is a pointer to a variable to receive the data. If the +third argument is NULL, the first argument is ignored, and the function returns +the size in bytes of the variable that is required for the information +requested. Otherwise, the yield of the function is zero for success, or one of +the following negative numbers: +

+  PCRE2_ERROR_NULL           the argument code was NULL
+  PCRE2_ERROR_BADMAGIC       the "magic number" was not found
+  PCRE2_ERROR_BADOPTION      the value of what was invalid
+  PCRE2_ERROR_UNSET          the requested field is not set
+
+The "magic number" is placed at the start of each compiled pattern as a simple +check against passing an arbitrary memory pointer. Here is a typical call of +pcre2_pattern_info(), to obtain the length of the compiled pattern: +
+  int rc;
+  size_t length;
+  rc = pcre2_pattern_info(
+    re,               /* result of pcre2_compile() */
+    PCRE2_INFO_SIZE,  /* what is required */
+    &length);         /* where to put the data */
+
+The possible values for the second argument are defined in pcre2.h, and +are as follows: +
+  PCRE2_INFO_ALLOPTIONS
+  PCRE2_INFO_ARGOPTIONS
+  PCRE2_INFO_EXTRAOPTIONS
+
+Return copies of the pattern's options. The third argument should point to a +uint32_t variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that +were passed to pcre2_compile(), whereas PCRE2_INFO_ALLOPTIONS returns +the compile options as modified by any top-level (*XXX) option settings such as +(*UTF) at the start of the pattern itself. PCRE2_INFO_EXTRAOPTIONS returns the +extra options that were set in the compile context by calling the +pcre2_set_compile_extra_options() function. +

+

+For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EXTENDED +option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED and PCRE2_UTF. +Option settings such as (?i) that can change within a pattern do not affect the +result of PCRE2_INFO_ALLOPTIONS, even if they appear right at the start of the +pattern. (This was different in some earlier releases.) +

+

+A pattern compiled without PCRE2_ANCHORED is automatically anchored by PCRE2 if +the first significant item in every top-level branch is one of the following: +

+  ^     unless PCRE2_MULTILINE is set
+  \A    always
+  \G    always
+  .*    sometimes - see below
+
+When .* is the first significant item, anchoring is possible only when all the +following are true: +
+  .* is not in an atomic group
+  .* is not in a capture group that is the subject of a backreference
+  PCRE2_DOTALL is in force for .*
+  Neither (*PRUNE) nor (*SKIP) appears in the pattern
+  PCRE2_NO_DOTSTAR_ANCHOR is not set
+
+For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in the +options returned for PCRE2_INFO_ALLOPTIONS. +
+  PCRE2_INFO_BACKREFMAX
+
+Return the number of the highest backreference in the pattern. The third +argument should point to a uint32_t variable. Named capture groups +acquire numbers as well as names, and these count towards the highest +backreference. Backreferences such as \4 or \g{12} match the captured +characters of the given group, but in addition, the check that a capture +group is set in a conditional group such as (?(3)a|b) is also a backreference. +Zero is returned if there are no backreferences. +
+  PCRE2_INFO_BSR
+
+The output is a uint32_t integer whose value indicates what character sequences +the \R escape sequence matches. A value of PCRE2_BSR_UNICODE means that \R +matches any Unicode line ending sequence; a value of PCRE2_BSR_ANYCRLF means +that \R matches only CR, LF, or CRLF. +
+  PCRE2_INFO_CAPTURECOUNT
+
+Return the highest capture group number in the pattern. In patterns where (?| +is not used, this is also the total number of capture groups. The third +argument should point to a uint32_t variable. +
+  PCRE2_INFO_DEPTHLIMIT
+
+If the pattern set a backtracking depth limit by including an item of the form +(*LIMIT_DEPTH=nnnn) at the start, the value is returned. The third argument +should point to a uint32_t integer. If no such value has been set, the call to +pcre2_pattern_info() returns the error PCRE2_ERROR_UNSET. Note that this +limit will only be used during matching if it is less than the limit set or +defaulted by the caller of the match function. +
+  PCRE2_INFO_FIRSTBITMAP
+
+In the absence of a single first code unit for a non-anchored pattern, +pcre2_compile() may construct a 256-bit table that defines a fixed set of +values for the first code unit in any match. For example, a pattern that starts +with [abc] results in a table with three bits set. When code unit values +greater than 255 are supported, the flag bit for 255 means "any code unit of +value 255 or above". If such a table was constructed, a pointer to it is +returned. Otherwise NULL is returned. The third argument should point to a +const uint8_t * variable. +
+  PCRE2_INFO_FIRSTCODETYPE
+
+Return information about the first code unit of any matched string, for a +non-anchored pattern. The third argument should point to a uint32_t +variable. If there is a fixed first value, for example, the letter "c" from a +pattern such as (cat|cow|coyote), 1 is returned, and the value can be retrieved +using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed first value, but it is +known that a match can occur only at the start of the subject or following a +newline in the subject, 2 is returned. Otherwise, and for anchored patterns, 0 +is returned. +
+  PCRE2_INFO_FIRSTCODEUNIT
+
+Return the value of the first code unit of any matched string for a pattern +where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0. The third +argument should point to a uint32_t variable. In the 8-bit library, the +value is always less than 256. In the 16-bit library the value can be up to +0xffff. In the 32-bit library in UTF-32 mode the value can be up to 0x10ffff, +and up to 0xffffffff when not using UTF-32 mode. +
+  PCRE2_INFO_FRAMESIZE
+
+Return the size (in bytes) of the data frames that are used to remember +backtracking positions when the pattern is processed by pcre2_match() +without the use of JIT. The third argument should point to a size_t +variable. The frame size depends on the number of capturing parentheses in the +pattern. Each additional capture group adds two PCRE2_SIZE variables. +
+  PCRE2_INFO_HASBACKSLASHC
+
+Return 1 if the pattern contains any instances of \C, otherwise 0. The third +argument should point to a uint32_t variable. +
+  PCRE2_INFO_HASCRORLF
+
+Return 1 if the pattern contains any explicit matches for CR or LF characters, +otherwise 0. The third argument should point to a uint32_t variable. An +explicit match is either a literal CR or LF character, or \r or \n or one of +the equivalent hexadecimal or octal escape sequences. +
+  PCRE2_INFO_HEAPLIMIT
+
+If the pattern set a heap memory limit by including an item of the form +(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument +should point to a uint32_t integer. If no such value has been set, the call to +pcre2_pattern_info() returns the error PCRE2_ERROR_UNSET. Note that this +limit will only be used during matching if it is less than the limit set or +defaulted by the caller of the match function. +
+  PCRE2_INFO_JCHANGED
+
+Return 1 if the (?J) or (?-J) option setting is used in the pattern, otherwise +0. The third argument should point to a uint32_t variable. (?J) and +(?-J) set and unset the local PCRE2_DUPNAMES option, respectively. +
+  PCRE2_INFO_JITSIZE
+
+If the compiled pattern was successfully processed by +pcre2_jit_compile(), return the size of the JIT compiled code, otherwise +return zero. The third argument should point to a size_t variable. +
+  PCRE2_INFO_LASTCODETYPE
+
+Return 1 if there is a rightmost literal code unit that must exist in any +matched string, other than at its start. The third argument should point to a +uint32_t variable. If there is no such value, 0 is returned. When 1 is +returned, the code unit value itself can be retrieved using +PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is +recorded only if it follows something of variable length. For example, for the +pattern /^a\d+z\d+/ the returned value is 1 (with "z" returned from +PCRE2_INFO_LASTCODEUNIT), but for /^a\dz\d/ the returned value is 0. +
+  PCRE2_INFO_LASTCODEUNIT
+
+Return the value of the rightmost literal code unit that must exist in any +matched string, other than at its start, for a pattern where +PCRE2_INFO_LASTCODETYPE returns 1. Otherwise, return 0. The third argument +should point to a uint32_t variable. +
+  PCRE2_INFO_MATCHEMPTY
+
+Return 1 if the pattern might match an empty string, otherwise 0. The third +argument should point to a uint32_t variable. When a pattern contains +recursive subroutine calls it is not always possible to determine whether or +not it can match an empty string. PCRE2 takes a cautious approach and returns 1 +in such cases. +
+  PCRE2_INFO_MATCHLIMIT
+
+If the pattern set a match limit by including an item of the form +(*LIMIT_MATCH=nnnn) at the start, the value is returned. The third argument +should point to a uint32_t integer. If no such value has been set, the call to +pcre2_pattern_info() returns the error PCRE2_ERROR_UNSET. Note that this +limit will only be used during matching if it is less than the limit set or +defaulted by the caller of the match function. +
+  PCRE2_INFO_MAXLOOKBEHIND
+
+A lookbehind assertion moves back a certain number of characters (not code +units) when it starts to process each of its branches. This request returns the +largest of these backward moves. The third argument should point to a uint32_t +integer. The simple assertions \b and \B require a one-character lookbehind +and cause PCRE2_INFO_MAXLOOKBEHIND to return 1 in the absence of anything +longer. \A also registers a one-character lookbehind, though it does not +actually inspect the previous character. +

+

+Note that this information is useful for multi-segment matching only +if the pattern contains no nested lookbehinds. For example, the pattern +(?<=a(?<=ba)c) returns a maximum lookbehind of 2, but when it is processed, the +first lookbehind moves back by two characters, matches one character, then the +nested lookbehind also moves back by two characters. This puts the matching +point three characters earlier than it was at the start. +PCRE2_INFO_MAXLOOKBEHIND is really only useful as a debugging tool. See the +pcre2partial +documentation for a discussion of multi-segment matching. +

+  PCRE2_INFO_MINLENGTH
+
+If a minimum length for matching subject strings was computed, its value is +returned. Otherwise the returned value is 0. This value is not computed when +PCRE2_NO_START_OPTIMIZE is set. The value is a number of characters, which in +UTF mode may be different from the number of code units. The third argument +should point to a uint32_t variable. The value is a lower bound to the +length of any matching string. There may not be any strings of that length that +do actually match, but every string that does match is at least that long. +
+  PCRE2_INFO_NAMECOUNT
+  PCRE2_INFO_NAMEENTRYSIZE
+  PCRE2_INFO_NAMETABLE
+
+PCRE2 supports the use of named as well as numbered capturing parentheses. The +names are just an additional way of identifying the parentheses, which still +acquire numbers. Several convenience functions such as +pcre2_substring_get_byname() are provided for extracting captured +substrings by name. It is also possible to extract the data directly, by first +converting the name to a number in order to access the correct pointers in the +output vector (described with pcre2_match() below). To do the conversion, +you need to use the name-to-number map, which is described by these three +values. +

+

+The map consists of a number of fixed-size entries. PCRE2_INFO_NAMECOUNT gives +the number of entries, and PCRE2_INFO_NAMEENTRYSIZE gives the size of each +entry in code units; both of these return a uint32_t value. The entry +size depends on the length of the longest name. +

+

+PCRE2_INFO_NAMETABLE returns a pointer to the first entry of the table. This is +a PCRE2_SPTR pointer to a block of code units. In the 8-bit library, the first +two bytes of each entry are the number of the capturing parenthesis, most +significant byte first. In the 16-bit library, the pointer points to 16-bit +code units, the first of which contains the parenthesis number. In the 32-bit +library, the pointer points to 32-bit code units, the first of which contains +the parenthesis number. The rest of the entry is the corresponding name, zero +terminated. +

+

+The names are in alphabetical order. If (?| is used to create multiple capture +groups with the same number, as described in the +section on duplicate group numbers +in the +pcre2pattern +page, the groups may be given the same name, but there is only one entry in the +table. Different names for groups of the same number are not permitted. +

+

+Duplicate names for capture groups with different numbers are permitted, but +only if PCRE2_DUPNAMES is set. They appear in the table in the order in which +they were found in the pattern. In the absence of (?| this is the order of +increasing number; when (?| is used this is not necessarily the case because +later capture groups may have lower numbers. +

+

+As a simple example of the name/number table, consider the following pattern +after compilation by the 8-bit library (assume PCRE2_EXTENDED is set, so white +space - including newlines - is ignored): +

+  (?<date> (?<year>(\d\d)?\d\d) - (?<month>\d\d) - (?<day>\d\d) )
+
+There are four named capture groups, so the table has four entries, and each +entry in the table is eight bytes long. The table is as follows, with +non-printing bytes shown in hexadecimal, and undefined bytes shown as ??: +
+  00 01 d  a  t  e  00 ??
+  00 05 d  a  y  00 ?? ??
+  00 04 m  o  n  t  h  00
+  00 02 y  e  a  r  00 ??
+
+When writing code to extract data from named capture groups using the +name-to-number map, remember that the length of the entries is likely to be +different for each compiled pattern. +
+  PCRE2_INFO_NEWLINE
+
+The output is one of the following uint32_t values: +
+  PCRE2_NEWLINE_CR       Carriage return (CR)
+  PCRE2_NEWLINE_LF       Linefeed (LF)
+  PCRE2_NEWLINE_CRLF     Carriage return, linefeed (CRLF)
+  PCRE2_NEWLINE_ANY      Any Unicode line ending
+  PCRE2_NEWLINE_ANYCRLF  Any of CR, LF, or CRLF
+  PCRE2_NEWLINE_NUL      The NUL character (binary zero)
+
+This identifies the character sequence that will be recognized as meaning +"newline" while matching. +
+  PCRE2_INFO_SIZE
+
+Return the size of the compiled pattern in bytes (for all three libraries). The +third argument should point to a size_t variable. This value includes the +size of the general data block that precedes the code units of the compiled +pattern itself. The value that is used when pcre2_compile() is getting +memory in which to place the compiled pattern may be slightly larger than the +value returned by this option, because there are cases where the code that +calculates the size has to over-estimate. Processing a pattern with the JIT +compiler does not alter the value returned by this option. +

+
INFORMATION ABOUT A PATTERN'S CALLOUTS
+

+int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); +
+
+A script language that supports the use of string arguments in callouts might +like to scan all the callouts in a pattern before running the match. This can +be done by calling pcre2_callout_enumerate(). The first argument is a +pointer to a compiled pattern, the second points to a callback function, and +the third is arbitrary user data. The callback function is called for every +callout in the pattern in the order in which they appear. Its first argument is +a pointer to a callout enumeration block, and its second argument is the +user_data value that was passed to pcre2_callout_enumerate(). The +contents of the callout enumeration block are described in the +pcre2callout +documentation, which also gives further details about callouts. +

+
SERIALIZATION AND PRECOMPILING
+

+It is possible to save compiled patterns on disc or elsewhere, and reload them +later, subject to a number of restrictions. The host on which the patterns are +reloaded must be running the same version of PCRE2, with the same code unit +width, and must also have the same endianness, pointer width, and PCRE2_SIZE +type. Before compiled patterns can be saved, they must be converted to a +"serialized" form, which in the case of PCRE2 is really just a bytecode dump. +The functions whose names begin with pcre2_serialize_ are used for +converting to and from the serialized form. They are described in the +pcre2serialize +documentation. Note that PCRE2 serialization does not convert compiled patterns +to an abstract format like Java or .NET serialization. +

+
THE MATCH DATA BLOCK
+

+pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, + pcre2_general_context *gcontext); +
+
+pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext); +
+
+void pcre2_match_data_free(pcre2_match_data *match_data); +

+

+Information about a successful or unsuccessful match is placed in a match +data block, which is an opaque structure that is accessed by function calls. In +particular, the match data block contains a vector of offsets into the subject +string that define the matched parts of the subject. This is known as the +ovector. +

+

+Before calling pcre2_match(), pcre2_dfa_match(), or +pcre2_jit_match() you must create a match data block by calling one of +the creation functions above. For pcre2_match_data_create(), the first +argument is the number of pairs of offsets in the ovector. +

+

+When using pcre2_match(), one pair of offsets is required to identify the +string that matched the whole pattern, with an additional pair for each +captured substring. For example, a value of 4 creates enough space to record +the matched portion of the subject plus three captured substrings. +

+

+When using pcre2_dfa_match() there may be multiple matched substrings of +different lengths at the same point in the subject. The ovector should be made +large enough to hold as many as are expected. +

+

+A minimum of 1 pair is imposed by pcre2_match_data_create(), so +it is always possible to return the overall matched string in the case of +pcre2_match() or the longest match in the case of +pcre2_dfa_match(). The maximum number of pairs is 65535; if the first +argument of pcre2_match_data_create() is greater than this, 65535 is +used. +

+

+The second argument of pcre2_match_data_create() is a pointer to a +general context, which can specify custom memory management for obtaining the +memory for the match data block. If you are not using custom memory management, +pass NULL, which causes malloc() to be used. +

+

+For pcre2_match_data_create_from_pattern(), the first argument is a +pointer to a compiled pattern. The ovector is created to be exactly the right +size to hold all the substrings a pattern might capture when matched using +pcre2_match(). You should not use this call when matching with +pcre2_dfa_match(). The second argument is again a pointer to a general +context, but in this case if NULL is passed, the memory is obtained using the +same allocator that was used for the compiled pattern (custom or default). +

+

+A match data block can be used many times, with the same or different compiled +patterns. You can extract information from a match data block after a match +operation has finished, using functions that are described in the sections on +matched strings +and +other match data +below. +

+

+When a call of pcre2_match() fails, valid data is available in the match +block only when the error is PCRE2_ERROR_NOMATCH, PCRE2_ERROR_PARTIAL, or one +of the error codes for an invalid UTF string. Exactly what is available depends +on the error, and is detailed below. +

+

+When one of the matching functions is called, pointers to the compiled pattern +and the subject string are set in the match data block so that they can be +referenced by the extraction functions after a successful match. After running +a match, you must not free a compiled pattern or a subject string until after +all operations on the match data block (for that match) have taken place, +unless, in the case of the subject string, you have used the +PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled +"Option bits for pcre2_match()" +below. +

+

+When a match data block itself is no longer needed, it should be freed by +calling pcre2_match_data_free(). If this function is called with a NULL +argument, it returns immediately, without doing anything. +

+
MEMORY USE FOR MATCH DATA BLOCKS
+

+PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *match_data); +
+
+PCRE2_SIZE pcre2_get_match_data_heapframes_size( + pcre2_match_data *match_data); +

+

+The size of a match data block depends on the size of the ovector that it +contains. The function pcre2_get_match_data_size() returns the size, in +bytes, of the block that is its argument. +

+

+When pcre2_match() runs interpretively (that is, without using JIT), it +makes use of a vector of data frames for remembering backtracking positions. +The size of each individual frame depends on the number of capturing +parentheses in the pattern and can be obtained by calling +pcre2_pattern_info() with the PCRE2_INFO_FRAMESIZE option (see the +section entitled "Information about a compiled pattern" +above). +

+

+Heap memory is used for the frames vector; if the initial memory block turns +out to be too small during matching, it is automatically expanded. When +pcre2_match() returns, the memory is not freed, but remains attached to +the match data block, for use by any subsequent matches that use the same +block. It is automatically freed when the match data block itself is freed. +

+

+You can find the current size of the frames vector that a match data block owns +by calling pcre2_get_match_data_heapframes_size(). For a newly created +match data block the size will be zero. Some types of match may require a lot +of frames and thus a large vector; applications that run in environments where +memory is constrained can check this and free the match data block if the heap +frames vector has become too big. +

+
MATCHING A PATTERN: THE TRADITIONAL FUNCTION
+

+int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); +

+

+The function pcre2_match() is called to match a subject string against a +compiled pattern, which is passed in the code argument. You can call +pcre2_match() with the same code argument as many times as you +like, in order to find multiple matches in the subject string or to match +different subject strings with the same pattern. +

+

+This function is the main matching facility of the library, and it operates in +a Perl-like manner. For specialist use there is also an alternative matching +function, which is described +below +in the section about the pcre2_dfa_match() function. +

+

+Here is an example of a simple call to pcre2_match(): +

+  pcre2_match_data *md = pcre2_match_data_create(4, NULL);
+  int rc = pcre2_match(
+    re,             /* result of pcre2_compile() */
+    "some string",  /* the subject string */
+    11,             /* the length of the subject string */
+    0,              /* start at offset 0 in the subject */
+    0,              /* default options */
+    md,             /* the match data block */
+    NULL);          /* a match context; NULL means use defaults */
+
+If the subject string is zero-terminated, the length can be given as +PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common +matching parameters are to be changed. For details, see the section on +the match context +above. +

+
+The string to be matched by pcre2_match() +
+

+The subject string is passed to pcre2_match() as a pointer in +subject, a length in length, and a starting offset in +startoffset. The length and offset are in code units, not characters. +That is, they are in bytes for the 8-bit library, 16-bit code units for the +16-bit library, and 32-bit code units for the 32-bit library, whether or not +UTF processing is enabled. As a special case, if subject is NULL and +length is zero, the subject is assumed to be an empty string. If +length is non-zero, an error occurs if subject is NULL. +

+

+If startoffset is greater than the length of the subject, +pcre2_match() returns PCRE2_ERROR_BADOFFSET. When the starting offset is +zero, the search for a match starts at the beginning of the subject, and this +is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset +must point to the start of a character, or to the end of the subject (in UTF-32 +mode, one code unit equals one character, so all offsets are valid). Like the +pattern string, the subject may contain binary zeros. +

+

+A non-zero starting offset is useful when searching for another match in the +same subject by calling pcre2_match() again after a previous success. +Setting startoffset differs from passing over a shortened string and +setting PCRE2_NOTBOL in the case of a pattern that begins with any kind of +lookbehind. For example, consider the pattern +

+  \Biss\B
+
+which finds occurrences of "iss" in the middle of words. (\B matches only if +the current position in the subject is not a word boundary.) When applied to +the string "Mississippi" the first call to pcre2_match() finds the first +occurrence. If pcre2_match() is called again with just the remainder of +the subject, namely "issippi", it does not match, because \B is always false +at the start of the subject, which is deemed to be a word boundary. However, if +pcre2_match() is passed the entire string again, but with +startoffset set to 4, it finds the second occurrence of "iss" because it +is able to look behind the starting point to discover that it is preceded by a +letter. +

+

+Finding all the matches in a subject is tricky when the pattern can match an +empty string. It is possible to emulate Perl's /g behaviour by first trying the +match again at the same offset, with the PCRE2_NOTEMPTY_ATSTART and +PCRE2_ANCHORED options, and then if that fails, advancing the starting offset +and trying an ordinary match again. There is some code that demonstrates how to +do this in the +pcre2demo +sample program. In the most general case, you have to check to see if the +newline convention recognizes CRLF as a newline, and if so, and the current +character is CR followed by LF, advance the starting offset by two characters +instead of one. +

+

+If a non-zero starting offset is passed when the pattern is anchored, a single +attempt to match at the given offset is made. This can only succeed if the +pattern does not require the match to be at the start of the subject. In other +words, the anchoring must be the result of setting the PCRE2_ANCHORED option or +the use of .* with PCRE2_DOTALL, not by starting the pattern with ^ or \A. +

+
+Option bits for pcre2_match() +
+

+The unused bits of the options argument for pcre2_match() must be +zero. The only bits that may be set are PCRE2_ANCHORED, +PCRE2_COPY_MATCHED_SUBJECT, PCRE2_DISABLE_RECURSELOOP_CHECK, PCRE2_ENDANCHORED, +PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, +PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. +Their action is described below. +

+

+Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not supported by +the just-in-time (JIT) compiler. If it is set, JIT matching is disabled and the +interpretive code in pcre2_match() is run. +PCRE2_DISABLE_RECURSELOOP_CHECK is ignored by JIT, but apart from PCRE2_NO_JIT +(obviously), the remaining options are supported for JIT matching. +

+  PCRE2_ANCHORED
+
+The PCRE2_ANCHORED option limits pcre2_match() to matching at the first +matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out +to be anchored by virtue of its contents, it cannot be made unanchored at +matching time. Note that setting the option at match time disables JIT +matching. +
+  PCRE2_COPY_MATCHED_SUBJECT
+
+By default, a pointer to the subject is remembered in the match data block so +that, after a successful match, it can be referenced by the substring +extraction functions. This means that the subject's memory must not be freed +until all such operations are complete. For some applications where the +lifetime of the subject string is not guaranteed, it may be necessary to make a +copy of the subject string, but it is wasteful to do this unless the match is +successful. After a successful match, if PCRE2_COPY_MATCHED_SUBJECT is set, the +subject is copied and the new pointer is remembered in the match data block +instead of the original subject pointer. The memory allocator that was used for +the match block itself is used. The copy is automatically freed when +pcre2_match_data_free() is called to free the match data block. It is also +automatically freed if the match data block is re-used for another match +operation. +
+  PCRE2_DISABLE_RECURSELOOP_CHECK
+
+This option is relevant only to pcre2_match() for interpretive matching. +It is ignored when JIT is used, and is forbidden for pcre2_dfa_match(). +

+

+The use of recursion in patterns can lead to infinite loops. In the +interpretive matcher these would be eventually caught by the match or heap +limits, but this could take a long time and/or use a lot of memory if the +limits are large. There is therefore a check at the start of each recursion. +If the same group is still active from a previous call, and the current subject +pointer is the same as it was at the start of that group, and the furthest +inspected character of the subject has not changed, an error is generated. +

+

+There are rare cases of matches that would complete, but nevertheless trigger +this error. This option disables the check. It is provided mainly for testing +when comparing JIT and interpretive behaviour. +

+  PCRE2_ENDANCHORED
+
+If the PCRE2_ENDANCHORED option is set, any string that pcre2_match() +matches must be right at the end of the subject string. Note that setting the +option at match time disables JIT matching. +
+  PCRE2_NOTBOL
+
+This option specifies that the first character of the subject string is not the +beginning of a line, so the circumflex metacharacter should not match before +it. Setting this without having set PCRE2_MULTILINE at compile time causes +circumflex never to match. This option affects only the behaviour of the +circumflex metacharacter. It does not affect \A. +
+  PCRE2_NOTEOL
+
+This option specifies that the end of the subject string is not the end of a +line, so the dollar metacharacter should not match it nor (except in multiline +mode) a newline immediately before it. Setting this without having set +PCRE2_MULTILINE at compile time causes dollar never to match. This option +affects only the behaviour of the dollar metacharacter. It does not affect \Z +or \z. +
+  PCRE2_NOTEMPTY
+
+An empty string is not considered to be a valid match if this option is set. If +there are alternatives in the pattern, they are tried. If all the alternatives +match the empty string, the entire match fails. For example, if the pattern +
+  a?b?
+
+is applied to a string not beginning with "a" or "b", it matches an empty +string at the start of the subject. With PCRE2_NOTEMPTY set, this match is not +valid, so pcre2_match() searches further into the string for occurrences +of "a" or "b". +
+  PCRE2_NOTEMPTY_ATSTART
+
+This is like PCRE2_NOTEMPTY, except that it locks out an empty string match +only at the first matching position, that is, at the start of the subject plus +the starting offset. An empty string match later in the subject is permitted. +If the pattern is anchored, such a match can occur only if the pattern contains +\K. +
+  PCRE2_NO_JIT
+
+By default, if a pattern has been successfully processed by +pcre2_jit_compile(), JIT is automatically used when pcre2_match() +is called with options that JIT supports. Setting PCRE2_NO_JIT disables the use +of JIT; it forces matching to be done by the interpreter. +
+  PCRE2_NO_UTF_CHECK
+
+When PCRE2_UTF is set at compile time, the validity of the subject as a UTF +string is checked unless PCRE2_NO_UTF_CHECK is passed to pcre2_match() or +PCRE2_MATCH_INVALID_UTF was passed to pcre2_compile(). The latter special +case is discussed in detail in the +pcre2unicode +documentation. +

+

+In the default case, if a non-zero starting offset is given, the check is +applied only to that part of the subject that could be inspected during +matching, and there is a check that the starting offset points to the first +code unit of a character or to the end of the subject. If there are no +lookbehind assertions in the pattern, the check starts at the starting offset. +Otherwise, it starts at the length of the longest lookbehind before the +starting offset, or at the start of the subject if there are not that many +characters before the starting offset. Note that the sequences \b and \B are +one-character lookbehinds. +

+

+The check is carried out before any other processing takes place, and a +negative error code is returned if the check fails. There are several UTF error +codes for each code unit width, corresponding to different problems with the +code unit sequence. There are discussions about the validity of +UTF-8 strings, +UTF-16 strings, +and +UTF-32 strings +in the +pcre2unicode +documentation. +

+

+If you know that your subject is valid, and you want to skip this check for +performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling +pcre2_match(). You might want to do this for the second and subsequent +calls to pcre2_match() if you are making repeated calls to find multiple +matches in the same subject string. +

+

+Warning: Unless PCRE2_MATCH_INVALID_UTF was set at compile time, when +PCRE2_NO_UTF_CHECK is set at match time the effect of passing an invalid +string as a subject, or an invalid value of startoffset, is undefined. +Your program may crash or loop indefinitely or give wrong results. +

+  PCRE2_PARTIAL_HARD
+  PCRE2_PARTIAL_SOFT
+
+These options turn on the partial matching feature. A partial match occurs if +the end of the subject string is reached successfully, but there are not enough +subject characters to complete the match. In addition, either at least one +character must have been inspected or the pattern must contain a lookbehind, or +the pattern must be one that could match an empty string. +

+

+If this situation arises when PCRE2_PARTIAL_SOFT (but not PCRE2_PARTIAL_HARD) +is set, matching continues by testing any remaining alternatives. Only if no +complete match can be found is PCRE2_ERROR_PARTIAL returned instead of +PCRE2_ERROR_NOMATCH. In other words, PCRE2_PARTIAL_SOFT specifies that the +caller is prepared to handle a partial match, but only if no complete match can +be found. +

+

+If PCRE2_PARTIAL_HARD is set, it overrides PCRE2_PARTIAL_SOFT. In this case, if +a partial match is found, pcre2_match() immediately returns +PCRE2_ERROR_PARTIAL, without considering any other alternatives. In other +words, when PCRE2_PARTIAL_HARD is set, a partial match is considered to be more +important than an alternative complete match. +

+

+There is a more detailed discussion of partial and multi-segment matching, with +examples, in the +pcre2partial +documentation. +

+
NEWLINE HANDLING WHEN MATCHING
+

+When PCRE2 is built, a default newline convention is set; this is usually the +standard convention for the operating system. The default can be overridden in +a +compile context +by calling pcre2_set_newline(). It can also be overridden by starting a +pattern string with, for example, (*CRLF), as described in the +section on newline conventions +in the +pcre2pattern +page. During matching, the newline choice affects the behaviour of the dot, +circumflex, and dollar metacharacters. It may also alter the way the match +starting position is advanced after a match failure for an unanchored pattern. +

+

+When PCRE2_NEWLINE_CRLF, PCRE2_NEWLINE_ANYCRLF, or PCRE2_NEWLINE_ANY is set as +the newline convention, and a match attempt for an unanchored pattern fails +when the current starting position is at a CRLF sequence, and the pattern +contains no explicit matches for CR or LF characters, the match position is +advanced by two characters instead of one, in other words, to after the CRLF. +

+

+The above rule is a compromise that makes the most common cases work as +expected. For example, if the pattern is .+A (and the PCRE2_DOTALL option is +not set), it does not match the string "\r\nA" because, after failing at the +start, it skips both the CR and the LF before retrying. However, the pattern +[\r\n]A does match that string, because it contains an explicit CR or LF +reference, and so advances only by one character after the first failure. +

+

+An explicit match for CR or LF is either a literal appearance of one of those +characters in the pattern, or one of the \r or \n or equivalent octal or +hexadecimal escape sequences. Implicit matches such as [^X] do not count, nor +does \s, even though it includes CR and LF in the characters that it matches. +

+

+Notwithstanding the above, anomalous effects may still occur when CRLF is a +valid newline sequence and explicit \r or \n escapes appear in the pattern. +

+
HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS
+

+uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data); +
+
+PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data); +

+

+In general, a pattern matches a certain portion of the subject, and in +addition, further substrings from the subject may be picked out by +parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's +book, this is called "capturing" in what follows, and the phrase "capture +group" (Perl terminology) is used for a fragment of a pattern that picks out a +substring. PCRE2 supports several other kinds of parenthesized group that do +not cause substrings to be captured. The pcre2_pattern_info() function +can be used to find out how many capture groups there are in a compiled +pattern. +

+

+You can use auxiliary functions for accessing captured substrings +by number +or +by name, +as described in sections below. +

+

+Alternatively, you can make direct use of the vector of PCRE2_SIZE values, +called the ovector, which contains the offsets of captured strings. It is +part of the +match data block. +The function pcre2_get_ovector_pointer() returns the address of the +ovector, and pcre2_get_ovector_count() returns the number of pairs of +values it contains. +

+

+Within the ovector, the first in each pair of values is set to the offset of +the first code unit of a substring, and the second is set to the offset of the +first code unit after the end of a substring. These values are always code unit +offsets, not character offsets. That is, they are byte offsets in the 8-bit +library, 16-bit offsets in the 16-bit library, and 32-bit offsets in the 32-bit +library. +

+

+After a partial match (error return PCRE2_ERROR_PARTIAL), only the first pair +of offsets (that is, ovector[0] and ovector[1]) are set. They +identify the part of the subject that was partially matched. See the +pcre2partial +documentation for details of partial matching. +

+

+After a fully successful match, the first pair of offsets identifies the +portion of the subject string that was matched by the entire pattern. The next +pair is used for the first captured substring, and so on. The value returned by +pcre2_match() is one more than the highest numbered pair that has been +set. For example, if two substrings have been captured, the returned value is +3. If there are no captured substrings, the return value from a successful +match is 1, indicating that just the first pair of offsets has been set. +

+

+If a pattern uses the \K escape sequence within a positive assertion, the +reported start of a successful match can be greater than the end of the match. +For example, if the pattern (?=ab\K) is matched against "ab", the start and +end offset values for the match are 2 and 0. +

+

+If a capture group is matched repeatedly within a single match operation, it is +the last portion of the subject that it matched that is returned. +

+

+If the ovector is too small to hold all the captured substring offsets, as much +as possible is filled in, and the function returns a value of zero. If captured +substrings are not of interest, pcre2_match() may be called with a match +data block whose ovector is of minimum length (that is, one pair). +

+

+It is possible for capture group number n+1 to match some part of the +subject when group n has not been used at all. For example, if the string +"abc" is matched against the pattern (a|(z))(bc) the return from the function +is 4, and groups 1 and 3 are matched, but 2 is not. When this happens, both +values in the offset pairs corresponding to unused groups are set to +PCRE2_UNSET. +

+

+Offset values that correspond to unused groups at the end of the expression are +also set to PCRE2_UNSET. For example, if the string "abc" is matched against +the pattern (abc)(x(yz)?)? groups 2 and 3 are not matched. The return from the +function is 2, because the highest used capture group number is 1. The offsets +for the second and third capture groups (assuming the vector is large enough, +of course) are set to PCRE2_UNSET. +

+

+Elements in the ovector that do not correspond to capturing parentheses in the +pattern are never changed. That is, if a pattern contains n capturing +parentheses, no more than ovector[0] to ovector[2n+1] are set by +pcre2_match(). The other elements retain whatever values they previously +had. After a failed match attempt, the contents of the ovector are unchanged. +

+
OTHER INFORMATION ABOUT A MATCH
+

+PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data); +
+
+PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data); +

+

+As well as the offsets in the ovector, other information about a match is +retained in the match data block and can be retrieved by the above functions in +appropriate circumstances. If they are called at other times, the result is +undefined. +

+

+After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure +to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function +pcre2_get_mark() can be called to access this name, which can be +specified in the pattern by any of the backtracking control verbs, not just +(*MARK). The same function applies to all the verbs. It returns a pointer to +the zero-terminated name, which is within the compiled pattern. If no name is +available, NULL is returned. The length of the name (excluding the terminating +zero) is stored in the code unit that precedes the name. You should use this +length instead of relying on the terminating zero if the name might contain a +binary zero. +

+

+After a successful match, the name that is returned is the last mark name +encountered on the matching path through the pattern. Instances of backtracking +verbs without names do not count. Thus, for example, if the matching path +contains (*MARK:A)(*PRUNE), the name "A" is returned. After a "no match" or a +partial match, the last encountered name is returned. For example, consider +this pattern: +

+  ^(*MARK:A)((*MARK:B)a|b)c
+
+When it matches "bc", the returned name is A. The B mark is "seen" in the first +branch of the group, but it is not on the matching path. On the other hand, +when this pattern fails to match "bx", the returned name is B. +

+

+Warning: By default, certain start-of-match optimizations are used to +give a fast "no match" result in some situations. For example, if the anchoring +is removed from the pattern above, there is an initial check for the presence +of "c" in the subject before running the matching engine. This check fails for +"bx", causing a match failure without seeing any marks. You can disable the +start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option for +pcre2_compile() or by starting the pattern with (*NO_START_OPT). +

+

+After a successful match, a partial match, or one of the invalid UTF errors +(for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can be +called. After a successful or partial match it returns the code unit offset of +the character at which the match started. For a non-partial match, this can be +different to the value of ovector[0] if the pattern contains the \K +escape sequence. After a partial match, however, this value is always the same +as ovector[0] because \K does not affect the result of a partial match. +

+

+After a UTF check failure, pcre2_get_startchar() can be used to obtain +the code unit offset of the invalid UTF character. Details are given in the +pcre2unicode +page. +

+
ERROR RETURNS FROM pcre2_match()
+

+If pcre2_match() fails, it returns a negative number. This can be +converted to a text string by calling the pcre2_get_error_message() +function (see "Obtaining a textual error message" +below). +Negative error codes are also returned by other functions, and are documented +with them. The codes are given names in the header file. If UTF checking is in +force and an invalid UTF subject string is detected, one of a number of +UTF-specific negative error codes is returned. Details are given in the +pcre2unicode +page. The following are the other errors that may be returned by +pcre2_match(): +

+  PCRE2_ERROR_NOMATCH
+
+The subject string did not match the pattern. +
+  PCRE2_ERROR_PARTIAL
+
+The subject string did not match, but it did match partially. See the +pcre2partial +documentation for details of partial matching. +
+  PCRE2_ERROR_BADMAGIC
+
+PCRE2 stores a 4-byte "magic number" at the start of the compiled code, to +catch the case when it is passed a junk pointer. This is the error that is +returned when the magic number is not present. +
+  PCRE2_ERROR_BADMODE
+
+This error is given when a compiled pattern is passed to a function in a +library of a different code unit width, for example, a pattern compiled by +the 8-bit library is passed to a 16-bit or 32-bit library function. +
+  PCRE2_ERROR_BADOFFSET
+
+The value of startoffset was greater than the length of the subject. +
+  PCRE2_ERROR_BADOPTION
+
+An unrecognized bit was set in the options argument. +
+  PCRE2_ERROR_BADUTFOFFSET
+
+The UTF code unit sequence that was passed as a subject was checked and found +to be valid (the PCRE2_NO_UTF_CHECK option was not set), but the value of +startoffset did not point to the beginning of a UTF character or the end +of the subject. +
+  PCRE2_ERROR_CALLOUT
+
+This error is never generated by pcre2_match() itself. It is provided for +use by callout functions that want to cause pcre2_match() or +pcre2_callout_enumerate() to return a distinctive error code. See the +pcre2callout +documentation for details. +
+  PCRE2_ERROR_DEPTHLIMIT
+
+The nested backtracking depth limit was reached. +
+  PCRE2_ERROR_HEAPLIMIT
+
+The heap limit was reached. +
+  PCRE2_ERROR_INTERNAL
+
+An unexpected internal error has occurred. This error could be caused by a bug +in PCRE2 or by overwriting of the compiled pattern. +
+  PCRE2_ERROR_JIT_STACKLIMIT
+
+This error is returned when a pattern that was successfully studied using JIT +is being matched, but the memory available for the just-in-time processing +stack is not large enough. See the +pcre2jit +documentation for more details. +
+  PCRE2_ERROR_MATCHLIMIT
+
+The backtracking match limit was reached. +
+  PCRE2_ERROR_NOMEMORY
+
+Heap memory is used to remember backtracking points. This error is given when +the memory allocation function (default or custom) fails. Note that a different +error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds +the heap limit. PCRE2_ERROR_NOMEMORY is also returned if +PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails. +
+  PCRE2_ERROR_NULL
+
+Either the code, subject, or match_data argument was passed +as NULL. +
+  PCRE2_ERROR_RECURSELOOP
+
+This error is returned when pcre2_match() detects a recursion loop within +the pattern. Specifically, it means that either the whole pattern or a +capture group has been called recursively for the second time at the same +position in the subject string. Some simple patterns that might do this are +detected and faulted at compile time, but more complicated cases, in particular +mutual recursions between two different groups, cannot be detected until +matching is attempted. +

+
OBTAINING A TEXTUAL ERROR MESSAGE
+

+int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, + PCRE2_SIZE bufflen); +

+

+A text message for an error code from any PCRE2 function (compile, match, or +auxiliary) can be obtained by calling pcre2_get_error_message(). The code +is passed as the first argument, with the remaining two arguments specifying a +code unit buffer and its length in code units, into which the text message is +placed. The message is returned in code units of the appropriate width for the +library that is being used. +

+

+The returned message is terminated with a trailing zero, and the function +returns the number of code units used, excluding the trailing zero. If the +error number is unknown, the negative error code PCRE2_ERROR_BADDATA is +returned. If the buffer is too small, the message is truncated (but still with +a trailing zero), and the negative error code PCRE2_ERROR_NOMEMORY is returned. +None of the messages are very long; a buffer size of 120 code units is ample. +

+
EXTRACTING CAPTURED SUBSTRINGS BY NUMBER
+

+int pcre2_substring_length_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_SIZE *length); +
+
+int pcre2_substring_copy_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR *buffer, + PCRE2_SIZE *bufflen); +
+
+int pcre2_substring_get_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR **bufferptr, + PCRE2_SIZE *bufflen); +
+
+void pcre2_substring_free(PCRE2_UCHAR *buffer); +

+

+Captured substrings can be accessed directly by using the ovector as described +above. +For convenience, auxiliary functions are provided for extracting captured +substrings as new, separate, zero-terminated strings. A substring that contains +a binary zero is correctly extracted and has a further zero added on the end, +but the result is not, of course, a C string. +

+

+The functions in this section identify substrings by number. The number zero +refers to the entire matched substring, with higher numbers referring to +substrings captured by parenthesized groups. After a partial match, only +substring zero is available. An attempt to extract any other substring gives +the error PCRE2_ERROR_PARTIAL. The next section describes similar functions for +extracting captured substrings by name. +

+

+If a pattern uses the \K escape sequence within a positive assertion, the +reported start of a successful match can be greater than the end of the match. +For example, if the pattern (?=ab\K) is matched against "ab", the start and +end offset values for the match are 2 and 0. In this situation, calling these +functions with a zero substring number extracts a zero-length empty string. +

+

+You can find the length in code units of a captured substring without +extracting it by calling pcre2_substring_length_bynumber(). The first +argument is a pointer to the match data block, the second is the group number, +and the third is a pointer to a variable into which the length is placed. If +you just want to know whether or not the substring has been captured, you can +pass the third argument as NULL. +

+

+The pcre2_substring_copy_bynumber() function copies a captured substring +into a supplied buffer, whereas pcre2_substring_get_bynumber() copies it +into new memory, obtained using the same memory allocation function that was +used for the match data block. The first two arguments of these functions are a +pointer to the match data block and a capture group number. +

+

+The final arguments of pcre2_substring_copy_bynumber() are a pointer to +the buffer and a pointer to a variable that contains its length in code units. +This is updated to contain the actual number of code units used for the +extracted substring, excluding the terminating zero. +

+

+For pcre2_substring_get_bynumber() the third and fourth arguments point +to variables that are updated with a pointer to the new memory and the number +of code units that comprise the substring, again excluding the terminating +zero. When the substring is no longer needed, the memory should be freed by +calling pcre2_substring_free(). +

+

+The return value from all these functions is zero for success, or a negative +error code. If the pattern match failed, the match failure code is returned. +If a substring number greater than zero is used after a partial match, +PCRE2_ERROR_PARTIAL is returned. Other possible error codes are: +

+  PCRE2_ERROR_NOMEMORY
+
+The buffer was too small for pcre2_substring_copy_bynumber(), or the +attempt to get memory failed for pcre2_substring_get_bynumber(). +
+  PCRE2_ERROR_NOSUBSTRING
+
+There is no substring with that number in the pattern, that is, the number is +greater than the number of capturing parentheses. +
+  PCRE2_ERROR_UNAVAILABLE
+
+The substring number, though not greater than the number of captures in the +pattern, is greater than the number of slots in the ovector, so the substring +could not be captured. +
+  PCRE2_ERROR_UNSET
+
+The substring did not participate in the match. For example, if the pattern is +(abc)|(def) and the subject is "def", and the ovector contains at least two +capturing slots, substring number 1 is unset. +

+
EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS
+

+int pcre2_substring_list_get(pcre2_match_data *match_data, + PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr); +
+
+void pcre2_substring_list_free(PCRE2_UCHAR **list); +

+

+The pcre2_substring_list_get() function extracts all available substrings +and builds a list of pointers to them. It also (optionally) builds a second +list that contains their lengths (in code units), excluding a terminating zero +that is added to each of them. All this is done in a single block of memory +that is obtained using the same memory allocation function that was used to get +the match data block. +

+

+This function must be called only after a successful match. If called after a +partial match, the error code PCRE2_ERROR_PARTIAL is returned. +

+

+The address of the memory block is returned via listptr, which is also +the start of the list of string pointers. The end of the list is marked by a +NULL pointer. The address of the list of lengths is returned via +lengthsptr. If your strings do not contain binary zeros and you do not +therefore need the lengths, you may supply NULL as the lengthsptr +argument to disable the creation of a list of lengths. The yield of the +function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block +could not be obtained. When the list is no longer needed, it should be freed by +calling pcre2_substring_list_free(). +

+

+If this function encounters a substring that is unset, which can happen when +capture group number n+1 matches some part of the subject, but group +n has not been used at all, it returns an empty string. This can be +distinguished from a genuine zero-length substring by inspecting the +appropriate offset in the ovector, which contains PCRE2_UNSET for unset +substrings, or by calling pcre2_substring_length_bynumber(). +

+
EXTRACTING CAPTURED SUBSTRINGS BY NAME
+

+int pcre2_substring_number_from_name(const pcre2_code *code, + PCRE2_SPTR name); +
+
+int pcre2_substring_length_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_SIZE *length); +
+
+int pcre2_substring_copy_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen); +
+
+int pcre2_substring_get_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen); +
+
+void pcre2_substring_free(PCRE2_UCHAR *buffer); +

+

+To extract a substring by name, you first have to find the associated number. +For example, for this pattern: +

+  (a+)b(?<xxx>\d+)...
+
+the number of the capture group called "xxx" is 2. If the name is known to be +unique (PCRE2_DUPNAMES was not set), you can find the number from the name by +calling pcre2_substring_number_from_name(). The first argument is the +compiled pattern, and the second is the name. The yield of the function is the +group number, PCRE2_ERROR_NOSUBSTRING if there is no group with that name, or +PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one group with that name. +Given the number, you can extract the substring directly from the ovector, or +use one of the "bynumber" functions described above. +

+

+For convenience, there are also "byname" functions that correspond to the +"bynumber" functions, the only difference being that the second argument is a +name instead of a number. If PCRE2_DUPNAMES is set and there are duplicate +names, these functions scan all the groups with the given name, and return the +captured substring from the first named group that is set. +

+

+If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is +returned. If all groups with the name have numbers that are greater than the +number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is returned. If there +is at least one group with a slot in the ovector, but no group is found to be +set, PCRE2_ERROR_UNSET is returned. +

+

+Warning: If the pattern uses the (?| feature to set up multiple +capture groups with the same number, as described in the +section on duplicate group numbers +in the +pcre2pattern +page, you cannot use names to distinguish the different capture groups, because +names are not included in the compiled code. The matching process uses only +numbers. For this reason, the use of different names for groups with the +same number causes an error at compile time. +

+
CREATING A NEW STRING WITH SUBSTITUTIONS
+

+int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, PCRE2_SPTR replacement, + PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer, + PCRE2_SIZE *outlengthptr); +

+

+This function optionally calls pcre2_match() and then makes a copy of the +subject string in outputbuffer, replacing parts that were matched with +the replacement string, whose length is supplied in rlength, which +can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a +special case, if replacement is NULL and rlength is zero, the +replacement is assumed to be an empty string. If rlength is non-zero, an +error occurs if replacement is NULL. +

+

+There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just +the replacement string(s). The default action is to perform just one +replacement if the pattern matches, but there is an option that requests +multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below). +

+

+If successful, pcre2_substitute() returns the number of substitutions +that were carried out. This may be zero if no match was found, and is never +greater than one unless PCRE2_SUBSTITUTE_GLOBAL is set. A negative value is +returned if an error is detected. +

+

+Matches in which a \K item in a lookahead in the pattern causes the match to +end before it starts are not supported, and give rise to an error return. For +global replacements, matches in which \K in a lookbehind causes the match to +start earlier than the point that was reached in the previous iteration are +also not supported. +

+

+The first seven arguments of pcre2_substitute() are the same as for +pcre2_match(), except that the partial matching options are not +permitted, and match_data may be passed as NULL, in which case a match +data block is obtained and freed within this function, using memory management +functions from the match context, if provided, or else those that were used to +allocate memory for the compiled code. +

+

+If match_data is not NULL and PCRE2_SUBSTITUTE_MATCHED is not set, the +provided block is used for all calls to pcre2_match(), and its contents +afterwards are the result of the final call. For global changes, this will +always be a no-match error. The contents of the ovector within the match data +block may or may not have been changed. +

+

+As well as the usual options for pcre2_match(), a number of additional +options can be set in the options argument of pcre2_substitute(). +One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external +match_data block must be provided, and it must have already been used for +an external call to pcre2_match() with the same pattern and subject +arguments. The data in the match_data block (return code, offset vector) +is then used for the first substitution instead of calling pcre2_match() +from within pcre2_substitute(). This allows an application to check for a +match before choosing to substitute, without having to repeat the match. +

+

+The contents of the externally supplied match data block are not changed when +PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set, +pcre2_match() is called after the first substitution to check for further +matches, but this is done using an internally obtained match data block, thus +always leaving the external block unchanged. +

+

+The code argument is not used for matching before the first substitution +when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, even when +PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains information such as the +UTF setting and the number of capturing parentheses in the pattern. +

+

+The default action of pcre2_substitute() is to return a copy of the +subject string with matched substrings replaced. However, if +PCRE2_SUBSTITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are +returned. In the global case, multiple replacements are concatenated in the +output buffer. Substitution callouts (see +below) +can be used to separate them if necessary. +

+

+The outlengthptr argument of pcre2_substitute() must point to a +variable that contains the length, in code units, of the output buffer. If the +function is successful, the value is updated to contain the length in code +units of the new string, excluding the trailing zero that is automatically +added. +

+

+If the function is not successful, the value set via outlengthptr depends +on the type of error. For syntax errors in the replacement string, the value is +the offset in the replacement string where the error was detected. For other +errors, the value is PCRE2_UNSET by default. This includes the case of the +output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set. +

+

+PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is +too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If +this option is set, however, pcre2_substitute() continues to go through +the motions of matching and substituting (without, of course, writing anything) +in order to compute the size of buffer that is needed. This value is passed +back via the outlengthptr variable, with the result of the function still +being PCRE2_ERROR_NOMEMORY. +

+

+Passing a buffer size of zero is a permitted way of finding out how much memory +is needed for a given substitution. However, this does mean that the entire +operation is carried out twice. Depending on the application, it may be more +efficient to allocate a large buffer and free the excess afterwards, instead of +using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH. +

+

+The replacement string, which is interpreted as a UTF string in UTF mode, is +checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An invalid UTF +replacement string causes an immediate return with the relevant UTF error code. +

+

+If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted +in any way. By default, however, a dollar character is an escape character that +can specify the insertion of characters from capture groups and names from +(*MARK) or other control verbs in the pattern. Dollar is the only escape +character (backslash is treated as literal). The following forms are always +recognized: +

+  $$                  insert a dollar character
+  $<n> or ${<n>}      insert the contents of group <n>
+  $*MARK or ${*MARK}  insert a control verb name
+
+Either a group number or a group name can be given for <n>. Curly brackets are +required only if the following character would be interpreted as part of the +number or name. The number may be zero to include the entire matched string. +For example, if the pattern a(b)c is matched with "=abc=" and the replacement +string "+$1$0$1+", the result is "=+babcb+=". +

+

+$*MARK inserts the name from the last encountered backtracking control verb on +the matching path that has a name. (*MARK) must always include a name, but the +other verbs need not. For example, in the case of (*MARK:A)(*PRUNE) the name +inserted is "A", but for (*MARK:A)(*PRUNE:B) the relevant name is "B". This +facility can be used to perform simple simultaneous substitutions, as this +pcre2test example shows: +

+  /(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
+      apple lemon
+   2: pear orange
+
+PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject string, +replacing every matching substring. If this option is not set, only the first +matching substring is replaced. The search for matches takes place in the +original subject string (that is, previous replacements do not affect it). +Iteration is implemented by advancing the startoffset value for each +search, which is always passed the entire subject string. If an offset limit is +set in the match context, searching stops when that limit is reached. +

+

+You can restrict the effect of a global substitution to a portion of the +subject string by setting either or both of startoffset and an offset +limit. Here is a pcre2test example: +

+  /B/g,replace=!,use_offset_limit
+  ABC ABC ABC ABC\=offset=3,offset_limit=12
+   2: ABC A!C A!C ABC
+
+When continuing with global substitutions after matching a substring with zero +length, an attempt to find a non-empty match at the same offset is performed. +If this is not successful, the offset is advanced by one character except when +CRLF is a valid newline sequence and the next two characters are CR, LF. In +this case, the offset is advanced by two characters. +

+

+PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that do +not appear in the pattern to be treated as unset groups. This option should be +used with care, because it means that a typo in a group name or number no +longer causes the PCRE2_ERROR_NOSUBSTRING error. +

+

+PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including unknown +groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated as empty +strings when inserted as described above. If this option is not set, an attempt +to insert an unset group causes the PCRE2_ERROR_UNSET error. This option does +not influence the extended substitution syntax described below. +

+

+PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the +replacement string. Without this option, only the dollar character is special, +and only the group insertion forms listed above are valid. When +PCRE2_SUBSTITUTE_EXTENDED is set, two things change: +

+

+Firstly, backslash in a replacement string is interpreted as an escape +character. The usual forms such as \n or \x{ddd} can be used to specify +particular character codes, and backslash followed by any non-alphanumeric +character quotes that character. Extended quoting can be coded using \Q...\E, +exactly as in pattern strings. +

+

+There are also four escape sequences for forcing the case of inserted letters. +The insertion mechanism has three states: no case forcing, force upper case, +and force lower case. The escape sequences change the current state: \U and +\L change to upper or lower case forcing, respectively, and \E (when not +terminating a \Q quoted sequence) reverts to no case forcing. The sequences +\u and \l force the next character (if it is a letter) to upper or lower +case, respectively, and then the state automatically reverts to no case +forcing. Case forcing applies to all inserted characters, including those from +capture groups and letters within \Q...\E quoted sequences. If either +PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode +properties are used for case forcing characters whose code points are greater +than 127. +

+

+Note that case forcing sequences such as \U...\E do not nest. For example, +the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final \E has no +effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do +not apply to replacement strings. +

+

+The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more +flexibility to capture group substitution. The syntax is similar to that used +by Bash: +

+  ${<n>:-<string>}
+  ${<n>:+<string1>:<string2>}
+
+As before, <n> may be a group number or a name. The first form specifies a +default value. If group <n> is set, its value is inserted; if not, <string> is +expanded and the result inserted. The second form specifies strings that are +expanded and inserted when group <n> is set or unset, respectively. The first +form is just a convenient shorthand for +
+  ${<n>:+${<n>}:<string>}
+
+Backslash can be used to escape colons and closing curly brackets in the +replacement strings. A change of the case forcing state within a replacement +string remains in force afterwards, as shown in this pcre2test example: +
+  /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo
+      body
+   1: hello
+      somebody
+   1: HELLO
+
+The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended +substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown +groups in the extended syntax forms to be treated as unset. +

+

+If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, +PCRE2_SUBSTITUTE_UNSET_EMPTY, and PCRE2_SUBSTITUTE_EXTENDED are irrelevant and +are ignored. +

+
+Substitution errors +
+

+In the event of an error, pcre2_substitute() returns a negative error +code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors from +pcre2_match() are passed straight back. +

+

+PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring insertion, +unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set. +

+

+PCRE2_ERROR_UNSET is returned for an unset substring insertion (including an +unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) when the simple +(non-extended) syntax is used and PCRE2_SUBSTITUTE_UNSET_EMPTY is not set. +

+

+PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough. If the +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size of buffer that is +needed is returned via outlengthptr. Note that this does not happen by +default. +

+

+PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the +match_data argument is NULL or if the subject or replacement +arguments are NULL. For backward compatibility reasons an exception is made for +the replacement argument if the rlength argument is also 0. +

+

+PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the +replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE +(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket +not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group +substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before +it started or the match started earlier than the current position in the +subject, which can happen if \K is used in an assertion). +

+

+As for all PCRE2 errors, a text message that describes the error can be +obtained by calling the pcre2_get_error_message() function (see +"Obtaining a textual error message" +above). +

+
+Substitution callouts +
+

+int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_substitute_callout_block *, void *), + void *callout_data); +
+
+The pcre2_set_substitute_callout() function can be used to specify a +callout function for pcre2_substitute(). This information is passed in +a match context. The callout function is called after each substitution has +been processed, but it can cause the replacement not to happen. The callout +function is not called for simulated substitutions that happen as a result of +the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. +

+

+The first argument of the callout function is a pointer to a substitute callout +block structure, which contains the following fields, not necessarily in this +order: +

+  uint32_t    version;
+  uint32_t    subscount;
+  PCRE2_SPTR  input;
+  PCRE2_SPTR  output;
+  PCRE2_SIZE *ovector;
+  uint32_t    oveccount;
+  PCRE2_SIZE  output_offsets[2];
+
+The version field contains the version number of the block format. The +current version is 0. The version number will increase in future if more fields +are added, but the intention is never to remove any of the existing fields. +

+

+The subscount field is the number of the current match. It is 1 for the +first callout, 2 for the second, and so on. The input and output +pointers are copies of the values passed to pcre2_substitute(). +

+

+The ovector field points to the ovector, which contains the result of the +most recent match. The oveccount field contains the number of pairs that +are set in the ovector, and is always greater than zero. +

+

+The output_offsets vector contains the offsets of the replacement in the +output string. This has already been processed for dollar and (if requested) +backslash substitutions as described above. +

+

+The second argument of the callout function is the value passed as +callout_data when the function was registered. The value returned by the +callout function is interpreted as follows: +

+

+If the value is zero, the replacement is accepted, and, if +PCRE2_SUBSTITUTE_GLOBAL is set, processing continues with a search for the next +match. If the value is not zero, the current replacement is not accepted. If +the value is greater than zero, processing continues when +PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less than zero or +PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is copied to the +output and the call to pcre2_substitute() exits, returning the number of +matches so far. +

+
DUPLICATE CAPTURE GROUP NAMES
+

+int pcre2_substring_nametable_scan(const pcre2_code *code, + PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); +

+

+When a pattern is compiled with the PCRE2_DUPNAMES option, names for capture +groups are not required to be unique. Duplicate names are always allowed for +groups with the same number, created by using the (?| feature. Indeed, if such +groups are named, they are required to use the same names. +

+

+Normally, patterns that use duplicate names are such that in any one match, +only one of each set of identically-named groups participates. An example is +shown in the +pcre2pattern +documentation. +

+

+When duplicates are present, pcre2_substring_copy_byname() and +pcre2_substring_get_byname() return the first substring corresponding to +the given name that is set. Only if none are set is PCRE2_ERROR_UNSET +returned. The pcre2_substring_number_from_name() function returns the +error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate names. +

+

+If you want to get full details of all captured substrings for a given name, +you must use the pcre2_substring_nametable_scan() function. The first +argument is the compiled pattern, and the second is the name. If the third and +fourth arguments are NULL, the function returns a group number for a unique +name, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise. +

+

+When the third and fourth arguments are not NULL, they must be pointers to +variables that are updated by the function. After it has run, they point to the +first and last entries in the name-to-number table for the given name, and the +function returns the length of each entry in code units. In both cases, +PCRE2_ERROR_NOSUBSTRING is returned if there are no entries for the given name. +

+

+The format of the name table is described +above +in the section entitled Information about a pattern. Given all the +relevant entries for the name, you can extract each of their numbers, and hence +the captured data. +

+
FINDING ALL POSSIBLE MATCHES AT ONE POSITION
+

+The traditional matching function uses a similar algorithm to Perl, which stops +when it finds the first match at a given point in the subject. If you want to +find all possible matches, or the longest possible match at a given position, +consider using the alternative matching function (see below) instead. If you +cannot use the alternative function, you can kludge it up by making use of the +callout facility, which is described in the +pcre2callout +documentation. +

+

+What you have to do is to insert a callout right at the end of the pattern. +When your callout function is called, extract and save the current matched +substring. Then return 1, which forces pcre2_match() to backtrack and try +other alternatives. Ultimately, when it runs out of matches, +pcre2_match() will yield PCRE2_ERROR_NOMATCH. +

+
MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
+

+int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, + int *workspace, PCRE2_SIZE wscount); +

+

+The function pcre2_dfa_match() is called to match a subject string +against a compiled pattern, using a matching algorithm that scans the subject +string just once (not counting lookaround assertions), and does not backtrack +(except when processing lookaround assertions). This has different +characteristics to the normal algorithm, and is not compatible with Perl. Some +of the features of PCRE2 patterns are not supported. Nevertheless, there are +times when this kind of matching can be useful. For a discussion of the two +matching algorithms, and a list of features that pcre2_dfa_match() does +not support, see the +pcre2matching +documentation. +

+

+The arguments for the pcre2_dfa_match() function are the same as for +pcre2_match(), plus two extras. The ovector within the match data block +is used in a different way, and this is described below. The other common +arguments are used in the same way as for pcre2_match(), so their +description is not repeated here. +

+

+The two additional arguments provide workspace for the function. The workspace +vector should contain at least 20 elements. It is used for keeping track of +multiple paths through the pattern tree. More workspace is needed for patterns +and subjects where there are a lot of potential matches. +

+

+Here is an example of a simple call to pcre2_dfa_match(): +

+  int wspace[20];
+  pcre2_match_data *md = pcre2_match_data_create(4, NULL);
+  int rc = pcre2_dfa_match(
+    re,             /* result of pcre2_compile() */
+    "some string",  /* the subject string */
+    11,             /* the length of the subject string */
+    0,              /* start at offset 0 in the subject */
+    0,              /* default options */
+    md,             /* the match data block */
+    NULL,           /* a match context; NULL means use defaults */
+    wspace,         /* working space vector */
+    20);            /* number of elements (NOT size in bytes) */
+
+

+
+Option bits for pcre2_dfa_match() +
+

+The unused bits of the options argument for pcre2_dfa_match() must +be zero. The only bits that may be set are PCRE2_ANCHORED, +PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL, +PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, +PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last +four of these are exactly the same as for pcre2_match(), so their +description is not repeated here. +

+  PCRE2_PARTIAL_HARD
+  PCRE2_PARTIAL_SOFT
+
+These have the same general effect as they do for pcre2_match(), but the +details are slightly different. When PCRE2_PARTIAL_HARD is set for +pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the +subject is reached and there is still at least one matching possibility that +requires additional characters. This happens even if some complete matches have +already been found. When PCRE2_PARTIAL_SOFT is set, the return code +PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL if the end of the +subject is reached, there have been no complete matches, but there is still at +least one matching possibility. The portion of the string that was inspected +when the longest partial match was found is set as the first matching string in +both cases. There is a more detailed discussion of partial and multi-segment +matching, with examples, in the +pcre2partial +documentation. +
+  PCRE2_DFA_SHORTEST
+
+Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to stop as +soon as it has found one match. Because of the way the alternative algorithm +works, this is necessarily the shortest possible match at the first possible +matching point in the subject string. +
+  PCRE2_DFA_RESTART
+
+When pcre2_dfa_match() returns a partial match, it is possible to call it +again, with additional subject characters, and have it continue with the same +match. The PCRE2_DFA_RESTART option requests this action; when it is set, the +workspace and wscount arguments must reference the same vector as +before because data about the match so far is left in them after a partial +match. There is more discussion of this facility in the +pcre2partial +documentation. +

+
+Successful returns from pcre2_dfa_match() +
+

+When pcre2_dfa_match() succeeds, it may have matched more than one +substring in the subject. Note, however, that all the matches from one run of +the function start at the same point in the subject. The shorter matches are +all initial substrings of the longer matches. For example, if the pattern +

+  <.*>
+
+is matched against the string +
+  This is <something> <something else> <something further> no more
+
+the three matched strings are +
+  <something> <something else> <something further>
+  <something> <something else>
+  <something>
+
+On success, the yield of the function is a number greater than zero, which is +the number of matched substrings. The offsets of the substrings are returned in +the ovector, and can be extracted by number in the same way as for +pcre2_match(), but the numbers bear no relation to any capture groups +that may exist in the pattern, because DFA matching does not support capturing. +

+

+Calls to the convenience functions that extract substrings by name +return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used after a +DFA match. The convenience functions that extract substrings by number never +return PCRE2_ERROR_NOSUBSTRING. +

+

+The matched strings are stored in the ovector in reverse order of length; that +is, the longest matching string is first. If there were too many matches to fit +into the ovector, the yield of the function is zero, and the vector is filled +with the longest matches. +

+

+NOTE: PCRE2's "auto-possessification" optimization usually applies to character +repeats at the end of a pattern (as well as internally). For example, the +pattern "a\d+" is compiled as if it were "a\d++". For DFA matching, this +means that only one possible match is found. If you really do want multiple +matches in such cases, either use an ungreedy repeat such as "a\d+?" or set +the PCRE2_NO_AUTO_POSSESS option when compiling. +

+
+Error returns from pcre2_dfa_match() +
+

+The pcre2_dfa_match() function returns a negative number when it fails. +Many of the errors are the same as for pcre2_match(), as described +above. +There are in addition the following errors that are specific to +pcre2_dfa_match(): +

+  PCRE2_ERROR_DFA_UITEM
+
+This return is given if pcre2_dfa_match() encounters an item in the +pattern that it does not support, for instance, the use of \C in a UTF mode or +a backreference. +
+  PCRE2_ERROR_DFA_UCOND
+
+This return is given if pcre2_dfa_match() encounters a condition item +that uses a backreference for the condition, or a test for recursion in a +specific capture group. These are not supported. +
+  PCRE2_ERROR_DFA_UINVALID_UTF
+
+This return is given if pcre2_dfa_match() is called for a pattern that +was compiled with PCRE2_MATCH_INVALID_UTF. This is not supported for DFA +matching. +
+  PCRE2_ERROR_DFA_WSSIZE
+
+This return is given if pcre2_dfa_match() runs out of space in the +workspace vector. +
+  PCRE2_ERROR_DFA_RECURSE
+
+When a recursion or subroutine call is processed, the matching function calls +itself recursively, using private memory for the ovector and workspace. +This error is given if the internal ovector is not large enough. This should be +extremely rare, as a vector of size 1000 is used. +
+  PCRE2_ERROR_DFA_BADRESTART
+
+When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option, +some plausibility checks are made on the contents of the workspace, which +should contain data about the previous partial match. If any of these checks +fail, this error is given. +

+
SEE ALSO
+

+pcre2build(3), pcre2callout(3), pcre2demo(3), +pcre2matching(3), pcre2partial(3), pcre2posix(3), +pcre2sample(3), pcre2unicode(3). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 24 April 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2build.html b/doc/html/pcre2build.html new file mode 100644 index 0000000..d4b0d33 --- /dev/null +++ b/doc/html/pcre2build.html @@ -0,0 +1,652 @@ + + +pcre2build specification + + +

pcre2build man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
BUILDING PCRE2
+

+PCRE2 is distributed with a configure script that can be used to build +the library in Unix-like environments using the applications known as +Autotools. Also in the distribution are files to support building using +CMake instead of configure. The text file +README +contains general information about building with Autotools (some of which is +repeated below), and also has some comments about building on various operating +systems. The files in the vms directory support building under OpenVMS. +There is a lot more information about building PCRE2 without using +Autotools (including information about using CMake and building "by +hand") in the text file called +NON-AUTOTOOLS-BUILD. +You should consult this file as well as the +README +file if you are building in a non-Unix-like environment. +

+
PCRE2 BUILD-TIME OPTIONS
+

+The rest of this document describes the optional features of PCRE2 that can be +selected when the library is compiled. It assumes use of the configure +script, where the optional features are selected or deselected by providing +options to configure before running the make command. However, the +same options can be selected in both Unix-like and non-Unix-like environments +if you are using CMake instead of configure to build PCRE2. +

+

+If you are not using Autotools or CMake, option selection can be done by +editing the config.h file, or by passing parameter settings to the +compiler, as described in +NON-AUTOTOOLS-BUILD. +

+

+The complete list of options for configure (which includes the standard +ones such as the selection of the installation directory) can be obtained by +running +

+  ./configure --help
+
+The following sections include descriptions of "on/off" options whose names +begin with --enable or --disable. Because of the way that configure +works, --enable and --disable always come in pairs, so the complementary option +always exists as well, but as it specifies the default, it is not described. +Options that specify values have names that start with --with. At the end of a +configure run, a summary of the configuration is output. +

+
BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES
+

+By default, a library called libpcre2-8 is built, containing functions +that take string arguments contained in arrays of bytes, interpreted either as +single-byte characters, or UTF-8 strings. You can also build two other +libraries, called libpcre2-16 and libpcre2-32, which process +strings that are contained in arrays of 16-bit and 32-bit code units, +respectively. These can be interpreted either as single-unit characters or +UTF-16/UTF-32 strings. To build these additional libraries, add one or both of +the following to the configure command: +

+  --enable-pcre2-16
+  --enable-pcre2-32
+
+If you do not want the 8-bit library, add +
+  --disable-pcre2-8
+
+as well. At least one of the three libraries must be built. Note that the POSIX +wrapper is for the 8-bit library only, and that pcre2grep is an 8-bit +program. Neither of these is built if you select only the 16-bit or 32-bit +libraries. +

+
BUILDING SHARED AND STATIC LIBRARIES
+

+The Autotools PCRE2 building process uses libtool to build both shared +and static libraries by default. You can suppress an unwanted library by adding +one of +

+  --disable-shared
+  --disable-static
+
+to the configure command. Setting --disable-shared ensures that PCRE2 +libraries are built as static libraries. The binaries that are then created as +part of the build process (for example, pcre2test and pcre2grep) +are linked statically with one or more PCRE2 libraries, but may also be +dynamically linked with other libraries such as libc. If you want these +binaries to be fully statically linked, you can set LDFLAGS like this: +
+
+LDFLAGS=--static ./configure --disable-shared +
+
+Note the two hyphens in --static. Of course, this works only if static versions +of all the relevant libraries are available for linking. +

+
UNICODE AND UTF SUPPORT
+

+By default, PCRE2 is built with support for Unicode and UTF character strings. +To build it without Unicode support, add +

+  --disable-unicode
+
+to the configure command. This setting applies to all three libraries. It +is not possible to build one library with Unicode support and another without +in the same configuration. +

+

+Of itself, Unicode support does not make PCRE2 treat strings as UTF-8, UTF-16 +or UTF-32. To do that, applications that use the library can set the PCRE2_UTF +option when they call pcre2_compile() to compile a pattern. +Alternatively, patterns may be started with (*UTF) unless the application has +locked this out by setting PCRE2_NEVER_UTF. +

+

+UTF support allows the libraries to process character code points up to +0x10ffff in the strings that they handle. Unicode support also gives access to +the Unicode properties of characters, using pattern escapes such as \P, \p, +and \X. Only the general category properties such as Lu and Nd, +script names, and some bi-directional properties are supported. Details are +given in the +pcre2pattern +documentation. +

+

+Pattern escapes such as \d and \w do not by default make use of Unicode +properties. The application can request that they do by setting the PCRE2_UCP +option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also +request this by starting with (*UCP). +

+
DISABLING THE USE OF \C
+

+The \C escape sequence, which matches a single code unit, even in a UTF mode, +can cause unpredictable behaviour because it may leave the current matching +point in the middle of a multi-code-unit character. The application can lock it +out by setting the PCRE2_NEVER_BACKSLASH_C option when calling +pcre2_compile(). There is also a build-time option +

+  --enable-never-backslash-C
+
+(note the upper case C) which locks out the use of \C entirely. +

+
JUST-IN-TIME COMPILER SUPPORT
+

+Just-in-time (JIT) compiler support is included in the build by specifying +

+  --enable-jit
+
+This support is available only for certain hardware architectures. If this +option is set for an unsupported architecture, a build error occurs. +If in doubt, use +
+  --enable-jit=auto
+
+which enables JIT only if the current hardware is supported. You can check +if JIT is enabled in the configuration summary that is output at the end of a +configure run. If you are enabling JIT under SELinux you may also want to +add +
+  --enable-jit-sealloc
+
+which enables the use of an execmem allocator in JIT that is compatible with +SELinux. This has no effect if JIT is not enabled. See the +pcre2jit +documentation for a discussion of JIT usage. When JIT support is enabled, +pcre2grep automatically makes use of it, unless you add +
+  --disable-pcre2grep-jit
+
+to the configure command. +

+
NEWLINE RECOGNITION
+

+By default, PCRE2 interprets the linefeed (LF) character as indicating the end +of a line. This is the normal newline character on Unix-like systems. You can +compile PCRE2 to use carriage return (CR) instead, by adding +

+  --enable-newline-is-cr
+
+to the configure command. There is also an --enable-newline-is-lf option, +which explicitly specifies linefeed as the newline character. +

+

+Alternatively, you can specify that line endings are to be indicated by the +two-character sequence CRLF (CR immediately followed by LF). If you want this, +add +

+  --enable-newline-is-crlf
+
+to the configure command. There is a fourth option, specified by +
+  --enable-newline-is-anycrlf
+
+which causes PCRE2 to recognize any of the three sequences CR, LF, or CRLF as +indicating a line ending. A fifth option, specified by +
+  --enable-newline-is-any
+
+causes PCRE2 to recognize any Unicode newline sequence. The Unicode newline +sequences are the three just mentioned, plus the single characters VT (vertical +tab, U+000B), FF (form feed, U+000C), NEL (next line, U+0085), LS (line +separator, U+2028), and PS (paragraph separator, U+2029). The final option is +
+  --enable-newline-is-nul
+
+which causes NUL (binary zero) to be set as the default line-ending character. +

+

+Whatever default line ending convention is selected when PCRE2 is built can be +overridden by applications that use the library. At build time it is +recommended to use the standard for your operating system. +

+
WHAT \R MATCHES
+

+By default, the sequence \R in a pattern matches any Unicode newline sequence, +independently of what has been selected as the line ending sequence. If you +specify +

+  --enable-bsr-anycrlf
+
+the default is changed so that \R matches only CR, LF, or CRLF. Whatever is +selected when PCRE2 is built can be overridden by applications that use the +library. +

+
HANDLING VERY LARGE PATTERNS
+

+Within a compiled pattern, offset values are used to point from one part to +another (for example, from an opening parenthesis to an alternation +metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values +are used for these offsets, leading to a maximum size for a compiled pattern of +around 64 thousand code units. This is sufficient to handle all but the most +gigantic patterns. Nevertheless, some people do want to process truly enormous +patterns, so it is possible to compile PCRE2 to use three-byte or four-byte +offsets by adding a setting such as +

+  --with-link-size=3
+
+to the configure command. The value given must be 2, 3, or 4. For the +16-bit library, a value of 3 is rounded up to 4. In these libraries, using +longer offsets slows down the operation of PCRE2 because it has to load +additional data when handling them. For the 32-bit library the value is always +4 and cannot be overridden; the value of --with-link-size is ignored. +

+
LIMITING PCRE2 RESOURCE USAGE
+

+The pcre2_match() function increments a counter each time it goes round +its main loop. Putting a limit on this counter controls the amount of computing +resource used by a single call to pcre2_match(). The limit can be changed +at run time, as described in the +pcre2api +documentation. The default is 10 million, but this can be changed by adding a +setting such as +

+  --with-match-limit=500000
+
+to the configure command. This setting also applies to the +pcre2_dfa_match() matching function, and to JIT matching (though the +counting is done differently). +

+

+The pcre2_match() function uses heap memory to record backtracking +points. The more nested backtracking points there are (that is, the deeper the +search tree), the more memory is needed. There is an upper limit, specified in +kibibytes (units of 1024 bytes). This limit can be changed at run time, as +described in the +pcre2api +documentation. The default limit (in effect unlimited) is 20 million. You can +change this by a setting such as +

+  --with-heap-limit=500
+
+which limits the amount of heap to 500 KiB. This limit applies only to +interpretive matching in pcre2_match() and pcre2_dfa_match(), which +may also use the heap for internal workspace when processing complicated +patterns. This limit does not apply when JIT (which has its own memory +arrangements) is used. +

+

+You can also explicitly limit the depth of nested backtracking in the +pcre2_match() interpreter. This limit defaults to the value that is set +for --with-match-limit. You can set a lower default limit by adding, for +example, +

+  --with-match-limit-depth=10000
+
+to the configure command. This value can be overridden at run time. This +depth limit indirectly limits the amount of heap memory that is used, but +because the size of each backtracking "frame" depends on the number of +capturing parentheses in a pattern, the amount of heap that is used before the +limit is reached varies from pattern to pattern. This limit was more useful in +versions before 10.30, where function recursion was used for backtracking. +

+

+As well as applying to pcre2_match(), the depth limit also controls +the depth of recursive function calls in pcre2_dfa_match(). These are +used for lookaround assertions, atomic groups, and recursion within patterns. +The limit does not apply to JIT matching. +

+
LIMITING VARIABLE-LENGTH LOOKBEHIND ASSERTIONS
+

+Lookbehind assertions in which one or more branches can match a variable number +of characters are supported only if there is a maximum matching length for each +top-level branch. There is a limit to this maximum that defaults to 255 +characters. You can alter this default by a setting such as +

+  --with-max-varlookbehind=100
+
+The limit can be changed at runtime by calling +pcre2_set_max_varlookbehind(). Lookbehind assertions in which every +branch matches a fixed number of characters (not necessarily all the same) are +not constrained by this limit. +

+
CREATING CHARACTER TABLES AT BUILD TIME
+

+PCRE2 uses fixed tables for processing characters whose code points are less +than 256. By default, PCRE2 is built with a set of tables that are distributed +in the file src/pcre2_chartables.c.dist. These tables are for ASCII codes +only. If you add +

+  --enable-rebuild-chartables
+
+to the configure command, the distributed tables are no longer used. +Instead, a program called pcre2_dftables is compiled and run. This +outputs the source for a new set of tables, created in the default locale of your +C run-time system. This method of replacing the tables does not work if you are +cross compiling, because pcre2_dftables needs to be run on the local +host and therefore not compiled with the cross compiler. +

+

+If you need to create alternative tables when cross compiling, you will have to +do so "by hand". There may also be other reasons for creating tables manually. +To cause pcre2_dftables to be built on the local host, run a normal +compiling command, and then run the program with the output file as its +argument, for example: +

+  cc src/pcre2_dftables.c -o pcre2_dftables
+  ./pcre2_dftables src/pcre2_chartables.c
+
+This builds the tables in the default locale of the local host. If you want to +specify a locale, you must use the -L option: +
+  LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
+
+You can also specify -b (with or without -L). This causes the tables to be +written in binary instead of as source code. A set of binary tables can be +loaded into memory by an application and passed to pcre2_compile() in the +same way as tables created by calling pcre2_maketables(). The tables are +just a string of bytes, independent of hardware characteristics such as +endianness. This means they can be bundled with an application that runs in +different environments, to ensure consistent behaviour. +

+
USING EBCDIC CODE
+

+PCRE2 assumes by default that it will run in an environment where the character +code is ASCII or Unicode, which is a superset of ASCII. This is the case for +most computer operating systems. PCRE2 can, however, be compiled to run in an +8-bit EBCDIC environment by adding +

+  --enable-ebcdic --disable-unicode
+
+to the configure command. This setting implies +--enable-rebuild-chartables. You should only use it if you know that you are in +an EBCDIC environment (for example, an IBM mainframe operating system). +

+

+It is not possible to support both EBCDIC and UTF-8 codes in the same version +of the library. Consequently, --enable-unicode and --enable-ebcdic are mutually +exclusive. +

+

+The EBCDIC character that corresponds to an ASCII LF is assumed to have the +value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. In +such an environment you should use +

+  --enable-ebcdic-nl25
+
+as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR has the +same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is not +chosen as LF is made to correspond to the Unicode NEL character (which, in +Unicode, is 0x85). +

+

+The options that select newline behaviour, such as --enable-newline-is-cr, +and equivalent run-time options, refer to these character values in an EBCDIC +environment. +

+
PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS
+

+By default pcre2grep supports the use of callouts with string arguments +within the patterns it is matching. There are two kinds: one that generates +output using local code, and another that calls an external program or script. +If --disable-pcre2grep-callout-fork is added to the configure command, +only the first kind of callout is supported; if --disable-pcre2grep-callout is +used, all callouts are completely ignored. For more details of pcre2grep +callouts, see the +pcre2grep +documentation. +

+
PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT
+

+By default, pcre2grep reads all files as plain text. You can build it so +that it recognizes files whose names end in .gz or .bz2, and reads +them with libz or libbz2, respectively, by adding one or both of +

+  --enable-pcre2grep-libz
+  --enable-pcre2grep-libbz2
+
+to the configure command. These options naturally require that the +relevant libraries are installed on your system. Configuration will fail if +they are not. +

+
PCRE2GREP BUFFER SIZE
+

+pcre2grep uses an internal buffer to hold a "window" on the file it is +scanning, in order to be able to output "before" and "after" lines when it +finds a match. The default starting size of the buffer is 20KiB. The buffer +itself is three times this size, but because of the way it is used for holding +"before" lines, the longest line that is guaranteed to be processable is the +notional buffer size. If a longer line is encountered, pcre2grep +automatically expands the buffer, up to a specified maximum size, whose default +is 1MiB or the starting size, whichever is the larger. You can change the +default parameter values by adding, for example, +

+  --with-pcre2grep-bufsize=51200
+  --with-pcre2grep-max-bufsize=2097152
+
+to the configure command. The caller of pcre2grep can override +these values by using --buffer-size and --max-buffer-size on the command line. +

+
PCRE2TEST OPTION FOR LIBREADLINE SUPPORT
+

+If you add one of +

+  --enable-pcre2test-libreadline
+  --enable-pcre2test-libedit
+
+to the configure command, pcre2test is linked with the +libreadline or libedit library, respectively, and when its input is +from a terminal, it reads it using the readline() function. This provides +line-editing and history facilities. Note that libreadline is +GPL-licensed, so if you distribute a binary of pcre2test linked in this +way, there may be licensing issues. These can be avoided by linking instead +with libedit, which has a BSD licence. +

+

+Setting --enable-pcre2test-libreadline causes the -lreadline option to be +added to the pcre2test build. In many operating environments with a +system-installed readline library this is sufficient. However, in some +environments (e.g. if an unmodified distribution version of readline is in +use), some extra configuration may be necessary. The INSTALL file for +libreadline says this: +

+  "Readline uses the termcap functions, but does not link with
+  the termcap or curses library itself, allowing applications
+  which link with readline the to choose an appropriate library."
+
+If your environment has not been set up so that an appropriate library is +automatically included, you may need to add something like +
+  LIBS="-lncurses"
+
+immediately before the configure command. +

+
INCLUDING DEBUGGING CODE
+

+If you add +

+  --enable-debug
+
+to the configure command, additional debugging code is included in the +build. This feature is intended for use by the PCRE2 maintainers. +

+
DEBUGGING WITH VALGRIND SUPPORT
+

+If you add +

+  --enable-valgrind
+
+to the configure command, PCRE2 will use valgrind annotations to mark +certain memory regions as unaddressable. This allows it to detect invalid +memory accesses, and is mostly useful for debugging PCRE2 itself. +

+
CODE COVERAGE REPORTING
+

+If your C compiler is gcc, you can build a version of PCRE2 that can generate a +code coverage report for its test suite. To enable this, you must install +lcov version 1.6 or above. Then specify +

+  --enable-coverage
+
+to the configure command and build PCRE2 in the usual way. +

+

+Note that using ccache (a caching C compiler) is incompatible with code +coverage reporting. If you have configured ccache to run automatically +on your system, you must set the environment variable +

+  CCACHE_DISABLE=1
+
+before running make to build PCRE2, so that ccache is not used. +

+

+When --enable-coverage is used, the following additional targets are added to the +Makefile: +

+  make coverage
+
+This creates a fresh coverage report for the PCRE2 test suite. It is equivalent +to running "make coverage-reset", "make coverage-baseline", "make check", and +then "make coverage-report". +
+  make coverage-reset
+
+This zeroes the coverage counters, but does nothing else. +
+  make coverage-baseline
+
+This captures baseline coverage information. +
+  make coverage-report
+
+This creates the coverage report. +
+  make coverage-clean-report
+
+This removes the generated coverage report without cleaning the coverage data +itself. +
+  make coverage-clean-data
+
+This removes the captured coverage data without removing the coverage files +created at compile time (*.gcno). +
+  make coverage-clean
+
+This cleans all coverage data including the generated coverage report. For more +information about code coverage, see the gcov and lcov +documentation. +

+
DISABLING THE Z AND T FORMATTING MODIFIERS
+

+The C99 standard defines formatting modifiers z and t for size_t and +ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in +environments other than old versions of Microsoft Visual Studio when +__STDC_VERSION__ is defined and has a value greater than or equal to 199901L +(indicating support for C99). +However, there is at least one environment that claims to be C99 but does not +support these modifiers. If +

+  --disable-percent-zt
+
+is specified, no use is made of the z or t modifiers. Instead of %td or %zu, +a suitable format is used depending on the size of long for the platform. +

+
SUPPORT FOR FUZZERS
+

+There is a special option for use by people who want to run fuzzing tests on +PCRE2: +

+  --enable-fuzz-support
+
+At present this applies only to the 8-bit library. If set, it causes an extra +library called libpcre2-fuzzsupport.a to be built, but not installed. This +contains a single function called LLVMFuzzerTestOneInput() whose arguments are +a pointer to a string and the length of the string. When called, this function +tries to compile the string as a pattern, and if that succeeds, to match it. +This is done both with no options and with some random options bits that are +generated from the string. +

+

+Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck +to be created. This is normally run under valgrind or used when PCRE2 is +compiled with address sanitizing enabled. It calls the fuzzing function and +outputs information about what it is doing. The input strings are specified by +arguments: if an argument starts with "=" the rest of it is a literal input +string. Otherwise, it is assumed to be a file name, and the contents of the +file are the test string. +

+
OBSOLETE OPTION
+

+In versions of PCRE2 prior to 10.30, there were two ways of handling +backtracking in the pcre2_match() function. The default was to use the +system stack, but if +

+  --disable-stack-for-recursion
+
+was set, memory on the heap was used. From release 10.30 onwards this has +changed (the stack is no longer used) and this option now does nothing except +give a warning. +

+
SEE ALSO
+

+pcre2api(3), pcre2-config(3). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 15 April 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2callout.html b/doc/html/pcre2callout.html new file mode 100644 index 0000000..cdb65ad --- /dev/null +++ b/doc/html/pcre2callout.html @@ -0,0 +1,480 @@ + + +pcre2callout specification + + +

pcre2callout man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
SYNOPSIS
+

+#include <pcre2.h> +

+

+int (*pcre2_callout)(pcre2_callout_block *, void *); +
+
+int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); +

+
DESCRIPTION
+

+PCRE2 provides a feature called "callout", which is a means of temporarily +passing control to the caller of PCRE2 in the middle of pattern matching. The +caller of PCRE2 provides an external function by putting its entry point in +a match context (see pcre2_set_callout() in the +pcre2api +documentation). +

+

+When using the pcre2_substitute() function, an additional callout feature +is available. This does a callout after each change to the subject string and +is described in the +pcre2api +documentation; the rest of this document is concerned with callouts during +pattern matching. +

+

+Within a regular expression, (?C<arg>) indicates a point at which the external +function is to be called. Different callout points can be identified by putting +a number less than 256 after the letter C. The default value is zero. +Alternatively, the argument may be a delimited string. The starting delimiter +must be one of ` ' " ^ % # $ { and the ending delimiter is the same as the +start, except for {, where the ending delimiter is }. If the ending delimiter +is needed within the string, it must be doubled. For example, this pattern has +two callout points: +

+  (?C1)abc(?C"some ""arbitrary"" text")def
+
+If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2 +automatically inserts callouts, all with number 255, before each item in the +pattern except for immediately before or after an explicit callout. For +example, if PCRE2_AUTO_CALLOUT is used with the pattern +
+  A(?C3)B
+
+it is processed as if it were +
+  (?C255)A(?C3)B(?C255)
+
+Here is a more complicated example: +
+  A(\d{2}|--)
+
+With PCRE2_AUTO_CALLOUT, this pattern is processed as if it were +
+  (?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
+
+Notice that there is a callout before and after each parenthesis and +alternation bar. If the pattern contains a conditional group whose condition is +an assertion, an automatic callout is inserted immediately before the +condition. Such a callout may also be inserted explicitly, for example: +
+  (?(?C9)(?=a)ab|de)  (?(?C%text%)(?!=d)ab|de)
+
+This applies only to assertion conditions (because they are themselves +independent groups). +

+

+Callouts can be useful for tracking the progress of pattern matching. The +pcre2test +program has a pattern qualifier (/auto_callout) that sets automatic callouts. +When any callouts are present, the output from pcre2test indicates how +the pattern is being matched. This is useful information when you are trying to +optimize the performance of a particular pattern. +

+
MISSING CALLOUTS
+

+You should be aware that, because of optimizations in the way PCRE2 compiles +and matches patterns, callouts sometimes do not happen exactly as you might +expect. +

+
+Auto-possessification +
+

+At compile time, PCRE2 "auto-possessifies" repeated items when it knows that +what follows cannot be part of the repeat. For example, a+[bc] is compiled as +if it were a++[bc]. The pcre2test output when this pattern is compiled +with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied to the string +"aaaa" is: +

+  --->aaaa
+   +0 ^        a+
+   +2 ^   ^    [bc]
+  No match
+
+This indicates that when matching [bc] fails, there is no backtracking into a+ +(because it is being treated as a++) and therefore the callouts that would be +taken for the backtracks do not occur. You can disable the auto-possessify +feature by passing PCRE2_NO_AUTO_POSSESS to pcre2_compile(), or starting +the pattern with (*NO_AUTO_POSSESS). In this case, the output changes to this: +
+  --->aaaa
+   +0 ^        a+
+   +2 ^   ^    [bc]
+   +2 ^  ^     [bc]
+   +2 ^ ^      [bc]
+   +2 ^^       [bc]
+  No match
+
+This time, when matching [bc] fails, the matcher backtracks into a+ and tries +again, repeatedly, until a+ itself fails. +

+
+Automatic .* anchoring +
+

+By default, an optimization is applied when .* is the first significant item in +a pattern. If PCRE2_DOTALL is set, so that the dot can match any character, the +pattern is automatically anchored. If PCRE2_DOTALL is not set, a match can +start only after an internal newline or at the beginning of the subject, and +pcre2_compile() remembers this. If a pattern has more than one top-level +branch, automatic anchoring occurs if all branches are anchorable. +

+

+This optimization is disabled, however, if .* is in an atomic group or if there +is a backreference to the capture group in which it appears. It is also +disabled if the pattern contains (*PRUNE) or (*SKIP). However, the presence of +callouts does not affect it. +

+

+For example, if the pattern .*\d is compiled with PCRE2_AUTO_CALLOUT and +applied to the string "aa", the pcre2test output is: +

+  --->aa
+   +0 ^      .*
+   +2 ^ ^    \d
+   +2 ^^     \d
+   +2 ^      \d
+  No match
+
+This shows that all match attempts start at the beginning of the subject. In +other words, the pattern is anchored. You can disable this optimization by +passing PCRE2_NO_DOTSTAR_ANCHOR to pcre2_compile(), or starting the +pattern with (*NO_DOTSTAR_ANCHOR). In this case, the output changes to: +
+  --->aa
+   +0 ^      .*
+   +2 ^ ^    \d
+   +2 ^^     \d
+   +2 ^      \d
+   +0  ^     .*
+   +2  ^^    \d
+   +2  ^     \d
+  No match
+
+This shows more match attempts, starting at the second subject character. +Another optimization, described in the next section, means that there is no +subsequent attempt to match with an empty subject. +

+
+Other optimizations +
+

+Other optimizations that provide fast "no match" results also affect callouts. +For example, if the pattern is +

+  ab(?C4)cd
+
+PCRE2 knows that any matching string must contain the letter "d". If the +subject string is "abyz", the lack of "d" means that matching doesn't ever +start, and the callout is never reached. However, with "abyd", though the +result is still no match, the callout is obeyed. +

+

+For most patterns PCRE2 also knows the minimum length of a matching string, and +will immediately give a "no match" return without actually running a match if +the subject is not long enough, or, for unanchored patterns, if it has been +scanned far enough. +

+

+You can disable these optimizations by passing the PCRE2_NO_START_OPTIMIZE +option to pcre2_compile(), or by starting the pattern with +(*NO_START_OPT). This slows down the matching process, but does ensure that +callouts such as the example above are obeyed. +

+
THE CALLOUT INTERFACE
+

+During matching, when PCRE2 reaches a callout point, if an external function is +provided in the match context, it is called. This applies to normal, +DFA, and JIT matching. The first argument to the callout function is a pointer +to a pcre2_callout block. The second argument is the void * callout data +that was supplied when the callout was set up by calling +pcre2_set_callout() (see the +pcre2api +documentation). The callout block structure contains the following fields, not +necessarily in this order: +

+  uint32_t      version;
+  uint32_t      callout_number;
+  uint32_t      capture_top;
+  uint32_t      capture_last;
+  uint32_t      callout_flags;
+  PCRE2_SIZE   *offset_vector;
+  PCRE2_SPTR    mark;
+  PCRE2_SPTR    subject;
+  PCRE2_SIZE    subject_length;
+  PCRE2_SIZE    start_match;
+  PCRE2_SIZE    current_position;
+  PCRE2_SIZE    pattern_position;
+  PCRE2_SIZE    next_item_length;
+  PCRE2_SIZE    callout_string_offset;
+  PCRE2_SIZE    callout_string_length;
+  PCRE2_SPTR    callout_string;
+
+The version field contains the version number of the block format. The +current version is 2; the three callout string fields were added for version 1, +and the callout_flags field for version 2. If you are writing an +application that might use an earlier release of PCRE2, you should check the +version number before accessing any of these fields. The version number will +increase in future if more fields are added, but the intention is never to +remove any of the existing fields. +

+
+Fields for numerical callouts +
+

+For a numerical callout, callout_string is NULL, and callout_number +contains the number of the callout, in the range 0-255. This is the number +that follows (?C for callouts that are part of the pattern; it is 255 for +automatically generated callouts. +

+
+Fields for string callouts +
+

+For callouts with string arguments, callout_number is always zero, and +callout_string points to the string that is contained within the compiled +pattern. Its length is given by callout_string_length. Duplicated ending +delimiters that were present in the original pattern string have been turned +into single characters, but there is no other processing of the callout string +argument. An additional code unit containing binary zero is present after the +string, but is not included in the length. The delimiter that was used to start +the string is also stored within the pattern, immediately before the string +itself. You can access this delimiter as callout_string[-1] if you need +it. +

+

+The callout_string_offset field is the code unit offset to the start of +the callout argument string within the original pattern string. This is +provided for the benefit of applications such as script languages that might +need to report errors in the callout string within the pattern. +

+
+Fields for all callouts +
+

+The remaining fields in the callout block are the same for both kinds of +callout. +

+

+The offset_vector field is a pointer to a vector of capturing offsets +(the "ovector"). You may read the elements in this vector, but you must not +change any of them. +

+

+For calls to pcre2_match(), the offset_vector field is not (since +release 10.30) a pointer to the actual ovector that was passed to the matching +function in the match data block. Instead it points to an internal ovector of a +size large enough to hold all possible captured substrings in the pattern. Note +that whenever a recursion or subroutine call within a pattern completes, the +capturing state is reset to what it was before. +

+

+The capture_last field contains the number of the most recently captured +substring, and the capture_top field contains one more than the number of +the highest numbered captured substring so far. If no substrings have yet been +captured, the value of capture_last is 0 and the value of +capture_top is 1. The values of these fields do not always differ by one; +for example, when the callout in the pattern ((a)(b))(?C2) is taken, +capture_last is 1 but capture_top is 4. +

+

+The contents of ovector[2] to ovector[<capture_top>*2-1] can be inspected in +order to extract substrings that have been matched so far, in the same way as +extracting substrings after a match has completed. The values in ovector[0] and +ovector[1] are always PCRE2_UNSET because the match is by definition not +complete. Substrings that have not been captured but whose numbers are less +than capture_top also have both of their ovector slots set to +PCRE2_UNSET. +

+

+For DFA matching, the offset_vector field points to the ovector that was +passed to the matching function in the match data block for callouts at the top +level, but to an internal ovector during the processing of pattern recursions, +lookarounds, and atomic groups. However, these ovectors hold no useful +information because pcre2_dfa_match() does not support substring +capturing. The value of capture_top is always 1 and the value of +capture_last is always 0 for DFA matching. +

+

+The subject and subject_length fields contain copies of the values +that were passed to the matching function. +

+

+The start_match field normally contains the offset within the subject at +which the current match attempt started. However, if the escape sequence \K +has been encountered, this value is changed to reflect the modified starting +point. If the pattern is not anchored, the callout function may be called +several times from the same point in the pattern for different starting points +in the subject. +

+

+The current_position field contains the offset within the subject of the +current match pointer. +

+

+The pattern_position field contains the offset in the pattern string to +the next item to be matched. +

+

+The next_item_length field contains the length of the next item to be +processed in the pattern string. When the callout is at the end of the pattern, +the length is zero. When the callout precedes an opening parenthesis, the +length includes meta characters that follow the parenthesis. For example, in a +callout before an assertion such as (?=ab) the length is 3. For an alternation +bar or a closing parenthesis, the length is one, unless a closing parenthesis +is followed by a quantifier, in which case its length is included. (This +changed in release 10.23. In earlier releases, before an opening parenthesis +the length was that of the entire group, and before an alternation bar or a +closing parenthesis the length was zero.) +

+

+The pattern_position and next_item_length fields are intended to +help in distinguishing between different automatic callouts, which all have the +same callout number. However, they are set for all callouts, and are used by +pcre2test to show the next item to be matched when displaying callout +information. +

+

+In callouts from pcre2_match() the mark field contains a pointer to +the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or +(*THEN) item in the match, or NULL if no such items have been passed. Instances +of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In +callouts from the DFA matching function this field always contains NULL. +

+

+The callout_flags field is always zero in callouts from +pcre2_dfa_match() or when JIT is being used. When pcre2_match() +without JIT is used, the following bits may be set: +

+  PCRE2_CALLOUT_STARTMATCH
+
+This is set for the first callout after the start of matching for each new +starting position in the subject. +
+  PCRE2_CALLOUT_BACKTRACK
+
+This is set if there has been a matching backtrack since the previous callout, +or since the start of matching if this is the first callout from a +pcre2_match() run. +

+

+Both bits are set when a backtrack has caused a "bumpalong" to a new starting +position in the subject. Output from pcre2test does not indicate the +presence of these bits unless the callout_extra modifier is set. +

+

+The information in the callout_flags field is provided so that +applications can track and tell their users how matching with backtracking is +done. This can be useful when trying to optimize patterns, or just to +understand how PCRE2 works. There is no support in pcre2_dfa_match() +because there is no backtracking in DFA matching, and there is no support in +JIT because JIT is all about maximizing matching performance. In both these +cases the callout_flags field is always zero. +

+
RETURN VALUES FROM CALLOUTS
+

+The external callout function returns an integer to PCRE2. If the value is +zero, matching proceeds as normal. If the value is greater than zero, matching +fails at the current point, but the testing of other matching possibilities +goes ahead, just as if a lookahead assertion had failed. If the value is less +than zero, the match is abandoned, and the matching function returns the +negative value. +

+

+Negative values should normally be chosen from the set of PCRE2_ERROR_xxx +values. In particular, PCRE2_ERROR_NOMATCH forces a standard "no match" +failure. The error number PCRE2_ERROR_CALLOUT is reserved for use by callout +functions; it will never be used by PCRE2 itself. +

+
CALLOUT ENUMERATION
+

+int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); +
+
+A script language that supports the use of string arguments in callouts might +like to scan all the callouts in a pattern before running the match. This can +be done by calling pcre2_callout_enumerate(). The first argument is a +pointer to a compiled pattern, the second points to a callback function, and +the third is arbitrary user data. The callback function is called for every +callout in the pattern in the order in which they appear. Its first argument is +a pointer to a callout enumeration block, and its second argument is the +user_data value that was passed to pcre2_callout_enumerate(). The +data block contains the following fields: +

+  version                Block version number
+  pattern_position       Offset to next item in pattern
+  next_item_length       Length of next item in pattern
+  callout_number         Number for numbered callouts
+  callout_string_offset  Offset to string within pattern
+  callout_string_length  Length of callout string
+  callout_string         Points to callout string or is NULL
+
+The version number is currently 0. It will increase if new fields are ever +added to the block. The remaining fields are the same as their namesakes in the +pcre2_callout block that is used for callouts during matching, as +described +above. +

+

+Note that the value of pattern_position is unique for each callout. +However, if a callout occurs inside a group that is quantified with a non-zero +minimum or a fixed maximum, the group is replicated inside the compiled +pattern. For example, a pattern such as /(a){2}/ is compiled as if it were +/(a)(a)/. This means that the callout will be enumerated more than once, but +with the same value for pattern_position in each case. +

+

+The callback function should normally return zero. If it returns a non-zero +value, scanning the pattern stops, and that value is returned from +pcre2_callout_enumerate(). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 19 January 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2compat.html b/doc/html/pcre2compat.html new file mode 100644 index 0000000..d60182e --- /dev/null +++ b/doc/html/pcre2compat.html @@ -0,0 +1,276 @@ + + +pcre2compat specification + + +

pcre2compat man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+DIFFERENCES BETWEEN PCRE2 AND PERL +
+

+This document describes some of the known differences in the ways that PCRE2 +and Perl handle regular expressions. The differences described here are with +respect to Perl version 5.38.0, but as both Perl and PCRE2 are continually +changing, the information may at times be out of date. +

+

+1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the +behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the +next character unless it is the start of a newline sequence. This means that, +if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF +(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using +EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline +indicator. +

+

+2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does +have are given in the +pcre2unicode +page. +

+

+3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but +they do not mean what you might think. For example, (?!a){3} does not assert +that the next three characters are not "a". It just asserts that the next +character is not "a" three times (in principle; PCRE2 optimizes this to run the +assertion just once). Perl allows some repeat quantifiers on other assertions, +for example, \b* , but these do not seem to have any use. PCRE2 does not allow +any kind of quantifier on non-lookaround assertions. +

+

+4. If a braced quantifier such as {1,2} appears where there is nothing to +repeat (for example, at the start of a branch), PCRE2 raises an error whereas +Perl treats the quantifier characters as literal. +

+

+5. Capture groups that occur inside negative lookaround assertions are counted, +but their entries in the offsets vector are set only when a negative assertion +is a condition that has a matching branch (that is, the condition is false). +Perl may set such capture groups in other circumstances. +

+

+6. The following Perl escape sequences are not supported: \F, \l, \L, \u, +\U, and \N when followed by a character name. \N on its own, matching a +non-newline character, and \N{U+dd..}, matching a Unicode code point, are +supported. The escapes that modify the case of following letters are +implemented by Perl's general string-handling and are not part of its pattern +matching engine. If any of these are encountered by PCRE2, an error is +generated by default. However, if either of the PCRE2_ALT_BSUX or +PCRE2_EXTRA_ALT_BSUX options is set, \U and \u are interpreted as ECMAScript +interprets them. +

+

+7. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is +built with Unicode support (the default). The properties that can be tested +with \p and \P are limited to the general category properties such as Lu and +Nd, the derived properties Any and LC (synonym L&), script names such as Greek +or Han, Bidi_Class, Bidi_Control, and a few binary properties. Both PCRE2 and +Perl support the Cs (surrogate) property, but in PCRE2 its use is limited. See +the +pcre2pattern +documentation for details. The long synonyms for property names that Perl +supports (such as \p{Letter}) are not supported by PCRE2, nor is it permitted +to prefix any of these properties with "Is". +

+

+8. PCRE2 supports the \Q...\E escape for quoting substrings. Characters +in between are treated as literals. However, this is slightly different from +Perl in that $ and @ are also handled as literals inside the quotes. In Perl, +they cause variable interpolation (PCRE2 does not have variables). Also, Perl +does "double-quotish backslash interpolation" on any backslashes between \Q +and \E which, its documentation says, "may lead to confusing results". PCRE2 +treats a backslash between \Q and \E just like any other character. Note the +following examples: +

+    Pattern            PCRE2 matches     Perl matches
+
+    \Qabc$xyz\E        abc$xyz           abc followed by the contents of $xyz
+    \Qabc\$xyz\E       abc\$xyz          abc\$xyz
+    \Qabc\E\$\Qxyz\E   abc$xyz           abc$xyz
+    \QA\B\E            A\B               A\B
+    \Q\\E              \                 \\E
+
+The \Q...\E sequence is recognized both inside and outside character classes +by both PCRE2 and Perl. +

+

+9. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) +constructions. However, PCRE2 does have a "callout" feature, which allows an +external function to be called during pattern matching. See the +pcre2callout +documentation for details. +

+

+10. Subroutine calls (whether recursive or not) were treated as atomic groups +up to PCRE2 release 10.23, but from release 10.30 this changed, and +backtracking into subroutine calls is now supported, as in Perl. +

+

+11. In PCRE2, if any of the backtracking control verbs are used in a group that +is called as a subroutine (whether or not recursively), their effect is +confined to that group; it does not extend to the surrounding pattern. This is +not always the case in Perl. In particular, if (*THEN) is present in a group +that is called as a subroutine, its action is limited to that group, even if +the group does not contain any | characters. Note that such groups are +processed as anchored at the point where they are tested. +

+

+12. If a pattern contains more than one backtracking control verb, the first +one that is backtracked onto acts. For example, in the pattern +A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C +triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the +same as PCRE2, but there are cases where it differs. +

+

+13. There are some differences that are concerned with the settings of captured +strings when part of a pattern is repeated. For example, matching "aba" against +the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to +"b". +

+

+14. PCRE2's handling of duplicate capture group numbers and names is not as +general as Perl's. This is a consequence of the fact that PCRE2 works internally +just with numbers, using an external table to translate between numbers and +names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)), where the two +capture groups have the same number but different names, is not supported, and +causes an error at compile time. If it were allowed, it would not be possible +to distinguish which group matched, because both names map to capture group +number 1. To avoid this confusing situation, an error is given at compile time. +

+

+15. Perl used to recognize comments in some places that PCRE2 does not, for +example, between the ( and ? at the start of a group. If the /x modifier is +set, Perl allowed white space between ( and ? though the latest Perls give an +error (for a while it was just deprecated). There may still be some cases where +Perl behaves differently. +

+

+16. Perl, when in warning mode, gives warnings for character classes such as +[A-\d] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no +warning features, so it gives an error in these cases because they are almost +certainly user mistakes. +

+

+17. In PCRE2, the upper/lower case character properties Lu and Ll are not +affected when case-independent matching is specified. For example, \p{Lu} +always matches an upper case letter. I think Perl has changed in this respect; +in the release at the time of writing (5.38), \p{Lu} and \p{Ll} match all +letters, regardless of case, when case independence is specified. +

+

+18. From release 5.32.0, Perl locks out the use of \K in lookaround +assertions. From release 10.38 PCRE2 does the same by default. However, there +is an option for re-enabling the previous behaviour. When this option is set, +\K is acted on when it occurs in positive assertions, but is ignored in +negative assertions. +

+

+19. PCRE2 provides some extensions to the Perl regular expression facilities. +Perl 5.10 included new features that were not in earlier versions of Perl, some +of which (such as named parentheses) were in PCRE2 for some time before. This +list is with respect to Perl 5.38: +
+
+(a) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $ +meta-character matches only at the very end of the string. +
+
+(b) A backslash followed by a letter with no special meaning is faulted. (Perl +can be made to issue a warning.) +
+
+(c) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is +inverted, that is, by default they are not greedy, but if followed by a +question mark they are. +
+
+(d) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried +only at the first matching position in the subject string. +
+
+(e) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART +options have no Perl equivalents. +
+
+(f) The \R escape sequence can be restricted to match only CR, LF, or CRLF +by the PCRE2_BSR_ANYCRLF option. +
+
+(g) The callout facility is PCRE2-specific. Perl supports codeblocks and +variable interpolation, but not general hooks on every match. +
+
+(h) The partial matching facility is PCRE2-specific. +
+
+(i) The alternative matching function (pcre2_dfa_match()) matches in a +different way and is not Perl-compatible. +
+
+(j) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) at +the start of a pattern. These set overall options that cannot be changed within +the pattern. +
+
+(k) PCRE2 supports non-atomic positive lookaround assertions. This is an +extension to the lookaround facilities. The default, Perl-compatible +lookarounds are atomic. +
+
+(l) There are three syntactical items in patterns that can refer to a capturing +group by number: back references such as \g{2}, subroutine calls such as (?3), +and condition references such as (?(4)...). PCRE2 supports relative group +numbers such as +2 and -4 in all three cases. Perl supports both plus and minus +for subroutine calls, but only minus for back references, and no relative +numbering at all for conditions. +

+

+20. Perl has different limits than PCRE2. See the +pcre2limit +documentation for details. With release 5.10, Perl changed from recursion to iteration, +keeping the intermediate matches on the heap, which is ~10% slower but does not +run into any stack-overflow limit. PCRE2 made a similar change at release +10.30, and also has many build-time and run-time customizable limits. +

+

+21. Unlike Perl, PCRE2 doesn't have character set modifiers and especially no way +to set characters by context just like Perl's "/d". A regular expression using +PCRE2_UTF and PCRE2_UCP will use similar rules to Perl's "/u"; something closer +to "/a" could be selected by adding other PCRE2_EXTRA_ASCII* options on top. +

+

+22. Some recursive patterns that Perl diagnoses as infinite recursions can be +handled by PCRE2, either by the interpreter or the JIT. An example is +/(?:|(?0)abcd)(?(R)|\z)/, which matches a sequence of any number of repeated +"abcd" substrings at the end of the subject. +

+
+AUTHOR +
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
+REVISION +
+

+Last updated: 30 November 2023 +
+Copyright © 1997-2023 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2convert.html b/doc/html/pcre2convert.html new file mode 100644 index 0000000..6b9fea5 --- /dev/null +++ b/doc/html/pcre2convert.html @@ -0,0 +1,191 @@ + + +pcre2convert specification + + +

pcre2convert man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
EXPERIMENTAL PATTERN CONVERSION FUNCTIONS
+

+This document describes a set of functions that can be used to convert +"foreign" patterns into PCRE2 regular expressions. This facility is currently +experimental, and may be changed in future releases. Two kinds of pattern, +globs and POSIX patterns, are supported. +

+
THE CONVERT CONTEXT
+

+pcre2_convert_context *pcre2_convert_context_create( + pcre2_general_context *gcontext); +
+
+pcre2_convert_context *pcre2_convert_context_copy( + pcre2_convert_context *cvcontext); +
+
+void pcre2_convert_context_free(pcre2_convert_context *cvcontext); +
+
+int pcre2_set_glob_escape(pcre2_convert_context *cvcontext, + uint32_t escape_char); +
+
+int pcre2_set_glob_separator(pcre2_convert_context *cvcontext, + uint32_t separator_char); +
+
+A convert context is used to hold parameters that affect the way that pattern +conversion works. Like all PCRE2 contexts, you need to use a context only if +you want to override the defaults. There are the usual create, copy, and free +functions. If custom memory management functions are set in a general context +that is passed to pcre2_convert_context_create(), they are used for all +memory management within the conversion functions. +

+

+There are only two parameters in the convert context at present. Both apply +only to glob conversions. The escape character defaults to grave accent under +Windows, otherwise backslash. It can be set to zero, meaning no escape +character, or to any punctuation character with a code point less than 256. +The separator character defaults to backslash under Windows, otherwise forward +slash. It can be set to forward slash, backslash, or dot. +

+

+The two setting functions return zero on success, or PCRE2_ERROR_BADDATA if +their second argument is invalid. +

+
THE CONVERSION FUNCTION
+

+int pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, PCRE2_UCHAR **buffer, + PCRE2_SIZE *blength, pcre2_convert_context *cvcontext); +
+
+void pcre2_converted_pattern_free(PCRE2_UCHAR *converted_pattern); +
+
+The first two arguments of pcre2_pattern_convert() define the foreign +pattern that is to be converted. The length may be given as +PCRE2_ZERO_TERMINATED. The options argument defines how the pattern is to +be processed. If the input is UTF, the PCRE2_CONVERT_UTF option should be set. +PCRE2_CONVERT_NO_UTF_CHECK may also be set if you are sure the input is valid. +One or more of the glob options, or one of the following POSIX options must be +set to define the type of conversion that is required: +

+  PCRE2_CONVERT_GLOB
+  PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
+  PCRE2_CONVERT_GLOB_NO_STARSTAR
+  PCRE2_CONVERT_POSIX_BASIC
+  PCRE2_CONVERT_POSIX_EXTENDED
+
+Details of the conversions are given below. The buffer and blength +arguments define how the output is handled: +

+

+If buffer is NULL, the function just returns the length of the converted +pattern via blength. This is one less than the length of buffer needed, +because a terminating zero is always added to the output. +

+

+If buffer points to a NULL pointer, an output buffer is obtained using +the allocator in the context or malloc() if no context is supplied. A +pointer to this buffer is placed in the variable to which buffer points. +When no longer needed the output buffer must be freed by calling +pcre2_converted_pattern_free(). If this function is called with a NULL +argument, it returns immediately without doing anything. +

+

+If buffer points to a non-NULL pointer, blength must be set to the +actual length of the buffer provided (in code units). +

+

+In all cases, after successful conversion, the variable pointed to by +blength is updated to the length actually used (in code units), excluding +the terminating zero that is always added. +

+

+If an error occurs, the length (via blength) is set to the offset +within the input pattern where the error was detected. Only gross syntax errors +are caught; there are plenty of errors that will get passed on for +pcre2_compile() to discover. +

+

+The return from pcre2_pattern_convert() is zero on success or a non-zero +PCRE2 error code. Note that PCRE2 error codes may be positive or negative: +pcre2_compile() uses mostly positive codes and pcre2_match() +negative ones; pcre2_pattern_convert() uses existing codes of both kinds. A +textual error message can be obtained by calling +pcre2_get_error_message(). +

+
CONVERTING GLOBS
+

+Globs are used to match file names, and consequently have the concept of a +"path separator", which defaults to backslash under Windows and forward slash +otherwise. If PCRE2_CONVERT_GLOB is set, the wildcards * and ? are not +permitted to match separator characters, but the double-star (**) feature +(which does match separators) is supported. +

+

+PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to +match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with +the double-star feature disabled. These options may be given together. +

+
CONVERTING POSIX PATTERNS
+

+POSIX defines two kinds of regular expression pattern: basic and extended. +These can be processed by setting PCRE2_CONVERT_POSIX_BASIC or +PCRE2_CONVERT_POSIX_EXTENDED, respectively. +

+

+In POSIX patterns, backslash is not special in a character class. Unmatched +closing parentheses are treated as literals. +

+

+In basic patterns, ? + | {} and () must be escaped to be recognized +as metacharacters outside a character class. If the first character in the +pattern is * it is treated as a literal. ^ is a metacharacter only at the start +of a branch. +

+

+In extended patterns, a backslash not in a character class always +makes the next character literal, whatever it is. There are no backreferences. +

+

+Note: POSIX mandates that the longest possible match at the first matching +position must be found. This is not what pcre2_match() does; it yields +the first match that is found. An application can use pcre2_dfa_match() +to find the longest match, but that does not support backreferences (but then +neither do POSIX extended patterns). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 28 June 2018 +
+Copyright © 1997-2018 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2demo.html b/doc/html/pcre2demo.html new file mode 100644 index 0000000..1cb7e0a --- /dev/null +++ b/doc/html/pcre2demo.html @@ -0,0 +1,518 @@ + + +pcre2demo specification + + +

pcre2demo man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SOURCE CODE +
+

+/*************************************************
+*           PCRE2 DEMONSTRATION PROGRAM          *
+*************************************************/
+
+/* This is a demonstration program to illustrate a straightforward way of
+using the PCRE2 regular expression library from a C program. See the
+pcre2sample documentation for a short discussion ("man pcre2sample" if you have
+the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
+incompatible with the original PCRE API.
+
+There are actually three libraries, each supporting a different code unit
+width. This demonstration program uses the 8-bit library. The default is to
+process each code unit as a separate character, but if the pattern begins with
+"(*UTF)", both it and the subject are treated as UTF-8 strings, where
+characters may occupy multiple code units.
+
+In Unix-like environments, if PCRE2 is installed in your standard system
+libraries, you should be able to compile this program using this command:
+
+cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
+
+If PCRE2 is not installed in a standard place, it is likely to be installed
+with support for the pkg-config mechanism. If you have pkg-config, you can
+compile this program using this command:
+
+cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
+
+If you do not have pkg-config, you may have to use something like this:
+
+cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
+  -R/usr/local/lib -lpcre2-8 -o pcre2demo
+
+Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
+library files for PCRE2 are installed on your system. Only some operating
+systems (Solaris is one) use the -R option.
+
+Building under Windows:
+
+If you want to statically link this program against a non-dll .a file, you must
+define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
+the following line. */
+
+/* #define PCRE2_STATIC */
+
+/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h.
+For a program that uses only one code unit width, setting it to 8, 16, or 32
+makes it possible to use generic function names such as pcre2_compile(). Note
+that just changing 8 to 16 (for example) is not sufficient to convert this
+program to process 16-bit characters. Even in a fully 16-bit environment, where
+string-handling functions such as strcmp() and printf() work with 16-bit
+characters, the code for handling the table of named substrings will still need
+to be modified. */
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+#include <stdio.h>
+#include <string.h>
+#include <pcre2.h>
+
+
+/**************************************************************************
+* Here is the program. The API includes the concept of "contexts" for     *
+* setting up unusual interface requirements for compiling and matching,   *
+* such as custom memory managers and non-standard newline definitions.    *
+* This program does not do any of this, so it makes no use of contexts,   *
+* always passing NULL where a context could be given.                     *
+**************************************************************************/
+
+int main(int argc, char **argv)  /* Usage: pcre2demo [-g] pattern subject */
+{                                /* Returns 0 on a match, 1 on no match or error */
+pcre2_code *re;         /* compiled pattern returned by pcre2_compile() */
+PCRE2_SPTR pattern;     /* PCRE2_SPTR is a pointer to unsigned code units of */
+PCRE2_SPTR subject;     /* the appropriate width (in this case, 8 bits). */
+PCRE2_SPTR name_table;  /* filled in via PCRE2_INFO_NAMETABLE when names exist */
+
+int crlf_is_newline;    /* non-zero if CRLF is a valid newline sequence */
+int errornumber;        /* error code set by pcre2_compile() on failure */
+int find_all;           /* non-zero if -g was given */
+int i;                  /* loop index: arguments, then groups and names */
+int rc;                 /* return value of the latest pcre2_match() call */
+int utf8;               /* non-zero if PCRE2_UTF is among the pattern options */
+
+uint32_t option_bits;       /* result of PCRE2_INFO_ALLOPTIONS */
+uint32_t namecount;         /* result of PCRE2_INFO_NAMECOUNT */
+uint32_t name_entry_size;   /* result of PCRE2_INFO_NAMEENTRYSIZE */
+uint32_t newline;           /* result of PCRE2_INFO_NEWLINE */
+
+PCRE2_SIZE erroroffset;     /* pattern offset set by pcre2_compile() on failure */
+PCRE2_SIZE *ovector;        /* offset pairs: ovector[2*i] and ovector[2*i+1] */
+PCRE2_SIZE subject_length;  /* strlen() of the subject */
+
+pcre2_match_data *match_data;  /* result block reused by every pcre2_match() call */
+
+
+/**************************************************************************
+* First, sort out the command line. There is only one possible option at  *
+* the moment, "-g" to request repeated matching to find all occurrences,  *
+* like Perl's /g option. We set the variable find_all to a non-zero value *
+* if the -g option is present.                                            *
+**************************************************************************/
+
+find_all = 0;
+for (i = 1; i < argc; i++)
+  {
+  if (strcmp(argv[i], "-g") == 0) find_all = 1;
+  else if (argv[i][0] == '-')
+    {
+    printf("Unrecognised option %s\n", argv[i]);
+    return 1;
+    }
+  else break;
+  }
+
+/* After the options, we require exactly two arguments, which are the pattern,
+and the subject string. */
+
+if (argc - i != 2)
+  {
+  printf("Exactly two arguments required: a regex and a subject string\n");
+  return 1;
+  }
+
+/* Pattern and subject are char arguments, so they can be straightforwardly
+cast to PCRE2_SPTR because we are working in 8-bit code units. The subject
+length is cast to PCRE2_SIZE for completeness, though PCRE2_SIZE is in fact
+defined to be size_t. */
+
+pattern = (PCRE2_SPTR)argv[i];
+subject = (PCRE2_SPTR)argv[i+1];
+subject_length = (PCRE2_SIZE)strlen((char *)subject);
+
+
+/*************************************************************************
+* Now we are going to compile the regular expression pattern, and handle *
+* any errors that are detected.                                          *
+*************************************************************************/
+
+re = pcre2_compile(
+  pattern,               /* the pattern */
+  PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
+  0,                     /* default options */
+  &errornumber,          /* for error number */
+  &erroroffset,          /* for error offset */
+  NULL);                 /* use default compile context */
+
+/* Compilation failed: print the error message and exit. */
+
+if (re == NULL)
+  {
+  PCRE2_UCHAR buffer[256];
+  pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
+  printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
+    buffer);
+  return 1;
+  }
+
+
+/*************************************************************************
+* If the compilation succeeded, we call PCRE2 again, in order to do a    *
+* pattern match against the subject string. This does just ONE match. If *
+* further matching is needed, it will be done below. Before running the  *
+* match we must set up a match_data block for holding the result. Using  *
+* pcre2_match_data_create_from_pattern() ensures that the block is       *
+* exactly the right size for the number of capturing parentheses in the  *
+* pattern. If you need to know the actual size of a match_data block as  *
+* a number of bytes, you can find it like this:                          *
+*                                                                        *
+* PCRE2_SIZE match_data_size = pcre2_get_match_data_size(match_data);    *
+*************************************************************************/
+
+match_data = pcre2_match_data_create_from_pattern(re, NULL);
+
+/* Now run the match. */
+
+rc = pcre2_match(
+  re,                   /* the compiled pattern */
+  subject,              /* the subject string */
+  subject_length,       /* the length of the subject */
+  0,                    /* start at offset 0 in the subject */
+  0,                    /* default options */
+  match_data,           /* block for storing the result */
+  NULL);                /* use default match context */
+
+/* Matching failed: handle error cases */
+
+if (rc < 0)
+  {
+  switch(rc)
+    {
+    case PCRE2_ERROR_NOMATCH: printf("No match\n"); break;
+    /*
+    Handle other special cases if you like
+    */
+    default: printf("Matching error %d\n", rc); break;
+    }
+  pcre2_match_data_free(match_data);   /* Release memory used for the match */
+  pcre2_code_free(re);                 /*   data and the compiled pattern. */
+  return 1;
+  }
+
+/* Match succeeded. Get a pointer to the output vector, where string offsets
+are stored. */
+
+ovector = pcre2_get_ovector_pointer(match_data);
+printf("Match succeeded at offset %d\n", (int)ovector[0]);
+
+
+/*************************************************************************
+* We have found the first match within the subject string. If the output *
+* vector wasn't big enough, say so. Then output any substrings that were *
+* captured.                                                              *
+*************************************************************************/
+
+/* The output vector wasn't big enough. This should not happen, because we used
+pcre2_match_data_create_from_pattern() above. */
+
+if (rc == 0)
+  printf("ovector was not big enough for all the captured substrings\n");
+
+/* Since release 10.38 PCRE2 has locked out the use of \K in lookaround
+assertions. However, there is an option to re-enable the old behaviour. If that
+is set, it is possible to run patterns such as /(?=.\K)/ that use \K in an
+assertion to set the start of a match later than its end. In this demonstration
+program, we show how to detect this case, but it shouldn't arise because the
+option is never set. */
+
+if (ovector[0] > ovector[1])
+  {
+  printf("\\K was used in an assertion to set the match start after its end.\n"
+    "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
+      (char *)(subject + ovector[1]));
+  printf("Run abandoned\n");
+  pcre2_match_data_free(match_data);
+  pcre2_code_free(re);
+  return 1;
+  }
+
+/* Show substrings stored in the output vector by number. Obviously, in a real
+application you might want to do things other than print them. */
+
+for (i = 0; i < rc; i++)
+  {
+  PCRE2_SPTR substring_start = subject + ovector[2*i];
+  PCRE2_SIZE substring_length = ovector[2*i+1] - ovector[2*i];
+  printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
+  }
+
+
+/**************************************************************************
+* That concludes the basic part of this demonstration program. We have    *
+* compiled a pattern, and performed a single match. The code that follows *
+* shows first how to access named substrings, and then how to code for    *
+* repeated matches on the same subject.                                   *
+**************************************************************************/
+
+/* See if there are any named substrings, and if so, show them by name. First
+we have to extract the count of named parentheses from the pattern. */
+
+(void)pcre2_pattern_info(
+  re,                   /* the compiled pattern */
+  PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
+  &namecount);          /* where to put the answer */
+
+if (namecount == 0) printf("No named substrings\n"); else
+  {
+  PCRE2_SPTR tabptr;
+  printf("Named substrings\n");
+
+  /* Before we can access the substrings, we must extract the table for
+  translating names to numbers, and the size of each entry in the table. */
+
+  (void)pcre2_pattern_info(
+    re,                       /* the compiled pattern */
+    PCRE2_INFO_NAMETABLE,     /* address of the table */
+    &name_table);             /* where to put the answer */
+
+  (void)pcre2_pattern_info(
+    re,                       /* the compiled pattern */
+    PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
+    &name_entry_size);        /* where to put the answer */
+
+  /* Now we can scan the table and, for each entry, print the number, the name,
+  and the substring itself. In the 8-bit library the number is held in two
+  bytes, most significant first. */
+
+  tabptr = name_table;
+  for (i = 0; i < namecount; i++)   /* NOTE(review): int compared with uint32_t */
+    {   /* NOTE(review): the '*' width below is passed a uint32_t, not an int */
+    int n = (tabptr[0] << 8) | tabptr[1];   /* group number from the two bytes */
+    printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
+      (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
+    tabptr += name_entry_size;              /* step to the next table entry */
+    }
+  }
+
+
+/*************************************************************************
+* If the "-g" option was given on the command line, we want to continue  *
+* to search for additional matches in the subject string, in a similar   *
+* way to the /g option in Perl. This turns out to be trickier than you   *
+* might think because of the possibility of matching an empty string.    *
+* What happens is as follows:                                            *
+*                                                                        *
+* If the previous match was NOT for an empty string, we can just start   *
+* the next match at the end of the previous one.                         *
+*                                                                        *
+* If the previous match WAS for an empty string, we can't do that, as it *
+* would lead to an infinite loop. Instead, a call of pcre2_match() is    *
+* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
+* first of these tells PCRE2 that an empty string at the start of the    *
+* subject is not a valid match; other possibilities must be tried. The   *
+* second flag restricts PCRE2 to one match attempt at the initial string *
+* position. If this match succeeds, an alternative to the empty string   *
+* match has been found, and we can print it and proceed round the loop,  *
+* advancing by the length of whatever was found. If this match does not  *
+* succeed, we still stay in the loop, advancing by just one character.   *
+* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be  *
+* more than one byte.                                                    *
+*                                                                        *
+* However, there is a complication concerned with newlines. When the     *
+* newline convention is such that CRLF is a valid newline, we must       *
+* advance by two characters rather than one. The newline convention can  *
+* be set in the regex by (*CR), etc.; if not, we must find the default.  *
+*************************************************************************/
+
+if (!find_all)     /* Check for -g */
+  {
+  pcre2_match_data_free(match_data);  /* Release the memory that was used */
+  pcre2_code_free(re);                /* for the match data and the pattern. */
+  return 0;                           /* Exit the program. */
+  }
+
+/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
+sequence. First, find the options with which the regex was compiled and extract
+the UTF state. */
+
+(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits);
+utf8 = (option_bits & PCRE2_UTF) != 0;
+
+/* Now find the newline convention and see whether CRLF is a valid newline
+sequence. */
+
+(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
+crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
+                  newline == PCRE2_NEWLINE_CRLF ||
+                  newline == PCRE2_NEWLINE_ANYCRLF;
+
+/* Loop for second and subsequent matches */
+
+for (;;)
+  {
+  uint32_t options = 0;                   /* Normally no options */
+  PCRE2_SIZE start_offset = ovector[1];   /* Start at end of previous match */
+
+  /* If the previous match was for an empty string, we are finished if we are
+  at the end of the subject. Otherwise, arrange to run another match at the
+  same point to see if a non-empty match can be found. */
+
+  if (ovector[0] == ovector[1])
+    {
+    if (ovector[0] == subject_length) break;
+    options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+    }
+
+  /* If the previous match was not an empty string, there is one tricky case to
+  consider. If a pattern contains \K within a lookbehind assertion at the
+  start, the end of the matched string can be at the offset where the match
+  started. Without special action, this leads to a loop that keeps on matching
+  the same substring. We must detect this case and arrange to move the start on
+  by one character. Per its man page, pcre2_get_startchar() returns the offset
+  of the character at which the successful match started, not ovector[0]. */
+
+  else
+    {
+    PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
+    if (start_offset <= startchar)
+      {
+      if (startchar >= subject_length) break;   /* Reached end of subject.   */
+      start_offset = startchar + 1;             /* Advance by one character. */
+      if (utf8)                                 /* If UTF-8, it may be more  */
+        {                                       /*   than one code unit.     */
+        for (; start_offset < subject_length; start_offset++)
+          if ((subject[start_offset] & 0xc0) != 0x80) break;
+        }
+      }
+    }
+
+  /* Run the next matching operation */
+
+  rc = pcre2_match(
+    re,                   /* the compiled pattern */
+    subject,              /* the subject string */
+    subject_length,       /* the length of the subject */
+    start_offset,         /* starting offset in the subject */
+    options,              /* options */
+    match_data,           /* block for storing the result */
+    NULL);                /* use default match context */
+
+  /* This time, a result of NOMATCH isn't an error. If the value in "options"
+  is zero, it just means we have found all possible matches, so the loop ends.
+  Otherwise, it means we have failed to find a non-empty-string match at a
+  point where there was a previous empty-string match. In this case, we do what
+  Perl does: advance the matching position by one character, and continue. We
+  do this by setting the "end of previous match" offset, because that is picked
+  up at the top of the loop as the point at which to start again.
+
+  There are two complications: (a) When CRLF is a valid newline sequence, and
+  the current position is just before it, advance by an extra byte. (b)
+  Otherwise we must ensure that we skip an entire UTF character if we are in
+  UTF mode. */
+
+  if (rc == PCRE2_ERROR_NOMATCH)
+    {
+    if (options == 0) break;                    /* All matches found */
+    ovector[1] = start_offset + 1;              /* Advance one code unit */
+    if (crlf_is_newline &&                      /* If CRLF is a newline & */
+        start_offset < subject_length - 1 &&    /* we are at CRLF, */
+        subject[start_offset] == '\r' &&
+        subject[start_offset + 1] == '\n')
+      ovector[1] += 1;                          /* Advance by one more. */
+    else if (utf8)                              /* Otherwise, ensure we */
+      {                                         /* advance a whole UTF-8 */
+      while (ovector[1] < subject_length)       /* character. */
+        {
+        if ((subject[ovector[1]] & 0xc0) != 0x80) break;
+        ovector[1] += 1;
+        }
+      }
+    continue;    /* Go round the loop again */
+    }
+
+  /* Other matching errors are not recoverable. */
+
+  if (rc < 0)
+    {
+    printf("Matching error %d\n", rc);
+    pcre2_match_data_free(match_data);
+    pcre2_code_free(re);
+    return 1;
+    }
+
+  /* Match succeeded */
+
+  printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]);
+
+  /* The match succeeded, but the output vector wasn't big enough. This
+  should not happen. */
+
+  if (rc == 0)
+    printf("ovector was not big enough for all the captured substrings\n");
+
+  /* We must guard against patterns such as /(?=.\K)/ that use \K in an
+  assertion to set the start of a match later than its end. In this
+  demonstration program, we just detect this case and give up. */
+
+  if (ovector[0] > ovector[1])
+    {
+    printf("\\K was used in an assertion to set the match start after its end.\n"
+      "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
+        (char *)(subject + ovector[1]));
+    printf("Run abandoned\n");
+    pcre2_match_data_free(match_data);
+    pcre2_code_free(re);
+    return 1;
+    }
+
+  /* As before, show substrings stored in the output vector by number, and then
+  also any named substrings. */
+
+  for (i = 0; i < rc; i++)
+    {
+    PCRE2_SPTR substring_start = subject + ovector[2*i];
+    size_t substring_length = ovector[2*i+1] - ovector[2*i];
+    printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start);
+    }
+
+  if (namecount == 0) printf("No named substrings\n"); else
+    {
+    PCRE2_SPTR tabptr = name_table;
+    printf("Named substrings\n");
+    for (i = 0; i < namecount; i++)   /* NOTE(review): int compared with uint32_t */
+      {   /* NOTE(review): the '*' width below is passed a uint32_t, not an int */
+      int n = (tabptr[0] << 8) | tabptr[1];   /* group number from the two bytes */
+      printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
+        (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]);
+      tabptr += name_entry_size;              /* step to the next table entry */
+      }
+    }
+  }      /* End of loop to find second and subsequent matches */
+
+printf("\n");
+pcre2_match_data_free(match_data);
+pcre2_code_free(re);
+return 0;
+}
+
+/* End of pcre2demo.c */
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html new file mode 100644 index 0000000..bd12246 --- /dev/null +++ b/doc/html/pcre2grep.html @@ -0,0 +1,1125 @@ + + +pcre2grep specification + + +

pcre2grep man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
SYNOPSIS
+

+pcre2grep [options] [long options] [pattern] [path1 path2 ...] +

+
DESCRIPTION
+

+pcre2grep searches files for character patterns, in the same way as other +grep commands do, but it uses the PCRE2 regular expression library to support +patterns that are compatible with the regular expressions of Perl 5. See +pcre2syntax(3) +for a quick-reference summary of pattern syntax, or +pcre2pattern(3) +for a full description of the syntax and semantics of the regular expressions +that PCRE2 supports. +

+

+Patterns, whether supplied on the command line or in a separate file, are given +without delimiters. For example: +

+  pcre2grep Thursday /etc/motd
+
+If you attempt to use delimiters (for example, by surrounding a pattern with +slashes, as is common in Perl scripts), they are interpreted as part of the +pattern. Quotes can of course be used to delimit patterns on the command line +because they are interpreted by the shell, and indeed quotes are required if a +pattern contains white space or shell metacharacters. +

+

+The first argument that follows any option settings is treated as the single +pattern to be matched when neither -e nor -f is present. +Conversely, when one or both of these options are used to specify patterns, all +arguments are treated as path names. At least one of -e, -f, or an +argument pattern must be provided. +

+

+If no files are specified, pcre2grep reads the standard input. The +standard input can also be referenced by a name consisting of a single hyphen. +For example: +

+  pcre2grep some-pattern file1 - file3
+
+By default, input files are searched line by line, so pattern assertions about +the beginning and end of a subject string (^, $, \A, \Z, and \z) match at +the beginning and end of each line. When a line matches a pattern, it is copied +to the standard output, and if there is more than one file, the file name is +output at the start of each line, followed by a colon. However, there are +options that can change how pcre2grep behaves. For example, the -M +option makes it possible to search for strings that span line boundaries. What +defines a line boundary is controlled by the -N (--newline) option. +The -h and -H options control whether or not file names are shown, +and the -Z option changes the file name terminator to a zero byte. +

+

+The amount of memory used for buffering files that are being scanned is +controlled by parameters that can be set by the --buffer-size and +--max-buffer-size options. The first of these sets the size of buffer +that is obtained at the start of processing. If an input file contains very +long lines, a larger buffer may be needed; this is handled by automatically +extending the buffer, up to the limit specified by --max-buffer-size. The +default values for these parameters can be set when pcre2grep is +built; if nothing is specified, the defaults are set to 20KiB and 1MiB +respectively. An error occurs if a line is too long and the buffer can no +longer be expanded. +

+

+The block of memory that is actually used is three times the "buffer size", to +allow for buffering "before" and "after" lines. If the buffer size is too +small, fewer than requested "before" and "after" lines may be output. +

+

+When matching with a multiline pattern, the size of the buffer must be at least +half of the maximum match expected or the pattern might fail to match. +

+

+Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the greater. +BUFSIZ is defined in <stdio.h>. When there is more than one pattern +(specified by the use of -e and/or -f), each pattern is applied to +each line in the order in which they are defined, except that all the -e +patterns are tried before the -f patterns. +

+

+By default, as soon as one pattern matches a line, no further patterns are +considered. However, if --colour (or --color) is used to colour the +matching substrings, or if --only-matching, --file-offsets, +--line-offsets, or --output is used to output only the part of the +line that matched (either shown literally, or as an offset), the behaviour is +different. In this situation, all the patterns are applied to the line. If +there is more than one match, the one that begins nearest to the start of the +subject is processed; if there is more than one match at that position, the one +with the longest matching substring is processed; if the matching substrings +are equal, the first match found is processed. +

+

+Scanning with all the patterns resumes immediately following the match, so that +later matches on the same line can be found. Note, however, that an overlapping +match that starts in the middle of another match will not be processed. +

+

+The above behaviour was changed at release 10.41 to be more compatible with GNU +grep. In earlier releases, pcre2grep did not recognize matches from +later patterns that were earlier in the subject. +

+

+Patterns that can match an empty string are accepted, but empty string +matches are never recognized. An example is the pattern "(super)?(man)?", in +which all components are optional. This pattern finds all occurrences of both +"super" and "man"; the output differs from matching with "super|man" when only +the matching substrings are being shown. +

+

+If the LC_ALL or LC_CTYPE environment variable is set, +pcre2grep uses the value to set a locale when calling the PCRE2 library. +The --locale option can be used to override this. +

+
SUPPORT FOR COMPRESSED FILES
+

+Compile-time options for pcre2grep can set it up to use libz or +libbz2 for reading compressed files whose names end in .gz or +.bz2, respectively. You can find out whether your pcre2grep binary +has support for one or both of these file types by running it with the +--help option. If the appropriate support is not present, all files are +treated as plain text. The standard input is always so treated. If a file with +a .gz or .bz2 extension is not in fact compressed, it is read as a +plain text file. When input is from a compressed .gz or .bz2 file, the +--line-buffered option is ignored. +

+
BINARY FILES
+

+By default, a file that contains a binary zero byte within the first 1024 bytes +is identified as a binary file, and is processed specially. However, if the +newline type is specified as NUL, that is, the line terminator is a binary +zero, the test for a binary file is not applied. See the --binary-files +option for a means of changing the way binary files are handled. +

+
BINARY ZEROS IN PATTERNS
+

+Patterns passed from the command line are strings that are terminated by a +binary zero, so cannot contain internal zeros. However, patterns that are read +from a file via the -f option may contain binary zeros. +

+
OPTIONS
+

+The order in which some of the options appear can affect the output. For +example, both the -H and -l options affect the printing of file +names. Whichever comes later in the command line will be the one that takes +effect. Similarly, except where noted below, if an option is given twice, the +later setting is used. Numerical values for options may be followed by K or M, +to signify multiplication by 1024 or 1024*1024 respectively. +

+

+-- +This terminates the list of options. It is useful if the next item on the +command line starts with a hyphen but is not an option. This allows for the +processing of patterns and file names that start with hyphens. +

+

+-A number, --after-context=number +Output up to number lines of context after each matching line. Fewer +lines are output if the next match or the end of the file is reached, or if the +processing buffer size has been set too small. If file names and/or line +numbers are being output, a hyphen separator is used instead of a colon for the +context lines (the -Z option can be used to change the file name +terminator to a zero byte). A line containing "--" is output between each group +of lines, unless they are in fact contiguous in the input file. The value of +number is expected to be relatively small. When -c is used, +-A is ignored. +

+

+-a, --text +Treat binary files as text. This is equivalent to +--binary-files=text. +

+

+--allow-lookaround-bsk +PCRE2 now forbids the use of \K in lookarounds by default, in line with Perl. +This option causes pcre2grep to set the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK +option, which enables this somewhat dangerous usage. +

+

+-B number, --before-context=number +Output up to number lines of context before each matching line. Fewer +lines are output if the previous match or the start of the file is within +number lines, or if the processing buffer size has been set too small. If +file names and/or line numbers are being output, a hyphen separator is used +instead of a colon for the context lines (the -Z option can be used to +change the file name terminator to a zero byte). A line containing "--" is +output between each group of lines, unless they are in fact contiguous in the +input file. The value of number is expected to be relatively small. When +-c is used, -B is ignored. +

+

+--binary-files=word +Specify how binary files are to be processed. If the word is "binary" (the +default), pattern matching is performed on binary files, but the only output is +"Binary file <name> matches" when a match succeeds. If the word is "text", +which is equivalent to the -a or --text option, binary files are +processed in the same way as any other file. In this case, when a match +succeeds, the output may be binary garbage, which can have nasty effects if +sent to a terminal. If the word is "without-match", which is equivalent to the +-I option, binary files are not processed at all; they are assumed not to +be of interest and are skipped without causing any output or affecting the +return code. +

+

+--buffer-size=number +Set the parameter that controls how much memory is obtained at the start of +processing for buffering files that are being scanned. See also +--max-buffer-size below. +

+

+-C number, --context=number +Output number lines of context both before and after each matching line. +This is equivalent to setting both -A and -B to the same value. +

+

+-c, --count +Do not output lines from the files that are being scanned; instead output the +number of lines that would have been shown, either because they matched, or, if +-v is set, because they failed to match. By default, this count is +exactly the same as the number of lines that would have been output, but if the +-M (multiline) option is used (without -v), there may be more +suppressed lines than the count (that is, the number of matches). +
+
+If no lines are selected, the number zero is output. If several files are +being scanned, a count is output for each of them and the -t option can +be used to cause a total to be output at the end. However, if the +--files-with-matches option is also used, only those files whose counts +are greater than zero are listed. When -c is used, the -A, +-B, and -C options are ignored. +

+

+--colour, --color +If this option is given without any data, it is equivalent to "--colour=auto". +If data is required, it must be given in the same shell item, separated by an +equals sign. +

+

+--colour=value, --color=value +This option specifies under what circumstances the parts of a line that matched +a pattern should be coloured in the output. It is ignored if +--file-offsets, --line-offsets, or --output is set. By +default, output is not coloured. The value for the --colour option (which +is optional, see above) may be "never", "always", or "auto". In the latter +case, colouring happens only if the standard output is connected to a terminal. +More resources are used when colouring is enabled, because pcre2grep has +to search for all possible matches in a line, not just one, in order to colour +them all. +
+
+The colour that is used can be specified by setting one of the environment +variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or +PCREGREP_COLOR, which are checked in that order. If none of these are set, +pcre2grep looks for GREP_COLORS or GREP_COLOR (in that order). The value +of the variable should be a string of two numbers, separated by a semicolon, +except in the case of GREP_COLORS, which must start with "ms=" or "mt=" +followed by two semicolon-separated colours, terminated by the end of the +string or by a colon. If GREP_COLORS does not start with "ms=" or "mt=" it is +ignored, and GREP_COLOR is checked. +
+
+If the string obtained from one of the above variables contains any characters +other than semicolon or digits, the setting is ignored and the default colour +is used. The string is copied directly into the control string for setting +colour on a terminal, so it is your responsibility to ensure that the values +make sense. If no relevant environment variable is set, the default is "1;31", +which gives red. +

+

+-D action, --devices=action +If an input path is not a regular file or a directory, "action" specifies how +it is to be processed. Valid values are "read" (the default) or "skip" +(silently skip the path). +

+

+-d action, --directories=action +If an input path is a directory, "action" specifies how it is to be processed. +Valid values are "read" (the default in non-Windows environments, for +compatibility with GNU grep), "recurse" (equivalent to the -r option), or +"skip" (silently skip the path, the default in Windows environments). In the +"read" case, directories are read as if they were ordinary files. In some +operating systems the effect of reading a directory like this is an immediate +end-of-file; in others it may provoke an error. +

+

+--depth-limit=number +See --match-limit below. +

+

+-E, --case-restrict +When case distinctions are being ignored in Unicode mode, two ASCII letters (K +and S) will by default match Unicode characters U+212A (Kelvin sign) and U+017F +(long S) respectively, as well as their lower case ASCII counterparts. When +this option is set, case equivalences are restricted such that no ASCII +character matches a non-ASCII character, and vice versa. +

+

+-e pattern, --regex=pattern, --regexp=pattern +Specify a pattern to be matched. This option can be used multiple times in +order to specify several patterns. It can also be used as a way of specifying a +single pattern that starts with a hyphen. When -e is used, no argument +pattern is taken from the command line; all arguments are treated as file +names. There is no limit to the number of patterns. They are applied to each +line in the order in which they are defined. +
+
+If -f is used with -e, the command line patterns are matched first, +followed by the patterns from the file(s), independent of the order in which +these options are specified. +

+

+--exclude=pattern +Files (but not directories) whose names match the pattern are skipped without +being processed. This applies to all files, whether listed on the command line, +obtained from --file-list, or by scanning a directory. The pattern is a +PCRE2 regular expression, and is matched against the final component of the +file name, not the entire path. The -F, -w, and -x options do +not apply to this pattern. The option may be given any number of times in order +to specify multiple patterns. If a file name matches both an --include +and an --exclude pattern, it is excluded. There is no short form for this +option. +

+

+--exclude-from=filename +Treat each non-empty line of the file as the data for an --exclude +option. What constitutes a newline when reading the file is the operating +system's default. The --newline option has no effect on this option. This +option may be given more than once in order to specify a number of files to +read. +

+

+--exclude-dir=pattern +Directories whose names match the pattern are skipped without being processed, +whatever the setting of the --recursive option. This applies to all +directories, whether listed on the command line, obtained from +--file-list, or by scanning a parent directory. The pattern is a PCRE2 +regular expression, and is matched against the final component of the directory +name, not the entire path. The -F, -w, and -x options do not +apply to this pattern. The option may be given any number of times in order to +specify more than one pattern. If a directory matches both --include-dir +and --exclude-dir, it is excluded. There is no short form for this +option. +

+

+-F, --fixed-strings +Interpret each data-matching pattern as a list of fixed strings, separated by +newlines, instead of as a regular expression. What constitutes a newline for +this purpose is controlled by the --newline option. The -w (match +as a word) and -x (match whole line) options can be used with -F. +They apply to each of the fixed strings. A line is selected if any of the fixed +strings are found in it (subject to -w or -x, if present). This +option applies only to the patterns that are matched against the contents of +files; it does not apply to patterns specified by any of the --include or +--exclude options. +

+

+-f filename, --file=filename +Read patterns from the file, one per line. As is the case with patterns on the +command line, no delimiters should be used. What constitutes a newline when +reading the file is the operating system's default interpretation of \n. The +--newline option has no effect on this option. Trailing white space is +removed from each line, and blank lines are ignored. An empty file contains no +patterns and therefore matches nothing. Patterns read from a file in this way +may contain binary zeros, which are treated as ordinary data characters. +
+
+If this option is given more than once, all the specified files are read. A +data line is output if any of the patterns match it. A file name can be given +as "-" to refer to the standard input. When -f is used, patterns +specified on the command line using -e may also be present; they are +matched before the file's patterns. However, no pattern is taken from the +command line; all arguments are treated as the names of paths to be searched. +

+

+--file-list=filename +Read a list of files and/or directories that are to be scanned from the given +file, one per line. What constitutes a newline when reading the file is the +operating system's default. Trailing white space is removed from each line, and +blank lines are ignored. These paths are processed before any that are listed +on the command line. The file name can be given as "-" to refer to the standard +input. If --file and --file-list are both specified as "-", +patterns are read first. This is useful only when the standard input is a +terminal, from which further lines (the list of files) can be read after an +end-of-file indication. If this option is given more than once, all the +specified files are read. +

+

+--file-offsets +Instead of showing lines or parts of lines that match, show each match as an +offset from the start of the file and a length, separated by a comma. In this +mode, --colour has no effect, and no context is shown. That is, the +-A, -B, and -C options are ignored. If there is more than one +match in a line, each of them is shown separately. This option is mutually +exclusive with --output, --line-offsets, and --only-matching. +

+

+--group-separator=text +Output this text string instead of two hyphens between groups of lines when +-A, -B, or -C is in use. See also --no-group-separator. +

+

+-H, --with-filename +Force the inclusion of the file name at the start of output lines when +searching a single file. The file name is not normally shown in this case. +By default, for matching lines, the file name is followed by a colon; for +context lines, a hyphen separator is used. The -Z option can be used to +change the terminator to a zero byte. If a line number is also being output, +it follows the file name. When the -M option causes a pattern to match +more than one line, only the first is preceded by the file name. This option +overrides any previous -h, -l, or -L options. +

+

+-h, --no-filename +Suppress the output of file names when searching multiple files. File names are +normally shown when multiple files are searched. By default, for matching +lines, the file name is followed by a colon; for context lines, a hyphen +separator is used. The -Z option can be used to change the terminator to +a zero byte. If a line number is also being output, it follows the file name. +This option overrides any previous -H, -L, or -l options. +

+

+--heap-limit=number +See --match-limit below. +

+

+--help +Output a help message, giving brief details of the command options and file +type support, and then exit. Anything else on the command line is +ignored. +

+

+-I +Ignore binary files. This is equivalent to +--binary-files=without-match. +

+

+-i, --ignore-case +Ignore upper/lower case distinctions when pattern matching. This applies when +matching path names for inclusion or exclusion as well as when matching lines +in files. +

+

+--include=pattern +If any --include patterns are specified, the only files that are +processed are those whose names match one of the patterns and do not match an +--exclude pattern. This option does not affect directories, but it +applies to all files, whether listed on the command line, obtained from +--file-list, or by scanning a directory. The pattern is a PCRE2 regular +expression, and is matched against the final component of the file name, not +the entire path. The -F, -w, and -x options do not apply to +this pattern. The option may be given any number of times. If a file name +matches both an --include and an --exclude pattern, it is excluded. +There is no short form for this option. +

+

+--include-from=filename +Treat each non-empty line of the file as the data for an --include +option. What constitutes a newline for this purpose is the operating system's +default. The --newline option has no effect on this option. This option +may be given any number of times; all the files are read. +

+

+--include-dir=pattern +If any --include-dir patterns are specified, the only directories that +are processed are those whose names match one of the patterns and do not match +an --exclude-dir pattern. This applies to all directories, whether listed +on the command line, obtained from --file-list, or by scanning a parent +directory. The pattern is a PCRE2 regular expression, and is matched against +the final component of the directory name, not the entire path. The -F, +-w, and -x options do not apply to this pattern. The option may be +given any number of times. If a directory matches both --include-dir and +--exclude-dir, it is excluded. There is no short form for this option. +

+

+-L, --files-without-match +Instead of outputting lines from the files, just output the names of the files +that do not contain any lines that would have been output. Each file name is +output once, on a separate line by default, but if the -Z option is set, +they are separated by zero bytes instead of newlines. This option overrides any +previous -H, -h, or -l options. +

+

+-l, --files-with-matches +Instead of outputting lines from the files, just output the names of the files +containing lines that would have been output. Each file name is output once, on +a separate line, but if the -Z option is set, they are separated by zero +bytes instead of newlines. Searching normally stops as soon as a matching line +is found in a file. However, if the -c (count) option is also used, +matching continues in order to obtain the correct count, and those files that +have at least one match are listed along with their counts. Using this option +with -c is a way of suppressing the listing of files with no matches that +occurs with -c on its own. This option overrides any previous -H, +-h, or -L options. +

+

+--label=name +This option supplies a name to be used for the standard input when file names +are being output. If not supplied, "(standard input)" is used. There is no +short form for this option. +

+

+--line-buffered +When this option is given, non-compressed input is read and processed line by +line, and the output is flushed after each write. By default, input is read in +large chunks, unless pcre2grep can determine that it is reading from a +terminal, which is currently possible only in Unix-like environments or +Windows. Output to a terminal is normally automatically flushed by the operating +system. This option can be useful when the input or output is attached to a +pipe and you do not want pcre2grep to buffer up large amounts of data. +However, its use will affect performance, and the -M (multiline) option +ceases to work. When input is from a compressed .gz or .bz2 file, +--line-buffered is ignored. +

+

+--line-offsets +Instead of showing lines or parts of lines that match, show each match as a +line number, the offset from the start of the line, and a length. The line +number is terminated by a colon (as usual; see the -n option), and the +offset and length are separated by a comma. In this mode, --colour has no +effect, and no context is shown. That is, the -A, -B, and -C +options are ignored. If there is more than one match in a line, each of them is +shown separately. This option is mutually exclusive with --output, +--file-offsets, and --only-matching. +

+

+--locale=locale-name +This option specifies a locale to be used for pattern matching. It overrides +the value in the LC_ALL or LC_CTYPE environment variables. If no +locale is specified, the PCRE2 library's default (usually the "C" locale) is +used. There is no short form for this option. +

+

+-M, --multiline +Allow patterns to match more than one line. When this option is set, the PCRE2 +library is called in "multiline" mode, and a match is allowed to continue past +the end of the initial line and onto one or more subsequent lines. +
+
+Patterns used with -M may usefully contain literal newline characters and +internal occurrences of ^ and $ characters, because in multiline mode these can +match at internal newlines. Because pcre2grep is scanning multiple lines, +the \Z and \z assertions match only at the end of the last line in the file. +The \A assertion matches at the start of the first line of a match. This can +be any line in the file; it is not anchored to the first line. +
+
+The output for a successful match may consist of more than one line. The first +line is the line in which the match started, and the last line is the line in +which the match ended. If the matched string ends with a newline sequence, the +output ends at the end of that line. If -v is set, none of the lines in a +multi-line match are output. Once a match has been handled, scanning restarts +at the beginning of the line after the one in which the match ended. +
+
+The newline sequence that separates multiple lines must be matched as part of +the pattern. For example, to find the phrase "regular expression" in a file +where "regular" might be at the end of a line and "expression" at the start of +the next line, you could use this command: +

+  pcre2grep -M 'regular\s+expression' <file>
+
+The \s escape sequence matches any white space character, including newlines, +and is followed by + so as to match trailing white space on the first line as +well as possibly handling a two-character newline sequence. +
+
+There is a limit to the number of lines that can be matched, imposed by the way +that pcre2grep buffers the input file as it scans it. With a sufficiently +large processing buffer, this should not be a problem. +
+
+The -M option does not work when input is read line by line (see +--line-buffered). +

+

+-m number, --max-count=number +Stop processing after finding number matching lines, or non-matching +lines if -v is also set. Any trailing context lines are output after the +final match. In multiline mode, each multiline match counts as just one line +for this purpose. If this limit is reached when reading the standard input from +a regular file, the file is left positioned just after the last matching line. +If -c is also set, the count that is output is never greater than +number. This option has no effect if used with -L, -l, or +-q, or when just checking for a match in a binary file. +

+

+--match-limit=number +Processing some regular expression patterns may take a very long time to search +for all possible matching strings. Others may require a very large amount of +memory. There are three options that set resource limits for matching. +
+
+The --match-limit option provides a means of limiting computing resource +usage when processing patterns that are not going to match, but which have a +very large number of possibilities in their search trees. The classic example +is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a +counter that is incremented each time around its main processing loop. If the +value set by --match-limit is reached, an error occurs. +
+
+The --heap-limit option specifies, as a number of kibibytes (units of +1024 bytes), the maximum amount of heap memory that may be used for matching. +
+
+The --depth-limit option limits the depth of nested backtracking points, +which indirectly limits the amount of memory that is used. The amount of memory +needed for each backtracking point depends on the number of capturing +parentheses in the pattern, so the amount of memory that is used before this +limit acts varies from pattern to pattern. This limit is of use only if it is +set smaller than --match-limit. +
+
+There are no short forms for these options. The default limits can be set +when the PCRE2 library is compiled; if they are not specified, the defaults +are very large and so effectively unlimited. +

+

+--max-buffer-size=number +This limits the expansion of the processing buffer, whose initial size can be +set by --buffer-size. The maximum buffer size is silently forced to be no +smaller than the starting buffer size. +

+

+-N newline-type, --newline=newline-type +Six different conventions for indicating the ends of lines in scanned files are +supported. For example: +

+  pcre2grep -N CRLF 'some pattern' <file>
+
+The newline type may be specified in upper, lower, or mixed case. If the +newline type is NUL, lines are separated by binary zero characters. The other +types are the single-character sequences CR (carriage return) and LF +(linefeed), the two-character sequence CRLF, an "anycrlf" type, which +recognizes any of the preceding three types, and an "any" type, for which any +Unicode line ending sequence is assumed to end a line. The Unicode sequences +are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed, +U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS +(paragraph separator, U+2029). +
+
+When the PCRE2 library is built, a default line-ending sequence is specified. +This is normally the standard sequence for the operating system. Unless +otherwise specified by this option, pcre2grep uses the library's default. +
+
+This option makes it possible to use pcre2grep to scan files that have +come from other environments without having to modify their line endings. If +the data that is being scanned does not agree with the convention set by this +option, pcre2grep may behave in strange ways. Note that this option does +not apply to files specified by the -f, --file-list, --exclude-from, or +--include-from options, which are expected to use the operating system's +standard newline sequence. +

+

+-n, --line-number +Precede each output line by its line number in the file, followed by a colon +for matching lines or a hyphen for context lines. If the file name is also +being output, it precedes the line number. When the -M option causes a +pattern to match more than one line, only the first is preceded by its line +number. This option is forced if --line-offsets is used. +

+

+--no-group-separator +Do not output a separator between groups of lines when -A, -B, or +-C is in use. The default is to output a line containing two hyphens. See +also --group-separator. +

+

+--no-jit +If the PCRE2 library is built with support for just-in-time compiling (which +speeds up matching), pcre2grep automatically makes use of this, unless it +was explicitly disabled at build time. This option can be used to disable the +use of JIT at run time. It is provided for testing and working around problems. +It should never be needed in normal use. +

+

+-O text, --output=text +When there is a match, instead of outputting the line that matched, output just +the text specified in this option, followed by an operating-system standard +newline. In this mode, --colour has no effect, and no context is shown. +That is, the -A, -B, and -C options are ignored. The +--newline option has no effect on this option, which is mutually +exclusive with --only-matching, --file-offsets, and +--line-offsets. However, like --only-matching, if there is more +than one match in a line, each of them causes a line of output. +
+
+Escape sequences starting with a dollar character may be used to insert the +contents of the matched part of the line and/or captured substrings into the +text. +
+
+$<digits> or ${<digits>} is replaced by the captured substring of the given +decimal number; zero substitutes the whole match. If the number is greater than +the number of capturing substrings, or if the capture is unset, the replacement +is empty. +
+
+$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by +newline; $r by carriage return; $t by tab; $v by vertical tab. +
+
+$o<digits> or $o{<digits>} is replaced by the character whose code point is the +given octal number. In the first form, up to three octal digits are processed. +When more digits are needed in Unicode mode to specify a wide character, the +second form must be used. +
+
+$x<digits> or $x{<digits>} is replaced by the character represented by the +given hexadecimal number. In the first form, up to two hexadecimal digits are +processed. When more digits are needed in Unicode mode to specify a wide +character, the second form must be used. +
+
+Any other character is substituted by itself. In particular, $$ is replaced by +a single dollar. +

+

+-o, --only-matching +Show only the part of the line that matched a pattern instead of the whole +line. In this mode, no context is shown. That is, the -A, -B, and +-C options are ignored. If there is more than one match in a line, each +of them is shown separately, on a separate line of output. If -o is +combined with -v (invert the sense of the match to find non-matching +lines), no output is generated, but the return code is set appropriately. If +the matched portion of the line is empty, nothing is output unless the file +name or line number are being printed, in which case they are shown on an +otherwise empty line. This option is mutually exclusive with --output, +--file-offsets and --line-offsets. +

+

+-onumber, --only-matching=number +Show only the part of the line that matched the capturing parentheses of the +given number. Up to 50 capturing parentheses are supported by default. This +limit can be changed via the --om-capture option. A pattern may contain +any number of capturing parentheses, but only those whose number is within the +limit can be accessed by -o. An error occurs if the number specified by +-o is greater than the limit. +
+
+-o0 is the same as -o without a number. Because these options can be +given without an argument (see above), if an argument is present, it must be +given in the same shell item, for example, -o3 or --only-matching=2. The +comments given for the non-argument case above also apply to this option. If +the specified capturing parentheses do not exist in the pattern, or were not +set in the match, nothing is output unless the file name or line number are +being output. +
+
+If this option is given multiple times, multiple substrings are output for each +match, in the order the options are given, and all on one line. For example, +-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and +then 3 again to be output. By default, there is no separator (but see the next +but one option). +

+

+--om-capture=number +Set the number of capturing parentheses that can be accessed by -o. The +default is 50. +

+

+--om-separator=text +Specify a separating string for multiple occurrences of -o. The default +is an empty string. Separating strings are never coloured. +

+

+-P, --no-ucp +Starting from release 10.43, when UTF/Unicode mode is specified with -u +or -U, the PCRE2_UCP option is used by default. This means that the +POSIX classes in patterns match more than just ASCII characters. For example, +[:digit:] matches any Unicode decimal digit. The --no-ucp option +suppresses PCRE2_UCP, thus restricting the POSIX classes to ASCII characters, +as was the case in earlier releases. Note that there are now more fine-grained +option settings within patterns that affect individual classes. For example, +when in UCP mode, the sequence (?aP) restricts [:word:] to ASCII letters, while +allowing \w to match Unicode letters and digits. +

+

+-q, --quiet +Work quietly, that is, display nothing except error messages. The exit +status indicates whether or not any matches were found. +

+

+-r, --recursive +If any given path is a directory, recursively scan the files it contains, +taking note of any --include and --exclude settings. By default, a +directory is read as a normal file; in some operating systems this gives an +immediate end-of-file. This option is a shorthand for setting the -d +option to "recurse". +

+

+--recursion-limit=number +This is an obsolete synonym for --depth-limit. See --match-limit +above for details. +

+

+-s, --no-messages +Suppress error messages about non-existent or unreadable files. Such files are +quietly skipped. However, the return code is still 2, even if matches were +found in other files. +

+

+-t, --total-count +This option is useful when scanning more than one file. If used on its own, +-t suppresses all output except for a grand total number of matching +lines (or non-matching lines if -v is used) in all the files. If -t +is used with -c, a grand total is output except when the previous output +is just one line. In other words, it is not output when just one file's count +is listed. If file names are being output, the grand total is preceded by +"TOTAL:". Otherwise, it appears as just another number. The -t option is +ignored when used with -L (list files without matches), because the grand +total would always be zero. +

+

+-u, --utf +Operate in UTF/Unicode mode. This option is available only if PCRE2 has been +compiled with UTF-8 support. All patterns (including those for any +--exclude and --include options) and all lines that are scanned +must be valid strings of UTF-8 characters. If an invalid UTF-8 string is +encountered, an error occurs. +

+

+-U, --utf-allow-invalid +As --utf, but in addition subject lines may contain invalid UTF-8 code +unit sequences. These can never form part of any pattern match. Patterns +themselves, however, must still be valid UTF-8 strings. This facility allows +valid UTF-8 strings to be sought within arbitrary byte sequences in executable +or other binary files. For more details about matching in non-valid UTF-8 +strings, see the +pcre2unicode(3) +documentation. +

+

+-V, --version +Write the version numbers of pcre2grep and the PCRE2 library to the +standard output and then exit. Anything else on the command line is +ignored. +

+

+-v, --invert-match +Invert the sense of the match, so that lines which do not match any of +the patterns are the ones that are found. When this option is set, options such +as --only-matching and --output, which specify parts of a match +that are to be output, are ignored. +

+

+-w, --word-regex, --word-regexp +Force the patterns only to match "words". That is, there must be a word +boundary at the start and end of each matched string. This is equivalent to +having "\b(?:" at the start of each pattern, and ")\b" at the end. This +option applies only to the patterns that are matched against the contents of +files; it does not apply to patterns specified by any of the --include or +--exclude options. +

+

+-x, --line-regex, --line-regexp +Force the patterns to start matching only at the beginnings of lines, and in +addition, require them to match entire lines. In multiline mode the match may +be more than one line. This is equivalent to having "^(?:" at the start of each +pattern and ")$" at the end. This option applies only to the patterns that are +matched against the contents of files; it does not apply to patterns specified +by any of the --include or --exclude options. +

+

+-Z, --null +Terminate file names in the regular output with a zero byte (the NUL +character) instead of what would normally appear. This is useful when file +names contain unusual characters such as colons, hyphens, or even newlines. The +option does not apply to file names in error messages. +

+
ENVIRONMENT VARIABLES
+

+The environment variables LC_ALL and LC_CTYPE are examined, in that +order, for a locale. The first one that is set is used. This can be overridden +by the --locale option. If no locale is set, the PCRE2 library's default +(usually the "C" locale) is used. +

+
NEWLINES
+

+The -N (--newline) option allows pcre2grep to scan files with +newline conventions that differ from the default. This option affects only the +way scanned files are processed. It does not affect the interpretation of files +specified by the -f, --file-list, --exclude-from, or +--include-from options. +

+

+Any parts of the scanned input files that are written to the standard output +are copied with whatever newline sequences they have in the input. However, if +the final line of a file is output, and it does not end with a newline +sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF +or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a +single NL is used. +

+

+The newline setting does not affect the way in which pcre2grep writes +newlines in informational messages to the standard output and error streams. +Under Windows, the standard output is set to be binary, so that "\r\n" at the +ends of output lines that are copied from the input is not converted to +"\r\r\n" by the C I/O library. This means that any messages written to the +standard output must end with "\r\n". For all other operating systems, and +for all messages to the standard error stream, "\n" is used. +

+
OPTIONS COMPATIBILITY WITH GNU GREP
+

+Many of the short and long forms of pcre2grep's options are the same as +in the GNU grep program. Any long option of the form --xxx-regexp +(GNU terminology) is also available as --xxx-regex (PCRE2 terminology). +However, the --case-restrict, --depth-limit, -E, +--file-list, --file-offsets, --heap-limit, +--include-dir, --line-offsets, --locale, --match-limit, +-M, --multiline, -N, --newline, --no-ucp, +--om-separator, --output, -P, -u, --utf, +-U, and --utf-allow-invalid options are specific to +pcre2grep, as is the use of the --only-matching option with a +capturing parentheses number. +

+

+Although most of the common options work the same way, a few are different in +pcre2grep. For example, the --include option's argument is a glob +for GNU grep, but in pcre2grep it is a regular expression to which +the -i option applies. If both the -c and -l options are +given, GNU grep lists only file names, without counts, but pcre2grep +gives the counts as well. +

+
OPTIONS WITH DATA
+

+There are four different ways in which an option with data can be specified. +If a short form option is used, the data may follow immediately, or (with one +exception) in the next command line item. For example: +

+  -f/some/file
+  -f /some/file
+
+The exception is the -o option, which may appear with or without data. +Because of this, if data is present, it must follow immediately in the same +item, for example -o3. +

+

+If a long form option is used, the data may appear in the same command line +item, separated by an equals character, or (with two exceptions) it may appear +in the next command line item. For example: +

+  --file=/some/file
+  --file /some/file
+
+Note, however, that if you want to supply a file name beginning with ~ as data +in a shell command, and have the shell expand ~ to a home directory, you must +separate the file name from the option, because the shell does not treat ~ +specially unless it is at the start of an item. +

+

+The exceptions to the above are the --colour (or --color) and +--only-matching options, for which the data is optional. If one of these +options does have data, it must be given in the first form, using an equals +character. Otherwise pcre2grep will assume that it has no data. +

+
USING PCRE2'S CALLOUT FACILITY
+

+pcre2grep has, by default, support for calling external programs or +scripts or echoing specific strings during matching by making use of PCRE2's +callout facility. However, this support can be completely or partially disabled +when pcre2grep is built. You can find out whether your binary has support +for callouts by running it with the --help option. If callout support is +completely disabled, all callouts in patterns are ignored by pcre2grep. +If the facility is partially disabled, calling external programs is not +supported, and callouts that request it are ignored. +

+

+A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is +either a number or a quoted string (see the +pcre2callout +documentation for details). Numbered callouts are ignored by pcre2grep; +only callouts with string arguments are useful. +

+
+Echoing a specific string +
+

+Starting the callout string with a pipe character invokes an echoing facility +that avoids calling an external program or script. This facility is always +available, provided that callouts were not completely disabled when +pcre2grep was built. The rest of the callout string is processed as a +zero-terminated string, which means it should not contain any internal binary +zeros. It is written to the output, having first been passed through the same +escape processing as text from the --output (-O) option (see +above). However, $0 cannot be used to insert a matched substring because the +match is still in progress. Instead, the single character '0' is inserted. Any +syntax errors in the string (for example, a dollar not followed by another +character) cause the callout to be ignored. No terminator is added to the +output string, so if you want a newline, you must include it explicitly using +the escape $n. For example: +

+  pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
+
+Matching continues normally after the string is output. If you want to see only +the callout output but not any output from an actual match, you should end the +pattern with (*FAIL). +

+
+Calling external programs or scripts +
+

+This facility can be independently disabled when pcre2grep is built. It +is supported for Windows, where a call to _spawnvp() is used, for VMS, +where lib$spawn() is used, and for any Unix-like environment where +fork() and execv() are available. +

+

+If the callout string does not start with a pipe (vertical bar) character, it +is parsed into a list of substrings separated by pipe characters. The first +substring must be an executable name, with the following substrings specifying +arguments: +

+  executable_name|arg1|arg2|...
+
+Any substring (including the executable name) may contain escape sequences +started by a dollar character. These are the same as for the --output +(-O) option documented above, except that $0 cannot insert the matched +string because the match is still in progress. Instead, the character '0' +is inserted. If you need a literal dollar or pipe character in any +substring, use $$ or $| respectively. Here is an example: +
+  echo -e "abcde\n12345" | pcre2grep \
+    '(?x)(.)(..(.))
+    (?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
+
+  Output:
+
+    Arg1: [a] [bcd] [d] Arg2: |a| ()
+    abcde
+    Arg1: [1] [234] [4] Arg2: |1| ()
+    12345
+
+The parameters for the system call that is used to run the program or script +are zero-terminated strings. This means that binary zero characters in the +callout argument will cause premature termination of their substrings, and +therefore should not be present. Any syntax errors in the string (for example, +a dollar not followed by another character) cause the callout to be ignored. +If running the program fails for any reason (including the non-existence of the +executable), a local matching failure occurs and the matcher backtracks in the +normal way. +

+
MATCHING ERRORS
+

+It is possible to supply a regular expression that takes a very long time to +fail to match certain lines. Such patterns normally involve nested indefinite +repeats, for example: (a+)*\d when matched against a line of a's with no final +digit. The PCRE2 matching function has a resource limit that causes it to abort +in these circumstances. If this happens, pcre2grep outputs an error +message and the line that caused the problem to the standard error stream. If +there are more than 20 such errors, pcre2grep gives up. +

+

+The --match-limit option of pcre2grep can be used to set the +overall resource limit. There are also other limits that affect the amount of +memory used during matching; see the discussion of --heap-limit and +--depth-limit above. +

+
DIAGNOSTICS
+

+Exit status is 0 if any matches were found, 1 if no matches were found, and 2 +for syntax errors, overlong lines, non-existent or inaccessible files (even if +matches were found in other files) or too many matching errors. Using the +-s option to suppress error messages about inaccessible files does not +affect the return code. +

+

+When run under VMS, the return code is placed in the symbol PCRE2GREP_RC +because VMS does not distinguish between exit(0) and exit(1). +

+
SEE ALSO
+

+pcre2pattern(3), pcre2syntax(3), pcre2callout(3), +pcre2unicode(3). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 22 December 2023 +
+Copyright © 1997-2023 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2jit.html b/doc/html/pcre2jit.html new file mode 100644 index 0000000..d97d800 --- /dev/null +++ b/doc/html/pcre2jit.html @@ -0,0 +1,496 @@ + + +pcre2jit specification + + +

pcre2jit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
PCRE2 JUST-IN-TIME COMPILER SUPPORT
+

+Just-in-time compiling is a heavyweight optimization that can greatly speed up +pattern matching. However, it comes at the cost of extra processing before the +match is performed, so it is of most benefit when the same pattern is going to +be matched many times. This does not necessarily mean many calls of a matching +function; if the pattern is not anchored, matching attempts may take place many +times at various positions in the subject, even for a single call. Therefore, +if the subject string is very long, it may still pay to use JIT even for +one-off matches. JIT support is available for all of the 8-bit, 16-bit and +32-bit PCRE2 libraries. +

+

+JIT support applies only to the traditional Perl-compatible matching function. +It does not apply when the DFA matching function is being used. The code for +JIT support was written by Zoltan Herczeg. +

+
AVAILABILITY OF JIT SUPPORT
+

+JIT support is an optional feature of PCRE2. The "configure" option +--enable-jit (or equivalent CMake option) must be set when PCRE2 is built if +you want to use JIT. The support is limited to the following hardware +platforms: +

+  ARM 32-bit (v7, and Thumb2)
+  ARM 64-bit
+  IBM s390x 64 bit
+  Intel x86 32-bit and 64-bit
+  LoongArch 64 bit
+  MIPS 32-bit and 64-bit
+  Power PC 32-bit and 64-bit
+  RISC-V 32-bit and 64-bit
+
+If --enable-jit is set on an unsupported platform, compilation fails. +

+

+A client program can tell if JIT support is available by calling +pcre2_config() with the PCRE2_CONFIG_JIT option. The result is one if +PCRE2 was built with JIT support, and zero otherwise. However, having the JIT +code available does not guarantee that it will be used for any particular +match. One reason for this is that there are a number of options and pattern +items that are +not supported by JIT +(see below). Another reason is that in some environments JIT is unable to get +memory in which to build its compiled code. The only guarantee from +pcre2_config() is that if it returns zero, JIT will definitely not +be used. +

+

+A simple program does not need to check availability in order to use JIT when +possible. The API is implemented in a way that falls back to the interpretive +code if JIT is not available or cannot be used for a given match. For programs +that need the best possible performance, there is a +"fast path" +API that is JIT-specific. +

+
SIMPLE USE OF JIT
+

+To make use of the JIT support in the simplest way, all you have to do is to +call pcre2_jit_compile() after successfully compiling a pattern with +pcre2_compile(). This function has two arguments: the first is the +compiled pattern pointer that was returned by pcre2_compile(), and the +second is zero or more of the following option bits: PCRE2_JIT_COMPLETE, +PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT. +

+

+If JIT support is not available, a call to pcre2_jit_compile() does +nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled pattern +is passed to the JIT compiler, which turns it into machine code that executes +much faster than the normal interpretive code, but yields exactly the same +results. The returned value from pcre2_jit_compile() is zero on success, +or a negative error code. +

+

+There is a limit to the size of pattern that JIT supports, imposed by the size +of machine stack that it uses. The exact rules are not documented because they +may change at any time, in particular, when new optimizations are introduced. +If a pattern is too big, a call to pcre2_jit_compile() returns +PCRE2_ERROR_NOMEMORY. +

+

+PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for complete +matches. If you want to run partial matches using the PCRE2_PARTIAL_HARD or +PCRE2_PARTIAL_SOFT options of pcre2_match(), you should set one or both +of the other options as well as, or instead of, PCRE2_JIT_COMPLETE. The JIT +compiler generates different optimized code for each of the three modes +(normal, soft partial, hard partial). When pcre2_match() is called, the +appropriate code is run if it is available. Otherwise, the pattern is matched +using interpretive code. +

+

+You can call pcre2_jit_compile() multiple times for the same compiled +pattern. It does nothing if it has previously compiled code for any of the +option bits. For example, you can call it once with PCRE2_JIT_COMPLETE and +(perhaps later, when you find you need partial matching) again with +PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it will ignore +PCRE2_JIT_COMPLETE and just compile code for partial matching. If +pcre2_jit_compile() is called with no option bits set, it immediately +returns zero. This is an alternative way of testing whether JIT is available. +

+

+At present, it is not possible to free JIT compiled code except when the entire +compiled pattern is freed by calling pcre2_code_free(). +

+

+In some circumstances you may need to call additional functions. These are +described in the section entitled +"Controlling the JIT stack" +below. +

+

+There are some pcre2_match() options that are not supported by JIT, and +there are also some pattern items that JIT cannot handle. Details are given +below. +In both cases, matching automatically falls back to the interpretive code. If +you want to know whether JIT was actually used for a particular match, you +should arrange for a JIT callback function to be set up as described in the +section entitled +"Controlling the JIT stack" +below, even if you do not need to supply a non-default JIT stack. Such a +callback function is called whenever JIT code is about to be obeyed. If the +match-time options are not right for JIT execution, the callback function is +not obeyed. +

+

+If the JIT compiler finds an unsupported item, no JIT data is generated. You +can find out if JIT compilation was successful for a compiled pattern by +calling pcre2_pattern_info() with the PCRE2_INFO_JITSIZE option. A +non-zero result means that JIT compilation was successful. A result of 0 means +that JIT support is not available, or the pattern was not processed by +pcre2_jit_compile(), or the JIT compiler was not able to handle the +pattern. Successful JIT compilation does not, however, guarantee the use of JIT +at match time because there are some match time options that are not supported +by JIT. +

+
MATCHING SUBJECTS CONTAINING INVALID UTF
+

+When a pattern is compiled with the PCRE2_UTF option, subject strings are +normally expected to be a valid sequence of UTF code units. By default, this is +checked at the start of matching and an error is generated if invalid UTF is +detected. The PCRE2_NO_UTF_CHECK option can be passed to pcre2_match() to +skip the check (for improved performance) if you are sure that a subject string +is valid. If this option is used with an invalid string, the result is +undefined. The calling program may crash or loop or otherwise misbehave. +

+

+However, a way of running matches on strings that may contain invalid UTF +sequences is available. Calling pcre2_compile() with the +PCRE2_MATCH_INVALID_UTF option has two effects: it tells the interpreter in +pcre2_match() to support invalid UTF, and, if pcre2_jit_compile() +is subsequently called, the compiled JIT code also supports invalid UTF. +Details of how this support works, in both the JIT and the interpretive cases, +are given in the +pcre2unicode +documentation. +

+

+There is also an obsolete option for pcre2_jit_compile() called +PCRE2_JIT_INVALID_UTF, which currently exists only for backward compatibility. +It is superseded by the pcre2_compile() option PCRE2_MATCH_INVALID_UTF +and should no longer be used. It may be removed in future. +

+
UNSUPPORTED OPTIONS AND PATTERN ITEMS
+

+The pcre2_match() options that are supported for JIT matching are +PCRE2_COPY_MATCHED_SUBJECT, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, +PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and +PCRE2_PARTIAL_SOFT. The PCRE2_ANCHORED and PCRE2_ENDANCHORED options are not +supported at match time. +

+

+If the PCRE2_NO_JIT option is passed to pcre2_match() it disables the +use of JIT, forcing matching by the interpreter code. +

+

+The only unsupported pattern items are \C (match a single data unit) when +running in a UTF mode, and a callout immediately before an assertion condition +in a conditional group. +

+
RETURN VALUES FROM JIT MATCHING
+

+When a pattern is matched using JIT, the return values are the same as those +given by the interpretive pcre2_match() code, with the addition of one +new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the memory used for +the JIT stack was insufficient. See +"Controlling the JIT stack" +below for a discussion of JIT stack usage. +

+

+The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if searching +a very large pattern tree goes on for too long, as it is in the same +circumstance when JIT is not used, but the details of exactly what is counted +are not the same. The PCRE2_ERROR_DEPTHLIMIT error code is never returned +when JIT matching is used. +

+
CONTROLLING THE JIT STACK
+

+When the compiled JIT code runs, it needs a block of memory to use as a stack. +By default, it uses 32KiB on the machine stack. However, some large or +complicated patterns need more than this. The error PCRE2_ERROR_JIT_STACKLIMIT +is given when there is not enough stack. Three functions are provided for +managing blocks of memory for use as JIT stacks. There is further discussion +about the use of JIT stacks in the section entitled +"JIT stack FAQ" +below. +

+

+The pcre2_jit_stack_create() function creates a JIT stack. Its arguments +are a starting size, a maximum size, and a general context (for memory +allocation functions, or NULL for standard memory allocation). It returns a +pointer to an opaque structure of type pcre2_jit_stack, or NULL if there +is an error. The pcre2_jit_stack_free() function is used to free a stack +that is no longer needed. If its argument is NULL, this function returns +immediately, without doing anything. (For the technically minded: the address +space is allocated by mmap or VirtualAlloc.) A maximum stack size of 512KiB to +1MiB should be more than enough for any pattern. +

+

+The pcre2_jit_stack_assign() function specifies which stack JIT code +should use. Its arguments are as follows: +

+  pcre2_match_context  *mcontext
+  pcre2_jit_callback    callback
+  void                 *data
+
+The first argument is a pointer to a match context. When this is subsequently +passed to a matching function, its information determines which JIT stack is +used. If this argument is NULL, the function returns immediately, without doing +anything. There are three cases for the values of the other two options: +
+  (1) If callback is NULL and data is NULL, an internal 32KiB block
+      on the machine stack is used. This is the default when a match
+      context is created.
+
+  (2) If callback is NULL and data is not NULL, data must be
+      a pointer to a valid JIT stack, the result of calling
+      pcre2_jit_stack_create().
+
+  (3) If callback is not NULL, it must point to a function that is
+      called with data as an argument at the start of matching, in
+      order to set up a JIT stack. If the return from the callback
+      function is NULL, the internal 32KiB stack is used; otherwise the
+      return value must be a valid JIT stack, the result of calling
+      pcre2_jit_stack_create().
+
+A callback function is obeyed whenever JIT code is about to be run; it is not +obeyed when pcre2_match() is called with options that are incompatible +for JIT matching. A callback function can therefore be used to determine +whether a match operation was executed by JIT or by the interpreter. +

+

+You may safely use the same JIT stack for more than one pattern (either by +assigning directly or by callback), as long as the patterns are matched +sequentially in the same thread. Currently, the only way to set up +non-sequential matches in one thread is to use callouts: if a callout function +starts another match, that match must use a different JIT stack to the one used +for currently suspended match(es). +

+

+In a multithread application, if you do not specify a JIT stack, or if you +assign or pass back NULL from a callback, that is thread-safe, because each +thread has its own machine stack. However, if you assign or pass back a +non-NULL JIT stack, this must be a different stack for each thread so that the +application is thread-safe. +

+

+Strictly speaking, even more is allowed. You can assign the same non-NULL stack +to a match context that is used by any number of patterns, as long as they are +not used for matching by multiple threads at the same time. For example, you +could use the same stack in all compiled patterns, with a global mutex in the +callback to wait until the stack is available for use. However, this is an +inefficient solution, and not recommended. +

+

+This is a suggestion for how a multithreaded program that needs to set up +non-default JIT stacks might operate: +

+  During thread initialization
+    thread_local_var = pcre2_jit_stack_create(...)
+
+  During thread exit
+    pcre2_jit_stack_free(thread_local_var)
+
+  Use a one-line callback function
+    return thread_local_var
+
+All the functions described in this section do nothing if JIT is not available. +

+
JIT STACK FAQ
+

+(1) Why do we need JIT stacks? +
+
+PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack where +the local data of the current node is pushed before checking its child nodes. +Allocating real machine stack on some platforms is difficult. For example, the +stack chain needs to be updated every time if we extend the stack on PowerPC. +Although it is possible, its updating time overhead decreases performance. So +we do the recursion in memory. +

+

+(2) Why don't we simply allocate blocks of memory with malloc()? +
+
+Modern operating systems have a nice feature: they can reserve an address space +instead of allocating memory. We can safely allocate memory pages inside this +address space, so the stack could grow without moving memory data (this is +important because of pointers). Thus we can allocate 1MiB address space, and +use only a single memory page (usually 4KiB) if that is enough. However, we can +still grow up to 1MiB anytime if needed. +

+

+(3) Who "owns" a JIT stack? +
+
+The owner of the stack is the user program, not the JIT studied pattern or +anything else. The user program must ensure that if a stack is being used by +pcre2_match(), (that is, it is assigned to a match context that is passed +to the pattern currently running), that stack must not be used by any other +threads (to avoid overwriting the same memory area). The best practice for +multithreaded programs is to allocate a stack for each thread, and return this +stack through the JIT callback function. +

+

+(4) When should a JIT stack be freed? +
+
+You can free a JIT stack at any time, as long as it will not be used by +pcre2_match() again. When you assign the stack to a match context, only a +pointer is set. There is no reference counting or any other magic. You can free +compiled patterns, contexts, and stacks in any order, anytime. +Just do not call pcre2_match() with a match context pointing to an +already freed stack, as that will cause SEGFAULT. (Also, do not free a stack +currently used by pcre2_match() in another thread). You can also replace +the stack in a context at any time when it is not in use. You should free the +previous stack before assigning a replacement. +

+

+(5) Should I allocate/free a stack every time before/after calling +pcre2_match()? +
+
+No, because this is too costly in terms of resources. However, you could +implement some clever idea which releases the stack if it is not used in, let's +say, two minutes. The JIT callback can help to achieve this without keeping a +list of patterns. +

+

+(6) OK, the stack is for long term memory allocation. But what happens if a +pattern causes stack overflow with a stack of 1MiB? Is that 1MiB kept until the +stack is freed? +
+
+Especially on embedded systems, it might be a good idea to release memory +sometimes without freeing the stack. There is no API for this at the moment. +Probably a function call which returns with the currently allocated memory for +any stack and another which allows releasing memory (shrinking the stack) would +be a good idea if someone needs this. +

+

+(7) This is too much of a headache. Isn't there any better solution for JIT +stack handling? +
+
+No, thanks to Windows. If POSIX threads were used everywhere, we could throw +out this complicated API. +

+
FREEING JIT SPECULATIVE MEMORY
+

+void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); +

+

+The JIT executable allocator does not free all memory when it is possible. It +expects new allocations, and keeps some free memory around to improve +allocation speed. However, in low memory conditions, it might be better to free +all possible memory. You can cause this to happen by calling +pcre2_jit_free_unused_memory(). Its argument is a general context, for custom +memory management, or NULL for standard memory management. +

+
EXAMPLE CODE
+

+This is a single-threaded example that specifies a JIT stack without using a +callback. A real program should include error checking after all the function +calls. +

+  int rc;
+  pcre2_code *re;
+  pcre2_match_data *match_data;
+  pcre2_match_context *mcontext;
+  pcre2_jit_stack *jit_stack;
+
+  re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
+    &errornumber, &erroffset, NULL);
+  rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
+  mcontext = pcre2_match_context_create(NULL);
+  jit_stack = pcre2_jit_stack_create(32*1024, 512*1024, NULL);
+  pcre2_jit_stack_assign(mcontext, NULL, jit_stack);
+  match_data = pcre2_match_data_create(re, 10);
+  rc = pcre2_match(re, subject, length, 0, 0, match_data, mcontext);
+  /* Process result */
+
+  pcre2_code_free(re);
+  pcre2_match_data_free(match_data);
+  pcre2_match_context_free(mcontext);
+  pcre2_jit_stack_free(jit_stack);
+
+
+

+
JIT FAST PATH API
+

+Because the API described above falls back to interpreted matching when JIT is +not available, it is convenient for programs that are written for general use +in many environments. However, calling JIT via pcre2_match() does have a +performance impact. Programs that are written for use where JIT is known to be +available, and which need the best possible performance, can instead use a +"fast path" API to call JIT matching directly instead of calling +pcre2_match() (obviously only for patterns that have been successfully +processed by pcre2_jit_compile()). +

+

+The fast path function is called pcre2_jit_match(), and it takes exactly +the same arguments as pcre2_match(). However, the subject string must be +specified with a length; PCRE2_ZERO_TERMINATED is not supported. Unsupported +option bits (for example, PCRE2_ANCHORED and PCRE2_ENDANCHORED) are ignored, as +is the PCRE2_NO_JIT option. The return values are also the same as for +pcre2_match(), plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial +or complete) is requested that was not compiled. +

+

+When you call pcre2_match(), as well as testing for invalid options, a +number of other sanity checks are performed on the arguments. For example, if +the subject pointer is NULL but the length is non-zero, an immediate error is +given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested +for validity. In the interests of speed, these checks do not happen on the JIT +fast path. If invalid UTF data is passed when PCRE2_MATCH_INVALID_UTF was not +set for pcre2_compile(), the result is undefined. The program may crash +or loop or give wrong results. In the absence of PCRE2_MATCH_INVALID_UTF you +should call pcre2_jit_match() in UTF mode only if you are sure the +subject is valid. +

+

+Bypassing the sanity checks and the pcre2_match() wrapping can give +speedups of more than 10%. +

+
SEE ALSO
+

+pcre2api(3), pcre2unicode(3) +

+
AUTHOR
+

+Philip Hazel (FAQ by Zoltan Herczeg) +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 21 February 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2limits.html b/doc/html/pcre2limits.html new file mode 100644 index 0000000..8152ed2 --- /dev/null +++ b/doc/html/pcre2limits.html @@ -0,0 +1,105 @@ + + +pcre2limits specification + + +

pcre2limits man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SIZE AND OTHER LIMITATIONS +
+

+There are some size limitations in PCRE2 but it is hoped that they will never +in practice be relevant. +

+

+The maximum size of a compiled pattern is approximately 64 thousand code units +for the 8-bit and 16-bit libraries if PCRE2 is compiled with the default +internal linkage size, which is 2 bytes for these libraries. If you want to +process regular expressions that are truly enormous, you can compile PCRE2 with +an internal linkage size of 3 or 4 (when building the 16-bit library, 3 is +rounded up to 4). See the README file in the source distribution and the +pcre2build +documentation for details. In these cases the limit is substantially larger. +However, the speed of execution is slower. In the 32-bit library, the internal +linkage size is always 4. +

+

+The maximum length of a source pattern string is essentially unlimited; it is +the largest number a PCRE2_SIZE variable can hold. However, the program that +calls pcre2_compile() can specify a smaller limit. +

+

+The maximum length (in code units) of a subject string is one less than the +largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an unsigned +integer type, usually defined as size_t. Its maximum value (that is +~(PCRE2_SIZE)0) is reserved as a special indicator for zero-terminated strings +and unset offsets. +

+

+All values in repeating quantifiers must be less than 65536. +

+

+There are two different limits that apply to branches of lookbehind assertions. +If every branch in such an assertion matches a fixed number of characters, +the maximum length of any branch is 65535 characters. If any branch matches a +variable number of characters, then the maximum matching length for every +branch is limited. The default limit is set at compile time, defaulting to 255, +but can be changed by the calling program. +

+

+There is no limit to the number of parenthesized groups, but there can be no +more than 65535 capture groups, and there is a limit to the depth of nesting of +parenthesized subpatterns of all kinds. This is imposed in order to limit the +amount of system stack used at compile time. The default limit can be specified +when PCRE2 is built; if not, the default is set to 250. An application can +change this limit by calling pcre2_set_parens_nest_limit() to set the limit in +a compile context. +

+

+The maximum length of a name for a named capture group is 32 code units, and the +maximum number of such groups is 10000. +

+

+The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb +is 255 code units for the 8-bit library and 65535 code units for the 16-bit and +32-bit libraries. +

+

+The maximum length of a string argument to a callout is the largest number a +32-bit unsigned integer can hold. +

+

+The maximum amount of heap memory used for matching is controlled by the heap +limit, which can be set in a pattern or in a match context. The default is a +very large number, effectively unlimited. +

+
+AUTHOR +
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
+REVISION +
+

+Last updated: August 2023 +
+Copyright © 1997-2023 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2matching.html b/doc/html/pcre2matching.html new file mode 100644 index 0000000..3b8b629 --- /dev/null +++ b/doc/html/pcre2matching.html @@ -0,0 +1,253 @@ + + +pcre2matching specification + + +

pcre2matching man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
PCRE2 MATCHING ALGORITHMS
+

+This document describes the two different algorithms that are available in +PCRE2 for matching a compiled regular expression against a given subject +string. The "standard" algorithm is the one provided by the pcre2_match() +function. This works in the same way as Perl's matching function, and provides a +Perl-compatible matching operation. The just-in-time (JIT) optimization that is +described in the +pcre2jit +documentation is compatible with this function. +

+

+An alternative algorithm is provided by the pcre2_dfa_match() function; +it operates in a different way, and is not Perl-compatible. This alternative +has advantages and disadvantages compared with the standard algorithm, and +these are described below. +

+

+When there is only one possible way in which a given subject string can match a +pattern, the two algorithms give the same answer. A difference arises, however, +when there are multiple possibilities. For example, if the pattern +

+  ^<.*>
+
+is matched against the string +
+  <something> <something else> <something further>
+
+there are three possible answers. The standard algorithm finds only one of +them, whereas the alternative algorithm finds all three. +

+
REGULAR EXPRESSIONS AS TREES
+

+The set of strings that are matched by a regular expression can be represented +as a tree structure. An unlimited repetition in the pattern makes the tree of +infinite size, but it is still a tree. Matching the pattern to a given subject +string (from a given starting point) can be thought of as a search of the tree. +There are two ways to search a tree: depth-first and breadth-first, and these +correspond to the two matching algorithms provided by PCRE2. +

+
THE STANDARD MATCHING ALGORITHM
+

+In the terminology of Jeffrey Friedl's book "Mastering Regular Expressions", +the standard algorithm is an "NFA algorithm". It conducts a depth-first search +of the pattern tree. That is, it proceeds along a single path through the tree, +checking that the subject matches what is required. When there is a mismatch, +the algorithm tries any alternatives at the current point, and if they all +fail, it backs up to the previous branch point in the tree, and tries the next +alternative branch at that level. This often involves backing up (moving to the +left) in the subject string as well. The order in which repetition branches are +tried is controlled by the greedy or ungreedy nature of the quantifier. +

+

+If a leaf node is reached, a matching string has been found, and at that point +the algorithm stops. Thus, if there is more than one possible match, this +algorithm returns the first one that it finds. Whether this is the shortest, +the longest, or some intermediate length depends on the way the alternations +and the greedy or ungreedy repetition quantifiers are specified in the +pattern. +

+

+Because it ends up with a single path through the tree, it is relatively +straightforward for this algorithm to keep track of the substrings that are +matched by portions of the pattern in parentheses. This provides support for +capturing parentheses and backreferences. +

+
THE ALTERNATIVE MATCHING ALGORITHM
+

+This algorithm conducts a breadth-first search of the tree. Starting from the +first matching point in the subject, it scans the subject string from left to +right, once, character by character, and as it does this, it remembers all the +paths through the tree that represent valid matches. In Friedl's terminology, +this is a kind of "DFA algorithm", though it is not implemented as a +traditional finite state machine (it keeps multiple states active +simultaneously). +

+

+Although the general principle of this matching algorithm is that it scans the +subject string only once, without backtracking, there is one exception: when a +lookaround assertion is encountered, the characters following or preceding the +current point have to be independently inspected. +

+

+The scan continues until either the end of the subject is reached, or there are +no more unterminated paths. At this point, terminated paths represent the +different matching possibilities (if there are none, the match has failed). +Thus, if there is more than one possible match, this algorithm finds all of +them, and in particular, it finds the longest. The matches are returned in +the output vector in decreasing order of length. There is an option to stop the +algorithm after the first match (which is necessarily the shortest) is found. +

+

+Note that the size of the vector needed to contain all the results depends on the +number of simultaneous matches, not on the number of parentheses in the +pattern. Using pcre2_match_data_create_from_pattern() to create the match +data block is therefore not advisable when doing DFA matching. +

+

+Note also that all the matches that are found start at the same point in the +subject. If the pattern +

+  cat(er(pillar)?)?
+
+is matched against the string "the caterpillar catchment", the result is the +three strings "caterpillar", "cater", and "cat" that start at the fifth +character of the subject. The algorithm does not automatically move on to find +matches that start at later positions. +

+

+PCRE2's "auto-possessification" optimization usually applies to character +repeats at the end of a pattern (as well as internally). For example, the +pattern "a\d+" is compiled as if it were "a\d++" because there is no point +even considering the possibility of backtracking into the repeated digits. For +DFA matching, this means that only one possible match is found. If you really +do want multiple matches in such cases, either use an ungreedy repeat +("a\d+?") or set the PCRE2_NO_AUTO_POSSESS option when compiling. +

+

+There are a number of features of PCRE2 regular expressions that are not +supported or behave differently in the alternative matching function. Those +that are not supported cause an error if encountered. +

+

+1. Because the algorithm finds all possible matches, the greedy or ungreedy +nature of repetition quantifiers is not relevant (though it may affect +auto-possessification, as just described). During matching, greedy and ungreedy +quantifiers are treated in exactly the same way. However, possessive +quantifiers can make a difference when what follows could also match what is +quantified, for example in a pattern like this: +

+  ^a++\w!
+
+This pattern matches "aaab!" but not "aaa!", which would be matched by a +non-possessive quantifier. Similarly, if an atomic group is present, it is +matched as if it were a standalone pattern at the current point, and the +longest match is then "locked in" for the rest of the overall pattern. +

+

+2. When dealing with multiple paths through the tree simultaneously, it is not +straightforward to keep track of captured substrings for the different matching +possibilities, and PCRE2's implementation of this algorithm does not attempt to +do this. This means that no captured substrings are available. +

+

+3. Because no substrings are captured, backreferences within the pattern are +not supported. +

+

+4. For the same reason, conditional expressions that use a backreference as the +condition or test for a specific group recursion are not supported. +

+

+5. Again for the same reason, script runs are not supported. +

+

+6. Because many paths through the tree may be active, the \K escape sequence, +which resets the start of the match when encountered (but may be on some paths +and not on others), is not supported. +

+

+7. Callouts are supported, but the value of the capture_top field is +always 1, and the value of the capture_last field is always 0. +

+

+8. The \C escape sequence, which (in the standard algorithm) always matches a +single code unit, even in a UTF mode, is not supported in these modes, because +the alternative algorithm moves through the subject string one character (not +code unit) at a time, for all active paths through the tree. +

+

+9. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not +supported. (*FAIL) is supported, and behaves like a failing negative assertion. +

+

+10. The PCRE2_MATCH_INVALID_UTF option for pcre2_compile() is not +supported by pcre2_dfa_match(). +

+
ADVANTAGES OF THE ALTERNATIVE ALGORITHM
+

+The main advantage of the alternative algorithm is that all possible matches +(at a single point in the subject) are automatically found, and in particular, +the longest match is found. To find more than one match at the same point using +the standard algorithm, you have to do kludgy things with callouts. +

+

+Partial matching is possible with this algorithm, though it has some +limitations. The +pcre2partial +documentation gives details of partial matching and discusses multi-segment +matching. +

+
DISADVANTAGES OF THE ALTERNATIVE ALGORITHM
+

+The alternative algorithm suffers from a number of disadvantages: +

+

+1. It is substantially slower than the standard algorithm. This is partly +because it has to search for all possible matches, but is also because it is +less susceptible to optimization. +

+

+2. Capturing parentheses, backreferences, script runs, and matching within +invalid UTF strings are not supported. +

+

+3. Although atomic groups are supported, their use does not provide the +performance advantage that it does for the standard algorithm. +

+

+4. JIT optimization is not supported. +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 19 January 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2partial.html b/doc/html/pcre2partial.html new file mode 100644 index 0000000..64116c4 --- /dev/null +++ b/doc/html/pcre2partial.html @@ -0,0 +1,408 @@ + + +pcre2partial specification + + +

pcre2partial man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
PARTIAL MATCHING IN PCRE2
+

+In normal use of PCRE2, if there is a match up to the end of a subject string, +but more characters are needed to match the entire pattern, PCRE2_ERROR_NOMATCH +is returned, just like any other failing match. There are circumstances where +it might be helpful to distinguish this "partial match" case. +

+

+One example is an application where the subject string is very long, and not +all available at once. The requirement here is to be able to do the matching +segment by segment, but special action is needed when a matched substring spans +the boundary between two segments. +

+

+Another example is checking a user input string as it is typed, to ensure that +it conforms to a required format. Invalid characters can be immediately +diagnosed and rejected, giving instant feedback. +

+

+Partial matching is a PCRE2-specific feature; it is not Perl-compatible. It is +requested by setting one of the PCRE2_PARTIAL_HARD or PCRE2_PARTIAL_SOFT +options when calling a matching function. The difference between the two +options is whether or not a partial match is preferred to an alternative +complete match, though the details differ between the two types of matching +function. If both options are set, PCRE2_PARTIAL_HARD takes precedence. +

+

+If you want to use partial matching with just-in-time optimized code, as well +as setting a partial match option for the matching function, you must also call +pcre2_jit_compile() with one or both of these options: +

+  PCRE2_JIT_PARTIAL_HARD
+  PCRE2_JIT_PARTIAL_SOFT
+
+PCRE2_JIT_COMPLETE should also be set if you are going to run non-partial +matches on the same pattern. Separate code is compiled for each mode. If the +appropriate JIT mode has not been compiled, interpretive matching code is used. +

+

+Setting a partial matching option disables two of PCRE2's standard +optimization hints. PCRE2 remembers the last literal code unit in a pattern, +and abandons matching immediately if it is not present in the subject string. +This optimization cannot be used for a subject string that might match only +partially. PCRE2 also remembers a minimum length of a matching string, and does +not bother to run the matching function on shorter strings. This optimization +is also disabled for partial matching. +

+
REQUIREMENTS FOR A PARTIAL MATCH
+

+A possible partial match occurs during matching when the end of the subject +string is reached successfully, but either more characters are needed to +complete the match, or the addition of more characters might change what is +matched. +

+

+Example 1: if the pattern is /abc/ and the subject is "ab", more characters are +definitely needed to complete a match. In this case both hard and soft matching +options yield a partial match. +

+

+Example 2: if the pattern is /ab+/ and the subject is "ab", a complete match +can be found, but the addition of more characters might change what is +matched. In this case, only PCRE2_PARTIAL_HARD returns a partial match; +PCRE2_PARTIAL_SOFT returns the complete match. +

+

+On reaching the end of the subject, when PCRE2_PARTIAL_HARD is set, if the next +pattern item is \z, \Z, \b, \B, or $ there is always a partial match. +Otherwise, for both options, the next pattern item must be one that inspects a +character, and at least one of the following must be true: +

+

+(1) At least one character has already been inspected. An inspected character +need not form part of the final matched string; lookbehind assertions and the +\K escape sequence provide ways of inspecting characters before the start of a +matched string. +

+

+(2) The pattern contains one or more lookbehind assertions. This condition +exists in case there is a lookbehind that inspects characters before the start +of the match. +

+

+(3) There is a special case when the whole pattern can match an empty string. +When the starting point is at the end of the subject, the empty string match is +a possibility, and if PCRE2_PARTIAL_SOFT is set and neither of the above +conditions is true, it is returned. However, because adding more characters +might result in a non-empty match, PCRE2_PARTIAL_HARD returns a partial match, +which in this case means "there is going to be a match at this point, but until +some more characters are added, we do not know if it will be an empty string or +something longer". +

+
PARTIAL MATCHING USING pcre2_match()
+

+When a partial matching option is set, the result of calling +pcre2_match() can be one of the following: +

+

+A successful match +A complete match has been found, starting and ending within this subject. +

+

+PCRE2_ERROR_NOMATCH +No match can start anywhere in this subject. +

+

+PCRE2_ERROR_PARTIAL +Adding more characters may result in a complete match that uses one or more +characters from the end of this subject. +

+

+When a partial match is returned, the first two elements in the ovector point +to the portion of the subject that was matched, but the values in the rest of +the ovector are undefined. The appearance of \K in the pattern has no effect +for a partial match. Consider this pattern: +

+  /abc\K123/
+
+If it is matched against "456abc123xyz" the result is a complete match, and the +ovector defines the matched string as "123", because \K resets the "start of +match" point. However, if a partial match is requested and the subject string +is "456abc12", a partial match is found for the string "abc12", because all +these characters are needed for a subsequent re-match with additional +characters. +

+

+If there is more than one partial match, the first one that was found provides +the data that is returned. Consider this pattern: +

+  /123\w+X|dogY/
+
+If this is matched against the subject string "abc123dog", both alternatives +fail to match, but the end of the subject is reached during matching, so +PCRE2_ERROR_PARTIAL is returned. The offsets are set to 3 and 9, identifying +"123dog" as the first partial match. (In this example, there are two partial +matches, because "dog" on its own partially matches the second alternative.) +

+
+How a partial match is processed by pcre2_match() +
+

+What happens when a partial match is identified depends on which of the two +partial matching options is set. +

+

+If PCRE2_PARTIAL_HARD is set, PCRE2_ERROR_PARTIAL is returned as soon as a +partial match is found, without continuing to search for possible complete +matches. This option is "hard" because it prefers an earlier partial match over +a later complete match. For this reason, the assumption is made that the end of +the supplied subject string is not the true end of the available data, which is +why \z, \Z, \b, \B, and $ always give a partial match. +

+

+If PCRE2_PARTIAL_SOFT is set, the partial match is remembered, but matching +continues as normal, and other alternatives in the pattern are tried. If no +complete match can be found, PCRE2_ERROR_PARTIAL is returned instead of +PCRE2_ERROR_NOMATCH. This option is "soft" because it prefers a complete match +over a partial match. All the various matching items in a pattern behave as if +the subject string is potentially complete; \z, \Z, and $ match at the end of +the subject, as normal, and for \b and \B the end of the subject is treated +as a non-alphanumeric. +

+

+The difference between the two partial matching options can be illustrated by a +pattern such as: +

+  /dog(sbody)?/
+
+This matches either "dog" or "dogsbody", greedily (that is, it prefers the +longer string if possible). If it is matched against the string "dog" with +PCRE2_PARTIAL_SOFT, it yields a complete match for "dog". However, if +PCRE2_PARTIAL_HARD is set, the result is PCRE2_ERROR_PARTIAL. On the other +hand, if the pattern is made ungreedy the result is different: +
+  /dog(sbody)??/
+
+In this case the result is always a complete match because that is found first, +and matching never continues after finding a complete match. It might be easier +to follow this explanation by thinking of the two patterns like this: +
+  /dog(sbody)?/    is the same as  /dogsbody|dog/
+  /dog(sbody)??/   is the same as  /dog|dogsbody/
+
+The second pattern will never match "dogsbody", because it will always find the +shorter match first. +

+
+Example of partial matching using pcre2test +
+

+The pcre2test data modifiers partial_hard (or ph) and +partial_soft (or ps) set PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT, +respectively, when calling pcre2_match(). Here is a run of +pcre2test using a pattern that matches the whole subject in the form of a +date: +

+    re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
+  data> 25dec3\=ph
+  Partial match: 25dec3
+  data> 3ju\=ph
+  Partial match: 3ju
+  data> 3juj\=ph
+  No match
+
+This example gives the same results for both hard and soft partial matching +options. Here is an example where there is a difference: +
+    re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
+  data> 25jun04\=ps
+   0: 25jun04
+   1: jun
+  data> 25jun04\=ph
+  Partial match: 25jun04
+
+With PCRE2_PARTIAL_SOFT, the subject is matched completely. For +PCRE2_PARTIAL_HARD, however, the subject is assumed not to be complete, so +there is only a partial match. +

+
MULTI-SEGMENT MATCHING WITH pcre2_match()
+

+PCRE was not originally designed with multi-segment matching in mind. However, +over time, features (including partial matching) that make multi-segment +matching possible have been added. A very long string can be searched segment +by segment by calling pcre2_match() repeatedly, with the aim of achieving +the same results that would happen if the entire string was available for +searching all the time. Normally, the strings that are being sought are much +shorter than each individual segment, and are in the middle of very long +strings, so the pattern is normally not anchored. +

+

+Special logic must be implemented to handle a matched substring that spans a +segment boundary. PCRE2_PARTIAL_HARD should be used, because it returns a +partial match at the end of a segment whenever there is the possibility of +changing the match by adding more characters. The PCRE2_NOTBOL option should +also be set for all but the first segment. +

+

+When a partial match occurs, the next segment must be added to the current +subject and the match re-run, using the startoffset argument of +pcre2_match() to begin at the point where the partial match started. +For example: +

+    re> /\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d/
+  data> ...the date is 23ja\=ph
+  Partial match: 23ja
+  data> ...the date is 23jan19 and on that day...\=offset=15
+   0: 23jan19
+   1: jan
+
+Note the use of the offset modifier to start the new match where the +partial match was found. In this example, the next segment was added to the one +in which the partial match was found. This is the most straightforward +approach, typically using a memory buffer that is twice the size of each +segment. After a partial match, the first half of the buffer is discarded, the +second half is moved to the start of the buffer, and a new segment is added +before repeating the match as in the example above. After a no match, the +entire buffer can be discarded. +

+

+If there are memory constraints, you may want to discard text that precedes a +partial match before adding the next segment. Unfortunately, this is not at +present straightforward. In cases such as the above, where the pattern does not +contain any lookbehinds, it is sufficient to retain only the partially matched +substring. However, if the pattern contains a lookbehind assertion, characters +that precede the start of the partial match may have been inspected during the +matching process. When pcre2test displays a partial match, it indicates +these characters with '<' if the allusedtext modifier is set: +

+    re> "(?<=123)abc"
+  data> xx123ab\=ph,allusedtext
+  Partial match: 123ab
+                 <<<
+
+However, the allusedtext modifier is not available for JIT matching, +because JIT matching does not record the first (or last) consulted characters. +For this reason, this information is not available via the API. It is therefore +not possible in general to obtain the exact number of characters that must be +retained in order to get the right match result. If you cannot retain the +entire segment, you must find some heuristic way of choosing. +

+

+If you know the approximate length of the matching substrings, you can use that +to decide how much text to retain. The only lookbehind information that is +currently available via the API is the length of the longest individual +lookbehind in a pattern, but this can be misleading if there are nested +lookbehinds. The value returned by calling pcre2_pattern_info() with the +PCRE2_INFO_MAXLOOKBEHIND option is the maximum number of characters (not code +units) that any individual lookbehind moves back when it is processed. A +pattern such as "(?<=(?<!b)a)" has a maximum lookbehind value of one, but +inspects two characters before its starting point. +

+

+In a non-UTF or a 32-bit case, moving back is just a subtraction, but in +UTF-8 or UTF-16 you have to count characters while moving back through the code +units. +

+
PARTIAL MATCHING USING pcre2_dfa_match()
+

+The DFA function moves along the subject string character by character, without +backtracking, searching for all possible matches simultaneously. If the end of +the subject is reached before the end of the pattern, there is the possibility +of a partial match. +

+

+When PCRE2_PARTIAL_SOFT is set, PCRE2_ERROR_PARTIAL is returned only if there +have been no complete matches. Otherwise, the complete matches are returned. +If PCRE2_PARTIAL_HARD is set, a partial match takes precedence over any +complete matches. The portion of the string that was matched when the longest +partial match was found is set as the first matching string. +

+

+Because the DFA function always searches for all possible matches, and there is +no difference between greedy and ungreedy repetition, its behaviour is +different from that of pcre2_match(). Consider the string "dog" matched +against this ungreedy pattern: +

+  /dog(sbody)??/
+
+Whereas the standard function stops as soon as it finds the complete match for +"dog", the DFA function also finds the partial match for "dogsbody", and so +returns that when PCRE2_PARTIAL_HARD is set. +

+
MULTI-SEGMENT MATCHING WITH pcre2_dfa_match()
+

+When a partial match has been found using the DFA matching function, it is +possible to continue the match by providing additional subject data and calling +the function again with the same compiled regular expression, this time setting +the PCRE2_DFA_RESTART option. You must pass the same working space as before, +because this is where details of the previous partial match are stored. You can +set the PCRE2_PARTIAL_SOFT or PCRE2_PARTIAL_HARD options with PCRE2_DFA_RESTART +to continue partial matching over multiple segments. Here is an example using +pcre2test: +

+    re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
+  data> 23ja\=dfa,ps
+  Partial match: 23ja
+  data> n05\=dfa,dfa_restart
+   0: n05
+
+The first call has "23ja" as the subject, and requests partial matching; the +second call has "n05" as the subject for the continued (restarted) match. +Notice that when the match is complete, only the last part is shown; PCRE2 does +not retain the previously partially-matched string. It is up to the calling +program to do that if it needs to. This means that, for an unanchored pattern, +if a continued match fails, it is not possible to try again at a new starting +point. All this facility is capable of doing is continuing with the previous +match attempt. For example, consider this pattern: +
+  1234|3789
+
+If the first part of the subject is "ABC123", a partial match of the first +alternative is found at offset 3. There is no partial match for the second +alternative, because such a match does not start at the same point in the +subject string. Attempting to continue with the string "7890" does not yield a +match because only those alternatives that match at one point in the subject +are remembered. Depending on the application, this may or may not be what you +want. +

+

+If you do want to allow for starting again at the next character, one way of +doing it is to retain some or all of the segment and try a new complete match, +as described for pcre2_match() above. Another possibility is to work with +two buffers. If a partial match at offset n in the first buffer is +followed by "no match" when PCRE2_DFA_RESTART is used on the second buffer, you +can then try a new match starting at offset n+1 in the first buffer. +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 04 September 2019 +
+Copyright © 1997-2019 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2pattern.html b/doc/html/pcre2pattern.html new file mode 100644 index 0000000..cf50c1a --- /dev/null +++ b/doc/html/pcre2pattern.html @@ -0,0 +1,3855 @@ + + +pcre2pattern specification + + +

pcre2pattern man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
PCRE2 REGULAR EXPRESSION DETAILS
+

+The syntax and semantics of the regular expressions that are supported by PCRE2 +are described in detail below. There is a quick-reference syntax summary in the +pcre2syntax +page. PCRE2 tries to match Perl syntax and semantics as closely as it can. +PCRE2 also supports some alternative regular expression syntax (which does not +conflict with the Perl syntax) in order to provide some compatibility with +regular expressions in Python, .NET, and Oniguruma. +

+

+Perl's regular expressions are described in its own documentation, and regular +expressions in general are covered in a number of books, some of which have +copious examples. Jeffrey Friedl's "Mastering Regular Expressions", published +by O'Reilly, covers regular expressions in great detail. This description of +PCRE2's regular expressions is intended as reference material. +

+

+This document discusses the regular expression patterns that are supported by +PCRE2 when its main matching function, pcre2_match(), is used. PCRE2 also +has an alternative matching function, pcre2_dfa_match(), which matches +using a different algorithm that is not Perl-compatible. Some of the features +discussed below are not available when DFA matching is used. The advantages and +disadvantages of the alternative function, and how it differs from the normal +function, are discussed in the +pcre2matching +page. +

+
SPECIAL START-OF-PATTERN ITEMS
+

+A number of options that can be passed to pcre2_compile() can also be set +by special items at the start of a pattern. These are not Perl-compatible, but +are provided to make these options accessible to pattern writers who are not +able to change the program that processes the pattern. Any number of these +items may appear, but they must all be together right at the start of the +pattern string, and the letters must be in upper case. +

+
+UTF support +
+

+In the 8-bit and 16-bit PCRE2 libraries, characters may be coded either as +single code units, or as multiple UTF-8 or UTF-16 code units. UTF-32 can be +specified for the 32-bit library, in which case it constrains the character +values to valid Unicode code points. To process UTF strings, PCRE2 must be +built to include Unicode support (which is the default). When using UTF strings +you must either call the compiling function with one or both of the PCRE2_UTF +or PCRE2_MATCH_INVALID_UTF options, or the pattern must start with the special +sequence (*UTF), which is equivalent to setting the relevant PCRE2_UTF. How +setting a UTF mode affects pattern matching is mentioned in several places +below. There is also a summary of features in the +pcre2unicode +page. +

+

+Some applications that allow their users to supply patterns may wish to +restrict them to non-UTF data for security reasons. If the PCRE2_NEVER_UTF +option is passed to pcre2_compile(), (*UTF) is not allowed, and its +appearance in a pattern causes an error. +

+
+Unicode property support +
+

+Another special sequence that may appear at the start of a pattern is (*UCP). +This has the same effect as setting the PCRE2_UCP option: it causes sequences +such as \d and \w to use Unicode properties to determine character types, +instead of recognizing only characters with codes less than 256 via a lookup +table. It also causes upper/lower casing operations to use Unicode properties +for characters with code points greater than 127, even when UTF is not set. +These behaviours can be changed within the pattern; see the section entitled +"Internal Option Setting" +below. +

+

+Some applications that allow their users to supply patterns may wish to +restrict them for security reasons. If the PCRE2_NEVER_UCP option is passed to +pcre2_compile(), (*UCP) is not allowed, and its appearance in a pattern +causes an error. +

+
+Locking out empty string matching +
+

+Starting a pattern with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) has the same effect +as passing the PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART option to whichever +matching function is subsequently called to match the pattern. These options +lock out the matching of empty strings, either entirely, or only at the start +of the subject. +

+
+Disabling auto-possessification +
+

+If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as setting +the PCRE2_NO_AUTO_POSSESS option. This stops PCRE2 from making quantifiers +possessive when what follows cannot match the repeated item. For example, by +default a+b is treated as a++b. For more details, see the +pcre2api +documentation. +

+
+Disabling start-up optimizations +
+

+If a pattern starts with (*NO_START_OPT), it has the same effect as setting the +PCRE2_NO_START_OPTIMIZE option. This disables several optimizations for quickly +reaching "no match" results. For more details, see the +pcre2api +documentation. +

+
+Disabling automatic anchoring +
+

+If a pattern starts with (*NO_DOTSTAR_ANCHOR), it has the same effect as +setting the PCRE2_NO_DOTSTAR_ANCHOR option. This disables optimizations that +apply to patterns whose top-level branches all start with .* (match any number +of arbitrary characters). For more details, see the +pcre2api +documentation. +

+
+Disabling JIT compilation +
+

+If a pattern that starts with (*NO_JIT) is successfully compiled, an attempt by +the application to apply the JIT optimization by calling +pcre2_jit_compile() is ignored. +

+
+Setting match resource limits +
+

+The pcre2_match() function contains a counter that is incremented every +time it goes round its main loop. The caller of pcre2_match() can set a +limit on this counter, which therefore limits the amount of computing resource +used for a match. The maximum depth of nested backtracking can also be limited; +this indirectly restricts the amount of heap memory that is used, but there is +also an explicit memory limit that can be set. +

+

+These facilities are provided to catch runaway matches that are provoked by +patterns with huge matching trees. A common example is a pattern with nested +unlimited repeats applied to a long string that does not match. When one of +these limits is reached, pcre2_match() gives an error return. The limits +can also be set by items at the start of the pattern of the form +

+  (*LIMIT_HEAP=d)
+  (*LIMIT_MATCH=d)
+  (*LIMIT_DEPTH=d)
+
+where d is any number of decimal digits. However, the value of the setting must +be less than the value set (or defaulted) by the caller of pcre2_match() +for it to have any effect. In other words, the pattern writer can lower the +limits set by the programmer, but not raise them. If there is more than one +setting of one of these limits, the lower value is used. The heap limit is +specified in kibibytes (units of 1024 bytes). +

+

+Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is +still recognized for backwards compatibility. +

+

+The heap limit applies only when the pcre2_match() or +pcre2_dfa_match() interpreters are used for matching. It does not apply +to JIT. The match limit is used (but in a different way) when JIT is being +used, or when pcre2_dfa_match() is called, to limit computing resource +usage by those matching functions. The depth limit is ignored by JIT but is +relevant for DFA matching, which uses function recursion for recursions within +the pattern and for lookaround assertions and atomic groups. In this case, the +depth limit controls the depth of such recursion. +

+
+Newline conventions +
+

+PCRE2 supports six different conventions for indicating line breaks in +strings: a single CR (carriage return) character, a single LF (linefeed) +character, the two-character sequence CRLF, any of the three preceding, any +Unicode newline sequence, or the NUL character (binary zero). The +pcre2api +page has +further discussion +about newlines, and shows how to set the newline convention when calling +pcre2_compile(). +

+

+It is also possible to specify a newline convention by starting a pattern +string with one of the following sequences: +

+  (*CR)        carriage return
+  (*LF)        linefeed
+  (*CRLF)      carriage return, followed by linefeed
+  (*ANYCRLF)   any of the three above
+  (*ANY)       all Unicode newline sequences
+  (*NUL)       the NUL character (binary zero)
+
+These override the default and the options given to the compiling function. For +example, on a Unix system where LF is the default newline sequence, the pattern +
+  (*CR)a.b
+
+changes the convention to CR. That pattern matches "a\nb" because LF is no +longer a newline. If more than one of these settings is present, the last one +is used. +

+

+The newline convention affects where the circumflex and dollar assertions are +true. It also affects the interpretation of the dot metacharacter when +PCRE2_DOTALL is not set, and the behaviour of \N when not followed by an +opening brace. However, it does not affect what the \R escape sequence +matches. By default, this is any Unicode newline sequence, for Perl +compatibility. However, this can be changed; see the next section and the +description of \R in the section entitled +"Newline sequences" +below. A change of \R setting can be combined with a change of newline +convention. +

+
+Specifying what \R matches +
+

+It is possible to restrict \R to match only CR, LF, or CRLF (instead of the +complete set of Unicode line endings) by setting the option PCRE2_BSR_ANYCRLF +at compile time. This effect can also be achieved by starting a pattern with +(*BSR_ANYCRLF). For completeness, (*BSR_UNICODE) is also recognized, +corresponding to PCRE2_BSR_UNICODE. +

+
EBCDIC CHARACTER CODES
+

+PCRE2 can be compiled to run in an environment that uses EBCDIC as its +character code instead of ASCII or Unicode (typically a mainframe system). In +the sections below, character code values are ASCII or Unicode; in an EBCDIC +environment these characters may have different code values, and there are no +code points greater than 255. +

+
CHARACTERS AND METACHARACTERS
+

+A regular expression is a pattern that is matched against a subject string from +left to right. Most characters stand for themselves in a pattern, and match the +corresponding characters in the subject. As a trivial example, the pattern +

+  The quick brown fox
+
+matches a portion of a subject string that is identical to itself. When +caseless matching is specified (the PCRE2_CASELESS option or (?i) within the +pattern), letters are matched independently of case. Note that there are two +ASCII characters, K and S, that, in addition to their lower case ASCII +equivalents, are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F +(long S) respectively when either PCRE2_UTF or PCRE2_UCP is set, unless the +PCRE2_EXTRA_CASELESS_RESTRICT option is in force (either passed to +pcre2_compile() or set by (?r) within the pattern). +

+

+The power of regular expressions comes from the ability to include wild cards, +character classes, alternatives, and repetitions in the pattern. These are +encoded in the pattern by the use of metacharacters, which do not stand +for themselves but instead are interpreted in some special way. +

+

+There are two different sets of metacharacters: those that are recognized +anywhere in the pattern except within square brackets, and those that are +recognized within square brackets. Outside square brackets, the metacharacters +are as follows: +

+  \      general escape character with several uses
+  ^      assert start of string (or line, in multiline mode)
+  $      assert end of string (or line, in multiline mode)
+  .      match any character except newline (by default)
+  [      start character class definition
+  |      start of alternative branch
+  (      start group or control verb
+  )      end group or control verb
+  *      0 or more quantifier
+  +      1 or more quantifier; also "possessive quantifier"
+  ?      0 or 1 quantifier; also quantifier minimizer
+  {      potential start of min/max quantifier
+
+Brace characters { and } are also used to enclose data for constructions such +as \g{2} or \k{name}. In almost all uses of braces, space and/or horizontal +tab characters that follow { or precede } are allowed and are ignored. In the +case of quantifiers, they may also appear before or after the comma. The +exception to this is \u{...} which is an ECMAScript compatibility feature +that is recognized only when the PCRE2_EXTRA_ALT_BSUX option is set. ECMAScript +does not ignore such white space; it causes the item to be interpreted as +literal. +

+

+Part of a pattern that is in square brackets is called a "character class". In +a character class the only metacharacters are: +

+  \      general escape character
+  ^      negate the class, but only if the first character
+  -      indicates character range
+  [      POSIX character class (if followed by POSIX syntax)
+  ]      terminates the character class
+
+If a pattern is compiled with the PCRE2_EXTENDED option, most white space in +the pattern, other than in a character class or within a \Q...\E sequence, and +characters between a # outside a character class and the next newline, inclusive, are +ignored. An escaping backslash can be used to include a white space or a # +character as part of the pattern. If the PCRE2_EXTENDED_MORE option is set, the +same applies, but in addition unescaped space and horizontal tab characters are +ignored inside a character class. Note: only these two characters are ignored, +not the full set of pattern white space characters that are ignored outside a +character class. Option settings can be changed within a pattern; see the +section entitled +"Internal Option Setting" +below. +

+

+The following sections describe the use of each of the metacharacters. +

+
BACKSLASH
+

+The backslash character has several uses. Firstly, if it is followed by a +character that is not a digit or a letter, it takes away any special meaning +that character may have. This use of backslash as an escape character applies +both inside and outside character classes. +

+

+For example, if you want to match a * character, you must write \* in the +pattern. This escaping action applies whether or not the following character +would otherwise be interpreted as a metacharacter, so it is always safe to +precede a non-alphanumeric with backslash to specify that it stands for itself. +In particular, if you want to match a backslash, you write \\. +

+

+Only ASCII digits and letters have any special meaning after a backslash. All +other characters (in particular, those whose code points are greater than 127) +are treated as literals. +

+

+If you want to treat all characters in a sequence as literals, you can do so by +putting them between \Q and \E. Note that this includes white space even when +the PCRE2_EXTENDED option is set so that most other white space is ignored. The +behaviour is different from Perl in that $ and @ are handled as literals in +\Q...\E sequences in PCRE2, whereas in Perl, $ and @ cause variable +interpolation. Also, Perl does "double-quotish backslash interpolation" on any +backslashes between \Q and \E which, its documentation says, "may lead to +confusing results". PCRE2 treats a backslash between \Q and \E just like any +other character. Note the following examples: +

+  Pattern            PCRE2 matches   Perl matches
+
+  \Qabc$xyz\E        abc$xyz        abc followed by the contents of $xyz
+  \Qabc\$xyz\E       abc\$xyz       abc\$xyz
+  \Qabc\E\$\Qxyz\E   abc$xyz        abc$xyz
+  \QA\B\E            A\B            A\B
+  \Q\\E              \              \\E
+
+The \Q...\E sequence is recognized both inside and outside character classes. +An isolated \E that is not preceded by \Q is ignored. If \Q is not followed +by \E later in the pattern, the literal interpretation continues to the end of +the pattern (that is, \E is assumed at the end). If the isolated \Q is inside +a character class, this causes an error, because the character class is then +not terminated by a closing square bracket. +

+
+Non-printing characters +
+

+A second use of backslash provides a way of encoding non-printing characters +in patterns in a visible manner. There is no restriction on the appearance of +non-printing characters in a pattern, but when a pattern is being prepared by +text editing, it is often easier to use one of the following escape sequences +instead of the binary character it represents. In an ASCII or Unicode +environment, these escapes are as follows: +

+  \a          alarm, that is, the BEL character (hex 07)
+  \cx         "control-x", where x is a non-control ASCII character
+  \e          escape (hex 1B)
+  \f          form feed (hex 0C)
+  \n          linefeed (hex 0A)
+  \r          carriage return (hex 0D) (but see below)
+  \t          tab (hex 09)
+  \0dd        character with octal code 0dd
+  \ddd        character with octal code ddd, or backreference
+  \o{ddd..}   character with octal code ddd..
+  \xhh        character with hex code hh
+  \x{hhh..}   character with hex code hhh..
+  \N{U+hhh..} character with Unicode hex code point hhh..
+
+By default, after \x that is not followed by {, from zero to two hexadecimal +digits are read (letters can be in upper or lower case). Any number of +hexadecimal digits may appear between \x{ and }. If a character other than a +hexadecimal digit appears between \x{ and }, or if there is no terminating }, +an error occurs. +

+

+Characters whose code points are less than 256 can be defined by either of the +two syntaxes for \x or by an octal sequence. There is no difference in the way +they are handled. For example, \xdc is exactly the same as \x{dc} or \334. +However, using the braced versions does make such sequences easier to read. +

+

+Support is available for some ECMAScript (aka JavaScript) escape sequences via +two compile-time options. If PCRE2_ALT_BSUX is set, the sequence \x followed +by { is not recognized. Only if \x is followed by two hexadecimal digits is it +recognized as a character escape. Otherwise it is interpreted as a literal "x" +character. In this mode, support for code points greater than 255 is provided +by \u, which must be followed by four hexadecimal digits; otherwise it is +interpreted as a literal "u" character. +

+

+PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition, +\u{hhh..} is recognized as the character specified by hexadecimal code point. +There may be any number of hexadecimal digits, but unlike other places that +also use curly brackets, spaces are not allowed and would result in the string +being interpreted as a literal. This syntax is from ECMAScript 6. +

+

+The \N{U+hhh..} escape sequence is recognized only when PCRE2 is operating in +UTF mode. Perl also uses \N{name} to specify characters by Unicode name; PCRE2 +does not support this. Note that when \N is not followed by an opening brace +(curly bracket) it has an entirely different meaning, matching any character +that is not a newline. +

+

+There are some legacy applications where the escape sequence \r is expected to +match a newline. If the PCRE2_EXTRA_ESCAPED_CR_IS_LF option is set, \r in a +pattern is converted to \n so that it matches a LF (linefeed) instead of a CR +(carriage return) character. +

+

+An error occurs if \c is not followed by a character whose ASCII code point +is in the range 32 to 126. The precise effect of \cx is as follows: if x is a +lower case letter, it is converted to upper case. Then bit 6 of the character +(hex 40) is inverted. Thus \cA to \cZ become hex 01 to hex 1A (A is 41, Z is +5A), but \c{ becomes hex 3B ({ is 7B), and \c; becomes hex 7B (; is 3B). If +the code unit following \c has a code point less than 32 or greater than 126, +a compile-time error occurs. +

+

+When PCRE2 is compiled in EBCDIC mode, \N{U+hhh..} is not supported. \a, \e, +\f, \n, \r, and \t generate the appropriate EBCDIC code values. The \c +escape is processed as specified for Perl in the perlebcdic document. The +only characters that are allowed after \c are A-Z, a-z, or one of @, [, \, ], +^, _, or ?. Any other character provokes a compile-time error. The sequence +\c@ encodes character code 0; after \c the letters (in either case) encode +characters 1-26 (hex 01 to hex 1A); [, \, ], ^, and _ encode characters 27-31 +(hex 1B to hex 1F), and \c? becomes either 255 (hex FF) or 95 (hex 5F). +

+

+Thus, apart from \c?, these escapes generate the same character code values as +they do in an ASCII environment, though the meanings of the values mostly +differ. For example, \cG always generates code value 7, which is BEL in ASCII +but DEL in EBCDIC. +

+

+The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, but +because 127 is not a control character in EBCDIC, Perl makes it generate the +APC character. Unfortunately, there are several variants of EBCDIC. In most of +them the APC character has the value 255 (hex FF), but in the one Perl calls +POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC +values, PCRE2 makes \c? generate 95; otherwise it generates 255. +

+

+After \0 up to two further octal digits are read. If there are fewer than two +digits, just those that are present are used. Thus the sequence \0\x\015 +specifies two binary zeros followed by a CR character (code value 13). Make +sure you supply two digits after the initial zero if the pattern character that +follows is itself an octal digit. +

+

+The escape \o must be followed by a sequence of octal digits, enclosed in +braces. An error occurs if this is not the case. This escape is a recent +addition to Perl; it provides a way of specifying character code points as octal +numbers greater than 0777, and it also allows octal numbers and backreferences +to be unambiguously specified. +

+

+For greater clarity and unambiguity, it is best to avoid following \ by a +digit greater than zero. Instead, use \o{...} or \x{...} to specify numerical +character code points, and \g{...} to specify backreferences. The following +paragraphs describe the old, ambiguous syntax. +

+

+The handling of a backslash followed by a digit other than 0 is complicated, +and Perl has changed over time, causing PCRE2 also to change. +

+

+Outside a character class, PCRE2 reads the digit and any following digits as a +decimal number. If the number is less than 10, begins with the digit 8 or 9, or +if there are at least that many previous capture groups in the expression, the +entire sequence is taken as a backreference. A description of how this +works is given +later, +following the discussion of +parenthesized groups. +Otherwise, up to three octal digits are read to form a character code. +

+

+Inside a character class, PCRE2 handles \8 and \9 as the literal characters +"8" and "9", and otherwise reads up to three octal digits following the +backslash, using them to generate a data character. Any subsequent digits stand +for themselves. For example, outside a character class: +

+  \040   is another way of writing an ASCII space
+  \40    is the same, provided there are fewer than 40 previous capture groups
+  \7     is always a backreference
+  \11    might be a backreference, or another way of writing a tab
+  \011   is always a tab
+  \0113  is a tab followed by the character "3"
+  \113   might be a backreference, otherwise the character with octal code 113
+  \377   might be a backreference, otherwise the value 255 (decimal)
+  \81    is always a backreference
+
+Note that octal values of 100 or greater that are specified using this syntax +must not be introduced by a leading zero, because no more than three octal +digits are ever read. +

+
+Constraints on character values +
+

+Characters that are specified using octal or hexadecimal numbers are +limited to certain values, as follows: +

+  8-bit non-UTF mode    no greater than 0xff
+  16-bit non-UTF mode   no greater than 0xffff
+  32-bit non-UTF mode   no greater than 0xffffffff
+  All UTF modes         no greater than 0x10ffff and a valid code point
+
+Invalid Unicode code points are all those in the range 0xd800 to 0xdfff (the +so-called "surrogate" code points). The check for these can be disabled by the +caller of pcre2_compile() by setting the option +PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. However, this is possible only in UTF-8 +and UTF-32 modes, because these values are not representable in UTF-16. +

+
+Escape sequences in character classes +
+

+All the sequences that define a single character value can be used both inside +and outside character classes. In addition, inside a character class, \b is +interpreted as the backspace character (hex 08). +

+

+When not followed by an opening brace, \N is not allowed in a character class. +\B, \R, and \X are not special inside a character class. Like other +unrecognized alphabetic escape sequences, they cause an error. Outside a +character class, these sequences have different meanings. +

+
+Unsupported escape sequences +
+

+In Perl, the sequences \F, \l, \L, \u, and \U are recognized by its string +handler and used to modify the case of following characters. By default, PCRE2 +does not support these escape sequences in patterns. However, if either of the +PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX options is set, \U matches a "U" +character, and \u can be used to define a character by code point, as +described above. +

+
+Absolute and relative backreferences +
+

+The sequence \g followed by a signed or unsigned number, optionally enclosed +in braces, is an absolute or relative backreference. A named backreference +can be coded as \g{name}. Backreferences are discussed +later, +following the discussion of +parenthesized groups. +

+
+Absolute and relative subroutine calls +
+

+For compatibility with Oniguruma, the non-Perl syntax \g followed by a name or +a number enclosed either in angle brackets or single quotes, is an alternative +syntax for referencing a capture group as a subroutine. Details are discussed +later. +Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not +synonymous. The former is a backreference; the latter is a +subroutine +call. +

+
+Generic character types +
+

+Another use of backslash is for specifying generic character types: +

+  \d     any decimal digit
+  \D     any character that is not a decimal digit
+  \h     any horizontal white space character
+  \H     any character that is not a horizontal white space character
+  \N     any character that is not a newline
+  \s     any white space character
+  \S     any character that is not a white space character
+  \v     any vertical white space character
+  \V     any character that is not a vertical white space character
+  \w     any "word" character
+  \W     any "non-word" character
+
+The \N escape sequence has the same meaning as +the "." metacharacter +when PCRE2_DOTALL is not set, but setting PCRE2_DOTALL does not change the +meaning of \N. Note that when \N is followed by an opening brace it has a +different meaning. See the section entitled +"Non-printing characters" +above for details. Perl also uses \N{name} to specify characters by Unicode +name; PCRE2 does not support this. +

+

+Each pair of lower and upper case escape sequences partitions the complete set +of characters into two disjoint sets. Any given character matches one, and only +one, of each pair. The sequences can appear both inside and outside character +classes. They each match one character of the appropriate type. If the current +matching point is at the end of the subject string, all of them fail, because +there is no character to match. +

+

+The default \s characters are HT (9), LF (10), VT (11), FF (12), CR (13), and +space (32), which are defined as white space in the "C" locale. This list may +vary if locale-specific matching is taking place. For example, in some locales +the "non-breaking space" character (\xA0) is recognized as white space, and in +others the VT character is not. +

+

+A "word" character is an underscore or any character that is a letter or digit. +By default, the definition of letters and digits is controlled by PCRE2's +low-valued character tables, and may vary if locale-specific matching is taking +place (see +"Locale support" +in the +pcre2api +page). For example, in a French locale such as "fr_FR" in Unix-like systems, +or "french" in Windows, some character codes greater than 127 are used for +accented letters, and these are then matched by \w. The use of locales with +Unicode is discouraged. +

+

+By default, characters whose code points are greater than 127 never match \d, +\s, or \w, and always match \D, \S, and \W, although this may be different +for characters in the range 128-255 when locale-specific matching is happening. +These escape sequences retain their original meanings from before Unicode +support was available, mainly for efficiency reasons. If the PCRE2_UCP option +is set, the behaviour is changed so that Unicode properties are used to +determine character types, as follows: +

+  \d  any character that matches \p{Nd} (decimal digit)
+  \s  any character that matches \p{Z} or \h or \v
+  \w  any character that matches \p{L}, \p{N}, \p{Mn}, or \p{Pc}
+
+The addition of \p{Mn} (non-spacing mark) and the replacement of an explicit +test for underscore with a test for \p{Pc} (connector punctuation) happened in +PCRE2 release 10.43. This brings PCRE2 into line with Perl. +

+

+The upper case escapes match the inverse sets of characters. Note that \d +matches only decimal digits, whereas \w matches any Unicode digit, as well as +other character categories. Note also that PCRE2_UCP affects \b and +\B because they are defined in terms of \w and \W. Matching these sequences +is noticeably slower when PCRE2_UCP is set. +

+

+The effect of PCRE2_UCP on any one of these escape sequences can be negated by +the options PCRE2_EXTRA_ASCII_BSD, PCRE2_EXTRA_ASCII_BSS, and +PCRE2_EXTRA_ASCII_BSW, respectively. These options can be set and reset within +a pattern by means of an internal option setting +(see below). +

+

+The sequences \h, \H, \v, and \V, in contrast to the other sequences, which +match only ASCII characters by default, always match a specific list of code +points, whether or not PCRE2_UCP is set. The horizontal space characters are: +

+  U+0009     Horizontal tab (HT)
+  U+0020     Space
+  U+00A0     Non-break space
+  U+1680     Ogham space mark
+  U+180E     Mongolian vowel separator
+  U+2000     En quad
+  U+2001     Em quad
+  U+2002     En space
+  U+2003     Em space
+  U+2004     Three-per-em space
+  U+2005     Four-per-em space
+  U+2006     Six-per-em space
+  U+2007     Figure space
+  U+2008     Punctuation space
+  U+2009     Thin space
+  U+200A     Hair space
+  U+202F     Narrow no-break space
+  U+205F     Medium mathematical space
+  U+3000     Ideographic space
+
+The vertical space characters are: +
+  U+000A     Linefeed (LF)
+  U+000B     Vertical tab (VT)
+  U+000C     Form feed (FF)
+  U+000D     Carriage return (CR)
+  U+0085     Next line (NEL)
+  U+2028     Line separator
+  U+2029     Paragraph separator
+
+In 8-bit, non-UTF-8 mode, only the characters with code points less than 256 +are relevant. +

+
+Newline sequences +
+

+Outside a character class, by default, the escape sequence \R matches any +Unicode newline sequence. In 8-bit non-UTF-8 mode \R is equivalent to the +following: +

+  (?>\r\n|\n|\x0b|\f|\r|\x85)
+
+This is an example of an "atomic group", details of which are given +below. +This particular group matches either the two-character sequence CR followed by +LF, or one of the single characters LF (linefeed, U+000A), VT (vertical tab, +U+000B), FF (form feed, U+000C), CR (carriage return, U+000D), or NEL (next +line, U+0085). Because this is an atomic group, the two-character sequence is +treated as a single unit that cannot be split. +

+

+In other modes, two additional characters whose code points are greater than 255 +are added: LS (line separator, U+2028) and PS (paragraph separator, U+2029). +Unicode support is not needed for these characters to be recognized. +

+

+It is possible to restrict \R to match only CR, LF, or CRLF (instead of the +complete set of Unicode line endings) by setting the option PCRE2_BSR_ANYCRLF +at compile time. (BSR is an abbreviation for "backslash R".) This can be made +the default when PCRE2 is built; if this is the case, the other behaviour can +be requested via the PCRE2_BSR_UNICODE option. It is also possible to specify +these settings by starting a pattern string with one of the following +sequences: +

+  (*BSR_ANYCRLF)   CR, LF, or CRLF only
+  (*BSR_UNICODE)   any Unicode newline sequence
+
+These override the default and the options given to the compiling function. +Note that these special settings, which are not Perl-compatible, are recognized +only at the very start of a pattern, and that they must be in upper case. If +more than one of them is present, the last one is used. They can be combined +with a change of newline convention; for example, a pattern can start with: +
+  (*ANY)(*BSR_ANYCRLF)
+
+They can also be combined with the (*UTF) or (*UCP) special sequences. Inside a +character class, \R is treated as an unrecognized escape sequence, and causes +an error. +

+
+Unicode character properties +
+

+When PCRE2 is built with Unicode support (the default), three additional escape +sequences that match characters with specific properties are available. They +can be used in any mode, though in 8-bit and 16-bit non-UTF modes these +sequences are of course limited to testing characters whose code points are +less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points +greater than 0x10ffff (the Unicode limit) may be encountered. These are all +treated as being in the Unknown script and with an unassigned type. +

+

+Matching characters by Unicode property is not fast, because PCRE2 has to do a +multistage table lookup in order to find a character's property. That is why +the traditional escape sequences such as \d and \w do not use Unicode +properties in PCRE2 by default, though you can make them do so by setting the +PCRE2_UCP option or by starting the pattern with (*UCP). +

+

+The extra escape sequences that provide property support are: +

+  \p{xx}   a character with the xx property
+  \P{xx}   a character without the xx property
+  \X       a Unicode extended grapheme cluster
+
+The property names represented by xx above are not case-sensitive, and in +accordance with Unicode's "loose matching" rules, spaces, hyphens, and +underscores are ignored. There is support for Unicode script names, Unicode +general category properties, "Any", which matches any character (including +newline), Bidi_Class, a number of binary (yes/no) properties, and some special +PCRE2 properties (described +below). +Certain other Perl properties such as "InMusicalSymbols" are not supported by +PCRE2. Note that \P{Any} does not match any characters, so always causes a +match failure. +

+
+Script properties for \p and \P +
+

+There are three different syntax forms for matching a script. Each Unicode +character has a basic script and, optionally, a list of other scripts ("Script +Extensions") with which it is commonly used. Using the Adlam script as an +example, \p{sc:Adlam} matches characters whose basic script is Adlam, whereas +\p{scx:Adlam} matches, in addition, characters that have Adlam in their +extensions list. The full names "script" and "script extensions" for the +property types are recognized, and an equals sign is an alternative to the +colon. If a script name is given without a property type, for example, +\p{Adlam}, it is treated as \p{scx:Adlam}. Perl changed to this +interpretation at release 5.26 and PCRE2 changed at release 10.40. +

+

+Unassigned characters (and in non-UTF 32-bit mode, characters with code points +greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not +part of an identified script are lumped together as "Common". The current list +of recognized script names and their 4-character abbreviations can be obtained +by running this command: +

+  pcre2test -LS
+
+
+

+
+The general category property for \p and \P +
+

+Each character has exactly one Unicode general category property, specified by +a two-letter abbreviation. For compatibility with Perl, negation can be +specified by including a circumflex between the opening brace and the property +name. For example, \p{^Lu} is the same as \P{Lu}. +

+

+If only one letter is specified with \p or \P, it includes all the general +category properties that start with that letter. In this case, in the absence +of negation, the curly brackets in the escape sequence are optional; these two +examples have the same effect: +

+  \p{L}
+  \pL
+
+The following general category property codes are supported: +
+  C     Other
+  Cc    Control
+  Cf    Format
+  Cn    Unassigned
+  Co    Private use
+  Cs    Surrogate
+
+  L     Letter
+  Ll    Lower case letter
+  Lm    Modifier letter
+  Lo    Other letter
+  Lt    Title case letter
+  Lu    Upper case letter
+
+  M     Mark
+  Mc    Spacing mark
+  Me    Enclosing mark
+  Mn    Non-spacing mark
+
+  N     Number
+  Nd    Decimal number
+  Nl    Letter number
+  No    Other number
+
+  P     Punctuation
+  Pc    Connector punctuation
+  Pd    Dash punctuation
+  Pe    Close punctuation
+  Pf    Final punctuation
+  Pi    Initial punctuation
+  Po    Other punctuation
+  Ps    Open punctuation
+
+  S     Symbol
+  Sc    Currency symbol
+  Sk    Modifier symbol
+  Sm    Mathematical symbol
+  So    Other symbol
+
+  Z     Separator
+  Zl    Line separator
+  Zp    Paragraph separator
+  Zs    Space separator
+
+The special property LC, which has the synonym L&, is also supported: it +matches a character that has the Lu, Ll, or Lt property, in other words, a +letter that is not classified as a modifier or "other". +

+

+The Cs (Surrogate) property applies only to characters whose code points are in +the range U+D800 to U+DFFF. These characters are no different to any other +character when PCRE2 is not in UTF mode (using the 16-bit or 32-bit library). +However, they are not valid in Unicode strings and so cannot be tested by PCRE2 +in UTF mode, unless UTF validity checking has been turned off (see the +discussion of PCRE2_NO_UTF_CHECK in the +pcre2api +page). +

+

+The long synonyms for property names that Perl supports (such as \p{Letter}) +are not supported by PCRE2, nor is it permitted to prefix any of these +properties with "Is". +

+

+No character that is in the Unicode table has the Cn (unassigned) property. +Instead, this property is assumed for any code point that is not in the +Unicode table. +

+

+Specifying caseless matching does not affect these escape sequences. For +example, \p{Lu} always matches only upper case letters. This is different from +the behaviour of current versions of Perl. +

+
+Binary (yes/no) properties for \p and \P +
+

+Unicode defines a number of binary properties, that is, properties whose only +values are true or false. You can obtain a list of those that are recognized by +\p and \P, along with their abbreviations, by running this command: +

+  pcre2test -LP
+
+
+

+
+The Bidi_Class property for \p and \P +
+

+

+  \p{Bidi_Class:<class>}   matches a character with the given class
+  \p{BC:<class>}           matches a character with the given class
+
+The recognized classes are: +
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
+
+An equals sign may be used instead of a colon. The class names are +case-insensitive; only the short names listed above are recognized. +

+
+Extended grapheme clusters +
+

+The \X escape matches any number of Unicode characters that form an "extended +grapheme cluster", and treats the sequence as an atomic group +(see below). +Unicode supports various kinds of composite character by giving each character +a grapheme breaking property, and having rules that use these properties to +define the boundaries of extended grapheme clusters. The rules are defined in +Unicode Standard Annex 29, "Unicode Text Segmentation". Unicode 11.0.0 +abandoned the use of some previous properties that had been used for emojis. +Instead it introduced various emoji-specific properties. PCRE2 uses only the +Extended Pictographic property. +

+

+\X always matches at least one character. Then it decides whether to add +additional characters according to the following rules for ending a cluster: +

+

+1. End at the end of the subject string. +

+

+2. Do not end between CR and LF; otherwise end after any control character. +

+

+3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters +are of five types: L, V, T, LV, and LVT. An L character may be followed by an +L, V, LV, or LVT character; an LV or V character may be followed by a V or T +character; an LVT or T character may be followed only by a T character. +

+

+4. Do not end before extending characters or spacing marks or the zero-width +joiner (ZWJ) character. Characters with the "mark" property always have the +"extend" grapheme breaking property. +

+

+5. Do not end after prepend characters. +

+

+6. Do not end within emoji modifier sequences or emoji ZWJ (zero-width +joiner) sequences. An emoji ZWJ sequence consists of a character with the +Extended_Pictographic property, optionally followed by one or more characters +with the Extend property, followed by the ZWJ character, followed by another +Extended_Pictographic character. +

+

+7. Do not break within emoji flag sequences. That is, do not break between +regional indicator (RI) characters if there are an odd number of RI characters +before the break point. +

+

+8. Otherwise, end the cluster. +

+
+PCRE2's additional properties +
+

+As well as the standard Unicode properties described above, PCRE2 supports four +more that make it possible to convert traditional escape sequences such as \w +and \s to use Unicode properties. PCRE2 uses these non-standard, non-Perl +properties internally when PCRE2_UCP is set. However, they may also be used +explicitly. These properties are: +

+  Xan   Any alphanumeric character
+  Xps   Any POSIX space character
+  Xsp   Any Perl space character
+  Xwd   Any Perl "word" character
+
+Xan matches characters that have either the L (letter) or the N (number) +property. Xps matches the characters tab, linefeed, vertical tab, form feed, or +carriage return, and any other character that has the Z (separator) property. +Xsp is the same as Xps; in PCRE1 it used to exclude vertical tab, for Perl +compatibility, but Perl changed. Xwd matches the same characters as Xan, plus +those that match Mn (non-spacing mark) or Pc (connector punctuation, which +includes underscore). +

+

+There is another non-standard property, Xuc, which matches any character that +can be represented by a Universal Character Name in C++ and other programming +languages. These are the characters $, @, ` (grave accent), and all characters +with Unicode code points greater than or equal to U+00A0, except for the +surrogates U+D800 to U+DFFF. Note that most base (ASCII) characters are +excluded. (Universal Character Names are of the form \uHHHH or \UHHHHHHHH +where H is a hexadecimal digit. Note that the Xuc property does not match these +sequences but the characters that they represent.) +

+
+Resetting the match start +
+

+In normal use, the escape sequence \K causes any previously matched characters +not to be included in the final matched sequence that is returned. For example, +the pattern: +

+  foo\Kbar
+
+matches "foobar", but reports that it has matched "bar". \K does not interact +with anchoring in any way. The pattern: +
+  ^foo\Kbar
+
+matches only when the subject begins with "foobar" (in single line mode), +though it again reports the matched string as "bar". This feature is similar to +a lookbehind assertion +(described below), +but the part of the pattern that precedes \K is not constrained to match a +limited number of characters, as is required for a lookbehind assertion. The +use of \K does not interfere with the setting of +captured substrings. +For example, when the pattern +
+  (foo)\Kbar
+
+matches "foobar", the first substring is still set to "foo". +

+

+From version 5.32.0 Perl forbids the use of \K in lookaround assertions. From +release 10.38 PCRE2 also forbids this by default. However, the +PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option can be used when calling +pcre2_compile() to re-enable the previous behaviour. When this option is +set, \K is acted upon when it occurs inside positive assertions, but is +ignored in negative assertions. Note that when a pattern such as (?=ab\K) +matches, the reported start of the match can be greater than the end of the +match. Using \K in a lookbehind assertion at the start of a pattern can also +lead to odd effects. For example, consider this pattern: +

+  (?<=\Kfoo)bar
+
+If the subject is "foobar", a call to pcre2_match() with a starting +offset of 3 succeeds and reports the matching string as "foobar", that is, the +start of the reported match is earlier than where the match started. +

+
+Simple assertions +
+

+The final use of backslash is for certain simple assertions. An assertion +specifies a condition that has to be met at a particular point in a match, +without consuming any characters from the subject string. The use of +groups for more complicated assertions is described +below. +The backslashed assertions are: +

+  \b     matches at a word boundary
+  \B     matches when not at a word boundary
+  \A     matches at the start of the subject
+  \Z     matches at the end of the subject
+          also matches before a newline at the end of the subject
+  \z     matches only at the end of the subject
+  \G     matches at the first matching position in the subject
+
+Inside a character class, \b has a different meaning; it matches the backspace +character. If any other of these assertions appears in a character class, an +"invalid escape sequence" error is generated. +

+

+A word boundary is a position in the subject string where the current character +and the previous character do not both match \w or \W (i.e. one matches +\w and the other matches \W), or the start or end of the string if the +first or last character matches \w, respectively. When PCRE2 is built with +Unicode support, the meanings of \w and \W can be changed by setting the +PCRE2_UCP option. When this is done, it also affects \b and \B. Neither PCRE2 +nor Perl has a separate "start of word" or "end of word" metasequence. However, +whatever follows \b normally determines which it is. For example, the fragment +\ba matches "a" at the start of a word. +

+

+The \A, \Z, and \z assertions differ from the traditional circumflex and +dollar (described in the next section) in that they only ever match at the very +start and end of the subject string, whatever options are set. Thus, they are +independent of multiline mode. These three assertions are not affected by the +PCRE2_NOTBOL or PCRE2_NOTEOL options, which affect only the behaviour of the +circumflex and dollar metacharacters. However, if the startoffset +argument of pcre2_match() is non-zero, indicating that matching is to +start at a point other than the beginning of the subject, \A can never match. +The difference between \Z and \z is that \Z matches before a newline at the +end of the string as well as at the very end, whereas \z matches only at the +end. +

+

+The \G assertion is true only when the current matching position is at the +start point of the matching process, as specified by the startoffset +argument of pcre2_match(). It differs from \A when the value of +startoffset is non-zero. By calling pcre2_match() multiple times +with appropriate arguments, you can mimic Perl's /g option, and it is in this +kind of implementation where \G can be useful. +

+

+Note, however, that PCRE2's implementation of \G, being true at the starting +character of the matching process, is subtly different from Perl's, which +defines it as true at the end of the previous match. In Perl, these can be +different when the previously matched string was empty. Because PCRE2 does just +one match at a time, it cannot reproduce this behaviour. +

+

+If all the alternatives of a pattern begin with \G, the expression is anchored +to the starting match position, and the "anchored" flag is set in the compiled +regular expression. +

+
CIRCUMFLEX AND DOLLAR
+

+The circumflex and dollar metacharacters are zero-width assertions. That is, +they test for a particular condition being true without consuming any +characters from the subject string. These two metacharacters are concerned with +matching the starts and ends of lines. If the newline convention is set so that +only the two-character sequence CRLF is recognized as a newline, isolated CR +and LF characters are treated as ordinary data characters, and are not +recognized as newlines. +

+

+Outside a character class, in the default matching mode, the circumflex +character is an assertion that is true only if the current matching point is at +the start of the subject string. If the startoffset argument of +pcre2_match() is non-zero, or if PCRE2_NOTBOL is set, circumflex can +never match if the PCRE2_MULTILINE option is unset. Inside a character class, +circumflex has an entirely different meaning +(see below). +

+

+Circumflex need not be the first character of the pattern if a number of +alternatives are involved, but it should be the first thing in each alternative +in which it appears if the pattern is ever to match that branch. If all +possible alternatives start with a circumflex, that is, if the pattern is +constrained to match only at the start of the subject, it is said to be an +"anchored" pattern. (There are also other constructs that can cause a pattern +to be anchored.) +

+

+The dollar character is an assertion that is true only if the current matching +point is at the end of the subject string, or immediately before a newline at +the end of the string (by default), unless PCRE2_NOTEOL is set. Note, however, +that it does not actually match the newline. Dollar need not be the last +character of the pattern if a number of alternatives are involved, but it +should be the last item in any branch in which it appears. Dollar has no +special meaning in a character class. +

+

+The meaning of dollar can be changed so that it matches only at the very end of +the string, by setting the PCRE2_DOLLAR_ENDONLY option at compile time. This +does not affect the \Z assertion. +

+

+The meanings of the circumflex and dollar metacharacters are changed if the +PCRE2_MULTILINE option is set. When this is the case, a dollar character +matches before any newlines in the string, as well as at the very end, and a +circumflex matches immediately after internal newlines as well as at the start +of the subject string. It does not match after a newline that ends the string, +for compatibility with Perl. However, this can be changed by setting the +PCRE2_ALT_CIRCUMFLEX option. +

+

+For example, the pattern /^abc$/ matches the subject string "def\nabc" (where +\n represents a newline) in multiline mode, but not otherwise. Consequently, +patterns that are anchored in single line mode because all branches start with +^ are not anchored in multiline mode, and a match for circumflex is possible +when the startoffset argument of pcre2_match() is non-zero. The +PCRE2_DOLLAR_ENDONLY option is ignored if PCRE2_MULTILINE is set. +

+

+When the newline convention (see +"Newline conventions" +below) recognizes the two-character sequence CRLF as a newline, this is +preferred, even if the single characters CR and LF are also recognized as +newlines. For example, if the newline convention is "any", a multiline mode +circumflex matches before "xyz" in the string "abc\r\nxyz" rather than after +CR, even though CR on its own is a valid newline. (It also matches at the very +start of the string, of course.) +

+

+Note that the sequences \A, \Z, and \z can be used to match the start and +end of the subject in both modes, and if all branches of a pattern start with +\A it is always anchored, whether or not PCRE2_MULTILINE is set. +

+
FULL STOP (PERIOD, DOT) AND \N
+

+Outside a character class, a dot in the pattern matches any one character in +the subject string except (by default) a character that signifies the end of a +line. One or more characters may be specified as line terminators (see +"Newline conventions" +above). +

+

+Dot never matches a single line-ending character. When the two-character +sequence CRLF is the only line ending, dot does not match CR if it is +immediately followed by LF, but otherwise it matches all characters (including +isolated CRs and LFs). When ANYCRLF is selected for line endings, no occurrences +of CR or LF match dot. When all Unicode line endings are being recognized, dot +does not match CR or LF or any of the other line ending characters. +

+

+The behaviour of dot with regard to newlines can be changed. If the +PCRE2_DOTALL option is set, a dot matches any one character, without exception. +If the two-character sequence CRLF is present in the subject string, it takes +two dots to match it. +

+

+The handling of dot is entirely independent of the handling of circumflex and +dollar, the only relationship being that they both involve newlines. Dot has no +special meaning in a character class. +

+

+The escape sequence \N when not followed by an opening brace behaves like a +dot, except that it is not affected by the PCRE2_DOTALL option. In other words, +it matches any character except one that signifies the end of a line. +

+

+When \N is followed by an opening brace it has a different meaning. See the +section entitled +"Non-printing characters" +above for details. Perl also uses \N{name} to specify characters by Unicode +name; PCRE2 does not support this. +

+
MATCHING A SINGLE CODE UNIT
+

+Outside a character class, the escape sequence \C matches any one code unit, +whether or not a UTF mode is set. In the 8-bit library, one code unit is one +byte; in the 16-bit library it is a 16-bit unit; in the 32-bit library it is a +32-bit unit. Unlike a dot, \C always matches line-ending characters. The +feature is provided in Perl in order to match individual bytes in UTF-8 mode, +but it is unclear how it can usefully be used. +

+

+Because \C breaks up characters into individual code units, matching one unit +with \C in UTF-8 or UTF-16 mode means that the rest of the string may start +with a malformed UTF character. This has undefined results, because PCRE2 +assumes that it is matching character by character in a valid UTF string (by +default it checks the subject string's validity at the start of processing +unless the PCRE2_NO_UTF_CHECK or PCRE2_MATCH_INVALID_UTF option is used). +

+

+An application can lock out the use of \C by setting the +PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also possible to +build PCRE2 with the use of \C permanently disabled. +

+

+PCRE2 does not allow \C to appear in lookbehind assertions +(described below) +in UTF-8 or UTF-16 modes, because this would make it impossible to calculate +the length of the lookbehind. Neither the alternative matching function +pcre2_dfa_match() nor the JIT optimizer supports \C in these UTF modes. +The former gives a match-time error; the latter fails to optimize and so the +match is always run using the interpreter. +

+

+In the 32-bit library, however, \C is always supported (when not explicitly +locked out) because it always matches a single code unit, whether or not UTF-32 +is specified. +

+

+In general, the \C escape sequence is best avoided. However, one way of using +it that avoids the problem of malformed UTF-8 or UTF-16 characters is to use a +lookahead to check the length of the next character, as in this pattern, which +could be used with a UTF-8 string (ignore white space and line breaks): +

+  (?| (?=[\x00-\x7f])(\C) |
+      (?=[\x80-\x{7ff}])(\C)(\C) |
+      (?=[\x{800}-\x{ffff}])(\C)(\C)(\C) |
+      (?=[\x{10000}-\x{1fffff}])(\C)(\C)(\C)(\C))
+
+In this example, a group that starts with (?| resets the capturing parentheses +numbers in each alternative (see +"Duplicate Group Numbers" +below). The assertions at the start of each branch check the next UTF-8 +character for values whose encoding uses 1, 2, 3, or 4 bytes, respectively. The +character's individual bytes are then captured by the appropriate number of +\C groups. +

+
SQUARE BRACKETS AND CHARACTER CLASSES
+

+An opening square bracket introduces a character class, terminated by a closing +square bracket. A closing square bracket on its own is not special by default. +If a closing square bracket is required as a member of the class, it should be +the first data character in the class (after an initial circumflex, if present) +or escaped with a backslash. This means that, by default, an empty class cannot +be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing +square bracket at the start does end the (empty) class. +

+

+A character class matches a single character in the subject. A matched +character must be in the set of characters defined by the class, unless the +first character in the class definition is a circumflex, in which case the +subject character must not be in the set defined by the class. If a circumflex +is actually required as a member of the class, ensure it is not the first +character, or escape it with a backslash. +

+

+For example, the character class [aeiou] matches any lower case vowel, while +[^aeiou] matches any character that is not a lower case vowel. Note that a +circumflex is just a convenient notation for specifying the characters that +are in the class by enumerating those that are not. A class that starts with a +circumflex is not an assertion; it still consumes a character from the subject +string, and therefore it fails if the current pointer is at the end of the +string. +

+

+Characters in a class may be specified by their code points using \o, \x, or +\N{U+hh..} in the usual way. When caseless matching is set, any letters in a +class represent both their upper case and lower case versions, so for example, +a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not +match "A", whereas a caseful version would. Note that there are two ASCII +characters, K and S, that, in addition to their lower case ASCII equivalents, +are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F (long S) +respectively when either PCRE2_UTF or PCRE2_UCP is set. +

+

+Characters that might indicate line breaks are never treated in any special way +when matching character classes, whatever line-ending sequence is in use, and +whatever setting of the PCRE2_DOTALL and PCRE2_MULTILINE options is used. A +class such as [^a] always matches one of these characters. +

+

+The generic character type escape sequences \d, \D, \h, \H, \p, \P, \s, +\S, \v, \V, \w, and \W may appear in a character class, and add the +characters that they match to the class. For example, [\dABCDEF] matches any +hexadecimal digit. In UTF modes, the PCRE2_UCP option affects the meanings of +\d, \s, \w and their upper case partners, just as it does when they appear +outside a character class, as described in the section entitled +"Generic character types" +above. The escape sequence \b has a different meaning inside a character +class; it matches the backspace character. The sequences \B, \R, and \X are +not special inside a character class. Like any other unrecognized escape +sequences, they cause an error. The same is true for \N when not followed by +an opening brace. +

+

+The minus (hyphen) character can be used to specify a range of characters in a +character class. For example, [d-m] matches any letter between d and m, +inclusive. If a minus character is required in a class, it must be escaped with +a backslash or appear in a position where it cannot be interpreted as +indicating a range, typically as the first or last character in the class, +or immediately after a range. For example, [b-d-z] matches letters in the range +b to d, a hyphen character, or z. +

+

+Perl treats a hyphen as a literal if it appears before or after a POSIX class +(see below) or before or after a character type escape such as \d or \H. +However, unless the hyphen is the last character in the class, Perl outputs a +warning in its warning mode, as this is most likely a user error. As PCRE2 has +no facility for warning, an error is given in these cases. +

+

+It is not possible to have the literal character "]" as the end character of a +range. A pattern such as [W-]46] is interpreted as a class of two characters +("W" and "-") followed by a literal string "46]", so it would match "W46]" or +"-46]". However, if the "]" is escaped with a backslash it is interpreted as +the end of range, so [W-\]46] is interpreted as a class containing a range +followed by two other characters. The octal or hexadecimal representation of +"]" can also be used to end a range. +

+

+Ranges normally include all code points between the start and end characters, +inclusive. They can also be used for code points specified numerically, for +example [\000-\037]. Ranges can include any characters that are valid for the +current mode. In any UTF mode, the so-called "surrogate" characters (those +whose code points lie between 0xd800 and 0xdfff inclusive) may not be specified +explicitly by default (the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables +this check). However, ranges such as [\x{d7ff}-\x{e000}], which include the +surrogates, are always permitted. +

+

+There is a special case in EBCDIC environments for ranges whose end points are +both specified as literal letters in the same case. For compatibility with +Perl, EBCDIC code points within the range that are not letters are omitted. For +example, [h-k] matches only four characters, even though the codes for h and k +are 0x88 and 0x92, a range of 11 code points. However, if the range is +specified numerically, for example, [\x88-\x92] or [h-\x92], all code points +are included. +

+

+If a range that includes letters is used when caseless matching is set, it +matches the letters in either case. For example, [W-c] is equivalent to +[][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if character +tables for a French locale are in use, [\xc8-\xcb] matches accented E +characters in both cases. +

+

+A circumflex can conveniently be used with the upper case character types to +specify a more restricted set of characters than the matching lower case type. +For example, the class [^\W_] matches any letter or digit, but not underscore, +whereas [\w] includes underscore. A positive character class should be read as +"something OR something OR ..." and a negative class as "NOT something AND NOT +something AND NOT ...". +

+

+The only metacharacters that are recognized in character classes are backslash, +hyphen (only where it can be interpreted as specifying a range), circumflex +(only at the start), opening square bracket (only when it can be interpreted as +introducing a POSIX class name, or for a special compatibility feature - see +the next two sections), and the terminating closing square bracket. However, +escaping other non-alphanumeric characters does no harm. +

+
POSIX CHARACTER CLASSES
+

+Perl supports the POSIX notation for character classes. This uses names +enclosed by [: and :] within the enclosing square brackets. PCRE2 also supports +this notation. For example, +

+  [01[:alpha:]%]
+
+matches "0", "1", any alphabetic character, or "%". The supported class names +are: +
+  alnum    letters and digits
+  alpha    letters
+  ascii    character codes 0 - 127
+  blank    space or tab only
+  cntrl    control characters
+  digit    decimal digits (same as \d)
+  graph    printing characters, excluding space
+  lower    lower case letters
+  print    printing characters, including space
+  punct    printing characters, excluding letters and digits and space
+  space    white space (the same as \s from PCRE 8.34)
+  upper    upper case letters
+  word     "word" characters (same as \w)
+  xdigit   hexadecimal digits
+
+The default "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13), +and space (32). If locale-specific matching is taking place, the list of space +characters may be different; there may be fewer or more of them. "Space" and +\s match the same set of characters, as do "word" and \w. +

+

+The name "word" is a Perl extension, and "blank" is a GNU extension from Perl +5.8. Another Perl extension is negation, which is indicated by a ^ character +after the colon. For example, +

+  [12[:^digit:]]
+
+matches "1", "2", or any non-digit. PCRE2 (and Perl) also recognize the POSIX +syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not +supported, and an error is given if they are encountered. +

+

+By default, characters with values greater than 127 do not match any of the +POSIX character classes, although this may be different for characters in the +range 128-255 when locale-specific matching is happening. However, in UCP mode, +unless certain options are set (see below), some of the classes are changed so +that Unicode character properties are used. This is achieved by replacing +POSIX classes with other sequences, as follows: +

+  [:alnum:]  becomes  \p{Xan}
+  [:alpha:]  becomes  \p{L}
+  [:blank:]  becomes  \h
+  [:cntrl:]  becomes  \p{Cc}
+  [:digit:]  becomes  \p{Nd}
+  [:lower:]  becomes  \p{Ll}
+  [:space:]  becomes  \p{Xps}
+  [:upper:]  becomes  \p{Lu}
+  [:word:]   becomes  \p{Xwd}
+
+Negated versions, such as [:^alpha:], use \P instead of \p. Four other POSIX +classes are handled specially in UCP mode: +

+

+[:graph:] +This matches characters that have glyphs that mark the page when printed. In +Unicode property terms, it matches all characters with the L, M, N, P, S, or Cf +properties, except for: +

+  U+061C           Arabic Letter Mark
+  U+180E           Mongolian Vowel Separator
+  U+2066 - U+2069  Various "isolate"s
+
+
+

+

+[:print:] +This matches the same characters as [:graph:] plus space characters that are +not controls, that is, characters with the Zs property. +

+

+[:punct:] +This matches all characters that have the Unicode P (punctuation) property, +plus those characters with code points less than 256 that have the S (Symbol) +property. +

+

+[:xdigit:] +In addition to the ASCII hexadecimal digits, this also matches the "fullwidth" +versions of those characters, whose Unicode code points start at U+FF10. This +is a change that was made in PCRE2 release 10.43 for Perl compatibility. +

+

+The other POSIX classes are unchanged by PCRE2_UCP, and match only characters +with code points less than 256. +

+

+There are two options that can be used to restrict the POSIX classes to ASCII +characters when PCRE2_UCP is set. The option PCRE2_EXTRA_ASCII_DIGIT affects +just [:digit:] and [:xdigit:]. Within a pattern, this can be set and unset by +(?aT) and (?-aT). The PCRE2_EXTRA_ASCII_POSIX option disables UCP processing +for all POSIX classes, including [:digit:] and [:xdigit:]. Within a pattern, +(?aP) and (?-aP) set and unset both these options for consistency. +

+
COMPATIBILITY FEATURE FOR WORD BOUNDARIES
+

+In the POSIX.2 compliant library that was included in 4.4BSD Unix, the ugly +syntax [[:<:]] and [[:>:]] is used for matching "start of word" and "end of +word". PCRE2 treats these items as follows: +

+  [[:<:]]  is converted to  \b(?=\w)
+  [[:>:]]  is converted to  \b(?<=\w)
+
+Only these exact character sequences are recognized. A sequence such as +[a[:<:]b] provokes an error for an unrecognized POSIX class name. This support is +not compatible with Perl. It is provided to help migrations from other +environments, and is best not used in any new patterns. Note that \b matches +at the start and the end of a word (see +"Simple assertions" +above), and in a Perl-style pattern the preceding or following character +normally shows which is wanted, without the need for the assertions that are +used above in order to give exactly the POSIX behaviour. Note also that the +PCRE2_UCP option changes the meaning of \w (and therefore \b) by default, so +it also affects these POSIX sequences. +

+
VERTICAL BAR
+

+Vertical bar characters are used to separate alternative patterns. For example, +the pattern +

+  gilbert|sullivan
+
+matches either "gilbert" or "sullivan". Any number of alternatives may appear, +and an empty alternative is permitted (matching the empty string). The matching +process tries each alternative in turn, from left to right, and the first one +that succeeds is used. If the alternatives are within a group +(defined below), +"succeeds" means matching the rest of the main pattern as well as the +alternative in the group. +

+
INTERNAL OPTION SETTING
+

+The settings of several options can be changed within a pattern by a sequence +of letters enclosed between "(?" and ")". The following are Perl-compatible, +and are described in detail in the +pcre2api +documentation. The option letters are: +

+  i  for PCRE2_CASELESS
+  m  for PCRE2_MULTILINE
+  n  for PCRE2_NO_AUTO_CAPTURE
+  s  for PCRE2_DOTALL
+  x  for PCRE2_EXTENDED
+  xx for PCRE2_EXTENDED_MORE
+
+For example, (?im) sets caseless, multiline matching. It is also possible to +unset these options by preceding the relevant letters with a hyphen, for +example (?-im). The two "extended" options are not independent; unsetting +either one cancels the effects of both of them. +

+

+A combined setting and unsetting such as (?im-sx), which sets PCRE2_CASELESS +and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and PCRE2_EXTENDED, is also +permitted. Only one hyphen may appear in the options string. If a letter +appears both before and after the hyphen, the option is unset. An empty options +setting "(?)" is allowed. Needless to say, it has no effect. +

+

+If the first character following (? is a circumflex, it causes all of the above +options to be unset. Letters may follow the circumflex to cause some options to +be re-instated, but a hyphen may not appear. +

+

+Some PCRE2-specific options can be changed by the same mechanism using these +pairs or individual letters: +

+  aD for PCRE2_EXTRA_ASCII_BSD
+  aS for PCRE2_EXTRA_ASCII_BSS
+  aW for PCRE2_EXTRA_ASCII_BSW
+  aP for PCRE2_EXTRA_ASCII_POSIX and PCRE2_EXTRA_ASCII_DIGIT
+  aT for PCRE2_EXTRA_ASCII_DIGIT
+  r  for PCRE2_EXTRA_CASELESS_RESTRICT
+  J  for PCRE2_DUPNAMES
+  U  for PCRE2_UNGREEDY
+
+However, except for 'r', these are not unset by (?^), which is equivalent to +(?-imnrsx). If 'a' is not followed by any of the upper case letters shown +above, it sets (or unsets) all the ASCII options. +

+

+PCRE2_EXTRA_ASCII_DIGIT has no additional effect when PCRE2_EXTRA_ASCII_POSIX +is set, but including it in (?aP) means that (?-aP) suppresses all ASCII +restrictions for POSIX classes. +

+

+When one of these option changes occurs at top level (that is, not inside group +parentheses), the change applies until a subsequent change, or the end of the +pattern. An option change within a group (see below for a description of +groups) affects only that part of the group that follows it. At the end of the +group these options are reset to the state they were before the group. For +example, +

+  (a(?i)b)c
+
+matches abc and aBc and no other strings (assuming PCRE2_CASELESS is not set +externally). Any changes made in one alternative do carry on into subsequent +branches within the same group. For example, +
+  (a(?i)b|c)
+
+matches "ab", "aB", "c", and "C", even though when matching "C" the first +branch is abandoned before the option setting. This is because the effects of +option settings happen at compile time. There would be some very weird +behaviour otherwise. +

+

+As a convenient shorthand, if any option settings are required at the start of +a non-capturing group (see the next section), the option letters may +appear between the "?" and the ":". Thus the two patterns +

+  (?i:saturday|sunday)
+  (?:(?i)saturday|sunday)
+
+match exactly the same set of strings. +

+

+Note: There are other PCRE2-specific options, applying to the whole +pattern, which can be set by the application when the compiling function is +called. In addition, the pattern can contain special leading sequences such as +(*CRLF) to override what the application has set or what has been defaulted. +Details are given in the section entitled +"Newline sequences" +above. There are also the (*UTF) and (*UCP) leading sequences that can be used +to set UTF and Unicode property modes; they are equivalent to setting the +PCRE2_UTF and PCRE2_UCP options, respectively. However, the application can set +the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, which lock out the use of the +(*UTF) and (*UCP) sequences. +

+
GROUPS
+

+Groups are delimited by parentheses (round brackets), which can be nested. +Turning part of a pattern into a group does two things: +
+
+1. It localizes a set of alternatives. For example, the pattern +

+  cat(aract|erpillar|)
+
+matches "cataract", "caterpillar", or "cat". Without the parentheses, it would +match "cataract", "erpillar" or an empty string. +
+
+2. It creates a "capture group". This means that, when the whole pattern +matches, the portion of the subject string that matched the group is passed +back to the caller, separately from the portion that matched the whole pattern. +(This applies only to the traditional matching function; the DFA matching +function does not support capturing.) +

+

+Opening parentheses are counted from left to right (starting from 1) to obtain +numbers for capture groups. For example, if the string "the red king" is +matched against the pattern +

+  the ((red|white) (king|queen))
+
+the captured substrings are "red king", "red", and "king", and are numbered 1, +2, and 3, respectively. +

+

+The fact that plain parentheses fulfil two functions is not always helpful. +There are often times when grouping is required without capturing. If an +opening parenthesis is followed by a question mark and a colon, the group +does not do any capturing, and is not counted when computing the number of any +subsequent capture groups. For example, if the string "the white queen" +is matched against the pattern +

+  the ((?:red|white) (king|queen))
+
+the captured substrings are "white queen" and "queen", and are numbered 1 and +2. The maximum number of capture groups is 65535. +

+

+As a convenient shorthand, if any option settings are required at the start of +a non-capturing group, the option letters may appear between the "?" and the +":". Thus the two patterns +

+  (?i:saturday|sunday)
+  (?:(?i)saturday|sunday)
+
+match exactly the same set of strings. Because alternative branches are tried +from left to right, and options are not reset until the end of the group is +reached, an option setting in one branch does affect subsequent branches, so +the above patterns match "SUNDAY" as well as "Saturday". +

+
DUPLICATE GROUP NUMBERS
+

+Perl 5.10 introduced a feature whereby each alternative in a group uses the +same numbers for its capturing parentheses. Such a group starts with (?| and is +itself a non-capturing group. For example, consider this pattern: +

+  (?|(Sat)ur|(Sun))day
+
+Because the two alternatives are inside a (?| group, both sets of capturing +parentheses are numbered one. Thus, when the pattern matches, you can look +at captured substring number one, whichever alternative matched. This construct +is useful when you want to capture part, but not all, of one of a number of +alternatives. Inside a (?| group, parentheses are numbered as usual, but the +number is reset at the start of each branch. The numbers of any capturing +parentheses that follow the whole group start after the highest number used in +any branch. The following example is taken from the Perl documentation. The +numbers underneath show in which buffer the captured content will be stored. +
+  # before  ---------------branch-reset----------- after
+  / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
+  # 1            2         2  3        2     3     4
+
+A backreference to a capture group uses the most recent value that is set for +the group. The following pattern matches "abcabc" or "defdef": +
+  /(?|(abc)|(def))\1/
+
+In contrast, a subroutine call to a capture group always refers to the +first one in the pattern with the given number. The following pattern matches +"abcabc" or "defabc": +
+  /(?|(abc)|(def))(?1)/
+
+A relative reference such as (?-1) is no different: it is just a convenient way +of computing an absolute group number. +

+

+If a +condition test +for a group's having matched refers to a non-unique number, the test is +true if any group with that number has matched. +

+

+An alternative approach to using this "branch reset" feature is to use +duplicate named groups, as described in the next section. +

+
NAMED CAPTURE GROUPS
+

+Identifying capture groups by number is simple, but it can be very hard to keep +track of the numbers in complicated patterns. Furthermore, if an expression is +modified, the numbers may change. To help with this difficulty, PCRE2 supports +the naming of capture groups. This feature was not added to Perl until release +5.10. Python had the feature earlier, and PCRE1 introduced it at release 4.0, +using the Python syntax. PCRE2 supports both the Perl and the Python syntax. +

+

+In PCRE2, a capture group can be named in one of three ways: (?<name>...) or +(?'name'...) as in Perl, or (?P<name>...) as in Python. Names may be up to 128 +code units long. When PCRE2_UTF is not set, they may contain only ASCII +alphanumeric characters and underscores, but must start with a non-digit. When +PCRE2_UTF is set, the syntax of group names is extended to allow any Unicode +letter or Unicode decimal digit. In other words, group names must match one of +these patterns: +

+  ^[_A-Za-z][_A-Za-z0-9]*\z   when PCRE2_UTF is not set
+  ^[_\p{L}][_\p{L}\p{Nd}]*\z  when PCRE2_UTF is set
+
+References to capture groups from other parts of the pattern, such as +backreferences, +recursion, +and +conditions, +can all be made by name as well as by number. +

+

+Named capture groups are allocated numbers as well as names, exactly as +if the names were not present. In both PCRE2 and Perl, capture groups +are primarily identified by numbers; any names are just aliases for these +numbers. The PCRE2 API provides function calls for extracting the complete +name-to-number translation table from a compiled pattern, as well as +convenience functions for extracting captured substrings by name. +

+

+Warning: When more than one capture group has the same number, as +described in the previous section, a name given to one of them applies to all +of them. Perl allows identically numbered groups to have different names. +Consider this pattern, where there are two capture groups, both numbered 1: +

+  (?|(?<AA>aa)|(?<BB>bb))
+
+Perl allows this, with both names AA and BB as aliases of group 1. Thus, after +a successful match, both names yield the same value (either "aa" or "bb"). +

+

+In an attempt to reduce confusion, PCRE2 does not allow the same group number +to be associated with more than one name. The example above provokes a +compile-time error. However, there is still scope for confusion. Consider this +pattern: +

+  (?|(?<AA>aa)|(bb))
+
+Although the second group number 1 is not explicitly named, the name AA is +still an alias for any group 1. Whether the pattern matches "aa" or "bb", a +reference by name to group AA yields the matched string. +

+

+By default, a name must be unique within a pattern, except that duplicate names +are permitted for groups with the same number, for example: +

+  (?|(?<AA>aa)|(?<AA>bb))
+
+The duplicate name constraint can be disabled by setting the PCRE2_DUPNAMES +option at compile time, or by the use of (?J) within the pattern, as described +in the section entitled +"Internal Option Setting" +above. +

+

+Duplicate names can be useful for patterns where only one instance of the named +capture group can match. Suppose you want to match the name of a weekday, +either as a 3-letter abbreviation or as the full name, and in both cases you +want to extract the abbreviation. This pattern (ignoring the line breaks) does +the job: +

+  (?J)
+  (?<DN>Mon|Fri|Sun)(?:day)?|
+  (?<DN>Tue)(?:sday)?|
+  (?<DN>Wed)(?:nesday)?|
+  (?<DN>Thu)(?:rsday)?|
+  (?<DN>Sat)(?:urday)?
+
+There are five capture groups, but only one is ever set after a match. The +convenience functions for extracting the data by name return the substring for +the first (and in this example, the only) group of that name that matched. This +saves searching to find which numbered group it was. (An alternative way of +solving this problem is to use a "branch reset" group, as described in the +previous section.) +

+

+If you make a backreference to a non-unique named group from elsewhere in the +pattern, the groups to which the name refers are checked in the order in which +they appear in the overall pattern. The first one that is set is used for the +reference. For example, this pattern matches both "foofoo" and "barbar" but not +"foobar" or "barfoo": +

+  (?J)(?:(?<n>foo)|(?<n>bar))\k<n>
+
+
+

+

+If you make a subroutine call to a non-unique named group, the one that +corresponds to the first occurrence of the name is used. In the absence of +duplicate numbers this is the one with the lowest number. +

+

+If you use a named reference in a condition +test (see the +section about conditions +below), either to check whether a capture group has matched, or to check for +recursion, all groups with the same name are tested. If the condition is true +for any one of them, the overall condition is true. This is the same behaviour +as testing by number. For further details of the interfaces for handling named +capture groups, see the +pcre2api +documentation. +

+
REPETITION
+

+Repetition is specified by quantifiers, which may follow any one of these +items: +

+  a literal data character
+  the dot metacharacter
+  the \C escape sequence
+  the \R escape sequence
+  the \X escape sequence
+  any escape sequence that matches a single character
+  a character class
+  a backreference
+  a parenthesized group (including lookaround assertions)
+  a subroutine call (recursive or otherwise)
+
+If a quantifier does not follow a repeatable item, an error occurs. The +general repetition quantifier specifies a minimum and maximum number of +permitted matches by giving two numbers in curly brackets (braces), separated +by a comma. The numbers must be less than 65536, and the first must be less +than or equal to the second. For example, +
+  z{2,4}
+
+matches "zz", "zzz", or "zzzz". A closing brace on its own is not a special +character. If the second number is omitted, but the comma is present, there is +no upper limit; if the second number and the comma are both omitted, the +quantifier specifies an exact number of required matches. Thus +
+  [aeiou]{3,}
+
+matches at least 3 successive vowels, but may match many more, whereas +
+  \d{8}
+
+matches exactly 8 digits. If the first number is omitted, the lower limit is +taken as zero; in this case the upper limit must be present. +
+  X{,4} is interpreted as X{0,4}
+
+This is a change in behaviour that happened in Perl 5.34.0 and PCRE2 10.43. In +earlier versions such a sequence was not interpreted as a quantifier. Other +regular expression engines may behave either way. +

+

+If the characters that follow an opening brace do not match the syntax of a +quantifier, the brace is taken as a literal character. In particular, this +means that {,} is a literal string of three characters. +

+

+Note that not every opening brace is potentially the start of a quantifier +because braces are used in other items such as \N{U+345} or \k{name}. +

+

+In UTF modes, quantifiers apply to characters rather than to individual code +units. Thus, for example, \x{100}{2} matches two characters, each of +which is represented by a two-byte sequence in a UTF-8 string. Similarly, +\X{3} matches three Unicode extended grapheme clusters, each of which may be +several code units long (and they may be of different lengths). +

+

+The quantifier {0} is permitted, causing the expression to behave as if the +previous item and the quantifier were not present. This may be useful for +capture groups that are referenced as +subroutines +from elsewhere in the pattern (but see also the section entitled +"Defining capture groups for use by reference only" +below). Except for parenthesized groups, items that have a {0} quantifier are +omitted from the compiled pattern. +

+

+For convenience, the three most common quantifiers have single-character +abbreviations: +

+  *    is equivalent to {0,}
+  +    is equivalent to {1,}
+  ?    is equivalent to {0,1}
+
+It is possible to construct infinite loops by following a group that can match +no characters with a quantifier that has no upper limit, for example: +
+  (a?)*
+
+Earlier versions of Perl and PCRE1 used to give an error at compile time for +such patterns. However, because there are cases where this can be useful, such +patterns are now accepted, but whenever an iteration of such a group matches no +characters, matching moves on to the next item in the pattern instead of +repeatedly matching an empty string. This does not prevent backtracking into +any of the iterations if a subsequent item fails to match. +

+

+By default, quantifiers are "greedy", that is, they match as much as possible +(up to the maximum number of permitted repetitions), without causing the rest +of the pattern to fail. The classic example of where this gives problems is in +trying to match comments in C programs. These appear between /* and */ and +within the comment, individual * and / characters may appear. An attempt to +match C comments by applying the pattern +

+  /\*.*\*/
+
+to the string +
+  /* first comment */  not comment  /* second comment */
+
+fails, because it matches the entire string owing to the greediness of the .* +item. However, if a quantifier is followed by a question mark, it ceases to be +greedy, and instead matches the minimum number of times possible, so the +pattern +
+  /\*.*?\*/
+
+does the right thing with C comments. The meaning of the various quantifiers is +not otherwise changed, just the preferred number of matches. Do not confuse +this use of question mark with its use as a quantifier in its own right. +Because it has two uses, it can sometimes appear doubled, as in +
+  \d??\d
+
+which matches one digit by preference, but can match two if that is the only +way the rest of the pattern matches. +

+

+If the PCRE2_UNGREEDY option is set (an option that is not available in Perl), +the quantifiers are not greedy by default, but individual ones can be made +greedy by following them with a question mark. In other words, it inverts the +default behaviour. +

+

+When a parenthesized group is quantified with a minimum repeat count that +is greater than 1 or with a limited maximum, more memory is required for the +compiled pattern, in proportion to the size of the minimum or maximum. +

+

+If a pattern starts with .* or .{0,} and the PCRE2_DOTALL option (equivalent +to Perl's /s) is set, thus allowing the dot to match newlines, the pattern is +implicitly anchored, because whatever follows will be tried against every +character position in the subject string, so there is no point in retrying the +overall match at any position after the first. PCRE2 normally treats such a +pattern as though it were preceded by \A. +

+

+In cases where it is known that the subject string contains no newlines, it is +worth setting PCRE2_DOTALL in order to obtain this optimization, or +alternatively, using ^ to indicate anchoring explicitly. +

+

+However, there are some cases where the optimization cannot be used. When .* +is inside capturing parentheses that are the subject of a backreference +elsewhere in the pattern, a match at the start may fail where a later one +succeeds. Consider, for example: +

+  (.*)abc\1
+
+If the subject is "xyz123abc123" the match point is the fourth character. For +this reason, such a pattern is not implicitly anchored. +

+

+Another case where implicit anchoring is not applied is when the leading .* is +inside an atomic group. Once again, a match at the start may fail where a later +one succeeds. Consider this pattern: +

+  (?>.*?a)b
+
+It matches "ab" in the subject "aab". The use of the backtracking control verbs +(*PRUNE) and (*SKIP) also disables this optimization, and there is an option, +PCRE2_NO_DOTSTAR_ANCHOR, to do so explicitly. +

+

+When a capture group is repeated, the value captured is the substring that +matched the final iteration. For example, after +

+  (tweedle[dume]{3}\s*)+
+
+has matched "tweedledum tweedledee" the value of the captured substring is +"tweedledee". However, if there are nested capture groups, the corresponding +captured values may have been set in previous iterations. For example, after +
+  (a|(b))+
+
+matches "aba" the value of the second captured substring is "b". +

+
ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
+

+With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy") +repetition, failure of what follows normally causes the repeated item to be +re-evaluated to see if a different number of repeats allows the rest of the +pattern to match. Sometimes it is useful to prevent this, either to change the +nature of the match, or to cause it to fail earlier than it otherwise might, when +the author of the pattern knows there is no point in carrying on. +

+

+Consider, for example, the pattern \d+foo when applied to the subject line +

+  123456bar
+
+After matching all 6 digits and then failing to match "foo", the normal +action of the matcher is to try again with only 5 digits matching the \d+ +item, and then with 4, and so on, before ultimately failing. "Atomic grouping" +(a term taken from Jeffrey Friedl's book) provides the means for specifying +that once a group has matched, it is not to be re-evaluated in this way. +

+

+If we use atomic grouping for the previous example, the matcher gives up +immediately on failing to match "foo" the first time. The notation is a kind of +special parenthesis, starting with (?> as in this example: +

+  (?>\d+)foo
+
+Perl 5.28 introduced an experimental alphabetic form starting with (* which may +be easier to remember: +
+  (*atomic:\d+)foo
+
+This kind of parenthesized group "locks up" the part of the pattern it contains +once it has matched, and a failure further into the pattern is prevented from +backtracking into it. Backtracking past it to previous items, however, works as +normal. +

+

+An alternative description is that a group of this type matches exactly the +string of characters that an identical standalone pattern would match, if +anchored at the current point in the subject string. +

+

+Atomic groups are not capture groups. Simple cases such as the above example +can be thought of as a maximizing repeat that must swallow everything it can. +So, while both \d+ and \d+? are prepared to adjust the number of digits they +match in order to make the rest of the pattern match, (?>\d+) can only match +an entire sequence of digits. +

+

+Atomic groups in general can of course contain arbitrarily complicated +expressions, and can be nested. However, when the contents of an atomic +group is just a single repeated item, as in the example above, a simpler +notation, called a "possessive quantifier" can be used. This consists of an +additional + character following a quantifier. Using this notation, the +previous example can be rewritten as +

+  \d++foo
+
+Note that a possessive quantifier can be used with an entire group, for +example: +
+  (abc|xyz){2,3}+
+
+Possessive quantifiers are always greedy; the setting of the PCRE2_UNGREEDY +option is ignored. They are a convenient notation for the simpler forms of +atomic group. However, there is no difference in the meaning of a possessive +quantifier and the equivalent atomic group, though there may be a performance +difference; possessive quantifiers should be slightly faster. +

+

+The possessive quantifier syntax is an extension to the Perl 5.8 syntax. +Jeffrey Friedl originated the idea (and the name) in the first edition of his +book. Mike McCloskey liked it, so implemented it when he built Sun's Java +package, and PCRE1 copied it from there. It found its way into Perl at release +5.10. +

+

+PCRE2 has an optimization that automatically "possessifies" certain simple +pattern constructs. For example, the sequence A+B is treated as A++B because +there is no point in backtracking into a sequence of A's when B must follow. +This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting +the pattern with (*NO_AUTO_POSSESS). +

+

+When a pattern contains an unlimited repeat inside a group that can itself be +repeated an unlimited number of times, the use of an atomic group is the only +way to avoid some failing matches taking a very long time indeed. The pattern +

+  (\D+|<\d+>)*[!?]
+
+matches an unlimited number of substrings that either consist of non-digits, or +digits enclosed in <>, followed by either ! or ?. When it matches, it runs +quickly. However, if it is applied to +
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+
+it takes a long time before reporting failure. This is because the string can +be divided between the internal \D+ repeat and the external * repeat in a +large number of ways, and all have to be tried. (The example uses [!?] rather +than a single character at the end, because both PCRE2 and Perl have an +optimization that allows for fast failure when a single character is used. They +remember the last single character that is required for a match, and fail early +if it is not present in the string.) If the pattern is changed so that it uses +an atomic group, like this: +
+  ((?>\D+)|<\d+>)*[!?]
+
+sequences of non-digits cannot be broken, and failure happens quickly. +

+
BACKREFERENCES
+

+Outside a character class, a backslash followed by a digit greater than 0 (and +possibly further digits) is a backreference to a capture group earlier (that +is, to its left) in the pattern, provided there have been that many previous +capture groups. +

+

+However, if the decimal number following the backslash is less than 8, it is +always taken as a backreference, and causes an error only if there are not that +many capture groups in the entire pattern. In other words, the group that is +referenced need not be to the left of the reference for numbers less than 8. A +"forward backreference" of this type can make sense when a repetition is +involved and the group to the right has participated in an earlier iteration. +

+

+It is not possible to have a numerical "forward backreference" to a group whose +number is 8 or more using this syntax because a sequence such as \50 is +interpreted as a character defined in octal. See the subsection entitled +"Non-printing characters" +above +for further details of the handling of digits following a backslash. Other +forms of backreferencing do not suffer from this restriction. In particular, +there is no problem when named capture groups are used (see below). +

+

+Another way of avoiding the ambiguity inherent in the use of digits following a +backslash is to use the \g escape sequence. This escape must be followed by a +signed or unsigned number, optionally enclosed in braces. These examples are +all identical: +

+  (ring), \1
+  (ring), \g1
+  (ring), \g{1}
+
+An unsigned number specifies an absolute reference without the ambiguity that +is present in the older syntax. It is also useful when literal digits follow +the reference. A signed number is a relative reference. Consider this example: +
+  (abc(def)ghi)\g{-1}
+
+The sequence \g{-1} is a reference to the capture group whose number is one +less than the number of the next group to be started, so in this example (where +the next group would be numbered 3) it is equivalent to \2, and \g{-2} would +be equivalent to \1. Note that if this construct is inside a capture group, +that group is included in the count, so in this example \g{-2} also refers to +group 1: +
+  (A)(\g{-2}B)
+
+The use of relative references can be helpful in long patterns, and also in +patterns that are created by joining together fragments that contain references +within themselves. +

+

+The sequence \g{+1} is a reference to the next capture group that is started +after this item, and \g{+2} refers to the one after that, and so on. This kind +of forward reference can be useful in patterns that repeat. Perl does not +support the use of + in this way. +

+

+A backreference matches whatever actually most recently matched the capture +group in the current subject string, rather than anything at all that matches +the group (see +"Groups as subroutines" +below for a way of doing that). So the pattern +

+  (sens|respons)e and \1ibility
+
+matches "sense and sensibility" and "response and responsibility", but not +"sense and responsibility". If caseful matching is in force at the time of the +backreference, the case of letters is relevant. For example, +
+  ((?i)rah)\s+\1
+
+matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original +capture group is matched caselessly. +

+

+There are several different ways of writing backreferences to named capture +groups. The .NET syntax is \k{name}, the Python syntax is (?P=name), and the +original Perl syntax is \k<name> or \k'name'. All of these are now supported +by both Perl and PCRE2. Perl 5.10's unified backreference syntax, in which \g +can be used for both numeric and named references, is also supported by PCRE2. +We could rewrite the above example in any of the following ways: +

+  (?<p1>(?i)rah)\s+\k<p1>
+  (?'p1'(?i)rah)\s+\k{p1}
+  (?P<p1>(?i)rah)\s+(?P=p1)
+  (?<p1>(?i)rah)\s+\g{p1}
+
+A capture group that is referenced by name may appear in the pattern before or +after the reference. +

+

+There may be more than one backreference to the same group. If a group has not +actually been used in a particular match, backreferences to it always fail by +default. For example, the pattern +

+  (a|(bc))\2
+
+always fails if it starts to match "a" rather than "bc". However, if the +PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a backreference to an +unset value matches an empty string. +

+

+Because there may be many capture groups in a pattern, all digits following a +backslash are taken as part of a potential backreference number. If the pattern +continues with a digit character, some delimiter must be used to terminate the +backreference. If the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, this +can be white space. Otherwise, the \g{} syntax or an empty comment (see +"Comments" +below) can be used. +

+
+Recursive backreferences +
+

+A backreference that occurs inside the group to which it refers fails when the +group is first used, so, for example, (a\1) never matches. However, such +references can be useful inside repeated groups. For example, the pattern +

+  (a|b\1)+
+
+matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of +the group, the backreference matches the character string corresponding to the +previous iteration. In order for this to work, the pattern must be such that +the first iteration does not need to match the backreference. This can be done +using alternation, as in the example above, or by a quantifier with a minimum +of zero. +

+

+For versions of PCRE2 earlier than 10.25, backreferences of this type used to +cause the group that they reference to be treated as an +atomic group. +This restriction no longer applies, and backtracking into such groups can occur +as normal. +

+
ASSERTIONS
+

+An assertion is a test on the characters following or preceding the current +matching point that does not consume any characters. The simple assertions +coded as \b, \B, \A, \G, \Z, \z, ^ and $ are described +above. +

+

+More complicated assertions are coded as parenthesized groups. There are two +kinds: those that look ahead of the current position in the subject string, and +those that look behind it, and in each case an assertion may be positive (must +match for the assertion to be true) or negative (must not match for the +assertion to be true). An assertion group is matched in the normal way, +and if it is true, matching continues after it, but with the matching position +in the subject string reset to what it was before the assertion was processed. +

+

+The Perl-compatible lookaround assertions are atomic. If an assertion is true, +but there is a subsequent matching failure, there is no backtracking into the +assertion. However, there are some cases where non-atomic assertions can be +useful. PCRE2 has some support for these, described in the section entitled +"Non-atomic assertions" +below, but they are not Perl-compatible. +

+

+A lookaround assertion may appear as the condition in a +conditional group +(see below). In this case, the result of matching the assertion determines +which branch of the condition is followed. +

+

+Assertion groups are not capture groups. If an assertion contains capture +groups within it, these are counted for the purposes of numbering the capture +groups in the whole pattern. Within each branch of an assertion, locally +captured substrings may be referenced in the usual way. For example, a sequence +such as (.)\g{-1} can be used to check that two adjacent characters are the +same. +

+

+When a branch within an assertion fails to match, any substrings that were +captured are discarded (as happens with any pattern branch that fails to +match). A negative assertion is true only when all its branches fail to match; +this means that no captured substrings are ever retained after a successful +negative assertion. When an assertion contains a matching branch, what happens +depends on the type of assertion. +

+

+For a positive assertion, internally captured substrings in the successful +branch are retained, and matching continues with the next pattern item after +the assertion. For a negative assertion, a matching branch means that the +assertion is not true. If such an assertion is being used as a condition in a +conditional group +(see below), captured substrings are retained, because matching continues with +the "no" branch of the condition. For other failing negative assertions, +control passes to the previous backtracking point, thus discarding any captured +strings within the assertion. +

+

+Most assertion groups may be repeated; though it makes no sense to assert the +same thing several times, the side effect of capturing in positive assertions +may occasionally be useful. However, an assertion that forms the condition for +a conditional group may not be quantified. PCRE2 used to restrict the +repetition of assertions, but from release 10.35 the only restriction is that +an unlimited maximum repetition is changed to be one more than the minimum. For +example, {3,} is treated as {3,4}. +

+
+Alphabetic assertion names +
+

+Traditionally, symbolic sequences such as (?= and (?<= have been used to +specify lookaround assertions. Perl 5.28 introduced some experimental +alphabetic alternatives which might be easier to remember. They all start with +(* instead of (? and must be written using lower case letters. PCRE2 supports +the following synonyms: +

+  (*positive_lookahead:  or (*pla: is the same as (?=
+  (*negative_lookahead:  or (*nla: is the same as (?!
+  (*positive_lookbehind: or (*plb: is the same as (?<=
+  (*negative_lookbehind: or (*nlb: is the same as (?<!
+
+For example, (*pla:foo) is the same assertion as (?=foo). In the following +sections, the various assertions are described using the original symbolic +forms. +

+
+Lookahead assertions +
+

+Lookahead assertions start with (?= for positive assertions and (?! for +negative assertions. For example, +

+  \w+(?=;)
+
+matches a word followed by a semicolon, but does not include the semicolon in +the match, and +
+  foo(?!bar)
+
+matches any occurrence of "foo" that is not followed by "bar". Note that the +apparently similar pattern +
+  (?!foo)bar
+
+does not find an occurrence of "bar" that is preceded by something other than +"foo"; it finds any occurrence of "bar" whatsoever, because the assertion +(?!foo) is always true when the next three characters are "bar". A +lookbehind assertion is needed to achieve the other effect. +

+

+If you want to force a matching failure at some point in a pattern, the most +convenient way to do it is with (?!) because an empty string always matches, so +an assertion that requires there not to be an empty string must always fail. +The backtracking control verb (*FAIL) or (*F) is a synonym for (?!). +

+
+Lookbehind assertions +
+

+Lookbehind assertions start with (?<= for positive assertions and (?<! for +negative assertions. For example, +

+  (?<!foo)bar
+
+does find an occurrence of "bar" that is not preceded by "foo". The contents of +a lookbehind assertion are restricted such that there must be a known maximum +to the lengths of all the strings it matches. There are two cases: +

+

+If every top-level alternative matches a fixed length, for example +

+  (?<=colour|color)
+
+there is a limit of 65535 characters to the lengths, which do not have to be +the same, as this example demonstrates. This is the only kind of lookbehind +supported by PCRE2 versions earlier than 10.43 and by the alternative matching +function pcre2_dfa_match(). +

+

+In PCRE2 10.43 and later, pcre2_match() supports lookbehind assertions in +which one or more top-level alternatives can match more than one string length, +for example +

+  (?<=colou?r)
+
+The maximum matching length for any branch of the lookbehind is limited to a +value set by the calling program (default 255 characters). Unlimited repetition +(for example \d*) is not supported. In some cases, the escape sequence \K +(see above) +can be used instead of a lookbehind assertion at the start of a pattern to get +round the length limit restriction. +

+

+In UTF-8 and UTF-16 modes, PCRE2 does not allow the \C escape (which matches a +single code unit even in a UTF mode) to appear in lookbehind assertions, +because it makes it impossible to calculate the length of the lookbehind. The +\X and \R escapes, which can match different numbers of code units, are never +permitted in lookbehinds. +

+

+"Subroutine" +calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long +as the called capture group matches a limited-length string. However, +recursion, +that is, a "subroutine" call into a group that is already active, +is not supported. +

+

+PCRE2 supports backreferences in lookbehinds, but only if certain conditions +are met. The PCRE2_MATCH_UNSET_BACKREF option must not be set, there must be no +use of (?| in the pattern (it creates duplicate group numbers), and if the +backreference is by name, the name must be unique. Of course, the referenced +group must itself match a limited length substring. The following pattern +matches words containing at least two characters that begin and end with the +same character: +

+   \b(\w)\w++(?<=\1)
+
+

+

+Possessive quantifiers can be used in conjunction with lookbehind assertions to +specify efficient matching at the end of subject strings. Consider a simple +pattern such as +

+  abcd$
+
+when applied to a long string that does not match. Because matching proceeds +from left to right, PCRE2 will look for each "a" in the subject and then see if +what follows matches the rest of the pattern. If the pattern is specified as +
+  ^.*abcd$
+
+the initial .* matches the entire string at first, but when this fails (because +there is no following "a"), it backtracks to match all but the last character, +then all but the last two characters, and so on. Once again the search for "a" +covers the entire string, from right to left, so we are no better off. However, +if the pattern is written as +
+  ^.*+(?<=abcd)
+
+there can be no backtracking for the .*+ item because of the possessive +quantifier; it can match only the entire string. The subsequent lookbehind +assertion does a single test on the last four characters. If it fails, the +match fails immediately. For long strings, this approach makes a significant +difference to the processing time. +

+
+Using multiple assertions +
+

+Several assertions (of any sort) may occur in succession. For example, +

+  (?<=\d{3})(?<!999)foo
+
+matches "foo" preceded by three digits that are not "999". Notice that each of +the assertions is applied independently at the same point in the subject +string. First there is a check that the previous three characters are all +digits, and then there is a check that the same three characters are not "999". +This pattern does not match "foo" preceded by six characters, the first three +of which are digits and the last three of which are not "999". For example, it +doesn't match "123abcfoo". A pattern to do that is +
+  (?<=\d{3}...)(?<!999)foo
+
+This time the first assertion looks at the preceding six characters, checking +that the first three are digits, and then the second assertion checks that the +preceding three characters are not "999". +

+

+Assertions can be nested in any combination. For example, +

+  (?<=(?<!foo)bar)baz
+
+matches an occurrence of "baz" that is preceded by "bar" which in turn is not +preceded by "foo", while +
+  (?<=\d{3}(?!999)...)foo
+
+is another pattern that matches "foo" preceded by three digits and any three +characters that are not "999". +

+
NON-ATOMIC ASSERTIONS
+

+Traditional lookaround assertions are atomic. That is, if an assertion is true, +but there is a subsequent matching failure, there is no backtracking into the +assertion. However, there are some cases where non-atomic positive assertions +can be useful. PCRE2 provides these using the following syntax: +

+  (*non_atomic_positive_lookahead:  or (*napla: or (?*
+  (*non_atomic_positive_lookbehind: or (*naplb: or (?<*
+
+Consider the problem of finding the right-most word in a string that also +appears earlier in the string, that is, it must appear at least twice in total. +This pattern returns the required result as captured substring 1: +
+  ^(?x)(*napla: .* \b(\w++)) (?> .*? \b\1\b ){2}
+
+For a subject such as "word1 word2 word3 word2 word3 word4" the result is +"word3". How does it work? At the start, ^(?x) anchors the pattern and sets the +"x" option, which causes white space (introduced for readability) to be +ignored. Inside the assertion, the greedy .* at first consumes the entire +string, but then has to backtrack until the rest of the assertion can match a +word, which is captured by group 1. In other words, when the assertion first +succeeds, it captures the right-most word in the string. +

+

+The current matching point is then reset to the start of the subject, and the +rest of the pattern match checks for two occurrences of the captured word, +using an ungreedy .*? to scan from the left. If this succeeds, we are done, but +if the last word in the string does not occur twice, this part of the pattern +fails. If a traditional atomic lookahead (?= or (*pla: had been used, the +assertion could not be re-entered, and the whole match would fail. The pattern +would succeed only if the very last word in the subject was found twice. +

+

+Using a non-atomic lookahead, however, means that when the last word does not +occur twice in the string, the lookahead can backtrack and find the second-last +word, and so on, until either the match succeeds, or all words have been +tested. +

+

+Two conditions must be met for a non-atomic assertion to be useful: the +contents of one or more capturing groups must change after a backtrack into the +assertion, and there must be a backreference to a changed group later in the +pattern. If this is not the case, the rest of the pattern match fails exactly +as before because nothing has changed, so using a non-atomic assertion just +wastes resources. +

+

+There is one exception to backtracking into a non-atomic assertion. If an +(*ACCEPT) control verb is triggered, the assertion succeeds atomically. That +is, a subsequent match failure cannot backtrack into the assertion. +

+

+Non-atomic assertions are not supported by the alternative matching function +pcre2_dfa_match(). They are supported by JIT, but only if they do not +contain any control verbs such as (*ACCEPT). (This may change in future). Note +that assertions that appear as conditions for +conditional groups +(see below) must be atomic. +

+
SCRIPT RUNS
+

+In concept, a script run is a sequence of characters that are all from the same +Unicode script such as Latin or Greek. However, because some scripts are +commonly used together, and because some diacritical and other marks are used +with multiple scripts, it is not that simple. There is a full description of +the rules that PCRE2 uses in the section entitled +"Script Runs" +in the +pcre2unicode +documentation. +

+

+If part of a pattern is enclosed between (*script_run: or (*sr: and a closing +parenthesis, it fails if the sequence of characters that it matches is not a +script run. After a failure, normal backtracking occurs. Script runs can be +used to detect spoofing attacks using characters that look the same, but are +from different scripts. The string "paypal.com" is an infamous example, where +the letters could be a mixture of Latin and Cyrillic. This pattern ensures that +the matched characters in a sequence of non-spaces that follow white space are +a script run: +

+  \s+(*sr:\S+)
+
+To be sure that they are all from the Latin script (for example), a lookahead +can be used: +
+  \s+(?=\p{Latin})(*sr:\S+)
+
+This works as long as the first character is expected to be a character in that +script, and not (for example) punctuation, which is allowed with any script. If +this is not the case, a more creative lookahead is needed. For example, if +digits, underscore, and dots are permitted at the start: +
+  \s+(?=[0-9_.]*\p{Latin})(*sr:\S+)
+
+
+

+

+In many cases, backtracking into a script run pattern fragment is not +desirable. The script run can employ an atomic group to prevent this. Because +this is a common requirement, a shorthand notation is provided by +(*atomic_script_run: or (*asr: +

+  (*asr:...) is the same as (*sr:(?>...))
+
+Note that the atomic group is inside the script run. Putting it outside would +not prevent backtracking into the script run pattern. +

+

+Support for script runs is not available if PCRE2 is compiled without Unicode +support. A compile-time error is given if any of the above constructs is +encountered. Script runs are not supported by the alternative matching function, +pcre2_dfa_match(), because they use the same mechanism as capturing +parentheses. +

+

+Warning: The (*ACCEPT) control verb +(see below) +should not be used within a script run group, because it causes an immediate +exit from the group, bypassing the script run checking. +

+
CONDITIONAL GROUPS
+

+It is possible to cause the matching process to obey a pattern fragment +conditionally or to choose between two alternative fragments, depending on +the result of an assertion, or whether a specific capture group has +already been matched. The two possible forms of conditional group are: +

+  (?(condition)yes-pattern)
+  (?(condition)yes-pattern|no-pattern)
+
+If the condition is satisfied, the yes-pattern is used; otherwise the +no-pattern (if present) is used. An absent no-pattern is equivalent to an empty +string (it always matches). If there are more than two alternatives in the +group, a compile-time error occurs. Each of the two alternatives may itself +contain nested groups of any form, including conditional groups; the +restriction to two alternatives applies only at the level of the condition +itself. This pattern fragment is an example where the alternatives are complex: +
+  (?(1) (A|B|C) | (D | (?(2)E|F) | E) )
+
+
+

+

+There are five kinds of condition: references to capture groups, references to +recursion, two pseudo-conditions called DEFINE and VERSION, and assertions. +

+
+Checking for a used capture group by number +
+

+If the text between the parentheses consists of a sequence of digits, the +condition is true if a capture group of that number has previously matched. If +there is more than one capture group with the same number (see the earlier +section about duplicate group numbers), +the condition is true if any of them have matched. An alternative notation, +which is a PCRE2 extension, not supported by Perl, is to precede the digits +with a plus or minus sign. In this case, the group number is relative rather +than absolute. The most recently opened capture group (which could be enclosing +this condition) can be referenced by (?(-1), the next most recent by (?(-2), +and so on. Inside loops it can also make sense to refer to subsequent groups. +The next capture group to be opened can be referenced as (?(+1), and so on. The +value zero in any of these forms is not used; it provokes a compile-time error. +

+

+Consider the following pattern, which contains non-significant white space to +make it more readable (assume the PCRE2_EXTENDED option) and to divide it into +three parts for ease of discussion: +

+  ( \( )?    [^()]+    (?(1) \) )
+
+The first part matches an optional opening parenthesis, and if that +character is present, sets it as the first captured substring. The second part +matches one or more characters that are not parentheses. The third part is a +conditional group that tests whether or not the first capture group +matched. If it did, that is, if the subject started with an opening parenthesis, +the condition is true, and so the yes-pattern is executed and a closing +parenthesis is required. Otherwise, since no-pattern is not present, the +conditional group matches nothing. In other words, this pattern matches a +sequence of non-parentheses, optionally enclosed in parentheses. +

+

+If you were embedding this pattern in a larger one, you could use a relative +reference: +

+  ...other stuff... ( \( )?    [^()]+    (?(-1) \) ) ...
+
+This makes the fragment independent of the parentheses in the larger pattern. +

+
+Checking for a used capture group by name +
+

+Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used +capture group by name. For compatibility with earlier versions of PCRE1, which +had this facility before Perl, the syntax (?(name)...) is also recognized. +Note, however, that undelimited names consisting of the letter R followed by +digits are ambiguous (see the following section). Rewriting the above example +to use a named group gives this: +

+  (?<OPEN> \( )?    [^()]+    (?(<OPEN>) \) )
+
+If the name used in a condition of this kind is a duplicate, the test is +applied to all groups of the same name, and is true if any one of them has +matched. +

+
+Checking for pattern recursion +
+

+"Recursion" in this sense refers to any subroutine-like call from one part of +the pattern to another, whether or not it is actually recursive. See the +sections entitled +"Recursive patterns" +and +"Groups as subroutines" +below for details of recursion and subroutine calls. +

+

+If a condition is the string (R), and there is no capture group with the name +R, the condition is true if matching is currently in a recursion or subroutine +call to the whole pattern or any capture group. If digits follow the letter R, +and there is no group with that name, the condition is true if the most recent +call is into a group with the given number, which must exist somewhere in the +overall pattern. This is a contrived example that is equivalent to a+b: +

+  ((?(R1)a+|(?1)b))
+
+However, in both cases, if there is a capture group with a matching name, the +condition tests for its being set, as described in the section above, instead +of testing for recursion. For example, creating a group with the name R1 by +adding (?<R1>) to the above pattern completely changes its meaning. +

+

+If a name preceded by ampersand follows the letter R, for example: +

+  (?(R&name)...)
+
+the condition is true if the most recent recursion is into a group of that name +(which must exist within the pattern). +

+

+This condition does not check the entire recursion stack. It tests only the +current level. If the name used in a condition of this kind is a duplicate, the +test is applied to all groups of the same name, and is true if any one of +them is the most recent recursion. +

+

+At "top level", all these recursion test conditions are false. +

+
+Defining capture groups for use by reference only +
+

+If the condition is the string (DEFINE), the condition is always false, even if +there is a group with the name DEFINE. In this case, there may be only one +alternative in the rest of the conditional group. It is always skipped if +control reaches this point in the pattern; the idea of DEFINE is that it can be +used to define subroutines that can be referenced from elsewhere. (The use of +subroutines +is described below.) For example, a pattern to match an IPv4 address such as +"192.168.23.245" could be written like this (ignore white space and line +breaks): +

+  (?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
+  \b (?&byte) (\.(?&byte)){3} \b
+
+The first part of the pattern is a DEFINE group inside which another group +named "byte" is defined. This matches an individual component of an IPv4 +address (a number less than 256). When matching takes place, this part of the +pattern is skipped because DEFINE acts like a false condition. The rest of the +pattern uses references to the named group to match the four dot-separated +components of an IPv4 address, insisting on a word boundary at each end. +

+
+Checking the PCRE2 version +
+

+Programs that link with a PCRE2 library can check the version by calling +pcre2_config() with appropriate arguments. Users of applications that do +not have access to the underlying code cannot do this. A special "condition" +called VERSION exists to allow such users to discover which version of PCRE2 +they are dealing with by using this condition to match a string such as +"yesno". VERSION must be followed either by "=" or ">=" and a version number. +For example: +

+  (?(VERSION>=10.4)yes|no)
+
+This pattern matches "yes" if the PCRE2 version is greater than or equal to 10.4, or +"no" otherwise. The fractional part of the version number may not contain more +than two digits. +

+
+Assertion conditions +
+

+If the condition is not in any of the above formats, it must be a parenthesized +assertion. This may be a positive or negative lookahead or lookbehind +assertion. However, it must be a traditional atomic assertion, not one of the +non-atomic assertions. +

+

+Consider this pattern, again containing non-significant white space, and with +the two alternatives on the second line: +

+  (?(?=[^a-z]*[a-z])
+  \d{2}-[a-z]{3}-\d{2}  |  \d{2}-\d{2}-\d{2} )
+
+The condition is a positive lookahead assertion that matches an optional +sequence of non-letters followed by a letter. In other words, it tests for the +presence of at least one letter in the subject. If a letter is found, the +subject is matched against the first alternative; otherwise it is matched +against the second. This pattern matches strings in one of the two forms +dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits. +

+

+When an assertion that is a condition contains capture groups, any +capturing that occurs in a matching branch is retained afterwards, for both +positive and negative assertions, because matching always continues after the +assertion, whether it succeeds or fails. (Compare non-conditional assertions, +for which captures are retained only for positive assertions that succeed.) +

+
COMMENTS
+

+There are two ways of including comments in patterns that are processed by +PCRE2. In both cases, the start of the comment must not be in a character +class, nor in the middle of any other sequence of related characters such as +(?: or a group name or number. The characters that make up a comment play +no part in the pattern matching. +

+

+The sequence (?# marks the start of a comment that continues up to the next +closing parenthesis. Nested parentheses are not permitted. If the +PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, an unescaped # character +also introduces a comment, which in this case continues to immediately after +the next newline character or character sequence in the pattern. Which +characters are interpreted as newlines is controlled by an option passed to the +compiling function or by a special sequence at the start of the pattern, as +described in the section entitled +"Newline conventions" +above. Note that the end of this type of comment is a literal newline sequence +in the pattern; escape sequences that happen to represent a newline do not +count. For example, consider this pattern when PCRE2_EXTENDED is set, and the +default newline convention (a single linefeed character) is in force: +

+  abc #comment \n still comment
+
+On encountering the # character, pcre2_compile() skips along, looking for +a newline in the pattern. The sequence \n is still literal at this stage, so +it does not terminate the comment. Only an actual character with the code value +0x0a (the default newline) does so. +

+
RECURSIVE PATTERNS
+

+Consider the problem of matching a string in parentheses, allowing for +unlimited nested parentheses. Without the use of recursion, the best that can +be done is to use a pattern that matches up to some fixed depth of nesting. It +is not possible to handle an arbitrary nesting depth. +

+

+For some time, Perl has provided a facility that allows regular expressions to +recurse (amongst other things). It does this by interpolating Perl code in the +expression at run time, and the code can refer to the expression itself. A Perl +pattern using code interpolation to solve the parentheses problem can be +created like this: +

+  $re = qr{\( (?: (?>[^()]+) | (?p{$re}) )* \)}x;
+
+The (?p{...}) item interpolates Perl code at run time, and in this case refers +recursively to the pattern in which it appears. +

+

+Obviously, PCRE2 cannot support the interpolation of Perl code. Instead, it +supports special syntax for recursion of the entire pattern, and also for +individual capture group recursion. After its introduction in PCRE1 and Python, +this kind of recursion was subsequently introduced into Perl at release 5.10. +

+

+A special item that consists of (? followed by a number greater than zero and a +closing parenthesis is a recursive subroutine call of the capture group of the +given number, provided that it occurs inside that group. (If not, it is a +non-recursive subroutine +call, which is described in the next section.) The special item (?R) or (?0) is +a recursive call of the entire regular expression. +

+

+This PCRE2 pattern solves the nested parentheses problem (assume the +PCRE2_EXTENDED option is set so that white space is ignored): +

+  \( ( [^()]++ | (?R) )* \)
+
+First it matches an opening parenthesis. Then it matches any number of +substrings which can either be a sequence of non-parentheses, or a recursive +match of the pattern itself (that is, a correctly parenthesized substring). +Finally there is a closing parenthesis. Note the use of a possessive quantifier +to avoid backtracking into sequences of non-parentheses. +

+

+If this were part of a larger pattern, you would not want to recurse the entire +pattern, so instead you could use this: +

+  ( \( ( [^()]++ | (?1) )* \) )
+
+We have put the pattern into parentheses, and caused the recursion to refer to +them instead of the whole pattern. +

+

+In a larger pattern, keeping track of parenthesis numbers can be tricky. This +is made easier by the use of relative references. Instead of (?1) in the +pattern above you can write (?-2) to refer to the second most recently opened +parentheses preceding the recursion. In other words, a negative number counts +capturing parentheses leftwards from the point at which it is encountered. +

+

+Be aware however, that if +duplicate capture group numbers +are in use, relative references refer to the earliest group with the +appropriate number. Consider, for example: +

+  (?|(a)|(b)) (c) (?-2)
+
+The first two capture groups (a) and (b) are both numbered 1, and group (c) +is number 2. When the reference (?-2) is encountered, the second most recently +opened parentheses has the number 1, but it is the first such group (the (a) +group) to which the recursion refers. This would be the same if an absolute +reference (?1) was used. In other words, relative references are just a +shorthand for computing a group number. +

+

+It is also possible to refer to subsequent capture groups, by writing +references such as (?+2). However, these cannot be recursive because the +reference is not inside the parentheses that are referenced. They are always +non-recursive subroutine +calls, as described in the next section. +

+

+An alternative approach is to use named parentheses. The Perl syntax for this +is (?&name); PCRE1's earlier syntax (?P>name) is also supported. We could +rewrite the above example as follows: +

+  (?<pn> \( ( [^()]++ | (?&pn) )* \) )
+
+If there is more than one group with the same name, the earliest one is +used. +

+

+The example pattern that we have been looking at contains nested unlimited +repeats, and so the use of a possessive quantifier for matching strings of +non-parentheses is important when applying the pattern to strings that do not +match. For example, when this pattern is applied to +

+  (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
+
+it yields "no match" quickly. However, if a possessive quantifier is not used, +the match runs for a very long time indeed because there are so many different +ways the + and * repeats can carve up the subject, and all have to be tested +before failure can be reported. +

+

+At the end of a match, the values of capturing parentheses are those from +the outermost level. If you want to obtain intermediate values, a callout +function can be used (see below and the +pcre2callout +documentation). If the pattern above is matched against +

+  (ab(cd)ef)
+
+the value for the inner capturing parentheses (numbered 2) is "ef", which is +the last value taken on at the top level. If a capture group is not matched at +the top level, its final captured value is unset, even if it was (temporarily) +set at a deeper level during the matching process. +

+

+Do not confuse the (?R) item with the condition (R), which tests for recursion. +Consider this pattern, which matches text in angle brackets, allowing for +arbitrary nesting. Only digits are allowed in nested brackets (that is, when +recursing), whereas any characters are permitted at the outer level. +

+  < (?: (?(R) \d++  | [^<>]*+) | (?R)) * >
+
+In this pattern, (?(R) is the start of a conditional group, with two different +alternatives for the recursive and non-recursive cases. The (?R) item is the +actual recursive call. +

+
+Differences in recursion processing between PCRE2 and Perl +
+

+Some former differences between PCRE2 and Perl no longer exist. +

+

+Before release 10.30, recursion processing in PCRE2 differed from Perl in that +a recursive subroutine call was always treated as an atomic group. That is, +once it had matched some of the subject string, it was never re-entered, even +if it contained untried alternatives and there was a subsequent matching +failure. (Historical note: PCRE implemented recursion before Perl did.) +

+

+Starting with release 10.30, recursive subroutine calls are no longer treated +as atomic. That is, they can be re-entered to try unused alternatives if there +is a matching failure later in the pattern. This is now compatible with the way +Perl works. If you want a subroutine call to be atomic, you must explicitly +enclose it in an atomic group. +

+

+Supporting backtracking into recursions simplifies certain types of recursive +pattern. For example, this pattern matches palindromic strings: +

+  ^((.)(?1)\2|.?)$
+
+The second branch in the group matches a single central character in the +palindrome when there are an odd number of characters, or nothing when there +are an even number of characters, but in order to work it has to be able to try +the second case when the rest of the pattern match fails. If you want to match +typical palindromic phrases, the pattern has to ignore all non-word characters, +which can be done like this: +
+  ^\W*+((.)\W*+(?1)\W*+\2|\W*+.?)\W*+$
+
+If run with the PCRE2_CASELESS option, this pattern matches phrases such as "A +man, a plan, a canal: Panama!". Note the use of the possessive quantifier *+ to +avoid backtracking into sequences of non-word characters. Without this, PCRE2 +takes a great deal longer (ten times or more) to match typical phrases, and +Perl takes so long that you think it has gone into a loop. +

+

+Another way in which PCRE2 and Perl used to differ in their recursion +processing is in the handling of captured values. Formerly in Perl, when a +group was called recursively or as a subroutine (see the next section), it +had no access to any values that were captured outside the recursion, whereas +in PCRE2 these values can be referenced. Consider this pattern: +

+  ^(.)(\1|a(?2))
+
+This pattern matches "bab". The first capturing parentheses match "b", then in +the second group, when the backreference \1 fails to match "b", the second +alternative matches "a" and then recurses. In the recursion, \1 does now match +"b" and so the whole match succeeds. This match used to fail in Perl, but in +later versions (I tried 5.024) it now works. +

+
GROUPS AS SUBROUTINES
+

+If the syntax for a recursive group call (either by number or by name) is used +outside the parentheses to which it refers, it operates a bit like a subroutine +in a programming language. More accurately, PCRE2 treats the referenced group +as an independent subpattern which it tries to match at the current matching +position. The called group may be defined before or after the reference. A +numbered reference can be absolute or relative, as in these examples: +

+  (...(absolute)...)...(?2)...
+  (...(relative)...)...(?-1)...
+  (...(?+1)...(relative)...
+
+An earlier example pointed out that the pattern +
+  (sens|respons)e and \1ibility
+
+matches "sense and sensibility" and "response and responsibility", but not +"sense and responsibility". If instead the pattern +
+  (sens|respons)e and (?1)ibility
+
+is used, it does match "sense and responsibility" as well as the other two +strings. Another example is given in the discussion of DEFINE above. +

+

+Like recursions, subroutine calls used to be treated as atomic, but this +changed at PCRE2 release 10.30, so backtracking into subroutine calls can now +occur. However, any capturing parentheses that are set during the subroutine +call revert to their previous values afterwards. +

+

+Processing options such as case-independence are fixed when a group is +defined, so if it is used as a subroutine, such options cannot be changed for +different calls. For example, consider this pattern: +

+  (abc)(?i:(?-1))
+
+It matches "abcabc". It does not match "abcABC" because the change of +processing option does not affect the called group. +

+

+The behaviour of +backtracking control verbs +in groups when called as subroutines is described in the section entitled +"Backtracking verbs in subroutines" +below. +

+
ONIGURUMA SUBROUTINE SYNTAX
+

+For compatibility with Oniguruma, the non-Perl syntax \g followed by a name or +a number enclosed either in angle brackets or single quotes, is an alternative +syntax for calling a group as a subroutine, possibly recursively. Here are two +of the examples used above, rewritten using this syntax: +

+  (?<pn> \( ( (?>[^()]+) | \g<pn> )* \) )
+  (sens|respons)e and \g'1'ibility
+
+PCRE2 supports an extension to Oniguruma: if a number is preceded by a +plus or a minus sign it is taken as a relative reference. For example: +
+  (abc)(?i:\g<-1>)
+
+Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not +synonymous. The former is a backreference; the latter is a subroutine call. +

+
CALLOUTS
+

+Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl +code to be obeyed in the middle of matching a regular expression. This makes it +possible, amongst other things, to extract different substrings that match the +same pair of parentheses when there is a repetition. +

+

+PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl +code. The feature is called "callout". The caller of PCRE2 provides an external +function by putting its entry point in a match context using the function +pcre2_set_callout(), and then passing that context to pcre2_match() +or pcre2_dfa_match(). If no match context is passed, or if the callout +entry point is set to NULL, callouts are disabled. +

+

+Within a regular expression, (?C<arg>) indicates a point at which the external +function is to be called. There are two kinds of callout: those with a +numerical argument and those with a string argument. (?C) on its own with no +argument is treated as (?C0). A numerical argument allows the application to +distinguish between different callouts. String arguments were added for release +10.20 to make it possible for script languages that use PCRE2 to embed short +scripts within patterns in a similar way to Perl. +

+

+During matching, when PCRE2 reaches a callout point, the external function is +called. It is provided with the number or string argument of the callout, the +position in the pattern, and one item of data that is also set in the match +block. The callout function may cause matching to proceed, to backtrack, or to +fail. +

+

+By default, PCRE2 implements a number of optimizations at matching time, and +one side-effect is that sometimes callouts are skipped. If you need all +possible callouts to happen, you need to set options that disable the relevant +optimizations. More details, including a complete description of the +programming interface to the callout function, are given in the +pcre2callout +documentation. +

+
+Callouts with numerical arguments +
+

+If you just want to have a means of identifying different callout points, put a +number less than 256 after the letter C. For example, this pattern has two +callout points: +

+  (?C1)abc(?C2)def
+
+If the PCRE2_AUTO_CALLOUT flag is passed to pcre2_compile(), numerical +callouts are automatically installed before each item in the pattern. They are +all numbered 255. If there is a conditional group in the pattern whose +condition is an assertion, an additional callout is inserted just before the +condition. An explicit callout may also be set at this position, as in this +example: +
+  (?(?C9)(?=a)abc|def)
+
+Note that this applies only to assertion conditions, not to other types of +condition. +

+
+Callouts with string arguments +
+

+A delimited string may be used instead of a number as a callout argument. The +starting delimiter must be one of ` ' " ^ % # $ { and the ending delimiter is +the same as the start, except for {, where the ending delimiter is }. If the +ending delimiter is needed within the string, it must be doubled. For +example: +

+  (?C'ab ''c'' d')xyz(?C{any text})pqr
+
+The doubling is removed before the string is passed to the callout function. +

+
BACKTRACKING CONTROL
+

+There are a number of special "Backtracking Control Verbs" (to use Perl's +terminology) that modify the behaviour of backtracking during matching. They +are generally of the form (*VERB) or (*VERB:NAME). Some verbs take either form, +and may behave differently depending on whether or not a name argument is +present. The names are not required to be unique within the pattern. +

+

+By default, for compatibility with Perl, a name is any sequence of characters +that does not include a closing parenthesis. The name is not processed in +any way, and it is not possible to include a closing parenthesis in the name. +This can be changed by setting the PCRE2_ALT_VERBNAMES option, but the result +is no longer Perl-compatible. +

+

+When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to verb names +and only an unescaped closing parenthesis terminates the name. However, the +only backslash items that are permitted are \Q, \E, and sequences such as +\x{100} that define character code points. Character type escapes such as \d +are faulted. +

+

+A closing parenthesis can be included in a name either as \) or between \Q +and \E. In addition to backslash processing, if the PCRE2_EXTENDED or +PCRE2_EXTENDED_MORE option is also set, unescaped whitespace in verb names is +skipped, and #-comments are recognized, exactly as in the rest of the pattern. +PCRE2_EXTENDED and PCRE2_EXTENDED_MORE do not affect verb names unless +PCRE2_ALT_VERBNAMES is also set. +

+

+The maximum length of a name is 255 in the 8-bit library and 65535 in the +16-bit and 32-bit libraries. If the name is empty, that is, if the closing +parenthesis immediately follows the colon, the effect is as if the colon were +not there. Any number of these verbs may occur in a pattern. Except for +(*ACCEPT), they may not be quantified. +

+

+Since these verbs are specifically related to backtracking, most of them can be +used only when the pattern is to be matched using the traditional matching +function, because that uses a backtracking algorithm. With the exception of +(*FAIL), which behaves like a failing negative assertion, the backtracking +control verbs cause an error if encountered by the DFA matching function. +

+

+The behaviour of these verbs in +repeated groups, +assertions, +and in +capture groups called as subroutines +(whether or not recursively) is documented below. +

+
+Optimizations that affect backtracking verbs +
+

+PCRE2 contains some optimizations that are used to speed up matching by running +some checks at the start of each match attempt. For example, it may know the +minimum length of matching subject, or that a particular character must be +present. When one of these optimizations bypasses the running of a match, any +included backtracking verbs will not, of course, be processed. You can suppress +the start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option +when calling pcre2_compile(), or by starting the pattern with +(*NO_START_OPT). There is more discussion of this option in the section +entitled +"Compiling a pattern" +in the +pcre2api +documentation. +

+

+Experiments with Perl suggest that it too has similar optimizations, and like +PCRE2, turning them off can change the result of a match. +

+
+Verbs that act immediately +
+

+The following verbs act as soon as they are encountered. +

+   (*ACCEPT) or (*ACCEPT:NAME)
+
+This verb causes the match to end successfully, skipping the remainder of the +pattern. However, when it is inside a capture group that is called as a +subroutine, only that group is ended successfully. Matching then continues +at the outer level. If (*ACCEPT) is triggered in a positive assertion, the +assertion succeeds; in a negative assertion, the assertion fails. +

+

+If (*ACCEPT) is inside capturing parentheses, the data so far is captured. For +example: +

+  A((?:A|B(*ACCEPT)|C)D)
+
+This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is captured by +the outer parentheses. +

+

+(*ACCEPT) is the only backtracking verb that is allowed to be quantified +because an ungreedy quantification with a minimum of zero acts only when a +backtrack happens. Consider, for example, +

+  (A(*ACCEPT)??B)C
+
+where A, B, and C may be complex expressions. After matching "A", the matcher +processes "BC"; if that fails, causing a backtrack, (*ACCEPT) is triggered and +the match succeeds. In both cases, all but C is captured. Whereas (*COMMIT) +(see below) means "fail on backtrack", a repeated (*ACCEPT) of this type means +"succeed on backtrack". +

+

+Warning: (*ACCEPT) should not be used within a script run group, because +it causes an immediate exit from the group, bypassing the script run checking. +

+  (*FAIL) or (*FAIL:NAME)
+
+This verb causes a matching failure, forcing backtracking to occur. It may be +abbreviated to (*F). It is equivalent to (?!) but easier to read. The Perl +documentation notes that it is probably useful only when combined with (?{}) or +(??{}). Those are, of course, Perl features that are not present in PCRE2. The +nearest equivalent is the callout feature, as for example in this pattern: +
+  a+(?C)(*FAIL)
+
+A match with the string "aaaa" always fails, but the callout is taken before +each backtrack happens (in this example, 10 times). +

+

+(*ACCEPT:NAME) and (*FAIL:NAME) behave the same as (*MARK:NAME)(*ACCEPT) and +(*MARK:NAME)(*FAIL), respectively, that is, a (*MARK) is recorded just before +the verb acts. +

+
+Recording which path was taken +
+

+There is one verb whose main purpose is to track how a match was arrived at, +though it also has a secondary use in conjunction with advancing the match +starting point (see (*SKIP) below). +

+  (*MARK:NAME) or (*:NAME)
+
+A name is always required with this verb. For all the other backtracking +control verbs, a NAME argument is optional. +

+

+When a match succeeds, the last-encountered mark name on the +matching path is passed back to the caller as described in the section entitled +"Other information about the match" +in the +pcre2api +documentation. This applies to all instances of (*MARK) and other verbs, +including those inside assertions and atomic groups. However, there are +differences in those cases when (*MARK) is used in conjunction with (*SKIP) as +described below. +

+

+The mark name that was last encountered on the matching path is passed back. A +verb without a NAME argument is ignored for this purpose. Here is an example of +pcre2test output, where the "mark" modifier requests the retrieval and +outputting of (*MARK) data: +

+    re> /X(*MARK:A)Y|X(*MARK:B)Z/mark
+  data> XY
+   0: XY
+  MK: A
+  XZ
+   0: XZ
+  MK: B
+
+The (*MARK) name is tagged with "MK:" in this output, and in this example it +indicates which of the two alternatives matched. This is a more efficient way +of obtaining this information than putting each alternative in its own +capturing parentheses. +

+

+If a verb with a name is encountered in a positive assertion that is true, the +name is recorded and passed back if it is the last-encountered. This does not +happen for negative assertions or failing positive assertions. +

+

+After a partial match or a failed match, the last encountered name in the +entire match process is returned. For example: +

+    re> /X(*MARK:A)Y|X(*MARK:B)Z/mark
+  data> XP
+  No match, mark = B
+
+Note that in this unanchored example the mark is retained from the match +attempt that started at the letter "X" in the subject. Subsequent match +attempts starting at "P" and then with an empty string do not get as far as the +(*MARK) item, but nevertheless do not reset it. +

+

+If you are interested in (*MARK) values after failed matches, you should +probably set the PCRE2_NO_START_OPTIMIZE option +(see above) +to ensure that the match is always attempted. +

+
+Verbs that act after backtracking +
+

+The following verbs do nothing when they are encountered. Matching continues +with what follows, but if there is a subsequent match failure, causing a +backtrack to the verb, a failure is forced. That is, backtracking cannot pass +to the left of the verb. However, when one of these verbs appears inside an +atomic group or in a lookaround assertion that is true, its effect is confined +to that group, because once the group has been matched, there is never any +backtracking into it. Backtracking from beyond an assertion or an atomic group +ignores the entire group, and seeks a preceding backtracking point. +

+

+These verbs differ in exactly what kind of failure occurs when backtracking +reaches them. The behaviour described below is what happens when the verb is +not in a subroutine or an assertion. Subsequent sections cover these special +cases. +

+  (*COMMIT) or (*COMMIT:NAME)
+
+This verb causes the whole match to fail outright if there is a later matching +failure that causes backtracking to reach it. Even if the pattern is +unanchored, no further attempts to find a match by advancing the starting point +take place. If (*COMMIT) is the only backtracking verb that is encountered, +once it has been passed pcre2_match() is committed to finding a match at +the current starting point, or not at all. For example: +
+  a+(*COMMIT)b
+
+This matches "xxaab" but not "aacaab". It can be thought of as a kind of +dynamic anchor, or "I've started, so I must finish." +

+

+The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COMMIT). It is +like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names that are set with +(*MARK), ignoring those set by any of the other backtracking verbs. +

+

+If there is more than one backtracking verb in a pattern, a different one that +follows (*COMMIT) may be triggered first, so merely passing (*COMMIT) during a +match does not always guarantee that a match must be at this starting point. +

+

+Note that (*COMMIT) at the start of a pattern is not the same as an anchor, +unless PCRE2's start-of-match optimizations are turned off, as shown in this +output from pcre2test: +

+    re> /(*COMMIT)abc/
+  data> xyzabc
+   0: abc
+  data>
+    re> /(*COMMIT)abc/no_start_optimize
+  data> xyzabc
+  No match
+
+For the first pattern, PCRE2 knows that any match must start with "a", so the +optimization skips along the subject to "a" before applying the pattern to the +first set of data. The match attempt then succeeds. The second pattern disables +the optimization that skips along to the first character. The pattern is now +applied starting at "x", and so the (*COMMIT) causes the match to fail without +trying any other starting points. +
+  (*PRUNE) or (*PRUNE:NAME)
+
+This verb causes the match to fail at the current starting position in the +subject if there is a later matching failure that causes backtracking to reach +it. If the pattern is unanchored, the normal "bumpalong" advance to the next +starting character then happens. Backtracking can occur as usual to the left of +(*PRUNE), before it is reached, or when matching to the right of (*PRUNE), but +if there is no match to the right, backtracking cannot cross (*PRUNE). In +simple cases, the use of (*PRUNE) is just an alternative to an atomic group or +possessive quantifier, but there are some uses of (*PRUNE) that cannot be +expressed in any other way. In an anchored pattern (*PRUNE) has the same effect +as (*COMMIT). +

+

+The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). It is +like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names set with (*MARK), +ignoring those set by other backtracking verbs. +

+  (*SKIP)
+
+This verb, when given without a name, is like (*PRUNE), except that if the +pattern is unanchored, the "bumpalong" advance is not to the next character, +but to the position in the subject where (*SKIP) was encountered. (*SKIP) +signifies that whatever text was matched leading up to it cannot be part of a +successful match if there is a later mismatch. Consider: +
+  a+(*SKIP)b
+
+If the subject is "aaaac...", after the first match attempt fails (starting at +the first character in the string), the starting point skips on to start the +next attempt at "c". Note that a possessive quantifier does not have the same +effect as this example; although it would suppress backtracking during the +first match attempt, the second attempt would start at the second character +instead of skipping on to "c". +

+

+If (*SKIP) is used to specify a new starting position that is the same as the +starting position of the current match, or (by being inside a lookbehind) +earlier, the position specified by (*SKIP) is ignored, and instead the normal +"bumpalong" occurs. +

+  (*SKIP:NAME)
+
+When (*SKIP) has an associated name, its behaviour is modified. When such a +(*SKIP) is triggered, the previous path through the pattern is searched for the +most recent (*MARK) that has the same name. If one is found, the "bumpalong" +advance is to the subject position that corresponds to that (*MARK) instead of +to where (*SKIP) was encountered. If no (*MARK) with a matching name is found, +the (*SKIP) is ignored. +

+

+The search for a (*MARK) name uses the normal backtracking mechanism, which +means that it does not see (*MARK) settings that are inside atomic groups or +assertions, because they are never re-entered by backtracking. Compare the +following pcre2test examples: +

+    re> /a(?>(*MARK:X))(*SKIP:X)(*F)|(.)/
+  data> abc
+   0: a
+   1: a
+  data>
+    re> /a(?:(*MARK:X))(*SKIP:X)(*F)|(.)/
+  data> abc
+   0: b
+   1: b
+
+In the first example, the (*MARK) setting is in an atomic group, so it is not +seen when (*SKIP:X) triggers, causing the (*SKIP) to be ignored. This allows +the second branch of the pattern to be tried at the first character position. +In the second example, the (*MARK) setting is not in an atomic group. This +allows (*SKIP:X) to find the (*MARK) when it backtracks, and this causes a new +matching attempt to start at the second character. This time, the (*MARK) is +never seen because "a" does not match "b", so the matcher immediately jumps to +the second branch of the pattern. +

+

+Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores +names that are set by other backtracking verbs. +

+  (*THEN) or (*THEN:NAME)
+
+This verb causes a skip to the next innermost alternative when backtracking +reaches it. That is, it cancels any further backtracking within the current +alternative. Its name comes from the observation that it can be used for a +pattern-based if-then-else block: +
+  ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ...
+
+If the COND1 pattern matches, FOO is tried (and possibly further items after +the end of the group if FOO succeeds); on failure, the matcher skips to the +second alternative and tries COND2, without backtracking into COND1. If that +succeeds and BAR fails, COND3 is tried. If subsequently BAZ fails, there are no +more alternatives, so there is a backtrack to whatever came before the entire +group. If (*THEN) is not inside an alternation, it acts like (*PRUNE). +

+

+The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). It is +like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names set with (*MARK), +ignoring those set by other backtracking verbs. +

+

+A group that does not contain a | character is just a part of the enclosing +alternative; it is not a nested alternation with only one alternative. The +effect of (*THEN) extends beyond such a group to the enclosing alternative. +Consider this pattern, where A, B, etc. are complex pattern fragments that do +not contain any | characters at this level: +

+  A (B(*THEN)C) | D
+
+If A and B are matched, but there is a failure in C, matching does not +backtrack into A; instead it moves to the next alternative, that is, D. +However, if the group containing (*THEN) is given an alternative, it +behaves differently: +
+  A (B(*THEN)C | (*FAIL)) | D
+
+The effect of (*THEN) is now confined to the inner group. After a failure in C, +matching moves to (*FAIL), which causes the whole group to fail because there +are no more alternatives to try. In this case, matching does backtrack into A. +

+

+Note that a conditional group is not considered as having two alternatives, +because only one is ever used. In other words, the | character in a conditional +group has a different meaning. Ignoring white space, consider: +

+  ^.*? (?(?=a) a | b(*THEN)c )
+
+If the subject is "ba", this pattern does not match. Because .*? is ungreedy, +it initially matches zero characters. The condition (?=a) then fails, the +character "b" is matched, but "c" is not. At this point, matching does not +backtrack to .*? as might perhaps be expected from the presence of the | +character. The conditional group is part of the single alternative that +comprises the whole pattern, and so the match fails. (If there was a backtrack +into .*?, allowing it to match "b", the match would succeed.) +

+

+The verbs just described provide four different "strengths" of control when +subsequent matching fails. (*THEN) is the weakest, carrying on the match at the +next alternative. (*PRUNE) comes next, failing the match at the current +starting position, but allowing an advance to the next character (for an +unanchored pattern). (*SKIP) is similar, except that the advance may be more +than one character. (*COMMIT) is the strongest, causing the entire match to +fail. +

+
+More than one backtracking verb +
+

+If more than one backtracking verb is present in a pattern, the one that is +backtracked onto first acts. For example, consider this pattern, where A, B, +etc. are complex pattern fragments: +

+  (A(*COMMIT)B(*THEN)C|ABD)
+
+If A matches but B fails, the backtrack to (*COMMIT) causes the entire match to +fail. However, if A and B match, but C fails, the backtrack to (*THEN) causes +the next alternative (ABD) to be tried. This behaviour is consistent, but is +not always the same as Perl's. It means that if two or more backtracking verbs +appear in succession, all but the last of them have no effect. Consider this +example: +
+  ...(*COMMIT)(*PRUNE)...
+
+If there is a matching failure to the right, backtracking onto (*PRUNE) causes +it to be triggered, and its action is taken. There can never be a backtrack +onto (*COMMIT). +

+
+Backtracking verbs in repeated groups +
+

+PCRE2 sometimes differs from Perl in its handling of backtracking verbs in +repeated groups. For example, consider: +

+  /(a(*COMMIT)b)+ac/
+
+If the subject is "abac", Perl matches unless its optimizations are disabled, +but PCRE2 always fails because the (*COMMIT) in the second repeat of the group +acts. +

+
+Backtracking verbs in assertions +
+

+(*FAIL) in any assertion has its normal effect: it forces an immediate +backtrack. The behaviour of the other backtracking verbs depends on whether +the assertion is standalone or is acting as the condition in a conditional +group. +

+

+(*ACCEPT) in a standalone positive assertion causes the assertion to succeed +without any further processing; captured strings and a mark name (if set) are +retained. In a standalone negative assertion, (*ACCEPT) causes the assertion to +fail without any further processing; captured substrings and any mark name are +discarded. +

+

+If the assertion is a condition, (*ACCEPT) causes the condition to be true for +a positive assertion and false for a negative one; captured substrings are +retained in both cases. +

+

+The remaining verbs act only when a later failure causes a backtrack to +reach them. This means that, for the Perl-compatible assertions, their effect +is confined to the assertion, because Perl lookaround assertions are atomic. A +backtrack that occurs after such an assertion is complete does not jump back +into the assertion. Note in particular that a (*MARK) name that is set in an +assertion is not "seen" by an instance of (*SKIP:NAME) later in the pattern. +

+

+PCRE2 now supports non-atomic positive assertions, as described in the section +entitled +"Non-atomic assertions" +above. These assertions must be standalone (not used as conditions). They are +not Perl-compatible. For these assertions, a later backtrack does jump back +into the assertion, and therefore verbs such as (*COMMIT) can be triggered by +backtracks from later in the pattern. +

+

+The effect of (*THEN) is not allowed to escape beyond an assertion. If there +are no more branches to try, (*THEN) causes a positive assertion to be false, +and a negative assertion to be true. +

+

+The other backtracking verbs are not treated specially if they appear in a +standalone positive assertion. In a conditional positive assertion, +backtracking (from within the assertion) into (*COMMIT), (*SKIP), or (*PRUNE) +causes the condition to be false. However, for both standalone and conditional +negative assertions, backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes +the assertion to be true, without considering any further alternative branches. +

+
+Backtracking verbs in subroutines +
+

+These behaviours occur whether or not the group is called recursively. +

+

+(*ACCEPT) in a group called as a subroutine causes the subroutine match to +succeed without any further processing. Matching then continues after the +subroutine call. Perl documents this behaviour. Perl's treatment of the other +verbs in subroutines is different in some cases. +

+

+(*FAIL) in a group called as a subroutine has its normal effect: it forces +an immediate backtrack. +

+

+(*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail when +triggered by being backtracked to in a group called as a subroutine. There is +then a backtrack at the outer level. +

+

+(*THEN), when triggered, skips to the next alternative in the innermost +enclosing group that has alternatives (its normal behaviour). However, if there +is no such group within the subroutine's group, the subroutine match fails and +there is a backtrack at the outer level. +

+
SEE ALSO
+

+pcre2api(3), pcre2callout(3), pcre2matching(3), +pcre2syntax(3), pcre2(3). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 04 June 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2perform.html b/doc/html/pcre2perform.html new file mode 100644 index 0000000..55fdf20 --- /dev/null +++ b/doc/html/pcre2perform.html @@ -0,0 +1,280 @@ + + +pcre2perform specification + + +

pcre2perform man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
PCRE2 PERFORMANCE
+

+Two aspects of performance are discussed below: memory usage and processing +time. The way you express your pattern as a regular expression can affect both +of them. +

+
COMPILED PATTERN MEMORY USAGE
+

+Patterns are compiled by PCRE2 into a reasonably efficient interpretive code, +so that most simple patterns do not use much memory for storing the compiled +version. However, there is one case where the memory usage of a compiled +pattern can be unexpectedly large. If a parenthesized group has a quantifier +with a minimum greater than 1 and/or a limited maximum, the whole group is +repeated in the compiled code. For example, the pattern +

+  (abc|def){2,4}
+
+is compiled as if it were +
+  (abc|def)(abc|def)((abc|def)(abc|def)?)?
+
+(Technical aside: It is done this way so that backtrack points within each of +the repetitions can be independently maintained.) +

+

+For regular expressions whose quantifiers use only small numbers, this is not +usually a problem. However, if the numbers are large, and particularly if such +repetitions are nested, the memory usage can become an embarrassment. For +example, the very simple pattern +

+  ((ab){1,1000}c){1,3}
+
+uses over 50KiB when compiled using the 8-bit library. When PCRE2 is +compiled with its default internal pointer size of two bytes, the size limit on +a compiled pattern is 65535 code units in the 8-bit and 16-bit libraries, and +this is reached with the above pattern if the outer repetition is increased +from 3 to 4. PCRE2 can be compiled to use larger internal pointers and thus +handle larger compiled patterns, but it is better to try to rewrite your +pattern to use less memory if you can. +

+

+One way of reducing the memory usage for such patterns is to make use of +PCRE2's +"subroutine" +facility. Re-writing the above pattern as +

+  ((ab)(?2){0,999}c)(?1){0,2}
+
+reduces the memory requirements to around 16KiB, and indeed it remains under +20KiB even with the outer repetition increased to 100. However, this kind of +pattern is not always exactly equivalent, because any captures within +subroutine calls are lost when the subroutine completes. If this is not a +problem, this kind of rewriting will allow you to process patterns that PCRE2 +cannot otherwise handle. The matching performance of the two different versions +of the pattern is roughly the same. (This applies from release 10.30 - things +were different in earlier releases.) +

+
STACK AND HEAP USAGE AT RUN TIME
+

+From release 10.30, the interpretive (non-JIT) version of pcre2_match() +uses very little system stack at run time. In earlier releases recursive +function calls could use a great deal of stack, and this could cause problems, +but this usage has been eliminated. Backtracking positions are now explicitly +remembered in memory frames controlled by the code. +

+

+The size of each frame depends on the size of pointer variables and the number +of capturing parenthesized groups in the pattern being matched. On a 64-bit +system the frame size for a pattern with no captures is 128 bytes. For each +capturing group the size increases by 16 bytes. +

+

+Until release 10.41, an initial 20KiB frames vector was allocated on the system +stack, but this still caused some issues for multi-thread applications where +each thread has a very small stack. From release 10.41 backtracking memory +frames are always held in heap memory. An initial heap allocation is obtained +the first time any match data block is passed to pcre2_match(). This is +remembered with the match data block and re-used if that block is used for +another match. It is freed when the match data block itself is freed. +

+

+The size of the initial block is the larger of 20KiB or ten times the pattern's +frame size, unless the heap limit is less than this, in which case the heap +limit is used. If the initial block proves to be too small during matching, it +is replaced by a larger block, subject to the heap limit. The heap limit is +checked only when a new block is to be allocated. Reducing the heap limit +between calls to pcre2_match() with the same match data block does not +affect the saved block. +

+

+In contrast to pcre2_match(), pcre2_dfa_match() does use recursive +function calls, but only for processing atomic groups, lookaround assertions, +and recursion within the pattern. The original version of the code used to +allocate quite large internal workspace vectors on the stack, which caused some +problems for some patterns in environments with small stacks. From release +10.32 the code for pcre2_dfa_match() has been re-factored to use heap +memory when necessary for internal workspace when recursing, though recursive +function calls are still used. +

+

+The "match depth" parameter can be used to limit the depth of function +recursion, and the "match heap" parameter to limit heap memory in +pcre2_dfa_match(). +

+
PROCESSING TIME
+

+Certain items in regular expression patterns are processed more efficiently +than others. It is more efficient to use a character class like [aeiou] than a +set of single-character alternatives such as (a|e|i|o|u). In general, the +simplest construction that provides the required behaviour is usually the most +efficient. Jeffrey Friedl's book contains a lot of useful general discussion +about optimizing regular expressions for efficient performance. This document +contains a few observations about PCRE2. +

+

+Using Unicode character properties (the \p, \P, and \X escapes) is slow, +because PCRE2 has to use a multi-stage table lookup whenever it needs a +character's property. If you can find an alternative pattern that does not use +character properties, it will probably be faster. +

+

+By default, the escape sequences \b, \d, \s, and \w, and the POSIX +character classes such as [:alpha:] do not use Unicode properties, partly for +backwards compatibility, and partly for performance reasons. However, you can +set the PCRE2_UCP option or start the pattern with (*UCP) if you want Unicode +character properties to be used. This can double the matching time for items +such as \d, when matched with pcre2_match(); the performance loss is +less with a DFA matching function, and in both cases there is not much +difference for \b. +

+

+When a pattern begins with .* not in atomic parentheses, nor in parentheses +that are the subject of a backreference, and the PCRE2_DOTALL option is set, +the pattern is implicitly anchored by PCRE2, since it can match only at the +start of a subject string. If the pattern has multiple top-level branches, they +must all be anchorable. The optimization can be disabled by the +PCRE2_NO_DOTSTAR_ANCHOR option, and is automatically disabled if the pattern +contains (*PRUNE) or (*SKIP). +

+

+If PCRE2_DOTALL is not set, PCRE2 cannot make this optimization, because the +dot metacharacter does not then match a newline, and if the subject string +contains newlines, the pattern may match from the character immediately +following one of them instead of from the very start. For example, the pattern +

+  .*second
+
+matches the subject "first\nand second" (where \n stands for a newline +character), with the match starting at the seventh character. In order to do +this, PCRE2 has to retry the match starting after every newline in the subject. +

+

+If you are using such a pattern with subject strings that do not contain +newlines, the best performance is obtained by setting PCRE2_DOTALL, or starting +the pattern with ^.* or ^.*? to indicate explicit anchoring. That saves PCRE2 +from having to scan along the subject looking for a newline to restart at. +

+

+Beware of patterns that contain nested indefinite repeats. These can take a +long time to run when applied to a string that does not match. Consider the +pattern fragment +

+  ^(a+)*
+
+This can match "aaaa" in 16 different ways, and this number increases very +rapidly as the string gets longer. (The * repeat can match 0, 1, 2, 3, or 4 +times, and for each of those cases other than 0 or 4, the + repeats can match +different numbers of times.) When the remainder of the pattern is such that the +entire match is going to fail, PCRE2 has in principle to try every possible +variation, and this can take an extremely long time, even for relatively short +strings. +

+

+An optimization catches some of the more simple cases such as +

+  (a+)*b
+
+where a literal character follows. Before embarking on the standard matching +procedure, PCRE2 checks that there is a "b" later in the subject string, and if +there is not, it fails the match immediately. However, when there is no +following literal this optimization cannot be used. You can see the difference +by comparing the behaviour of +
+  (a+)*\d
+
+with the pattern above. The pattern above, (a+)*b, gives a failure almost instantly when +applied to a whole line of "a" characters, whereas (a+)*\d takes an +appreciable time with strings longer than about 20 characters. +

+

+In many cases, the solution to this kind of performance issue is to use an +atomic group or a possessive quantifier. This can often reduce memory +requirements as well. As another example, consider this pattern: +

+  ([^<]|<(?!inet))+
+
+It matches from wherever it starts until it encounters "<inet" or the end of +the data, and is the kind of pattern that might be used when processing an XML +file. Each iteration of the outer parentheses matches either one character that +is not "<" or a "<" that is not followed by "inet". However, each time a +parenthesis is processed, a backtracking position is passed, so this +formulation uses a memory frame for each matched character. For a long string, +a lot of memory is required. Consider now this rewritten pattern, which matches +exactly the same strings: +
+  ([^<]++|<(?!inet))+
+
+This runs much faster, because sequences of characters that do not contain "<" +are "swallowed" in one item inside the parentheses, and a possessive quantifier +is used to stop any backtracking into the runs of non-"<" characters. This +version also uses a lot less memory because entry to a new set of parentheses +happens only when a "<" character that is not followed by "inet" is encountered +(and we assume this is relatively rare). +

+

+This example shows that one way of optimizing performance when matching long +subject strings is to write repeated parenthesized subpatterns to match more +than one character whenever possible. +

+
+SETTING RESOURCE LIMITS +
+

+You can set limits on the amount of processing that takes place when matching, +and on the amount of heap memory that is used. The default values of the limits +are very large, and unlikely ever to operate. They can be changed when PCRE2 is +built, and they can also be set when pcre2_match() or +pcre2_dfa_match() is called. For details of these interfaces, see the +pcre2build +documentation and the section entitled +"The match context" +in the +pcre2api +documentation. +

+

+The pcre2test test program has a modifier called "find_limits" which, if +applied to a subject line, causes it to find the smallest limits that allow a +pattern to match. This is done by repeatedly matching with different limits. +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 27 July 2022 +
+Copyright © 1997-2022 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2posix.html b/doc/html/pcre2posix.html new file mode 100644 index 0000000..6e7abd9 --- /dev/null +++ b/doc/html/pcre2posix.html @@ -0,0 +1,379 @@ + + +pcre2posix specification + + +

pcre2posix man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
SYNOPSIS
+

+#include <pcre2posix.h> +

+

+int pcre2_regcomp(regex_t *preg, const char *pattern, + int cflags); +
+
+int pcre2_regexec(const regex_t *preg, const char *string, + size_t nmatch, regmatch_t pmatch[], int eflags); +
+
+size_t pcre2_regerror(int errcode, const regex_t *preg, + char *errbuf, size_t errbuf_size); +
+
+void pcre2_regfree(regex_t *preg); +

+
DESCRIPTION
+

+This set of functions provides a POSIX-style API for the PCRE2 regular +expression 8-bit library. There are no POSIX-style wrappers for PCRE2's 16-bit +and 32-bit libraries. See the +pcre2api +documentation for a description of PCRE2's native API, which contains much +additional functionality. +

+

+IMPORTANT NOTE: The functions described here are NOT thread-safe, and +should not be used in multi-threaded applications. They are also limited to +processing subjects that are not bigger than 2GB. Use the native API instead. +

+

+These functions are wrapper functions that ultimately call the PCRE2 native +API. Their prototypes are defined in the pcre2posix.h header file, and +they all have unique names starting with pcre2_. However, the +pcre2posix.h header also contains macro definitions that convert the +standard POSIX names such as regcomp() into pcre2_regcomp() etc. This +means that a program can use the usual POSIX names without running the risk of +accidentally linking with POSIX functions from a different library. +

+

+On Unix-like systems the PCRE2 POSIX library is called libpcre2-posix, so +can be accessed by adding -lpcre2-posix to the command for linking an +application. Because the POSIX functions call the native ones, it is also +necessary to add -lpcre2-8. +

+

+On Windows systems, if you are linking to a DLL version of the library, it is +recommended that PCRE2POSIX_SHARED is defined before including the +pcre2posix.h header, as it will allow for a more efficient way to +invoke the functions by adding the __declspec(dllimport) decorator. +

+

+Although they were not defined as prototypes in pcre2posix.h, releases +10.33 to 10.36 of the library contained functions with the POSIX names +regcomp() etc. These simply passed their arguments to the PCRE2 +functions. These functions were provided for backwards compatibility with +earlier versions of PCRE2, which had only POSIX names. However, this has proved +troublesome in situations where a program links with several libraries, some of +which use PCRE2's POSIX interface while others use the real POSIX functions. +For this reason, the POSIX names have been removed since release 10.37. +

+

+Calling the header file pcre2posix.h avoids any conflict with other POSIX +libraries. It can, of course, be renamed or aliased as regex.h, which is +the "correct" name, if there is no clash. It provides two structure types, +regex_t for compiled internal forms, and regmatch_t for returning +captured substrings. It also defines some constants whose names start with +"REG_"; these are used for setting options and identifying error codes. +

+
USING THE POSIX FUNCTIONS
+

+Note that these functions are just POSIX-style wrappers for PCRE2's native API. +They do not give POSIX regular expression behaviour, and they are not +thread-safe or even POSIX compatible. +

+

+Those POSIX option bits that can reasonably be mapped to PCRE2 native options +have been implemented. In addition, the option REG_EXTENDED is defined with the +value zero. This has no effect, but since programs that are written to the +POSIX interface often use it, this makes it easier to slot in PCRE2 as a +replacement library. Other POSIX options are not even defined. +

+

+There are also some options that are not defined by POSIX. These have been +added at the request of users who want to make use of certain PCRE2-specific +features via the POSIX calling interface or to add BSD or GNU functionality. +

+

+When PCRE2 is called via these functions, it is only the API that is POSIX-like +in style. The syntax and semantics of the regular expressions themselves are +still those of Perl, subject to the setting of various PCRE2 options, as +described below. "POSIX-like in style" means that the API approximates to the +POSIX definition; it is not fully POSIX-compatible, and in multi-unit encoding +domains it is probably even less compatible. +

+

+The descriptions below use the actual names of the functions, but, as described +above, the standard POSIX names (without the pcre2_ prefix) may also be +used. +

+
COMPILING A PATTERN
+

+The function pcre2_regcomp() is called to compile a pattern into an +internal form. By default, the pattern is a C string terminated by a binary +zero (but see REG_PEND below). The preg argument is a pointer to a +regex_t structure that is used as a base for storing information about +the compiled regular expression. It is also used for input when REG_PEND is +set. The regex_t structure used by pcre2_regcomp() is defined in +pcre2posix.h and is not the same as the structure used by other libraries +that provide POSIX-style matching. +

+

+The argument cflags is either zero, or contains one or more of the bits +defined by the following macros: +

+  REG_DOTALL
+
+The PCRE2_DOTALL option is set when the regular expression is passed for +compilation to the native function. Note that REG_DOTALL is not part of the +POSIX standard. +
+  REG_ICASE
+
+The PCRE2_CASELESS option is set when the regular expression is passed for +compilation to the native function. +
+  REG_NEWLINE
+
+The PCRE2_MULTILINE option is set when the regular expression is passed for +compilation to the native function. Note that this does not mimic the +defined POSIX behaviour for REG_NEWLINE (see the following section). +
+  REG_NOSPEC
+
+The PCRE2_LITERAL option is set when the regular expression is passed for +compilation to the native function. This disables all meta characters in the +pattern, causing it to be treated as a literal string. The only other options +that are allowed with REG_NOSPEC are REG_ICASE, REG_NOSUB, REG_PEND, and +REG_UTF. Note that REG_NOSPEC is not part of the POSIX standard. +
+  REG_NOSUB
+
+When a pattern that is compiled with this flag is passed to +pcre2_regexec() for matching, the nmatch and pmatch arguments +are ignored, and no captured strings are returned. Versions of the PCRE2 library +prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this +no longer happens because it disables the use of backreferences. +
+  REG_PEND
+
+If this option is set, the re_endp field in the preg structure +(which has the type const char *) must be set to point to the character beyond +the end of the pattern before calling pcre2_regcomp(). The pattern itself +may now contain binary zeros, which are treated as data characters. Without +REG_PEND, a binary zero terminates the pattern and the re_endp field is +ignored. This is a GNU extension to the POSIX standard and should be used with +caution in software intended to be portable to other systems. +
+  REG_UCP
+
+The PCRE2_UCP option is set when the regular expression is passed for +compilation to the native function. This causes PCRE2 to use Unicode properties +when matching \d, \w, etc., instead of just recognizing ASCII values. Note +that REG_UCP is not part of the POSIX standard. +
+  REG_UNGREEDY
+
+The PCRE2_UNGREEDY option is set when the regular expression is passed for +compilation to the native function. Note that REG_UNGREEDY is not part of the +POSIX standard. +
+  REG_UTF
+
+The PCRE2_UTF option is set when the regular expression is passed for +compilation to the native function. This causes the pattern itself and all data +strings used for matching it to be treated as UTF-8 strings. Note that REG_UTF +is not part of the POSIX standard. +

+

+In the absence of these flags, no options are passed to the native function. +This means that the regex is compiled with PCRE2 default semantics. In +particular, the way it handles newline characters in the subject string is the +Perl way, not the POSIX way. Note that setting PCRE2_MULTILINE has only +some of the effects specified for REG_NEWLINE. It does not affect the way +newlines are matched by the dot metacharacter (they are not) or by a negative +class such as [^a] (they are). +

+

+The yield of pcre2_regcomp() is zero on success, and non-zero otherwise. +The preg structure is filled in on success, and one other member of the +structure (as well as re_endp) is public: re_nsub contains the +number of capturing subpatterns in the regular expression. Various error codes +are defined in the header file. +

+

+NOTE: If the yield of pcre2_regcomp() is non-zero, you must not attempt +to use the contents of the preg structure. If, for example, you pass it +to pcre2_regexec(), the result is undefined and your program is likely to +crash. +

+
MATCHING NEWLINE CHARACTERS
+

+This area is not simple, because POSIX and Perl take different views of things. +It is not possible to get PCRE2 to obey POSIX semantics, but then PCRE2 was +never intended to be a POSIX engine. The following table lists the different +possibilities for matching newline characters in Perl and PCRE2: +

+                          Default   Change with
+
+  . matches newline          no     PCRE2_DOTALL
+  newline matches [^a]       yes    not changeable
+  $ matches \n at end        yes    PCRE2_DOLLAR_ENDONLY
+  $ matches \n in middle     no     PCRE2_MULTILINE
+  ^ matches \n in middle     no     PCRE2_MULTILINE
+
+This is the equivalent table for a POSIX-compatible pattern matcher: +
+                          Default   Change with
+
+  . matches newline          yes    REG_NEWLINE
+  newline matches [^a]       yes    REG_NEWLINE
+  $ matches \n at end        no     REG_NEWLINE
+  $ matches \n in middle     no     REG_NEWLINE
+  ^ matches \n in middle     no     REG_NEWLINE
+
+This behaviour is not what happens when PCRE2 is called via its POSIX +API. By default, PCRE2's behaviour is the same as Perl's, except that there is +no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2 and Perl, there +is no way to stop newline from matching [^a]. +

+

+Default POSIX newline handling can be obtained by setting PCRE2_DOTALL and +PCRE2_DOLLAR_ENDONLY when calling pcre2_compile() directly, but there is +no way to make PCRE2 behave exactly as for the REG_NEWLINE action. When using +the POSIX API, passing REG_NEWLINE to PCRE2's pcre2_regcomp() function +causes PCRE2_MULTILINE to be passed to pcre2_compile(), and REG_DOTALL +passes PCRE2_DOTALL. There is no way to pass PCRE2_DOLLAR_ENDONLY. +

+
MATCHING A PATTERN
+

+The function pcre2_regexec() is called to match a compiled pattern +preg against a given string, which is by default terminated by a +zero byte (but see REG_STARTEND below), subject to the options in eflags. +These can be: +

+  REG_NOTBOL
+
+The PCRE2_NOTBOL option is set when calling the underlying PCRE2 matching +function. +
+  REG_NOTEMPTY
+
+The PCRE2_NOTEMPTY option is set when calling the underlying PCRE2 matching +function. Note that REG_NOTEMPTY is not part of the POSIX standard. However, +setting this option can give more POSIX-like behaviour in some situations. +
+  REG_NOTEOL
+
+The PCRE2_NOTEOL option is set when calling the underlying PCRE2 matching +function. +
+  REG_STARTEND
+
+When this option is set, the subject string starts at string + +pmatch[0].rm_so and ends at string + pmatch[0].rm_eo, which +should point to the first character beyond the string. There may be binary +zeros within the subject string, and indeed, using REG_STARTEND is the only +way to pass a subject string that contains a binary zero. +

+

+Whatever the value of pmatch[0].rm_so, the offsets of the matched string +and any captured substrings are still given relative to the start of +string itself. (Before PCRE2 release 10.30 these were given relative to +string + pmatch[0].rm_so, but this differs from other +implementations.) +

+

+This is a BSD extension, compatible with but not specified by IEEE Standard +1003.2 (POSIX.2), and should be used with caution in software intended to be +portable to other systems. Note that a non-zero rm_so does not imply +REG_NOTBOL; REG_STARTEND affects only the location and length of the string, +not how it is matched. Setting REG_STARTEND and passing pmatch as NULL +are mutually exclusive; the error REG_INVARG is returned. +

+

+If the pattern was compiled with the REG_NOSUB flag, no data about any matched +strings is returned. The nmatch and pmatch arguments of +pcre2_regexec() are ignored (except possibly as input for REG_STARTEND). +

+

+The value of nmatch may be zero, and the value of pmatch may be NULL +(unless REG_STARTEND is set); in both these cases no data about any matched +strings is returned. +

+

+Otherwise, the portion of the string that was matched, and also any captured +substrings, are returned via the pmatch argument, which points to an +array of nmatch structures of type regmatch_t, containing the +members rm_so and rm_eo. These contain the byte offset to the first +character of each substring and the offset to the first character after the end +of each substring, respectively. The 0th element of the vector relates to the +entire portion of string that was matched; subsequent elements relate to +the capturing subpatterns of the regular expression. Unused entries in the +array have both structure members set to -1. +

+

+regmatch_t as well as the regoff_t typedef it uses are defined in +pcre2posix.h and are not warranted to have the same size or layout as other +similarly named types from other libraries that provide POSIX-style matching. +

+

+A successful match yields a zero return; various error codes are defined in the +header file, of which REG_NOMATCH is the "expected" failure code. +

+
ERROR MESSAGES
+

+The pcre2_regerror() function maps a non-zero errorcode from either +pcre2_regcomp() or pcre2_regexec() to a printable message. If +preg is not NULL, the error should have arisen from the use of that +structure. A message terminated by a binary zero is placed in errbuf. If +the buffer is too short, only the first errbuf_size - 1 characters of the +error message are used. The yield of the function is the size of buffer needed +to hold the whole message, including the terminating zero. This value is +greater than errbuf_size if the message was truncated. +

+
MEMORY USAGE
+

+Compiling a regular expression causes memory to be allocated and associated +with the preg structure. The function pcre2_regfree() frees all +such memory, after which preg may no longer be used as a compiled +expression. +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 19 January 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2sample.html b/doc/html/pcre2sample.html new file mode 100644 index 0000000..345df03 --- /dev/null +++ b/doc/html/pcre2sample.html @@ -0,0 +1,110 @@ + + +pcre2sample specification + + +

pcre2sample man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+PCRE2 SAMPLE PROGRAM +
+

+A simple, complete demonstration program to get you started with using PCRE2 is +supplied in the file pcre2demo.c in the src directory in the PCRE2 +distribution. A listing of this program is given in the +pcre2demo +documentation. If you do not have a copy of the PCRE2 distribution, you can +save this listing to re-create the contents of pcre2demo.c. +

+

+The demonstration program compiles the regular expression that is its +first argument, and matches it against the subject string in its second +argument. No PCRE2 options are set, and default character tables are used. If +matching succeeds, the program outputs the portion of the subject that matched, +together with the contents of any captured substrings. +

+

+If the -g option is given on the command line, the program then goes on to +check for further matches of the same regular expression in the same subject +string. The logic is a little bit tricky because of the possibility of matching +an empty string. Comments in the code explain what is going on. +

+

+The code in pcre2demo.c is an 8-bit program that uses the PCRE2 8-bit +library. It handles strings and characters that are stored in 8-bit code units. +By default, one character corresponds to one code unit, but if the pattern +starts with "(*UTF)", both it and the subject are treated as UTF-8 strings, +where characters may occupy multiple code units. +

+

+If PCRE2 is installed in the standard include and library directories for your +operating system, you should be able to compile the demonstration program using +a command like this: +

+  cc -o pcre2demo pcre2demo.c -lpcre2-8
+
+If PCRE2 is installed elsewhere, you may need to add additional options to the +command line. For example, on a Unix-like system that has PCRE2 installed in +/usr/local, you can compile the demonstration program using a command +like this: +
+  cc -o pcre2demo -I/usr/local/include pcre2demo.c -L/usr/local/lib -lpcre2-8
+
+Once you have built the demonstration program, you can run simple tests like +this: +
+  ./pcre2demo 'cat|dog' 'the cat sat on the mat'
+  ./pcre2demo -g 'cat|dog' 'the dog sat on the cat'
+
+Note that there is a much more comprehensive test program, called +pcre2test, +which supports many more facilities for testing regular expressions using all +three PCRE2 libraries (8-bit, 16-bit, and 32-bit, though not all three need be +installed). The +pcre2demo +program is provided as a relatively simple coding example. +

+

+If you try to run +pcre2demo +when PCRE2 is not installed in the standard library directory, you may get an +error like this on some operating systems (e.g. Solaris): +

+  ld.so.1: pcre2demo: fatal: libpcre2-8.so.0: open failed: No such file or directory
+
+This is caused by the way shared library support works on those systems. You +need to add +
+  -R/usr/local/lib
+
+(for example) to the compile command to get round this problem. +

+
+AUTHOR +
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
+REVISION +
+

+Last updated: 02 February 2016 +
+Copyright © 1997-2016 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2serialize.html b/doc/html/pcre2serialize.html new file mode 100644 index 0000000..19418a8 --- /dev/null +++ b/doc/html/pcre2serialize.html @@ -0,0 +1,212 @@ + + +pcre2serialize specification + + +

pcre2serialize man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS
+

+int32_t pcre2_serialize_decode(pcre2_code **codes, + int32_t number_of_codes, const uint8_t *bytes, + pcre2_general_context *gcontext); +
+
+int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, + PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext); +
+
+void pcre2_serialize_free(uint8_t *bytes); +
+
+int32_t pcre2_serialize_get_number_of_codes(const uint8_t *bytes); +
+
+If you are running an application that uses a large number of regular +expression patterns, it may be useful to store them in a precompiled form +instead of having to compile them every time the application is run. However, +if you are using the just-in-time optimization feature, it is not possible to +save and reload the JIT data, because it is position-dependent. The host on +which the patterns are reloaded must be running the same version of PCRE2, with +the same code unit width, and must also have the same endianness, pointer width +and PCRE2_SIZE type. For example, patterns compiled on a 32-bit system using +PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor can they be +reloaded using the 8-bit library. +

+

+Note that "serialization" in PCRE2 does not convert compiled patterns to an +abstract format like Java or .NET serialization. The serialized output is +really just a bytecode dump, which is why it can only be reloaded in the same +environment as the one that created it. Hence the restrictions mentioned above. +Applications that are not statically linked with a fixed version of PCRE2 must +be prepared to recompile patterns from their sources, in order to be immune to +PCRE2 upgrades. +

+
SECURITY CONCERNS
+

+The facility for saving and restoring compiled patterns is intended for use +within individual applications. As such, the data supplied to +pcre2_serialize_decode() is expected to be trusted data, not data from +arbitrary external sources. There is only some simple consistency checking, not +complete validation of what is being re-loaded. Corrupted data may cause +undefined results. For example, if the length field of a pattern in the +serialized data is corrupted, the deserializing code may read beyond the end of +the byte stream that is passed to it. +

+
SAVING COMPILED PATTERNS
+

+Before compiled patterns can be saved they must be serialized, which in PCRE2 +means converting the pattern to a stream of bytes. A single byte stream may +contain any number of compiled patterns, but they must all use the same +character tables. A single copy of the tables is included in the byte stream +(its size is 1088 bytes). For more details of character tables, see the +section on locale support +in the +pcre2api +documentation. +

+

+The function pcre2_serialize_encode() creates a serialized byte stream +from a list of compiled patterns. Its first two arguments specify the list, +being a pointer to a vector of pointers to compiled patterns, and the length of +the vector. The third and fourth arguments point to variables which are set to +point to the created byte stream and its length, respectively. The final +argument is a pointer to a general context, which can be used to specify custom +memory management functions. If this argument is NULL, malloc() is used +to obtain memory for the byte stream. The yield of the function is the number +of serialized patterns, or one of the following negative error codes: +

+  PCRE2_ERROR_BADDATA      the number of patterns is zero or less
+  PCRE2_ERROR_BADMAGIC     mismatch of id bytes in one of the patterns
+  PCRE2_ERROR_NOMEMORY     memory allocation failed
+  PCRE2_ERROR_MIXEDTABLES  the patterns do not all use the same tables
+  PCRE2_ERROR_NULL         the 1st, 3rd, or 4th argument is NULL
+
+PCRE2_ERROR_BADMAGIC means either that a pattern's code has been corrupted, or +that a slot in the vector does not point to a compiled pattern. +

+

+Once a set of patterns has been serialized you can save the data in any +appropriate manner. Here is sample code that compiles two patterns and writes +them to a file. It assumes that the variable fd refers to a file that is +open for output. The error checking that should be present in a real +application has been omitted for simplicity. +

+  int errorcode;
+  uint8_t *bytes;
+  PCRE2_SIZE erroroffset;
+  PCRE2_SIZE bytescount;
+  pcre2_code *list_of_codes[2];
+  list_of_codes[0] = pcre2_compile("first pattern",
+    PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL);
+  list_of_codes[1] = pcre2_compile("second pattern",
+    PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL);
+  errorcode = pcre2_serialize_encode(list_of_codes, 2, &bytes,
+    &bytescount, NULL);
+  errorcode = fwrite(bytes, 1, bytescount, fd);
+
+Note that the serialized data is binary data that may contain any of the 256 +possible byte values. On systems that make a distinction between binary and +non-binary data, be sure that the file is opened for binary output. +

+

+Serializing a set of patterns leaves the original data untouched, so they can +still be used for matching. Their memory must eventually be freed in the usual +way by calling pcre2_code_free(). When you have finished with the byte +stream, it too must be freed by calling pcre2_serialize_free(). If this +function is called with a NULL argument, it returns immediately without doing +anything. +

+
RE-USING PRECOMPILED PATTERNS
+

+In order to re-use a set of saved patterns you must first make the serialized +byte stream available in main memory (for example, by reading from a file). The +management of this memory block is up to the application. You can use the +pcre2_serialize_get_number_of_codes() function to find out how many +compiled patterns are in the serialized data without actually decoding the +patterns: +

+  uint8_t *bytes = <serialized data>;
+  int32_t number_of_codes = pcre2_serialize_get_number_of_codes(bytes);
+
+The pcre2_serialize_decode() function reads a byte stream and recreates +the compiled patterns in new memory blocks, setting pointers to them in a +vector. The first two arguments are a pointer to a suitable vector and its +length, and the third argument points to a byte stream. The final argument is a +pointer to a general context, which can be used to specify custom memory +management functions for the decoded patterns. If this argument is NULL, +malloc() and free() are used. After deserialization, the byte +stream is no longer needed and can be discarded. +
+  pcre2_code *list_of_codes[2];
+  uint8_t *bytes = <serialized data>;
+  int32_t number_of_codes =
+    pcre2_serialize_decode(list_of_codes, 2, bytes, NULL);
+
+If the vector is not large enough for all the patterns in the byte stream, it +is filled with those that fit, and the remainder are ignored. The yield of the +function is the number of decoded patterns, or one of the following negative +error codes: +
+  PCRE2_ERROR_BADDATA    second argument is zero or less
+  PCRE2_ERROR_BADMAGIC   mismatch of id bytes in the data
+  PCRE2_ERROR_BADMODE    mismatch of code unit size or PCRE2 version
+  PCRE2_ERROR_BADSERIALIZEDDATA  other sanity check failure
+  PCRE2_ERROR_NOMEMORY   memory allocation failed
+  PCRE2_ERROR_NULL       first or third argument is NULL
+
+PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +on a system with different endianness. +

+

+Decoded patterns can be used for matching in the usual way, and must be freed +by calling pcre2_code_free(). However, be aware that there is a potential +race issue if you are using multiple patterns that were decoded from a single +byte stream in a multithreaded application. A single copy of the character +tables is used by all the decoded patterns and a reference count is used to +arrange for its memory to be automatically freed when the last pattern is +freed, but there is no locking on this reference count. Therefore, if you want +to call pcre2_code_free() for these patterns in different threads, you +must arrange your own locking, and ensure that pcre2_code_free() cannot +be called by two threads at the same time. +

+

+If a pattern was processed by pcre2_jit_compile() before being +serialized, the JIT data is discarded and so is no longer available after a +save/restore cycle. You can, however, process a restored pattern with +pcre2_jit_compile() if you wish. +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 27 June 2018 +
+Copyright © 1997-2018 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2syntax.html b/doc/html/pcre2syntax.html new file mode 100644 index 0000000..1c0ccb0 --- /dev/null +++ b/doc/html/pcre2syntax.html @@ -0,0 +1,635 @@ + + +pcre2syntax specification + + +

pcre2syntax man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY
+

+The full syntax and semantics of the regular expressions that are supported by +PCRE2 are described in the +pcre2pattern +documentation. This document contains a quick-reference summary of the syntax. +

+
QUOTING
+

+

+  \x         where x is non-alphanumeric is a literal x
+  \Q...\E    treat enclosed characters as literal
+
+Note that white space inside \Q...\E is always treated as literal, even if +PCRE2_EXTENDED is set, causing most other white space to be ignored. +

+
BRACED ITEMS
+

+With one exception, wherever brace characters { and } are required to enclose +data for constructions such as \g{2} or \k{name}, space and/or horizontal tab +characters that follow { or precede } are allowed and are ignored. In the case +of quantifiers, they may also appear before or after the comma. The exception +is \u{...} which is not Perl-compatible and is recognized only when +PCRE2_EXTRA_ALT_BSUX is set. This is an ECMAScript compatibility feature, and +follows ECMAScript's behaviour. +

+
ESCAPED CHARACTERS
+

+This table applies to ASCII and Unicode environments. An unrecognized escape +sequence causes an error. +

+  \a         alarm, that is, the BEL character (hex 07)
+  \cx        "control-x", where x is a non-control ASCII character
+  \e         escape (hex 1B)
+  \f         form feed (hex 0C)
+  \n         newline (hex 0A)
+  \r         carriage return (hex 0D)
+  \t         tab (hex 09)
+  \0dd       character with octal code 0dd
+  \ddd       character with octal code ddd, or backreference
+  \o{ddd..}  character with octal code ddd..
+  \N{U+hh..} character with Unicode code point hh.. (Unicode mode only)
+  \xhh       character with hex code hh
+  \x{hh..}   character with hex code hh..
+
+If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the +following are also recognized: +
+  \U         the character "U"
+  \uhhhh     character with hex code hhhh
+  \u{hh..}   character with hex code hh.. but only for EXTRA_ALT_BSUX
+
+When \x is not followed by {, from zero to two hexadecimal digits are read, +but in ALT_BSUX mode \x must be followed by two hexadecimal digits to be +recognized as a hexadecimal escape; otherwise it matches a literal "x". +Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits +or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it +matches a literal "u". +

+

+Note that \0dd is always an octal code. The treatment of backslash followed by +a non-zero digit is complicated; for details see the section +"Non-printing characters" +in the +pcre2pattern +documentation, where details of escape processing in EBCDIC environments are +also given. \N{U+hh..} is synonymous with \x{hh..} in PCRE2 but is not +supported in EBCDIC environments. Note that \N not followed by an opening +curly bracket has a different meaning (see below). +

+
CHARACTER TYPES
+

+

+  .          any character except newline;
+               in dotall mode, any character whatsoever
+  \C         one code unit, even in UTF mode (best avoided)
+  \d         a decimal digit
+  \D         a character that is not a decimal digit
+  \h         a horizontal white space character
+  \H         a character that is not a horizontal white space character
+  \N         a character that is not a newline
+  \p{xx}     a character with the xx property
+  \P{xx}     a character without the xx property
+  \R         a newline sequence
+  \s         a white space character
+  \S         a character that is not a white space character
+  \v         a vertical white space character
+  \V         a character that is not a vertical white space character
+  \w         a "word" character
+  \W         a "non-word" character
+  \X         a Unicode extended grapheme cluster
+
+\C is dangerous because it may leave the current matching point in the middle +of a UTF-8 or UTF-16 character. The application can lock out the use of \C by +setting the PCRE2_NEVER_BACKSLASH_C option. It is also possible to build PCRE2 +with the use of \C permanently disabled. +

+

+By default, \d, \s, and \w match only ASCII characters, even in UTF-8 mode +or in the 16-bit and 32-bit libraries. However, if locale-specific matching is +happening, \s and \w may also match characters with code points in the range +128-255. If the PCRE2_UCP option is set, the behaviour of these escape +sequences is changed to use Unicode properties and they match many more +characters, but there are some option settings that can restrict individual +sequences to matching only ASCII characters. +

+

+Property descriptions in \p and \P are matched caselessly; hyphens, +underscores, and white space are ignored, in accordance with Unicode's "loose +matching" rules. +

+
GENERAL CATEGORY PROPERTIES FOR \p and \P
+

+

+  C          Other
+  Cc         Control
+  Cf         Format
+  Cn         Unassigned
+  Co         Private use
+  Cs         Surrogate
+
+  L          Letter
+  Ll         Lower case letter
+  Lm         Modifier letter
+  Lo         Other letter
+  Lt         Title case letter
+  Lu         Upper case letter
+  Lc         Ll, Lu, or Lt
+  L&         Ll, Lu, or Lt
+
+  M          Mark
+  Mc         Spacing mark
+  Me         Enclosing mark
+  Mn         Non-spacing mark
+
+  N          Number
+  Nd         Decimal number
+  Nl         Letter number
+  No         Other number
+
+  P          Punctuation
+  Pc         Connector punctuation
+  Pd         Dash punctuation
+  Pe         Close punctuation
+  Pf         Final punctuation
+  Pi         Initial punctuation
+  Po         Other punctuation
+  Ps         Open punctuation
+
+  S          Symbol
+  Sc         Currency symbol
+  Sk         Modifier symbol
+  Sm         Mathematical symbol
+  So         Other symbol
+
+  Z          Separator
+  Zl         Line separator
+  Zp         Paragraph separator
+  Zs         Space separator
+
+

+
PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P
+

+

+  Xan        Alphanumeric: union of properties L and N
+  Xps        POSIX space: property Z or tab, NL, VT, FF, CR
+  Xsp        Perl space: property Z or tab, NL, VT, FF, CR
+  Xuc        Universally-named character: one that can be
+               represented by a Universal Character Name
+  Xwd        Perl word: property Xan or underscore
+
+Perl and POSIX space are now the same. Perl added VT to its space character set +at release 5.18. +

+
BINARY PROPERTIES FOR \p AND \P
+

+Unicode defines a number of binary properties, that is, properties whose only +values are true or false. You can obtain a list of those that are recognized by +\p and \P, along with their abbreviations, by running this command: +

+  pcre2test -LP
+
+

+
SCRIPT MATCHING WITH \p AND \P
+

+Many script names and their 4-letter abbreviations are recognized in +\p{sc:...} or \p{scx:...} items, or on their own with \p (and also \P of +course). You can obtain a list of these scripts by running this command: +

+  pcre2test -LS
+
+

+
THE BIDI_CLASS PROPERTY FOR \p AND \P
+

+

+  \p{Bidi_Class:<class>}   matches a character with the given class
+  \p{BC:<class>}           matches a character with the given class
+
+The recognized classes are: +
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
+
+

+
CHARACTER CLASSES
+

+

+  [...]       positive character class
+  [^...]      negative character class
+  [x-y]       range (can be used for hex characters)
+  [[:xxx:]]   positive POSIX named set
+  [[:^xxx:]]  negative POSIX named set
+
+  alnum       alphanumeric
+  alpha       alphabetic
+  ascii       0-127
+  blank       space or tab
+  cntrl       control character
+  digit       decimal digit
+  graph       printing, excluding space
+  lower       lower case letter
+  print       printing, including space
+  punct       printing, excluding alphanumeric
+  space       white space
+  upper       upper case letter
+  word        same as \w
+  xdigit      hexadecimal digit
+
+In PCRE2, POSIX character set names recognize only ASCII characters by default, +but some of them use Unicode properties if PCRE2_UCP is set. You can use +\Q...\E inside a character class. +

+
QUANTIFIERS
+

+

+  ?           0 or 1, greedy
+  ?+          0 or 1, possessive
+  ??          0 or 1, lazy
+  *           0 or more, greedy
+  *+          0 or more, possessive
+  *?          0 or more, lazy
+  +           1 or more, greedy
+  ++          1 or more, possessive
+  +?          1 or more, lazy
+  {n}         exactly n
+  {n,m}       at least n, no more than m, greedy
+  {n,m}+      at least n, no more than m, possessive
+  {n,m}?      at least n, no more than m, lazy
+  {n,}        n or more, greedy
+  {n,}+       n or more, possessive
+  {n,}?       n or more, lazy
+  {,m}        zero up to m, greedy
+  {,m}+       zero up to m, possessive
+  {,m}?       zero up to m, lazy
+
+

+
ANCHORS AND SIMPLE ASSERTIONS
+

+

+  \b          word boundary
+  \B          not a word boundary
+  ^           start of subject
+                also after an internal newline in multiline mode
+                (after any newline if PCRE2_ALT_CIRCUMFLEX is set)
+  \A          start of subject
+  $           end of subject
+                also before newline at end of subject
+                also before internal newline in multiline mode
+  \Z          end of subject
+                also before newline at end of subject
+  \z          end of subject
+  \G          first matching position in subject
+
+

+
REPORTED MATCH POINT SETTING
+

+

+  \K          set reported start of match
+
+From release 10.38 \K is not permitted by default in lookaround assertions, +for compatibility with Perl. However, if the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK +option is set, the previous behaviour is re-enabled. When this option is set, +\K is honoured in positive assertions, but ignored in negative ones. +

+
ALTERNATION
+

+

+  expr|expr|expr...
+
+

+
CAPTURING
+

+

+  (...)           capture group
+  (?<name>...)    named capture group (Perl)
+  (?'name'...)    named capture group (Perl)
+  (?P<name>...)   named capture group (Python)
+  (?:...)         non-capture group
+  (?|...)         non-capture group; reset group numbers for
+                   capture groups in each alternative
+
+In non-UTF modes, names may contain underscores and ASCII letters and digits; +in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In +both cases, a name must not start with a digit. +

+
ATOMIC GROUPS
+

+

+  (?>...)         atomic non-capture group
+  (*atomic:...)   atomic non-capture group
+
+

+
COMMENT
+

+

+  (?#....)        comment (not nestable)
+
+

+
OPTION SETTING
+

+Changes of these options within a group are automatically cancelled at the end +of the group. +

+  (?a)            all ASCII options
+  (?aD)           restrict \d to ASCII in UCP mode
+  (?aS)           restrict \s to ASCII in UCP mode
+  (?aW)           restrict \w to ASCII in UCP mode
+  (?aP)           restrict all POSIX classes to ASCII in UCP mode
+  (?aT)           restrict POSIX digit classes to ASCII in UCP mode
+  (?i)            caseless
+  (?J)            allow duplicate named groups
+  (?m)            multiline
+  (?n)            no auto capture
+  (?r)            restrict caseless to either ASCII or non-ASCII
+  (?s)            single line (dotall)
+  (?U)            default ungreedy (lazy)
+  (?x)            ignore white space except in classes or \Q...\E
+  (?xx)           as (?x) but also ignore space and tab in classes
+  (?-...)         unset the given option(s)
+  (?^)            unset imnrsx options
+
+(?aP) implies (?aT) as well, though this has no additional effect. However, it +means that (?-aP) is really (?-PT) which disables all ASCII restrictions for +POSIX classes. +

+

+Unsetting x or xx unsets both. Several options may be set at once, and a +mixture of setting and unsetting such as (?i-x) is allowed, but there may be +only one hyphen. Setting (but not unsetting) is allowed after (?^, for example +(?^in). An option setting may appear at the start of a non-capture group, for +example (?i:...). +

+

+The following are recognized only at the very start of a pattern or after one +of the newline or \R options with similar syntax. More than one of them may +appear. For the first three, d is a decimal number. +

+  (*LIMIT_DEPTH=d) set the backtracking limit to d
+  (*LIMIT_HEAP=d)  set the heap size limit to d * 1024 bytes
+  (*LIMIT_MATCH=d) set the match limit to d
+  (*NOTEMPTY)      set PCRE2_NOTEMPTY when matching
+  (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
+  (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
+  (*NO_DOTSTAR_ANCHOR) no .* anchoring (PCRE2_NO_DOTSTAR_ANCHOR)
+  (*NO_JIT)       disable JIT optimization
+  (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
+  (*UTF)          set appropriate UTF mode for the library in use
+  (*UCP)          set PCRE2_UCP (use Unicode properties for \d etc)
+
+Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the value of +the limits set by the caller of pcre2_match() or pcre2_dfa_match(), +not increase them. LIMIT_RECURSION is an obsolete synonym for LIMIT_DEPTH. The +application can lock out the use of (*UTF) and (*UCP) by setting the +PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at compile time. +

+
NEWLINE CONVENTION
+

+These are recognized only at the very start of the pattern or after option +settings with a similar syntax. +

+  (*CR)           carriage return only
+  (*LF)           linefeed only
+  (*CRLF)         carriage return followed by linefeed
+  (*ANYCRLF)      all three of the above
+  (*ANY)          any Unicode newline sequence
+  (*NUL)          the NUL character (binary zero)
+
+

+
WHAT \R MATCHES
+

+These are recognized only at the very start of the pattern or after option +settings with a similar syntax. +

+  (*BSR_ANYCRLF)  CR, LF, or CRLF
+  (*BSR_UNICODE)  any Unicode newline sequence
+
+

+
LOOKAHEAD AND LOOKBEHIND ASSERTIONS
+

+

+  (?=...)                     )
+  (*pla:...)                  ) positive lookahead
+  (*positive_lookahead:...)   )
+
+  (?!...)                     )
+  (*nla:...)                  ) negative lookahead
+  (*negative_lookahead:...)   )
+
+  (?<=...)                    )
+  (*plb:...)                  ) positive lookbehind
+  (*positive_lookbehind:...)  )
+
+  (?<!...)                    )
+  (*nlb:...)                  ) negative lookbehind
+  (*negative_lookbehind:...)  )
+
+Each top-level branch of a lookbehind must have a limit for the number of +characters it matches. If any branch can match a variable number of characters, +the maximum for each branch is limited to a value set by the caller of +pcre2_compile() or defaulted. The default is set when PCRE2 is built +(ultimate default 255). If every branch matches a fixed number of characters, +the limit for each branch is 65535 characters. +

+
NON-ATOMIC LOOKAROUND ASSERTIONS
+

+These assertions are specific to PCRE2 and are not Perl-compatible. +

+  (?*...)                                )
+  (*napla:...)                           ) synonyms
+  (*non_atomic_positive_lookahead:...)   )
+
+  (?<*...)                               )
+  (*naplb:...)                           ) synonyms
+  (*non_atomic_positive_lookbehind:...)  )
+
+

+
SCRIPT RUNS
+

+

+  (*script_run:...)           ) script run, can be backtracked into
+  (*sr:...)                   )
+
+  (*atomic_script_run:...)    ) atomic script run
+  (*asr:...)                  )
+
+

+
BACKREFERENCES
+

+

+  \n              reference by number (can be ambiguous)
+  \gn             reference by number
+  \g{n}           reference by number
+  \g+n            relative reference by number (PCRE2 extension)
+  \g-n            relative reference by number
+  \g{+n}          relative reference by number (PCRE2 extension)
+  \g{-n}          relative reference by number
+  \k<name>        reference by name (Perl)
+  \k'name'        reference by name (Perl)
+  \g{name}        reference by name (Perl)
+  \k{name}        reference by name (.NET)
+  (?P=name)       reference by name (Python)
+
+

+
SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)
+

+

+  (?R)            recurse whole pattern
+  (?n)            call subroutine by absolute number
+  (?+n)           call subroutine by relative number
+  (?-n)           call subroutine by relative number
+  (?&name)        call subroutine by name (Perl)
+  (?P>name)       call subroutine by name (Python)
+  \g<name>        call subroutine by name (Oniguruma)
+  \g'name'        call subroutine by name (Oniguruma)
+  \g<n>           call subroutine by absolute number (Oniguruma)
+  \g'n'           call subroutine by absolute number (Oniguruma)
+  \g<+n>          call subroutine by relative number (PCRE2 extension)
+  \g'+n'          call subroutine by relative number (PCRE2 extension)
+  \g<-n>          call subroutine by relative number (PCRE2 extension)
+  \g'-n'          call subroutine by relative number (PCRE2 extension)
+
+

+
CONDITIONAL PATTERNS
+

+

+  (?(condition)yes-pattern)
+  (?(condition)yes-pattern|no-pattern)
+
+  (?(n)               absolute reference condition
+  (?(+n)              relative reference condition (PCRE2 extension)
+  (?(-n)              relative reference condition (PCRE2 extension)
+  (?(<name>)          named reference condition (Perl)
+  (?('name')          named reference condition (Perl)
+  (?(name)            named reference condition (PCRE2, deprecated)
+  (?(R)               overall recursion condition
+  (?(Rn)              specific numbered group recursion condition
+  (?(R&name)          specific named group recursion condition
+  (?(DEFINE)          define groups for reference
+  (?(VERSION[>]=n.m)  test PCRE2 version
+  (?(assert)          assertion condition
+
+Note the ambiguity of (?(R) and (?(Rn) which might be named reference +conditions or recursion tests. Such a condition is interpreted as a reference +condition if the relevant named group exists. +

+
BACKTRACKING CONTROL
+

+All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the +name is mandatory, for the others it is optional. (*SKIP) changes its behaviour +if :NAME is present. The others just set a name for passing back to the caller, +but this is not a name that (*SKIP) can see. The following act immediately they +are reached: +

+  (*ACCEPT)       force successful match
+  (*FAIL)         force backtrack; synonym (*F)
+  (*MARK:NAME)    set name to be passed back; synonym (*:NAME)
+
+The following act only when a subsequent match failure causes a backtrack to +reach them. They all force a match failure, but they differ in what happens +afterwards. Those that advance the start-of-match point do so only if the +pattern is not anchored. +
+  (*COMMIT)       overall failure, no advance of starting point
+  (*PRUNE)        advance to next starting character
+  (*SKIP)         advance to current matching position
+  (*SKIP:NAME)    advance to position corresponding to an earlier
+                  (*MARK:NAME); if not found, the (*SKIP) is ignored
+  (*THEN)         local failure, backtrack to next alternation
+
+The effect of one of these verbs in a group called as a subroutine is confined +to the subroutine call. +

+
CALLOUTS
+

+

+  (?C)            callout (assumed number 0)
+  (?Cn)           callout with numerical data n
+  (?C"text")      callout with string data
+
+The allowed string delimiters are ` ' " ^ % # $ (which are the same for the +start and the end), and the starting delimiter { matched with the ending +delimiter }. To encode the ending delimiter within the string, double it. +

+
SEE ALSO
+

+pcre2pattern(3), pcre2api(3), pcre2callout(3), +pcre2matching(3), pcre2(3). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 12 October 2023 +
+Copyright © 1997-2023 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2test.html b/doc/html/pcre2test.html new file mode 100644 index 0000000..6cc3cc3 --- /dev/null +++ b/doc/html/pcre2test.html @@ -0,0 +1,2213 @@ + + +pcre2test specification + + +

pcre2test man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
SYNOPSIS
+

+pcre2test [options] [input file [output file]] +
+
+pcre2test is a test program for the PCRE2 regular expression libraries, +but it can also be used for experimenting with regular expressions. This +document describes the features of the test program; for details of the regular +expressions themselves, see the +pcre2pattern +documentation. For details of the PCRE2 library function calls and their +options, see the +pcre2api +documentation. +

+

+The input for pcre2test is a sequence of regular expression patterns and +subject strings to be matched. There are also command lines for setting +defaults and controlling some special actions. The output shows the result of +each match attempt. Modifiers on external or internal command lines, the +patterns, and the subject lines specify PCRE2 function options, control how the +subject is processed, and what output is produced. +

+

+There are many obscure modifiers, some of which are specifically designed for +use in conjunction with the test script and data files that are distributed as +part of PCRE2. All the modifiers are documented here, some without much +justification, but many of them are unlikely to be of use except when testing +the libraries. +

+
PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES
+

+Different versions of the PCRE2 library can be built to support character +strings that are encoded in 8-bit, 16-bit, or 32-bit code units. One, two, or +all three of these libraries may be simultaneously installed. The +pcre2test program can be used to test all the libraries. However, its own +input and output are always in 8-bit format. When testing the 16-bit or 32-bit +libraries, patterns and subject strings are converted to 16-bit or 32-bit +format before being passed to the library functions. Results are converted back +to 8-bit code units for output. +

+

+In the rest of this document, the names of library functions and structures +are given in generic form, for example, pcre2_compile(). The actual +names used in the libraries have a suffix _8, _16, or _32, as appropriate. +

+
INPUT ENCODING
+

+Input to pcre2test is processed line by line, either by calling the C +library's fgets() function, or via the libreadline or libedit +library. In some Windows environments character 26 (hex 1A) causes an immediate +end of file, and no further data is read, so this character should be avoided +unless you really want that action. +

+

+The input is processed using C's string functions, so must not contain binary +zeros, even though in Unix-like environments, fgets() treats any bytes +other than newline as data characters. An error is generated if a binary zero +is encountered. By default subject lines are processed for backslash escapes, +which makes it possible to include any data value in strings that are passed to +the library for matching. For patterns, there is a facility for specifying some +or all of the 8-bit input characters as hexadecimal pairs, which makes it +possible to include binary zeros. +

+
+Input for the 16-bit and 32-bit libraries +
+

+When testing the 16-bit or 32-bit libraries, there is a need to be able to +generate character code points greater than 255 in the strings that are passed +to the library. For subject lines, backslash escapes can be used. In addition, +when the utf modifier (see +"Setting compilation options" +below) is set, the pattern and any following subject lines are interpreted as +UTF-8 strings and translated to UTF-16 or UTF-32 as appropriate. +

+

+For non-UTF testing of wide characters, the utf8_input modifier can be +used. This is mutually exclusive with utf, and is allowed only in 16-bit +or 32-bit mode. It causes the pattern and following subject lines to be treated +as UTF-8 according to the original definition (RFC 2279), which allows for +character values up to 0x7fffffff. Each character is placed in one 16-bit or +32-bit code unit (in the 16-bit case, values greater than 0xffff cause an error +to occur). +

+

+UTF-8 (in its original definition) is not capable of encoding values greater +than 0x7fffffff, but such values can be handled by the 32-bit library. When +testing this library in non-UTF mode with utf8_input set, if any +character is preceded by the byte 0xff (which is an invalid byte in UTF-8) +0x80000000 is added to the character's value. This is the only way of passing +such code points in a pattern string. For subject strings, using an escape +sequence is preferable. +

+
COMMAND LINE OPTIONS
+

+-8 +If the 8-bit library has been built, this option causes it to be used (this is +the default). If the 8-bit library has not been built, this option causes an +error. +

+

+-16 +If the 16-bit library has been built, this option causes it to be used. If the +8-bit library has not been built, this is the default. If the 16-bit library +has not been built, this option causes an error. +

+

+-32 +If the 32-bit library has been built, this option causes it to be used. If no +other library has been built, this is the default. If the 32-bit library has +not been built, this option causes an error. +

+

+-ac +Behave as if each pattern has the auto_callout modifier, that is, insert +automatic callouts into every pattern that is compiled. +

+

+-AC +As for -ac, but in addition behave as if each subject line has the +callout_extra modifier, that is, show additional information from +callouts. +

+

+-b +Behave as if each pattern has the fullbincode modifier; the full +internal binary form of the pattern is output after compilation. +

+

+-C +Output the version number of the PCRE2 library, and all available information +about the optional features that are included, and then exit with zero exit +code. All other options are ignored. If both -C and any -Lx options are present, +whichever is first is recognized. +

+

+-C option +Output information about a specific build-time option, then exit. This +functionality is intended for use in scripts such as RunTest. The +following options output the value and set the exit code as indicated: +

+  ebcdic-nl  the code for LF (= NL) in an EBCDIC environment:
+               0x15 or 0x25
+               0 if used in an ASCII environment
+               exit code is always 0
+  linksize   the configured internal link size (2, 3, or 4)
+               exit code is set to the link size
+  newline    the default newline setting:
+               CR, LF, CRLF, ANYCRLF, ANY, or NUL
+               exit code is always 0
+  bsr        the default setting for what \R matches:
+               ANYCRLF or ANY
+               exit code is always 0
+
+The following options output 1 for true or 0 for false, and set the exit code +to the same value: +
+  backslash-C  \C is supported (not locked out)
+  ebcdic       compiled for an EBCDIC environment
+  jit          just-in-time support is available
+  pcre2-16     the 16-bit library was built
+  pcre2-32     the 32-bit library was built
+  pcre2-8      the 8-bit library was built
+  unicode      Unicode support is available
+
+If an unknown option is given, an error message is output; the exit code is 0. +

+

+-d +Behave as if each pattern has the debug modifier; the internal +form and information about the compiled pattern is output after compilation; +-d is equivalent to -b -i. +

+

+-dfa +Behave as if each subject line has the dfa modifier; matching is done +using the pcre2_dfa_match() function instead of the default +pcre2_match(). +

+

+-error number[,number,...] +Call pcre2_get_error_message() for each of the error numbers in the +comma-separated list, display the resulting messages on the standard output, +then exit with zero exit code. The numbers may be positive or negative. This is +a convenience facility for PCRE2 maintainers. +

+

+-help +Output a brief summary of these options and then exit. +

+

+-i +Behave as if each pattern has the info modifier; information about the +compiled pattern is given after compilation. +

+

+-jit +Behave as if each pattern line has the jit modifier; after successful +compilation, each pattern is passed to the just-in-time compiler, if available. +

+

+-jitfast +Behave as if each pattern line has the jitfast modifier; after +successful compilation, each pattern is passed to the just-in-time compiler, if +available, and each subject line is passed directly to the JIT matcher via its +"fast path". +

+

+-jitverify +Behave as if each pattern line has the jitverify modifier; after +successful compilation, each pattern is passed to the just-in-time compiler, if +available, and the use of JIT for matching is verified. +

+

+-LM +List modifiers: write a list of available pattern and subject modifiers to the +standard output, then exit with zero exit code. All other options are ignored. +If both -C and any -Lx options are present, whichever is first is recognized. +

+

+-LP +List properties: write a list of recognized Unicode properties to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. +

+

+-LS +List scripts: write a list of recognized Unicode script names to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. +

+

+-pattern modifier-list +Behave as if each pattern line contains the given modifiers. +

+

+-q +Do not output the version number of pcre2test at the start of execution. +

+

+-S size +On Unix-like systems, set the size of the run-time stack to size +mebibytes (units of 1024*1024 bytes). +

+

+-subject modifier-list +Behave as if each subject line contains the given modifiers. +

+

+-t +Run each compile and match many times with a timer, and output the resulting +times per compile or match. When JIT is used, separate times are given for the +initial compile and the JIT compile. You can control the number of iterations +that are used for timing by following -t with a number (as a separate +item on the command line). For example, "-t 1000" iterates 1000 times. The +default is to iterate 500,000 times. +

+

+-tm +This is like -t except that it times only the matching phase, not the +compile phase. +

+

+-T -TM +These behave like -t and -tm, but in addition, at the end of a run, +the total times for all compiles and matches are output. +

+

+-version +Output the PCRE2 version number and then exit. +

+
DESCRIPTION
+

+If pcre2test is given two filename arguments, it reads from the first and +writes to the second. If the first name is "-", input is taken from the +standard input. If pcre2test is given only one argument, it reads from +that file and writes to stdout. Otherwise, it reads from stdin and writes to +stdout. +

+

+When pcre2test is built, a configuration option can specify that it +should be linked with the libreadline or libedit library. When this +is done, if the input is from a terminal, it is read using the readline() +function. This provides line-editing and history facilities. The output from +the -help option states whether or not readline() will be used. +

+

+The program handles any number of tests, each of which consists of a set of +input lines. Each set starts with a regular expression pattern, followed by any +number of subject lines to be matched against that pattern. In between sets of +test data, command lines that begin with # may appear. This file format, with +some restrictions, can also be processed by the perltest.sh script that +is distributed with PCRE2 as a means of checking that the behaviour of PCRE2 +and Perl is the same. For a specification of perltest.sh, see the +comments near its beginning. See also the #perltest command below. +

+

+When the input is a terminal, pcre2test prompts for each line of input, +using "re>" to prompt for regular expression patterns, and "data>" to prompt +for subject lines. Command lines starting with # can be entered only in +response to the "re>" prompt. +

+

+Each subject line is matched separately and independently. If you want to do +multi-line matches, you have to use the \n escape sequence (or \r or \r\n, +etc., depending on the newline setting) in a single line of input to encode the +newline sequences. There is no limit on the length of subject lines; the input +buffer is automatically extended if it is too small. There are replication +features that make it possible to generate long repetitive pattern or subject +lines without having to supply them explicitly. +

+

+An empty line or the end of the file signals the end of the subject lines for a +test, at which point a new pattern or command line is expected if there is +still input to be read. +

+
COMMAND LINES
+

+In between sets of test data, a line that begins with # is interpreted as a +command line. If the first character is followed by white space or an +exclamation mark, the line is treated as a comment, and ignored. Otherwise, the +following commands are recognized: +

+  #forbid_utf
+
+Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP +options set, which locks out the use of the PCRE2_UTF and PCRE2_UCP options and +the use of (*UTF) and (*UCP) at the start of patterns. This command also forces +an error if a subsequent pattern contains any occurrences of \P, \p, or \X, +which are still supported when PCRE2_UTF is not set, but which require Unicode +property support to be included in the library. +

+

+This is a trigger guard that is used in test files to ensure that UTF or +Unicode property tests are not accidentally added to files that are used when +Unicode support is not included in the library. Setting PCRE2_NEVER_UTF and +PCRE2_NEVER_UCP as a default can also be obtained by the use of #pattern; +the difference is that #forbid_utf cannot be unset, and the automatic +options are not displayed in pattern information, to avoid cluttering up test +output. +

+  #load <filename>
+
+This command is used to load a set of precompiled patterns from a file, as +described in the section entitled "Saving and restoring compiled patterns" +below. +
+  #loadtables <filename>
+
+This command is used to load a set of binary character tables that can be +accessed by the tables=3 qualifier. Such tables can be created by the +pcre2_dftables program with the -b option. +
+  #newline_default [<newline-list>]
+
+When PCRE2 is built, a default newline convention can be specified. This +determines which characters and/or character pairs are recognized as indicating +a newline in a pattern or subject string. The default can be overridden when a +pattern is compiled. The standard test files contain tests of various newline +conventions, but the majority of the tests expect a single linefeed to be +recognized as a newline by default. Without special action the tests would fail +when PCRE2 is compiled with either CR or CRLF as the default newline. +

+

+The #newline_default command specifies a list of newline types that are +acceptable as the default. The types must be one of CR, LF, CRLF, ANYCRLF, +ANY, or NUL (in upper or lower case), for example: +

+  #newline_default LF Any anyCRLF
+
+If the default newline is in the list, this command has no effect. Otherwise, +except when testing the POSIX API, a newline modifier that specifies the +first newline convention in the list (LF in the above example) is added to any +pattern that does not already have a newline modifier. If the newline +list is empty, the feature is turned off. This command is present in a number +of the standard test input files. +

+

+When the POSIX API is being tested there is no way to override the default +newline convention, though it is possible to set the newline convention from +within the pattern. A warning is given if the posix or posix_nosub +modifier is used when #newline_default would set a default for the +non-POSIX API. +

+  #pattern <modifier-list>
+
+This command sets a default modifier list that applies to all subsequent +patterns. Modifiers on a pattern can change these settings. +
+  #perltest
+
+This line is used in test files that can also be processed by perltest.sh +to confirm that Perl gives the same results as PCRE2. Subsequent tests are +checked for the use of pcre2test features that are incompatible with the +perltest.sh script. +

+

+Patterns must use '/' as their delimiter, and only certain modifiers are +supported. Comment lines, #pattern commands, and #subject commands that set or +unset "mark" are recognized and acted on. The #perltest, #forbid_utf, and +#newline_default commands, which are needed in the relevant pcre2test files, +are silently ignored. All other command lines are ignored, but give a warning +message. The #perltest command helps detect tests that are accidentally +put in the wrong file or use the wrong delimiter. For more details of the +perltest.sh script see the comments it contains. +

+  #pop [<modifiers>]
+  #popcopy [<modifiers>]
+
+These commands are used to manipulate the stack of compiled patterns, as +described in the section entitled "Saving and restoring compiled patterns" +below. +
+  #save <filename>
+
+This command is used to save a set of compiled patterns to a file, as described +in the section entitled "Saving and restoring compiled patterns" +below. +
+  #subject <modifier-list>
+
+This command sets a default modifier list that applies to all subsequent +subject lines. Modifiers on a subject line can change these settings. +

+
MODIFIER SYNTAX
+

+Modifier lists are used with both pattern and subject lines. Items in a list +are separated by commas followed by optional white space. Trailing whitespace +in a modifier list is ignored. Some modifiers may be given for both patterns +and subject lines, whereas others are valid only for one or the other. Each +modifier has a long name, for example "anchored", and some of them must be +followed by an equals sign and a value, for example, "offset=12". Values cannot +contain comma characters, but may contain spaces. Modifiers that do not take +values may be preceded by a minus sign to turn off a previous setting. +

+

+A few of the more common modifiers can also be specified as single letters, for +example "i" for "caseless". In documentation, following the Perl convention, +these are written with a slash ("the /i modifier") for clarity. Abbreviated +modifiers must all be concatenated in the first item of a modifier list. If the +first item is not recognized as a long modifier name, it is interpreted as a +sequence of these abbreviations. For example: +

+  /abc/ig,newline=cr,jit=3
+
+This is a pattern line whose modifier list starts with two one-letter modifiers +(/i and /g). The lower-case abbreviated modifiers are the same as used in Perl. +

+
PATTERN SYNTAX
+

+A pattern line must start with one of the following characters (common symbols, +excluding pattern meta-characters): +

+  / ! " ' ` - = _ : ; , % & @ ~
+
+This is interpreted as the pattern's delimiter. A regular expression may be +continued over several input lines, in which case the newline characters are +included within it. It is possible to include the delimiter as a literal within +the pattern by escaping it with a backslash, for example +
+  /abc\/def/
+
+If you do this, the escape and the delimiter form part of the pattern, but +since the delimiters are all non-alphanumeric, the inclusion of the backslash +does not affect the pattern's interpretation. Note, however, that this trick +does not work within \Q...\E literal bracketing because the backslash will +itself be interpreted as a literal. If the terminating delimiter is immediately +followed by a backslash, for example, +
+  /abc/\
+
+a backslash is added to the end of the pattern. This is done to provide a way +of testing the error condition that arises if a pattern finishes with a +backslash, because +
+  /abc\/
+
+is interpreted as the first line of a pattern that starts with "abc/", causing +pcre2test to read the next line as a continuation of the regular expression. +

+

+A pattern can be followed by a modifier list (details below). +

+
SUBJECT LINE SYNTAX
+

+Before each subject line is passed to pcre2_match(), +pcre2_dfa_match(), or pcre2_jit_match(), leading and trailing white +space is removed, and the line is scanned for backslash escapes, unless the +subject_literal modifier was set for the pattern. The following provide a +means of encoding non-printing characters in a visible way: +

+  \a         alarm (BEL, \x07)
+  \b         backspace (\x08)
+  \e         escape (\x1b)
+  \f         form feed (\x0c)
+  \n         newline (\x0a)
+  \r         carriage return (\x0d)
+  \t         tab (\x09)
+  \v         vertical tab (\x0b)
+  \nnn       octal character (up to 3 octal digits); always
+               a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
+  \o{dd...}  octal character (any number of octal digits)
+  \xhh       hexadecimal byte (up to 2 hex digits)
+  \x{hh...}  hexadecimal character (any number of hex digits)
+
+The use of \x{hh...} is not dependent on the use of the utf modifier on +the pattern. It is recognized always. There may be any number of hexadecimal +digits inside the braces; invalid values provoke error messages. +

+

+Note that \xhh specifies one byte rather than one character in UTF-8 mode; +this makes it possible to construct invalid UTF-8 sequences for testing +purposes. On the other hand, \x{hh} is interpreted as a UTF-8 character in +UTF-8 mode, generating more than one byte if the value is greater than 127. +When testing the 8-bit library not in UTF-8 mode, \x{hh} generates one byte +for values less than 256, and causes an error for greater values. +

+

+In UTF-16 mode, all 4-digit \x{hhhh} values are accepted. This makes it +possible to construct invalid UTF-16 sequences for testing purposes. +

+

+In UTF-32 mode, all 4- to 8-digit \x{...} values are accepted. This makes it +possible to construct invalid UTF-32 sequences for testing purposes. +

+

+There is a special backslash sequence that specifies replication of one or more +characters: +

+  \[<characters>]{<count>}
+
+This makes it possible to test long strings without having to provide them as +part of the file. For example: +
+  \[abc]{4}
+
+is converted to "abcabcabcabc". This feature does not support nesting. To +include a closing square bracket in the characters, code it as \x5D. +

+

+A backslash followed by an equals sign marks the end of the subject string and +the start of a modifier list. For example: +

+  abc\=notbol,notempty
+
+If the subject string is empty and \= is followed by whitespace, the line is +treated as a comment line, and is not used for matching. For example: +
+  \= This is a comment.
+  abc\= This is an invalid modifier list.
+
+A backslash followed by any other non-alphanumeric character just escapes that +character. A backslash followed by anything else causes an error. However, if +the very last character in the line is a backslash (and there is no modifier +list), it is ignored. This gives a way of passing an empty line as data, since +a real empty line terminates the data input. +

+

+If the subject_literal modifier is set for a pattern, all subject lines +that follow are treated as literals, with no special treatment of backslashes. +No replication is possible, and any subject modifiers must be set as defaults +by a #subject command. +

+
PATTERN MODIFIERS
+

+There are several types of modifier that can appear in pattern lines. Except +where noted below, they may also be used in #pattern commands. A +pattern's modifier list can add to or override default modifiers that were set +by a previous #pattern command. +

+
+Setting compilation options +
+

+The following modifiers set options for pcre2_compile(). Most of them set +bits in the options argument of that function, but those whose names start with +PCRE2_EXTRA are additional options that are set in the compile context. +Some of these options have single-letter abbreviations. There is special +handling for /x: if a second x is present, PCRE2_EXTENDED is converted into +PCRE2_EXTENDED_MORE as in Perl. A third appearance adds PCRE2_EXTENDED as well, +though this makes no difference to the way pcre2_compile() behaves. See +pcre2api +for a description of the effects of these options. +

+      allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
+      allow_lookaround_bsk      set PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
+      allow_surrogate_escapes   set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
+      alt_bsux                  set PCRE2_ALT_BSUX
+      alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
+      alt_verbnames             set PCRE2_ALT_VERBNAMES
+      anchored                  set PCRE2_ANCHORED
+  /a  ascii_all                 set all ASCII options
+      ascii_bsd                 set PCRE2_EXTRA_ASCII_BSD
+      ascii_bss                 set PCRE2_EXTRA_ASCII_BSS
+      ascii_bsw                 set PCRE2_EXTRA_ASCII_BSW
+      ascii_digit               set PCRE2_EXTRA_ASCII_DIGIT
+      ascii_posix               set PCRE2_EXTRA_ASCII_POSIX
+      auto_callout              set PCRE2_AUTO_CALLOUT
+      bad_escape_is_literal     set PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
+  /i  caseless                  set PCRE2_CASELESS
+  /r  caseless_restrict         set PCRE2_EXTRA_CASELESS_RESTRICT
+      dollar_endonly            set PCRE2_DOLLAR_ENDONLY
+  /s  dotall                    set PCRE2_DOTALL
+      dupnames                  set PCRE2_DUPNAMES
+      endanchored               set PCRE2_ENDANCHORED
+      escaped_cr_is_lf          set PCRE2_EXTRA_ESCAPED_CR_IS_LF
+  /x  extended                  set PCRE2_EXTENDED
+  /xx extended_more             set PCRE2_EXTENDED_MORE
+      extra_alt_bsux            set PCRE2_EXTRA_ALT_BSUX
+      firstline                 set PCRE2_FIRSTLINE
+      literal                   set PCRE2_LITERAL
+      match_line                set PCRE2_EXTRA_MATCH_LINE
+      match_invalid_utf         set PCRE2_MATCH_INVALID_UTF
+      match_unset_backref       set PCRE2_MATCH_UNSET_BACKREF
+      match_word                set PCRE2_EXTRA_MATCH_WORD
+  /m  multiline                 set PCRE2_MULTILINE
+      never_backslash_c         set PCRE2_NEVER_BACKSLASH_C
+      never_ucp                 set PCRE2_NEVER_UCP
+      never_utf                 set PCRE2_NEVER_UTF
+  /n  no_auto_capture           set PCRE2_NO_AUTO_CAPTURE
+      no_auto_possess           set PCRE2_NO_AUTO_POSSESS
+      no_dotstar_anchor         set PCRE2_NO_DOTSTAR_ANCHOR
+      no_start_optimize         set PCRE2_NO_START_OPTIMIZE
+      no_utf_check              set PCRE2_NO_UTF_CHECK
+      ucp                       set PCRE2_UCP
+      ungreedy                  set PCRE2_UNGREEDY
+      use_offset_limit          set PCRE2_USE_OFFSET_LIMIT
+      utf                       set PCRE2_UTF
+
+As well as turning on the PCRE2_UTF option, the utf modifier causes all +non-printing characters in output strings to be printed using the \x{hh...} +notation. Otherwise, those less than 0x100 are output in hex without the curly +brackets. Setting utf in 16-bit or 32-bit mode also causes pattern and +subject strings to be translated to UTF-16 or UTF-32, respectively, before +being passed to library functions. +

+
+Setting compilation controls +
+

+The following modifiers affect the compilation process or request information +about the pattern. There are single-letter abbreviations for some that are +heavily used in the test files. +

+      bsr=[anycrlf|unicode]     specify \R handling
+  /B  bincode                   show binary code without lengths
+      callout_info              show callout information
+      convert=<options>         request foreign pattern conversion
+      convert_glob_escape=c     set glob escape character
+      convert_glob_separator=c  set glob separator character
+      convert_length            set convert buffer length
+      debug                     same as info,fullbincode
+      framesize                 show matching frame size
+      fullbincode               show binary code with lengths
+  /I  info                      show info about compiled pattern
+      hex                       unquoted characters are hexadecimal
+      jit[=<number>]            use JIT
+      jitfast                   use JIT fast path
+      jitverify                 verify JIT use
+      locale=<name>             use this locale
+      max_pattern_compiled      ) set maximum compiled pattern
+                 _length=<n>    )   length (bytes)
+      max_pattern_length=<n>    set maximum pattern length (code units)
+      max_varlookbehind=<n>     set maximum variable lookbehind length
+      memory                    show memory used
+      newline=<type>            set newline type
+      null_context              compile with a NULL context
+      null_pattern              pass pattern as NULL
+      parens_nest_limit=<n>     set maximum parentheses depth
+      posix                     use the POSIX API
+      posix_nosub               use the POSIX API with REG_NOSUB
+      push                      push compiled pattern onto the stack
+      pushcopy                  push a copy onto the stack
+      stackguard=<number>       test the stackguard feature
+      subject_literal           treat all subject lines as literal
+      tables=[0|1|2|3]          select internal tables
+      use_length                do not zero-terminate the pattern
+      utf8_input                treat input as UTF-8
+
+The effects of these modifiers are described in the following sections. +

+
+Newline and \R handling +
+

+The bsr modifier specifies what \R in a pattern should match. If it is +set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to "unicode", +\R matches any Unicode newline sequence. The default can be specified when +PCRE2 is built; if it is not, the default is set to Unicode. +

+

+The newline modifier specifies which characters are to be interpreted as +newlines, both in the pattern and in subject lines. The type must be one of CR, +LF, CRLF, ANYCRLF, ANY, or NUL (in upper or lower case). +

+
+Information about a pattern +
+

+The debug modifier is a shorthand for info,fullbincode, requesting +all available information. +

+

+The bincode modifier causes a representation of the compiled code to be +output after compilation. This information does not contain length and offset +values, which ensures that the same output is generated for different internal +link sizes and different code unit widths. By using bincode, the same +regression tests can be used in different environments. +

+

+The fullbincode modifier, by contrast, does include length and +offset values. This is used in a few special tests that run only for specific +code unit widths and link sizes, and is also useful for one-off tests. +

+

+The info modifier requests information about the compiled pattern +(whether it is anchored, has a fixed first character, and so on). The +information is obtained from the pcre2_pattern_info() function. Here are +some typical examples: +

+    re> /(?i)(^a|^b)/m,info
+  Capture group count = 1
+  Compile options: multiline
+  Overall options: caseless multiline
+  First code unit at start or follows newline
+  Subject length lower bound = 1
+
+    re> /(?i)abc/info
+  Capture group count = 0
+  Compile options: <none>
+  Overall options: caseless
+  First code unit = 'a' (caseless)
+  Last code unit = 'c' (caseless)
+  Subject length lower bound = 3
+
+"Compile options" are those specified by modifiers; "overall options" have +added options that are taken or deduced from the pattern. If both sets of +options are the same, just a single "options" line is output; if there are no +options, the line is omitted. "First code unit" is where any match must start; +if there is more than one they are listed as "starting code units". "Last code +unit" is the last literal code unit that must be present in any match. This is +not necessarily the last character. These lines are omitted if no starting or +ending code units are recorded. The subject length line is omitted when +no_start_optimize is set because the minimum length is not calculated +when it can never be used. +

+

+The framesize modifier shows the size, in bytes, of each storage frame +used by pcre2_match() for handling backtracking. The size depends on the +number of capturing parentheses in the pattern. A vector of these frames is +used at matching time; its overall size is shown when the heapframes_size +subject modifier is set. +

+

+The callout_info modifier requests information about all the callouts in +the pattern. A list of them is output at the end of any other information that +is requested. For each callout, either its number or string is given, followed +by the item that follows it in the pattern. +

+
+Passing a NULL context +
+

+Normally, pcre2test passes a context block to pcre2_compile(). If +the null_context modifier is set, however, NULL is passed. This is for +testing that pcre2_compile() behaves correctly in this case (it uses +default values). +

+
+Passing a NULL pattern +
+

+The null_pattern modifier is for testing the behaviour of +pcre2_compile() when the pattern argument is NULL. The length value +passed is the default PCRE2_ZERO_TERMINATED unless use_length is set. +Any length other than zero causes an error. +

+
+Specifying pattern characters in hexadecimal +
+

+The hex modifier specifies that the characters of the pattern, except for +substrings enclosed in single or double quotes, are to be interpreted as pairs +of hexadecimal digits. This feature is provided as a way of creating patterns +that contain binary zeros and other non-printing characters. White space is +permitted between pairs of digits. For example, this pattern contains three +characters: +

+  /ab 32 59/hex
+
+Parts of such a pattern are taken literally if quoted. This pattern contains +nine characters, only two of which are specified in hexadecimal: +
+  /ab "literal" 32/hex
+
+Either single or double quotes may be used. There is no way of including +the delimiter within a substring. The hex and expand modifiers are +mutually exclusive. +

+
+Specifying the pattern's length +
+

+By default, patterns are passed to the compiling functions as zero-terminated +strings but can be passed by length instead of being zero-terminated. The +use_length modifier causes this to happen. Using a length happens +automatically (whether or not use_length is set) when hex is set, +because patterns specified in hexadecimal may contain binary zeros. +

+

+If hex or use_length is used with the POSIX wrapper API (see +"Using the POSIX wrapper API" +below), the REG_PEND extension is used to pass the pattern's length. +

+
+Specifying a maximum for variable lookbehinds +
+

+Variable lookbehind assertions are supported only if, for each one, there is a +maximum length (in characters) that it can match. There is a limit on this, +whose default can be set at build time, with an ultimate default of 255. The +max_varlookbehind modifier uses the pcre2_set_max_varlookbehind() +function to change the limit. Lookbehinds whose branches each match a fixed +length are limited to 65535 characters per branch. +

+
+Specifying wide characters in 16-bit and 32-bit modes +
+

+In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 and +translated to UTF-16 or UTF-32 when the utf modifier is set. For testing +the 16-bit and 32-bit libraries in non-UTF mode, the utf8_input modifier +can be used. It is mutually exclusive with utf. Input lines are +interpreted as UTF-8 as a means of specifying wide characters. More details are +given in +"Input encoding" +above. +

+
+Generating long repetitive patterns +
+

+Some tests use long patterns that are very repetitive. Instead of creating a +very long input line for such a pattern, you can use a special repetition +feature, similar to the one described for subject lines above. If the +expand modifier is present on a pattern, parts of the pattern that have +the form +

+  \[<characters>]{<count>}
+
+are expanded before the pattern is passed to pcre2_compile(). For +example, \[AB]{6000} is expanded to "ABAB..." 6000 times. This construction +cannot be nested. An initial "\[" sequence is recognized only if "]{" followed +by decimal digits and "}" is found later in the pattern. If not, the characters +remain in the pattern unaltered. The expand and hex modifiers are +mutually exclusive. +

+

+If part of an expanded pattern looks like an expansion, but is really part of +the actual pattern, unwanted expansion can be avoided by giving two values in +the quantifier. For example, \[AB]{6000,6000} is not recognized as an +expansion item. +

+

+If the info modifier is set on an expanded pattern, the result of the +expansion is included in the information that is output. +

+
+JIT compilation +
+

+Just-in-time (JIT) compiling is a heavyweight optimization that can greatly +speed up pattern matching. See the +pcre2jit +documentation for details. JIT compiling happens, optionally, after a pattern +has been successfully compiled into an internal form. The JIT compiler converts +this to optimized machine code. It needs to know whether the match-time options +PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT are going to be used, because +different code is generated for the different cases. See the partial +modifier in "Subject Modifiers" +below +for details of how these options are specified for each match attempt. +

+

+JIT compilation is requested by the jit pattern modifier, which may +optionally be followed by an equals sign and a number in the range 0 to 7. +The three bits that make up the number specify which of the three JIT operating +modes are to be compiled: +

+  1  compile JIT code for non-partial matching
+  2  compile JIT code for soft partial matching
+  4  compile JIT code for hard partial matching
+
+The possible values for the jit modifier are therefore: +
+  0  disable JIT
+  1  normal matching only
+  2  soft partial matching only
+  3  normal and soft partial matching
+  4  hard partial matching only
+  6  soft and hard partial matching only
+  7  all three modes
+
+If no number is given, 7 is assumed. The phrase "partial matching" means a call +to pcre2_match() with either the PCRE2_PARTIAL_SOFT or the +PCRE2_PARTIAL_HARD option set. Note that such a call may return a complete +match; the options enable the possibility of a partial match, but do not +require it. Note also that if you request JIT compilation only for partial +matching (for example, jit=2) but do not set the partial modifier on a +subject line, that match will not use JIT code because none was compiled for +non-partial matching. +

+

+If JIT compilation is successful, the compiled JIT code will automatically be +used when an appropriate type of match is run, except when incompatible +run-time options are specified. For more details, see the +pcre2jit +documentation. See also the jitstack modifier below for a way of +setting the size of the JIT stack. +

+

+If the jitfast modifier is specified, matching is done using the JIT +"fast path" interface, pcre2_jit_match(), which skips some of the sanity +checks that are done by pcre2_match(), and of course does not work when +JIT is not supported. If jitfast is specified without jit, jit=7 is +assumed. +

+

+If the jitverify modifier is specified, information about the compiled +pattern shows whether JIT compilation was or was not successful. If +jitverify is specified without jit, jit=7 is assumed. If JIT +compilation is successful when jitverify is set, the text "(JIT)" is +added to the first output line after a match or non match when JIT-compiled +code was actually used in the match. +

+
+Setting a locale +
+

+The locale modifier must specify the name of a locale, for example: +

+  /pattern/locale=fr_FR
+
+The given locale is set, pcre2_maketables() is called to build a set of +character tables for the locale, and this is then passed to +pcre2_compile() when compiling the regular expression. The same tables +are used when matching the following subject lines. The locale modifier +applies only to the pattern on which it appears, but can be given in a +#pattern command if a default is needed. Setting a locale and alternate +character tables are mutually exclusive. +

+
+Showing pattern memory +
+

+The memory modifier causes the size in bytes of the memory used to hold +the compiled pattern to be output. This does not include the size of the +pcre2_code block; it is just the actual compiled data. If the pattern is +subsequently passed to the JIT compiler, the size of the JIT compiled code is +also output. Here is an example: +

+    re> /a(b)c/jit,memory
+  Memory allocation (code space): 21
+  Memory allocation (JIT code): 1910
+
+
+

+
+Limiting nested parentheses +
+

+The parens_nest_limit modifier sets a limit on the depth of nested +parentheses in a pattern. Breaching the limit causes a compilation error. +The default for the library is set when PCRE2 is built, but pcre2test +sets its own default of 220, which is required for running the standard test +suite. +

+
+Limiting the pattern length +
+

+The max_pattern_length modifier sets a limit, in code units, to the +length of pattern that pcre2_compile() will accept. Breaching the limit +causes a compilation error. The default is the largest number a PCRE2_SIZE +variable can hold (essentially unlimited). +

+
+Limiting the size of a compiled pattern +
+

+The max_pattern_compiled_length modifier sets a limit, in bytes, to the +amount of memory used by a compiled pattern. Breaching the limit causes a +compilation error. The default is the largest number a PCRE2_SIZE variable can +hold (essentially unlimited). +

+
+Using the POSIX wrapper API +
+

+The posix and posix_nosub modifiers cause pcre2test to call +PCRE2 via the POSIX wrapper API rather than its native API. When +posix_nosub is used, the POSIX option REG_NOSUB is passed to +regcomp(). The POSIX wrapper supports only the 8-bit library. Note that +it does not imply POSIX matching semantics; for more detail see the +pcre2posix +documentation. The following pattern modifiers set options for the +regcomp() function: +

+  caseless           REG_ICASE
+  multiline          REG_NEWLINE
+  dotall             REG_DOTALL     )
+  ungreedy           REG_UNGREEDY   ) These options are not part of
+  ucp                REG_UCP        )   the POSIX standard
+  utf                REG_UTF8       )
+
+The regerror_buffsize modifier specifies a size for the error buffer that +is passed to regerror() in the event of a compilation error. For example: +
+  /abc/posix,regerror_buffsize=20
+
+This provides a means of testing the behaviour of regerror() when the +buffer is too small for the error message. If this modifier has not been set, a +large buffer is used. +

+

+The aftertext and allaftertext subject modifiers work as described +below. All other modifiers are either ignored, with a warning message, or cause +an error. +

+

+The pattern is passed to regcomp() as a zero-terminated string by +default, but if the use_length or hex modifiers are set, the +REG_PEND extension is used to pass it by length. +

+
+Testing the stack guard feature +
+

+The stackguard modifier is used to test the use of +pcre2_set_compile_recursion_guard(), a function that is provided to +enable stack availability to be checked during compilation (see the +pcre2api +documentation for details). If the number specified by the modifier is greater +than zero, pcre2_set_compile_recursion_guard() is called to set up a +callback from pcre2_compile() to a local function. The argument it +receives is the current nesting parenthesis depth; if this is greater than the +value given by the modifier, non-zero is returned, causing the compilation to +be aborted. +

+
+Using alternative character tables +
+

+The value specified for the tables modifier must be one of the digits 0, +1, 2, or 3. It causes a specific set of built-in character tables to be passed +to pcre2_compile(). This is used in the PCRE2 tests to check behaviour +with different character tables. The digit specifies the tables as follows: +

+  0   do not pass any special character tables
+  1   the default ASCII tables, as distributed in
+        pcre2_chartables.c.dist
+  2   a set of tables defining ISO 8859 characters
+  3   a set of tables loaded by the #loadtables command
+
+In tables 2, some characters whose codes are greater than 128 are identified as +letters, digits, spaces, etc. Tables 3 can be used only after a +#loadtables command has loaded them from a binary file. Setting alternate +character tables and a locale are mutually exclusive. +

+
+Setting certain match controls +
+

+The following modifiers are really subject modifiers, and are described under +"Subject Modifiers" below. However, they may be included in a pattern's +modifier list, in which case they are applied to every subject line that is +processed with that pattern. These modifiers do not affect the compilation +process. +

+      aftertext                   show text after match
+      allaftertext                show text after captures
+      allcaptures                 show all captures
+      allvector                   show the entire ovector
+      allusedtext                 show all consulted text
+      altglobal                   alternative global matching
+  /g  global                      global matching
+      heapframes_size             show match data heapframes size
+      jitstack=<n>                set size of JIT stack
+      mark                        show mark values
+      replace=<string>            specify a replacement string
+      startchar                   show starting character when relevant
+      substitute_callout          use substitution callouts
+      substitute_extended         use PCRE2_SUBSTITUTE_EXTENDED
+      substitute_literal          use PCRE2_SUBSTITUTE_LITERAL
+      substitute_matched          use PCRE2_SUBSTITUTE_MATCHED
+      substitute_overflow_length  use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+      substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+      substitute_skip=<n>         skip substitution <n>
+      substitute_stop=<n>         skip substitution <n> and following
+      substitute_unknown_unset    use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
+      substitute_unset_empty      use PCRE2_SUBSTITUTE_UNSET_EMPTY
+
+These modifiers may not appear in a #pattern command. If you want them as +defaults, set them in a #subject command. +

+
+Specifying literal subject lines +
+

+If the subject_literal modifier is present on a pattern, all the subject +lines that it matches are taken as literal strings, with no interpretation of +backslashes. It is not possible to set subject modifiers on such lines, but any +that are set as defaults by a #subject command are recognized. +

+
+Saving a compiled pattern +
+

+When a pattern with the push modifier is successfully compiled, it is +pushed onto a stack of compiled patterns, and pcre2test expects the next +line to contain a new pattern (or a command) instead of a subject line. This +facility is used when saving compiled patterns to a file, as described in the +section entitled "Saving and restoring compiled patterns" +below. +If pushcopy is used instead of push, a copy of the compiled +pattern is stacked, leaving the original as current, ready to match the +following input lines. This provides a way of testing the +pcre2_code_copy() function. +The push and pushcopy modifiers are incompatible with compilation +modifiers such as global that act at match time. Any that are specified +are ignored (for the stacked copy), with a warning message, except for +replace, which causes an error. Note that jitverify, which is +allowed, does not carry through to any subsequent matching that uses a stacked +pattern. +

+
+Testing foreign pattern conversion +
+

+The experimental foreign pattern conversion functions in PCRE2 can be tested by +setting the convert modifier. Its argument is a colon-separated list of +options, which set the equivalent option for the pcre2_pattern_convert() +function: +

+  glob                    PCRE2_CONVERT_GLOB
+  glob_no_starstar        PCRE2_CONVERT_GLOB_NO_STARSTAR
+  glob_no_wild_separator  PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
+  posix_basic             PCRE2_CONVERT_POSIX_BASIC
+  posix_extended          PCRE2_CONVERT_POSIX_EXTENDED
+  unset                   Unset all options
+
+The "unset" value is useful for turning off a default that has been set by a +#pattern command. When one of these options is set, the input pattern is +passed to pcre2_pattern_convert(). If the conversion is successful, the +result is reflected in the output and then passed to pcre2_compile(). The +normal utf and no_utf_check options, if set, cause the +PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be passed to +pcre2_pattern_convert(). +

+

+By default, the conversion function is allowed to allocate a buffer for its +output. However, if the convert_length modifier is set to a value greater +than zero, pcre2test passes a buffer of the given length. This makes it +possible to test the length check. +

+

+The convert_glob_escape and convert_glob_separator modifiers can be +used to specify the escape and separator characters for glob processing, +overriding the defaults, which are operating-system dependent. +

+
SUBJECT MODIFIERS
+

+The modifiers that can appear in subject lines and the #subject +command are of two types. +

+
+Setting match options +
+

+The following modifiers set options for pcre2_match() or +pcre2_dfa_match(). See +pcre2api +for a description of their effects. +

+      anchored                   set PCRE2_ANCHORED
+      endanchored                set PCRE2_ENDANCHORED
+      dfa_restart                set PCRE2_DFA_RESTART
+      dfa_shortest               set PCRE2_DFA_SHORTEST
+      disable_recurseloop_check  set PCRE2_DISABLE_RECURSELOOP_CHECK
+      no_jit                     set PCRE2_NO_JIT
+      no_utf_check               set PCRE2_NO_UTF_CHECK
+      notbol                     set PCRE2_NOTBOL
+      notempty                   set PCRE2_NOTEMPTY
+      notempty_atstart           set PCRE2_NOTEMPTY_ATSTART
+      noteol                     set PCRE2_NOTEOL
+      partial_hard (or ph)       set PCRE2_PARTIAL_HARD
+      partial_soft (or ps)       set PCRE2_PARTIAL_SOFT
+
+The partial matching modifiers are provided with abbreviations because they +appear frequently in tests. +

+

+If the posix or posix_nosub modifier was present on the pattern, +causing the POSIX wrapper API to be used, the only option-setting modifiers +that have any effect are notbol, notempty, and noteol, +causing REG_NOTBOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to +regexec(). The other modifiers are ignored, with a warning message. +

+

+There is one additional modifier that can be used with the POSIX wrapper. It is +ignored (with a warning) if used for non-POSIX matching. +

+      posix_startend=<n>[:<m>]
+
+This causes the subject string to be passed to regexec() using the +REG_STARTEND option, which uses offsets to specify which part of the string is +searched. If only one number is given, the end offset is passed as the end of +the subject string. For more detail of REG_STARTEND, see the +pcre2posix +documentation. If the subject string contains binary zeros (coded as escapes +such as \x{00} because pcre2test does not support actual binary zeros in +its input), you must use posix_startend to specify its length. +

+
+Setting match controls +
+

+The following modifiers affect the matching process or request additional +information. Some of them may also be specified on a pattern line (see above), +in which case they apply to every subject line that is matched against that +pattern, but can be overridden by modifiers on the subject. +

+      aftertext                  show text after match
+      allaftertext               show text after captures
+      allcaptures                show all captures
+      allvector                  show the entire ovector
+      allusedtext                show all consulted text (non-JIT only)
+      altglobal                  alternative global matching
+      callout_capture            show captures at callout time
+      callout_data=<n>           set a value to pass via callouts
+      callout_error=<n>[:<m>]    control callout error
+      callout_extra              show extra callout information
+      callout_fail=<n>[:<m>]     control callout failure
+      callout_no_where           do not show position of a callout
+      callout_none               do not supply a callout function
+      copy=<number or name>      copy captured substring
+      depth_limit=<n>            set a depth limit
+      dfa                        use pcre2_dfa_match()
+      find_limits                find heap, match and depth limits
+      find_limits_noheap         find match and depth limits
+      get=<number or name>       extract captured substring
+      getall                     extract all captured substrings
+  /g  global                     global matching
+      heapframes_size            show match data heapframes size
+      heap_limit=<n>             set a limit on heap memory (Kbytes)
+      jitstack=<n>               set size of JIT stack
+      mark                       show mark values
+      match_limit=<n>            set a match limit
+      memory                     show heap memory usage
+      null_context               match with a NULL context
+      null_replacement           substitute with NULL replacement
+      null_subject               match with NULL subject
+      offset=<n>                 set starting offset
+      offset_limit=<n>           set offset limit
+      ovector=<n>                set size of output vector
+      recursion_limit=<n>        obsolete synonym for depth_limit
+      replace=<string>           specify a replacement string
+      startchar                  show startchar when relevant
+      startoffset=<n>            same as offset=<n>
+      substitute_callout         use substitution callouts
+      substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
+      substitute_literal         use PCRE2_SUBSTITUTE_LITERAL
+      substitute_matched         use PCRE2_SUBSTITUTE_MATCHED
+      substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+      substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+      substitute_skip=<n>        skip substitution number n
+      substitute_stop=<n>        skip substitution number n and greater
+      substitute_unknown_unset   use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
+      substitute_unset_empty     use PCRE2_SUBSTITUTE_UNSET_EMPTY
+      zero_terminate             pass the subject as zero-terminated
+
+The effects of these modifiers are described in the following sections. When +matching via the POSIX wrapper API, the aftertext, allaftertext, +and ovector subject modifiers work as described below. All other +modifiers are either ignored, with a warning message, or cause an error. +

+
+Showing more text +
+

+The aftertext modifier requests that as well as outputting the part of +the subject string that matched the entire pattern, pcre2test should in +addition output the remainder of the subject string. This is useful for tests +where the subject contains multiple copies of the same substring. The +allaftertext modifier requests the same action for captured substrings as +well as the main matched substring. In each case the remainder is output on the +following line with a plus character following the capture number. +

+

+The allusedtext modifier requests that all the text that was consulted +during a successful pattern match by the interpreter should be shown, for both +full and partial matches. This feature is not supported for JIT matching, and +if requested with JIT it is ignored (with a warning message). Setting this +modifier affects the output if there is a lookbehind at the start of a match, +or, for a complete match, a lookahead at the end, or if \K is used in the +pattern. Characters that precede or follow the start and end of the actual +match are indicated in the output by '<' or '>' characters underneath them. +Here is an example: +

+    re> /(?<=pqr)abc(?=xyz)/
+  data> 123pqrabcxyz456\=allusedtext
+   0: pqrabcxyz
+      <<<   >>>
+  data> 123pqrabcxy\=ph,allusedtext
+  Partial match: pqrabcxy
+                 <<<
+
+The first, complete match shows that the matched string is "abc", with the +preceding and following strings "pqr" and "xyz" having been consulted during +the match (when processing the assertions). The partial match can indicate only +the preceding string. +

+

+The startchar modifier requests that the starting character for the match +be indicated, if it is different to the start of the matched string. The only +time when this occurs is when \K has been processed as part of the match. In +this situation, the output for the matched string is displayed from the +starting character instead of from the match point, with circumflex characters +under the earlier characters. For example: +

+    re> /abc\Kxyz/
+  data> abcxyz\=startchar
+   0: abcxyz
+      ^^^
+
+Unlike allusedtext, the startchar modifier can be used with JIT. +However, these two modifiers are mutually exclusive. +

+
+Showing the value of all capture groups +
+

+The allcaptures modifier requests that the values of all potential +captured parentheses be output after a match. By default, only those up to the +highest one actually used in the match are output (corresponding to the return +code from pcre2_match()). Groups that did not take part in the match +are output as "<unset>". This modifier is not relevant for DFA matching (which +does no capturing) and does not apply when replace is specified; it is +ignored, with a warning message, if present. +

+
+Showing the entire ovector, for all outcomes +
+

+The allvector modifier requests that the entire ovector be shown, +whatever the outcome of the match. Compare allcaptures, which shows only +up to the maximum number of capture groups for the pattern, and then only for a +successful complete non-DFA match. This modifier, which acts after any match +result, and also for DFA matching, provides a means of checking that there are +no unexpected modifications to ovector fields. Before each match attempt, the +ovector is filled with a special value, and if this is found in both elements +of a capturing pair, "<unchanged>" is output. After a successful match, this +applies to all groups after the maximum capture group for the pattern. In other +cases it applies to the entire ovector. After a partial match, the first two +elements are the only ones that should be set. After a DFA match, the amount of +ovector that is used depends on the number of matches that were found. +

+
+Testing pattern callouts +
+

+A callout function is supplied when pcre2test calls the library matching +functions, unless callout_none is specified. Its behaviour can be +controlled by various modifiers listed above whose names begin with +callout_. Details are given in the section entitled "Callouts" +below. +Testing callouts from pcre2_substitute() is described separately in +"Testing the substitution function" +below. +

+
+Finding all matches in a string +
+

+Searching for all possible matches within a subject can be requested by the +global or altglobal modifier. After finding a match, the matching +function is called again to search the remainder of the subject. The difference +between global and altglobal is that the former uses the +start_offset argument to pcre2_match() or pcre2_dfa_match() +to start searching at a new point within the entire string (which is what Perl +does), whereas the latter passes over a shortened subject. This makes a +difference to the matching process if the pattern begins with a lookbehind +assertion (including \b or \B). +

+

+If an empty string is matched, the next match is done with the +PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search for +another, non-empty, match at the same point in the subject. If this match +fails, the start offset is advanced, and the normal match is retried. This +imitates the way Perl handles such cases when using the /g modifier or +the split() function. Normally, the start offset is advanced by one +character, but if the newline convention recognizes CRLF as a newline, and the +current character is CR followed by LF, an advance of two characters occurs. +

+
+Testing substring extraction functions +
+

+The copy and get modifiers can be used to test the +pcre2_substring_copy_xxx() and pcre2_substring_get_xxx() functions. +They can be given more than once, and each can specify a capture group name or +number, for example: +

+   abcd\=copy=1,copy=3,get=G1
+
+If the #subject command is used to set default copy and/or get lists, +these can be unset by specifying a negative number to cancel all numbered +groups and an empty name to cancel all named groups. +

+

+The getall modifier tests pcre2_substring_list_get(), which +extracts all captured substrings. +

+

+If the subject line is successfully matched, the substrings extracted by the +convenience functions are output with C, G, or L after the string number +instead of a colon. This is in addition to the normal full list. The string +length (that is, the return from the extraction function) is given in +parentheses after each substring, followed by the name when the extraction was +by name. +

+
+Testing the substitution function +
+

+If the replace modifier is set, the pcre2_substitute() function is +called instead of one of the matching functions (or after one call of +pcre2_match() in the case of PCRE2_SUBSTITUTE_MATCHED). Note that +replacement strings cannot contain commas, because a comma signifies the end of +a modifier. This is not thought to be an issue in a test program. +

+

+Specifying a completely empty replacement string disables this modifier. +However, it is possible to specify an empty replacement by providing a buffer +length, as described below, for an otherwise empty replacement. +

+

+Unlike subject strings, pcre2test does not process replacement strings +for escape sequences. In UTF mode, a replacement string is checked to see if it +is a valid UTF-8 string. If so, it is correctly converted to a UTF string of +the appropriate code unit width. If it is not a valid UTF-8 string, the +individual code units are copied directly. This provides a means of passing an +invalid UTF-8 string for testing purposes. +

+

+The following modifiers set options (in addition to the normal match options) +for pcre2_substitute(): +

+  global                      PCRE2_SUBSTITUTE_GLOBAL
+  substitute_extended         PCRE2_SUBSTITUTE_EXTENDED
+  substitute_literal          PCRE2_SUBSTITUTE_LITERAL
+  substitute_matched          PCRE2_SUBSTITUTE_MATCHED
+  substitute_overflow_length  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+  substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+  substitute_unknown_unset    PCRE2_SUBSTITUTE_UNKNOWN_UNSET
+  substitute_unset_empty      PCRE2_SUBSTITUTE_UNSET_EMPTY
+
+See the +pcre2api +documentation for details of these options. +

+

+After a successful substitution, the modified string is output, preceded by the +number of replacements. This may be zero if there were no matches. Here is a +simple example of a substitution test: +

+  /abc/replace=xxx
+      =abc=abc=
+   1: =xxx=abc=
+      =abc=abc=\=global
+   2: =xxx=xxx=
+
+Subject and replacement strings should be kept relatively short (fewer than 256 +characters) for substitution tests, as fixed-size buffers are used. To make it +easy to test for buffer overflow, if the replacement string starts with a +number in square brackets, that number is passed to pcre2_substitute() as +the size of the output buffer, with the replacement string starting at the next +character. Here is an example that tests the edge case: +
+  /abc/
+      123abc123\=replace=[10]XYZ
+   1: 123XYZ123
+      123abc123\=replace=[9]XYZ
+  Failed: error -47: no more memory
+
+The default action of pcre2_substitute() is to return +PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the +substitute_overflow_length modifier), pcre2_substitute() continues +to go through the motions of matching and substituting (but not doing any +callouts), in order to compute the size of buffer that is required. When this +happens, pcre2test shows the required buffer length (which includes space +for the trailing zero) as part of the error message. For example: +
+  /abc/substitute_overflow_length
+      123abc123\=replace=[9]XYZ
+  Failed: error -47: no more memory: 10 code units are needed
+
+A replacement string is ignored with POSIX and DFA matching. Specifying partial +matching provokes an error return ("bad option value") from +pcre2_substitute(). +

+
+Testing substitute callouts +
+

+If the substitute_callout modifier is set, a substitution callout +function is set up. The null_context modifier must not be set, because +the address of the callout function is passed in a match context. When the +callout function is called (after each substitution), details of the input +and output strings are output. For example: +

+  /abc/g,replace=<$0>,substitute_callout
+      abcdefabcpqr
+   1(1) Old 0 3 "abc" New 0 5 "<abc>"
+   2(1) Old 6 9 "abc" New 8 13 "<abc>"
+   2: <abc>def<abc>pqr
+
+The first number on each callout line is the count of matches. The +parenthesized number is the number of pairs that are set in the ovector (that +is, one more than the number of capturing groups that were set). Then are +listed the offsets of the old substring, its contents, and the same for the +replacement. +

+

+By default, the substitution callout function returns zero, which accepts the +replacement and causes matching to continue if /g was used. Two further +modifiers can be used to test other return values. If substitute_skip is +set to a value greater than zero the callout function returns +1 for the match +of that number, and similarly substitute_stop returns -1. These cause the +replacement to be rejected, and -1 causes no further matching to take place. If +either of them is set, substitute_callout is assumed. For example: +

+  /abc/g,replace=<$0>,substitute_skip=1
+      abcdefabcpqr
+   1(1) Old 0 3 "abc" New 0 5 "<abc> SKIPPED"
+   2(1) Old 6 9 "abc" New 6 11 "<abc>"
+   2: abcdef<abc>pqr
+      abcdefabcpqr\=substitute_stop=1
+   1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED"
+   1: abcdefabcpqr
+
+If both are set for the same number, stop takes precedence. Only a single skip +or stop is supported, which is sufficient for testing that the feature works. +

+
+Setting the JIT stack size +
+

+The jitstack modifier provides a way of setting the maximum stack size +that is used by the just-in-time optimization code. It is ignored if JIT +optimization is not being used. The value is a number of kibibytes (units of +1024 bytes). Setting zero reverts to the default of 32KiB. Providing a stack +that is larger than the default is necessary only for very complicated +patterns. If jitstack is set non-zero on a subject line it overrides any +value that was set on the pattern. +

+
+Setting heap, match, and depth limits +
+

+The heap_limit, match_limit, and depth_limit modifiers set +the appropriate limits in the match context. These values are ignored when the +find_limits or find_limits_noheap modifier is specified. +

+
+Finding minimum limits +
+

+If the find_limits modifier is present on a subject line, pcre2test +calls the relevant matching function several times, setting different values in +the match context via pcre2_set_heap_limit(), +pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds +the smallest value for each parameter that allows the match to complete without +a "limit exceeded" error. The match itself may succeed or fail. An alternative +modifier, find_limits_noheap, omits the heap limit. This is used in the +standard tests, because the minimum heap limit varies between systems. If JIT +is being used, only the match limit is relevant, and the other two are +automatically omitted. +

+

+When using this modifier, the pattern should not contain any limit settings +such as (*LIMIT_MATCH=...) within it. If such a setting is present and is +lower than the minimum matching value, the minimum value cannot be found +because pcre2_set_match_limit() etc. are only able to reduce the value of +an in-pattern limit; they cannot increase it. +

+

+For non-DFA matching, the minimum depth_limit number is a measure of how +much nested backtracking happens (that is, how deeply the pattern's tree is +searched). In the case of DFA matching, depth_limit controls the depth of +recursive calls of the internal function that is used for handling pattern +recursion, lookaround assertions, and atomic groups. +

+

+For non-DFA matching, the match_limit number is a measure of the amount +of backtracking that takes place, and learning the minimum value can be +instructive. For most simple matches, the number is quite small, but for +patterns with very large numbers of matching possibilities, it can become large +very quickly with increasing length of subject string. In the case of DFA +matching, match_limit controls the total number of calls, both recursive +and non-recursive, to the internal matching function, thus controlling the +overall amount of computing resource that is used. +

+

+For both kinds of matching, the heap_limit number, which is in kibibytes +(units of 1024 bytes), limits the amount of heap memory used for matching. +

+
+Showing MARK names +
+

+The mark modifier causes the names from backtracking control verbs that +are returned from calls to pcre2_match() to be displayed. If a mark is +returned for a match, non-match, or partial match, pcre2test shows it. +For a match, it is on a line by itself, tagged with "MK:". Otherwise, it +is added to the non-match message. +

+
+Showing memory usage +
+

+The memory modifier causes pcre2test to log the sizes of all heap +memory allocation and freeing calls that occur during a call to +pcre2_match() or pcre2_dfa_match(). In the latter case, heap memory +is used only when a match requires more internal workspace than the default +allocation on the stack, so in many cases there will be no output. No heap +memory is allocated during matching with JIT. For this modifier to work, the +null_context modifier must not be set on both the pattern and the +subject, though it can be set on one or the other. +

+
+Showing the heap frame overall vector size +
+

+The heapframes_size modifier is relevant for matches using +pcre2_match() without JIT. After a match has run (whether successful or +not) the size, in bytes, of the allocated heap frames vector that is left +attached to the match data block is shown. If the matching action involved +several calls to pcre2_match() (for example, global matching or for +timing) only the final value is shown. +

+

+This modifier is ignored, with a warning, for POSIX or DFA matching. JIT +matching does not use the heap frames vector, so the size is always zero, +unless there was a previous non-JIT match. Note that specifying a size of zero +for the output vector (see below) causes pcre2test to free its match data +block (and associated heap frames vector) and allocate a new one. +

+
+Setting a starting offset +
+

+The offset modifier sets an offset in the subject string at which +matching starts. Its value is a number of code units, not characters. +

+
+Setting an offset limit +
+

+The offset_limit modifier sets a limit for unanchored matches. If a match +cannot be found starting at or before this offset in the subject, a "no match" +return is given. The data value is a number of code units, not characters. When +this modifier is used, the use_offset_limit modifier must have been set +for the pattern; if not, an error is generated. +

+
+Setting the size of the output vector +
+

+The ovector modifier applies only to the subject line in which it +appears, though of course it can also be used to set a default in a +#subject command. It specifies the number of pairs of offsets that are +available for storing matching information. The default is 15. +

+

+A value of zero is useful when testing the POSIX API because it causes +regexec() to be called with a NULL capture vector. When not testing the +POSIX API, a value of zero is used to cause +pcre2_match_data_create_from_pattern() to be called, in order to create a +new match block of exactly the right size for the pattern. (It is not possible +to create a match block with a zero-length ovector; there is always at least +one pair of offsets.) The old match data block is freed. +

+
+Passing the subject as zero-terminated +
+

+By default, the subject string is passed to a native API matching function with +its correct length. In order to test the facility for passing a zero-terminated +string, the zero_terminate modifier is provided. It causes the length to +be passed as PCRE2_ZERO_TERMINATED. When matching via the POSIX interface, +this modifier is ignored, with a warning. +

+

+When testing pcre2_substitute(), this modifier also has the effect of +passing the replacement string as zero-terminated. +

+
+Passing a NULL context, subject, or replacement +
+

+Normally, pcre2test passes a context block to pcre2_match(), +pcre2_dfa_match(), pcre2_jit_match() or pcre2_substitute(). +If the null_context modifier is set, however, NULL is passed. This is for +testing that the matching and substitution functions behave correctly in this +case (they use default values). This modifier cannot be used with the +find_limits, find_limits_noheap, or substitute_callout +modifiers. +

+

+Similarly, for testing purposes, if the null_subject or +null_replacement modifier is set, the subject or replacement string +pointers are passed as NULL, respectively, to the relevant functions. +

+
THE ALTERNATIVE MATCHING FUNCTION
+

+By default, pcre2test uses the standard PCRE2 matching function, +pcre2_match() to match each subject line. PCRE2 also supports an +alternative matching function, pcre2_dfa_match(), which operates in a +different way, and has some restrictions. The differences between the two +functions are described in the +pcre2matching +documentation. +

+

+If the dfa modifier is set, the alternative matching function is used. +This function finds all possible matches at a given point in the subject. If, +however, the dfa_shortest modifier is set, processing stops after the +first match is found. This is always the shortest possible match. +

+
DEFAULT OUTPUT FROM pcre2test
+

+This section describes the output when the normal matching function, +pcre2_match(), is being used. +

+

+When a match succeeds, pcre2test outputs the list of captured substrings, +starting with number 0 for the string that matched the whole pattern. +Otherwise, it outputs "No match" when the return is PCRE2_ERROR_NOMATCH, or +"Partial match:" followed by the partially matching substring when the +return is PCRE2_ERROR_PARTIAL. (Note that this is the +entire substring that was inspected during the partial match; it may include +characters before the actual match start if a lookbehind assertion, \K, \b, +or \B was involved.) +

+

+For any other return, pcre2test outputs the PCRE2 negative error number +and a short descriptive phrase. If the error is a failed UTF string check, the +code unit offset of the start of the failing character is also output. Here is +an example of an interactive pcre2test run. +

+  $ pcre2test
+  PCRE2 version 10.22 2016-07-29
+
+    re> /^abc(\d+)/
+  data> abc123
+   0: abc123
+   1: 123
+  data> xyz
+  No match
+
+Unset capturing substrings that are not followed by one that is set are not +shown by pcre2test unless the allcaptures modifier is specified. In +the following example, there are two capturing substrings, but when the first +data line is matched, the second, unset substring is not shown. An "internal" +unset substring is shown as "<unset>", as for the second data line. +
+    re> /(a)|(b)/
+  data> a
+   0: a
+   1: a
+  data> b
+   0: b
+   1: <unset>
+   2: b
+
+If the strings contain any non-printing characters, they are output as \xhh +escapes if the value is less than 256 and UTF mode is not set. Otherwise they +are output as \x{hh...} escapes. See below for the definition of non-printing +characters. If the aftertext modifier is set, the output for substring 0 +is followed by the rest of the subject string, identified by "0+" like this: +
+    re> /cat/aftertext
+  data> cataract
+   0: cat
+   0+ aract
+
+If global matching is requested, the results of successive matching attempts +are output in sequence, like this: +
+    re> /\Bi(\w\w)/g
+  data> Mississippi
+   0: iss
+   1: ss
+   0: iss
+   1: ss
+   0: ipp
+   1: pp
+
+"No match" is output only if the first match attempt fails. Here is an example +of a failure message (the offset 4 that is specified by the offset +modifier is past the end of the subject string): +
+    re> /xyz/
+  data> xyz\=offset=4
+  Error -24 (bad offset value)
+
+

+

+Note that whereas patterns can be continued over several lines (a plain ">" +prompt is used for continuations), subject lines may not. However newlines can +be included in a subject by means of the \n escape (or \r, \r\n, etc., +depending on the newline sequence setting). +

+
OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
+

+When the alternative matching function, pcre2_dfa_match(), is used, the +output consists of a list of all the matches that start at the first point in +the subject where there is at least one match. For example: +

+    re> /(tang|tangerine|tan)/
+  data> yellow tangerine\=dfa
+   0: tangerine
+   1: tang
+   2: tan
+
+Using the normal matching function on this data finds only "tang". The +longest matching string is always given first (and numbered zero). After a +PCRE2_ERROR_PARTIAL return, the output is "Partial match:", followed by the +partially matching substring. Note that this is the entire substring that was +inspected during the partial match; it may include characters before the actual +match start if a lookbehind assertion, \b, or \B was involved. (\K is not +supported for DFA matching.) +

+

+If global matching is requested, the search for further matches resumes +at the end of the longest match. For example: +

+    re> /(tang|tangerine|tan)/g
+  data> yellow tangerine and tangy sultana\=dfa
+   0: tangerine
+   1: tang
+   2: tan
+   0: tang
+   1: tan
+   0: tan
+
+The alternative matching function does not support substring capture, so the +modifiers that are concerned with captured substrings are not relevant. +

+
RESTARTING AFTER A PARTIAL MATCH
+

+When the alternative matching function has given the PCRE2_ERROR_PARTIAL +return, indicating that the subject partially matched the pattern, you can +restart the match with additional subject data by means of the +dfa_restart modifier. For example: +

+    re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
+  data> 23ja\=ps,dfa
+  Partial match: 23ja
+  data> n05\=dfa,dfa_restart
+   0: n05
+
+For further information about partial matching, see the +pcre2partial +documentation. +

+
CALLOUTS
+

+If the pattern contains any callout requests, pcre2test's callout +function is called during matching unless callout_none is specified. This +works with both matching functions, and with JIT, though there are some +differences in behaviour. The output for callouts with numerical arguments and +those with string arguments is slightly different. +

+
+Callouts with numerical arguments +
+

+By default, the callout function displays the callout number, the start and +current positions in the subject text at the callout time, and the next pattern +item to be tested. For example: +

+  --->pqrabcdef
+    0    ^  ^     \d
+
+This output indicates that callout number 0 occurred for a match attempt +starting at the fourth character of the subject string, when the pointer was at +the seventh character, and when the next pattern item was \d. Just +one circumflex is output if the start and current positions are the same, or if +the current position precedes the start position, which can happen if the +callout is in a lookbehind assertion. +

+

+Callouts numbered 255 are assumed to be automatic callouts, inserted as a +result of the auto_callout pattern modifier. In this case, instead of +showing the callout number, the offset in the pattern, preceded by a plus, is +output. For example: +

+    re> /\d?[A-E]\*/auto_callout
+  data> E*
+  --->E*
+   +0 ^      \d?
+   +3 ^      [A-E]
+   +8 ^^     \*
+  +10 ^ ^
+   0: E*
+
+If a pattern contains (*MARK) items, an additional line is output whenever +a change of latest mark is passed to the callout function. For example: +
+    re> /a(*MARK:X)bc/auto_callout
+  data> abc
+  --->abc
+   +0 ^       a
+   +1 ^^      (*MARK:X)
+  +10 ^^      b
+  Latest Mark: X
+  +11 ^ ^     c
+  +12 ^  ^
+   0: abc
+
+The mark changes between matching "a" and "b", but stays the same for the rest +of the match, so nothing more is output. If, as a result of backtracking, the +mark reverts to being unset, the text "<unset>" is output. +

+
+Callouts with string arguments +
+

+The output for a callout with a string argument is similar, except that instead +of outputting a callout number before the position indicators, the callout +string and its offset in the pattern string are output before the reflection of +the subject string, and the subject string is reflected for each callout. For +example: +

+    re> /^ab(?C'first')cd(?C"second")ef/
+  data> abcdefg
+  Callout (7): 'first'
+  --->abcdefg
+      ^ ^         c
+  Callout (20): "second"
+  --->abcdefg
+      ^   ^       e
+   0: abcdef
+
+
+

+
+Callout modifiers +
+

+The callout function in pcre2test returns zero (carry on matching) by +default, but you can use a callout_fail modifier in a subject line to +change this and other parameters of the callout (see below). +

+

+If the callout_capture modifier is set, the current captured groups are +output when a callout occurs. This is useful only for non-DFA matching, as +pcre2_dfa_match() does not support capturing, so no captures are ever +shown. +

+

+The normal callout output, showing the callout number or pattern offset (as +described above) is suppressed if the callout_no_where modifier is set. +

+

+When using the interpretive matching function pcre2_match() without JIT, +setting the callout_extra modifier causes additional output from +pcre2test's callout function to be generated. For the first callout in a +match attempt at a new starting position in the subject, "New match attempt" is +output. If there has been a backtrack since the last callout (or start of +matching if this is the first callout), "Backtrack" is output, followed by "No +other matching paths" if the backtrack ended the previous match attempt. For +example: +

+    re> /(a+)b/auto_callout,no_start_optimize,no_auto_possess
+  data> aac\=callout_extra
+  New match attempt
+  --->aac
+   +0 ^       (
+   +1 ^       a+
+   +3 ^ ^     )
+   +4 ^ ^     b
+  Backtrack
+  --->aac
+   +3 ^^      )
+   +4 ^^      b
+  Backtrack
+  No other matching paths
+  New match attempt
+  --->aac
+   +0  ^      (
+   +1  ^      a+
+   +3  ^^     )
+   +4  ^^     b
+  Backtrack
+  No other matching paths
+  New match attempt
+  --->aac
+   +0   ^     (
+   +1   ^     a+
+  Backtrack
+  No other matching paths
+  New match attempt
+  --->aac
+   +0    ^    (
+   +1    ^    a+
+  No match
+
+Notice that various optimizations must be turned off if you want all possible +matching paths to be scanned. If no_start_optimize is not used, there is +an immediate "no match", without any callouts, because the starting +optimization fails to find "b" in the subject, which it knows must be present +for any match. If no_auto_possess is not used, the "a+" item is turned +into "a++", which reduces the number of backtracks. +

+

+The callout_extra modifier has no effect if used with the DFA matching +function, or with JIT. +

+
+Return values from callouts +
+

+The default return from the callout function is zero, which allows matching to +continue. The callout_fail modifier can be given one or two numbers. If +there is only one number, 1 is returned instead of 0 (causing matching to +backtrack) when a callout of that number is reached. If two numbers (<n>:<m>) +are given, 1 is returned when callout <n> is reached and there have been at +least <m> callouts. The callout_error modifier is similar, except that +PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be +aborted. If both these modifiers are set for the same callout number, +callout_error takes precedence. Note that callouts with string arguments +are always given the number zero. +

+

+The callout_data modifier can be given an unsigned or a negative number. +This is set as the "user data" that is passed to the matching function, and +passed back when the callout function is invoked. Any value other than zero is +used as a return from pcre2test's callout function. +

+

+Inserting callouts can be helpful when using pcre2test to check +complicated regular expressions. For further information about callouts, see +the +pcre2callout +documentation. +

+
NON-PRINTING CHARACTERS
+

+When pcre2test is outputting text in the compiled version of a pattern, +bytes other than 32-126 are always treated as non-printing characters and are +therefore shown as hex escapes. +

+

+When pcre2test is outputting text that is a matched part of a subject +string, it behaves in the same way, unless a different locale has been set for +the pattern (using the locale modifier). In this case, the +isprint() function is used to distinguish printing and non-printing +characters. +

+
SAVING AND RESTORING COMPILED PATTERNS
+

+It is possible to save compiled patterns on disc or elsewhere, and reload them +later, subject to a number of restrictions. JIT data cannot be saved. The host +on which the patterns are reloaded must be running the same version of PCRE2, +with the same code unit width, and must also have the same endianness, pointer +width and PCRE2_SIZE type. Before compiled patterns can be saved they must be +serialized, that is, converted to a stream of bytes. A single byte stream may +contain any number of compiled patterns, but they must all use the same +character tables. A single copy of the tables is included in the byte stream +(its size is 1088 bytes). +

+

+The functions whose names begin with pcre2_serialize_ are used +for serializing and de-serializing. They are described in the +pcre2serialize +documentation. In this section we describe the features of pcre2test that +can be used to test these functions. +

+

+Note that "serialization" in PCRE2 does not convert compiled patterns to an +abstract format like Java or .NET. It just makes a reloadable byte code stream. +Hence the restrictions on reloading mentioned above. +

+

+In pcre2test, when a pattern with the push modifier is successfully +compiled, it is pushed onto a stack of compiled patterns, and pcre2test +expects the next line to contain a new pattern (or command) instead of a +subject line. By contrast, the pushcopy modifier causes a copy of the +compiled pattern to be stacked, leaving the original available for immediate +matching. By using push and/or pushcopy, a number of patterns can +be compiled and retained. These modifiers are incompatible with posix, +and control modifiers that act at match time are ignored (with a message) for +the stacked patterns. The jitverify modifier applies only at compile +time. +

+

+The command +

+  #save <filename>
+
+causes all the stacked patterns to be serialized and the result written to the +named file. Afterwards, all the stacked patterns are freed. The command +
+  #load <filename>
+
+reads the data in the file, and then arranges for it to be de-serialized, with +the resulting compiled patterns added to the pattern stack. The pattern on the +top of the stack can be retrieved by the #pop command, which must be followed +by lines of subjects that are to be matched with the pattern, terminated as +usual by an empty line or end of file. This command may be followed by a +modifier list containing only +control modifiers +that act after a pattern has been compiled. In particular, hex, +posix, posix_nosub, push, and pushcopy are not allowed, +nor are any +option-setting modifiers. +The JIT modifiers are, however, permitted. Here is an example that saves and +reloads two patterns. +
+  /abc/push
+  /xyz/push
+  #save tempfile
+  #load tempfile
+  #pop info
+  xyz
+
+  #pop jit,bincode
+  abc
+
+If jitverify is used with #pop, it does not automatically imply +jit, which is different behaviour from when it is used on a pattern. +

+

+The #popcopy command is analogous to the pushcopy modifier in that it +makes current a copy of the topmost stack pattern, leaving the original still +on the stack. +

+
SEE ALSO
+

+pcre2(3), pcre2api(3), pcre2callout(3), +pcre2jit(3), pcre2matching(3), pcre2partial(3), +pcre2pattern(3), pcre2serialize(3). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 24 April 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/html/pcre2unicode.html b/doc/html/pcre2unicode.html new file mode 100644 index 0000000..6f0972e --- /dev/null +++ b/doc/html/pcre2unicode.html @@ -0,0 +1,522 @@ + + +pcre2unicode specification + + +

pcre2unicode man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+UNICODE AND UTF SUPPORT +
+

+PCRE2 is normally built with Unicode support, though if you do not need it, you +can build it without, in which case the library will be smaller. With Unicode +support, PCRE2 has knowledge of Unicode character properties and can process +strings of text in UTF-8, UTF-16, and UTF-32 format (depending on the code unit +width), but this is not the default. Unless specifically requested, PCRE2 +treats each code unit in a string as one character. +

+

+There are two ways of telling PCRE2 to switch to UTF mode, where characters may +consist of more than one code unit and the range of values is constrained. The +program can call +pcre2_compile() +with the PCRE2_UTF option, or the pattern may start with the sequence (*UTF). +However, the latter facility can be locked out by the PCRE2_NEVER_UTF option. +That is, the programmer can prevent the supplier of the pattern from switching +to UTF mode. +

+

+Note that the PCRE2_MATCH_INVALID_UTF option (see +below) +forces PCRE2_UTF to be set. +

+

+In UTF mode, both the pattern and any subject strings that are matched against +it are treated as UTF strings instead of strings of individual one-code-unit +characters. There are also some other changes to the way characters are +handled, as documented below. +

+
+UNICODE PROPERTY SUPPORT +
+

+When PCRE2 is built with Unicode support, the escape sequences \p{..}, +\P{..}, and \X can be used. This is not dependent on the PCRE2_UTF setting. +The Unicode properties that can be tested are a subset of those that Perl +supports. Currently they are limited to the general category properties such as +Lu for an upper case letter or Nd for a decimal number, the derived properties +Any and LC (synonym L&), the Unicode script names such as Arabic or Han, +Bidi_Class, Bidi_Control, and a few binary properties. +

+

+The full lists are given in the +pcre2pattern +and +pcre2syntax +documentation. In general, only the short names for properties are supported. +For example, \p{L} matches a letter. Its longer synonym, \p{Letter}, is not +supported. Furthermore, in Perl, many properties may optionally be prefixed by +"Is", for compatibility with Perl 5.6. PCRE2 does not support this. +

+
+WIDE CHARACTERS AND UTF MODES +
+

+Code points less than 256 can be specified in patterns by either braced or +unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3). Larger +values have to use braced sequences. Unbraced octal code points up to \777 are +also recognized; larger ones can be coded using \o{...}. +

+

+The escape sequence \N{U+<hex digits>} is recognized as another way of +specifying a Unicode character by code point in a UTF mode. It is not allowed +in non-UTF mode. +

+

+In UTF mode, repeat quantifiers apply to complete UTF characters, not to +individual code units. +

+

+In UTF mode, the dot metacharacter matches one UTF character instead of a +single code unit. +

+

+In UTF mode, capture group names are not restricted to ASCII, and may contain +any Unicode letters and decimal digits, as well as underscore. +

+

+The escape sequence \C can be used to match a single code unit in UTF mode, +but its use can lead to some strange effects because it breaks up multi-unit +characters (see the description of \C in the +pcre2pattern +documentation). For this reason, there is a build-time option that disables +support for \C completely. There is also a less draconian compile-time option +for locking out the use of \C when a pattern is compiled. +

+

+The use of \C is not supported by the alternative matching function +pcre2_dfa_match() when in UTF-8 or UTF-16 mode, that is, when a character +may consist of more than one code unit. The use of \C in these modes provokes +a match-time error. Also, the JIT optimization does not support \C in these +modes. If JIT optimization is requested for a UTF-8 or UTF-16 pattern that +contains \C, it will not succeed, and so when pcre2_match() is called, +the matching will be carried out by the interpretive function. +

+

+The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test +characters of any code value, but, by default, the characters that PCRE2 +recognizes as digits, spaces, or word characters remain the same set as in +non-UTF mode, all with code points less than 256. This remains true even when +PCRE2 is built to include Unicode support, because to do otherwise would slow +down matching in many common cases. Note that this also applies to \b +and \B, because they are defined in terms of \w and \W. If you want +to test for a wider sense of, say, "digit", you can use explicit Unicode +property tests such as \p{Nd}. Alternatively, if you set the PCRE2_UCP option, +the way that the character escapes work is changed so that Unicode properties +are used to determine which characters match, though there are some options +that suppress this for individual escapes. For details see the section on +generic character types +in the +pcre2pattern +documentation. +

+

+Like the escapes, characters that match the POSIX named character classes are +all low-valued characters unless the PCRE2_UCP option is set, but there is an +option to override this. +

+

+In contrast to the character escapes and character classes, the special +horizontal and vertical white space escapes (\h, \H, \v, and \V) do match +all the appropriate Unicode characters, whether or not PCRE2_UCP is set. +

+
+UNICODE CASE-EQUIVALENCE +
+

+If either PCRE2_UTF or PCRE2_UCP is set, upper/lower case processing makes use +of Unicode properties except for characters whose code points are less than 128 +and that have at most two case-equivalent values. For these, a direct table +lookup is used for speed. A few Unicode characters such as Greek sigma have +more than two code points that are case-equivalent, and these are treated +specially. Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case +processing for non-UTF character encodings such as UCS-2. +

+

+There are two ASCII characters (S and K) that, in addition to their ASCII lower +case equivalents, have a non-ASCII one as well (long S and Kelvin sign). +Recognition of these non-ASCII characters as case-equivalent to their ASCII +counterparts can be disabled by setting the PCRE2_EXTRA_CASELESS_RESTRICT +option. When this is set, all characters in a case equivalence must either be +ASCII or non-ASCII; there can be no mixing. +

+
+SCRIPT RUNS +
+

+The pattern constructs (*script_run:...) and (*atomic_script_run:...), with +synonyms (*sr:...) and (*asr:...), verify that the string matched within the +parentheses is a script run. In concept, a script run is a sequence of +characters that are all from the same Unicode script. However, because some +scripts are commonly used together, and because some diacritical and other +marks are used with multiple scripts, it is not that simple. +

+

+Every Unicode character has a Script property, mostly with a value +corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There +are also three special values: +

+

+"Unknown" is used for code points that have not been assigned, and also for the +surrogate code points. In the PCRE2 32-bit library, characters whose code +points are greater than the Unicode maximum (U+10FFFF), which are accessible +only in non-UTF mode, are assigned the Unknown script. +

+

+"Common" is used for characters that are used with many scripts. These include +punctuation, emoji, mathematical, musical, and currency symbols, and the ASCII +digits 0 to 9. +

+

+"Inherited" is used for characters such as diacritical marks that modify a +previous character. These are considered to take on the script of the character +that they modify. +

+

+Some Inherited characters are used with many scripts, but many of them are only +normally used with a small number of scripts. For example, U+102E0 (Coptic +Epact thousands mark) is used only with Arabic and Coptic. In order to make it +possible to check this, a Unicode property called Script Extension exists. Its +value is a list of scripts that apply to the character. For the majority of +characters, the list contains just one script, the same one as the Script +property. However, for characters such as U+102E0 more than one Script is +listed. There are also some Common characters that have a single, non-Common +script in their Script Extension list. +

+

+The next section describes the basic rules for deciding whether a given string +of characters is a script run. Note, however, that there are some special cases +involving the Chinese Han script, and an additional constraint for decimal +digits. These are covered in subsequent sections. +

+
+Basic script run rules +
+

+A string that is less than two characters long is a script run. This is the +only case in which an Unknown character can be part of a script run. Longer +strings are checked using only the Script Extensions property, not the basic +Script property. +

+

+If a character's Script Extension property is the single value "Inherited", it +is always accepted as part of a script run. This is also true for the property +"Common", subject to the checking of decimal digits described below. All the +remaining characters in a script run must have at least one script in common in +their Script Extension lists. In set-theoretic terminology, the intersection of +all the sets of scripts must not be empty. +

+

+A simple example is an Internet name such as "google.com". The letters are all +in the Latin script, and the dot is Common, so this string is a script run. +However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a +string that looks the same, but with Cyrillic "o"s is not a script run. +

+

+More interesting examples involve characters with more than one script in their +Script Extension. Consider the following characters: +

+  U+060C  Arabic comma
+  U+06D4  Arabic full stop
+
+The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and +Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could +appear in script runs of either Arabic or Hanifi Rohingya. The first could also +appear in Syriac or Thaana script runs, but the second could not. +

+
+The Chinese Han script +
+

+The Chinese Han script is commonly used in conjunction with other scripts for +writing certain languages. Japanese uses the Hiragana and Katakana scripts +together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo +and Han. These three combinations are treated as special cases when checking +script runs and are, in effect, "virtual scripts". Thus, a script run may +contain a mixture of Hiragana, Katakana, and Han, or a mixture of Hangul and +Han, or a mixture of Bopomofo and Han, but not, for example, a mixture of +Hangul and Bopomofo and Han. PCRE2 (like Perl) follows Unicode's Technical +Standard 39 ("Unicode Security Mechanisms", http://unicode.org/reports/tr39/) +in allowing such mixtures. +

+
+Decimal digits +
+

+Unicode contains many sets of 10 decimal digits in different scripts, and some +scripts (including the Common script) contain more than one set. Some of these +decimal digits are visually indistinguishable from the common ASCII +digits. In addition to the script checking described above, if a script run +contains any decimal digits, they must all come from the same set of 10 +adjacent characters. +

+
+VALIDITY OF UTF STRINGS +
+

+When the PCRE2_UTF option is set, the strings passed as patterns and subjects +are (by default) checked for validity on entry to the relevant functions. If an +invalid UTF string is passed, a negative error code is returned. The code unit +offset to the offending character can be extracted from the match data block by +calling pcre2_get_startchar(), which is used for this purpose after a UTF +error. +

+

+In some situations, you may already know that your strings are valid, and +therefore want to skip these checks in order to improve performance, for +example in the case of a long subject string that is being scanned repeatedly. +If you set the PCRE2_NO_UTF_CHECK option at compile time or at match time, +PCRE2 assumes that the pattern or subject it is given (respectively) contains +only valid UTF code unit sequences. +

+

+If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result +is undefined and your program may crash or loop indefinitely or give incorrect +results. There is, however, one mode of matching that can handle invalid UTF +subject strings. This is enabled by passing PCRE2_MATCH_INVALID_UTF to +pcre2_compile() and is discussed below in the next section. The rest of +this section covers the case when PCRE2_MATCH_INVALID_UTF is not set. +

+

+Passing PCRE2_NO_UTF_CHECK to pcre2_compile() just disables the UTF check +for the pattern; it does not also apply to subject strings. If you want to +disable the check for a subject string you must pass this same option to +pcre2_match() or pcre2_dfa_match(). +

+

+UTF-16 and UTF-32 strings can indicate their endianness by special code known +as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting +strings to be in host byte order. +

+

+Unless PCRE2_NO_UTF_CHECK is set, a UTF string is checked before any other +processing takes place. In the case of pcre2_match() and +pcre2_dfa_match() calls with a non-zero starting offset, the check is +applied only to that part of the subject that could be inspected during +matching, and there is a check that the starting offset points to the first +code unit of a character or to the end of the subject. If there are no +lookbehind assertions in the pattern, the check starts at the starting offset. +Otherwise, it starts at the length of the longest lookbehind before the +starting offset, or at the start of the subject if there are not that many +characters before the starting offset. Note that the sequences \b and \B are +one-character lookbehinds. +

+

+In addition to checking the format of the string, there is a check to ensure +that all code points lie in the range U+0 to U+10FFFF, excluding the surrogate +area. The so-called "non-character" code points are not excluded because +Unicode corrigendum #9 makes it clear that they should not be. +

+

+Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16, +where they are used in pairs to encode code points with values greater than +0xFFFF. The code points that are encoded by UTF-16 pairs are available +independently in the UTF-8 and UTF-32 encodings. (In other words, the whole +surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and +UTF-32.) +

+

+Setting PCRE2_NO_UTF_CHECK at compile time does not disable the error that is +given if an escape sequence for an invalid Unicode code point is encountered in +the pattern. If you want to allow escape sequences such as \x{d800} (a +surrogate code point) you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra +option. However, this is possible only in UTF-8 and UTF-32 modes, because these +values are not representable in UTF-16. +

+
+Errors in UTF-8 strings +
+

+The following negative error codes are given for invalid UTF-8 strings: +

+  PCRE2_ERROR_UTF8_ERR1
+  PCRE2_ERROR_UTF8_ERR2
+  PCRE2_ERROR_UTF8_ERR3
+  PCRE2_ERROR_UTF8_ERR4
+  PCRE2_ERROR_UTF8_ERR5
+
+The string ends with a truncated UTF-8 character; the code specifies how many +bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be +no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279) +allows for up to 6 bytes, and this is checked first; hence the possibility of +4 or 5 missing bytes. +
+  PCRE2_ERROR_UTF8_ERR6
+  PCRE2_ERROR_UTF8_ERR7
+  PCRE2_ERROR_UTF8_ERR8
+  PCRE2_ERROR_UTF8_ERR9
+  PCRE2_ERROR_UTF8_ERR10
+
+The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the +character do not have the binary value 0b10 (that is, either the most +significant bit is 0, or the next bit is 1). +
+  PCRE2_ERROR_UTF8_ERR11
+  PCRE2_ERROR_UTF8_ERR12
+
+A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long; +these code points are excluded by RFC 3629. +
+  PCRE2_ERROR_UTF8_ERR13
+
+A 4-byte character has a value greater than 0x10ffff; these code points are +excluded by RFC 3629. +
+  PCRE2_ERROR_UTF8_ERR14
+
+A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of +code points is reserved by RFC 3629 for use with UTF-16, and so is excluded +from UTF-8. +
+  PCRE2_ERROR_UTF8_ERR15
+  PCRE2_ERROR_UTF8_ERR16
+  PCRE2_ERROR_UTF8_ERR17
+  PCRE2_ERROR_UTF8_ERR18
+  PCRE2_ERROR_UTF8_ERR19
+
+A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a +value that can be represented by fewer bytes, which is invalid. For example, +the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just +one byte. +
+  PCRE2_ERROR_UTF8_ERR20
+
+The two most significant bits of the first byte of a character have the binary +value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a +byte can only validly occur as the second or subsequent byte of a multi-byte +character. +
+  PCRE2_ERROR_UTF8_ERR21
+
+The first byte of a character has the value 0xfe or 0xff. These values can +never occur in a valid UTF-8 string. +

+
+Errors in UTF-16 strings +
+

+The following negative error codes are given for invalid UTF-16 strings: +

+  PCRE2_ERROR_UTF16_ERR1  Missing low surrogate at end of string
+  PCRE2_ERROR_UTF16_ERR2  Invalid low surrogate follows high surrogate
+  PCRE2_ERROR_UTF16_ERR3  Isolated low surrogate
+
+
+

+
+Errors in UTF-32 strings +
+

+The following negative error codes are given for invalid UTF-32 strings: +

+  PCRE2_ERROR_UTF32_ERR1  Surrogate character (0xd800 to 0xdfff)
+  PCRE2_ERROR_UTF32_ERR2  Code point is greater than 0x10ffff
+
+
+

+
+MATCHING IN INVALID UTF STRINGS +
+

+You can run pattern matches on subject strings that may contain invalid UTF +sequences if you call pcre2_compile() with the PCRE2_MATCH_INVALID_UTF +option. This is supported by pcre2_match(), including JIT matching, but +not by pcre2_dfa_match(). When PCRE2_MATCH_INVALID_UTF is set, it forces +PCRE2_UTF to be set as well. Note, however, that the pattern itself must be a +valid UTF string. +

+

+If you do not set PCRE2_MATCH_INVALID_UTF when calling pcre2_compile, and +you are not certain that your subject strings are valid UTF sequences, you +should not make use of the JIT "fast path" function pcre2_jit_match() +because it bypasses sanity checks, including the one for UTF validity. An +invalid string may cause undefined behaviour, including looping, crashing, or +giving the wrong answer. +

+

+Setting PCRE2_MATCH_INVALID_UTF does not affect what pcre2_compile() +generates, but if pcre2_jit_compile() is subsequently called, it does +generate different code. If JIT is not used, the option affects the behaviour +of the interpretive code in pcre2_match(). When PCRE2_MATCH_INVALID_UTF +is set at compile time, PCRE2_NO_UTF_CHECK is ignored at match time. +

+

+In this mode, an invalid code unit sequence in the subject never matches any +pattern item. It does not match dot, it does not match \p{Any}, it does not +even match negative items such as [^X]. A lookbehind assertion fails if it +encounters an invalid sequence while moving the current point backwards. In +other words, an invalid UTF code unit sequence acts as a barrier which no match +can cross. +

+

+You can also think of this as the subject being split up into fragments of +valid UTF, delimited internally by invalid code unit sequences. The pattern is +matched fragment by fragment. The result of a successful match, however, is +given as code unit offsets in the entire subject string in the usual way. There +are a few points to consider: +

+

+The internal boundaries are not interpreted as the beginnings or ends of lines +and so do not match circumflex or dollar characters in the pattern. +

+

+If pcre2_match() is called with an offset that points to an invalid +UTF-sequence, that sequence is skipped, and the match starts at the next valid +UTF character, or the end of the subject. +

+

+At internal fragment boundaries, \b and \B behave in the same way as at the +beginning and end of the subject. For example, a sequence such as \bWORD\b +would match an instance of WORD that is surrounded by invalid UTF code units. +

+

+Using PCRE2_MATCH_INVALID_UTF, an application can run matches on arbitrary +data, knowing that any matched strings that are returned are valid UTF. This +can be useful when searching for UTF text in executable or other binary files. +

+

+Note, however, that the 16-bit and 32-bit PCRE2 libraries process strings as +sequences of uint16_t or uint32_t code points. They cannot find valid UTF +sequences within an arbitrary string of bytes unless such sequences are +suitably aligned. +

+
+AUTHOR +
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
+REVISION +
+

+Last updated: 12 October 2023 +
+Copyright © 1997-2023 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/doc/index.html.src b/doc/index.html.src new file mode 100644 index 0000000..e4dc786 --- /dev/null +++ b/doc/index.html.src @@ -0,0 +1,318 @@ + + + +PCRE2 specification + + +

Perl-compatible Regular Expressions (revised API: PCRE2)

+

+The HTML documentation for PCRE2 consists of a number of pages that are listed +below in alphabetical order. If you are new to PCRE2, please read the first one +first. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
pcre2  Introductory page
pcre2-config  Information about the installation configuration
pcre2api  PCRE2's native API
pcre2build  Building PCRE2
pcre2callout  The callout facility
pcre2compat  Compatibility with Perl
pcre2convert  Experimental foreign pattern conversion functions
pcre2demo  A demonstration C program that uses the PCRE2 library
pcre2grep  The pcre2grep command
pcre2jit  Discussion of the just-in-time optimization support
pcre2limits  Details of size and other limits
pcre2matching  Discussion of the two matching algorithms
pcre2partial  Using PCRE2 for partial matching
pcre2pattern  Specification of the regular expressions supported by PCRE2
pcre2perform  Some comments on performance
pcre2posix  The POSIX API to the PCRE2 8-bit library
pcre2sample  Discussion of the pcre2demo program
pcre2serialize  Serializing functions for saving precompiled patterns
pcre2syntax  Syntax quick-reference summary
pcre2test  The pcre2test command for testing PCRE2
pcre2unicode  Discussion of Unicode and UTF-8/UTF-16/UTF-32 support
+ +

+There are also individual pages that summarize the interface for each function +in the library. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
pcre2_callout_enumerate  Enumerate callouts in a compiled pattern
pcre2_code_copy  Copy a compiled pattern
pcre2_code_copy_with_tables  Copy a compiled pattern and its character tables
pcre2_code_free  Free a compiled pattern
pcre2_compile  Compile a regular expression pattern
pcre2_compile_context_copy  Copy a compile context
pcre2_compile_context_create  Create a compile context
pcre2_compile_context_free  Free a compile context
pcre2_config  Show build-time configuration options
pcre2_convert_context_copy  Copy a convert context
pcre2_convert_context_create  Create a convert context
pcre2_convert_context_free  Free a convert context
pcre2_converted_pattern_free  Free converted foreign pattern
pcre2_dfa_match  Match a compiled pattern to a subject string + (DFA algorithm; not Perl compatible)
pcre2_general_context_copy  Copy a general context
pcre2_general_context_create  Create a general context
pcre2_general_context_free  Free a general context
pcre2_get_error_message  Get textual error message for error number
pcre2_get_mark  Get a (*MARK) name
pcre2_get_match_data_size  Get the size of a match data block
pcre2_get_ovector_count  Get the ovector count
pcre2_get_ovector_pointer  Get a pointer to the ovector
pcre2_get_startchar  Get the starting character offset
pcre2_jit_compile  Process a compiled pattern with the JIT compiler
pcre2_jit_free_unused_memory  Free unused JIT memory
pcre2_jit_match  Fast path interface to JIT matching
pcre2_jit_stack_assign  Assign stack for JIT matching
pcre2_jit_stack_create  Create a stack for JIT matching
pcre2_jit_stack_free  Free a JIT matching stack
pcre2_maketables  Build character tables in current locale
pcre2_maketables_free  Free character tables
pcre2_match  Match a compiled pattern to a subject string + (Perl compatible)
pcre2_match_context_copy  Copy a match context
pcre2_match_context_create  Create a match context
pcre2_match_context_free  Free a match context
pcre2_match_data_create  Create a match data block
pcre2_match_data_create_from_pattern  Create a match data block getting size from pattern
pcre2_match_data_free  Free a match data block
pcre2_pattern_convert  Experimental foreign pattern converter
pcre2_pattern_info  Extract information about a pattern
pcre2_serialize_decode  Decode serialized compiled patterns
pcre2_serialize_encode  Serialize compiled patterns for save/restore
pcre2_serialize_free  Free serialized compiled patterns
pcre2_serialize_get_number_of_codes  Get number of serialized compiled patterns
pcre2_set_bsr  Set \R convention
pcre2_set_callout  Set up a callout function
pcre2_set_character_tables  Set character tables
pcre2_set_compile_extra_options  Set compile time extra options
pcre2_set_compile_recursion_guard  Set up a compile recursion guard function
pcre2_set_depth_limit  Set the match backtracking depth limit
pcre2_set_glob_escape  Set glob escape character
pcre2_set_glob_separator  Set glob separator character
pcre2_set_heap_limit  Set the match backtracking heap limit
pcre2_set_match_limit  Set the match limit
pcre2_set_max_pattern_compiled_length  Set the maximum length of a compiled pattern
pcre2_set_max_pattern_length  Set the maximum length of a pattern
pcre2_set_max_varlookbehind  Set the maximum match length for a variable-length lookbehind
pcre2_set_newline  Set the newline convention
pcre2_set_offset_limit  Set the offset limit
pcre2_set_parens_nest_limit  Set the parentheses nesting limit
pcre2_set_recursion_limit  Obsolete: use pcre2_set_depth_limit
pcre2_set_recursion_memory_management  Obsolete function that (from 10.30 onwards) does nothing
pcre2_substitute  Match a compiled pattern to a subject string and do + substitutions
pcre2_substring_copy_byname  Extract named substring into given buffer
pcre2_substring_copy_bynumber  Extract numbered substring into given buffer
pcre2_substring_free  Free extracted substring
pcre2_substring_get_byname  Extract named substring into new memory
pcre2_substring_get_bynumber  Extract numbered substring into new memory
pcre2_substring_length_byname  Find length of named substring
pcre2_substring_length_bynumber  Find length of numbered substring
pcre2_substring_list_free  Free list of extracted substrings
pcre2_substring_list_get  Extract all substrings into new memory
pcre2_substring_nametable_scan  Find table entries for given string name
pcre2_substring_number_from_name  Convert captured string name to number
+ + + diff --git a/doc/pcre2-config.1 b/doc/pcre2-config.1 new file mode 100644 index 0000000..7fa0a09 --- /dev/null +++ b/doc/pcre2-config.1 @@ -0,0 +1,86 @@ +.TH PCRE2-CONFIG 1 "28 September 2014" "PCRE2 10.00" +.SH NAME +pcre2-config - program to return PCRE2 configuration +.SH SYNOPSIS +.rs +.sp +.nf +.B pcre2-config [--prefix] [--exec-prefix] [--version] +.B " [--libs8] [--libs16] [--libs32] [--libs-posix]" +.B " [--cflags] [--cflags-posix]" +.fi +. +. +.SH DESCRIPTION +.rs +.sp +\fBpcre2-config\fP returns the configuration of the installed PCRE2 libraries +and the options required to compile a program to use them. Some of the options +apply only to the 8-bit, or 16-bit, or 32-bit libraries, respectively, and are +not available for libraries that have not been built. If an unavailable option +is encountered, the "usage" information is output. +. +. +.SH OPTIONS +.rs +.TP 10 +\fB--prefix\fP +Writes the directory prefix used in the PCRE2 installation for architecture +independent files (\fI/usr\fP on many systems, \fI/usr/local\fP on some +systems) to the standard output. +.TP 10 +\fB--exec-prefix\fP +Writes the directory prefix used in the PCRE2 installation for architecture +dependent files (normally the same as \fB--prefix\fP) to the standard output. +.TP 10 +\fB--version\fP +Writes the version number of the installed PCRE2 libraries to the standard +output. +.TP 10 +\fB--libs8\fP +Writes to the standard output the command line options required to link +with the 8-bit PCRE2 library (\fB-lpcre2-8\fP on many systems). +.TP 10 +\fB--libs16\fP +Writes to the standard output the command line options required to link +with the 16-bit PCRE2 library (\fB-lpcre2-16\fP on many systems). +.TP 10 +\fB--libs32\fP +Writes to the standard output the command line options required to link +with the 32-bit PCRE2 library (\fB-lpcre2-32\fP on many systems). 
+.TP 10 +\fB--libs-posix\fP +Writes to the standard output the command line options required to link with +PCRE2's POSIX API wrapper library (\fB-lpcre2-posix\fP \fB-lpcre2-8\fP on many +systems). +.TP 10 +\fB--cflags\fP +Writes to the standard output the command line options required to compile +files that use PCRE2 (this may include some \fB-I\fP options, but is blank on +many systems). +.TP 10 +\fB--cflags-posix\fP +Writes to the standard output the command line options required to compile +files that use PCRE2's POSIX API wrapper library (this may include some +\fB-I\fP options, but is blank on many systems). +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2(3)\fP +. +. +.SH AUTHOR +.rs +.sp +This manual page was originally written by Mark Baker for the Debian GNU/Linux +system. It has been subsequently revised as a generic PCRE2 man page. +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 28 September 2014 +.fi diff --git a/doc/pcre2-config.txt b/doc/pcre2-config.txt new file mode 100644 index 0000000..dc8cf8f --- /dev/null +++ b/doc/pcre2-config.txt @@ -0,0 +1,85 @@ + +PCRE2-CONFIG(1) General Commands Manual PCRE2-CONFIG(1) + + +NAME + pcre2-config - program to return PCRE2 configuration + + +SYNOPSIS + + pcre2-config [--prefix] [--exec-prefix] [--version] + [--libs8] [--libs16] [--libs32] [--libs-posix] + [--cflags] [--cflags-posix] + + +DESCRIPTION + + pcre2-config returns the configuration of the installed PCRE2 libraries + and the options required to compile a program to use them. Some of the + options apply only to the 8-bit, or 16-bit, or 32-bit libraries, re- + spectively, and are not available for libraries that have not been + built. If an unavailable option is encountered, the "usage" information + is output. + + +OPTIONS + + --prefix Writes the directory prefix used in the PCRE2 installation + for architecture independent files (/usr on many systems, + /usr/local on some systems) to the standard output. 
+ + --exec-prefix + Writes the directory prefix used in the PCRE2 installation + for architecture dependent files (normally the same as --pre- + fix) to the standard output. + + --version Writes the version number of the installed PCRE2 libraries to + the standard output. + + --libs8 Writes to the standard output the command line options re- + quired to link with the 8-bit PCRE2 library (-lpcre2-8 on + many systems). + + --libs16 Writes to the standard output the command line options re- + quired to link with the 16-bit PCRE2 library (-lpcre2-16 on + many systems). + + --libs32 Writes to the standard output the command line options re- + quired to link with the 32-bit PCRE2 library (-lpcre2-32 on + many systems). + + --libs-posix + Writes to the standard output the command line options re- + quired to link with PCRE2's POSIX API wrapper library + (-lpcre2-posix -lpcre2-8 on many systems). + + --cflags Writes to the standard output the command line options re- + quired to compile files that use PCRE2 (this may include some + -I options, but is blank on many systems). + + --cflags-posix + Writes to the standard output the command line options re- + quired to compile files that use PCRE2's POSIX API wrapper + library (this may include some -I options, but is blank on + many systems). + + +SEE ALSO + + pcre2(3) + + +AUTHOR + + This manual page was originally written by Mark Baker for the Debian + GNU/Linux system. It has been subsequently revised as a generic PCRE2 + man page. 
+ + +REVISION + + Last updated: 28 September 2014 + + +PCRE2 10.00 28 September 2014 PCRE2-CONFIG(1) diff --git a/doc/pcre2.3 b/doc/pcre2.3 new file mode 100644 index 0000000..fa91450 --- /dev/null +++ b/doc/pcre2.3 @@ -0,0 +1,208 @@ +.TH PCRE2 3 "27 August 2021" "PCRE2 10.38" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH INTRODUCTION +.rs +.sp +PCRE2 is the name used for a revised API for the PCRE library, which is a set +of functions, written in C, that implement regular expression pattern matching +using the same syntax and semantics as Perl, with just a few differences. After +nearly two decades, the limitations of the original API were making development +increasingly difficult. The new API is more extensible, and it was simplified +by abolishing the separate "study" optimizing function; in PCRE2, patterns are +automatically optimized where possible. Since forking from PCRE1, the code has +been extensively refactored and new features introduced. The old library is now +obsolete and is no longer maintained. +.P +As well as Perl-style regular expression patterns, some features that appeared +in Python and the original PCRE before they appeared in Perl are available +using the Python syntax. There is also some support for one or two .NET and +Oniguruma syntax items, and there are options for requesting some minor changes +that give better ECMAScript (aka JavaScript) compatibility. +.P +The source code for PCRE2 can be compiled to support strings of 8-bit, 16-bit, +or 32-bit code units, which means that up to three separate libraries may be +installed, one for each code unit size. The size of code unit is not related to +the bit size of the underlying hardware. In a 64-bit environment that also +supports 32-bit applications, versions of PCRE2 that are compiled in both +64-bit and 32-bit modes may be needed. 
+.P +The original work to extend PCRE to 16-bit and 32-bit code units was done by +Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings +can be interpreted either as one character per code unit, or as UTF-encoded +Unicode, with support for Unicode general category properties. Unicode support +is optional at build time (but is the default). However, processing strings as +UTF code units must be enabled explicitly at run time. The version of Unicode +in use can be discovered by running +.sp + pcre2test -C +.P +The three libraries contain identical sets of functions, with names ending in +_8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However, +by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just +one code unit width can be written using generic names such as +\fBpcre2_compile()\fP, and the documentation is written assuming that this is +the case. +.P +In addition to the Perl-compatible matching function, PCRE2 contains an +alternative function that matches the same compiled patterns in a different +way. In certain circumstances, the alternative function has some advantages. +For a discussion of the two matching algorithms, see the +.\" HREF +\fBpcre2matching\fP +.\" +page. +.P +Details of exactly which Perl regular expression features are and are not +supported by PCRE2 are given in separate documents. See the +.\" HREF +\fBpcre2pattern\fP +.\" +and +.\" HREF +\fBpcre2compat\fP +.\" +pages. There is a syntax summary in the +.\" HREF +\fBpcre2syntax\fP +.\" +page. +.P +Some features of PCRE2 can be included, excluded, or changed when the library +is built. The +.\" HREF +\fBpcre2_config()\fP +.\" +function makes it possible for a client to discover which features are +available. The features themselves are described in the +.\" HREF +\fBpcre2build\fP +.\" +page. 
Documentation about building PCRE2 for various operating systems can be +found in the +.\" HTML +.\" +\fBREADME\fP +.\" +and +.\" HTML +.\" +\fBNON-AUTOTOOLS-BUILD\fP +.\" +files in the source distribution. +.P +The libraries contain a number of undocumented internal functions and data +tables that are used by more than one of the exported external functions, but +which are not intended for use by external callers. Their names all begin with +"_pcre2", which hopefully will not provoke any name clashes. In some +environments, it is possible to control which external symbols are exported +when a shared library is built, and in these cases the undocumented symbols are +not exported. +. +. +.SH "SECURITY CONSIDERATIONS" +.rs +.sp +If you are using PCRE2 in a non-UTF application that permits users to supply +arbitrary patterns for compilation, you should be aware of a feature that +allows users to turn on UTF support from within a pattern. For example, an +8-bit pattern that begins with "(*UTF)" turns on UTF-8 mode, which interprets +patterns and subjects as strings of UTF-8 code units instead of individual +8-bit characters. This causes both the pattern and any data against which it is +matched to be checked for UTF-8 validity. If the data string is very long, such +a check might use sufficiently many resources as to cause your application to +lose performance. +.P +One way of guarding against this possibility is to use the +\fBpcre2_pattern_info()\fP function to check the compiled pattern's options for +PCRE2_UTF. Alternatively, you can set the PCRE2_NEVER_UTF option when calling +\fBpcre2_compile()\fP. This causes a compile time error if the pattern contains +a UTF-setting sequence. +.P +The use of Unicode properties for character types such as \ed can also be +enabled from within the pattern, by specifying "(*UCP)". This feature can be +disallowed by setting the PCRE2_NEVER_UCP option. 
+.P +If your application is one that supports UTF, be aware that validity checking +can take time. If the same data string is to be matched many times, you can use +the PCRE2_NO_UTF_CHECK option for the second and subsequent matches to avoid +running redundant checks. +.P +The use of the \eC escape sequence in a UTF-8 or UTF-16 pattern can lead to +problems, because it may leave the current matching point in the middle of a +multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used by an +application to lock out the use of \eC, causing a compile-time error if it is +encountered. It is also possible to build PCRE2 with the use of \eC permanently +disabled. +.P +Another way that performance can be hit is by running a pattern that has a very +large search tree against a string that will never match. Nested unlimited +repeats in a pattern are a common example. PCRE2 provides some protection +against this: see the \fBpcre2_set_match_limit()\fP function in the +.\" HREF +\fBpcre2api\fP +.\" +page. There is a similar function called \fBpcre2_set_depth_limit()\fP that can +be used to restrict the amount of memory that is used. +. +. +.SH "USER DOCUMENTATION" +.rs +.sp +The user documentation for PCRE2 comprises a number of different sections. In +the "man" format, each of these is a separate "man page". In the HTML format, +each is a separate page, linked from the index page. In the plain text format, +the descriptions of the \fBpcre2grep\fP and \fBpcre2test\fP programs are in +files called \fBpcre2grep.txt\fP and \fBpcre2test.txt\fP, respectively. The +remaining sections, except for the \fBpcre2demo\fP section (which is a program +listing), and the short pages for individual functions, are concatenated in +\fBpcre2.txt\fP, for ease of searching. 
The sections are as follows: +.sp + pcre2 this document + pcre2-config show PCRE2 installation configuration information + pcre2api details of PCRE2's native C API + pcre2build building PCRE2 + pcre2callout details of the pattern callout feature + pcre2compat discussion of Perl compatibility + pcre2convert details of pattern conversion functions + pcre2demo a demonstration C program that uses PCRE2 + pcre2grep description of the \fBpcre2grep\fP command (8-bit only) + pcre2jit discussion of just-in-time optimization support + pcre2limits details of size and other limits + pcre2matching discussion of the two matching algorithms + pcre2partial details of the partial matching facility +.\" JOIN + pcre2pattern syntax and semantics of supported regular + expression patterns + pcre2perform discussion of performance issues + pcre2posix the POSIX-compatible C API for the 8-bit library + pcre2sample discussion of the pcre2demo program + pcre2serialize details of pattern serialization + pcre2syntax quick syntax reference + pcre2test description of the \fBpcre2test\fP command + pcre2unicode discussion of Unicode and UTF support +.sp +In the "man" and HTML formats, there is also a short page for each C library +function, listing its arguments and results. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +.P +Putting an actual email address here is a spam magnet. If you want to email me, +use my two names separated by a dot at gmail.com. +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 27 August 2021 +Copyright (c) 1997-2021 University of Cambridge. 
+.fi diff --git a/doc/pcre2.txt b/doc/pcre2.txt new file mode 100644 index 0000000..85eead6 --- /dev/null +++ b/doc/pcre2.txt @@ -0,0 +1,11980 @@ +----------------------------------------------------------------------------- +This file contains a concatenation of the PCRE2 man pages, converted to plain +text format for ease of searching with a text editor, or for use on systems +that do not have a man page processor. The small individual files that give +synopses of each function in the library have not been included. Neither has +the pcre2demo program. There are separate text files for the pcre2grep and +pcre2test commands. +----------------------------------------------------------------------------- + + + +PCRE2(3) Library Functions Manual PCRE2(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +INTRODUCTION + + PCRE2 is the name used for a revised API for the PCRE library, which is + a set of functions, written in C, that implement regular expression + pattern matching using the same syntax and semantics as Perl, with just + a few differences. After nearly two decades, the limitations of the + original API were making development increasingly difficult. The new + API is more extensible, and it was simplified by abolishing the sepa- + rate "study" optimizing function; in PCRE2, patterns are automatically + optimized where possible. Since forking from PCRE1, the code has been + extensively refactored and new features introduced. The old library is + now obsolete and is no longer maintained. + + As well as Perl-style regular expression patterns, some features that + appeared in Python and the original PCRE before they appeared in Perl + are available using the Python syntax. There is also some support for + one or two .NET and Oniguruma syntax items, and there are options for + requesting some minor changes that give better ECMAScript (aka + JavaScript) compatibility. 
+ + The source code for PCRE2 can be compiled to support strings of 8-bit, + 16-bit, or 32-bit code units, which means that up to three separate li- + braries may be installed, one for each code unit size. The size of code + unit is not related to the bit size of the underlying hardware. In a + 64-bit environment that also supports 32-bit applications, versions of + PCRE2 that are compiled in both 64-bit and 32-bit modes may be needed. + + The original work to extend PCRE to 16-bit and 32-bit code units was + done by Zoltan Herczeg and Christian Persch, respectively. In all three + cases, strings can be interpreted either as one character per code + unit, or as UTF-encoded Unicode, with support for Unicode general cate- + gory properties. Unicode support is optional at build time (but is the + default). However, processing strings as UTF code units must be enabled + explicitly at run time. The version of Unicode in use can be discovered + by running + + pcre2test -C + + The three libraries contain identical sets of functions, with names + ending in _8, _16, or _32, respectively (for example, pcre2_com- + pile_8()). However, by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or + 32, a program that uses just one code unit width can be written using + generic names such as pcre2_compile(), and the documentation is written + assuming that this is the case. + + In addition to the Perl-compatible matching function, PCRE2 contains an + alternative function that matches the same compiled patterns in a dif- + ferent way. In certain circumstances, the alternative function has some + advantages. For a discussion of the two matching algorithms, see the + pcre2matching page. + + Details of exactly which Perl regular expression features are and are + not supported by PCRE2 are given in separate documents. See the + pcre2pattern and pcre2compat pages. There is a syntax summary in the + pcre2syntax page. 
+ + Some features of PCRE2 can be included, excluded, or changed when the + library is built. The pcre2_config() function makes it possible for a + client to discover which features are available. The features them- + selves are described in the pcre2build page. Documentation about build- + ing PCRE2 for various operating systems can be found in the README and + NON-AUTOTOOLS-BUILD files in the source distribution. + + The libraries contain a number of undocumented internal functions and + data tables that are used by more than one of the exported external + functions, but which are not intended for use by external callers. + Their names all begin with "_pcre2", which hopefully will not provoke + any name clashes. In some environments, it is possible to control which + external symbols are exported when a shared library is built, and in + these cases the undocumented symbols are not exported. + + +SECURITY CONSIDERATIONS + + If you are using PCRE2 in a non-UTF application that permits users to + supply arbitrary patterns for compilation, you should be aware of a + feature that allows users to turn on UTF support from within a pattern. + For example, an 8-bit pattern that begins with "(*UTF)" turns on UTF-8 + mode, which interprets patterns and subjects as strings of UTF-8 code + units instead of individual 8-bit characters. This causes both the pat- + tern and any data against which it is matched to be checked for UTF-8 + validity. If the data string is very long, such a check might use suf- + ficiently many resources as to cause your application to lose perfor- + mance. + + One way of guarding against this possibility is to use the pcre2_pat- + tern_info() function to check the compiled pattern's options for + PCRE2_UTF. Alternatively, you can set the PCRE2_NEVER_UTF option when + calling pcre2_compile(). This causes a compile time error if the pat- + tern contains a UTF-setting sequence. 
+ + The use of Unicode properties for character types such as \d can also + be enabled from within the pattern, by specifying "(*UCP)". This fea- + ture can be disallowed by setting the PCRE2_NEVER_UCP option. + + If your application is one that supports UTF, be aware that validity + checking can take time. If the same data string is to be matched many + times, you can use the PCRE2_NO_UTF_CHECK option for the second and + subsequent matches to avoid running redundant checks. + + The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead + to problems, because it may leave the current matching point in the + middle of a multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C op- + tion can be used by an application to lock out the use of \C, causing a + compile-time error if it is encountered. It is also possible to build + PCRE2 with the use of \C permanently disabled. + + Another way that performance can be hit is by running a pattern that + has a very large search tree against a string that will never match. + Nested unlimited repeats in a pattern are a common example. PCRE2 pro- + vides some protection against this: see the pcre2_set_match_limit() + function in the pcre2api page. There is a similar function called + pcre2_set_depth_limit() that can be used to restrict the amount of mem- + ory that is used. + + +USER DOCUMENTATION + + The user documentation for PCRE2 comprises a number of different sec- + tions. In the "man" format, each of these is a separate "man page". In + the HTML format, each is a separate page, linked from the index page. + In the plain text format, the descriptions of the pcre2grep and + pcre2test programs are in files called pcre2grep.txt and pcre2test.txt, + respectively. The remaining sections, except for the pcre2demo section + (which is a program listing), and the short pages for individual func- + tions, are concatenated in pcre2.txt, for ease of searching. 
The sec- + tions are as follows: + + pcre2 this document + pcre2-config show PCRE2 installation configuration information + pcre2api details of PCRE2's native C API + pcre2build building PCRE2 + pcre2callout details of the pattern callout feature + pcre2compat discussion of Perl compatibility + pcre2convert details of pattern conversion functions + pcre2demo a demonstration C program that uses PCRE2 + pcre2grep description of the pcre2grep command (8-bit only) + pcre2jit discussion of just-in-time optimization support + pcre2limits details of size and other limits + pcre2matching discussion of the two matching algorithms + pcre2partial details of the partial matching facility + pcre2pattern syntax and semantics of supported regular + expression patterns + pcre2perform discussion of performance issues + pcre2posix the POSIX-compatible C API for the 8-bit library + pcre2sample discussion of the pcre2demo program + pcre2serialize details of pattern serialization + pcre2syntax quick syntax reference + pcre2test description of the pcre2test command + pcre2unicode discussion of Unicode and UTF support + + In the "man" and HTML formats, there is also a short page for each C + library function, listing its arguments and results. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + Putting an actual email address here is a spam magnet. If you want to + email me, use my two names separated by a dot at gmail.com. + + +REVISION + + Last updated: 27 August 2021 + Copyright (c) 1997-2021 University of Cambridge. + + +PCRE2 10.38 27 August 2021 PCRE2(3) +------------------------------------------------------------------------------ + + + +PCRE2API(3) Library Functions Manual PCRE2API(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + #include <pcre2.h> + + PCRE2 is a new API for PCRE, starting at release 10.0. This document + contains a description of all its native functions. 
See the pcre2 docu- + ment for an overview of all the PCRE2 documentation. + + +PCRE2 NATIVE API BASIC FUNCTIONS + + pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, + pcre2_compile_context *ccontext); + + void pcre2_code_free(pcre2_code *code); + + pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, + pcre2_general_context *gcontext); + + pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext); + + int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); + + int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, + int *workspace, PCRE2_SIZE wscount); + + void pcre2_match_data_free(pcre2_match_data *match_data); + + +PCRE2 NATIVE API AUXILIARY MATCH FUNCTIONS + + PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data); + + PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *match_data); + + PCRE2_SIZE pcre2_get_match_data_heapframes_size( + pcre2_match_data *match_data); + + uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data); + + PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data); + + PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data); + + +PCRE2 NATIVE API GENERAL CONTEXT FUNCTIONS + + pcre2_general_context *pcre2_general_context_create( + void *(*private_malloc)(PCRE2_SIZE, void *), + void (*private_free)(void *, void *), void *memory_data); + + pcre2_general_context *pcre2_general_context_copy( + pcre2_general_context *gcontext); + + void pcre2_general_context_free(pcre2_general_context *gcontext); + + +PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS + + pcre2_compile_context *pcre2_compile_context_create( + 
pcre2_general_context *gcontext); + + pcre2_compile_context *pcre2_compile_context_copy( + pcre2_compile_context *ccontext); + + void pcre2_compile_context_free(pcre2_compile_context *ccontext); + + int pcre2_set_bsr(pcre2_compile_context *ccontext, + uint32_t value); + + int pcre2_set_character_tables(pcre2_compile_context *ccontext, + const uint8_t *tables); + + int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, + uint32_t extra_options); + + int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); + + int pcre2_set_max_pattern_compiled_length( + pcre2_compile_context *ccontext, PCRE2_SIZE value); + + int pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, + uint32_t value); + + int pcre2_set_newline(pcre2_compile_context *ccontext, + uint32_t value); + + int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, + uint32_t value); + + int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, + int (*guard_function)(uint32_t, void *), void *user_data); + + +PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS + + pcre2_match_context *pcre2_match_context_create( + pcre2_general_context *gcontext); + + pcre2_match_context *pcre2_match_context_copy( + pcre2_match_context *mcontext); + + void pcre2_match_context_free(pcre2_match_context *mcontext); + + int pcre2_set_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_callout_block *, void *), + void *callout_data); + + int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_substitute_callout_block *, void *), + void *callout_data); + + int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); + + int pcre2_set_heap_limit(pcre2_match_context *mcontext, + uint32_t value); + + int pcre2_set_match_limit(pcre2_match_context *mcontext, + uint32_t value); + + int pcre2_set_depth_limit(pcre2_match_context *mcontext, + uint32_t value); + + +PCRE2 NATIVE API STRING EXTRACTION 
FUNCTIONS + + int pcre2_substring_copy_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen); + + int pcre2_substring_copy_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR *buffer, + PCRE2_SIZE *bufflen); + + void pcre2_substring_free(PCRE2_UCHAR *buffer); + + int pcre2_substring_get_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen); + + int pcre2_substring_get_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR **bufferptr, + PCRE2_SIZE *bufflen); + + int pcre2_substring_length_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_SIZE *length); + + int pcre2_substring_length_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_SIZE *length); + + int pcre2_substring_nametable_scan(const pcre2_code *code, + PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); + + int pcre2_substring_number_from_name(const pcre2_code *code, + PCRE2_SPTR name); + + void pcre2_substring_list_free(PCRE2_UCHAR **list); + + int pcre2_substring_list_get(pcre2_match_data *match_data, + PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr); + + +PCRE2 NATIVE API STRING SUBSTITUTION FUNCTION + + int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, PCRE2_SPTR replacementz, + PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer, + PCRE2_SIZE *outlengthptr); + + +PCRE2 NATIVE API JIT FUNCTIONS + + int pcre2_jit_compile(pcre2_code *code, uint32_t options); + + int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); + + void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); + + pcre2_jit_stack *pcre2_jit_stack_create(size_t startsize, + size_t maxsize, 
pcre2_general_context *gcontext); + + void pcre2_jit_stack_assign(pcre2_match_context *mcontext, + pcre2_jit_callback callback_function, void *callback_data); + + void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack); + + +PCRE2 NATIVE API SERIALIZATION FUNCTIONS + + int32_t pcre2_serialize_decode(pcre2_code **codes, + int32_t number_of_codes, const uint8_t *bytes, + pcre2_general_context *gcontext); + + int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, + PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext); + + void pcre2_serialize_free(uint8_t *bytes); + + int32_t pcre2_serialize_get_number_of_codes(const uint8_t *bytes); + + +PCRE2 NATIVE API AUXILIARY FUNCTIONS + + pcre2_code *pcre2_code_copy(const pcre2_code *code); + + pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); + + int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, + PCRE2_SIZE bufflen); + + const uint8_t *pcre2_maketables(pcre2_general_context *gcontext); + + void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); + + int pcre2_pattern_info(const pcre2_code *code, uint32_t what, + void *where); + + int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); + + int pcre2_config(uint32_t what, void *where); + + +PCRE2 NATIVE API OBSOLETE FUNCTIONS + + int pcre2_set_recursion_limit(pcre2_match_context *mcontext, + uint32_t value); + + int pcre2_set_recursion_memory_management( + pcre2_match_context *mcontext, + void *(*private_malloc)(size_t, void *), + void (*private_free)(void *, void *), void *memory_data); + + These functions became obsolete at release 10.30 and are retained only + for backward compatibility. They should not be used in new code. The + first is replaced by pcre2_set_depth_limit(); the second is no longer + needed and has no effect (it always returns zero). 
+ + +PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS + + pcre2_convert_context *pcre2_convert_context_create( + pcre2_general_context *gcontext); + + pcre2_convert_context *pcre2_convert_context_copy( + pcre2_convert_context *cvcontext); + + void pcre2_convert_context_free(pcre2_convert_context *cvcontext); + + int pcre2_set_glob_escape(pcre2_convert_context *cvcontext, + uint32_t escape_char); + + int pcre2_set_glob_separator(pcre2_convert_context *cvcontext, + uint32_t separator_char); + + int pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, PCRE2_UCHAR **buffer, + PCRE2_SIZE *blength, pcre2_convert_context *cvcontext); + + void pcre2_converted_pattern_free(PCRE2_UCHAR *converted_pattern); + + These functions provide a way of converting non-PCRE2 patterns into + patterns that can be processed by pcre2_compile(). This facility is ex- + perimental and may be changed in future releases. At present, "globs" + and POSIX basic and extended patterns can be converted. Details are + given in the pcre2convert documentation. + + +PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES + + There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit + code units, respectively. However, there is just one header file, + pcre2.h. This contains the function prototypes and other definitions + for all three libraries. One, two, or all three can be installed simul- + taneously. On Unix-like systems the libraries are called libpcre2-8, + libpcre2-16, and libpcre2-32, and they can also co-exist with the orig- + inal PCRE libraries. Every PCRE2 function comes in three different + forms, one for each library, for example: + + pcre2_compile_8() + pcre2_compile_16() + pcre2_compile_32() + + There are also three different sets of data types: + + PCRE2_UCHAR8, PCRE2_UCHAR16, PCRE2_UCHAR32 + PCRE2_SPTR8, PCRE2_SPTR16, PCRE2_SPTR32 + + The UCHAR types define unsigned code units of the appropriate widths. 
+ For example, PCRE2_UCHAR16 is usually defined as `uint16_t'. The SPTR + types are pointers to constants of the equivalent UCHAR types, that is, + they are pointers to vectors of unsigned code units. + + Character strings are passed to a PCRE2 library as sequences of un- + signed integers in code units of the appropriate width. The length of a + string may be given as a number of code units, or the string may be + specified as zero-terminated. + + Many applications use only one code unit width. For their convenience, + macros are defined whose names are the generic forms such as pcre2_com- + pile() and PCRE2_SPTR. These macros use the value of the macro + PCRE2_CODE_UNIT_WIDTH to generate the appropriate width-specific func- + tion and macro names. PCRE2_CODE_UNIT_WIDTH is not defined by default. + An application must define it to be 8, 16, or 32 before including + pcre2.h in order to make use of the generic names. + + Applications that use more than one code unit width can be linked with + more than one PCRE2 library, but must define PCRE2_CODE_UNIT_WIDTH to + be 0 before including pcre2.h, and then use the real function names. + Any code that is to be included in an environment where the value of + PCRE2_CODE_UNIT_WIDTH is unknown should also use the real function + names. (Unfortunately, it is not possible in C code to save and restore + the value of a macro.) + + If PCRE2_CODE_UNIT_WIDTH is not defined before including pcre2.h, a + compiler error occurs. + + When using multiple libraries in an application, you must take care + when processing any particular pattern to use only functions from a + single library. For example, if you want to run a match using a pat- + tern that was compiled with pcre2_compile_16(), you must do so with + pcre2_match_16(), not pcre2_match_8() or pcre2_match_32(). 
+ + In the function summaries above, and in the rest of this document and + other PCRE2 documents, functions and data types are described using + their generic names, without the _8, _16, or _32 suffix. + + +PCRE2 API OVERVIEW + + PCRE2 has its own native API, which is described in this document. + There are also some wrapper functions for the 8-bit library that corre- + spond to the POSIX regular expression API, but they do not give access + to all the functionality of PCRE2 and they are not thread-safe. They + are described in the pcre2posix documentation. Both these APIs define a + set of C function calls. + + The native API C data types, function prototypes, option values, and + error codes are defined in the header file pcre2.h, which also contains + definitions of PCRE2_MAJOR and PCRE2_MINOR, the major and minor release + numbers for the library. Applications can use these to include support + for different releases of PCRE2. + + In a Windows environment, if you want to statically link an application + program against a non-dll PCRE2 library, you must define PCRE2_STATIC + before including pcre2.h. + + The functions pcre2_compile() and pcre2_match() are used for compiling + and matching regular expressions in a Perl-compatible manner. A sample + program that demonstrates the simplest way of using them is provided in + the file called pcre2demo.c in the PCRE2 source distribution. A listing + of this program is given in the pcre2demo documentation, and the + pcre2sample documentation describes how to compile and run it. + + The compiling and matching functions recognize various options that are + passed as bits in an options argument. There are also some more compli- + cated parameters such as custom memory management functions and re- + source limits that are passed in "contexts" (which are just memory + blocks, described below). Simple applications do not need to make use + of contexts. 
+ + Just-in-time (JIT) compiler support is an optional feature of PCRE2 + that can be built in appropriate hardware environments. It greatly + speeds up the matching performance of many patterns. Programs can re- + quest that it be used if available by calling pcre2_jit_compile() after + a pattern has been successfully compiled by pcre2_compile(). This does + nothing if JIT support is not available. + + More complicated programs might need to make use of the specialist + functions pcre2_jit_stack_create(), pcre2_jit_stack_free(), and + pcre2_jit_stack_assign() in order to control the JIT code's memory us- + age. + + JIT matching is automatically used by pcre2_match() if it is available, + unless the PCRE2_NO_JIT option is set. There is also a direct interface + for JIT matching, which gives improved performance at the expense of + less sanity checking. The JIT-specific functions are discussed in the + pcre2jit documentation. + + A second matching function, pcre2_dfa_match(), which is not Perl-com- + patible, is also provided. This uses a different algorithm for the + matching. The alternative algorithm finds all possible matches (at a + given point in the subject), and scans the subject just once (unless + there are lookaround assertions). However, this algorithm does not re- + turn captured substrings. A description of the two matching algorithms + and their advantages and disadvantages is given in the pcre2matching + documentation. There is no JIT support for pcre2_dfa_match(). + + In addition to the main compiling and matching functions, there are + convenience functions for extracting captured substrings from a subject + string that has been matched by pcre2_match(). 
They are: + + pcre2_substring_copy_byname() + pcre2_substring_copy_bynumber() + pcre2_substring_get_byname() + pcre2_substring_get_bynumber() + pcre2_substring_list_get() + pcre2_substring_length_byname() + pcre2_substring_length_bynumber() + pcre2_substring_nametable_scan() + pcre2_substring_number_from_name() + + pcre2_substring_free() and pcre2_substring_list_free() are also pro- + vided, to free memory used for extracted strings. If either of these + functions is called with a NULL argument, the function returns immedi- + ately without doing anything. + + The function pcre2_substitute() can be called to match a pattern and + return a copy of the subject string with substitutions for parts that + were matched. + + Functions whose names begin with pcre2_serialize_ are used for saving + compiled patterns on disc or elsewhere, and reloading them later. + + Finally, there are functions for finding out information about a com- + piled pattern (pcre2_pattern_info()) and about the configuration with + which PCRE2 was built (pcre2_config()). + + Functions with names ending with _free() are used for freeing memory + blocks of various sorts. In all cases, if one of these functions is + called with a NULL argument, it does nothing. + + +STRING LENGTHS AND OFFSETS + + The PCRE2 API uses string lengths and offsets into strings of code + units in several places. These values are always of type PCRE2_SIZE, + which is an unsigned integer type, currently always defined as size_t. + The largest value that can be stored in such a type (that is + ~(PCRE2_SIZE)0) is reserved as a special indicator for zero-terminated + strings and unset offsets. Therefore, the longest string that can be + handled is one less than this maximum. Note that string lengths are al- + ways given in code units. Only in the 8-bit library is such a length + the same as the number of bytes in the string. 
+ + +NEWLINES + + PCRE2 supports five different conventions for indicating line breaks in + strings: a single CR (carriage return) character, a single LF (line- + feed) character, the two-character sequence CRLF, any of the three pre- + ceding, or any Unicode newline sequence. The Unicode newline sequences + are the three just mentioned, plus the single characters VT (vertical + tab, U+000B), FF (form feed, U+000C), NEL (next line, U+0085), LS (line + separator, U+2028), and PS (paragraph separator, U+2029). + + Each of the first three conventions is used by at least one operating + system as its standard newline sequence. When PCRE2 is built, a default + can be specified. If it is not, the default is set to LF, which is the + Unix standard. However, the newline convention can be changed by an ap- + plication when calling pcre2_compile(), or it can be specified by spe- + cial text at the start of the pattern itself; this overrides any other + settings. See the pcre2pattern page for details of the special charac- + ter sequences. + + In the PCRE2 documentation the word "newline" is used to mean "the + character or pair of characters that indicate a line break". The choice + of newline convention affects the handling of the dot, circumflex, and + dollar metacharacters, the handling of #-comments in /x mode, and, when + CRLF is a recognized line ending sequence, the match position advance- + ment for a non-anchored pattern. There is more detail about this in the + section on pcre2_match() options below. + + The choice of newline convention does not affect the interpretation of + the \n or \r escape sequences, nor does it affect what \R matches; this + has its own separate convention. + + +MULTITHREADING + + In a multithreaded application it is important to keep thread-specific + data separate from data that can be shared between threads. The PCRE2 + library code itself is thread-safe: it contains no static or global + variables. 
The API is designed to be fairly simple for non-threaded ap- + plications while at the same time ensuring that multithreaded applica- + tions can use it. + + There are several different blocks of data that are used to pass infor- + mation between the application and the PCRE2 libraries. + + The compiled pattern + + A pointer to the compiled form of a pattern is returned to the user + when pcre2_compile() is successful. The data in the compiled pattern is + fixed, and does not change when the pattern is matched. Therefore, it + is thread-safe, that is, the same compiled pattern can be used by more + than one thread simultaneously. For example, an application can compile + all its patterns at the start, before forking off multiple threads that + use them. However, if the just-in-time (JIT) optimization feature is + being used, it needs separate memory stack areas for each thread. See + the pcre2jit documentation for more details. + + In a more complicated situation, where patterns are compiled only when + they are first needed, but are still shared between threads, pointers + to compiled patterns must be protected from simultaneous writing by + multiple threads. This is somewhat tricky to do correctly. If you know + that writing to a pointer is atomic in your environment, you can use + logic like this: + + Get a read-only (shared) lock (mutex) for pointer + if (pointer == NULL) + { + Get a write (unique) lock for pointer + if (pointer == NULL) pointer = pcre2_compile(... + } + Release the lock + Use pointer in pcre2_match() + + Of course, testing for compilation errors should also be included in + the code. + + The reason for checking the pointer a second time is as follows: Sev- + eral threads may have acquired the shared lock and tested the pointer + for being NULL, but only one of them will be given the write lock, with + the rest kept waiting. The winning thread will compile the pattern and + store the result. 
After this thread releases the write lock, another + thread will get it, and if it does not retest pointer for being NULL, + will recompile the pattern and overwrite the pointer, creating a memory + leak and possibly causing other issues. + + In an environment where writing to a pointer may not be atomic, the + above logic is not sufficient. The thread that is doing the compiling + may be descheduled after writing only part of the pointer, which could + cause other threads to use an invalid value. Instead of checking the + pointer itself, a separate "pointer is valid" flag (that can be updated + atomically) must be used: + + Get a read-only (shared) lock (mutex) for pointer + if (!pointer_is_valid) + { + Get a write (unique) lock for pointer + if (!pointer_is_valid) + { + pointer = pcre2_compile(... + pointer_is_valid = TRUE + } + } + Release the lock + Use pointer in pcre2_match() + + If JIT is being used, but the JIT compilation is not being done immedi- + ately (perhaps waiting to see if the pattern is used often enough), + similar logic is required. JIT compilation updates a value within the + compiled code block, so a thread must gain unique write access to the + pointer before calling pcre2_jit_compile(). Alternatively, + pcre2_code_copy() or pcre2_code_copy_with_tables() can be used to ob- + tain a private copy of the compiled code before calling the JIT com- + piler. + + Context blocks + + The next main section below introduces the idea of "contexts" in which + PCRE2 functions are called. A context is nothing more than a collection + of parameters that control the way PCRE2 operates. Grouping a number of + parameters together in a context is a convenient way of passing them to + a PCRE2 function without using lots of arguments. The parameters that + are stored in contexts are in some sense "advanced features" of the + API. Many straightforward applications will not need to use contexts. 
+ + In a multithreaded application, if the parameters in a context are val- + ues that are never changed, the same context can be used by all the + threads. However, if any thread needs to change any value in a context, + it must make its own thread-specific copy. + + Match blocks + + The matching functions need a block of memory for storing the results + of a match. This includes details of what was matched, as well as addi- + tional information such as the name of a (*MARK) setting. Each thread + must provide its own copy of this memory. + + +PCRE2 CONTEXTS + + Some PCRE2 functions have a lot of parameters, many of which are used + only by specialist applications, for example, those that use custom + memory management or non-standard character tables. To keep function + argument lists at a reasonable size, and at the same time to keep the + API extensible, "uncommon" parameters are passed to certain functions + in a context instead of directly. A context is just a block of memory + that holds the parameter values. Applications that do not need to ad- + just any of the context parameters can pass NULL when a context pointer + is required. + + There are three different types of context: a general context that is + relevant for several PCRE2 operations, a compile-time context, and a + match-time context. + + The general context + + At present, this context just contains pointers to (and data for) ex- + ternal memory management functions that are called from several places + in the PCRE2 library. The context is named `general' rather than + specifically `memory' because in future other fields may be added. If + you do not want to supply your own custom memory management functions, + you do not need to bother with a general context. 
A general context is + created by: + + pcre2_general_context *pcre2_general_context_create( + void *(*private_malloc)(PCRE2_SIZE, void *), + void (*private_free)(void *, void *), void *memory_data); + + The two function pointers specify custom memory management functions, + whose prototypes are: + + void *private_malloc(PCRE2_SIZE, void *); + void private_free(void *, void *); + + Whenever code in PCRE2 calls these functions, the final argument is the + value of memory_data. Either of the first two arguments of the creation + function may be NULL, in which case the system memory management func- + tions malloc() and free() are used. (This is not currently useful, as + there are no other fields in a general context, but in future there + might be.) The private_malloc() function is used (if supplied) to ob- + tain memory for storing the context, and all three values are saved as + part of the context. + + Whenever PCRE2 creates a data block of any kind, the block contains a + pointer to the free() function that matches the malloc() function that + was used. When the time comes to free the block, this function is + called. + + A general context can be copied by calling: + + pcre2_general_context *pcre2_general_context_copy( + pcre2_general_context *gcontext); + + The memory used for a general context should be freed by calling: + + void pcre2_general_context_free(pcre2_general_context *gcontext); + + If this function is passed a NULL argument, it returns immediately + without doing anything. 
+ + The compile context + + A compile context is required if you want to provide an external func- + tion for stack checking during compilation or to change the default + values of any of the following compile-time parameters: + + What \R matches (Unicode newlines or CR, LF, CRLF only) + PCRE2's character tables + The newline character sequence + The compile time nested parentheses limit + The maximum length of the pattern string + The extra options bits (none set by default) + + A compile context is also required if you are using custom memory man- + agement. If none of these apply, just pass NULL as the context argu- + ment of pcre2_compile(). + + A compile context is created, copied, and freed by the following func- + tions: + + pcre2_compile_context *pcre2_compile_context_create( + pcre2_general_context *gcontext); + + pcre2_compile_context *pcre2_compile_context_copy( + pcre2_compile_context *ccontext); + + void pcre2_compile_context_free(pcre2_compile_context *ccontext); + + A compile context is created with default values for its parameters. + These can be changed by calling the following functions, which return 0 + on success, or PCRE2_ERROR_BADDATA if invalid data is detected. + + int pcre2_set_bsr(pcre2_compile_context *ccontext, + uint32_t value); + + The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only + CR, LF, or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any + Unicode line ending sequence. The value is used by the JIT compiler and + by the two interpreted matching functions, pcre2_match() and + pcre2_dfa_match(). + + int pcre2_set_character_tables(pcre2_compile_context *ccontext, + const uint8_t *tables); + + The value must be the result of a call to pcre2_maketables(), whose + only argument is a general context. This function builds a set of char- + acter tables in the current locale. 
+ + int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, + uint32_t extra_options); + + As PCRE2 has developed, almost all the 32 option bits that are avail- + able in the options argument of pcre2_compile() have been used up. To + avoid running out, the compile context contains a set of extra option + bits which are used for some newer, assumed rarer, options. This func- + tion sets those bits. It always sets all the bits (either on or off). + It replaces, rather than modifies, any existing setting. The available options are de- + fined in the section entitled "Extra compile options" below. + + int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); + + This sets a maximum length, in code units, for any pattern string that + is compiled with this context. If the pattern is longer, an error is + generated. This facility is provided so that applications that accept + patterns from external sources can limit their size. The default is the + largest number that a PCRE2_SIZE variable can hold, which is effec- + tively unlimited. + + int pcre2_set_max_pattern_compiled_length( + pcre2_compile_context *ccontext, PCRE2_SIZE value); + + This sets a maximum size, in bytes, for the memory needed to hold the + compiled version of a pattern that is compiled with this context. If + the pattern needs more memory, an error is generated. This facility is + provided so that applications that accept patterns from external + sources can limit the amount of memory they use. The default is the + largest number that a PCRE2_SIZE variable can hold, which is effec- + tively unlimited. + + int pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, + uint32_t value); + + This sets a maximum length for the number of characters matched by a + variable-length lookbehind assertion. The default is set when PCRE2 is + built, with the ultimate default being 255, the same as Perl. Lookbe- + hind assertions without a bounding length are not supported.
+ + int pcre2_set_newline(pcre2_compile_context *ccontext, + uint32_t value); + + This specifies which characters or character sequences are to be recog- + nized as newlines. The value must be one of PCRE2_NEWLINE_CR (carriage + return only), PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the + two-character sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any + of the above), PCRE2_NEWLINE_ANY (any Unicode newline sequence), or + PCRE2_NEWLINE_NUL (the NUL character, that is a binary zero). + + A pattern can override the value set in the compile context by starting + with a sequence such as (*CRLF). See the pcre2pattern page for details. + + When a pattern is compiled with the PCRE2_EXTENDED or PCRE2_EX- + TENDED_MORE option, the newline convention affects the recognition of + the end of internal comments starting with #. The value is saved with + the compiled pattern for subsequent use by the JIT compiler and by the + two interpreted matching functions, pcre2_match() and + pcre2_dfa_match(). + + int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, + uint32_t value); + + This parameter adjusts the limit, set when PCRE2 is built (default + 250), on the depth of parenthesis nesting in a pattern. This limit + stops rogue patterns using up too much system stack when being com- + piled. The limit applies to parentheses of all kinds, not just captur- + ing parentheses. + + int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, + int (*guard_function)(uint32_t, void *), void *user_data); + + There is at least one application that runs PCRE2 in threads with very + limited system stack, where running out of stack is to be avoided at + all costs. The parenthesis limit above cannot take account of how much + stack is actually available during compilation. For a finer control, + you can supply a function that is called whenever pcre2_compile() + starts to compile a parenthesized part of a pattern. 
This function can + check the actual stack size (or anything else that it wants to, of + course). + + The first argument to the guard function gives the current depth of + nesting, and the second is user data that is set up by the last argu- + ment of pcre2_set_compile_recursion_guard(). The guard function + should return zero if all is well, or non-zero to force an error. + + The match context + + A match context is required if you want to: + + Set up a callout function + Set an offset limit for matching an unanchored pattern + Change the limit on the amount of heap used when matching + Change the backtracking match limit + Change the backtracking depth limit + Set custom memory management specifically for the match + + If none of these apply, just pass NULL as the context argument of + pcre2_match(), pcre2_dfa_match(), or pcre2_jit_match(). + + A match context is created, copied, and freed by the following func- + tions: + + pcre2_match_context *pcre2_match_context_create( + pcre2_general_context *gcontext); + + pcre2_match_context *pcre2_match_context_copy( + pcre2_match_context *mcontext); + + void pcre2_match_context_free(pcre2_match_context *mcontext); + + A match context is created with default values for its parameters. + These can be changed by calling the following functions, which return 0 + on success, or PCRE2_ERROR_BADDATA if invalid data is detected. + + int pcre2_set_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_callout_block *, void *), + void *callout_data); + + This sets up a callout function for PCRE2 to call at specified points + during a matching operation. Details are given in the pcre2callout doc- + umentation. + + int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_substitute_callout_block *, void *), + void *callout_data); + + This sets up a callout function for PCRE2 to call after each substitu- + tion made by pcre2_substitute().
Details are given in the section enti- + tled "Creating a new string with substitutions" below. + + int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); + + The offset_limit parameter limits how far an unanchored search can ad- + vance in the subject string. The default value is PCRE2_UNSET. The + pcre2_match() and pcre2_dfa_match() functions return PCRE2_ERROR_NO- + MATCH if a match with a starting point before or at the given offset is + not found. The pcre2_substitute() function makes no more substitutions. + + For example, if the pattern /abc/ is matched against "123abc" with an + offset limit less than 3, the result is PCRE2_ERROR_NOMATCH. A match + can never be found if the startoffset argument of pcre2_match(), + pcre2_dfa_match(), or pcre2_substitute() is greater than the offset + limit set in the match context. + + When using this facility, you must set the PCRE2_USE_OFFSET_LIMIT op- + tion when calling pcre2_compile() so that when JIT is in use, different + code can be compiled. If a match is started with a non-default offset + limit when PCRE2_USE_OFFSET_LIMIT is not set, an error is generated. + + The offset limit facility can be used to track progress when searching + large subject strings or to limit the extent of global substitutions. + See also the PCRE2_FIRSTLINE option, which requires a match to start + before or at the first newline that follows the start of matching in + the subject. If this is set with an offset limit, a match must occur in + the first line and also within the offset limit. In other words, + whichever limit comes first is used. + + int pcre2_set_heap_limit(pcre2_match_context *mcontext, + uint32_t value); + + The heap_limit parameter specifies, in units of kibibytes (1024 bytes), + the maximum amount of heap memory that pcre2_match() may use to hold + backtracking information when running an interpretive match.
This limit + also applies to pcre2_dfa_match(), which may use the heap when process- + ing patterns with a lot of nested pattern recursion or lookarounds or + atomic groups. This limit does not apply to matching with the JIT opti- + mization, which has its own memory control arrangements (see the + pcre2jit documentation for more details). If the limit is reached, the + negative error code PCRE2_ERROR_HEAPLIMIT is returned. The default + limit can be set when PCRE2 is built; if it is not, the default is set + very large and is essentially unlimited. + + A value for the heap limit may also be supplied by an item at the start + of a pattern of the form + + (*LIMIT_HEAP=ddd) + + where ddd is a decimal number. However, such a setting is ignored un- + less ddd is less than the limit set by the caller of pcre2_match() or, + if no such limit is set, less than the default. + + The pcre2_match() function always needs some heap memory, so setting a + value of zero guarantees a "heap limit exceeded" error. Details of how + pcre2_match() uses the heap are given in the pcre2perform documenta- + tion. + + For pcre2_dfa_match(), a vector on the system stack is used when pro- + cessing pattern recursions, lookarounds, or atomic groups, and only if + this is not big enough is heap memory used. In this case, setting a + value of zero disables the use of the heap. + + int pcre2_set_match_limit(pcre2_match_context *mcontext, + uint32_t value); + + The match_limit parameter provides a means of preventing PCRE2 from us- + ing up too many computing resources when processing patterns that are + not going to match, but which have a very large number of possibilities + in their search trees. The classic example is a pattern that uses + nested unlimited repeats. + + There is an internal counter in pcre2_match() that is incremented each + time round its main matching loop. If this value reaches the match + limit, pcre2_match() returns the negative value PCRE2_ERROR_MATCHLIMIT. 
+ This has the effect of limiting the amount of backtracking that can + take place. For patterns that are not anchored, the count restarts from + zero for each position in the subject string. This limit also applies + to pcre2_dfa_match(), though the counting is done in a different way. + + When pcre2_match() is called with a pattern that was successfully + processed by pcre2_jit_compile(), the way in which matching is executed + is entirely different. However, there is still the possibility of run- + away matching that goes on for a very long time, and so the match_limit + value is also used in this case (but in a different way) to limit how + long the matching can continue. + + The default value for the limit can be set when PCRE2 is built; the de- + fault is 10 million, which handles all but the most extreme cases. A + value for the match limit may also be supplied by an item at the start + of a pattern of the form + + (*LIMIT_MATCH=ddd) + + where ddd is a decimal number. However, such a setting is ignored un- + less ddd is less than the limit set by the caller of pcre2_match() or + pcre2_dfa_match() or, if no such limit is set, less than the default. + + int pcre2_set_depth_limit(pcre2_match_context *mcontext, + uint32_t value); + + This parameter limits the depth of nested backtracking in + pcre2_match(). Each time a nested backtracking point is passed, a new + memory frame is used to remember the state of matching at that point. + Thus, this parameter indirectly limits the amount of memory that is + used in a match. However, because the size of each memory frame depends + on the number of capturing parentheses, the actual memory limit varies + from pattern to pattern. This limit was more useful in versions before + 10.30, where function recursion was used for backtracking. + + The depth limit is not relevant, and is ignored, when matching is done + using JIT compiled code. 
However, it is supported by pcre2_dfa_match(), + which uses it to limit the depth of nested internal recursive function + calls that implement atomic groups, lookaround assertions, and pattern + recursions. This limits, indirectly, the amount of system stack that is + used. It was more useful in versions before 10.32, when stack memory + was used for local workspace vectors for recursive function calls. From + version 10.32, only local variables are allocated on the stack and as + each call uses only a few hundred bytes, even a small stack can support + quite a lot of recursion. + + If the depth of internal recursive function calls is great enough, lo- + cal workspace vectors are allocated on the heap from version 10.32 on- + wards, so the depth limit also indirectly limits the amount of heap + memory that is used. A recursive pattern such as /(.(?2))((?1)|)/, when + matched to a very long string using pcre2_dfa_match(), can use a great + deal of memory. However, it is probably better to limit heap usage di- + rectly by calling pcre2_set_heap_limit(). + + The default value for the depth limit can be set when PCRE2 is built; + if it is not, the default is set to the same value as the default for + the match limit. If the limit is exceeded, pcre2_match() or + pcre2_dfa_match() returns PCRE2_ERROR_DEPTHLIMIT. A value for the depth + limit may also be supplied by an item at the start of a pattern of the + form + + (*LIMIT_DEPTH=ddd) + + where ddd is a decimal number. However, such a setting is ignored un- + less ddd is less than the limit set by the caller of pcre2_match() or + pcre2_dfa_match() or, if no such limit is set, less than the default. + + +CHECKING BUILD-TIME OPTIONS + + int pcre2_config(uint32_t what, void *where); + + The function pcre2_config() makes it possible for a PCRE2 client to + find the value of certain configuration parameters and to discover + which optional features have been compiled into the PCRE2 library. 
The + pcre2build documentation has more details about these features. + + The first argument for pcre2_config() specifies which information is + required. The second argument is a pointer to memory into which the in- + formation is placed. If NULL is passed, the function returns the amount + of memory that is needed for the requested information. For calls that + return numerical values, the value is in bytes; when requesting these + values, where should point to appropriately aligned memory. For calls + that return strings, the required length is given in code units, not + counting the terminating zero. + + When requesting information, the returned value from pcre2_config() is + non-negative on success, or the negative error code PCRE2_ERROR_BADOP- + TION if the value in the first argument is not recognized. The follow- + ing information is available: + + PCRE2_CONFIG_BSR + + The output is a uint32_t integer whose value indicates what character + sequences the \R escape sequence matches by default. A value of + PCRE2_BSR_UNICODE means that \R matches any Unicode line ending se- + quence; a value of PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, + or CRLF. The default can be overridden when a pattern is compiled. + + PCRE2_CONFIG_COMPILED_WIDTHS + + The output is a uint32_t integer whose lower bits indicate which code + unit widths were selected when PCRE2 was built. The 1-bit indicates + 8-bit support, and the 2-bit and 4-bit indicate 16-bit and 32-bit sup- + port, respectively. + + PCRE2_CONFIG_DEPTHLIMIT + + The output is a uint32_t integer that gives the default limit for the + depth of nested backtracking in pcre2_match() or the depth of nested + recursions, lookarounds, and atomic groups in pcre2_dfa_match(). Fur- + ther details are given with pcre2_set_depth_limit() above. 
+ + PCRE2_CONFIG_HEAPLIMIT + + The output is a uint32_t integer that gives, in kibibytes, the default + limit for the amount of heap memory used by pcre2_match() or + pcre2_dfa_match(). Further details are given with + pcre2_set_heap_limit() above. + + PCRE2_CONFIG_JIT + + The output is a uint32_t integer that is set to one if support for + just-in-time compiling is included in the library; otherwise it is set + to zero. Note that having the support in the library does not guarantee + that JIT will be used for any given match. See the pcre2jit documenta- + tion for more details. + + PCRE2_CONFIG_JITTARGET + + The where argument should point to a buffer that is at least 48 code + units long. (The exact length required can be found by calling + pcre2_config() with where set to NULL.) The buffer is filled with a + string that contains the name of the architecture for which the JIT + compiler is configured, for example "x86 32bit (little endian + un- + aligned)". If JIT support is not available, PCRE2_ERROR_BADOPTION is + returned, otherwise the number of code units used is returned. This is + the length of the string, plus one unit for the terminating zero. + + PCRE2_CONFIG_LINKSIZE + + The output is a uint32_t integer that contains the number of bytes used + for internal linkage in compiled regular expressions. When PCRE2 is + configured, the value can be set to 2, 3, or 4, with the default being + 2. This is the value that is returned by pcre2_config(). However, when + the 16-bit library is compiled, a value of 3 is rounded up to 4, and + when the 32-bit library is compiled, internal linkages always use 4 + bytes, so the configured value is not relevant. + + The default value of 2 for the 8-bit and 16-bit libraries is sufficient + for all but the most massive patterns, since it allows the size of the + compiled pattern to be up to 65535 code units. 
Larger values allow + larger regular expressions to be compiled by those two libraries, but + at the expense of slower matching. + + PCRE2_CONFIG_MATCHLIMIT + + The output is a uint32_t integer that gives the default match limit for + pcre2_match(). Further details are given with pcre2_set_match_limit() + above. + + PCRE2_CONFIG_NEWLINE + + The output is a uint32_t integer whose value specifies the default + character sequence that is recognized as meaning "newline". The values + are: + + PCRE2_NEWLINE_CR Carriage return (CR) + PCRE2_NEWLINE_LF Linefeed (LF) + PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF) + PCRE2_NEWLINE_ANY Any Unicode line ending + PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF + PCRE2_NEWLINE_NUL The NUL character (binary zero) + + The default should normally correspond to the standard sequence for + your operating system. + + PCRE2_CONFIG_NEVER_BACKSLASH_C + + The output is a uint32_t integer that is set to one if the use of \C + was permanently disabled when PCRE2 was built; otherwise it is set to + zero. + + PCRE2_CONFIG_PARENSLIMIT + + The output is a uint32_t integer that gives the maximum depth of nest- + ing of parentheses (of any kind) in a pattern. This limit is imposed to + cap the amount of system stack used when a pattern is compiled. It is + specified when PCRE2 is built; the default is 250. This limit does not + take into account the stack that may already be used by the calling ap- + plication. For finer control over compilation stack usage, see + pcre2_set_compile_recursion_guard(). + + PCRE2_CONFIG_STACKRECURSE + + This parameter is obsolete and should not be used in new code. The out- + put is a uint32_t integer that is always set to zero. + + PCRE2_CONFIG_TABLES_LENGTH + + The output is a uint32_t integer that gives the length of PCRE2's char- + acter processing tables in bytes. For details of these tables see the + section on locale support below. 
+ + PCRE2_CONFIG_UNICODE_VERSION + + The where argument should point to a buffer that is at least 24 code + units long. (The exact length required can be found by calling + pcre2_config() with where set to NULL.) If PCRE2 has been compiled + without Unicode support, the buffer is filled with the text "Unicode + not supported". Otherwise, the Unicode version string (for example, + "8.0.0") is inserted. The number of code units used is returned. This + is the length of the string plus one unit for the terminating zero. + + PCRE2_CONFIG_UNICODE + + The output is a uint32_t integer that is set to one if Unicode support + is available; otherwise it is set to zero. Unicode support implies UTF + support. + + PCRE2_CONFIG_VERSION + + The where argument should point to a buffer that is at least 24 code + units long. (The exact length required can be found by calling + pcre2_config() with where set to NULL.) The buffer is filled with the + PCRE2 version string, zero-terminated. The number of code units used is + returned. This is the length of the string plus one unit for the termi- + nating zero. + + +COMPILING A PATTERN + + pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, + pcre2_compile_context *ccontext); + + void pcre2_code_free(pcre2_code *code); + + pcre2_code *pcre2_code_copy(const pcre2_code *code); + + pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); + + The pcre2_compile() function compiles a pattern into an internal form. + The pattern is defined by a pointer to a string of code units and a + length in code units. If the pattern is zero-terminated, the length can + be specified as PCRE2_ZERO_TERMINATED. A NULL pattern pointer with a + length of zero is treated as an empty string (NULL with a non-zero + length causes an error return). The function returns a pointer to a + block of memory that contains the compiled pattern and related data, or + NULL if an error occurred. 
+ + If the compile context argument ccontext is NULL, memory for the com- + piled pattern is obtained by calling malloc(). Otherwise, it is ob- + tained from the same memory function that was used for the compile con- + text. The caller must free the memory by calling pcre2_code_free() when + it is no longer needed. If pcre2_code_free() is called with a NULL ar- + gument, it returns immediately, without doing anything. + + The function pcre2_code_copy() makes a copy of the compiled code in new + memory, using the same memory allocator as was used for the original. + However, if the code has been processed by the JIT compiler (see be- + low), the JIT information cannot be copied (because it is position-de- + pendent). The new copy can initially be used only for non-JIT match- + ing, though it can be passed to pcre2_jit_compile() if required. If + pcre2_code_copy() is called with a NULL argument, it returns NULL. + + The pcre2_code_copy() function provides a way for individual threads in + a multithreaded application to acquire a private copy of shared com- + piled code. However, it does not make a copy of the character tables + used by the compiled pattern; the new pattern code points to the same + tables as the original code. (See "Locale Support" below for details + of these character tables.) In many applications the same tables are + used throughout, so this behaviour is appropriate. Nevertheless, there + are occasions when a copy of a compiled pattern and the relevant tables + are needed. The pcre2_code_copy_with_tables() provides this facility. + Copies of both the code and the tables are made, with the new code + pointing to the new tables. The memory for the new tables is automati- + cally freed when pcre2_code_free() is called for the new copy of the + compiled code. If pcre2_code_copy_with_tables() is called with a NULL + argument, it returns NULL. 
+ + NOTE: When one of the matching functions is called, pointers to the + compiled pattern and the subject string are set in the match data block + so that they can be referenced by the substring extraction functions + after a successful match. After running a match, you must not free a + compiled pattern or a subject string until after all operations on the + match data block have taken place, unless, in the case of the subject + string, you have used the PCRE2_COPY_MATCHED_SUBJECT option, which is + described in the section entitled "Option bits for pcre2_match()" be- + low. + + The options argument for pcre2_compile() contains various bit settings + that affect the compilation. It should be zero if none of them are re- + quired. The available options are described below. Some of them (in + particular, those that are compatible with Perl, but some others as + well) can also be set and unset from within the pattern (see the de- + tailed description in the pcre2pattern documentation). + + For those options that can be different in different parts of the pat- + tern, the contents of the options argument specifies their settings at + the start of compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and + PCRE2_NO_UTF_CHECK options can be set at the time of matching as well + as at compile time. + + Some additional options and less frequently required compile-time para- + meters (for example, the newline setting) can be provided in a compile + context (as described above). + + If errorcode or erroroffset is NULL, pcre2_compile() returns NULL imme- + diately. Otherwise, the variables to which these point are set to an + error code and an offset (number of code units) within the pattern, re- + spectively, when pcre2_compile() returns NULL because a compilation er- + ror has occurred. + + There are nearly 100 positive error codes that pcre2_compile() may re- + turn if it finds an error in the pattern. 
There are also some negative + error codes that are used for invalid UTF strings when validity check- + ing is in force. These are the same as given by pcre2_match() and + pcre2_dfa_match(), and are described in the pcre2unicode documentation. + There is no separate documentation for the positive error codes, be- + cause the textual error messages that are obtained by calling the + pcre2_get_error_message() function (see "Obtaining a textual error mes- + sage" below) should be self-explanatory. Macro names starting with + PCRE2_ERROR_ are defined for both positive and negative error codes in + pcre2.h. When compilation is successful errorcode is set to a value + that returns the message "no error" if passed to pcre2_get_error_mes- + sage(). + + The value returned in erroroffset is an indication of where in the pat- + tern an error occurred. When there is no error, zero is returned. A + non-zero value is not necessarily the furthest point in the pattern + that was read. For example, after the error "lookbehind assertion is + not fixed length", the error offset points to the start of the failing + assertion. For an invalid UTF-8 or UTF-16 string, the offset is that of + the first code unit of the failing character. + + Some errors are not detected until the whole pattern has been scanned; + in these cases, the offset passed back is the length of the pattern. + Note that the offset is in code units, not characters, even in a UTF + mode. It may sometimes point into the middle of a UTF-8 or UTF-16 char- + acter. 
+ + This code fragment shows a typical straightforward call to pcre2_com- + pile(): + + pcre2_code *re; + PCRE2_SIZE erroffset; + int errorcode; + re = pcre2_compile( + "^A.*Z", /* the pattern */ + PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */ + 0, /* default options */ + &errorcode, /* for error code */ + &erroffset, /* for error offset */ + NULL); /* no compile context */ + + + Main compile options + + The following names for option bits are defined in the pcre2.h header + file: + + PCRE2_ANCHORED + + If this bit is set, the pattern is forced to be "anchored", that is, it + is constrained to match only at the first matching point in the string + that is being searched (the "subject string"). This effect can also be + achieved by appropriate constructs in the pattern itself, which is the + only way to do it in Perl. + + PCRE2_ALLOW_EMPTY_CLASS + + By default, for compatibility with Perl, a closing square bracket that + immediately follows an opening one is treated as a data character for + the class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the + class, which therefore contains no characters and so can never match. + + PCRE2_ALT_BSUX + + This option requests alternative handling of three escape sequences, + which makes PCRE2's behaviour more like ECMAscript (aka JavaScript). + When it is set: + + (1) \U matches an upper case "U" character; by default \U causes a com- + pile time error (Perl uses \U to upper case subsequent characters). + + (2) \u matches a lower case "u" character unless it is followed by four + hexadecimal digits, in which case the hexadecimal number defines the + code point to match. By default, \u causes a compile time error (Perl + uses it to upper case the following character). + + (3) \x matches a lower case "x" character unless it is followed by two + hexadecimal digits, in which case the hexadecimal number defines the + code point to match. 
By default, as in Perl, a hexadecimal number is + always expected after \x, but it may have zero, one, or two digits (so, + for example, \xz matches a binary zero character followed by z). + + ECMAscript 6 added additional functionality to \u. This can be accessed + using the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile op- + tions" below). Note that this alternative escape handling applies only + to patterns. Neither of these options affects the processing of re- + placement strings passed to pcre2_substitute(). + + PCRE2_ALT_CIRCUMFLEX + + In multiline mode (when PCRE2_MULTILINE is set), the circumflex + metacharacter matches at the start of the subject (unless PCRE2_NOTBOL + is set), and also after any internal newline. However, it does not + match after a newline at the end of the subject, for compatibility with + Perl. If you want a multiline circumflex also to match after a termi- + nating newline, you must set PCRE2_ALT_CIRCUMFLEX. + + PCRE2_ALT_VERBNAMES + + By default, for compatibility with Perl, the name in any verb sequence + such as (*MARK:NAME) is any sequence of characters that does not in- + clude a closing parenthesis. The name is not processed in any way, and + it is not possible to include a closing parenthesis in the name. How- + ever, if the PCRE2_ALT_VERBNAMES option is set, normal backslash pro- + cessing is applied to verb names and only an unescaped closing paren- + thesis terminates the name. A closing parenthesis can be included in a + name either as \) or between \Q and \E. If the PCRE2_EXTENDED or + PCRE2_EXTENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped + whitespace in verb names is skipped and #-comments are recognized, ex- + actly as in the rest of the pattern. + + PCRE2_AUTO_CALLOUT + + If this bit is set, pcre2_compile() automatically inserts callout + items, all with number 255, before each pattern item, except immedi- + ately before or after an explicit callout in the pattern. 
For discus- + sion of the callout facility, see the pcre2callout documentation. + + PCRE2_CASELESS + + If this bit is set, letters in the pattern match both upper and lower + case letters in the subject. It is equivalent to Perl's /i option, and + it can be changed within a pattern by a (?i) option setting. If either + PCRE2_UTF or PCRE2_UCP is set, Unicode properties are used for all + characters with more than one other case, and for all characters whose + code points are greater than U+007F. Note that there are two ASCII + characters, K and S, that, in addition to their lower case ASCII equiv- + alents, are case-equivalent with U+212A (Kelvin sign) and U+017F (long + S) respectively. If you do not want this case equivalence, you can sup- + press it by setting PCRE2_EXTRA_CASELESS_RESTRICT. + + For lower valued characters with only one other case, a lookup table is + used for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup + table is used for all code points less than 256, and higher code points + (available only in 16-bit or 32-bit mode) are treated as not having an- + other case. + + PCRE2_DOLLAR_ENDONLY + + If this bit is set, a dollar metacharacter in the pattern matches only + at the end of the subject string. Without this option, a dollar also + matches immediately before a newline at the end of the string (but not + before any other newlines). The PCRE2_DOLLAR_ENDONLY option is ignored + if PCRE2_MULTILINE is set. There is no equivalent to this option in + Perl, and no way to set it within a pattern. + + PCRE2_DOTALL + + If this bit is set, a dot metacharacter in the pattern matches any + character, including one that indicates a newline. However, it only + ever matches one character, even if newlines are coded as CRLF. Without + this option, a dot does not match when the current position in the sub- + ject is at a newline. This option is equivalent to Perl's /s option, + and it can be changed within a pattern by a (?s) option setting. 
A neg- + ative class such as [^a] always matches newline characters, and the \N + escape sequence always matches a non-newline character, independent of + the setting of PCRE2_DOTALL. + + PCRE2_DUPNAMES + + If this bit is set, names used to identify capture groups need not be + unique. This can be helpful for certain types of pattern when it is + known that only one instance of the named group can ever be matched. + There are more details of named capture groups below; see also the + pcre2pattern documentation. + + PCRE2_ENDANCHORED + + If this bit is set, the end of any pattern match must be right at the + end of the string being searched (the "subject string"). If the pattern + match succeeds by reaching (*ACCEPT), but does not reach the end of the + subject, the match fails at the current starting point. For unanchored + patterns, a new match is then tried at the next starting point. How- + ever, if the match succeeds by reaching the end of the pattern, but not + the end of the subject, backtracking occurs and an alternative match + may be found. Consider these two patterns: + + .(*ACCEPT)|.. + .|.. + + If matched against "abc" with PCRE2_ENDANCHORED set, the first matches + "c" whereas the second matches "bc". The effect of PCRE2_ENDANCHORED + can also be achieved by appropriate constructs in the pattern itself, + which is the only way to do it in Perl. + + For DFA matching with pcre2_dfa_match(), PCRE2_ENDANCHORED applies only + to the first (that is, the longest) matched string. Other parallel + matches, which are necessarily substrings of the first one, must obvi- + ously end before the end of the subject. + + PCRE2_EXTENDED + + If this bit is set, most white space characters in the pattern are to- + tally ignored except when escaped, inside a character class, or inside + a \Q...\E sequence. However, white space is not allowed within se- + quences such as (?> that introduce various parenthesized groups, nor + within numerical quantifiers such as {1,3}. 
Ignorable white space is + permitted between an item and a following quantifier and between a + quantifier and a following + that indicates possessiveness. PCRE2_EX- + TENDED is equivalent to Perl's /x option, and it can be changed within + a pattern by a (?x) option setting. + + When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recog- + nizes as white space only those characters with code points less than + 256 that are flagged as white space in its low-character table. The ta- + ble is normally created by pcre2_maketables(), which uses the isspace() + function to identify space characters. In most ASCII environments, the + relevant characters are those with code points 0x0009 (tab), 0x000A + (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D (carriage + return), and 0x0020 (space). + + When PCRE2 is compiled with Unicode support, in addition to these char- + acters, five more Unicode "Pattern White Space" characters are recog- + nized by PCRE2_EXTENDED. These are U+0085 (next line), U+200E (left-to- + right mark), U+200F (right-to-left mark), U+2028 (line separator), and + U+2029 (paragraph separator). This set of characters is the same as + recognized by Perl's /x option. Note that the horizontal and vertical + space characters that are matched by the \h and \v escapes in patterns + are a much bigger set. + + As well as ignoring most white space, PCRE2_EXTENDED also causes char- + acters between an unescaped # outside a character class and the next + newline, inclusive, to be ignored, which makes it possible to include + comments inside complicated patterns. Note that the end of this type of + comment is a literal newline sequence in the pattern; escape sequences + that happen to represent a newline do not count. 
+ + Which characters are interpreted as newlines can be specified by a set- + ting in the compile context that is passed to pcre2_compile() or by a + special sequence at the start of the pattern, as described in the sec- + tion entitled "Newline conventions" in the pcre2pattern documentation. + A default is defined when PCRE2 is built. + + PCRE2_EXTENDED_MORE + + This option has the effect of PCRE2_EXTENDED, but, in addition, un- + escaped space and horizontal tab characters are ignored inside a char- + acter class. Note: only these two characters are ignored, not the full + set of pattern white space characters that are ignored outside a char- + acter class. PCRE2_EXTENDED_MORE is equivalent to Perl's /xx option, + and it can be changed within a pattern by a (?xx) option setting. + + PCRE2_FIRSTLINE + + If this option is set, the start of an unanchored pattern match must be + before or at the first newline in the subject string following the + start of matching, though the matched text may continue over the new- + line. If startoffset is non-zero, the limiting newline is not necessar- + ily the first newline in the subject. For example, if the subject + string is "abc\nxyz" (where \n represents a single-character newline) a + pattern match for "yz" succeeds with PCRE2_FIRSTLINE if startoffset is + greater than 3. See also PCRE2_USE_OFFSET_LIMIT, which provides a more + general limiting facility. If PCRE2_FIRSTLINE is set with an offset + limit, a match must occur in the first line and also within the offset + limit. In other words, whichever limit comes first is used. This option + has no effect for anchored patterns. + + PCRE2_LITERAL + + If this option is set, all meta-characters in the pattern are disabled, + and it is treated as a literal string. Matching literal strings with a + regular expression engine is not the most efficient way of doing it. 
If + you are doing a lot of literal matching and are worried about effi- + ciency, you should consider using other approaches. The only other main + options that are allowed with PCRE2_LITERAL are: PCRE2_ANCHORED, + PCRE2_ENDANCHORED, PCRE2_AUTO_CALLOUT, PCRE2_CASELESS, PCRE2_FIRSTLINE, + PCRE2_MATCH_INVALID_UTF, PCRE2_NO_START_OPTIMIZE, PCRE2_NO_UTF_CHECK, + PCRE2_UTF, and PCRE2_USE_OFFSET_LIMIT. The extra options PCRE2_EX- + TRA_MATCH_LINE and PCRE2_EXTRA_MATCH_WORD are also supported. Any other + options cause an error. + + PCRE2_MATCH_INVALID_UTF + + This option forces PCRE2_UTF (see below) and also enables support for + matching by pcre2_match() in subject strings that contain invalid UTF + sequences. Note, however, that the 16-bit and 32-bit PCRE2 libraries + process strings as sequences of uint16_t or uint32_t code points. They + cannot find valid UTF sequences within an arbitrary string of bytes un- + less such sequences are suitably aligned. This facility is not sup- + ported for DFA matching. For details, see the pcre2unicode documenta- + tion. + + PCRE2_MATCH_UNSET_BACKREF + + If this option is set, a backreference to an unset capture group + matches an empty string (by default this causes the current matching + alternative to fail). A pattern such as (\1)(a) succeeds when this op- + tion is set (assuming it can find an "a" in the subject), whereas it + fails by default, for Perl compatibility. Setting this option makes + PCRE2 behave more like ECMAscript (aka JavaScript). + + PCRE2_MULTILINE + + By default, for the purposes of matching "start of line" and "end of + line", PCRE2 treats the subject string as consisting of a single line + of characters, even if it actually contains newlines. The "start of + line" metacharacter (^) matches only at the start of the string, and + the "end of line" metacharacter ($) matches only at the end of the + string, or before a terminating newline (except when PCRE2_DOLLAR_EN- + DONLY is set). 
Note, however, that unless PCRE2_DOTALL is set, the "any + character" metacharacter (.) does not match at a newline. This behav- + iour (for ^, $, and dot) is the same as Perl. + + When PCRE2_MULTILINE is set, the "start of line" and "end of line" + constructs match immediately following or immediately before internal + newlines in the subject string, respectively, as well as at the very + start and end. This is equivalent to Perl's /m option, and it can be + changed within a pattern by a (?m) option setting. Note that the "start + of line" metacharacter does not match after a newline at the end of the + subject, for compatibility with Perl. However, you can change this by + setting the PCRE2_ALT_CIRCUMFLEX option. If there are no newlines in a + subject string, or no occurrences of ^ or $ in a pattern, setting + PCRE2_MULTILINE has no effect. + + PCRE2_NEVER_BACKSLASH_C + + This option locks out the use of \C in the pattern that is being com- + piled. This escape can cause unpredictable behaviour in UTF-8 or + UTF-16 modes, because it may leave the current matching point in the + middle of a multi-code-unit character. This option may be useful in ap- + plications that process patterns from external sources. Note that there + is also a build-time option that permanently locks out the use of \C. + + PCRE2_NEVER_UCP + + This option locks out the use of Unicode properties for handling \B, + \b, \D, \d, \S, \s, \W, \w, and some of the POSIX character classes, as + described for the PCRE2_UCP option below. In particular, it prevents + the creator of the pattern from enabling this facility by starting the + pattern with (*UCP). This option may be useful in applications that + process patterns from external sources. The option combination PCRE2_UCP + and PCRE2_NEVER_UCP causes an error. + + PCRE2_NEVER_UTF + + This option locks out interpretation of the pattern as UTF-8, UTF-16, + or UTF-32, depending on which library is in use. 
In particular, it pre- + vents the creator of the pattern from switching to UTF interpretation + by starting the pattern with (*UTF). This option may be useful in ap- + plications that process patterns from external sources. The combination + of PCRE2_UTF and PCRE2_NEVER_UTF causes an error. + + PCRE2_NO_AUTO_CAPTURE + + If this option is set, it disables the use of numbered capturing paren- + theses in the pattern. Any opening parenthesis that is not followed by + ? behaves as if it were followed by ?: but named parentheses can still + be used for capturing (and they acquire numbers in the usual way). This + is the same as Perl's /n option. Note that, when this option is set, + references to capture groups (backreferences or recursion/subroutine + calls) may only refer to named groups, though the reference can be by + name or by number. + + PCRE2_NO_AUTO_POSSESS + + If this option is set, it disables "auto-possessification", which is an + optimization that, for example, turns a+b into a++b in order to avoid + backtracks into a+ that can never be successful. However, if callouts + are in use, auto-possessification means that some callouts are never + taken. You can set this option if you want the matching functions to do + a full unoptimized search and run all the callouts, but it is mainly + provided for testing purposes. + + PCRE2_NO_DOTSTAR_ANCHOR + + If this option is set, it disables an optimization that is applied when + .* is the first significant item in a top-level branch of a pattern, + and all the other branches also start with .* or with \A or \G or ^. + The optimization is automatically disabled for .* if it is inside an + atomic group or a capture group that is the subject of a backreference, + or if the pattern contains (*PRUNE) or (*SKIP). When the optimization + is not disabled, such a pattern is automatically anchored if + PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILINE is not set + for any ^ items. 
Otherwise, the fact that any match must start either + at the start of the subject or following a newline is remembered. Like + other optimizations, this can cause callouts to be skipped. + + PCRE2_NO_START_OPTIMIZE + + This is an option whose main effect is at matching time. It does not + change what pcre2_compile() generates, but it does affect the output of + the JIT compiler. + + There are a number of optimizations that may occur at the start of a + match, in order to speed up the process. For example, if it is known + that an unanchored match must start with a specific code unit value, + the matching code searches the subject for that value, and fails imme- + diately if it cannot find it, without actually running the main match- + ing function. This means that a special item such as (*COMMIT) at the + start of a pattern is not considered until after a suitable starting + point for the match has been found. Also, when callouts or (*MARK) + items are in use, these "start-up" optimizations can cause them to be + skipped if the pattern is never actually used. The start-up optimiza- + tions are in effect a pre-scan of the subject that takes place before + the pattern is run. + + The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations, + possibly causing performance to suffer, but ensuring that in cases + where the result is "no match", the callouts do occur, and that items + such as (*COMMIT) and (*MARK) are considered at every possible starting + position in the subject string. + + Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching + operation. Consider the pattern + + (*COMMIT)ABC + + When this is compiled, PCRE2 records the fact that a match must start + with the character "A". Suppose the subject string is "DEFABC". The + start-up optimization scans along the subject, finds "A" and runs the + first match attempt from there. 
The (*COMMIT) item means that the pat- + tern must match at the current starting position, which in this case, it + does. However, if the same match is run with PCRE2_NO_START_OPTIMIZE + set, the initial scan along the subject string does not happen. The + first match attempt is run starting from "D" and when this fails, + (*COMMIT) prevents any further matches being tried, so the overall re- + sult is "no match". + + Another start-up optimization makes use of a minimum length for a + matching subject, which is recorded when possible. Consider the pattern + + (*MARK:1)B(*MARK:2)(X|Y) + + The minimum length for a match is two characters. If the subject is + "XXBB", the "starting character" optimization skips "XX", then tries to + match "BB", which is long enough. In the process, (*MARK:2) is encoun- + tered and remembered. When the match attempt fails, the next "B" is + found, but there is only one character left, so there are no more at- + tempts, and "no match" is returned with the "last mark seen" set to + "2". If PCRE2_NO_START_OPTIMIZE is set, however, matches are tried at every + possible starting position, including at the end of the subject, where + (*MARK:1) is encountered, but there is no "B", so the "last mark seen" + that is returned is "1". In this case, the optimizations do not affect + the overall match result, which is still "no match", but they do affect + the auxiliary information that is returned. + + PCRE2_NO_UTF_CHECK + + When PCRE2_UTF is set, the validity of the pattern as a UTF string is + automatically checked. There are discussions about the validity of + UTF-8 strings, UTF-16 strings, and UTF-32 strings in the pcre2unicode + document. If an invalid UTF sequence is found, pcre2_compile() returns + a negative error code. + + If you know that your pattern is a valid UTF string, and you want to + skip this check for performance reasons, you can set the + PCRE2_NO_UTF_CHECK option. 
When it is set, the effect of passing an in- + valid UTF string as a pattern is undefined. It may cause your program + to crash or loop. + + Note that this option can also be passed to pcre2_match() and + pcre2_dfa_match(), to suppress UTF validity checking of the subject + string. + + Note also that setting PCRE2_NO_UTF_CHECK at compile time does not dis- + able the error that is given if an escape sequence for an invalid Uni- + code code point is encountered in the pattern. In particular, the so- + called "surrogate" code points (0xd800 to 0xdfff) are invalid. If you + want to allow escape sequences such as \x{d800} you can set the + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option, as described in the + section entitled "Extra compile options" below. However, this is pos- + sible only in UTF-8 and UTF-32 modes, because these values are not rep- + resentable in UTF-16. + + PCRE2_UCP + + This option has two effects. Firstly, it changes the way PCRE2 processes + \B, \b, \D, \d, \S, \s, \W, \w, and some of the POSIX character + classes. By default, only ASCII characters are recognized, but if + PCRE2_UCP is set, Unicode properties are used to classify characters. + There are some PCRE2_EXTRA options (see below) that add finer control + to this behaviour. More details are given in the section on generic + character types in the pcre2pattern page. + + The second effect of PCRE2_UCP is to force the use of Unicode proper- + ties for upper/lower casing operations, even when PCRE2_UTF is not set. + This makes it possible to process strings in the 16-bit UCS-2 code. + This option is available only if PCRE2 has been compiled with Unicode + support (which is the default). The PCRE2_EXTRA_CASELESS_RESTRICT op- + tion (see below) restricts caseless matching such that ASCII characters + match only ASCII characters and non-ASCII characters match only non- + ASCII characters. 
+ + PCRE2_UNGREEDY + + This option inverts the "greediness" of the quantifiers so that they + are not greedy by default, but become greedy if followed by "?". It is + not compatible with Perl. It can also be set by a (?U) option setting + within the pattern. + + PCRE2_USE_OFFSET_LIMIT + + This option must be set for pcre2_compile() if pcre2_set_offset_limit() + is going to be used to set a non-default offset limit in a match con- + text for matches that use this pattern. An error is generated if an + offset limit is set without this option. For more details, see the de- + scription of pcre2_set_offset_limit() in the section that describes + match contexts. See also the PCRE2_FIRSTLINE option above. + + PCRE2_UTF + + This option causes PCRE2 to regard both the pattern and the subject + strings that are subsequently processed as strings of UTF characters + instead of single-code-unit strings. It is available when PCRE2 is + built to include Unicode support (which is the default). If Unicode + support is not available, the use of this option provokes an error. De- + tails of how PCRE2_UTF changes the behaviour of PCRE2 are given in the + pcre2unicode page. In particular, note that it changes the way + PCRE2_CASELESS works. + + Extra compile options + + The option bits that can be set in a compile context by calling the + pcre2_set_compile_extra_options() function are as follows: + + PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK + + Since release 10.38 PCRE2 has forbidden the use of \K within lookaround + assertions, following Perl's lead. This option is provided to re-enable + the previous behaviour (act in positive lookarounds, ignore in negative + ones) in case anybody is relying on it. + + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES + + This option applies when compiling a pattern in UTF-8 or UTF-32 mode. + It is forbidden in UTF-16 mode, and ignored in non-UTF modes. 
Unicode + "surrogate" code points in the range 0xd800 to 0xdfff are used in pairs + in UTF-16 to encode code points with values in the range 0x10000 to + 0x10ffff. The surrogates cannot therefore be represented in UTF-16. + They can be represented in UTF-8 and UTF-32, but are defined as invalid + code points, and cause errors if encountered in a UTF-8 or UTF-32 + string that is being checked for validity by PCRE2. + + These values also cause errors if encountered in escape sequences such + as \x{d912} within a pattern. However, it seems that some applications, + when using PCRE2 to check for unwanted characters in UTF-8 strings, ex- + plicitly test for the surrogates using escape sequences. The + PCRE2_NO_UTF_CHECK option does not disable the error that occurs, be- + cause it applies only to the testing of input strings for UTF validity. + + If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surro- + gate code point values in UTF-8 and UTF-32 patterns no longer provoke + errors and are incorporated in the compiled pattern. However, they can + only match subject characters if the matching function is called with + PCRE2_NO_UTF_CHECK set. + + PCRE2_EXTRA_ALT_BSUX + + The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and + \x in the way that ECMAscript (aka JavaScript) does. Additional func- + tionality was defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has + the effect of PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} + as a hexadecimal character code, where hhh.. is any number of hexadeci- + mal digits. + + PCRE2_EXTRA_ASCII_BSD + + This option forces \d to match only ASCII digits, even when PCRE2_UCP + is set. It can be changed within a pattern by means of the (?aD) op- + tion setting. + + PCRE2_EXTRA_ASCII_BSS + + This option forces \s to match only ASCII space characters, even when + PCRE2_UCP is set. It can be changed within a pattern by means of the + (?aS) option setting. 
+ + PCRE2_EXTRA_ASCII_BSW + + This option forces \w to match only ASCII word characters, even when + PCRE2_UCP is set. It can be changed within a pattern by means of the + (?aW) option setting. + + PCRE2_EXTRA_ASCII_DIGIT + + This option forces the POSIX character classes [:digit:] and [:xdigit:] + to match only ASCII digits, even when PCRE2_UCP is set. It can be + changed within a pattern by means of the (?aT) option setting. + + PCRE2_EXTRA_ASCII_POSIX + + This option forces all the POSIX character classes, including [:digit:] + and [:xdigit:], to match only ASCII characters, even when PCRE2_UCP is + set. It can be changed within a pattern by means of the (?aP) option + setting, but note that this also sets PCRE2_EXTRA_ASCII_DIGIT in order + to ensure that (?-aP) unsets all ASCII restrictions for POSIX classes. + + PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL + + This is a dangerous option. Use with care. By default, an unrecognized + escape such as \j or a malformed one such as \x{2z} causes a compile- + time error when detected by pcre2_compile(). Perl is somewhat inconsis- + tent in handling such items: for example, \j is treated as a literal + "j", and non-hexadecimal digits in \x{} are just ignored, though warn- + ings are given in both cases if Perl's warning switch is enabled. How- + ever, a malformed octal number after \o{ always causes an error in + Perl. + + If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to + pcre2_compile(), all unrecognized or malformed escape sequences are + treated as single-character escapes. For example, \j is a literal "j" + and \x{2z} is treated as the literal string "x{2z}". Setting this op- + tion means that typos in patterns may go undetected and have unexpected + results. Also note that a sequence such as [\N{] is interpreted as a + malformed attempt at [\N{...}] and so is treated as [N{] whereas [\N] + gives an error because an unqualified \N is a valid escape sequence but + is not supported in a character class. 
To reiterate: this is a danger- + ous option. Use with great care. + + PCRE2_EXTRA_CASELESS_RESTRICT + + When either PCRE2_UCP or PCRE2_UTF is set, caseless matching follows + Unicode rules, which allow for more than two cases per character. There + are two case-equivalent character sets that contain both ASCII and non- + ASCII characters. The ASCII letter S is case-equivalent to U+017f (long + S) and the ASCII letter K is case-equivalent to U+212a (Kelvin sign). + This option disables recognition of case-equivalences that cross the + ASCII/non-ASCII boundary. In a caseless match, the two characters must + either both be ASCII or both be non-ASCII. The option can be changed within a pattern by + the (?r) option setting. + + PCRE2_EXTRA_ESCAPED_CR_IS_LF + + There are some legacy applications where the escape sequence \r in a + pattern is expected to match a newline. If this option is set, \r in a + pattern is converted to \n so that it matches a LF (linefeed) instead + of a CR (carriage return) character. The option does not affect a lit- + eral CR in the pattern, nor does it affect CR specified as an explicit + code point such as \x{0D}. + + PCRE2_EXTRA_MATCH_LINE + + This option is provided for use by the -x option of pcre2grep. It + causes the pattern only to match complete lines. This is achieved by + automatically inserting the code for "^(?:" at the start of the com- + piled pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set, + the matched line may be in the middle of the subject string. This op- + tion can be used with PCRE2_LITERAL. + + PCRE2_EXTRA_MATCH_WORD + + This option is provided for use by the -w option of pcre2grep. It + causes the pattern only to match strings that have a word boundary at + the start and the end. This is achieved by automatically inserting the + code for "\b(?:" at the start of the compiled pattern and ")\b" at the + end. The option may be used with PCRE2_LITERAL. However, it is ignored + if PCRE2_EXTRA_MATCH_LINE is also set. 
+ + +JUST-IN-TIME (JIT) COMPILATION + + int pcre2_jit_compile(pcre2_code *code, uint32_t options); + + int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); + + void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); + + pcre2_jit_stack *pcre2_jit_stack_create(size_t startsize, + size_t maxsize, pcre2_general_context *gcontext); + + void pcre2_jit_stack_assign(pcre2_match_context *mcontext, + pcre2_jit_callback callback_function, void *callback_data); + + void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack); + + These functions provide support for JIT compilation, which, if the + just-in-time compiler is available, further processes a compiled pat- + tern into machine code that executes much faster than the pcre2_match() + interpretive matching function. Full details are given in the pcre2jit + documentation. + + JIT compilation is a heavyweight optimization. It can take some time + for patterns to be analyzed, and for one-off matches and simple pat- + terns the benefit of faster execution might be offset by a much slower + compilation time. Most (but not all) patterns can be optimized by the + JIT compiler. + + +LOCALE SUPPORT + + const uint8_t *pcre2_maketables(pcre2_general_context *gcontext); + + void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); + + PCRE2 handles caseless matching, and determines whether characters are + letters, digits, or whatever, by reference to a set of tables, indexed + by character code point. However, this applies only to characters whose + code points are less than 256. By default, higher-valued code points + never match escapes such as \w or \d. 
+ + When PCRE2 is built with Unicode support (the default), certain Unicode + character properties can be tested with \p and \P, or, alternatively, + the PCRE2_UCP option can be set when a pattern is compiled; this causes + \w and friends to use Unicode property support instead of the built-in + tables. PCRE2_UCP also causes upper/lower casing operations on charac- + ters with code points greater than 127 to use Unicode properties. These + effects apply even when PCRE2_UTF is not set. There are, however, some + PCRE2_EXTRA options (see above) that can be used to modify or suppress + them. + + The use of locales with Unicode is discouraged. If you are handling + characters with code points greater than 127, you should either use + Unicode support, or use locales, but not try to mix the two. + + PCRE2 contains a built-in set of character tables that are used by de- + fault. These are sufficient for many applications. Normally, the in- + ternal tables recognize only ASCII characters. However, when PCRE2 is + built, it is possible to cause the internal tables to be rebuilt in the + default "C" locale of the local system, which may cause them to be dif- + ferent. + + The built-in tables can be overridden by tables supplied by the appli- + cation that calls PCRE2. These may be created in a different locale + from the default. As more and more applications change to using Uni- + code, the need for this locale support is expected to die away. + + External tables are built by calling the pcre2_maketables() function, + in the relevant locale. The only argument to this function is a general + context, which can be used to pass a custom memory allocator. If the + argument is NULL, the system malloc() is used. The result can be passed + to pcre2_compile() as often as necessary, by creating a compile context + and calling pcre2_set_character_tables() to set the tables pointer + therein. 
+ + For example, to build and use tables that are appropriate for the + French locale (where accented characters with values greater than 127 + are treated as letters), the following code could be used: + + setlocale(LC_CTYPE, "fr_FR"); + tables = pcre2_maketables(NULL); + ccontext = pcre2_compile_context_create(NULL); + pcre2_set_character_tables(ccontext, tables); + re = pcre2_compile(..., ccontext); + + The locale name "fr_FR" is used on Linux and other Unix-like systems; + if you are using Windows, the name for the French locale is "french". + + The pointer that is passed (via the compile context) to pcre2_compile() + is saved with the compiled pattern, and the same tables are used by the + matching functions. Thus, for any single pattern, compilation and + matching both happen in the same locale, but different patterns can be + processed in different locales. + + It is the caller's responsibility to ensure that the memory containing + the tables remains available while they are still in use. When they are + no longer needed, you can discard them using pcre2_maketables_free(), + which should pass as its first parameter the same general context that + was used to create the tables. + + Saving locale tables + + The tables described above are just a sequence of binary bytes, which + makes them independent of hardware characteristics such as endianness + or whether the processor is 32-bit or 64-bit. A copy of the result of + pcre2_maketables() can therefore be saved in a file or elsewhere and + re-used later, even in a different program or on another computer. The + size of the tables (number of bytes) must be obtained by calling + pcre2_config() with the PCRE2_CONFIG_TABLES_LENGTH option because + pcre2_maketables() does not return this value. Note that the + pcre2_dftables program, which is part of the PCRE2 build system, can be + used stand-alone to create a file that contains a set of binary tables. + See the pcre2build documentation for details.
+ + +INFORMATION ABOUT A COMPILED PATTERN + + int pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where); + + The pcre2_pattern_info() function returns general information about a + compiled pattern. For information about callouts, see the next section. + The first argument for pcre2_pattern_info() is a pointer to the com- + piled pattern. The second argument specifies which piece of information + is required, and the third argument is a pointer to a variable to re- + ceive the data. If the third argument is NULL, the first argument is + ignored, and the function returns the size in bytes of the variable + that is required for the information requested. Otherwise, the yield of + the function is zero for success, or one of the following negative num- + bers: + + PCRE2_ERROR_NULL the argument code was NULL + PCRE2_ERROR_BADMAGIC the "magic number" was not found + PCRE2_ERROR_BADOPTION the value of what was invalid + PCRE2_ERROR_UNSET the requested field is not set + + The "magic number" is placed at the start of each compiled pattern as a + simple check against passing an arbitrary memory pointer. Here is a + typical call of pcre2_pattern_info(), to obtain the length of the com- + piled pattern: + + int rc; + size_t length; + rc = pcre2_pattern_info( + re, /* result of pcre2_compile() */ + PCRE2_INFO_SIZE, /* what is required */ + &length); /* where to put the data */ + + The possible values for the second argument are defined in pcre2.h, and + are as follows: + + PCRE2_INFO_ALLOPTIONS + PCRE2_INFO_ARGOPTIONS + PCRE2_INFO_EXTRAOPTIONS + + Return copies of the pattern's options. The third argument should point + to a uint32_t variable. PCRE2_INFO_ARGOPTIONS returns exactly the op- + tions that were passed to pcre2_compile(), whereas PCRE2_INFO_ALLOP- + TIONS returns the compile options as modified by any top-level (*XXX) + option settings such as (*UTF) at the start of the pattern itself.
+ PCRE2_INFO_EXTRAOPTIONS returns the extra options that were set in the + compile context by calling the pcre2_set_compile_extra_options() func- + tion. + + For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EX- + TENDED option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED + and PCRE2_UTF. Option settings such as (?i) that can change within a + pattern do not affect the result of PCRE2_INFO_ALLOPTIONS, even if they + appear right at the start of the pattern. (This was different in some + earlier releases.) + + A pattern compiled without PCRE2_ANCHORED is automatically anchored by + PCRE2 if the first significant item in every top-level branch is one of + the following: + + ^ unless PCRE2_MULTILINE is set + \A always + \G always + .* sometimes - see below + + When .* is the first significant item, anchoring is possible only when + all the following are true: + + .* is not in an atomic group + .* is not in a capture group that is the subject + of a backreference + PCRE2_DOTALL is in force for .* + Neither (*PRUNE) nor (*SKIP) appears in the pattern + PCRE2_NO_DOTSTAR_ANCHOR is not set + + For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in + the options returned for PCRE2_INFO_ALLOPTIONS. + + PCRE2_INFO_BACKREFMAX + + Return the number of the highest backreference in the pattern. The + third argument should point to a uint32_t variable. Named capture + groups acquire numbers as well as names, and these count towards the + highest backreference. Backreferences such as \4 or \g{12} match the + captured characters of the given group, but in addition, the check that + a capture group is set in a conditional group such as (?(3)a|b) is also + a backreference. Zero is returned if there are no backreferences. + + PCRE2_INFO_BSR + + The output is a uint32_t integer whose value indicates what character + sequences the \R escape sequence matches. 
A value of PCRE2_BSR_UNICODE + means that \R matches any Unicode line ending sequence; a value of + PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, or CRLF. + + PCRE2_INFO_CAPTURECOUNT + + Return the highest capture group number in the pattern. In patterns + where (?| is not used, this is also the total number of capture groups. + The third argument should point to a uint32_t variable. + + PCRE2_INFO_DEPTHLIMIT + + If the pattern set a backtracking depth limit by including an item of + the form (*LIMIT_DEPTH=nnnn) at the start, the value is returned. The + third argument should point to a uint32_t integer. If no such value has + been set, the call to pcre2_pattern_info() returns the error PCRE2_ER- + ROR_UNSET. Note that this limit will only be used during matching if it + is less than the limit set or defaulted by the caller of the match + function. + + PCRE2_INFO_FIRSTBITMAP + + In the absence of a single first code unit for a non-anchored pattern, + pcre2_compile() may construct a 256-bit table that defines a fixed set + of values for the first code unit in any match. For example, a pattern + that starts with [abc] results in a table with three bits set. When + code unit values greater than 255 are supported, the flag bit for 255 + means "any code unit of value 255 or above". If such a table was con- + structed, a pointer to it is returned. Otherwise NULL is returned. The + third argument should point to a const uint8_t * variable. + + PCRE2_INFO_FIRSTCODETYPE + + Return information about the first code unit of any matched string, for + a non-anchored pattern. The third argument should point to a uint32_t + variable. If there is a fixed first value, for example, the letter "c" + from a pattern such as (cat|cow|coyote), 1 is returned, and the value + can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. 
If there is no fixed + first value, but it is known that a match can occur only at the start + of the subject or following a newline in the subject, 2 is returned. + Otherwise, and for anchored patterns, 0 is returned. + + PCRE2_INFO_FIRSTCODEUNIT + + Return the value of the first code unit of any matched string for a + pattern where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0. + The third argument should point to a uint32_t variable. In the 8-bit + library, the value is always less than 256. In the 16-bit library the + value can be up to 0xffff. In the 32-bit library in UTF-32 mode the + value can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 + mode. + + PCRE2_INFO_FRAMESIZE + + Return the size (in bytes) of the data frames that are used to remember + backtracking positions when the pattern is processed by pcre2_match() + without the use of JIT. The third argument should point to a size_t + variable. The frame size depends on the number of capturing parentheses + in the pattern. Each additional capture group adds two PCRE2_SIZE vari- + ables. + + PCRE2_INFO_HASBACKSLASHC + + Return 1 if the pattern contains any instances of \C, otherwise 0. The + third argument should point to a uint32_t variable. + + PCRE2_INFO_HASCRORLF + + Return 1 if the pattern contains any explicit matches for CR or LF + characters, otherwise 0. The third argument should point to a uint32_t + variable. An explicit match is either a literal CR or LF character, or + \r or \n or one of the equivalent hexadecimal or octal escape se- + quences. + + PCRE2_INFO_HEAPLIMIT + + If the pattern set a heap memory limit by including an item of the form + (*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argu- + ment should point to a uint32_t integer. If no such value has been set, + the call to pcre2_pattern_info() returns the error PCRE2_ERROR_UNSET. 
+ Note that this limit will only be used during matching if it is less + than the limit set or defaulted by the caller of the match function. + + PCRE2_INFO_JCHANGED + + Return 1 if the (?J) or (?-J) option setting is used in the pattern, + otherwise 0. The third argument should point to a uint32_t variable. + (?J) and (?-J) set and unset the local PCRE2_DUPNAMES option, respec- + tively. + + PCRE2_INFO_JITSIZE + + If the compiled pattern was successfully processed by pcre2_jit_com- + pile(), return the size of the JIT compiled code, otherwise return + zero. The third argument should point to a size_t variable. + + PCRE2_INFO_LASTCODETYPE + + Returns 1 if there is a rightmost literal code unit that must exist in + any matched string, other than at its start. The third argument should + point to a uint32_t variable. If there is no such value, 0 is returned. + When 1 is returned, the code unit value itself can be retrieved using + PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is + recorded only if it follows something of variable length. For example, + for the pattern /^a\d+z\d+/ the returned value is 1 (with "z" returned + from PCRE2_INFO_LASTCODEUNIT), but for /^a\dz\d/ the returned value is + 0. + + PCRE2_INFO_LASTCODEUNIT + + Return the value of the rightmost literal code unit that must exist in + any matched string, other than at its start, for a pattern where + PCRE2_INFO_LASTCODETYPE returns 1. Otherwise, return 0. The third argu- + ment should point to a uint32_t variable. + + PCRE2_INFO_MATCHEMPTY + + Return 1 if the pattern might match an empty string, otherwise 0. The + third argument should point to a uint32_t variable. When a pattern con- + tains recursive subroutine calls it is not always possible to determine + whether or not it can match an empty string. PCRE2 takes a cautious ap- + proach and returns 1 in such cases. 
+ + PCRE2_INFO_MATCHLIMIT + + If the pattern set a match limit by including an item of the form + (*LIMIT_MATCH=nnnn) at the start, the value is returned. The third ar- + gument should point to a uint32_t integer. If no such value has been + set, the call to pcre2_pattern_info() returns the error PCRE2_ERROR_UN- + SET. Note that this limit will only be used during matching if it is + less than the limit set or defaulted by the caller of the match func- + tion. + + PCRE2_INFO_MAXLOOKBEHIND + + A lookbehind assertion moves back a certain number of characters (not + code units) when it starts to process each of its branches. This re- + quest returns the largest of these backward moves. The third argument + should point to a uint32_t integer. The simple assertions \b and \B re- + quire a one-character lookbehind and cause PCRE2_INFO_MAXLOOKBEHIND to + return 1 in the absence of anything longer. \A also registers a one- + character lookbehind, though it does not actually inspect the previous + character. + + Note that this information is useful for multi-segment matching only if + the pattern contains no nested lookbehinds. For example, the pattern + (?<=a(?<=ba)c) returns a maximum lookbehind of 2, but when it is + processed, the first lookbehind moves back by two characters, matches + one character, then the nested lookbehind also moves back by two char- + acters. This puts the matching point three characters earlier than it + was at the start. PCRE2_INFO_MAXLOOKBEHIND is really only useful as a + debugging tool. See the pcre2partial documentation for a discussion of + multi-segment matching. + + PCRE2_INFO_MINLENGTH + + If a minimum length for matching subject strings was computed, its + value is returned. Otherwise the returned value is 0. This value is not + computed when PCRE2_NO_START_OPTIMIZE is set. The value is a number of + characters, which in UTF mode may be different from the number of code + units. The third argument should point to a uint32_t variable. 
The + value is a lower bound to the length of any matching string. There may + not be any strings of that length that do actually match, but every + string that does match is at least that long. + + PCRE2_INFO_NAMECOUNT + PCRE2_INFO_NAMEENTRYSIZE + PCRE2_INFO_NAMETABLE + + PCRE2 supports the use of named as well as numbered capturing parenthe- + ses. The names are just an additional way of identifying the parenthe- + ses, which still acquire numbers. Several convenience functions such as + pcre2_substring_get_byname() are provided for extracting captured sub- + strings by name. It is also possible to extract the data directly, by + first converting the name to a number in order to access the correct + pointers in the output vector (described with pcre2_match() below). To + do the conversion, you need to use the name-to-number map, which is de- + scribed by these three values. + + The map consists of a number of fixed-size entries. PCRE2_INFO_NAME- + COUNT gives the number of entries, and PCRE2_INFO_NAMEENTRYSIZE gives + the size of each entry in code units; both of these return a uint32_t + value. The entry size depends on the length of the longest name. + + PCRE2_INFO_NAMETABLE returns a pointer to the first entry of the table. + This is a PCRE2_SPTR pointer to a block of code units. In the 8-bit li- + brary, the first two bytes of each entry are the number of the captur- + ing parenthesis, most significant byte first. In the 16-bit library, + the pointer points to 16-bit code units, the first of which contains + the parenthesis number. In the 32-bit library, the pointer points to + 32-bit code units, the first of which contains the parenthesis number. + The rest of the entry is the corresponding name, zero terminated. + + The names are in alphabetical order. 
If (?| is used to create multiple + capture groups with the same number, as described in the section on du- + plicate group numbers in the pcre2pattern page, the groups may be given + the same name, but there is only one entry in the table. Different + names for groups of the same number are not permitted. + + Duplicate names for capture groups with different numbers are permit- + ted, but only if PCRE2_DUPNAMES is set. They appear in the table in the + order in which they were found in the pattern. In the absence of (?| + this is the order of increasing number; when (?| is used this is not + necessarily the case because later capture groups may have lower num- + bers. + + As a simple example of the name/number table, consider the following + pattern after compilation by the 8-bit library (assume PCRE2_EXTENDED + is set, so white space - including newlines - is ignored): + + (?<date> (?<year>(\d\d)?\d\d) - + (?<month>\d\d) - (?<day>\d\d) ) + + There are four named capture groups, so the table has four entries, and + each entry in the table is eight bytes long. The table is as follows, + with non-printing bytes shown in hexadecimal, and undefined bytes shown + as ??: + + 00 01 d a t e 00 ?? + 00 05 d a y 00 ?? ?? + 00 04 m o n t h 00 + 00 02 y e a r 00 ?? + + When writing code to extract data from named capture groups using the + name-to-number map, remember that the length of the entries is likely + to be different for each compiled pattern. + + PCRE2_INFO_NEWLINE + + The output is one of the following uint32_t values: + + PCRE2_NEWLINE_CR Carriage return (CR) + PCRE2_NEWLINE_LF Linefeed (LF) + PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF) + PCRE2_NEWLINE_ANY Any Unicode line ending + PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF + PCRE2_NEWLINE_NUL The NUL character (binary zero) + + This identifies the character sequence that will be recognized as mean- + ing "newline" while matching.
+ + PCRE2_INFO_SIZE + + Return the size of the compiled pattern in bytes (for all three li- + braries). The third argument should point to a size_t variable. This + value includes the size of the general data block that precedes the + code units of the compiled pattern itself. The value that is used when + pcre2_compile() is getting memory in which to place the compiled pat- + tern may be slightly larger than the value returned by this option, be- + cause there are cases where the code that calculates the size has to + over-estimate. Processing a pattern with the JIT compiler does not al- + ter the value returned by this option. + + +INFORMATION ABOUT A PATTERN'S CALLOUTS + + int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); + + A script language that supports the use of string arguments in callouts + might like to scan all the callouts in a pattern before running the + match. This can be done by calling pcre2_callout_enumerate(). The first + argument is a pointer to a compiled pattern, the second points to a + callback function, and the third is arbitrary user data. The callback + function is called for every callout in the pattern in the order in + which they appear. Its first argument is a pointer to a callout enumer- + ation block, and its second argument is the user_data value that was + passed to pcre2_callout_enumerate(). The contents of the callout enu- + meration block are described in the pcre2callout documentation, which + also gives further details about callouts. + + +SERIALIZATION AND PRECOMPILING + + It is possible to save compiled patterns on disc or elsewhere, and re- + load them later, subject to a number of restrictions. The host on which + the patterns are reloaded must be running the same version of PCRE2, + with the same code unit width, and must also have the same endianness, + pointer width, and PCRE2_SIZE type. 
Before compiled patterns can be + saved, they must be converted to a "serialized" form, which in the case + of PCRE2 is really just a bytecode dump. The functions whose names be- + gin with pcre2_serialize_ are used for converting to and from the seri- + alized form. They are described in the pcre2serialize documentation. + Note that PCRE2 serialization does not convert compiled patterns to an + abstract format like Java or .NET serialization. + + +THE MATCH DATA BLOCK + + pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, + pcre2_general_context *gcontext); + + pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext); + + void pcre2_match_data_free(pcre2_match_data *match_data); + + Information about a successful or unsuccessful match is placed in a + match data block, which is an opaque structure that is accessed by + function calls. In particular, the match data block contains a vector + of offsets into the subject string that define the matched parts of the + subject. This is known as the ovector. + + Before calling pcre2_match(), pcre2_dfa_match(), or pcre2_jit_match() + you must create a match data block by calling one of the creation func- + tions above. For pcre2_match_data_create(), the first argument is the + number of pairs of offsets in the ovector. + + When using pcre2_match(), one pair of offsets is required to identify + the string that matched the whole pattern, with an additional pair for + each captured substring. For example, a value of 4 creates enough space + to record the matched portion of the subject plus three captured sub- + strings. + + When using pcre2_dfa_match() there may be multiple matched substrings + of different lengths at the same point in the subject. The ovector + should be made large enough to hold as many as are expected. 
+ + A minimum of at least 1 pair is imposed by pcre2_match_data_create(), + so it is always possible to return the overall matched string in the + case of pcre2_match() or the longest match in the case of + pcre2_dfa_match(). The maximum number of pairs is 65535; if the first + argument of pcre2_match_data_create() is greater than this, 65535 is + used. + + The second argument of pcre2_match_data_create() is a pointer to a gen- + eral context, which can specify custom memory management for obtaining + the memory for the match data block. If you are not using custom memory + management, pass NULL, which causes malloc() to be used. + + For pcre2_match_data_create_from_pattern(), the first argument is a + pointer to a compiled pattern. The ovector is created to be exactly the + right size to hold all the substrings a pattern might capture when + matched using pcre2_match(). You should not use this call when matching + with pcre2_dfa_match(). The second argument is again a pointer to a + general context, but in this case if NULL is passed, the memory is ob- + tained using the same allocator that was used for the compiled pattern + (custom or default). + + A match data block can be used many times, with the same or different + compiled patterns. You can extract information from a match data block + after a match operation has finished, using functions that are de- + scribed in the sections on matched strings and other match data below. + + When a call of pcre2_match() fails, valid data is available in the + match block only when the error is PCRE2_ERROR_NOMATCH, PCRE2_ER- + ROR_PARTIAL, or one of the error codes for an invalid UTF string. Ex- + actly what is available depends on the error, and is detailed below. + + When one of the matching functions is called, pointers to the compiled + pattern and the subject string are set in the match data block so that + they can be referenced by the extraction functions after a successful + match. 
After running a match, you must not free a compiled pattern or a + subject string until after all operations on the match data block (for + that match) have taken place, unless, in the case of the subject + string, you have used the PCRE2_COPY_MATCHED_SUBJECT option, which is + described in the section entitled "Option bits for pcre2_match()" be- + low. + + When a match data block itself is no longer needed, it should be freed + by calling pcre2_match_data_free(). If this function is called with a + NULL argument, it returns immediately, without doing anything. + + +MEMORY USE FOR MATCH DATA BLOCKS + + PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *match_data); + + PCRE2_SIZE pcre2_get_match_data_heapframes_size( + pcre2_match_data *match_data); + + The size of a match data block depends on the size of the ovector that + it contains. The function pcre2_get_match_data_size() returns the size, + in bytes, of the block that is its argument. + + When pcre2_match() runs interpretively (that is, without using JIT), it + makes use of a vector of data frames for remembering backtracking posi- + tions. The size of each individual frame depends on the number of cap- + turing parentheses in the pattern and can be obtained by calling + pcre2_pattern_info() with the PCRE2_INFO_FRAMESIZE option (see the sec- + tion entitled "Information about a compiled pattern" above). + + Heap memory is used for the frames vector; if the initial memory block + turns out to be too small during matching, it is automatically ex- + panded. When pcre2_match() returns, the memory is not freed, but re- + mains attached to the match data block, for use by any subsequent + matches that use the same block. It is automatically freed when the + match data block itself is freed. + + You can find the current size of the frames vector that a match data + block owns by calling pcre2_get_match_data_heapframes_size(). For a + newly created match data block the size will be zero. 
Some types of + match may require a lot of frames and thus a large vector; applications + that run in environments where memory is constrained can check this and + free the match data block if the heap frames vector has become too big. + + +MATCHING A PATTERN: THE TRADITIONAL FUNCTION + + int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); + + The function pcre2_match() is called to match a subject string against + a compiled pattern, which is passed in the code argument. You can call + pcre2_match() with the same code argument as many times as you like, in + order to find multiple matches in the subject string or to match dif- + ferent subject strings with the same pattern. + + This function is the main matching facility of the library, and it op- + erates in a Perl-like manner. For specialist use there is also an al- + ternative matching function, which is described below in the section + about the pcre2_dfa_match() function. + + Here is an example of a simple call to pcre2_match(): + + pcre2_match_data *md = pcre2_match_data_create(4, NULL); + int rc = pcre2_match( + re, /* result of pcre2_compile() */ + "some string", /* the subject string */ + 11, /* the length of the subject string */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + md, /* the match data block */ + NULL); /* a match context; NULL means use defaults */ + + If the subject string is zero-terminated, the length can be given as + PCRE2_ZERO_TERMINATED. A match context must be provided if certain less + common matching parameters are to be changed. For details, see the sec- + tion on the match context above. + + The string to be matched by pcre2_match() + + The subject string is passed to pcre2_match() as a pointer in subject, + a length in length, and a starting offset in startoffset. 
The length + and offset are in code units, not characters. That is, they are in + bytes for the 8-bit library, 16-bit code units for the 16-bit library, + and 32-bit code units for the 32-bit library, whether or not UTF pro- + cessing is enabled. As a special case, if subject is NULL and length is + zero, the subject is assumed to be an empty string. If length is non- + zero, an error occurs if subject is NULL. + + If startoffset is greater than the length of the subject, pcre2_match() + returns PCRE2_ERROR_BADOFFSET. When the starting offset is zero, the + search for a match starts at the beginning of the subject, and this is + by far the most common case. In UTF-8 or UTF-16 mode, the starting off- + set must point to the start of a character, or to the end of the sub- + ject (in UTF-32 mode, one code unit equals one character, so all off- + sets are valid). Like the pattern string, the subject may contain bi- + nary zeros. + + A non-zero starting offset is useful when searching for another match + in the same subject by calling pcre2_match() again after a previous + success. Setting startoffset differs from passing over a shortened + string and setting PCRE2_NOTBOL in the case of a pattern that begins + with any kind of lookbehind. For example, consider the pattern + + \Biss\B + + which finds occurrences of "iss" in the middle of words. (\B matches + only if the current position in the subject is not a word boundary.) + When applied to the string "Mississippi" the first call to + pcre2_match() finds the first occurrence. If pcre2_match() is called + again with just the remainder of the subject, namely "issippi", it does + not match, because \B is always false at the start of the subject, + which is deemed to be a word boundary. However, if pcre2_match() is + passed the entire string again, but with startoffset set to 4, it finds + the second occurrence of "iss" because it is able to look behind the + starting point to discover that it is preceded by a letter. 
+ + Finding all the matches in a subject is tricky when the pattern can + match an empty string. It is possible to emulate Perl's /g behaviour by + first trying the match again at the same offset, with the + PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED options, and then if that + fails, advancing the starting offset and trying an ordinary match + again. There is some code that demonstrates how to do this in the + pcre2demo sample program. In the most general case, you have to check + to see if the newline convention recognizes CRLF as a newline, and if + so, and the current character is CR followed by LF, advance the start- + ing offset by two characters instead of one. + + If a non-zero starting offset is passed when the pattern is anchored, a + single attempt to match at the given offset is made. This can only suc- + ceed if the pattern does not require the match to be at the start of + the subject. In other words, the anchoring must be the result of set- + ting the PCRE2_ANCHORED option or the use of .* with PCRE2_DOTALL, not + by starting the pattern with ^ or \A. + + Option bits for pcre2_match() + + The unused bits of the options argument for pcre2_match() must be zero. + The only bits that may be set are PCRE2_ANCHORED, + PCRE2_COPY_MATCHED_SUBJECT, PCRE2_DISABLE_RECURSELOOP_CHECK, PCRE2_EN- + DANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, + PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PAR- + TIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below. + + Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not sup- + ported by the just-in-time (JIT) compiler. If it is set, JIT matching + is disabled and the interpretive code in pcre2_match() is run. + PCRE2_DISABLE_RECURSELOOP_CHECK is ignored by JIT, but apart from + PCRE2_NO_JIT (obviously), the remaining options are supported for JIT + matching. + + PCRE2_ANCHORED + + The PCRE2_ANCHORED option limits pcre2_match() to matching at the first + matching position. 
If a pattern was compiled with PCRE2_ANCHORED, or + turned out to be anchored by virtue of its contents, it cannot be made + unanchored at matching time. Note that setting the option at match time + disables JIT matching. + + PCRE2_COPY_MATCHED_SUBJECT + + By default, a pointer to the subject is remembered in the match data + block so that, after a successful match, it can be referenced by the + substring extraction functions. This means that the subject's memory + must not be freed until all such operations are complete. For some ap- + plications where the lifetime of the subject string is not guaranteed, + it may be necessary to make a copy of the subject string, but it is + wasteful to do this unless the match is successful. After a successful + match, if PCRE2_COPY_MATCHED_SUBJECT is set, the subject is copied and + the new pointer is remembered in the match data block instead of the + original subject pointer. The memory allocator that was used for the + match block itself is used. The copy is automatically freed when + pcre2_match_data_free() is called to free the match data block. It is + also automatically freed if the match data block is re-used for another + match operation. + + PCRE2_DISABLE_RECURSELOOP_CHECK + + This option is relevant only to pcre2_match() for interpretive match- + ing. It is ignored when JIT is used, and is forbidden for + pcre2_dfa_match(). + + The use of recursion in patterns can lead to infinite loops. In the in- + terpretive matcher these would be eventually caught by the match or + heap limits, but this could take a long time and/or use a lot of memory + if the limits are large. There is therefore a check at the start of + each recursion. If the same group is still active from a previous + call, and the current subject pointer is the same as it was at the + start of that group, and the furthest inspected character of the sub- + ject has not changed, an error is generated.
+ + There are rare cases of matches that would complete, but nevertheless + trigger this error. This option disables the check. It is provided + mainly for testing when comparing JIT and interpretive behaviour. + + PCRE2_ENDANCHORED + + If the PCRE2_ENDANCHORED option is set, any string that pcre2_match() + matches must be right at the end of the subject string. Note that set- + ting the option at match time disables JIT matching. + + PCRE2_NOTBOL + + This option specifies that the first character of the subject string is not + the beginning of a line, so the circumflex metacharacter should not + match before it. Setting this without having set PCRE2_MULTILINE at + compile time causes circumflex never to match. This option affects only + the behaviour of the circumflex metacharacter. It does not affect \A. + + PCRE2_NOTEOL + + This option specifies that the end of the subject string is not the end + of a line, so the dollar metacharacter should not match it nor (except + in multiline mode) a newline immediately before it. Setting this with- + out having set PCRE2_MULTILINE at compile time causes dollar never to + match. This option affects only the behaviour of the dollar metacharac- + ter. It does not affect \Z or \z. + + PCRE2_NOTEMPTY + + An empty string is not considered to be a valid match if this option is + set. If there are alternatives in the pattern, they are tried. If all + the alternatives match the empty string, the entire match fails. For + example, if the pattern + + a?b? + + is applied to a string not beginning with "a" or "b", it matches an + empty string at the start of the subject. With PCRE2_NOTEMPTY set, this + match is not valid, so pcre2_match() searches further into the string + for occurrences of "a" or "b". + + PCRE2_NOTEMPTY_ATSTART + + This is like PCRE2_NOTEMPTY, except that it locks out an empty string + match only at the first matching position, that is, at the start of the + subject plus the starting offset.
An empty string match later in the + subject is permitted. If the pattern is anchored, such a match can oc- + cur only if the pattern contains \K. + + PCRE2_NO_JIT + + By default, if a pattern has been successfully processed by + pcre2_jit_compile(), JIT is automatically used when pcre2_match() is + called with options that JIT supports. Setting PCRE2_NO_JIT disables + the use of JIT; it forces matching to be done by the interpreter. + + PCRE2_NO_UTF_CHECK + + When PCRE2_UTF is set at compile time, the validity of the subject as a + UTF string is checked unless PCRE2_NO_UTF_CHECK is passed to + pcre2_match() or PCRE2_MATCH_INVALID_UTF was passed to pcre2_compile(). + The latter special case is discussed in detail in the pcre2unicode doc- + umentation. + + In the default case, if a non-zero starting offset is given, the check + is applied only to that part of the subject that could be inspected + during matching, and there is a check that the starting offset points + to the first code unit of a character or to the end of the subject. If + there are no lookbehind assertions in the pattern, the check starts at + the starting offset. Otherwise, it starts at the length of the longest + lookbehind before the starting offset, or at the start of the subject + if there are not that many characters before the starting offset. Note + that the sequences \b and \B are one-character lookbehinds. + + The check is carried out before any other processing takes place, and a + negative error code is returned if the check fails. There are several + UTF error codes for each code unit width, corresponding to different + problems with the code unit sequence. There are discussions about the + validity of UTF-8 strings, UTF-16 strings, and UTF-32 strings in the + pcre2unicode documentation. + + If you know that your subject is valid, and you want to skip this check + for performance reasons, you can set the PCRE2_NO_UTF_CHECK option when + calling pcre2_match(). 
You might want to do this for the second and + subsequent calls to pcre2_match() if you are making repeated calls to + find multiple matches in the same subject string. + + Warning: Unless PCRE2_MATCH_INVALID_UTF was set at compile time, when + PCRE2_NO_UTF_CHECK is set at match time the effect of passing an in- + valid string as a subject, or an invalid value of startoffset, is unde- + fined. Your program may crash or loop indefinitely or give wrong re- + sults. + + PCRE2_PARTIAL_HARD + PCRE2_PARTIAL_SOFT + + These options turn on the partial matching feature. A partial match oc- + curs if the end of the subject string is reached successfully, but + there are not enough subject characters to complete the match. In addi- + tion, either at least one character must have been inspected or the + pattern must contain a lookbehind, or the pattern must be one that + could match an empty string. + + If this situation arises when PCRE2_PARTIAL_SOFT (but not PCRE2_PAR- + TIAL_HARD) is set, matching continues by testing any remaining alterna- + tives. Only if no complete match can be found is PCRE2_ERROR_PARTIAL + returned instead of PCRE2_ERROR_NOMATCH. In other words, PCRE2_PAR- + TIAL_SOFT specifies that the caller is prepared to handle a partial + match, but only if no complete match can be found. + + If PCRE2_PARTIAL_HARD is set, it overrides PCRE2_PARTIAL_SOFT. In this + case, if a partial match is found, pcre2_match() immediately returns + PCRE2_ERROR_PARTIAL, without considering any other alternatives. In + other words, when PCRE2_PARTIAL_HARD is set, a partial match is consid- + ered to be more important than an alternative complete match. + + There is a more detailed discussion of partial and multi-segment match- + ing, with examples, in the pcre2partial documentation. + + +NEWLINE HANDLING WHEN MATCHING + + When PCRE2 is built, a default newline convention is set; this is usu- + ally the standard convention for the operating system.
The default can + be overridden in a compile context by calling pcre2_set_newline(). It + can also be overridden by starting a pattern string with, for example, + (*CRLF), as described in the section on newline conventions in the + pcre2pattern page. During matching, the newline choice affects the be- + haviour of the dot, circumflex, and dollar metacharacters. It may also + alter the way the match starting position is advanced after a match + failure for an unanchored pattern. + + When PCRE2_NEWLINE_CRLF, PCRE2_NEWLINE_ANYCRLF, or PCRE2_NEWLINE_ANY is + set as the newline convention, and a match attempt for an unanchored + pattern fails when the current starting position is at a CRLF sequence, + and the pattern contains no explicit matches for CR or LF characters, + the match position is advanced by two characters instead of one, in + other words, to after the CRLF. + + The above rule is a compromise that makes the most common cases work as + expected. For example, if the pattern is .+A (and the PCRE2_DOTALL op- + tion is not set), it does not match the string "\r\nA" because, after + failing at the start, it skips both the CR and the LF before retrying. + However, the pattern [\r\n]A does match that string, because it con- + tains an explicit CR or LF reference, and so advances only by one char- + acter after the first failure. + + An explicit match for CR or LF is either a literal appearance of one of + those characters in the pattern, or one of the \r or \n or equivalent + octal or hexadecimal escape sequences. Implicit matches such as [^X] do + not count, nor does \s, even though it includes CR and LF in the char- + acters that it matches. + + Notwithstanding the above, anomalous effects may still occur when CRLF + is a valid newline sequence and explicit \r or \n escapes appear in the + pattern.
+ + +HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS + + uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data); + + PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data); + + In general, a pattern matches a certain portion of the subject, and in + addition, further substrings from the subject may be picked out by + parenthesized parts of the pattern. Following the usage in Jeffrey + Friedl's book, this is called "capturing" in what follows, and the + phrase "capture group" (Perl terminology) is used for a fragment of a + pattern that picks out a substring. PCRE2 supports several other kinds + of parenthesized group that do not cause substrings to be captured. The + pcre2_pattern_info() function can be used to find out how many capture + groups there are in a compiled pattern. + + You can use auxiliary functions for accessing captured substrings by + number or by name, as described in sections below. + + Alternatively, you can make direct use of the vector of PCRE2_SIZE val- + ues, called the ovector, which contains the offsets of captured + strings. It is part of the match data block. The function + pcre2_get_ovector_pointer() returns the address of the ovector, and + pcre2_get_ovector_count() returns the number of pairs of values it con- + tains. + + Within the ovector, the first in each pair of values is set to the off- + set of the first code unit of a substring, and the second is set to the + offset of the first code unit after the end of a substring. These val- + ues are always code unit offsets, not character offsets. That is, they + are byte offsets in the 8-bit library, 16-bit offsets in the 16-bit li- + brary, and 32-bit offsets in the 32-bit library. + + After a partial match (error return PCRE2_ERROR_PARTIAL), only the + first pair of offsets (that is, ovector[0] and ovector[1]) are set. + They identify the part of the subject that was partially matched. 
See + the pcre2partial documentation for details of partial matching. + + After a fully successful match, the first pair of offsets identifies + the portion of the subject string that was matched by the entire pat- + tern. The next pair is used for the first captured substring, and so + on. The value returned by pcre2_match() is one more than the highest + numbered pair that has been set. For example, if two substrings have + been captured, the returned value is 3. If there are no captured sub- + strings, the return value from a successful match is 1, indicating that + just the first pair of offsets has been set. + + If a pattern uses the \K escape sequence within a positive assertion, + the reported start of a successful match can be greater than the end of + the match. For example, if the pattern (?=ab\K) is matched against + "ab", the start and end offset values for the match are 2 and 0. + + If a capture group is matched repeatedly within a single match opera- + tion, it is the last portion of the subject that it matched that is re- + turned. + + If the ovector is too small to hold all the captured substring offsets, + as much as possible is filled in, and the function returns a value of + zero. If captured substrings are not of interest, pcre2_match() may be + called with a match data block whose ovector is of minimum length (that + is, one pair). + + It is possible for capture group number n+1 to match some part of the + subject when group n has not been used at all. For example, if the + string "abc" is matched against the pattern (a|(z))(bc) the return from + the function is 4, and groups 1 and 3 are matched, but 2 is not. When + this happens, both values in the offset pairs corresponding to unused + groups are set to PCRE2_UNSET. + + Offset values that correspond to unused groups at the end of the ex- + pression are also set to PCRE2_UNSET. For example, if the string "abc" + is matched against the pattern (abc)(x(yz)?)? groups 2 and 3 are not + matched. 
The return from the function is 2, because the highest used + capture group number is 1. The offsets for the second and third capture + groups (assuming the vector is large enough, of course) are set to + PCRE2_UNSET. + + Elements in the ovector that do not correspond to capturing parentheses + in the pattern are never changed. That is, if a pattern contains n cap- + turing parentheses, no more than ovector[0] to ovector[2n+1] are set by + pcre2_match(). The other elements retain whatever values they previ- + ously had. After a failed match attempt, the contents of the ovector + are unchanged. + + +OTHER INFORMATION ABOUT A MATCH + + PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data); + + PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data); + + As well as the offsets in the ovector, other information about a match + is retained in the match data block and can be retrieved by the above + functions in appropriate circumstances. If they are called at other + times, the result is undefined. + + After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a + failure to match (PCRE2_ERROR_NOMATCH), a mark name may be available. + The function pcre2_get_mark() can be called to access this name, which + can be specified in the pattern by any of the backtracking control + verbs, not just (*MARK). The same function applies to all the verbs. It + returns a pointer to the zero-terminated name, which is within the com- + piled pattern. If no name is available, NULL is returned. The length of + the name (excluding the terminating zero) is stored in the code unit + that precedes the name. You should use this length instead of relying + on the terminating zero if the name might contain a binary zero. + + After a successful match, the name that is returned is the last mark + name encountered on the matching path through the pattern. Instances of + backtracking verbs without names do not count. 
Thus, for example, if + the matching path contains (*MARK:A)(*PRUNE), the name "A" is returned. + After a "no match" or a partial match, the last encountered name is re- + turned. For example, consider this pattern: + + ^(*MARK:A)((*MARK:B)a|b)c + + When it matches "bc", the returned name is A. The B mark is "seen" in + the first branch of the group, but it is not on the matching path. On + the other hand, when this pattern fails to match "bx", the returned + name is B. + + Warning: By default, certain start-of-match optimizations are used to + give a fast "no match" result in some situations. For example, if the + anchoring is removed from the pattern above, there is an initial check + for the presence of "c" in the subject before running the matching en- + gine. This check fails for "bx", causing a match failure without seeing + any marks. You can disable the start-of-match optimizations by setting + the PCRE2_NO_START_OPTIMIZE option for pcre2_compile() or by starting + the pattern with (*NO_START_OPT). + + After a successful match, a partial match, or one of the invalid UTF + errors (for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can + be called. After a successful or partial match it returns the code unit + offset of the character at which the match started. For a non-partial + match, this can be different to the value of ovector[0] if the pattern + contains the \K escape sequence. After a partial match, however, this + value is always the same as ovector[0] because \K does not affect the + result of a partial match. + + After a UTF check failure, pcre2_get_startchar() can be used to obtain + the code unit offset of the invalid UTF character. Details are given in + the pcre2unicode page. + + +ERROR RETURNS FROM pcre2_match() + + If pcre2_match() fails, it returns a negative number. This can be con- + verted to a text string by calling the pcre2_get_error_message() func- + tion (see "Obtaining a textual error message" below). 
Negative error + codes are also returned by other functions, and are documented with + them. The codes are given names in the header file. If UTF checking is + in force and an invalid UTF subject string is detected, one of a number + of UTF-specific negative error codes is returned. Details are given in + the pcre2unicode page. The following are the other errors that may be + returned by pcre2_match(): + + PCRE2_ERROR_NOMATCH + + The subject string did not match the pattern. + + PCRE2_ERROR_PARTIAL + + The subject string did not match, but it did match partially. See the + pcre2partial documentation for details of partial matching. + + PCRE2_ERROR_BADMAGIC + + PCRE2 stores a 4-byte "magic number" at the start of the compiled code, + to catch the case when it is passed a junk pointer. This is the error + that is returned when the magic number is not present. + + PCRE2_ERROR_BADMODE + + This error is given when a compiled pattern is passed to a function in + a library of a different code unit width, for example, a pattern com- + piled by the 8-bit library is passed to a 16-bit or 32-bit library + function. + + PCRE2_ERROR_BADOFFSET + + The value of startoffset was greater than the length of the subject. + + PCRE2_ERROR_BADOPTION + + An unrecognized bit was set in the options argument. + + PCRE2_ERROR_BADUTFOFFSET + + The UTF code unit sequence that was passed as a subject was checked and + found to be valid (the PCRE2_NO_UTF_CHECK option was not set), but the + value of startoffset did not point to the beginning of a UTF character + or the end of the subject. + + PCRE2_ERROR_CALLOUT + + This error is never generated by pcre2_match() itself. It is provided + for use by callout functions that want to cause pcre2_match() or + pcre2_callout_enumerate() to return a distinctive error code. See the + pcre2callout documentation for details. + + PCRE2_ERROR_DEPTHLIMIT + + The nested backtracking depth limit was reached. 
+ + PCRE2_ERROR_HEAPLIMIT + + The heap limit was reached. + + PCRE2_ERROR_INTERNAL + + An unexpected internal error has occurred. This error could be caused + by a bug in PCRE2 or by overwriting of the compiled pattern. + + PCRE2_ERROR_JIT_STACKLIMIT + + This error is returned when a pattern that was successfully studied us- + ing JIT is being matched, but the memory available for the just-in-time + processing stack is not large enough. See the pcre2jit documentation + for more details. + + PCRE2_ERROR_MATCHLIMIT + + The backtracking match limit was reached. + + PCRE2_ERROR_NOMEMORY + + Heap memory is used to remember backtracking points. This error is + given when the memory allocation function (default or custom) fails. + Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given if the + amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is + also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory alloca- + tion fails. + + PCRE2_ERROR_NULL + + Either the code, subject, or match_data argument was passed as NULL. + + PCRE2_ERROR_RECURSELOOP + + This error is returned when pcre2_match() detects a recursion loop + within the pattern. Specifically, it means that either the whole pat- + tern or a capture group has been called recursively for the second time + at the same position in the subject string. Some simple patterns that + might do this are detected and faulted at compile time, but more com- + plicated cases, in particular mutual recursions between two different + groups, cannot be detected until matching is attempted. + + +OBTAINING A TEXTUAL ERROR MESSAGE + + int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, + PCRE2_SIZE bufflen); + + A text message for an error code from any PCRE2 function (compile, + match, or auxiliary) can be obtained by calling pcre2_get_error_mes- + sage(). 
The code is passed as the first argument, with the remaining + two arguments specifying a code unit buffer and its length in code + units, into which the text message is placed. The message is returned + in code units of the appropriate width for the library that is being + used. + + The returned message is terminated with a trailing zero, and the func- + tion returns the number of code units used, excluding the trailing + zero. If the error number is unknown, the negative error code PCRE2_ER- + ROR_BADDATA is returned. If the buffer is too small, the message is + truncated (but still with a trailing zero), and the negative error code + PCRE2_ERROR_NOMEMORY is returned. None of the messages are very long; + a buffer size of 120 code units is ample. + + +EXTRACTING CAPTURED SUBSTRINGS BY NUMBER + + int pcre2_substring_length_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_SIZE *length); + + int pcre2_substring_copy_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR *buffer, + PCRE2_SIZE *bufflen); + + int pcre2_substring_get_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR **bufferptr, + PCRE2_SIZE *bufflen); + + void pcre2_substring_free(PCRE2_UCHAR *buffer); + + Captured substrings can be accessed directly by using the ovector as + described above. For convenience, auxiliary functions are provided for + extracting captured substrings as new, separate, zero-terminated + strings. A substring that contains a binary zero is correctly extracted + and has a further zero added on the end, but the result is not, of + course, a C string. + + The functions in this section identify substrings by number. The number + zero refers to the entire matched substring, with higher numbers refer- + ring to substrings captured by parenthesized groups. After a partial + match, only substring zero is available. An attempt to extract any + other substring gives the error PCRE2_ERROR_PARTIAL. 
The next section + describes similar functions for extracting captured substrings by name. + + If a pattern uses the \K escape sequence within a positive assertion, + the reported start of a successful match can be greater than the end of + the match. For example, if the pattern (?=ab\K) is matched against + "ab", the start and end offset values for the match are 2 and 0. In + this situation, calling these functions with a zero substring number + extracts a zero-length empty string. + + You can find the length in code units of a captured substring without + extracting it by calling pcre2_substring_length_bynumber(). The first + argument is a pointer to the match data block, the second is the group + number, and the third is a pointer to a variable into which the length + is placed. If you just want to know whether or not the substring has + been captured, you can pass the third argument as NULL. + + The pcre2_substring_copy_bynumber() function copies a captured sub- + string into a supplied buffer, whereas pcre2_substring_get_bynumber() + copies it into new memory, obtained using the same memory allocation + function that was used for the match data block. The first two argu- + ments of these functions are a pointer to the match data block and a + capture group number. + + The final arguments of pcre2_substring_copy_bynumber() are a pointer to + the buffer and a pointer to a variable that contains its length in code + units. This is updated to contain the actual number of code units used + for the extracted substring, excluding the terminating zero. + + For pcre2_substring_get_bynumber() the third and fourth arguments point + to variables that are updated with a pointer to the new memory and the + number of code units that comprise the substring, again excluding the + terminating zero. When the substring is no longer needed, the memory + should be freed by calling pcre2_substring_free(). 
+ + The return value from all these functions is zero for success, or a + negative error code. If the pattern match failed, the match failure + code is returned. If a substring number greater than zero is used af- + ter a partial match, PCRE2_ERROR_PARTIAL is returned. Other possible + error codes are: + + PCRE2_ERROR_NOMEMORY + + The buffer was too small for pcre2_substring_copy_bynumber(), or the + attempt to get memory failed for pcre2_substring_get_bynumber(). + + PCRE2_ERROR_NOSUBSTRING + + There is no substring with that number in the pattern, that is, the + number is greater than the number of capturing parentheses. + + PCRE2_ERROR_UNAVAILABLE + + The substring number, though not greater than the number of captures in + the pattern, is greater than the number of slots in the ovector, so the + substring could not be captured. + + PCRE2_ERROR_UNSET + + The substring did not participate in the match. For example, if the + pattern is (abc)|(def) and the subject is "def", and the ovector con- + tains at least two capturing slots, substring number 1 is unset. + + +EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS + + int pcre2_substring_list_get(pcre2_match_data *match_data, + PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr); + + void pcre2_substring_list_free(PCRE2_UCHAR **list); + + The pcre2_substring_list_get() function extracts all available sub- + strings and builds a list of pointers to them. It also (optionally) + builds a second list that contains their lengths (in code units), ex- + cluding a terminating zero that is added to each of them. All this is + done in a single block of memory that is obtained using the same memory + allocation function that was used to get the match data block. + + This function must be called only after a successful match. If called + after a partial match, the error code PCRE2_ERROR_PARTIAL is returned. + + The address of the memory block is returned via listptr, which is also + the start of the list of string pointers. 
The end of the list is marked + by a NULL pointer. The address of the list of lengths is returned via + lengthsptr. If your strings do not contain binary zeros and you do not + therefore need the lengths, you may supply NULL as the lengthsptr argu- + ment to disable the creation of a list of lengths. The yield of the + function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the mem- + ory block could not be obtained. When the list is no longer needed, it + should be freed by calling pcre2_substring_list_free(). + + If this function encounters a substring that is unset, which can happen + when capture group number n+1 matches some part of the subject, but + group n has not been used at all, it returns an empty string. This can + be distinguished from a genuine zero-length substring by inspecting the + appropriate offset in the ovector, which contains PCRE2_UNSET for unset + substrings, or by calling pcre2_substring_length_bynumber(). + + +EXTRACTING CAPTURED SUBSTRINGS BY NAME + + int pcre2_substring_number_from_name(const pcre2_code *code, + PCRE2_SPTR name); + + int pcre2_substring_length_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_SIZE *length); + + int pcre2_substring_copy_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen); + + int pcre2_substring_get_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen); + + void pcre2_substring_free(PCRE2_UCHAR *buffer); + + To extract a substring by name, you first have to find the associated num- + ber. For example, for this pattern: + + (a+)b(?<xxx>\d+)... + + the number of the capture group called "xxx" is 2. If the name is known + to be unique (PCRE2_DUPNAMES was not set), you can find the number from + the name by calling pcre2_substring_number_from_name(). The first argu- + ment is the compiled pattern, and the second is the name.
The yield of + the function is the group number, PCRE2_ERROR_NOSUBSTRING if there is + no group with that name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is + more than one group with that name. Given the number, you can extract + the substring directly from the ovector, or use one of the "bynumber" + functions described above. + + For convenience, there are also "byname" functions that correspond to + the "bynumber" functions, the only difference being that the second ar- + gument is a name instead of a number. If PCRE2_DUPNAMES is set and + there are duplicate names, these functions scan all the groups with the + given name, and return the captured substring from the first named + group that is set. + + If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is + returned. If all groups with the name have numbers that are greater + than the number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is re- + turned. If there is at least one group with a slot in the ovector, but + no group is found to be set, PCRE2_ERROR_UNSET is returned. + + Warning: If the pattern uses the (?| feature to set up multiple capture + groups with the same number, as described in the section on duplicate + group numbers in the pcre2pattern page, you cannot use names to distin- + guish the different capture groups, because names are not included in + the compiled code. The matching process uses only numbers. For this + reason, the use of different names for groups with the same number + causes an error at compile time. 
+ + +CREATING A NEW STRING WITH SUBSTITUTIONS + + int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, PCRE2_SPTR replacement, + PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer, + PCRE2_SIZE *outlengthptr); + + This function optionally calls pcre2_match() and then makes a copy of + the subject string in outputbuffer, replacing parts that were matched + with the replacement string, whose length is supplied in rlength, which + can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As + a special case, if replacement is NULL and rlength is zero, the re- + placement is assumed to be an empty string. If rlength is non-zero, an + error occurs if replacement is NULL. + + There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to re- + turn just the replacement string(s). The default action is to perform + just one replacement if the pattern matches, but there is an option + that requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL be- + low). + + If successful, pcre2_substitute() returns the number of substitutions + that were carried out. This may be zero if no match was found, and is + never greater than one unless PCRE2_SUBSTITUTE_GLOBAL is set. A nega- + tive value is returned if an error is detected. + + Matches in which a \K item in a lookahead in the pattern causes the + match to end before it starts are not supported, and give rise to an + error return. For global replacements, matches in which \K in a lookbe- + hind causes the match to start earlier than the point that was reached + in the previous iteration are also not supported. 
+ + The first seven arguments of pcre2_substitute() are the same as for + pcre2_match(), except that the partial matching options are not permit- + ted, and match_data may be passed as NULL, in which case a match data + block is obtained and freed within this function, using memory manage- + ment functions from the match context, if provided, or else those that + were used to allocate memory for the compiled code. + + If match_data is not NULL and PCRE2_SUBSTITUTE_MATCHED is not set, the + provided block is used for all calls to pcre2_match(), and its contents + afterwards are the result of the final call. For global changes, this + will always be a no-match error. The contents of the ovector within the + match data block may or may not have been changed. + + As well as the usual options for pcre2_match(), a number of additional + options can be set in the options argument of pcre2_substitute(). One + such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external + match_data block must be provided, and it must have already been used + for an external call to pcre2_match() with the same pattern and subject + arguments. The data in the match_data block (return code, offset vec- + tor) is then used for the first substitution instead of calling + pcre2_match() from within pcre2_substitute(). This allows an applica- + tion to check for a match before choosing to substitute, without having + to repeat the match. + + The contents of the externally supplied match data block are not + changed when PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTI- + TUTE_GLOBAL is also set, pcre2_match() is called after the first sub- + stitution to check for further matches, but this is done using an in- + ternally obtained match data block, thus always leaving the external + block unchanged. 
+ + The code argument is not used for matching before the first substitu- + tion when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, + even when PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains in- + formation such as the UTF setting and the number of capturing parenthe- + ses in the pattern. + + The default action of pcre2_substitute() is to return a copy of the + subject string with matched substrings replaced. However, if PCRE2_SUB- + STITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are + returned. In the global case, multiple replacements are concatenated in + the output buffer. Substitution callouts (see below) can be used to + separate them if necessary. + + The outlengthptr argument of pcre2_substitute() must point to a vari- + able that contains the length, in code units, of the output buffer. If + the function is successful, the value is updated to contain the length + in code units of the new string, excluding the trailing zero that is + automatically added. + + If the function is not successful, the value set via outlengthptr de- + pends on the type of error. For syntax errors in the replacement + string, the value is the offset in the replacement string where the er- + ror was detected. For other errors, the value is PCRE2_UNSET by de- + fault. This includes the case of the output buffer being too small, un- + less PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set. + + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output + buffer is too small. The default action is to return PCRE2_ERROR_NOMEM- + ORY immediately. If this option is set, however, pcre2_substitute() + continues to go through the motions of matching and substituting (with- + out, of course, writing anything) in order to compute the size of + buffer that is needed. This value is passed back via the outlengthptr + variable, with the result of the function still being PCRE2_ER- + ROR_NOMEMORY. 
+ + Passing a buffer size of zero is a permitted way of finding out how + much memory is needed for a given substitution. However, this does mean + that the entire operation is carried out twice. Depending on the appli- + cation, it may be more efficient to allocate a large buffer and free + the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER- + FLOW_LENGTH. + + The replacement string, which is interpreted as a UTF string in UTF + mode, is checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An + invalid UTF replacement string causes an immediate return with the rel- + evant UTF error code. + + If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not in- + terpreted in any way. By default, however, a dollar character is an es- + cape character that can specify the insertion of characters from cap- + ture groups and names from (*MARK) or other control verbs in the pat- + tern. Dollar is the only escape character (backslash is treated as lit- + eral). The following forms are always recognized: + + $$ insert a dollar character + $<n> or ${<n>} insert the contents of group <n> + $*MARK or ${*MARK} insert a control verb name + + Either a group number or a group name can be given for <n>. Curly + brackets are required only if the following character would be inter- + preted as part of the number or name. The number may be zero to include + the entire matched string. For example, if the pattern a(b)c is + matched with "=abc=" and the replacement string "+$1$0$1+", the result + is "=+babcb+=". + + $*MARK inserts the name from the last encountered backtracking control + verb on the matching path that has a name. (*MARK) must always include + a name, but the other verbs need not. For example, in the case of + (*MARK:A)(*PRUNE) the name inserted is "A", but for (*MARK:A)(*PRUNE:B) + the relevant name is "B". 
This facility can be used to perform simple + simultaneous substitutions, as this pcre2test example shows: + + /(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK} + apple lemon + 2: pear orange + + PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject + string, replacing every matching substring. If this option is not set, + only the first matching substring is replaced. The search for matches + takes place in the original subject string (that is, previous replace- + ments do not affect it). Iteration is implemented by advancing the + startoffset value for each search, which is always passed the entire + subject string. If an offset limit is set in the match context, search- + ing stops when that limit is reached. + + You can restrict the effect of a global substitution to a portion of + the subject string by setting either or both of startoffset and an off- + set limit. Here is a pcre2test example: + + /B/g,replace=!,use_offset_limit + ABC ABC ABC ABC\=offset=3,offset_limit=12 + 2: ABC A!C A!C ABC + + When continuing with global substitutions after matching a substring + with zero length, an attempt to find a non-empty match at the same off- + set is performed. If this is not successful, the offset is advanced by + one character except when CRLF is a valid newline sequence and the next + two characters are CR, LF. In this case, the offset is advanced by two + characters. + + PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that + do not appear in the pattern to be treated as unset groups. This option + should be used with care, because it means that a typo in a group name + or number no longer causes the PCRE2_ERROR_NOSUBSTRING error. + + PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including un- + known groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated + as empty strings when inserted as described above. 
If this option is + not set, an attempt to insert an unset group causes the PCRE2_ERROR_UN- + SET error. This option does not influence the extended substitution + syntax described below. + + PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the + replacement string. Without this option, only the dollar character is + special, and only the group insertion forms listed above are valid. + When PCRE2_SUBSTITUTE_EXTENDED is set, two things change: + + Firstly, backslash in a replacement string is interpreted as an escape + character. The usual forms such as \n or \x{ddd} can be used to specify + particular character codes, and backslash followed by any non-alphanu- + meric character quotes that character. Extended quoting can be coded + using \Q...\E, exactly as in pattern strings. + + There are also four escape sequences for forcing the case of inserted + letters. The insertion mechanism has three states: no case forcing, + force upper case, and force lower case. The escape sequences change the + current state: \U and \L change to upper or lower case forcing, respec- + tively, and \E (when not terminating a \Q quoted sequence) reverts to + no case forcing. The sequences \u and \l force the next character (if + it is a letter) to upper or lower case, respectively, and then the + state automatically reverts to no case forcing. Case forcing applies to + all inserted characters, including those from capture groups and let- + ters within \Q...\E quoted sequences. If either PCRE2_UTF or PCRE2_UCP + was set when the pattern was compiled, Unicode properties are used for + case forcing characters whose code points are greater than 127. + + Note that case forcing sequences such as \U...\E do not nest. For exam- + ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final + \E has no effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EX- + TRA_ALT_BSUX options do not apply to replacement strings. 
+ + The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more + flexibility to capture group substitution. The syntax is similar to + that used by Bash: + + ${<n>:-<string>} + ${<n>:+<string1>:<string2>} + + As before, <n> may be a group number or a name. The first form speci- + fies a default value. If group <n> is set, its value is inserted; if + not, <string> is expanded and the result inserted. The second form + specifies strings that are expanded and inserted when group <n> is set + or unset, respectively. The first form is just a convenient shorthand + for + + ${<n>:+${<n>}:<string>} + + Backslash can be used to escape colons and closing curly brackets in + the replacement strings. A change of the case forcing state within a + replacement string remains in force afterwards, as shown in this + pcre2test example: + + /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo + body + 1: hello + somebody + 1: HELLO + + The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended + substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause un- + known groups in the extended syntax forms to be treated as unset. + + If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, + PCRE2_SUBSTITUTE_UNSET_EMPTY, and PCRE2_SUBSTITUTE_EXTENDED are irrele- + vant and are ignored. + + Substitution errors + + In the event of an error, pcre2_substitute() returns a negative error + code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors + from pcre2_match() are passed straight back. + + PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring inser- + tion, unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set. + + PCRE2_ERROR_UNSET is returned for an unset substring insertion (includ- + ing an unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) + when the simple (non-extended) syntax is used and PCRE2_SUBSTITUTE_UN- + SET_EMPTY is not set. + + PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big + enough. 
If the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size + of buffer that is needed is returned via outlengthptr. Note that this + does not happen by default. + + PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the + match_data argument is NULL or if the subject or replacement arguments + are NULL. For backward compatibility reasons an exception is made for + the replacement argument if the rlength argument is also 0. + + PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in + the replacement string, with more particular errors being PCRE2_ER- + ROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE + (closing curly bracket not found), PCRE2_ERROR_BADSUBSTITUTION (syntax + error in extended group substitution), and PCRE2_ERROR_BADSUBSPATTERN + (the pattern match ended before it started or the match started earlier + than the current position in the subject, which can happen if \K is + used in an assertion). + + As for all PCRE2 errors, a text message that describes the error can be + obtained by calling the pcre2_get_error_message() function (see "Ob- + taining a textual error message" above). + + Substitution callouts + + int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_substitute_callout_block *, void *), + void *callout_data); + + The pcre2_set_substitute_callout() function can be used to specify a + callout function for pcre2_substitute(). This information is passed in + a match context. The callout function is called after each substitution + has been processed, but it can cause the replacement not to happen. The + callout function is not called for simulated substitutions that happen + as a result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. 
+ + The first argument of the callout function is a pointer to a substitute + callout block structure, which contains the following fields, not nec- + essarily in this order: + + uint32_t version; + uint32_t subscount; + PCRE2_SPTR input; + PCRE2_SPTR output; + PCRE2_SIZE *ovector; + uint32_t oveccount; + PCRE2_SIZE output_offsets[2]; + + The version field contains the version number of the block format. The + current version is 0. The version number will increase in future if + more fields are added, but the intention is never to remove any of the + existing fields. + + The subscount field is the number of the current match. It is 1 for the + first callout, 2 for the second, and so on. The input and output point- + ers are copies of the values passed to pcre2_substitute(). + + The ovector field points to the ovector, which contains the result of + the most recent match. The oveccount field contains the number of pairs + that are set in the ovector, and is always greater than zero. + + The output_offsets vector contains the offsets of the replacement in + the output string. This has already been processed for dollar and (if + requested) backslash substitutions as described above. + + The second argument of the callout function is the value passed as + callout_data when the function was registered. The value returned by + the callout function is interpreted as follows: + + If the value is zero, the replacement is accepted, and, if PCRE2_SUB- + STITUTE_GLOBAL is set, processing continues with a search for the next + match. If the value is not zero, the current replacement is not ac- + cepted. If the value is greater than zero, processing continues when + PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less than zero + or PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is copied + to the output and the call to pcre2_substitute() exits, returning the + number of matches so far. 
+ + +DUPLICATE CAPTURE GROUP NAMES + + int pcre2_substring_nametable_scan(const pcre2_code *code, + PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); + + When a pattern is compiled with the PCRE2_DUPNAMES option, names for + capture groups are not required to be unique. Duplicate names are al- + ways allowed for groups with the same number, created by using the (?| + feature. Indeed, if such groups are named, they are required to use the + same names. + + Normally, patterns that use duplicate names are such that in any one + match, only one of each set of identically-named groups participates. + An example is shown in the pcre2pattern documentation. + + When duplicates are present, pcre2_substring_copy_byname() and + pcre2_substring_get_byname() return the first substring corresponding + to the given name that is set. Only if none are set is PCRE2_ERROR_UN- + SET returned. The pcre2_substring_number_from_name() function re- + turns the error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate + names. + + If you want to get full details of all captured substrings for a given + name, you must use the pcre2_substring_nametable_scan() function. The + first argument is the compiled pattern, and the second is the name. If + the third and fourth arguments are NULL, the function returns a group + number for a unique name, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise. + + When the third and fourth arguments are not NULL, they must be pointers + to variables that are updated by the function. After it has run, they + point to the first and last entries in the name-to-number table for the + given name, and the function returns the length of each entry in code + units. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are + no entries for the given name. + + The format of the name table is described above in the section entitled + Information about a pattern. 
Given all the relevant entries for the + name, you can extract each of their numbers, and hence the captured + data. + + +FINDING ALL POSSIBLE MATCHES AT ONE POSITION + + The traditional matching function uses a similar algorithm to Perl, + which stops when it finds the first match at a given point in the sub- + ject. If you want to find all possible matches, or the longest possible + match at a given position, consider using the alternative matching + function (see below) instead. If you cannot use the alternative func- + tion, you can kludge it up by making use of the callout facility, which + is described in the pcre2callout documentation. + + What you have to do is to insert a callout right at the end of the pat- + tern. When your callout function is called, extract and save the cur- + rent matched substring. Then return 1, which forces pcre2_match() to + backtrack and try other alternatives. Ultimately, when it runs out of + matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH. + + +MATCHING A PATTERN: THE ALTERNATIVE FUNCTION + + int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, + int *workspace, PCRE2_SIZE wscount); + + The function pcre2_dfa_match() is called to match a subject string + against a compiled pattern, using a matching algorithm that scans the + subject string just once (not counting lookaround assertions), and does + not backtrack (except when processing lookaround assertions). This has + different characteristics to the normal algorithm, and is not compati- + ble with Perl. Some of the features of PCRE2 patterns are not sup- + ported. Nevertheless, there are times when this kind of matching can be + useful. For a discussion of the two matching algorithms, and a list of + features that pcre2_dfa_match() does not support, see the pcre2matching + documentation. 
+ + The arguments for the pcre2_dfa_match() function are the same as for + pcre2_match(), plus two extras. The ovector within the match data block + is used in a different way, and this is described below. The other com- + mon arguments are used in the same way as for pcre2_match(), so their + description is not repeated here. + + The two additional arguments provide workspace for the function. The + workspace vector should contain at least 20 elements. It is used for + keeping track of multiple paths through the pattern tree. More work- + space is needed for patterns and subjects where there are a lot of po- + tential matches. + + Here is an example of a simple call to pcre2_dfa_match(): + + int wspace[20]; + pcre2_match_data *md = pcre2_match_data_create(4, NULL); + int rc = pcre2_dfa_match( + re, /* result of pcre2_compile() */ + "some string", /* the subject string */ + 11, /* the length of the subject string */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + md, /* the match data block */ + NULL, /* a match context; NULL means use defaults */ + wspace, /* working space vector */ + 20); /* number of elements (NOT size in bytes) */ + + Option bits for pcre2_dfa_match() + + The unused bits of the options argument for pcre2_dfa_match() must be + zero. The only bits that may be set are PCRE2_ANCHORED, + PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NO- + TEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, + PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and + PCRE2_DFA_RESTART. All but the last four of these are exactly the same + as for pcre2_match(), so their description is not repeated here. + + PCRE2_PARTIAL_HARD + PCRE2_PARTIAL_SOFT + + These have the same general effect as they do for pcre2_match(), but + the details are slightly different. 
When PCRE2_PARTIAL_HARD is set for + pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the + subject is reached and there is still at least one matching possibility + that requires additional characters. This happens even if some complete + matches have already been found. When PCRE2_PARTIAL_SOFT is set, the + return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL + if the end of the subject is reached, there have been no complete + matches, but there is still at least one matching possibility. The por- + tion of the string that was inspected when the longest partial match + was found is set as the first matching string in both cases. There is a + more detailed discussion of partial and multi-segment matching, with + examples, in the pcre2partial documentation. + + PCRE2_DFA_SHORTEST + + Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to + stop as soon as it has found one match. Because of the way the alterna- + tive algorithm works, this is necessarily the shortest possible match + at the first possible matching point in the subject string. + + PCRE2_DFA_RESTART + + When pcre2_dfa_match() returns a partial match, it is possible to call + it again, with additional subject characters, and have it continue with + the same match. The PCRE2_DFA_RESTART option requests this action; when + it is set, the workspace and wscount options must reference the same + vector as before because data about the match so far is left in them + after a partial match. There is more discussion of this facility in the + pcre2partial documentation. + + Successful returns from pcre2_dfa_match() + + When pcre2_dfa_match() succeeds, it may have matched more than one sub- + string in the subject. Note, however, that all the matches from one run + of the function start at the same point in the subject. The shorter + matches are all initial substrings of the longer matches. 
For example, + if the pattern + + <.*> + + is matched against the string + + This is <something> <something else> <something further> no more + + the three matched strings are + + <something> <something else> <something further> + <something> <something else> + <something> + + On success, the yield of the function is a number greater than zero, + which is the number of matched substrings. The offsets of the sub- + strings are returned in the ovector, and can be extracted by number in + the same way as for pcre2_match(), but the numbers bear no relation to + any capture groups that may exist in the pattern, because DFA matching + does not support capturing. + + Calls to the convenience functions that extract substrings by name re- + turn the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used af- + ter a DFA match. The convenience functions that extract substrings by + number never return PCRE2_ERROR_NOSUBSTRING. + + The matched strings are stored in the ovector in reverse order of + length; that is, the longest matching string is first. If there were + too many matches to fit into the ovector, the yield of the function is + zero, and the vector is filled with the longest matches. + + NOTE: PCRE2's "auto-possessification" optimization usually applies to + character repeats at the end of a pattern (as well as internally). For + example, the pattern "a\d+" is compiled as if it were "a\d++". For DFA + matching, this means that only one possible match is found. If you re- + ally do want multiple matches in such cases, either use an ungreedy re- + peat such as "a\d+?" or set the PCRE2_NO_AUTO_POSSESS option when com- + piling. + + Error returns from pcre2_dfa_match() + + The pcre2_dfa_match() function returns a negative number when it fails. + Many of the errors are the same as for pcre2_match(), as described + above. There are in addition the following errors that are specific to + pcre2_dfa_match(): + + PCRE2_ERROR_DFA_UITEM + + This return is given if pcre2_dfa_match() encounters an item in the + pattern that it does not support, for instance, the use of \C in a UTF + mode or a backreference. 
+ + PCRE2_ERROR_DFA_UCOND + + This return is given if pcre2_dfa_match() encounters a condition item + that uses a backreference for the condition, or a test for recursion in + a specific capture group. These are not supported. + + PCRE2_ERROR_DFA_UINVALID_UTF + + This return is given if pcre2_dfa_match() is called for a pattern that + was compiled with PCRE2_MATCH_INVALID_UTF. This is not supported for + DFA matching. + + PCRE2_ERROR_DFA_WSSIZE + + This return is given if pcre2_dfa_match() runs out of space in the + workspace vector. + + PCRE2_ERROR_DFA_RECURSE + + When a recursion or subroutine call is processed, the matching function + calls itself recursively, using private memory for the ovector and + workspace. This error is given if the internal ovector is not large + enough. This should be extremely rare, as a vector of size 1000 is + used. + + PCRE2_ERROR_DFA_BADRESTART + + When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option, + some plausibility checks are made on the contents of the workspace, + which should contain data about the previous partial match. If any of + these checks fail, this error is given. + + +SEE ALSO + + pcre2build(3), pcre2callout(3), pcre2demo(3), pcre2matching(3), + pcre2partial(3), pcre2posix(3), pcre2sample(3), pcre2unicode(3). + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 24 April 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.44 24 April 2024 PCRE2API(3) +------------------------------------------------------------------------------ + + + +PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +BUILDING PCRE2 + + PCRE2 is distributed with a configure script that can be used to build + the library in Unix-like environments using the applications known as + Autotools. 
Also in the distribution are files to support building using + CMake instead of configure. The text file README contains general in- + formation about building with Autotools (some of which is repeated be- + low), and also has some comments about building on various operating + systems. The files in the vms directory support building under OpenVMS. + There is a lot more information about building PCRE2 without using Au- + totools (including information about using CMake and building "by + hand") in the text file called NON-AUTOTOOLS-BUILD. You should consult + this file as well as the README file if you are building in a non-Unix- + like environment. + + +PCRE2 BUILD-TIME OPTIONS + + The rest of this document describes the optional features of PCRE2 that + can be selected when the library is compiled. It assumes use of the + configure script, where the optional features are selected or dese- + lected by providing options to configure before running the make com- + mand. However, the same options can be selected in both Unix-like and + non-Unix-like environments if you are using CMake instead of configure + to build PCRE2. + + If you are not using Autotools or CMake, option selection can be done + by editing the config.h file, or by passing parameter settings to the + compiler, as described in NON-AUTOTOOLS-BUILD. + + The complete list of options for configure (which includes the standard + ones such as the selection of the installation directory) can be ob- + tained by running + + ./configure --help + + The following sections include descriptions of "on/off" options whose + names begin with --enable or --disable. Because of the way that config- + ure works, --enable and --disable always come in pairs, so the comple- + mentary option always exists as well, but as it specifies the default, + it is not described. Options that specify values have names that start + with --with. At the end of a configure run, a summary of the configura- + tion is output. 
+ + +BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES + + By default, a library called libpcre2-8 is built, containing functions + that take string arguments contained in arrays of bytes, interpreted + either as single-byte characters, or UTF-8 strings. You can also build + two other libraries, called libpcre2-16 and libpcre2-32, which process + strings that are contained in arrays of 16-bit and 32-bit code units, + respectively. These can be interpreted either as single-unit characters + or UTF-16/UTF-32 strings. To build these additional libraries, add one + or both of the following to the configure command: + + --enable-pcre2-16 + --enable-pcre2-32 + + If you do not want the 8-bit library, add + + --disable-pcre2-8 + + as well. At least one of the three libraries must be built. Note that + the POSIX wrapper is for the 8-bit library only, and that pcre2grep is + an 8-bit program. Neither of these are built if you select only the + 16-bit or 32-bit libraries. + + +BUILDING SHARED AND STATIC LIBRARIES + + The Autotools PCRE2 building process uses libtool to build both shared + and static libraries by default. You can suppress an unwanted library + by adding one of + + --disable-shared + --disable-static + + to the configure command. Setting --disable-shared ensures that PCRE2 + libraries are built as static libraries. The binaries that are then + created as part of the build process (for example, pcre2test and + pcre2grep) are linked statically with one or more PCRE2 libraries, but + may also be dynamically linked with other libraries such as libc. If + you want these binaries to be fully statically linked, you can set LD- + FLAGS like this: + + LDFLAGS=--static ./configure --disable-shared + + Note the two hyphens in --static. Of course, this works only if static + versions of all the relevant libraries are available for linking. + + +UNICODE AND UTF SUPPORT + + By default, PCRE2 is built with support for Unicode and UTF character + strings. 
To build it without Unicode support, add + + --disable-unicode + + to the configure command. This setting applies to all three libraries. + It is not possible to build one library with Unicode support and an- + other without in the same configuration. + + Of itself, Unicode support does not make PCRE2 treat strings as UTF-8, + UTF-16 or UTF-32. To do that, applications that use the library can set + the PCRE2_UTF option when they call pcre2_compile() to compile a pat- + tern. Alternatively, patterns may be started with (*UTF) unless the + application has locked this out by setting PCRE2_NEVER_UTF. + + UTF support allows the libraries to process character code points up to + 0x10ffff in the strings that they handle. Unicode support also gives + access to the Unicode properties of characters, using pattern escapes + such as \P, \p, and \X. Only the general category properties such as Lu + and Nd, script names, and some bi-directional properties are supported. + Details are given in the pcre2pattern documentation. + + Pattern escapes such as \d and \w do not by default make use of Unicode + properties. The application can request that they do by setting the + PCRE2_UCP option. Unless the application has set PCRE2_NEVER_UCP, a + pattern may also request this by starting with (*UCP). + + +DISABLING THE USE OF \C + + The \C escape sequence, which matches a single code unit, even in a UTF + mode, can cause unpredictable behaviour because it may leave the cur- + rent matching point in the middle of a multi-code-unit character. The + application can lock it out by setting the PCRE2_NEVER_BACKSLASH_C op- + tion when calling pcre2_compile(). There is also a build-time option + + --enable-never-backslash-C + + (note the upper case C) which locks out the use of \C entirely. + + +JUST-IN-TIME COMPILER SUPPORT + + Just-in-time (JIT) compiler support is included in the build by speci- + fying + + --enable-jit + + This support is available only for certain hardware architectures. 
If + this option is set for an unsupported architecture, a building error + occurs. If in doubt, use + + --enable-jit=auto + + which enables JIT only if the current hardware is supported. You can + check if JIT is enabled in the configuration summary that is output at + the end of a configure run. If you are enabling JIT under SELinux you + may also want to add + + --enable-jit-sealloc + + which enables the use of an execmem allocator in JIT that is compatible + with SELinux. This has no effect if JIT is not enabled. See the + pcre2jit documentation for a discussion of JIT usage. When JIT support + is enabled, pcre2grep automatically makes use of it, unless you add + + --disable-pcre2grep-jit + + to the configure command. + + +NEWLINE RECOGNITION + + By default, PCRE2 interprets the linefeed (LF) character as indicating + the end of a line. This is the normal newline character on Unix-like + systems. You can compile PCRE2 to use carriage return (CR) instead, by + adding + + --enable-newline-is-cr + + to the configure command. There is also an --enable-newline-is-lf op- + tion, which explicitly specifies linefeed as the newline character. + + Alternatively, you can specify that line endings are to be indicated by + the two-character sequence CRLF (CR immediately followed by LF). If you + want this, add + + --enable-newline-is-crlf + + to the configure command. There is a fourth option, specified by + + --enable-newline-is-anycrlf + + which causes PCRE2 to recognize any of the three sequences CR, LF, or + CRLF as indicating a line ending. A fifth option, specified by + + --enable-newline-is-any + + causes PCRE2 to recognize any Unicode newline sequence. The Unicode + newline sequences are the three just mentioned, plus the single charac- + ters VT (vertical tab, U+000B), FF (form feed, U+000C), NEL (next line, + U+0085), LS (line separator, U+2028), and PS (paragraph separator, + U+2029). 
The final option is + + --enable-newline-is-nul + + which causes NUL (binary zero) to be set as the default line-ending + character. + + Whatever default line ending convention is selected when PCRE2 is built + can be overridden by applications that use the library. At build time + it is recommended to use the standard for your operating system. + + +WHAT \R MATCHES + + By default, the sequence \R in a pattern matches any Unicode newline + sequence, independently of what has been selected as the line ending + sequence. If you specify + + --enable-bsr-anycrlf + + the default is changed so that \R matches only CR, LF, or CRLF. What- + ever is selected when PCRE2 is built can be overridden by applications + that use the library. + + +HANDLING VERY LARGE PATTERNS + + Within a compiled pattern, offset values are used to point from one + part to another (for example, from an opening parenthesis to an alter- + nation metacharacter). By default, in the 8-bit and 16-bit libraries, + two-byte values are used for these offsets, leading to a maximum size + for a compiled pattern of around 64 thousand code units. This is suffi- + cient to handle all but the most gigantic patterns. Nevertheless, some + people do want to process truly enormous patterns, so it is possible to + compile PCRE2 to use three-byte or four-byte offsets by adding a set- + ting such as + + --with-link-size=3 + + to the configure command. The value given must be 2, 3, or 4. For the + 16-bit library, a value of 3 is rounded up to 4. In these libraries, + using longer offsets slows down the operation of PCRE2 because it has + to load additional data when handling them. For the 32-bit library the + value is always 4 and cannot be overridden; the value of --with-link- + size is ignored. + + +LIMITING PCRE2 RESOURCE USAGE + + The pcre2_match() function increments a counter each time it goes round + its main loop. 
Putting a limit on this counter controls the amount of + computing resource used by a single call to pcre2_match(). The limit + can be changed at run time, as described in the pcre2api documentation. + The default is 10 million, but this can be changed by adding a setting + such as + + --with-match-limit=500000 + + to the configure command. This setting also applies to the + pcre2_dfa_match() matching function, and to JIT matching (though the + counting is done differently). + + The pcre2_match() function uses heap memory to record backtracking + points. The more nested backtracking points there are (that is, the + deeper the search tree), the more memory is needed. There is an upper + limit, specified in kibibytes (units of 1024 bytes). This limit can be + changed at run time, as described in the pcre2api documentation. The + default limit (in effect unlimited) is 20 million. You can change this + by a setting such as + + --with-heap-limit=500 + + which limits the amount of heap to 500 KiB. This limit applies only to + interpretive matching in pcre2_match() and pcre2_dfa_match(), which may + also use the heap for internal workspace when processing complicated + patterns. This limit does not apply when JIT (which has its own memory + arrangements) is used. + + You can also explicitly limit the depth of nested backtracking in the + pcre2_match() interpreter. This limit defaults to the value that is set + for --with-match-limit. You can set a lower default limit by adding, + for example, + + --with-match-limit-depth=10000 + + to the configure command. This value can be overridden at run time. + This depth limit indirectly limits the amount of heap memory that is + used, but because the size of each backtracking "frame" depends on the + number of capturing parentheses in a pattern, the amount of heap that + is used before the limit is reached varies from pattern to pattern. 
+ This limit was more useful in versions before 10.30, where function re- + cursion was used for backtracking. + + As well as applying to pcre2_match(), the depth limit also controls the + depth of recursive function calls in pcre2_dfa_match(). These are used + for lookaround assertions, atomic groups, and recursion within pat- + terns. The limit does not apply to JIT matching. + + +LIMITING VARIABLE-LENGTH LOOKBEHIND ASSERTIONS + + Lookbehind assertions in which one or more branches can match a vari- + able number of characters are supported only if there is a maximum + matching length for each top-level branch. There is a limit to this + maximum that defaults to 255 characters. You can alter this default by + a setting such as + + --with-max-varlookbehind=100 + + The limit can be changed at runtime by calling pcre2_set_max_varlookbe- + hind(). Lookbehind assertions in which every branch matches a fixed + number of characters (not necessarily all the same) are not constrained + by this limit. + + +CREATING CHARACTER TABLES AT BUILD TIME + + PCRE2 uses fixed tables for processing characters whose code points are + less than 256. By default, PCRE2 is built with a set of tables that are + distributed in the file src/pcre2_chartables.c.dist. These tables are + for ASCII codes only. If you add + + --enable-rebuild-chartables + + to the configure command, the distributed tables are no longer used. + Instead, a program called pcre2_dftables is compiled and run. This out- + puts the source for a new set of tables, created in the default locale of + your C run-time system. This method of replacing the tables does not + work if you are cross compiling, because pcre2_dftables needs to be run + on the local host and therefore not compiled with the cross compiler. + + If you need to create alternative tables when cross compiling, you will + have to do so "by hand". There may also be other reasons for creating + tables manually. 
To cause pcre2_dftables to be built on the local + host, run a normal compiling command, and then run the program with the + output file as its argument, for example: + + cc src/pcre2_dftables.c -o pcre2_dftables + ./pcre2_dftables src/pcre2_chartables.c + + This builds the tables in the default locale of the local host. If you + want to specify a locale, you must use the -L option: + + LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c + + You can also specify -b (with or without -L). This causes the tables to + be written in binary instead of as source code. A set of binary tables + can be loaded into memory by an application and passed to pcre2_com- + pile() in the same way as tables created by calling pcre2_maketables(). + The tables are just a string of bytes, independent of hardware charac- + teristics such as endianness. This means they can be bundled with an + application that runs in different environments, to ensure consistent + behaviour. + + +USING EBCDIC CODE + + PCRE2 assumes by default that it will run in an environment where the + character code is ASCII or Unicode, which is a superset of ASCII. This + is the case for most computer operating systems. PCRE2 can, however, be + compiled to run in an 8-bit EBCDIC environment by adding + + --enable-ebcdic --disable-unicode + + to the configure command. This setting implies --enable-rebuild-charta- + bles. You should only use it if you know that you are in an EBCDIC en- + vironment (for example, an IBM mainframe operating system). + + It is not possible to support both EBCDIC and UTF-8 codes in the same + version of the library. Consequently, --enable-unicode and --enable- + ebcdic are mutually exclusive. + + The EBCDIC character that corresponds to an ASCII LF is assumed to have + the value 0x15 by default. However, in some EBCDIC environments, 0x25 + is used. In such an environment you should use + + --enable-ebcdic-nl25 + + as well as, or instead of, --enable-ebcdic. 
The EBCDIC character for CR + has the same value as in ASCII, namely, 0x0d. Whichever of 0x15 and + 0x25 is not chosen as LF is made to correspond to the Unicode NEL char- + acter (which, in Unicode, is 0x85). + + The options that select newline behaviour, such as --enable-newline-is- + cr, and equivalent run-time options, refer to these character values in + an EBCDIC environment. + + +PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS + + By default pcre2grep supports the use of callouts with string arguments + within the patterns it is matching. There are two kinds: one that gen- + erates output using local code, and another that calls an external pro- + gram or script. If --disable-pcre2grep-callout-fork is added to the + configure command, only the first kind of callout is supported; if + --disable-pcre2grep-callout is used, all callouts are completely ig- + nored. For more details of pcre2grep callouts, see the pcre2grep docu- + mentation. + + +PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT + + By default, pcre2grep reads all files as plain text. You can build it + so that it recognizes files whose names end in .gz or .bz2, and reads + them with libz or libbz2, respectively, by adding one or both of + + --enable-pcre2grep-libz + --enable-pcre2grep-libbz2 + + to the configure command. These options naturally require that the rel- + evant libraries are installed on your system. Configuration will fail + if they are not. + + +PCRE2GREP BUFFER SIZE + + pcre2grep uses an internal buffer to hold a "window" on the file it is + scanning, in order to be able to output "before" and "after" lines when + it finds a match. The default starting size of the buffer is 20KiB. The + buffer itself is three times this size, but because of the way it is + used for holding "before" lines, the longest line that is guaranteed to + be processable is the notional buffer size. 
If a longer line is encoun- + tered, pcre2grep automatically expands the buffer, up to a specified + maximum size, whose default is 1MiB or the starting size, whichever is + the larger. You can change the default parameter values by adding, for + example, + + --with-pcre2grep-bufsize=51200 + --with-pcre2grep-max-bufsize=2097152 + + to the configure command. The caller of pcre2grep can override these + values by using --buffer-size and --max-buffer-size on the command + line. + + +PCRE2TEST OPTION FOR LIBREADLINE SUPPORT + + If you add one of + + --enable-pcre2test-libreadline + --enable-pcre2test-libedit + + to the configure command, pcre2test is linked with the libreadline or + libedit library, respectively, and when its input is from a terminal, + it reads it using the readline() function. This provides line-editing + and history facilities. Note that libreadline is GPL-licensed, so if + you distribute a binary of pcre2test linked in this way, there may be + licensing issues. These can be avoided by linking instead with libedit, + which has a BSD licence. + + Setting --enable-pcre2test-libreadline causes the -lreadline option to + be added to the pcre2test build. In many operating environments with a + system-installed readline library this is sufficient. However, in some + environments (e.g. if an unmodified distribution version of readline is + in use), some extra configuration may be necessary. The INSTALL file + for libreadline says this: + + "Readline uses the termcap functions, but does not link with + the termcap or curses library itself, allowing applications + which link with readline the to choose an appropriate library." + + If your environment has not been set up so that an appropriate library + is automatically included, you may need to add something like + + LIBS="-lncurses" + + immediately before the configure command. 
+ + +INCLUDING DEBUGGING CODE + + If you add + + --enable-debug + + to the configure command, additional debugging code is included in the + build. This feature is intended for use by the PCRE2 maintainers. + + +DEBUGGING WITH VALGRIND SUPPORT + + If you add + + --enable-valgrind + + to the configure command, PCRE2 will use valgrind annotations to mark + certain memory regions as unaddressable. This allows it to detect in- + valid memory accesses, and is mostly useful for debugging PCRE2 itself. + + +CODE COVERAGE REPORTING + + If your C compiler is gcc, you can build a version of PCRE2 that can + generate a code coverage report for its test suite. To enable this, you + must install lcov version 1.6 or above. Then specify + + --enable-coverage + + to the configure command and build PCRE2 in the usual way. + + Note that using ccache (a caching C compiler) is incompatible with code + coverage reporting. If you have configured ccache to run automatically + on your system, you must set the environment variable + + CCACHE_DISABLE=1 + + before running make to build PCRE2, so that ccache is not used. + + When --enable-coverage is used, the following additional targets are + added to the Makefile: + + make coverage + + This creates a fresh coverage report for the PCRE2 test suite. It is + equivalent to running "make coverage-reset", "make coverage-baseline", + "make check", and then "make coverage-report". + + make coverage-reset + + This zeroes the coverage counters, but does nothing else. + + make coverage-baseline + + This captures baseline coverage information. + + make coverage-report + + This creates the coverage report. + + make coverage-clean-report + + This removes the generated coverage report without cleaning the cover- + age data itself. + + make coverage-clean-data + + This removes the captured coverage data without removing the coverage + files created at compile time (*.gcno). 
+ + make coverage-clean + + This cleans all coverage data including the generated coverage report. + For more information about code coverage, see the gcov and lcov docu- + mentation. + + +DISABLING THE Z AND T FORMATTING MODIFIERS + + The C99 standard defines formatting modifiers z and t for size_t and + ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers + in environments other than old versions of Microsoft Visual Studio when + __STDC_VERSION__ is defined and has a value greater than or equal to + 199901L (indicating support for C99). However, there is at least one + environment that claims to be C99 but does not support these modifiers. + If + + --disable-percent-zt + + is specified, no use is made of the z or t modifiers. Instead of %td or + %zu, a suitable format is used depending on the size of long for the + platform. + + +SUPPORT FOR FUZZERS + + There is a special option for use by people who want to run fuzzing + tests on PCRE2: + + --enable-fuzz-support + + At present this applies only to the 8-bit library. If set, it causes an + extra library called libpcre2-fuzzsupport.a to be built, but not in- + stalled. This contains a single function called LLVMFuzzerTestOneIn- + put() whose arguments are a pointer to a string and the length of the + string. When called, this function tries to compile the string as a + pattern, and if that succeeds, to match it. This is done both with no + options and with some random option bits that are generated from the + string. + + Setting --enable-fuzz-support also causes a binary called pcre2fuz- + zcheck to be created. This is normally run under valgrind or used when + PCRE2 is compiled with address sanitizing enabled. It calls the fuzzing + function and outputs information about what it is doing. The input + strings are specified by arguments: if an argument starts with "=" the + rest of it is a literal input string. 
Otherwise, it is assumed to be a + file name, and the contents of the file are the test string. + + +OBSOLETE OPTION + + In versions of PCRE2 prior to 10.30, there were two ways of handling + backtracking in the pcre2_match() function. The default was to use the + system stack, but if + + --disable-stack-for-recursion + + was set, memory on the heap was used. From release 10.30 onwards this + has changed (the stack is no longer used) and this option now does + nothing except give a warning. + + +SEE ALSO + + pcre2api(3), pcre2-config(3). + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 15 April 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.44 15 April 2024 PCRE2BUILD(3) +------------------------------------------------------------------------------ + + + +PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +SYNOPSIS + + #include <pcre2.h> + + int (*pcre2_callout)(pcre2_callout_block *, void *); + + int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); + + +DESCRIPTION + + PCRE2 provides a feature called "callout", which is a means of tem- + porarily passing control to the caller of PCRE2 in the middle of pat- + tern matching. The caller of PCRE2 provides an external function by + putting its entry point in a match context (see pcre2_set_callout() in + the pcre2api documentation). + + When using the pcre2_substitute() function, an additional callout fea- + ture is available. This does a callout after each change to the subject + string and is described in the pcre2api documentation; the rest of this + document is concerned with callouts during pattern matching. + + Within a regular expression, (?C<arg>) indicates a point at which the + external function is to be called. 
Different callout points can be + identified by putting a number less than 256 after the letter C. The + default value is zero. Alternatively, the argument may be a delimited + string. The starting delimiter must be one of ` ' " ^ % # $ { and the + ending delimiter is the same as the start, except for {, where the end- + ing delimiter is }. If the ending delimiter is needed within the + string, it must be doubled. For example, this pattern has two callout + points: + + (?C1)abc(?C"some ""arbitrary"" text")def + + If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, + PCRE2 automatically inserts callouts, all with number 255, before each + item in the pattern except for immediately before or after an explicit + callout. For example, if PCRE2_AUTO_CALLOUT is used with the pattern + + A(?C3)B + + it is processed as if it were + + (?C255)A(?C3)B(?C255) + + Here is a more complicated example: + + A(\d{2}|--) + + With PCRE2_AUTO_CALLOUT, this pattern is processed as if it were + + (?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255) + + Notice that there is a callout before and after each parenthesis and + alternation bar. If the pattern contains a conditional group whose con- + dition is an assertion, an automatic callout is inserted immediately + before the condition. Such a callout may also be inserted explicitly, + for example: + + (?(?C9)(?=a)ab|de) (?(?C%text%)(?!=d)ab|de) + + This applies only to assertion conditions (because they are themselves + independent groups). + + Callouts can be useful for tracking the progress of pattern matching. + The pcre2test program has a pattern qualifier (/auto_callout) that sets + automatic callouts. When any callouts are present, the output from + pcre2test indicates how the pattern is being matched. This is useful + information when you are trying to optimize the performance of a par- + ticular pattern. 
+ + +MISSING CALLOUTS + + You should be aware that, because of optimizations in the way PCRE2 + compiles and matches patterns, callouts sometimes do not happen exactly + as you might expect. + + Auto-possessification + + At compile time, PCRE2 "auto-possessifies" repeated items when it knows + that what follows cannot be part of the repeat. For example, a+[bc] is + compiled as if it were a++[bc]. The pcre2test output when this pattern + is compiled with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied + to the string "aaaa" is: + + --->aaaa + +0 ^ a+ + +2 ^ ^ [bc] + No match + + This indicates that when matching [bc] fails, there is no backtracking + into a+ (because it is being treated as a++) and therefore the callouts + that would be taken for the backtracks do not occur. You can disable + the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS to + pcre2_compile(), or starting the pattern with (*NO_AUTO_POSSESS). In + this case, the output changes to this: + + --->aaaa + +0 ^ a+ + +2 ^ ^ [bc] + +2 ^ ^ [bc] + +2 ^ ^ [bc] + +2 ^^ [bc] + No match + + This time, when matching [bc] fails, the matcher backtracks into a+ and + tries again, repeatedly, until a+ itself fails. + + Automatic .* anchoring + + By default, an optimization is applied when .* is the first significant + item in a pattern. If PCRE2_DOTALL is set, so that the dot can match + any character, the pattern is automatically anchored. If PCRE2_DOTALL + is not set, a match can start only after an internal newline or at the + beginning of the subject, and pcre2_compile() remembers this. If a pat- + tern has more than one top-level branch, automatic anchoring occurs if + all branches are anchorable. + + This optimization is disabled, however, if .* is in an atomic group or + if there is a backreference to the capture group in which it appears. + It is also disabled if the pattern contains (*PRUNE) or (*SKIP). How- + ever, the presence of callouts does not affect it. 
+ + For example, if the pattern .*\d is compiled with PCRE2_AUTO_CALLOUT + and applied to the string "aa", the pcre2test output is: + + --->aa + +0 ^ .* + +2 ^ ^ \d + +2 ^^ \d + +2 ^ \d + No match + + This shows that all match attempts start at the beginning of the sub- + ject. In other words, the pattern is anchored. You can disable this op- + timization by passing PCRE2_NO_DOTSTAR_ANCHOR to pcre2_compile(), or + starting the pattern with (*NO_DOTSTAR_ANCHOR). In this case, the out- + put changes to: + + --->aa + +0 ^ .* + +2 ^ ^ \d + +2 ^^ \d + +2 ^ \d + +0 ^ .* + +2 ^^ \d + +2 ^ \d + No match + + This shows more match attempts, starting at the second subject charac- + ter. Another optimization, described in the next section, means that + there is no subsequent attempt to match with an empty subject. + + Other optimizations + + Other optimizations that provide fast "no match" results also affect + callouts. For example, if the pattern is + + ab(?C4)cd + + PCRE2 knows that any matching string must contain the letter "d". If + the subject string is "abyz", the lack of "d" means that matching + doesn't ever start, and the callout is never reached. However, with + "abyd", though the result is still no match, the callout is obeyed. + + For most patterns PCRE2 also knows the minimum length of a matching + string, and will immediately give a "no match" return without actually + running a match if the subject is not long enough, or, for unanchored + patterns, if it has been scanned far enough. + + You can disable these optimizations by passing the PCRE2_NO_START_OPTI- + MIZE option to pcre2_compile(), or by starting the pattern with + (*NO_START_OPT). This slows down the matching process, but does ensure + that callouts such as the example above are obeyed. + + +THE CALLOUT INTERFACE + + During matching, when PCRE2 reaches a callout point, if an external + function is provided in the match context, it is called. This applies + to both normal, DFA, and JIT matching. 
The first argument to the call- + out function is a pointer to a pcre2_callout block. The second argument + is the void * callout data that was supplied when the callout was set + up by calling pcre2_set_callout() (see the pcre2api documentation). The + callout block structure contains the following fields, not necessarily + in this order: + + uint32_t version; + uint32_t callout_number; + uint32_t capture_top; + uint32_t capture_last; + uint32_t callout_flags; + PCRE2_SIZE *offset_vector; + PCRE2_SPTR mark; + PCRE2_SPTR subject; + PCRE2_SIZE subject_length; + PCRE2_SIZE start_match; + PCRE2_SIZE current_position; + PCRE2_SIZE pattern_position; + PCRE2_SIZE next_item_length; + PCRE2_SIZE callout_string_offset; + PCRE2_SIZE callout_string_length; + PCRE2_SPTR callout_string; + + The version field contains the version number of the block format. The + current version is 2; the three callout string fields were added for + version 1, and the callout_flags field for version 2. If you are writ- + ing an application that might use an earlier release of PCRE2, you + should check the version number before accessing any of these fields. + The version number will increase in future if more fields are added, + but the intention is never to remove any of the existing fields. + + Fields for numerical callouts + + For a numerical callout, callout_string is NULL, and callout_number + contains the number of the callout, in the range 0-255. This is the + number that follows (?C for callouts that are part of the pattern; it is + 255 for automatically generated callouts. + + Fields for string callouts + + For callouts with string arguments, callout_number is always zero, and + callout_string points to the string that is contained within the com- + piled pattern. Its length is given by callout_string_length. 
Duplicated + ending delimiters that were present in the original pattern string have + been turned into single characters, but there is no other processing of + the callout string argument. An additional code unit containing binary + zero is present after the string, but is not included in the length. + The delimiter that was used to start the string is also stored within + the pattern, immediately before the string itself. You can access this + delimiter as callout_string[-1] if you need it. + + The callout_string_offset field is the code unit offset to the start of + the callout argument string within the original pattern string. This is + provided for the benefit of applications such as script languages that + might need to report errors in the callout string within the pattern. + + Fields for all callouts + + The remaining fields in the callout block are the same for both kinds + of callout. + + The offset_vector field is a pointer to a vector of capturing offsets + (the "ovector"). You may read the elements in this vector, but you must + not change any of them. + + For calls to pcre2_match(), the offset_vector field is not (since re- + lease 10.30) a pointer to the actual ovector that was passed to the + matching function in the match data block. Instead it points to an in- + ternal ovector of a size large enough to hold all possible captured + substrings in the pattern. Note that whenever a recursion or subroutine + call within a pattern completes, the capturing state is reset to what + it was before. + + The capture_last field contains the number of the most recently cap- + tured substring, and the capture_top field contains one more than the + number of the highest numbered captured substring so far. If no sub- + strings have yet been captured, the value of capture_last is 0 and the + value of capture_top is 1. 
The values of these fields do not always + differ by one; for example, when the callout in the pattern + ((a)(b))(?C2) is taken, capture_last is 1 but capture_top is 4. + + The contents of ovector[2] to ovector[<capture_top>*2-1] can be in- + spected in order to extract substrings that have been matched so far, + in the same way as extracting substrings after a match has completed. + The values in ovector[0] and ovector[1] are always PCRE2_UNSET because + the match is by definition not complete. Substrings that have not been + captured but whose numbers are less than capture_top also have both of + their ovector slots set to PCRE2_UNSET. + + For DFA matching, the offset_vector field points to the ovector that + was passed to the matching function in the match data block for call- + outs at the top level, but to an internal ovector during the processing + of pattern recursions, lookarounds, and atomic groups. However, these + ovectors hold no useful information because pcre2_dfa_match() does not + support substring capturing. The value of capture_top is always 1 and + the value of capture_last is always 0 for DFA matching. + + The subject and subject_length fields contain copies of the values that + were passed to the matching function. + + The start_match field normally contains the offset within the subject + at which the current match attempt started. However, if the escape se- + quence \K has been encountered, this value is changed to reflect the + modified starting point. If the pattern is not anchored, the callout + function may be called several times from the same point in the pattern + for different starting points in the subject. + + The current_position field contains the offset within the subject of + the current match pointer. + + The pattern_position field contains the offset in the pattern string to + the next item to be matched. + + The next_item_length field contains the length of the next item to be + processed in the pattern string. 
When the callout is at the end of the + pattern, the length is zero. When the callout precedes an opening + parenthesis, the length includes meta characters that follow the paren- + thesis. For example, in a callout before an assertion such as (?=ab) + the length is 3. For an alternation bar or a closing parenthesis, the + length is one, unless a closing parenthesis is followed by a quanti- + fier, in which case its length is included. (This changed in release + 10.23. In earlier releases, before an opening parenthesis the length + was that of the entire group, and before an alternation bar or a clos- + ing parenthesis the length was zero.) + + The pattern_position and next_item_length fields are intended to help + in distinguishing between different automatic callouts, which all have + the same callout number. However, they are set for all callouts, and + are used by pcre2test to show the next item to be matched when display- + ing callout information. + + In callouts from pcre2_match() the mark field contains a pointer to the + zero-terminated name of the most recently passed (*MARK), (*PRUNE), or + (*THEN) item in the match, or NULL if no such items have been passed. + Instances of (*PRUNE) or (*THEN) without a name do not obliterate a + previous (*MARK). In callouts from the DFA matching function this field + always contains NULL. + + The callout_flags field is always zero in callouts from + pcre2_dfa_match() or when JIT is being used. When pcre2_match() without + JIT is used, the following bits may be set: + + PCRE2_CALLOUT_STARTMATCH + + This is set for the first callout after the start of matching for each + new starting position in the subject. + + PCRE2_CALLOUT_BACKTRACK + + This is set if there has been a matching backtrack since the previous + callout, or since the start of matching if this is the first callout + from a pcre2_match() run. + + Both bits are set when a backtrack has caused a "bumpalong" to a new + starting position in the subject. 
Output from pcre2test does not indi- + cate the presence of these bits unless the callout_extra modifier is + set. + + The information in the callout_flags field is provided so that applica- + tions can track and tell their users how matching with backtracking is + done. This can be useful when trying to optimize patterns, or just to + understand how PCRE2 works. There is no support in pcre2_dfa_match() + because there is no backtracking in DFA matching, and there is no sup- + port in JIT because JIT is all about maximizing matching performance. + In both these cases the callout_flags field is always zero. + + +RETURN VALUES FROM CALLOUTS + + The external callout function returns an integer to PCRE2. If the value + is zero, matching proceeds as normal. If the value is greater than + zero, matching fails at the current point, but the testing of other + matching possibilities goes ahead, just as if a lookahead assertion had + failed. If the value is less than zero, the match is abandoned, and the + matching function returns the negative value. + + Negative values should normally be chosen from the set of PCRE2_ER- + ROR_xxx values. In particular, PCRE2_ERROR_NOMATCH forces a standard + "no match" failure. The error number PCRE2_ERROR_CALLOUT is reserved + for use by callout functions; it will never be used by PCRE2 itself. + + +CALLOUT ENUMERATION + + int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); + + A script language that supports the use of string arguments in callouts + might like to scan all the callouts in a pattern before running the + match. This can be done by calling pcre2_callout_enumerate(). The first + argument is a pointer to a compiled pattern, the second points to a + callback function, and the third is arbitrary user data. The callback + function is called for every callout in the pattern in the order in + which they appear.
Its first argument is a pointer to a callout enumer- + ation block, and its second argument is the user_data value that was + passed to pcre2_callout_enumerate(). The data block contains the fol- + lowing fields: + + version Block version number + pattern_position Offset to next item in pattern + next_item_length Length of next item in pattern + callout_number Number for numbered callouts + callout_string_offset Offset to string within pattern + callout_string_length Length of callout string + callout_string Points to callout string or is NULL + + The version number is currently 0. It will increase if new fields are + ever added to the block. The remaining fields are the same as their + namesakes in the pcre2_callout block that is used for callouts during + matching, as described above. + + Note that the value of pattern_position is unique for each callout. + However, if a callout occurs inside a group that is quantified with a + non-zero minimum or a fixed maximum, the group is replicated inside the + compiled pattern. For example, a pattern such as /(a){2}/ is compiled + as if it were /(a)(a)/. This means that the callout will be enumerated + more than once, but with the same value for pattern_position in each + case. + + The callback function should normally return zero. If it returns a non- + zero value, scanning the pattern stops, and that value is returned from + pcre2_callout_enumerate(). + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 19 January 2024 + Copyright (c) 1997-2024 University of Cambridge. 
+ + +PCRE2 10.43 19 January 2024 PCRE2CALLOUT(3) +------------------------------------------------------------------------------ + + + +PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +DIFFERENCES BETWEEN PCRE2 AND PERL + + This document describes some of the known differences in the ways that + PCRE2 and Perl handle regular expressions. The differences described + here are with respect to Perl version 5.38.0, but as both Perl and + PCRE2 are continually changing, the information may at times be out of + date. + + 1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, + the behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' + matches the next character unless it is the start of a newline se- + quence. This means that, if the newline setting is CR, CRLF, or NUL, + '.' will match the code point LF (0x0A) in ASCII/Unicode environments, + and NL (either 0x15 or 0x25) when using EBCDIC. In Perl, '.' appears + never to match LF, even when 0x0A is not a newline indicator. + + 2. PCRE2 has only a subset of Perl's Unicode support. Details of what + it does have are given in the pcre2unicode page. + + 3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized asser- + tions, but they do not mean what you might think. For example, (?!a){3} + does not assert that the next three characters are not "a". It just as- + serts that the next character is not "a" three times (in principle; + PCRE2 optimizes this to run the assertion just once). Perl allows some + repeat quantifiers on other assertions, for example, \b* , but these do + not seem to have any use. PCRE2 does not allow any kind of quantifier + on non-lookaround assertions. + + 4. If a braced quantifier such as {1,2} appears where there is nothing + to repeat (for example, at the start of a branch), PCRE2 raises an er- + ror whereas Perl treats the quantifier characters as literal. + + 5. 
Capture groups that occur inside negative lookaround assertions are + counted, but their entries in the offsets vector are set only when a + negative assertion is a condition that has a matching branch (that is, + the condition is false). Perl may set such capture groups in other + circumstances. + + 6. The following Perl escape sequences are not supported: \F, \l, \L, + \u, \U, and \N when followed by a character name. \N on its own, match- + ing a non-newline character, and \N{U+dd..}, matching a Unicode code + point, are supported. The escapes that modify the case of following + letters are implemented by Perl's general string-handling and are not + part of its pattern matching engine. If any of these are encountered by + PCRE2, an error is generated by default. However, if either of the + PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX options is set, \U and \u are + interpreted as ECMAScript interprets them. + + 7. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 + is built with Unicode support (the default). The properties that can be + tested with \p and \P are limited to the general category properties + such as Lu and Nd, the derived properties Any and LC (synonym L&), + script names such as Greek or Han, Bidi_Class, Bidi_Control, and a few + binary properties. Both PCRE2 and Perl support the Cs (surrogate) prop- + erty, but in PCRE2 its use is limited. See the pcre2pattern documenta- + tion for details. The long synonyms for property names that Perl sup- + ports (such as \p{Letter}) are not supported by PCRE2, nor is it per- + mitted to prefix any of these properties with "Is". + + 8. PCRE2 supports the \Q...\E escape for quoting substrings. Characters + in between are treated as literals. However, this is slightly different + from Perl in that $ and @ are also handled as literals inside the + quotes. In Perl, they cause variable interpolation (PCRE2 does not have + variables). 
Also, Perl does "double-quotish backslash interpolation" on + any backslashes between \Q and \E which, its documentation says, "may + lead to confusing results". PCRE2 treats a backslash between \Q and \E + just like any other character. Note the following examples: + + Pattern PCRE2 matches Perl matches + + \Qabc$xyz\E abc$xyz abc followed by the + contents of $xyz + \Qabc\$xyz\E abc\$xyz abc\$xyz + \Qabc\E\$\Qxyz\E abc$xyz abc$xyz + \QA\B\E A\B A\B + \Q\\E \ \\E + + The \Q...\E sequence is recognized both inside and outside character + classes by both PCRE2 and Perl. + + 9. Fairly obviously, PCRE2 does not support the (?{code}) and + (??{code}) constructions. However, PCRE2 does have a "callout" feature, + which allows an external function to be called during pattern matching. + See the pcre2callout documentation for details. + + 10. Subroutine calls (whether recursive or not) were treated as atomic + groups up to PCRE2 release 10.23, but from release 10.30 this changed, + and backtracking into subroutine calls is now supported, as in Perl. + + 11. In PCRE2, if any of the backtracking control verbs are used in a + group that is called as a subroutine (whether or not recursively), + their effect is confined to that group; it does not extend to the sur- + rounding pattern. This is not always the case in Perl. In particular, + if (*THEN) is present in a group that is called as a subroutine, its + action is limited to that group, even if the group does not contain any + | characters. Note that such groups are processed as anchored at the + point where they are tested. + + 12. If a pattern contains more than one backtracking control verb, the + first one that is backtracked onto acts. For example, in the pattern + A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure + in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases + it is the same as PCRE2, but there are cases where it differs. + + 13. 
There are some differences that are concerned with the settings of + captured strings when part of a pattern is repeated. For example, + matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2 un- + set, but in PCRE2 it is set to "b". + + 14. PCRE2's handling of duplicate capture group numbers and names is + not as general as Perl's. This is a consequence of the fact that PCRE2 + works internally just with numbers, using an external table to trans- + late between numbers and names. In particular, a pattern such as + (?|(?<a>A)|(?<b>B)), where the two capture groups have the same number + but different names, is not supported, and causes an error at compile + time. If it were allowed, it would not be possible to distinguish which + group matched, because both names map to capture group number 1. To + avoid this confusing situation, an error is given at compile time. + + 15. Perl used to recognize comments in some places that PCRE2 does not, + for example, between the ( and ? at the start of a group. If the /x + modifier is set, Perl allowed white space between ( and ? though the + latest Perls give an error (for a while it was just deprecated). There + may still be some cases where Perl behaves differently. + + 16. Perl, when in warning mode, gives warnings for character classes + such as [A-\d] or [a-[:digit:]]. It then treats the hyphens as liter- + als. PCRE2 has no warning features, so it gives an error in these cases + because they are almost certainly user mistakes. + + 17. In PCRE2, the upper/lower case character properties Lu and Ll are + not affected when case-independent matching is specified. For example, + \p{Lu} always matches an upper case letter. I think Perl has changed in + this respect; in the release at the time of writing (5.38), \p{Lu} and + \p{Ll} match all letters, regardless of case, when case independence is + specified. + + 18. From release 5.32.0, Perl locks out the use of \K in lookaround as- + sertions.
From release 10.38 PCRE2 does the same by default. However, + there is an option for re-enabling the previous behaviour. When this + option is set, \K is acted on when it occurs in positive assertions, + but is ignored in negative assertions. + + 19. PCRE2 provides some extensions to the Perl regular expression fa- + cilities. Perl 5.10 included new features that were not in earlier + versions of Perl, some of which (such as named parentheses) were in + PCRE2 for some time before. This list is with respect to Perl 5.38: + + (a) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the + $ meta-character matches only at the very end of the string. + + (b) A backslash followed by a letter with no special meaning is + faulted. (Perl can be made to issue a warning.) + + (c) If PCRE2_UNGREEDY is set, the greediness of the repetition quanti- + fiers is inverted, that is, by default they are not greedy, but if fol- + lowed by a question mark they are. + + (d) PCRE2_ANCHORED can be used at matching time to force a pattern to + be tried only at the first matching position in the subject string. + + (e) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and + PCRE2_NOTEMPTY_ATSTART options have no Perl equivalents. + + (f) The \R escape sequence can be restricted to match only CR, LF, or + CRLF by the PCRE2_BSR_ANYCRLF option. + + (g) The callout facility is PCRE2-specific. Perl supports codeblocks + and variable interpolation, but not general hooks on every match. + + (h) The partial matching facility is PCRE2-specific. + + (i) The alternative matching function (pcre2_dfa_match()) matches in a + different way and is not Perl-compatible. + + (j) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) + at the start of a pattern. These set overall options that cannot be + changed within the pattern. + + (k) PCRE2 supports non-atomic positive lookaround assertions. This is + an extension to the lookaround facilities.
The default, Perl-compatible + lookarounds are atomic. + + (l) There are three syntactical items in patterns that can refer to a + capturing group by number: back references such as \g{2}, subroutine + calls such as (?3), and condition references such as (?(4)...). PCRE2 + supports relative group numbers such as +2 and -4 in all three cases. + Perl supports both plus and minus for subroutine calls, but only minus + for back references, and no relative numbering at all for conditions. + + 20. Perl has different limits than PCRE2. See the pcre2limit documenta- + tion for details. Perl went with 5.10 from recursion to iteration keep- + ing the intermediate matches on the heap, which is ~10% slower but does + not fall into any stack-overflow limit. PCRE2 made a similar change at + release 10.30, and also has many build-time and run-time customizable + limits. + + 21. Unlike Perl, PCRE2 doesn't have character set modifiers and spe- + cially no way to set characters by context just like Perl's "/d". A + regular expression using PCRE2_UTF and PCRE2_UCP will use similar rules + to Perl's "/u"; something closer to "/a" could be selected by adding + other PCRE2_EXTRA_ASCII* options on top. + + 22. Some recursive patterns that Perl diagnoses as infinite recursions + can be handled by PCRE2, either by the interpreter or the JIT. An exam- + ple is /(?:|(?0)abcd)(?(R)|\z)/, which matches a sequence of any number + of repeated "abcd" substrings at the end of the subject. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 30 November 2023 + Copyright (c) 1997-2023 University of Cambridge. 
+ + +PCRE2 10.43 30 November 2023 PCRE2COMPAT(3) +------------------------------------------------------------------------------ + + + +PCRE2JIT(3) Library Functions Manual PCRE2JIT(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +PCRE2 JUST-IN-TIME COMPILER SUPPORT + + Just-in-time compiling is a heavyweight optimization that can greatly + speed up pattern matching. However, it comes at the cost of extra pro- + cessing before the match is performed, so it is of most benefit when + the same pattern is going to be matched many times. This does not nec- + essarily mean many calls of a matching function; if the pattern is not + anchored, matching attempts may take place many times at various posi- + tions in the subject, even for a single call. Therefore, if the subject + string is very long, it may still pay to use JIT even for one-off + matches. JIT support is available for all of the 8-bit, 16-bit and + 32-bit PCRE2 libraries. + + JIT support applies only to the traditional Perl-compatible matching + function. It does not apply when the DFA matching function is being + used. The code for JIT support was written by Zoltan Herczeg. + + +AVAILABILITY OF JIT SUPPORT + + JIT support is an optional feature of PCRE2. The "configure" option + --enable-jit (or equivalent CMake option) must be set when PCRE2 is + built if you want to use JIT. The support is limited to the following + hardware platforms: + + ARM 32-bit (v7, and Thumb2) + ARM 64-bit + IBM s390x 64 bit + Intel x86 32-bit and 64-bit + LoongArch 64 bit + MIPS 32-bit and 64-bit + Power PC 32-bit and 64-bit + RISC-V 32-bit and 64-bit + + If --enable-jit is set on an unsupported platform, compilation fails. + + A client program can tell if JIT support is available by calling + pcre2_config() with the PCRE2_CONFIG_JIT option. The result is one if + PCRE2 was built with JIT support, and zero otherwise. 
However, having + the JIT code available does not guarantee that it will be used for any + particular match. One reason for this is that there are a number of op- + tions and pattern items that are not supported by JIT (see below). An- + other reason is that in some environments JIT is unable to get memory + in which to build its compiled code. The only guarantee from pcre2_con- + fig() is that if it returns zero, JIT will definitely not be used. + + A simple program does not need to check availability in order to use + JIT when possible. The API is implemented in a way that falls back to + the interpretive code if JIT is not available or cannot be used for a + given match. For programs that need the best possible performance, + there is a "fast path" API that is JIT-specific. + + +SIMPLE USE OF JIT + + To make use of the JIT support in the simplest way, all you have to do + is to call pcre2_jit_compile() after successfully compiling a pattern + with pcre2_compile(). This function has two arguments: the first is the + compiled pattern pointer that was returned by pcre2_compile(), and the + second is zero or more of the following option bits: PCRE2_JIT_COM- + PLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT. + + If JIT support is not available, a call to pcre2_jit_compile() does + nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled + pattern is passed to the JIT compiler, which turns it into machine code + that executes much faster than the normal interpretive code, but yields + exactly the same results. The returned value from pcre2_jit_compile() + is zero on success, or a negative error code. + + There is a limit to the size of pattern that JIT supports, imposed by + the size of machine stack that it uses. The exact rules are not docu- + mented because they may change at any time, in particular, when new op- + timizations are introduced. If a pattern is too big, a call to + pcre2_jit_compile() returns PCRE2_ERROR_NOMEMORY. 
+ + PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for com- + plete matches. If you want to run partial matches using the PCRE2_PAR- + TIAL_HARD or PCRE2_PARTIAL_SOFT options of pcre2_match(), you should + set one or both of the other options as well as, or instead of + PCRE2_JIT_COMPLETE. The JIT compiler generates different optimized code + for each of the three modes (normal, soft partial, hard partial). When + pcre2_match() is called, the appropriate code is run if it is avail- + able. Otherwise, the pattern is matched using interpretive code. + + You can call pcre2_jit_compile() multiple times for the same compiled + pattern. It does nothing if it has previously compiled code for any of + the option bits. For example, you can call it once with PCRE2_JIT_COM- + PLETE and (perhaps later, when you find you need partial matching) + again with PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it + will ignore PCRE2_JIT_COMPLETE and just compile code for partial match- + ing. If pcre2_jit_compile() is called with no option bits set, it imme- + diately returns zero. This is an alternative way of testing whether JIT + is available. + + At present, it is not possible to free JIT compiled code except when + the entire compiled pattern is freed by calling pcre2_code_free(). + + In some circumstances you may need to call additional functions. These + are described in the section entitled "Controlling the JIT stack" be- + low. + + There are some pcre2_match() options that are not supported by JIT, and + there are also some pattern items that JIT cannot handle. Details are + given below. In both cases, matching automatically falls back to the + interpretive code. If you want to know whether JIT was actually used + for a particular match, you should arrange for a JIT callback function + to be set up as described in the section entitled "Controlling the JIT + stack" below, even if you do not need to supply a non-default JIT + stack. 
Such a callback function is called whenever JIT code is about to + be obeyed. If the match-time options are not right for JIT execution, + the callback function is not obeyed. + + If the JIT compiler finds an unsupported item, no JIT data is gener- + ated. You can find out if JIT compilation was successful for a compiled + pattern by calling pcre2_pattern_info() with the PCRE2_INFO_JITSIZE op- + tion. A non-zero result means that JIT compilation was successful. A + result of 0 means that JIT support is not available, or the pattern was + not processed by pcre2_jit_compile(), or the JIT compiler was not able + to handle the pattern. Successful JIT compilation does not, however, + guarantee the use of JIT at match time because there are some match + time options that are not supported by JIT. + + +MATCHING SUBJECTS CONTAINING INVALID UTF + + When a pattern is compiled with the PCRE2_UTF option, subject strings + are normally expected to be a valid sequence of UTF code units. By de- + fault, this is checked at the start of matching and an error is gener- + ated if invalid UTF is detected. The PCRE2_NO_UTF_CHECK option can be + passed to pcre2_match() to skip the check (for improved performance) if + you are sure that a subject string is valid. If this option is used + with an invalid string, the result is undefined. The calling program + may crash or loop or otherwise misbehave. + + However, a way of running matches on strings that may contain invalid + UTF sequences is available. Calling pcre2_compile() with the + PCRE2_MATCH_INVALID_UTF option has two effects: it tells the inter- + preter in pcre2_match() to support invalid UTF, and, if pcre2_jit_com- + pile() is subsequently called, the compiled JIT code also supports in- + valid UTF. Details of how this support works, in both the JIT and the + interpretive cases, are given in the pcre2unicode documentation.
+ + There is also an obsolete option for pcre2_jit_compile() called + PCRE2_JIT_INVALID_UTF, which currently exists only for backward compat- + ibility. It is superseded by the pcre2_compile() option + PCRE2_MATCH_INVALID_UTF and should no longer be used. It may be removed + in future. + + +UNSUPPORTED OPTIONS AND PATTERN ITEMS + + The pcre2_match() options that are supported for JIT matching are + PCRE2_COPY_MATCHED_SUBJECT, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, + PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and + PCRE2_PARTIAL_SOFT. The PCRE2_ANCHORED and PCRE2_ENDANCHORED options + are not supported at match time. + + If the PCRE2_NO_JIT option is passed to pcre2_match() it disables the + use of JIT, forcing matching by the interpreter code. + + The only unsupported pattern items are \C (match a single data unit) + when running in a UTF mode, and a callout immediately before an asser- + tion condition in a conditional group. + + +RETURN VALUES FROM JIT MATCHING + + When a pattern is matched using JIT, the return values are the same as + those given by the interpretive pcre2_match() code, with the addition + of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the + memory used for the JIT stack was insufficient. See "Controlling the + JIT stack" below for a discussion of JIT stack usage. + + The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if + searching a very large pattern tree goes on for too long, as it is in + the same circumstance when JIT is not used, but the details of exactly + what is counted are not the same. The PCRE2_ERROR_DEPTHLIMIT error code + is never returned when JIT matching is used. + + +CONTROLLING THE JIT STACK + + When the compiled JIT code runs, it needs a block of memory to use as a + stack. By default, it uses 32KiB on the machine stack. However, some + large or complicated patterns need more than this. 
The error PCRE2_ER- + ROR_JIT_STACKLIMIT is given when there is not enough stack. Three func- + tions are provided for managing blocks of memory for use as JIT stacks. + There is further discussion about the use of JIT stacks in the section + entitled "JIT stack FAQ" below. + + The pcre2_jit_stack_create() function creates a JIT stack. Its argu- + ments are a starting size, a maximum size, and a general context (for + memory allocation functions, or NULL for standard memory allocation). + It returns a pointer to an opaque structure of type pcre2_jit_stack, or + NULL if there is an error. The pcre2_jit_stack_free() function is used + to free a stack that is no longer needed. If its argument is NULL, this + function returns immediately, without doing anything. (For the techni- + cally minded: the address space is allocated by mmap or VirtualAlloc.) + A maximum stack size of 512KiB to 1MiB should be more than enough for + any pattern. + + The pcre2_jit_stack_assign() function specifies which stack JIT code + should use. Its arguments are as follows: + + pcre2_match_context *mcontext + pcre2_jit_callback callback + void *data + + The first argument is a pointer to a match context. When this is subse- + quently passed to a matching function, its information determines which + JIT stack is used. If this argument is NULL, the function returns imme- + diately, without doing anything. There are three cases for the values + of the other two options: + + (1) If callback is NULL and data is NULL, an internal 32KiB block + on the machine stack is used. This is the default when a match + context is created. + + (2) If callback is NULL and data is not NULL, data must be + a pointer to a valid JIT stack, the result of calling + pcre2_jit_stack_create(). + + (3) If callback is not NULL, it must point to a function that is + called with data as an argument at the start of matching, in + order to set up a JIT stack. 
If the return from the callback + function is NULL, the internal 32KiB stack is used; otherwise the + return value must be a valid JIT stack, the result of calling + pcre2_jit_stack_create(). + + A callback function is obeyed whenever JIT code is about to be run; it + is not obeyed when pcre2_match() is called with options that are incom- + patible for JIT matching. A callback function can therefore be used to + determine whether a match operation was executed by JIT or by the in- + terpreter. + + You may safely use the same JIT stack for more than one pattern (either + by assigning directly or by callback), as long as the patterns are + matched sequentially in the same thread. Currently, the only way to set + up non-sequential matches in one thread is to use callouts: if a call- + out function starts another match, that match must use a different JIT + stack to the one used for currently suspended match(es). + + In a multithread application, if you do not specify a JIT stack, or if + you assign or pass back NULL from a callback, that is thread-safe, be- + cause each thread has its own machine stack. However, if you assign or + pass back a non-NULL JIT stack, this must be a different stack for each + thread so that the application is thread-safe. + + Strictly speaking, even more is allowed. You can assign the same non- + NULL stack to a match context that is used by any number of patterns, + as long as they are not used for matching by multiple threads at the + same time. For example, you could use the same stack in all compiled + patterns, with a global mutex in the callback to wait until the stack + is available for use. However, this is an inefficient solution, and not + recommended. + + This is a suggestion for how a multithreaded program that needs to set + up non-default JIT stacks might operate: + + During thread initialization + thread_local_var = pcre2_jit_stack_create(...) 
+ + During thread exit + pcre2_jit_stack_free(thread_local_var) + + Use a one-line callback function + return thread_local_var + + All the functions described in this section do nothing if JIT is not + available. + + +JIT STACK FAQ + + (1) Why do we need JIT stacks? + + PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack + where the local data of the current node is pushed before checking its + child nodes. Allocating real machine stack on some platforms is diffi- + cult. For example, the stack chain needs to be updated every time if we + extend the stack on PowerPC. Although it is possible, its updating + time overhead decreases performance. So we do the recursion in memory. + + (2) Why don't we simply allocate blocks of memory with malloc()? + + Modern operating systems have a nice feature: they can reserve an ad- + dress space instead of allocating memory. We can safely allocate memory + pages inside this address space, so the stack could grow without moving + memory data (this is important because of pointers). Thus we can allo- + cate 1MiB address space, and use only a single memory page (usually + 4KiB) if that is enough. However, we can still grow up to 1MiB anytime + if needed. + + (3) Who "owns" a JIT stack? + + The owner of the stack is the user program, not the JIT studied pattern + or anything else. The user program must ensure that if a stack is being + used by pcre2_match(), (that is, it is assigned to a match context that + is passed to the pattern currently running), that stack must not be + used by any other threads (to avoid overwriting the same memory area). + The best practice for multithreaded programs is to allocate a stack for + each thread, and return this stack through the JIT callback function. + + (4) When should a JIT stack be freed? + + You can free a JIT stack at any time, as long as it will not be used by + pcre2_match() again. When you assign the stack to a match context, only + a pointer is set. 
There is no reference counting or any other magic. + You can free compiled patterns, contexts, and stacks in any order, any- + time. Just do not call pcre2_match() with a match context pointing to + an already freed stack, as that will cause a SEGFAULT. (Also, do not free + a stack currently used by pcre2_match() in another thread). You can + also replace the stack in a context at any time when it is not in use. + You should free the previous stack before assigning a replacement. + + (5) Should I allocate/free a stack every time before/after calling + pcre2_match()? + + No, because this is too costly in terms of resources. However, you + could implement some clever scheme that releases the stack if it has + not been used for, let's say, two minutes. The JIT callback can help to + achieve this without keeping a list of patterns. + + (6) OK, the stack is for long term memory allocation. But what happens + if a pattern causes stack overflow with a stack of 1MiB? Is that 1MiB + kept until the stack is freed? + + Especially on embedded systems, it might be a good idea to release mem- + ory sometimes without freeing the stack. There is no API for this at + the moment. Probably a function call which returns the amount of memory + currently allocated for a stack and another which allows releasing mem- + ory (shrinking the stack) would be a good idea if someone needs this. + + (7) This is too much of a headache. Isn't there any better solution for + JIT stack handling? + + No, thanks to Windows. If POSIX threads were used everywhere, we could + throw out this complicated API. + + +FREEING JIT SPECULATIVE MEMORY + + void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); + + The JIT executable allocator does not free all memory when it is possi- + ble. It expects new allocations, and keeps some free memory around to + improve allocation speed. However, in low memory conditions, it might + be better to free all possible memory.
You can cause this to happen by + calling pcre2_jit_free_unused_memory(). Its argument is a general con- + text, for custom memory management, or NULL for standard memory manage- + ment. + + +EXAMPLE CODE + + This is a single-threaded example that specifies a JIT stack without + using a callback. A real program should include error checking after + all the function calls. + + int rc; + pcre2_code *re; + pcre2_match_data *match_data; + pcre2_match_context *mcontext; + pcre2_jit_stack *jit_stack; + + re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, + &errornumber, &erroffset, NULL); + rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE); + mcontext = pcre2_match_context_create(NULL); + jit_stack = pcre2_jit_stack_create(32*1024, 512*1024, NULL); + pcre2_jit_stack_assign(mcontext, NULL, jit_stack); + match_data = pcre2_match_data_create(re, 10); + rc = pcre2_match(re, subject, length, 0, 0, match_data, mcontext); + /* Process result */ + + pcre2_code_free(re); + pcre2_match_data_free(match_data); + pcre2_match_context_free(mcontext); + pcre2_jit_stack_free(jit_stack); + + +JIT FAST PATH API + + Because the API described above falls back to interpreted matching when + JIT is not available, it is convenient for programs that are written + for general use in many environments. However, calling JIT via + pcre2_match() does have a performance impact. Programs that are written + for use where JIT is known to be available, and which need the best + possible performance, can instead use a "fast path" API to call JIT + matching directly instead of calling pcre2_match() (obviously only for + patterns that have been successfully processed by pcre2_jit_compile()). + + The fast path function is called pcre2_jit_match(), and it takes ex- + actly the same arguments as pcre2_match(). However, the subject string + must be specified with a length; PCRE2_ZERO_TERMINATED is not sup- + ported. 
Unsupported option bits (for example, PCRE2_ANCHORED and + PCRE2_ENDANCHORED) are ignored, as is the PCRE2_NO_JIT option. The re- + turn values are also the same as for pcre2_match(), plus PCRE2_ER- + ROR_JIT_BADOPTION if a matching mode (partial or complete) is requested + that was not compiled. + + When you call pcre2_match(), as well as testing for invalid options, a + number of other sanity checks are performed on the arguments. For exam- + ple, if the subject pointer is NULL but the length is non-zero, an im- + mediate error is given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF + subject string is tested for validity. In the interests of speed, these + checks do not happen on the JIT fast path. If invalid UTF data is + passed when PCRE2_MATCH_INVALID_UTF was not set for pcre2_compile(), + the result is undefined. The program may crash or loop or give wrong + results. In the absence of PCRE2_MATCH_INVALID_UTF you should call + pcre2_jit_match() in UTF mode only if you are sure the subject is + valid. + + Bypassing the sanity checks and the pcre2_match() wrapping can give + speedups of more than 10%. + + +SEE ALSO + + pcre2api(3), pcre2unicode(3) + + +AUTHOR + + Philip Hazel (FAQ by Zoltan Herczeg) + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 21 February 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.43 21 February 2024 PCRE2JIT(3) +------------------------------------------------------------------------------ + + + +PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +SIZE AND OTHER LIMITATIONS + + There are some size limitations in PCRE2 but it is hoped that they will + never in practice be relevant. 
+ + The maximum size of a compiled pattern is approximately 64 thousand + code units for the 8-bit and 16-bit libraries if PCRE2 is compiled with + the default internal linkage size, which is 2 bytes for these li- + braries. If you want to process regular expressions that are truly + enormous, you can compile PCRE2 with an internal linkage size of 3 or 4 + (when building the 16-bit library, 3 is rounded up to 4). See the + README file in the source distribution and the pcre2build documentation + for details. In these cases the limit is substantially larger. How- + ever, the speed of execution is slower. In the 32-bit library, the in- + ternal linkage size is always 4. + + The maximum length of a source pattern string is essentially unlimited; + it is the largest number a PCRE2_SIZE variable can hold. However, the + program that calls pcre2_compile() can specify a smaller limit. + + The maximum length (in code units) of a subject string is one less than + the largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an un- + signed integer type, usually defined as size_t. Its maximum value (that + is ~(PCRE2_SIZE)0) is reserved as a special indicator for zero-termi- + nated strings and unset offsets. + + All values in repeating quantifiers must be less than 65536. + + There are two different limits that apply to branches of lookbehind as- + sertions. If every branch in such an assertion matches a fixed number + of characters, the maximum length of any branch is 65535 characters. If + any branch matches a variable number of characters, then the maximum + matching length for every branch is limited. The default limit is set + at compile time, defaulting to 255, but can be changed by the calling + program. + + There is no limit to the number of parenthesized groups, but there can + be no more than 65535 capture groups, and there is a limit to the depth + of nesting of parenthesized subpatterns of all kinds. 
This is imposed + in order to limit the amount of system stack used at compile time. The + default limit can be specified when PCRE2 is built; if not, the default + is set to 250. An application can change this limit by calling + pcre2_set_parens_nest_limit() to set the limit in a compile context. + + The maximum length of a name for a named capture group is 32 code units, + and the maximum number of such groups is 10000. + + The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or + (*THEN) verb is 255 code units for the 8-bit library and 65535 code + units for the 16-bit and 32-bit libraries. + + The maximum length of a string argument to a callout is the largest + number a 32-bit unsigned integer can hold. + + The maximum amount of heap memory used for matching is controlled by + the heap limit, which can be set in a pattern or in a match context. + The default is a very large number, effectively unlimited. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: August 2023 + Copyright (c) 1997-2023 University of Cambridge. + + +PCRE2 10.43 1 August 2023 PCRE2LIMITS(3) +------------------------------------------------------------------------------ + + + +PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +PCRE2 MATCHING ALGORITHMS + + This document describes the two different algorithms that are available + in PCRE2 for matching a compiled regular expression against a given + subject string. The "standard" algorithm is the one provided by the + pcre2_match() function. This works in the same way as Perl's matching + function, and provides a Perl-compatible matching operation. The just-in- + time (JIT) optimization that is described in the pcre2jit documentation + is compatible with this function.
+ + An alternative algorithm is provided by the pcre2_dfa_match() function; + it operates in a different way, and is not Perl-compatible. This alter- + native has advantages and disadvantages compared with the standard al- + gorithm, and these are described below. + + When there is only one possible way in which a given subject string can + match a pattern, the two algorithms give the same answer. A difference + arises, however, when there are multiple possibilities. For example, if + the pattern + + ^<.*> + + is matched against the string + + <something> <something else> <something further> + + there are three possible answers. The standard algorithm finds only one + of them, whereas the alternative algorithm finds all three. + + +REGULAR EXPRESSIONS AS TREES + + The set of strings that are matched by a regular expression can be rep- + resented as a tree structure. An unlimited repetition in the pattern + makes the tree of infinite size, but it is still a tree. Matching the + pattern to a given subject string (from a given starting point) can be + thought of as a search of the tree. There are two ways to search a + tree: depth-first and breadth-first, and these correspond to the two + matching algorithms provided by PCRE2. + + +THE STANDARD MATCHING ALGORITHM + + In the terminology of Jeffrey Friedl's book "Mastering Regular Expres- + sions", the standard algorithm is an "NFA algorithm". It conducts a + depth-first search of the pattern tree. That is, it proceeds along a + single path through the tree, checking that the subject matches what is + required. When there is a mismatch, the algorithm tries any alterna- + tives at the current point, and if they all fail, it backs up to the + previous branch point in the tree, and tries the next alternative + branch at that level. This often involves backing up (moving to the + left) in the subject string as well. The order in which repetition + branches are tried is controlled by the greedy or ungreedy nature of + the quantifier.
+ + If a leaf node is reached, a matching string has been found, and at + that point the algorithm stops. Thus, if there is more than one possi- + ble match, this algorithm returns the first one that it finds. Whether + this is the shortest, the longest, or some intermediate length depends + on the way the alternations and the greedy or ungreedy repetition quan- + tifiers are specified in the pattern. + + Because it ends up with a single path through the tree, it is rela- + tively straightforward for this algorithm to keep track of the sub- + strings that are matched by portions of the pattern in parentheses. + This provides support for capturing parentheses and backreferences. + + +THE ALTERNATIVE MATCHING ALGORITHM + + This algorithm conducts a breadth-first search of the tree. Starting + from the first matching point in the subject, it scans the subject + string from left to right, once, character by character, and as it does + this, it remembers all the paths through the tree that represent valid + matches. In Friedl's terminology, this is a kind of "DFA algorithm", + though it is not implemented as a traditional finite state machine (it + keeps multiple states active simultaneously). + + Although the general principle of this matching algorithm is that it + scans the subject string only once, without backtracking, there is one + exception: when a lookaround assertion is encountered, the characters + following or preceding the current point have to be independently in- + spected. + + The scan continues until either the end of the subject is reached, or + there are no more unterminated paths. At this point, terminated paths + represent the different matching possibilities (if there are none, the + match has failed). Thus, if there is more than one possible match, + this algorithm finds all of them, and in particular, it finds the + longest. The matches are returned in the output vector in decreasing + order of length. 
There is an option to stop the algorithm after the + first match (which is necessarily the shortest) is found. + + Note that the size of vector needed to contain all the results depends + on the number of simultaneous matches, not on the number of parentheses + in the pattern. Using pcre2_match_data_create_from_pattern() to create + the match data block is therefore not advisable when doing DFA match- + ing. + + Note also that all the matches that are found start at the same point + in the subject. If the pattern + + cat(er(pillar)?)? + + is matched against the string "the caterpillar catchment", the result + is the three strings "caterpillar", "cater", and "cat" that start at + the fifth character of the subject. The algorithm does not automati- + cally move on to find matches that start at later positions. + + PCRE2's "auto-possessification" optimization usually applies to charac- + ter repeats at the end of a pattern (as well as internally). For exam- + ple, the pattern "a\d+" is compiled as if it were "a\d++" because there + is no point even considering the possibility of backtracking into the + repeated digits. For DFA matching, this means that only one possible + match is found. If you really do want multiple matches in such cases, + either use an ungreedy repeat ("a\d+?") or set the PCRE2_NO_AUTO_POS- + SESS option when compiling. + + There are a number of features of PCRE2 regular expressions that are + not supported or behave differently in the alternative matching func- + tion. Those that are not supported cause an error if encountered. + + 1. Because the algorithm finds all possible matches, the greedy or un- + greedy nature of repetition quantifiers is not relevant (though it may + affect auto-possessification, as just described). During matching, + greedy and ungreedy quantifiers are treated in exactly the same way. 
+ However, possessive quantifiers can make a difference when what follows + could also match what is quantified, for example in a pattern like + this: + + ^a++\w! + + This pattern matches "aaab!" but not "aaa!", which would be matched by + a non-possessive quantifier. Similarly, if an atomic group is present, + it is matched as if it were a standalone pattern at the current point, + and the longest match is then "locked in" for the rest of the overall + pattern. + + 2. When dealing with multiple paths through the tree simultaneously, it + is not straightforward to keep track of captured substrings for the + different matching possibilities, and PCRE2's implementation of this + algorithm does not attempt to do this. This means that no captured sub- + strings are available. + + 3. Because no substrings are captured, backreferences within the pat- + tern are not supported. + + 4. For the same reason, conditional expressions that use a backrefer- + ence as the condition or test for a specific group recursion are not + supported. + + 5. Again for the same reason, script runs are not supported. + + 6. Because many paths through the tree may be active, the \K escape se- + quence, which resets the start of the match when encountered (but may + be on some paths and not on others), is not supported. + + 7. Callouts are supported, but the value of the capture_top field is + always 1, and the value of the capture_last field is always 0. + + 8. The \C escape sequence, which (in the standard algorithm) always + matches a single code unit, even in a UTF mode, is not supported in + these modes, because the alternative algorithm moves through the sub- + ject string one character (not code unit) at a time, for all active + paths through the tree. + + 9. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) + are not supported. (*FAIL) is supported, and behaves like a failing + negative assertion. + + 10. 
The PCRE2_MATCH_INVALID_UTF option for pcre2_compile() is not sup- + ported by pcre2_dfa_match(). + + +ADVANTAGES OF THE ALTERNATIVE ALGORITHM + + The main advantage of the alternative algorithm is that all possible + matches (at a single point in the subject) are automatically found, and + in particular, the longest match is found. To find more than one match + at the same point using the standard algorithm, you have to do kludgy + things with callouts. + + Partial matching is possible with this algorithm, though it has some + limitations. The pcre2partial documentation gives details of partial + matching and discusses multi-segment matching. + + +DISADVANTAGES OF THE ALTERNATIVE ALGORITHM + + The alternative algorithm suffers from a number of disadvantages: + + 1. It is substantially slower than the standard algorithm. This is + partly because it has to search for all possible matches, but is also + because it is less susceptible to optimization. + + 2. Capturing parentheses, backreferences, script runs, and matching + within invalid UTF strings are not supported. + + 3. Although atomic groups are supported, their use does not provide the + performance advantage that it does for the standard algorithm. + + 4. JIT optimization is not supported. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 19 January 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.43 19 January 2024 PCRE2MATCHING(3) +------------------------------------------------------------------------------ + + + +PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3) + + +NAME + PCRE2 - Perl-compatible regular expressions + + +PARTIAL MATCHING IN PCRE2 + + In normal use of PCRE2, if there is a match up to the end of a subject + string, but more characters are needed to match the entire pattern, + PCRE2_ERROR_NOMATCH is returned, just like any other failing match.
+ There are circumstances where it might be helpful to distinguish this + "partial match" case. + + One example is an application where the subject string is very long, + and not all available at once. The requirement here is to be able to do + the matching segment by segment, but special action is needed when a + matched substring spans the boundary between two segments. + + Another example is checking a user input string as it is typed, to en- + sure that it conforms to a required format. Invalid characters can be + immediately diagnosed and rejected, giving instant feedback. + + Partial matching is a PCRE2-specific feature; it is not Perl-compati- + ble. It is requested by setting one of the PCRE2_PARTIAL_HARD or + PCRE2_PARTIAL_SOFT options when calling a matching function. The dif- + ference between the two options is whether or not a partial match is + preferred to an alternative complete match, though the details differ + between the two types of matching function. If both options are set, + PCRE2_PARTIAL_HARD takes precedence. + + If you want to use partial matching with just-in-time optimized code, + as well as setting a partial match option for the matching function, + you must also call pcre2_jit_compile() with one or both of these op- + tions: + + PCRE2_JIT_PARTIAL_HARD + PCRE2_JIT_PARTIAL_SOFT + + PCRE2_JIT_COMPLETE should also be set if you are going to run non-par- + tial matches on the same pattern. Separate code is compiled for each + mode. If the appropriate JIT mode has not been compiled, interpretive + matching code is used. + + Setting a partial matching option disables two of PCRE2's standard op- + timization hints. PCRE2 remembers the last literal code unit in a pat- + tern, and abandons matching immediately if it is not present in the + subject string. This optimization cannot be used for a subject string + that might match only partially. 
PCRE2 also remembers a minimum length + of a matching string, and does not bother to run the matching function + on shorter strings. This optimization is also disabled for partial + matching. + + +REQUIREMENTS FOR A PARTIAL MATCH + + A possible partial match occurs during matching when the end of the + subject string is reached successfully, but either more characters are + needed to complete the match, or the addition of more characters might + change what is matched. + + Example 1: if the pattern is /abc/ and the subject is "ab", more char- + acters are definitely needed to complete a match. In this case both + hard and soft matching options yield a partial match. + + Example 2: if the pattern is /ab+/ and the subject is "ab", a complete + match can be found, but the addition of more characters might change + what is matched. In this case, only PCRE2_PARTIAL_HARD returns a par- + tial match; PCRE2_PARTIAL_SOFT returns the complete match. + + On reaching the end of the subject, when PCRE2_PARTIAL_HARD is set, if + the next pattern item is \z, \Z, \b, \B, or $ there is always a partial + match. Otherwise, for both options, the next pattern item must be one + that inspects a character, and at least one of the following must be + true: + + (1) At least one character has already been inspected. An inspected + character need not form part of the final matched string; lookbehind + assertions and the \K escape sequence provide ways of inspecting char- + acters before the start of a matched string. + + (2) The pattern contains one or more lookbehind assertions. This condi- + tion exists in case there is a lookbehind that inspects characters be- + fore the start of the match. + + (3) There is a special case when the whole pattern can match an empty + string. When the starting point is at the end of the subject, the + empty string match is a possibility, and if PCRE2_PARTIAL_SOFT is set + and neither of the above conditions is true, it is returned. 
However, + because adding more characters might result in a non-empty match, + PCRE2_PARTIAL_HARD returns a partial match, which in this case means + "there is going to be a match at this point, but until some more char- + acters are added, we do not know if it will be an empty string or some- + thing longer". + + +PARTIAL MATCHING USING pcre2_match() + + When a partial matching option is set, the result of calling + pcre2_match() can be one of the following: + + A successful match + A complete match has been found, starting and ending within this sub- + ject. + + PCRE2_ERROR_NOMATCH + No match can start anywhere in this subject. + + PCRE2_ERROR_PARTIAL + Adding more characters may result in a complete match that uses one + or more characters from the end of this subject. + + When a partial match is returned, the first two elements in the ovector + point to the portion of the subject that was matched, but the values in + the rest of the ovector are undefined. The appearance of \K in the pat- + tern has no effect for a partial match. Consider this pattern: + + /abc\K123/ + + If it is matched against "456abc123xyz" the result is a complete match, + and the ovector defines the matched string as "123", because \K resets + the "start of match" point. However, if a partial match is requested + and the subject string is "456abc12", a partial match is found for the + string "abc12", because all these characters are needed for a subse- + quent re-match with additional characters. + + If there is more than one partial match, the first one that was found + provides the data that is returned. Consider this pattern: + + /123\w+X|dogY/ + + If this is matched against the subject string "abc123dog", both alter- + natives fail to match, but the end of the subject is reached during + matching, so PCRE2_ERROR_PARTIAL is returned. The offsets are set to 3 + and 9, identifying "123dog" as the first partial match. 
(In this exam- + ple, there are two partial matches, because "dog" on its own partially + matches the second alternative.) + + How a partial match is processed by pcre2_match() + + What happens when a partial match is identified depends on which of the + two partial matching options is set. + + If PCRE2_PARTIAL_HARD is set, PCRE2_ERROR_PARTIAL is returned as soon + as a partial match is found, without continuing to search for possible + complete matches. This option is "hard" because it prefers an earlier + partial match over a later complete match. For this reason, the assump- + tion is made that the end of the supplied subject string is not the + true end of the available data, which is why \z, \Z, \b, \B, and $ al- + ways give a partial match. + + If PCRE2_PARTIAL_SOFT is set, the partial match is remembered, but + matching continues as normal, and other alternatives in the pattern are + tried. If no complete match can be found, PCRE2_ERROR_PARTIAL is re- + turned instead of PCRE2_ERROR_NOMATCH. This option is "soft" because it + prefers a complete match over a partial match. All the various matching + items in a pattern behave as if the subject string is potentially com- + plete; \z, \Z, and $ match at the end of the subject, as normal, and + for \b and \B the end of the subject is treated as a non-alphanumeric. + + The difference between the two partial matching options can be illus- + trated by a pattern such as: + + /dog(sbody)?/ + + This matches either "dog" or "dogsbody", greedily (that is, it prefers + the longer string if possible). If it is matched against the string + "dog" with PCRE2_PARTIAL_SOFT, it yields a complete match for "dog". + However, if PCRE2_PARTIAL_HARD is set, the result is PCRE2_ERROR_PAR- + TIAL. 
On the other hand, if the pattern is made ungreedy the result is + different: + + /dog(sbody)??/ + + In this case the result is always a complete match because that is + found first, and matching never continues after finding a complete + match. It might be easier to follow this explanation by thinking of the + two patterns like this: + + /dog(sbody)?/ is the same as /dogsbody|dog/ + /dog(sbody)??/ is the same as /dog|dogsbody/ + + The second pattern will never match "dogsbody", because it will always + find the shorter match first. + + Example of partial matching using pcre2test + + The pcre2test data modifiers partial_hard (or ph) and partial_soft (or + ps) set PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT, respectively, when + calling pcre2_match(). Here is a run of pcre2test using a pattern that + matches the whole subject in the form of a date: + + re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/ + data> 25dec3\=ph + Partial match: 25dec3 + data> 3ju\=ph + Partial match: 3ju + data> 3juj\=ph + No match + + This example gives the same results for both hard and soft partial + matching options. Here is an example where there is a difference: + + re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/ + data> 25jun04\=ps + 0: 25jun04 + 1: jun + data> 25jun04\=ph + Partial match: 25jun04 + + With PCRE2_PARTIAL_SOFT, the subject is matched completely. For + PCRE2_PARTIAL_HARD, however, the subject is assumed not to be complete, + so there is only a partial match. + + +MULTI-SEGMENT MATCHING WITH pcre2_match() + + PCRE was not originally designed with multi-segment matching in mind. + However, over time, features (including partial matching) that make + multi-segment matching possible have been added. A very long string can + be searched segment by segment by calling pcre2_match() repeatedly, + with the aim of achieving the same results that would happen if the en- + tire string was available for searching all the time.
Normally, the + strings that are being sought are much shorter than each individual + segment, and are in the middle of very long strings, so the pattern is + normally not anchored. + + Special logic must be implemented to handle a matched substring that + spans a segment boundary. PCRE2_PARTIAL_HARD should be used, because it + returns a partial match at the end of a segment whenever there is the + possibility of changing the match by adding more characters. The + PCRE2_NOTBOL option should also be set for all but the first segment. + + When a partial match occurs, the next segment must be added to the cur- + rent subject and the match re-run, using the startoffset argument of + pcre2_match() to begin at the point where the partial match started. + For example: + + re> /\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d/ + data> ...the date is 23ja\=ph + Partial match: 23ja + data> ...the date is 23jan19 and on that day...\=offset=15 + 0: 23jan19 + 1: jan + + Note the use of the offset modifier to start the new match where the + partial match was found. In this example, the next segment was added to + the one in which the partial match was found. This is the most + straightforward approach, typically using a memory buffer that is twice + the size of each segment. After a partial match, the first half of the + buffer is discarded, the second half is moved to the start of the + buffer, and a new segment is added before repeating the match as in the + example above. After a no match, the entire buffer can be discarded. + + If there are memory constraints, you may want to discard text that pre- + cedes a partial match before adding the next segment. Unfortunately, + this is not at present straightforward. In cases such as the above, + where the pattern does not contain any lookbehinds, it is sufficient to + retain only the partially matched substring. 
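However, if the pattern + contains a lookbehind assertion, characters that precede the start of + the partial match may have been inspected during the matching process. + When pcre2test displays a partial match, it indicates these characters + with '<' if the allusedtext modifier is set: + + re> "(?<=123)abc" + data> xx123ab\=ph,allusedtext + Partial match: 123ab + <<< + + However, the allusedtext modifier is not available for JIT matching, + because JIT matching does not record the first (or last) consulted + characters. For this reason, this information is not available via the + API. It is therefore not possible in general to obtain the exact number + of characters that must be retained in order to get the right match re- + sult. If you cannot retain the entire segment, you must find some + heuristic way of choosing. + + If you know the approximate length of the matching substrings, you can + use that to decide how much text to retain. The only lookbehind infor- + mation that is currently available via the API is the length of the + longest individual lookbehind in a pattern, but this can be misleading + if there are nested lookbehinds. The value returned by calling + pcre2_pattern_info() with the PCRE2_INFO_MAXLOOKBEHIND option is the + maximum number of characters (not code units) that any individual look- + behind moves back when it is processed. A pattern such as + "(?<=(?<!b)a)" has a maximum lookbehind value of one, but inspects two + characters before its starting point. + + In a non-UTF or a 32-bit case, moving back is just a subtraction, but + in UTF-8 or UTF-16 you have to count characters while moving back + through the code units. + + +PARTIAL MATCHING USING pcre2_dfa_match() + + The DFA function moves along the subject string character by character, + without backtracking, searching for all possible matches simultane- + ously. If the end of the subject is reached before the end of the pat- + tern, there is the possibility of a partial match. + + When PCRE2_PARTIAL_SOFT is set, PCRE2_ERROR_PARTIAL is returned only if + there have been no complete matches. Otherwise, the complete matches + are returned. If PCRE2_PARTIAL_HARD is set, a partial match takes + precedence over any complete matches. The portion of the string that + was matched when the longest partial match was found is set as the + first matching string. + + Because the DFA function always searches for all possible matches, and + there is no difference between greedy and ungreedy repetition, its be- + haviour is different from the pcre2_match(). Consider the string "dog" + matched against this ungreedy pattern: + + /dog(sbody)??/ + + Whereas the standard function stops as soon as it finds the complete + match for "dog", the DFA function also finds the partial match for + "dogsbody", and so returns that when PCRE2_PARTIAL_HARD is set. + + +MULTI-SEGMENT MATCHING WITH pcre2_dfa_match() + + When a partial match has been found using the DFA matching function, it + is possible to continue the match by providing additional subject data + and calling the function again with the same compiled regular expres- + sion, this time setting the PCRE2_DFA_RESTART option. You must pass the + same working space as before, because this is where details of the pre- + vious partial match are stored. You can set the PCRE2_PARTIAL_SOFT or + PCRE2_PARTIAL_HARD options with PCRE2_DFA_RESTART to continue partial + matching over multiple segments. Here is an example using pcre2test: + + re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/ + data> 23ja\=dfa,ps + Partial match: 23ja + data> n05\=dfa,dfa_restart + 0: n05 + + The first call has "23ja" as the subject, and requests partial match- + ing; the second call has "n05" as the subject for the continued + (restarted) match. Notice that when the match is complete, only the + last part is shown; PCRE2 does not retain the previously partially- + matched string. It is up to the calling program to do that if it needs + to.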
This means that, for an unanchored pattern, if a continued match + fails, it is not possible to try again at a new starting point. All + this facility is capable of doing is continuing with the previous match + attempt. For example, consider this pattern: + + 1234|3789 + + If the first part of the subject is "ABC123", a partial match of the + first alternative is found at offset 3. There is no partial match for + the second alternative, because such a match does not start at the same + point in the subject string. Attempting to continue with the string + "7890" does not yield a match because only those alternatives that + match at one point in the subject are remembered. Depending on the ap- + plication, this may or may not be what you want. + + If you do want to allow for starting again at the next character, one + way of doing it is to retain some or all of the segment and try a new + complete match, as described for pcre2_match() above. Another possibil- + ity is to work with two buffers. If a partial match at offset n in the + first buffer is followed by "no match" when PCRE2_DFA_RESTART is used + on the second buffer, you can then try a new match starting at offset + n+1 in the first buffer. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 04 September 2019 + Copyright (c) 1997-2019 University of Cambridge. + + +PCRE2 10.34 04 September 2019 PCRE2PARTIAL(3) +------------------------------------------------------------------------------ + + + +PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +PCRE2 REGULAR EXPRESSION DETAILS + + The syntax and semantics of the regular expressions that are supported + by PCRE2 are described in detail below. There is a quick-reference syn- + tax summary in the pcre2syntax page. PCRE2 tries to match Perl syntax + and semantics as closely as it can. 
PCRE2 also supports some alterna- + tive regular expression syntax (which does not conflict with the Perl + syntax) in order to provide some compatibility with regular expressions + in Python, .NET, and Oniguruma. + + Perl's regular expressions are described in its own documentation, and + regular expressions in general are covered in a number of books, some + of which have copious examples. Jeffrey Friedl's "Mastering Regular Ex- + pressions", published by O'Reilly, covers regular expressions in great + detail. This description of PCRE2's regular expressions is intended as + reference material. + + This document discusses the regular expression patterns that are sup- + ported by PCRE2 when its main matching function, pcre2_match(), is + used. PCRE2 also has an alternative matching function, + pcre2_dfa_match(), which matches using a different algorithm that is + not Perl-compatible. Some of the features discussed below are not + available when DFA matching is used. The advantages and disadvantages + of the alternative function, and how it differs from the normal func- + tion, are discussed in the pcre2matching page. + + +SPECIAL START-OF-PATTERN ITEMS + + A number of options that can be passed to pcre2_compile() can also be + set by special items at the start of a pattern. These are not Perl-com- + patible, but are provided to make these options accessible to pattern + writers who are not able to change the program that processes the pat- + tern. Any number of these items may appear, but they must all be to- + gether right at the start of the pattern string, and the letters must + be in upper case. + + UTF support + + In the 8-bit and 16-bit PCRE2 libraries, characters may be coded either + as single code units, or as multiple UTF-8 or UTF-16 code units. UTF-32 + can be specified for the 32-bit library, in which case it constrains + the character values to valid Unicode code points. 
To process UTF + strings, PCRE2 must be built to include Unicode support (which is the + default). When using UTF strings you must either call the compiling + function with one or both of the PCRE2_UTF or PCRE2_MATCH_INVALID_UTF + options, or the pattern must start with the special sequence (*UTF), + which is equivalent to setting the relevant PCRE2_UTF. How setting a + UTF mode affects pattern matching is mentioned in several places below. + There is also a summary of features in the pcre2unicode page. + + Some applications that allow their users to supply patterns may wish to + restrict them to non-UTF data for security reasons. If the + PCRE2_NEVER_UTF option is passed to pcre2_compile(), (*UTF) is not al- + lowed, and its appearance in a pattern causes an error. + + Unicode property support + + Another special sequence that may appear at the start of a pattern is + (*UCP). This has the same effect as setting the PCRE2_UCP option: it + causes sequences such as \d and \w to use Unicode properties to deter- + mine character types, instead of recognizing only characters with codes + less than 256 via a lookup table. It also causes upper/lower casing op- + erations to use Unicode properties for characters with code points + greater than 127, even when UTF is not set. These behaviours can be + changed within the pattern; see the section entitled "Internal Option + Setting" below. + + Some applications that allow their users to supply patterns may wish to + restrict them for security reasons. If the PCRE2_NEVER_UCP option is + passed to pcre2_compile(), (*UCP) is not allowed, and its appearance in + a pattern causes an error. + + Locking out empty string matching + + Starting a pattern with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) has the same + effect as passing the PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART option + to whichever matching function is subsequently called to match the pat- + tern. 
These options lock out the matching of empty strings, either en- + tirely, or only at the start of the subject. + + Disabling auto-possessification + + If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as + setting the PCRE2_NO_AUTO_POSSESS option. This stops PCRE2 from making + quantifiers possessive when what follows cannot match the repeated + item. For example, by default a+b is treated as a++b. For more details, + see the pcre2api documentation. + + Disabling start-up optimizations + + If a pattern starts with (*NO_START_OPT), it has the same effect as + setting the PCRE2_NO_START_OPTIMIZE option. This disables several opti- + mizations for quickly reaching "no match" results. For more details, + see the pcre2api documentation. + + Disabling automatic anchoring + + If a pattern starts with (*NO_DOTSTAR_ANCHOR), it has the same effect + as setting the PCRE2_NO_DOTSTAR_ANCHOR option. This disables optimiza- + tions that apply to patterns whose top-level branches all start with .* + (match any number of arbitrary characters). For more details, see the + pcre2api documentation. + + Disabling JIT compilation + + If a pattern that starts with (*NO_JIT) is successfully compiled, an + attempt by the application to apply the JIT optimization by calling + pcre2_jit_compile() is ignored. + + Setting match resource limits + + The pcre2_match() function contains a counter that is incremented every + time it goes round its main loop. The caller of pcre2_match() can set a + limit on this counter, which therefore limits the amount of computing + resource used for a match. The maximum depth of nested backtracking can + also be limited; this indirectly restricts the amount of heap memory + that is used, but there is also an explicit memory limit that can be + set. + + These facilities are provided to catch runaway matches that are pro- + voked by patterns with huge matching trees. 
A common example is a pat- + tern with nested unlimited repeats applied to a long string that does + not match. When one of these limits is reached, pcre2_match() gives an + error return. The limits can also be set by items at the start of the + pattern of the form + + (*LIMIT_HEAP=d) + (*LIMIT_MATCH=d) + (*LIMIT_DEPTH=d) + + where d is any number of decimal digits. However, the value of the set- + ting must be less than the value set (or defaulted) by the caller of + pcre2_match() for it to have any effect. In other words, the pattern + writer can lower the limits set by the programmer, but not raise them. + If there is more than one setting of one of these limits, the lower + value is used. The heap limit is specified in kibibytes (units of 1024 + bytes). + + Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This + name is still recognized for backwards compatibility. + + The heap limit applies only when the pcre2_match() or pcre2_dfa_match() + interpreters are used for matching. It does not apply to JIT. The match + limit is used (but in a different way) when JIT is being used, or when + pcre2_dfa_match() is called, to limit computing resource usage by those + matching functions. The depth limit is ignored by JIT but is relevant + for DFA matching, which uses function recursion for recursions within + the pattern and for lookaround assertions and atomic groups. In this + case, the depth limit controls the depth of such recursion. + + Newline conventions + + PCRE2 supports six different conventions for indicating line breaks in + strings: a single CR (carriage return) character, a single LF (line- + feed) character, the two-character sequence CRLF, any of the three pre- + ceding, any Unicode newline sequence, or the NUL character (binary + zero). The pcre2api page has further discussion about newlines, and + shows how to set the newline convention when calling pcre2_compile(). 
+ + It is also possible to specify a newline convention by starting a pat- + tern string with one of the following sequences: + + (*CR) carriage return + (*LF) linefeed + (*CRLF) carriage return, followed by linefeed + (*ANYCRLF) any of the three above + (*ANY) all Unicode newline sequences + (*NUL) the NUL character (binary zero) + + These override the default and the options given to the compiling func- + tion. For example, on a Unix system where LF is the default newline se- + quence, the pattern + + (*CR)a.b + + changes the convention to CR. That pattern matches "a\nb" because LF is + no longer a newline. If more than one of these settings is present, the + last one is used. + + The newline convention affects where the circumflex and dollar asser- + tions are true. It also affects the interpretation of the dot metachar- + acter when PCRE2_DOTALL is not set, and the behaviour of \N when not + followed by an opening brace. However, it does not affect what the \R + escape sequence matches. By default, this is any Unicode newline se- + quence, for Perl compatibility. However, this can be changed; see the + next section and the description of \R in the section entitled "Newline + sequences" below. A change of \R setting can be combined with a change + of newline convention. + + Specifying what \R matches + + It is possible to restrict \R to match only CR, LF, or CRLF (instead of + the complete set of Unicode line endings) by setting the option + PCRE2_BSR_ANYCRLF at compile time. This effect can also be achieved by + starting a pattern with (*BSR_ANYCRLF). For completeness, (*BSR_UNI- + CODE) is also recognized, corresponding to PCRE2_BSR_UNICODE. + + +EBCDIC CHARACTER CODES + + PCRE2 can be compiled to run in an environment that uses EBCDIC as its + character code instead of ASCII or Unicode (typically a mainframe sys- + tem). 
In the sections below, character code values are ASCII or Uni- + code; in an EBCDIC environment these characters may have different code + values, and there are no code points greater than 255. + + +CHARACTERS AND METACHARACTERS + + A regular expression is a pattern that is matched against a subject + string from left to right. Most characters stand for themselves in a + pattern, and match the corresponding characters in the subject. As a + trivial example, the pattern + + The quick brown fox + + matches a portion of a subject string that is identical to itself. When + caseless matching is specified (the PCRE2_CASELESS option or (?i) + within the pattern), letters are matched independently of case. Note + that there are two ASCII characters, K and S, that, in addition to + their lower case ASCII equivalents, are case-equivalent with Unicode + U+212A (Kelvin sign) and U+017F (long S) respectively when either + PCRE2_UTF or PCRE2_UCP is set, unless the PCRE2_EXTRA_CASELESS_RESTRICT + option is in force (either passed to pcre2_compile() or set by (?r) + within the pattern). + + The power of regular expressions comes from the ability to include wild + cards, character classes, alternatives, and repetitions in the pattern. + These are encoded in the pattern by the use of metacharacters, which do + not stand for themselves but instead are interpreted in some special + way. + + There are two different sets of metacharacters: those that are recog- + nized anywhere in the pattern except within square brackets, and those + that are recognized within square brackets. Outside square brackets, + the metacharacters are as follows: + + \ general escape character with several uses + ^ assert start of string (or line, in multiline mode) + $ assert end of string (or line, in multiline mode) + . 
match any character except newline (by default) + [ start character class definition + | start of alternative branch + ( start group or control verb + ) end group or control verb + * 0 or more quantifier + + 1 or more quantifier; also "possessive quantifier" + ? 0 or 1 quantifier; also quantifier minimizer + { potential start of min/max quantifier + + Brace characters { and } are also used to enclose data for construc- + tions such as \g{2} or \k{name}. In almost all uses of braces, space + and/or horizontal tab characters that follow { or precede } are allowed + and are ignored. In the case of quantifiers, they may also appear be- + fore or after the comma. The exception to this is \u{...} which is an + ECMAScript compatibility feature that is recognized only when the + PCRE2_EXTRA_ALT_BSUX option is set. ECMAScript does not ignore such + white space; it causes the item to be interpreted as literal. + + Part of a pattern that is in square brackets is called a "character + class". In a character class the only metacharacters are: + + \ general escape character + ^ negate the class, but only if the first character + - indicates character range + [ POSIX character class (if followed by POSIX syntax) + ] terminates the character class + + If a pattern is compiled with the PCRE2_EXTENDED option, most white + space in the pattern, other than in a character class, within a \Q...\E + sequence, or between a # outside a character class and the next new- + line, inclusive, are ignored. An escaping backslash can be used to in- + clude a white space or a # character as part of the pattern. If the + PCRE2_EXTENDED_MORE option is set, the same applies, but in addition + unescaped space and horizontal tab characters are ignored inside a + character class. Note: only these two characters are ignored, not the + full set of pattern white space characters that are ignored outside a + character class. 
Option settings can be changed within a pattern; see + the section entitled "Internal Option Setting" below. + + The following sections describe the use of each of the metacharacters. + + +BACKSLASH + + The backslash character has several uses. Firstly, if it is followed by + a character that is not a digit or a letter, it takes away any special + meaning that character may have. This use of backslash as an escape + character applies both inside and outside character classes. + + For example, if you want to match a * character, you must write \* in + the pattern. This escaping action applies whether or not the following + character would otherwise be interpreted as a metacharacter, so it is + always safe to precede a non-alphanumeric with backslash to specify + that it stands for itself. In particular, if you want to match a back- + slash, you write \\. + + Only ASCII digits and letters have any special meaning after a back- + slash. All other characters (in particular, those whose code points are + greater than 127) are treated as literals. + + If you want to treat all characters in a sequence as literals, you can + do so by putting them between \Q and \E. Note that this includes white + space even when the PCRE2_EXTENDED option is set so that most other + white space is ignored. The behaviour is different from Perl in that $ + and @ are handled as literals in \Q...\E sequences in PCRE2, whereas in + Perl, $ and @ cause variable interpolation. Also, Perl does "double- + quotish backslash interpolation" on any backslashes between \Q and \E + which, its documentation says, "may lead to confusing results". PCRE2 + treats a backslash between \Q and \E just like any other character. 
+ Note the following examples: + + Pattern PCRE2 matches Perl matches + + \Qabc$xyz\E abc$xyz abc followed by the + contents of $xyz + \Qabc\$xyz\E abc\$xyz abc\$xyz + \Qabc\E\$\Qxyz\E abc$xyz abc$xyz + \QA\B\E A\B A\B + \Q\\E \ \\E + + The \Q...\E sequence is recognized both inside and outside character + classes. An isolated \E that is not preceded by \Q is ignored. If \Q + is not followed by \E later in the pattern, the literal interpretation + continues to the end of the pattern (that is, \E is assumed at the + end). If the isolated \Q is inside a character class, this causes an + error, because the character class is then not terminated by a closing + square bracket. + + Non-printing characters + + A second use of backslash provides a way of encoding non-printing char- + acters in patterns in a visible manner. There is no restriction on the + appearance of non-printing characters in a pattern, but when a pattern + is being prepared by text editing, it is often easier to use one of the + following escape sequences instead of the binary character it repre- + sents. In an ASCII or Unicode environment, these escapes are as fol- + lows: + + \a alarm, that is, the BEL character (hex 07) + \cx "control-x", where x is a non-control ASCII character + \e escape (hex 1B) + \f form feed (hex 0C) + \n linefeed (hex 0A) + \r carriage return (hex 0D) (but see below) + \t tab (hex 09) + \0dd character with octal code 0dd + \ddd character with octal code ddd, or backreference + \o{ddd..} character with octal code ddd.. + \xhh character with hex code hh + \x{hhh..} character with hex code hhh.. + \N{U+hhh..} character with Unicode hex code point hhh.. + + By default, after \x that is not followed by {, from zero to two hexa- + decimal digits are read (letters can be in upper or lower case). Any + number of hexadecimal digits may appear between \x{ and }. 
If a charac- + ter other than a hexadecimal digit appears between \x{ and }, or if + there is no terminating }, an error occurs. + + Characters whose code points are less than 256 can be defined by either + of the two syntaxes for \x or by an octal sequence. There is no differ- + ence in the way they are handled. For example, \xdc is exactly the same + as \x{dc} or \334. However, using the braced versions does make such + sequences easier to read. + + Support is available for some ECMAScript (aka JavaScript) escape se- + quences via two compile-time options. If PCRE2_ALT_BSUX is set, the se- + quence \x followed by { is not recognized. Only if \x is followed by + two hexadecimal digits is it recognized as a character escape. Other- + wise it is interpreted as a literal "x" character. In this mode, sup- + port for code points greater than 255 is provided by \u, which must be + followed by four hexadecimal digits; otherwise it is interpreted as a + literal "u" character. + + PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in ad- + dition, \u{hhh..} is recognized as the character specified by hexadeci- + mal code point. There may be any number of hexadecimal digits, but un- + like other places that also use curly brackets, spaces are not allowed + and would result in the string being interpreted as a literal. This + syntax is from ECMAScript 6. + + The \N{U+hhh..} escape sequence is recognized only when PCRE2 is oper- + ating in UTF mode. Perl also uses \N{name} to specify characters by + Unicode name; PCRE2 does not support this. Note that when \N is not + followed by an opening brace (curly bracket) it has an entirely differ- + ent meaning, matching any character that is not a newline. + + There are some legacy applications where the escape sequence \r is ex- + pected to match a newline. 
If the PCRE2_EXTRA_ESCAPED_CR_IS_LF option + is set, \r in a pattern is converted to \n so that it matches a LF + (linefeed) instead of a CR (carriage return) character. + + An error occurs if \c is not followed by a character whose ASCII code + point is in the range 32 to 126. The precise effect of \cx is as fol- + lows: if x is a lower case letter, it is converted to upper case. Then + bit 6 of the character (hex 40) is inverted. Thus \cA to \cZ become hex + 01 to hex 1A (A is 41, Z is 5A), but \c{ becomes hex 3B ({ is 7B), and + \c; becomes hex 7B (; is 3B). If the code unit following \c has a code + point less than 32 or greater than 126, a compile-time error occurs. + + When PCRE2 is compiled in EBCDIC mode, \N{U+hhh..} is not supported. + \a, \e, \f, \n, \r, and \t generate the appropriate EBCDIC code values. + The \c escape is processed as specified for Perl in the perlebcdic doc- + ument. The only characters that are allowed after \c are A-Z, a-z, or + one of @, [, \, ], ^, _, or ?. Any other character provokes a compile- + time error. The sequence \c@ encodes character code 0; after \c the + letters (in either case) encode characters 1-26 (hex 01 to hex 1A); [, + \, ], ^, and _ encode characters 27-31 (hex 1B to hex 1F), and \c? be- + comes either 255 (hex FF) or 95 (hex 5F). + + Thus, apart from \c?, these escapes generate the same character code + values as they do in an ASCII environment, though the meanings of the + values mostly differ. For example, \cG always generates code value 7, + which is BEL in ASCII but DEL in EBCDIC. + + The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, + but because 127 is not a control character in EBCDIC, Perl makes it + generate the APC character. Unfortunately, there are several variants + of EBCDIC. In most of them the APC character has the value 255 (hex + FF), but in the one Perl calls POSIX-BC its value is 95 (hex 5F). If + certain other characters have POSIX-BC values, PCRE2 makes \c? 
generate + 95; otherwise it generates 255. + + After \0 up to two further octal digits are read. If there are fewer + than two digits, just those that are present are used. Thus the se- + quence \0\x\015 specifies two binary zeros followed by a CR character + (code value 13). Make sure you supply two digits after the initial zero + if the pattern character that follows is itself an octal digit. + + The escape \o must be followed by a sequence of octal digits, enclosed + in braces. An error occurs if this is not the case. This escape is a + recent addition to Perl; it provides a way of specifying character code + points as octal numbers greater than 0777, and it also allows octal + numbers and backreferences to be unambiguously specified. + + For greater clarity and unambiguity, it is best to avoid following \ by + a digit greater than zero. Instead, use \o{...} or \x{...} to specify + numerical character code points, and \g{...} to specify backreferences. + The following paragraphs describe the old, ambiguous syntax. + + The handling of a backslash followed by a digit other than 0 is compli- + cated, and Perl has changed over time, causing PCRE2 also to change. + + Outside a character class, PCRE2 reads the digit and any following dig- + its as a decimal number. If the number is less than 10, begins with the + digit 8 or 9, or if there are at least that many previous capture + groups in the expression, the entire sequence is taken as a backrefer- + ence. A description of how this works is given later, following the + discussion of parenthesized groups. Otherwise, up to three octal dig- + its are read to form a character code. + + Inside a character class, PCRE2 handles \8 and \9 as the literal char- + acters "8" and "9", and otherwise reads up to three octal digits fol- + lowing the backslash, using them to generate a data character. Any sub- + sequent digits stand for themselves. 
For example, outside a character + class: + + \040 is another way of writing an ASCII space + \40 is the same, provided there are fewer than 40 + previous capture groups + \7 is always a backreference + \11 might be a backreference, or another way of + writing a tab + \011 is always a tab + \0113 is a tab followed by the character "3" + \113 might be a backreference, otherwise the + character with octal code 113 + \377 might be a backreference, otherwise + the value 255 (decimal) + \81 is always a backreference + + Note that octal values of 100 or greater that are specified using this + syntax must not be introduced by a leading zero, because no more than + three octal digits are ever read. + + Constraints on character values + + Characters that are specified using octal or hexadecimal numbers are + limited to certain values, as follows: + + 8-bit non-UTF mode no greater than 0xff + 16-bit non-UTF mode no greater than 0xffff + 32-bit non-UTF mode no greater than 0xffffffff + All UTF modes no greater than 0x10ffff and a valid code point + + Invalid Unicode code points are all those in the range 0xd800 to 0xdfff + (the so-called "surrogate" code points). The check for these can be + disabled by the caller of pcre2_compile() by setting the option + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. However, this is possible only in + UTF-8 and UTF-32 modes, because these values are not representable in + UTF-16. + + Escape sequences in character classes + + All the sequences that define a single character value can be used both + inside and outside character classes. In addition, inside a character + class, \b is interpreted as the backspace character (hex 08). + + When not followed by an opening brace, \N is not allowed in a character + class. \B, \R, and \X are not special inside a character class. Like + other unrecognized alphabetic escape sequences, they cause an error. + Outside a character class, these sequences have different meanings. 
+ + Unsupported escape sequences + + In Perl, the sequences \F, \l, \L, \u, and \U are recognized by its + string handler and used to modify the case of following characters. By + default, PCRE2 does not support these escape sequences in patterns. + However, if either of the PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX op- + tions is set, \U matches a "U" character, and \u can be used to define + a character by code point, as described above. + + Absolute and relative backreferences + + The sequence \g followed by a signed or unsigned number, optionally en- + closed in braces, is an absolute or relative backreference. A named + backreference can be coded as \g{name}. Backreferences are discussed + later, following the discussion of parenthesized groups. + + Absolute and relative subroutine calls + + For compatibility with Oniguruma, the non-Perl syntax \g followed by a + name or a number enclosed either in angle brackets or single quotes, is + an alternative syntax for referencing a capture group as a subroutine. + Details are discussed later. Note that \g{...} (Perl syntax) and + \g<...> (Oniguruma syntax) are not synonymous. The former is a backref- + erence; the latter is a subroutine call. + + Generic character types + + Another use of backslash is for specifying generic character types: + + \d any decimal digit + \D any character that is not a decimal digit + \h any horizontal white space character + \H any character that is not a horizontal white space character + \N any character that is not a newline + \s any white space character + \S any character that is not a white space character + \v any vertical white space character + \V any character that is not a vertical white space character + \w any "word" character + \W any "non-word" character + + The \N escape sequence has the same meaning as the "." metacharacter + when PCRE2_DOTALL is not set, but setting PCRE2_DOTALL does not change + the meaning of \N. 
Note that when \N is followed by an opening brace it + has a different meaning. See the section entitled "Non-printing charac- + ters" above for details. Perl also uses \N{name} to specify characters + by Unicode name; PCRE2 does not support this. + + Each pair of lower and upper case escape sequences partitions the com- + plete set of characters into two disjoint sets. Any given character + matches one, and only one, of each pair. The sequences can appear both + inside and outside character classes. They each match one character of + the appropriate type. If the current matching point is at the end of + the subject string, all of them fail, because there is no character to + match. + + The default \s characters are HT (9), LF (10), VT (11), FF (12), CR + (13), and space (32), which are defined as white space in the "C" lo- + cale. This list may vary if locale-specific matching is taking place. + For example, in some locales the "non-breaking space" character (\xA0) + is recognized as white space, and in others the VT character is not. + + A "word" character is an underscore or any character that is a letter + or digit. By default, the definition of letters and digits is con- + trolled by PCRE2's low-valued character tables, and may vary if locale- + specific matching is taking place (see "Locale support" in the pcre2api + page). For example, in a French locale such as "fr_FR" in Unix-like + systems, or "french" in Windows, some character codes greater than 127 + are used for accented letters, and these are then matched by \w. The + use of locales with Unicode is discouraged. + + By default, characters whose code points are greater than 127 never + match \d, \s, or \w, and always match \D, \S, and \W, although this may + be different for characters in the range 128-255 when locale-specific + matching is happening. These escape sequences retain their original + meanings from before Unicode support was available, mainly for effi- + ciency reasons. 
If the PCRE2_UCP option is set, the behaviour is + changed so that Unicode properties are used to determine character + types, as follows: + + \d any character that matches \p{Nd} (decimal digit) + \s any character that matches \p{Z} or \h or \v + \w any character that matches \p{L}, \p{N}, \p{Mn}, or \p{Pc} + + The addition of \p{Mn} (non-spacing mark) and the replacement of an ex- + plicit test for underscore with a test for \p{Pc} (connector punctua- + tion) happened in PCRE2 release 10.43. This brings PCRE2 into line with + Perl. + + The upper case escapes match the inverse sets of characters. Note that + \d matches only decimal digits, whereas \w matches any Unicode digit, + as well as other character categories. Note also that PCRE2_UCP affects + \b, and \B because they are defined in terms of \w and \W. Matching + these sequences is noticeably slower when PCRE2_UCP is set. + + The effect of PCRE2_UCP on any one of these escape sequences can be + negated by the options PCRE2_EXTRA_ASCII_BSD, PCRE2_EXTRA_ASCII_BSS, + and PCRE2_EXTRA_ASCII_BSW, respectively. These options can be set and + reset within a pattern by means of an internal option setting (see be- + low). + + The sequences \h, \H, \v, and \V, in contrast to the other sequences, + which match only ASCII characters by default, always match a specific + list of code points, whether or not PCRE2_UCP is set. 
The horizontal + space characters are: + + U+0009 Horizontal tab (HT) + U+0020 Space + U+00A0 Non-break space + U+1680 Ogham space mark + U+180E Mongolian vowel separator + U+2000 En quad + U+2001 Em quad + U+2002 En space + U+2003 Em space + U+2004 Three-per-em space + U+2005 Four-per-em space + U+2006 Six-per-em space + U+2007 Figure space + U+2008 Punctuation space + U+2009 Thin space + U+200A Hair space + U+202F Narrow no-break space + U+205F Medium mathematical space + U+3000 Ideographic space + + The vertical space characters are: + + U+000A Linefeed (LF) + U+000B Vertical tab (VT) + U+000C Form feed (FF) + U+000D Carriage return (CR) + U+0085 Next line (NEL) + U+2028 Line separator + U+2029 Paragraph separator + + In 8-bit, non-UTF-8 mode, only the characters with code points less + than 256 are relevant. + + Newline sequences + + Outside a character class, by default, the escape sequence \R matches + any Unicode newline sequence. In 8-bit non-UTF-8 mode \R is equivalent + to the following: + + (?>\r\n|\n|\x0b|\f|\r|\x85) + + This is an example of an "atomic group", details of which are given be- + low. This particular group matches either the two-character sequence + CR followed by LF, or one of the single characters LF (linefeed, + U+000A), VT (vertical tab, U+000B), FF (form feed, U+000C), CR (car- + riage return, U+000D), or NEL (next line, U+0085). Because this is an + atomic group, the two-character sequence is treated as a single unit + that cannot be split. + + In other modes, two additional characters whose code points are greater + than 255 are added: LS (line separator, U+2028) and PS (paragraph sepa- + rator, U+2029). Unicode support is not needed for these characters to + be recognized. + + It is possible to restrict \R to match only CR, LF, or CRLF (instead of + the complete set of Unicode line endings) by setting the option + PCRE2_BSR_ANYCRLF at compile time. (BSR is an abbreviation for "back- + slash R".) 
This can be made the default when PCRE2 is built; if this is + the case, the other behaviour can be requested via the PCRE2_BSR_UNI- + CODE option. It is also possible to specify these settings by starting + a pattern string with one of the following sequences: + + (*BSR_ANYCRLF) CR, LF, or CRLF only + (*BSR_UNICODE) any Unicode newline sequence + + These override the default and the options given to the compiling func- + tion. Note that these special settings, which are not Perl-compatible, + are recognized only at the very start of a pattern, and that they must + be in upper case. If more than one of them is present, the last one is + used. They can be combined with a change of newline convention; for ex- + ample, a pattern can start with: + + (*ANY)(*BSR_ANYCRLF) + + They can also be combined with the (*UTF) or (*UCP) special sequences. + Inside a character class, \R is treated as an unrecognized escape se- + quence, and causes an error. + + Unicode character properties + + When PCRE2 is built with Unicode support (the default), three addi- + tional escape sequences that match characters with specific properties + are available. They can be used in any mode, though in 8-bit and 16-bit + non-UTF modes these sequences are of course limited to testing charac- + ters whose code points are less than U+0100 and U+10000, respectively. + In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode + limit) may be encountered. These are all treated as being in the Un- + known script and with an unassigned type. + + Matching characters by Unicode property is not fast, because PCRE2 has + to do a multistage table lookup in order to find a character's prop- + erty. That is why the traditional escape sequences such as \d and \w do + not use Unicode properties in PCRE2 by default, though you can make + them do so by setting the PCRE2_UCP option or by starting the pattern + with (*UCP). 
+ + The extra escape sequences that provide property support are: + + \p{xx} a character with the xx property + \P{xx} a character without the xx property + \X a Unicode extended grapheme cluster + + The property names represented by xx above are not case-sensitive, and + in accordance with Unicode's "loose matching" rules, spaces, hyphens, + and underscores are ignored. There is support for Unicode script names, + Unicode general category properties, "Any", which matches any character + (including newline), Bidi_Class, a number of binary (yes/no) proper- + ties, and some special PCRE2 properties (described below). Certain + other Perl properties such as "InMusicalSymbols" are not supported by + PCRE2. Note that \P{Any} does not match any characters, so always + causes a match failure. + + Script properties for \p and \P + + There are three different syntax forms for matching a script. Each Uni- + code character has a basic script and, optionally, a list of other + scripts ("Script Extensions") with which it is commonly used. Using the + Adlam script as an example, \p{sc:Adlam} matches characters whose basic + script is Adlam, whereas \p{scx:Adlam} matches, in addition, characters + that have Adlam in their extensions list. The full names "script" and + "script extensions" for the property types are recognized, and an equals + sign is an alternative to the colon. If a script name is given without + a property type, for example, \p{Adlam}, it is treated as \p{scx:Ad- + lam}. Perl changed to this interpretation at release 5.26 and PCRE2 + changed at release 10.40. + + Unassigned characters (and in non-UTF 32-bit mode, characters with code + points greater than 0x10FFFF) are assigned the "Unknown" script. Others + that are not part of an identified script are lumped together as "Com- + mon". 
The current list of recognized script names and their 4-character + abbreviations can be obtained by running this command: + + pcre2test -LS + + + The general category property for \p and \P + + Each character has exactly one Unicode general category property, spec- + ified by a two-letter abbreviation. For compatibility with Perl, nega- + tion can be specified by including a circumflex between the opening + brace and the property name. For example, \p{^Lu} is the same as + \P{Lu}. + + If only one letter is specified with \p or \P, it includes all the gen- + eral category properties that start with that letter. In this case, in + the absence of negation, the curly brackets in the escape sequence are + optional; these two examples have the same effect: + + \p{L} + \pL + + The following general category property codes are supported: + + C Other + Cc Control + Cf Format + Cn Unassigned + Co Private use + Cs Surrogate + + L Letter + Ll Lower case letter + Lm Modifier letter + Lo Other letter + Lt Title case letter + Lu Upper case letter + + M Mark + Mc Spacing mark + Me Enclosing mark + Mn Non-spacing mark + + N Number + Nd Decimal number + Nl Letter number + No Other number + + P Punctuation + Pc Connector punctuation + Pd Dash punctuation + Pe Close punctuation + Pf Final punctuation + Pi Initial punctuation + Po Other punctuation + Ps Open punctuation + + S Symbol + Sc Currency symbol + Sk Modifier symbol + Sm Mathematical symbol + So Other symbol + + Z Separator + Zl Line separator + Zp Paragraph separator + Zs Space separator + + The special property LC, which has the synonym L&, is also supported: + it matches a character that has the Lu, Ll, or Lt property, in other + words, a letter that is not classified as a modifier or "other". + + The Cs (Surrogate) property applies only to characters whose code + points are in the range U+D800 to U+DFFF. 
These characters are no dif- + ferent to any other character when PCRE2 is not in UTF mode (using the + 16-bit or 32-bit library). However, they are not valid in Unicode + strings and so cannot be tested by PCRE2 in UTF mode, unless UTF valid- + ity checking has been turned off (see the discussion of + PCRE2_NO_UTF_CHECK in the pcre2api page). + + The long synonyms for property names that Perl supports (such as + \p{Letter}) are not supported by PCRE2, nor is it permitted to prefix + any of these properties with "Is". + + No character that is in the Unicode table has the Cn (unassigned) prop- + erty. Instead, this property is assumed for any code point that is not + in the Unicode table. + + Specifying caseless matching does not affect these escape sequences. + For example, \p{Lu} always matches only upper case letters. This is + different from the behaviour of current versions of Perl. + + Binary (yes/no) properties for \p and \P + + Unicode defines a number of binary properties, that is, properties + whose only values are true or false. You can obtain a list of those + that are recognized by \p and \P, along with their abbreviations, by + running this command: + + pcre2test -LP + + + The Bidi_Class property for \p and \P + + \p{Bidi_Class:<class>} matches a character with the given class + \p{BC:<class>} matches a character with the given class + + The recognized classes are: + + AL Arabic letter + AN Arabic number + B paragraph separator + BN boundary neutral + CS common separator + EN European number + ES European separator + ET European terminator + FSI first strong isolate + L left-to-right + LRE left-to-right embedding + LRI left-to-right isolate + LRO left-to-right override + NSM non-spacing mark + ON other neutral + PDF pop directional format + PDI pop directional isolate + R right-to-left + RLE right-to-left embedding + RLI right-to-left isolate + RLO right-to-left override + S segment separator + WS white space + + An equals sign may be used instead of a colon. 
The class names are + case-insensitive; only the short names listed above are recognized. + + Extended grapheme clusters + + The \X escape matches any number of Unicode characters that form an + "extended grapheme cluster", and treats the sequence as an atomic group + (see below). Unicode supports various kinds of composite character by + giving each character a grapheme breaking property, and having rules + that use these properties to define the boundaries of extended grapheme + clusters. The rules are defined in Unicode Standard Annex 29, "Unicode + Text Segmentation". Unicode 11.0.0 abandoned the use of some previous + properties that had been used for emojis. Instead it introduced vari- + ous emoji-specific properties. PCRE2 uses only the Extended Picto- + graphic property. + + \X always matches at least one character. Then it decides whether to + add additional characters according to the following rules for ending a + cluster: + + 1. End at the end of the subject string. + + 2. Do not end between CR and LF; otherwise end after any control char- + acter. + + 3. Do not break Hangul (a Korean script) syllable sequences. Hangul + characters are of five types: L, V, T, LV, and LVT. An L character may + be followed by an L, V, LV, or LVT character; an LV or V character may + be followed by a V or T character; an LVT or T character may be fol- + lowed only by a T character. + + 4. Do not end before extending characters or spacing marks or the zero- + width joiner (ZWJ) character. Characters with the "mark" property al- + ways have the "extend" grapheme breaking property. + + 5. Do not end after prepend characters. + + 6. Do not end within emoji modifier sequences or emoji ZWJ (zero-width + joiner) sequences. An emoji ZWJ sequence consists of a character with + the Extended_Pictographic property, optionally followed by one or more + characters with the Extend property, followed by the ZWJ character, + followed by another Extended_Pictographic character. + + 7. 
Do not break within emoji flag sequences. That is, do not break be- + tween regional indicator (RI) characters if there are an odd number of + RI characters before the break point. + + 8. Otherwise, end the cluster. + + PCRE2's additional properties + + As well as the standard Unicode properties described above, PCRE2 sup- + ports four more that make it possible to convert traditional escape se- + quences such as \w and \s to use Unicode properties. PCRE2 uses these + non-standard, non-Perl properties internally when PCRE2_UCP is set. + However, they may also be used explicitly. These properties are: + + Xan Any alphanumeric character + Xps Any POSIX space character + Xsp Any Perl space character + Xwd Any Perl "word" character + + Xan matches characters that have either the L (letter) or the N (num- + ber) property. Xps matches the characters tab, linefeed, vertical tab, + form feed, or carriage return, and any other character that has the Z + (separator) property. Xsp is the same as Xps; in PCRE1 it used to ex- + clude vertical tab, for Perl compatibility, but Perl changed. Xwd + matches the same characters as Xan, plus those that match Mn (non-spac- + ing mark) or Pc (connector punctuation, which includes underscore). + + There is another non-standard property, Xuc, which matches any charac- + ter that can be represented by a Universal Character Name in C++ and + other programming languages. These are the characters $, @, ` (grave + accent), and all characters with Unicode code points greater than or + equal to U+00A0, except for the surrogates U+D800 to U+DFFF. Note that + most base (ASCII) characters are excluded. (Universal Character Names + are of the form \uHHHH or \UHHHHHHHH where H is a hexadecimal digit. + Note that the Xuc property does not match these sequences but the char- + acters that they represent.) 
+ + Resetting the match start + + In normal use, the escape sequence \K causes any previously matched + characters not to be included in the final matched sequence that is re- + turned. For example, the pattern: + + foo\Kbar + + matches "foobar", but reports that it has matched "bar". \K does not + interact with anchoring in any way. The pattern: + + ^foo\Kbar + + matches only when the subject begins with "foobar" (in single line + mode), though it again reports the matched string as "bar". This fea- + ture is similar to a lookbehind assertion (described below), but the + part of the pattern that precedes \K is not constrained to match a lim- + ited number of characters, as is required for a lookbehind assertion. + The use of \K does not interfere with the setting of captured sub- + strings. For example, when the pattern + + (foo)\Kbar + + matches "foobar", the first substring is still set to "foo". + + From version 5.32.0 Perl forbids the use of \K in lookaround asser- + tions. From release 10.38 PCRE2 also forbids this by default. However, + the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option can be used when calling + pcre2_compile() to re-enable the previous behaviour. When this option + is set, \K is acted upon when it occurs inside positive assertions, but + is ignored in negative assertions. Note that when a pattern such as + (?=ab\K) matches, the reported start of the match can be greater than + the end of the match. Using \K in a lookbehind assertion at the start + of a pattern can also lead to odd effects. For example, consider this + pattern: + + (?<=\Kfoo)bar + + If the subject is "foobar", a call to pcre2_match() with a starting + offset of 3 succeeds and reports the matching string as "foobar", that + is, the start of the reported match is earlier than where the match + started. + + Simple assertions + + The final use of backslash is for certain simple assertions. 
An asser- + tion specifies a condition that has to be met at a particular point in + a match, without consuming any characters from the subject string. The + use of groups for more complicated assertions is described below. The + backslashed assertions are: + + \b matches at a word boundary + \B matches when not at a word boundary + \A matches at the start of the subject + \Z matches at the end of the subject + also matches before a newline at the end of the subject + \z matches only at the end of the subject + \G matches at the first matching position in the subject + + Inside a character class, \b has a different meaning; it matches the + backspace character. If any other of these assertions appears in a + character class, an "invalid escape sequence" error is generated. + + A word boundary is a position in the subject string where the current + character and the previous character do not both match \w or \W (i.e. + one matches \w and the other matches \W), or the start or end of the + string if the first or last character matches \w, respectively. When + PCRE2 is built with Unicode support, the meanings of \w and \W can be + changed by setting the PCRE2_UCP option. When this is done, it also af- + fects \b and \B. Neither PCRE2 nor Perl has a separate "start of word" + or "end of word" metasequence. However, whatever follows \b normally + determines which it is. For example, the fragment \ba matches "a" at + the start of a word. + + The \A, \Z, and \z assertions differ from the traditional circumflex + and dollar (described in the next section) in that they only ever match + at the very start and end of the subject string, whatever options are + set. Thus, they are independent of multiline mode. These three asser- + tions are not affected by the PCRE2_NOTBOL or PCRE2_NOTEOL options, + which affect only the behaviour of the circumflex and dollar metachar- + acters. 
However, if the startoffset argument of pcre2_match() is non- + zero, indicating that matching is to start at a point other than the + beginning of the subject, \A can never match. The difference between + \Z and \z is that \Z matches before a newline at the end of the string + as well as at the very end, whereas \z matches only at the end. + + The \G assertion is true only when the current matching position is at + the start point of the matching process, as specified by the startoff- + set argument of pcre2_match(). It differs from \A when the value of + startoffset is non-zero. By calling pcre2_match() multiple times with + appropriate arguments, you can mimic Perl's /g option, and it is in + this kind of implementation where \G can be useful. + + Note, however, that PCRE2's implementation of \G, being true at the + starting character of the matching process, is subtly different from + Perl's, which defines it as true at the end of the previous match. In + Perl, these can be different when the previously matched string was + empty. Because PCRE2 does just one match at a time, it cannot reproduce + this behaviour. + + If all the alternatives of a pattern begin with \G, the expression is + anchored to the starting match position, and the "anchored" flag is set + in the compiled regular expression. + + +CIRCUMFLEX AND DOLLAR + + The circumflex and dollar metacharacters are zero-width assertions. + That is, they test for a particular condition being true without con- + suming any characters from the subject string. These two metacharacters + are concerned with matching the starts and ends of lines. If the new- + line convention is set so that only the two-character sequence CRLF is + recognized as a newline, isolated CR and LF characters are treated as + ordinary data characters, and are not recognized as newlines. 
+ + Outside a character class, in the default matching mode, the circumflex + character is an assertion that is true only if the current matching + point is at the start of the subject string. If the startoffset argu- + ment of pcre2_match() is non-zero, or if PCRE2_NOTBOL is set, circum- + flex can never match if the PCRE2_MULTILINE option is unset. Inside a + character class, circumflex has an entirely different meaning (see be- + low). + + Circumflex need not be the first character of the pattern if a number + of alternatives are involved, but it should be the first thing in each + alternative in which it appears if the pattern is ever to match that + branch. If all possible alternatives start with a circumflex, that is, + if the pattern is constrained to match only at the start of the sub- + ject, it is said to be an "anchored" pattern. (There are also other + constructs that can cause a pattern to be anchored.) + + The dollar character is an assertion that is true only if the current + matching point is at the end of the subject string, or immediately be- + fore a newline at the end of the string (by default), unless PCRE2_NO- + TEOL is set. Note, however, that it does not actually match the new- + line. Dollar need not be the last character of the pattern if a number + of alternatives are involved, but it should be the last item in any + branch in which it appears. Dollar has no special meaning in a charac- + ter class. + + The meaning of dollar can be changed so that it matches only at the + very end of the string, by setting the PCRE2_DOLLAR_ENDONLY option at + compile time. This does not affect the \Z assertion. + + The meanings of the circumflex and dollar metacharacters are changed if + the PCRE2_MULTILINE option is set. When this is the case, a dollar + character matches before any newlines in the string, as well as at the + very end, and a circumflex matches immediately after internal newlines + as well as at the start of the subject string. 
It does not match after + a newline that ends the string, for compatibility with Perl. However, + this can be changed by setting the PCRE2_ALT_CIRCUMFLEX option. + + For example, the pattern /^abc$/ matches the subject string "def\nabc" + (where \n represents a newline) in multiline mode, but not otherwise. + Consequently, patterns that are anchored in single line mode because + all branches start with ^ are not anchored in multiline mode, and a + match for circumflex is possible when the startoffset argument of + pcre2_match() is non-zero. The PCRE2_DOLLAR_ENDONLY option is ignored + if PCRE2_MULTILINE is set. + + When the newline convention (see "Newline conventions" above) recog- + nizes the two-character sequence CRLF as a newline, this is preferred, + even if the single characters CR and LF are also recognized as new- + lines. For example, if the newline convention is "any", a multiline + mode circumflex matches before "xyz" in the string "abc\r\nxyz" rather + than after CR, even though CR on its own is a valid newline. (It also + matches at the very start of the string, of course.) + + Note that the sequences \A, \Z, and \z can be used to match the start + and end of the subject in both modes, and if all branches of a pattern + start with \A it is always anchored, whether or not PCRE2_MULTILINE is + set. + + +FULL STOP (PERIOD, DOT) AND \N + + Outside a character class, a dot in the pattern matches any one charac- + ter in the subject string except (by default) a character that signi- + fies the end of a line. One or more characters may be specified as line + terminators (see "Newline conventions" above). + + Dot never matches a single line-ending character. When the two-charac- + ter sequence CRLF is the only line ending, dot does not match CR if it + is immediately followed by LF, but otherwise it matches all characters + (including isolated CRs and LFs). When ANYCRLF is selected for line + endings, no occurrences of CR or LF match dot. 
When all Unicode line + endings are being recognized, dot does not match CR or LF or any of the + other line ending characters. + + The behaviour of dot with regard to newlines can be changed. If the + PCRE2_DOTALL option is set, a dot matches any one character, without + exception. If the two-character sequence CRLF is present in the sub- + ject string, it takes two dots to match it. + + The handling of dot is entirely independent of the handling of circum- + flex and dollar, the only relationship being that they both involve + newlines. Dot has no special meaning in a character class. + + The escape sequence \N when not followed by an opening brace behaves + like a dot, except that it is not affected by the PCRE2_DOTALL option. + In other words, it matches any character except one that signifies the + end of a line. + + When \N is followed by an opening brace it has a different meaning. See + the section entitled "Non-printing characters" above for details. Perl + also uses \N{name} to specify characters by Unicode name; PCRE2 does + not support this. + + +MATCHING A SINGLE CODE UNIT + + Outside a character class, the escape sequence \C matches any one code + unit, whether or not a UTF mode is set. In the 8-bit library, one code + unit is one byte; in the 16-bit library it is a 16-bit unit; in the + 32-bit library it is a 32-bit unit. Unlike a dot, \C always matches + line-ending characters. The feature is provided in Perl in order to + match individual bytes in UTF-8 mode, but it is unclear how it can use- + fully be used. + + Because \C breaks up characters into individual code units, matching + one unit with \C in UTF-8 or UTF-16 mode means that the rest of the + string may start with a malformed UTF character. 
This has undefined re- + sults, because PCRE2 assumes that it is matching character by character + in a valid UTF string (by default it checks the subject string's valid- + ity at the start of processing unless the PCRE2_NO_UTF_CHECK or + PCRE2_MATCH_INVALID_UTF option is used). + + An application can lock out the use of \C by setting the + PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also + possible to build PCRE2 with the use of \C permanently disabled. + + PCRE2 does not allow \C to appear in lookbehind assertions (described + below) in UTF-8 or UTF-16 modes, because this would make it impossible + to calculate the length of the lookbehind. Neither the alternative + matching function pcre2_dfa_match() nor the JIT optimizer support \C in + these UTF modes. The former gives a match-time error; the latter fails + to optimize and so the match is always run using the interpreter. + + In the 32-bit library, however, \C is always supported (when not ex- + plicitly locked out) because it always matches a single code unit, + whether or not UTF-32 is specified. + + In general, the \C escape sequence is best avoided. However, one way of + using it that avoids the problem of malformed UTF-8 or UTF-16 charac- + ters is to use a lookahead to check the length of the next character, + as in this pattern, which could be used with a UTF-8 string (ignore + white space and line breaks): + + (?| (?=[\x00-\x7f])(\C) | + (?=[\x80-\x{7ff}])(\C)(\C) | + (?=[\x{800}-\x{ffff}])(\C)(\C)(\C) | + (?=[\x{10000}-\x{1fffff}])(\C)(\C)(\C)(\C)) + + In this example, a group that starts with (?| resets the capturing + parentheses numbers in each alternative (see "Duplicate Group Numbers" + below). The assertions at the start of each branch check the next UTF-8 + character for values whose encoding uses 1, 2, 3, or 4 bytes, respec- + tively. The character's individual bytes are then captured by the ap- + propriate number of \C groups. 
+ + +SQUARE BRACKETS AND CHARACTER CLASSES + + An opening square bracket introduces a character class, terminated by a + closing square bracket. A closing square bracket on its own is not spe- + cial by default. If a closing square bracket is required as a member + of the class, it should be the first data character in the class (after + an initial circumflex, if present) or escaped with a backslash. This + means that, by default, an empty class cannot be defined. However, if + the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing square bracket at + the start does end the (empty) class. + + A character class matches a single character in the subject. A matched + character must be in the set of characters defined by the class, unless + the first character in the class definition is a circumflex, in which + case the subject character must not be in the set defined by the class. + If a circumflex is actually required as a member of the class, ensure + it is not the first character, or escape it with a backslash. + + For example, the character class [aeiou] matches any lower case vowel, + while [^aeiou] matches any character that is not a lower case vowel. + Note that a circumflex is just a convenient notation for specifying the + characters that are in the class by enumerating those that are not. A + class that starts with a circumflex is not an assertion; it still con- + sumes a character from the subject string, and therefore it fails if + the current pointer is at the end of the string. + + Characters in a class may be specified by their code points using \o, + \x, or \N{U+hh..} in the usual way. When caseless matching is set, any + letters in a class represent both their upper case and lower case ver- + sions, so for example, a caseless [aeiou] matches "A" as well as "a", + and a caseless [^aeiou] does not match "A", whereas a caseful version + would. 
Note that there are two ASCII characters, K and S, that, in ad- + dition to their lower case ASCII equivalents, are case-equivalent with + Unicode U+212A (Kelvin sign) and U+017F (long S) respectively when ei- + ther PCRE2_UTF or PCRE2_UCP is set. + + Characters that might indicate line breaks are never treated in any + special way when matching character classes, whatever line-ending se- + quence is in use, and whatever setting of the PCRE2_DOTALL and + PCRE2_MULTILINE options is used. A class such as [^a] always matches + one of these characters. + + The generic character type escape sequences \d, \D, \h, \H, \p, \P, \s, + \S, \v, \V, \w, and \W may appear in a character class, and add the + characters that they match to the class. For example, [\dABCDEF] + matches any hexadecimal digit. In UTF modes, the PCRE2_UCP option af- + fects the meanings of \d, \s, \w and their upper case partners, just as + it does when they appear outside a character class, as described in the + section entitled "Generic character types" above. The escape sequence + \b has a different meaning inside a character class; it matches the + backspace character. The sequences \B, \R, and \X are not special in- + side a character class. Like any other unrecognized escape sequences, + they cause an error. The same is true for \N when not followed by an + opening brace. + + The minus (hyphen) character can be used to specify a range of charac- + ters in a character class. For example, [d-m] matches any letter be- + tween d and m, inclusive. If a minus character is required in a class, + it must be escaped with a backslash or appear in a position where it + cannot be interpreted as indicating a range, typically as the first or + last character in the class, or immediately after a range. For example, + [b-d-z] matches letters in the range b to d, a hyphen character, or z. 
+ + Perl treats a hyphen as a literal if it appears before or after a POSIX + class (see below) or before or after a character type escape such as \d + or \H. However, unless the hyphen is the last character in the class, + Perl outputs a warning in its warning mode, as this is most likely a + user error. As PCRE2 has no facility for warning, an error is given in + these cases. + + It is not possible to have the literal character "]" as the end charac- + ter of a range. A pattern such as [W-]46] is interpreted as a class of + two characters ("W" and "-") followed by a literal string "46]", so it + would match "W46]" or "-46]". However, if the "]" is escaped with a + backslash it is interpreted as the end of range, so [W-\]46] is inter- + preted as a class containing a range followed by two other characters. + The octal or hexadecimal representation of "]" can also be used to end + a range. + + Ranges normally include all code points between the start and end char- + acters, inclusive. They can also be used for code points specified nu- + merically, for example [\000-\037]. Ranges can include any characters + that are valid for the current mode. In any UTF mode, the so-called + "surrogate" characters (those whose code points lie between 0xd800 and + 0xdfff inclusive) may not be specified explicitly by default (the + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables this check). How- + ever, ranges such as [\x{d7ff}-\x{e000}], which include the surrogates, + are always permitted. + + There is a special case in EBCDIC environments for ranges whose end + points are both specified as literal letters in the same case. For com- + patibility with Perl, EBCDIC code points within the range that are not + letters are omitted. For example, [h-k] matches only four characters, + even though the codes for h and k are 0x88 and 0x92, a range of 11 code + points. However, if the range is specified numerically, for example, + [\x88-\x92] or [h-\x92], all code points are included. 
+ + If a range that includes letters is used when caseless matching is set, + it matches the letters in either case. For example, [W-c] is equivalent + to [][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if + character tables for a French locale are in use, [\xc8-\xcb] matches + accented E characters in both cases. + + A circumflex can conveniently be used with the upper case character + types to specify a more restricted set of characters than the matching + lower case type. For example, the class [^\W_] matches any letter or + digit, but not underscore, whereas [\w] includes underscore. A positive + character class should be read as "something OR something OR ..." and a + negative class as "NOT something AND NOT something AND NOT ...". + + The only metacharacters that are recognized in character classes are + backslash, hyphen (only where it can be interpreted as specifying a + range), circumflex (only at the start), opening square bracket (only + when it can be interpreted as introducing a POSIX class name, or for a + special compatibility feature - see the next two sections), and the + terminating closing square bracket. However, escaping other non-al- + phanumeric characters does no harm. + + +POSIX CHARACTER CLASSES + + Perl supports the POSIX notation for character classes. This uses names + enclosed by [: and :] within the enclosing square brackets. PCRE2 also + supports this notation. For example, + + [01[:alpha:]%] + + matches "0", "1", any alphabetic character, or "%". 
The supported class + names are: + + alnum letters and digits + alpha letters + ascii character codes 0 - 127 + blank space or tab only + cntrl control characters + digit decimal digits (same as \d) + graph printing characters, excluding space + lower lower case letters + print printing characters, including space + punct printing characters, excluding letters and digits and space + space white space (the same as \s from PCRE1 8.34) + upper upper case letters + word "word" characters (same as \w) + xdigit hexadecimal digits + + The default "space" characters are HT (9), LF (10), VT (11), FF (12), + CR (13), and space (32). If locale-specific matching is taking place, + the list of space characters may be different; there may be fewer or + more of them. "Space" and \s match the same set of characters, as do + "word" and \w. + + The name "word" is a Perl extension, and "blank" is a GNU extension + from Perl 5.8. Another Perl extension is negation, which is indicated + by a ^ character after the colon. For example, + + [12[:^digit:]] + + matches "1", "2", or any non-digit. PCRE2 (and Perl) also recognize the + POSIX syntax [.ch.] and [=ch=] where "ch" is a "collating element", but + these are not supported, and an error is given if they are encountered. + + By default, characters with values greater than 127 do not match any of + the POSIX character classes, although this may be different for charac- + ters in the range 128-255 when locale-specific matching is happening. + However, in UCP mode, unless certain options are set (see below), some + of the classes are changed so that Unicode character properties are + used.
This is achieved by replacing POSIX classes with other sequences, + as follows: + + [:alnum:] becomes \p{Xan} + [:alpha:] becomes \p{L} + [:blank:] becomes \h + [:cntrl:] becomes \p{Cc} + [:digit:] becomes \p{Nd} + [:lower:] becomes \p{Ll} + [:space:] becomes \p{Xps} + [:upper:] becomes \p{Lu} + [:word:] becomes \p{Xwd} + + Negated versions, such as [:^alpha:], use \P instead of \p. Four other + POSIX classes are handled specially in UCP mode: + + [:graph:] This matches characters that have glyphs that mark the page + when printed. In Unicode property terms, it matches all char- + acters with the L, M, N, P, S, or Cf properties, except for: + + U+061C Arabic Letter Mark + U+180E Mongolian Vowel Separator + U+2066 - U+2069 Various "isolate"s + + + [:print:] This matches the same characters as [:graph:] plus space + characters that are not controls, that is, characters with + the Zs property. + + [:punct:] This matches all characters that have the Unicode P (punctua- + tion) property, plus those characters with code points less + than 256 that have the S (Symbol) property. + + [:xdigit:] + In addition to the ASCII hexadecimal digits, this also + matches the "fullwidth" versions of those characters, whose + Unicode code points start at U+FF10. This is a change that + was made in PCRE2 release 10.43 for Perl compatibility. + + The other POSIX classes are unchanged by PCRE2_UCP, and match only + characters with code points less than 256. + + There are two options that can be used to restrict the POSIX classes to + ASCII characters when PCRE2_UCP is set. The option PCRE2_EX- + TRA_ASCII_DIGIT affects just [:digit:] and [:xdigit:]. Within a pat- + tern, this can be set and unset by (?aT) and (?-aT). The PCRE2_EX- + TRA_ASCII_POSIX option disables UCP processing for all POSIX classes, + including [:digit:] and [:xdigit:]. Within a pattern, (?aP) and (?-aP) + set and unset both these options for consistency.
+ + +COMPATIBILITY FEATURE FOR WORD BOUNDARIES + + In the POSIX.2 compliant library that was included in 4.4BSD Unix, the + ugly syntax [[:<:]] and [[:>:]] is used for matching "start of word" + and "end of word". PCRE2 treats these items as follows: + + [[:<:]] is converted to \b(?=\w) + [[:>:]] is converted to \b(?<=\w) + + Only these exact character sequences are recognized. A sequence such as + [a[:<:]b] provokes an error for an unrecognized POSIX class name. This + support is not compatible with Perl. It is provided to help migrations + from other environments, and is best not used in any new patterns. Note + that \b matches at the start and the end of a word (see "Simple asser- + tions" above), and in a Perl-style pattern the preceding or following + character normally shows which is wanted, without the need for the as- + sertions that are used above in order to give exactly the POSIX behav- + iour. Note also that the PCRE2_UCP option changes the meaning of \w + (and therefore \b) by default, so it also affects these POSIX se- + quences. + + +VERTICAL BAR + + Vertical bar characters are used to separate alternative patterns. For + example, the pattern + + gilbert|sullivan + + matches either "gilbert" or "sullivan". Any number of alternatives may + appear, and an empty alternative is permitted (matching the empty + string). The matching process tries each alternative in turn, from left + to right, and the first one that succeeds is used. If the alternatives + are within a group (defined below), "succeeds" means matching the rest + of the main pattern as well as the alternative in the group. + + +INTERNAL OPTION SETTING + + The settings of several options can be changed within a pattern by a + sequence of letters enclosed between "(?" and ")". The following are + Perl-compatible, and are described in detail in the pcre2api documenta- + tion.
The option letters are: + + i for PCRE2_CASELESS + m for PCRE2_MULTILINE + n for PCRE2_NO_AUTO_CAPTURE + s for PCRE2_DOTALL + x for PCRE2_EXTENDED + xx for PCRE2_EXTENDED_MORE + + For example, (?im) sets caseless, multiline matching. It is also possi- + ble to unset these options by preceding the relevant letters with a hy- + phen, for example (?-im). The two "extended" options are not indepen- + dent; unsetting either one cancels the effects of both of them. + + A combined setting and unsetting such as (?im-sx), which sets + PCRE2_CASELESS and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and + PCRE2_EXTENDED, is also permitted. Only one hyphen may appear in the + options string. If a letter appears both before and after the hyphen, + the option is unset. An empty options setting "(?)" is allowed. Need- + less to say, it has no effect. + + If the first character following (? is a circumflex, it causes all of + the above options to be unset. Letters may follow the circumflex to + cause some options to be re-instated, but a hyphen may not appear. + + Some PCRE2-specific options can be changed by the same mechanism using + these pairs or individual letters: + + aD for PCRE2_EXTRA_ASCII_BSD + aS for PCRE2_EXTRA_ASCII_BSS + aW for PCRE2_EXTRA_ASCII_BSW + aP for PCRE2_EXTRA_ASCII_POSIX and PCRE2_EXTRA_ASCII_DIGIT + aT for PCRE2_EXTRA_ASCII_DIGIT + r for PCRE2_EXTRA_CASELESS_RESTRICT + J for PCRE2_DUPNAMES + U for PCRE2_UNGREEDY + + However, except for 'r', these are not unset by (?^), which is equiva- + lent to (?-imnrsx). If 'a' is not followed by any of the upper case + letters shown above, it sets (or unsets) all the ASCII options. + + PCRE2_EXTRA_ASCII_DIGIT has no additional effect when PCRE2_EX- + TRA_ASCII_POSIX is set, but including it in (?aP) means that (?-aP) + suppresses all ASCII restrictions for POSIX classes. 
+ + When one of these option changes occurs at top level (that is, not in- + side group parentheses), the change applies until a subsequent change, + or the end of the pattern. An option change within a group (see below + for a description of groups) affects only that part of the group that + follows it. At the end of the group these options are reset to the + state they were before the group. For example, + + (a(?i)b)c + + matches abc and aBc and no other strings (assuming PCRE2_CASELESS is + not set externally). Any changes made in one alternative do carry on + into subsequent branches within the same group. For example, + + (a(?i)b|c) + + matches "ab", "aB", "c", and "C", even though when matching "C" the + first branch is abandoned before the option setting. This is because + the effects of option settings happen at compile time. There would be + some very weird behaviour otherwise. + + As a convenient shorthand, if any option settings are required at the + start of a non-capturing group (see the next section), the option let- + ters may appear between the "?" and the ":". Thus the two patterns + + (?i:saturday|sunday) + (?:(?i)saturday|sunday) + + match exactly the same set of strings. + + Note: There are other PCRE2-specific options, applying to the whole + pattern, which can be set by the application when the compiling func- + tion is called. In addition, the pattern can contain special leading + sequences such as (*CRLF) to override what the application has set or + what has been defaulted. Details are given in the section entitled + "Newline sequences" above. There are also the (*UTF) and (*UCP) leading + sequences that can be used to set UTF and Unicode property modes; they + are equivalent to setting the PCRE2_UTF and PCRE2_UCP options, respec- + tively. However, the application can set the PCRE2_NEVER_UTF or + PCRE2_NEVER_UCP options, which lock out the use of the (*UTF) and + (*UCP) sequences. 
+ + +GROUPS + + Groups are delimited by parentheses (round brackets), which can be + nested. Turning part of a pattern into a group does two things: + + 1. It localizes a set of alternatives. For example, the pattern + + cat(aract|erpillar|) + + matches "cataract", "caterpillar", or "cat". Without the parentheses, + it would match "cataract", "erpillar" or an empty string. + + 2. It creates a "capture group". This means that, when the whole pat- + tern matches, the portion of the subject string that matched the group + is passed back to the caller, separately from the portion that matched + the whole pattern. (This applies only to the traditional matching + function; the DFA matching function does not support capturing.) + + Opening parentheses are counted from left to right (starting from 1) to + obtain numbers for capture groups. For example, if the string "the red + king" is matched against the pattern + + the ((red|white) (king|queen)) + + the captured substrings are "red king", "red", and "king", and are num- + bered 1, 2, and 3, respectively. + + The fact that plain parentheses fulfil two functions is not always + helpful. There are often times when grouping is required without cap- + turing. If an opening parenthesis is followed by a question mark and a + colon, the group does not do any capturing, and is not counted when + computing the number of any subsequent capture groups. For example, if + the string "the white queen" is matched against the pattern + + the ((?:red|white) (king|queen)) + + the captured substrings are "white queen" and "queen", and are numbered + 1 and 2. The maximum number of capture groups is 65535. + + As a convenient shorthand, if any option settings are required at the + start of a non-capturing group, the option letters may appear between + the "?" and the ":". Thus the two patterns + + (?i:saturday|sunday) + (?:(?i)saturday|sunday) + + match exactly the same set of strings. 
Because alternative branches are + tried from left to right, and options are not reset until the end of + the group is reached, an option setting in one branch does affect sub- + sequent branches, so the above patterns match "SUNDAY" as well as "Sat- + urday". + + +DUPLICATE GROUP NUMBERS + + Perl 5.10 introduced a feature whereby each alternative in a group uses + the same numbers for its capturing parentheses. Such a group starts + with (?| and is itself a non-capturing group. For example, consider + this pattern: + + (?|(Sat)ur|(Sun))day + + Because the two alternatives are inside a (?| group, both sets of cap- + turing parentheses are numbered one. Thus, when the pattern matches, + you can look at captured substring number one, whichever alternative + matched. This construct is useful when you want to capture part, but + not all, of one of a number of alternatives. Inside a (?| group, paren- + theses are numbered as usual, but the number is reset at the start of + each branch. The numbers of any capturing parentheses that follow the + whole group start after the highest number used in any branch. The fol- + lowing example is taken from the Perl documentation. The numbers under- + neath show in which buffer the captured content will be stored. + + # before ---------------branch-reset----------- after + / ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x + # 1 2 2 3 2 3 4 + + A backreference to a capture group uses the most recent value that is + set for the group. The following pattern matches "abcabc" or "defdef": + + /(?|(abc)|(def))\1/ + + In contrast, a subroutine call to a capture group always refers to the + first one in the pattern with the given number. The following pattern + matches "abcabc" or "defabc": + + /(?|(abc)|(def))(?1)/ + + A relative reference such as (?-1) is no different: it is just a conve- + nient way of computing an absolute group number. 
+ + If a condition test for a group's having matched refers to a non-unique + number, the test is true if any group with that number has matched. + + An alternative approach to using this "branch reset" feature is to use + duplicate named groups, as described in the next section. + + +NAMED CAPTURE GROUPS + + Identifying capture groups by number is simple, but it can be very hard + to keep track of the numbers in complicated patterns. Furthermore, if + an expression is modified, the numbers may change. To help with this + difficulty, PCRE2 supports the naming of capture groups. This feature + was not added to Perl until release 5.10. Python had the feature ear- + lier, and PCRE1 introduced it at release 4.0, using the Python syntax. + PCRE2 supports both the Perl and the Python syntax. + + In PCRE2, a capture group can be named in one of three ways: + (?<name>...) or (?'name'...) as in Perl, or (?P<name>...) as in Python. + Names may be up to 128 code units long. When PCRE2_UTF is not set, they + may contain only ASCII alphanumeric characters and underscores, but + must start with a non-digit. When PCRE2_UTF is set, the syntax of group + names is extended to allow any Unicode letter or Unicode decimal digit. + In other words, group names must match one of these patterns: + + ^[_A-Za-z][_A-Za-z0-9]*\z when PCRE2_UTF is not set + ^[_\p{L}][_\p{L}\p{Nd}]*\z when PCRE2_UTF is set + + References to capture groups from other parts of the pattern, such as + backreferences, recursion, and conditions, can all be made by name as + well as by number. + + Named capture groups are allocated numbers as well as names, exactly as + if the names were not present. In both PCRE2 and Perl, capture groups + are primarily identified by numbers; any names are just aliases for + these numbers. The PCRE2 API provides function calls for extracting the + complete name-to-number translation table from a compiled pattern, as + well as convenience functions for extracting captured substrings by + name.
+ + Warning: When more than one capture group has the same number, as de- + scribed in the previous section, a name given to one of them applies to + all of them. Perl allows identically numbered groups to have different + names. Consider this pattern, where there are two capture groups, both + numbered 1: + + (?|(?<AA>aa)|(?<BB>bb)) + + Perl allows this, with both names AA and BB as aliases of group 1. + Thus, after a successful match, both names yield the same value (either + "aa" or "bb"). + + In an attempt to reduce confusion, PCRE2 does not allow the same group + number to be associated with more than one name. The example above pro- + vokes a compile-time error. However, there is still scope for confu- + sion. Consider this pattern: + + (?|(?<AA>aa)|(bb)) + + Although the second group number 1 is not explicitly named, the name AA + is still an alias for any group 1. Whether the pattern matches "aa" or + "bb", a reference by name to group AA yields the matched string. + + By default, a name must be unique within a pattern, except that dupli- + cate names are permitted for groups with the same number, for example: + + (?|(?<AA>aa)|(?<AA>bb)) + + The duplicate name constraint can be disabled by setting the PCRE2_DUP- + NAMES option at compile time, or by the use of (?J) within the pattern, + as described in the section entitled "Internal Option Setting" above. + + Duplicate names can be useful for patterns where only one instance of + the named capture group can match. Suppose you want to match the name + of a weekday, either as a 3-letter abbreviation or as the full name, + and in both cases you want to extract the abbreviation. This pattern + (ignoring the line breaks) does the job: + + (?J) + (?<DN>Mon|Fri|Sun)(?:day)?| + (?<DN>Tue)(?:sday)?| + (?<DN>Wed)(?:nesday)?| + (?<DN>Thu)(?:rsday)?| + (?<DN>Sat)(?:urday)? + + There are five capture groups, but only one is ever set after a match.
+ The convenience functions for extracting the data by name return the + substring for the first (and in this example, the only) group of that + name that matched. This saves searching to find which numbered group it + was. (An alternative way of solving this problem is to use a "branch + reset" group, as described in the previous section.) + + If you make a backreference to a non-unique named group from elsewhere + in the pattern, the groups to which the name refers are checked in the + order in which they appear in the overall pattern. The first one that + is set is used for the reference. For example, this pattern matches + both "foofoo" and "barbar" but not "foobar" or "barfoo": + + (?J)(?:(?<n>foo)|(?<n>bar))\k<n> + + + If you make a subroutine call to a non-unique named group, the one that + corresponds to the first occurrence of the name is used. In the absence + of duplicate numbers this is the one with the lowest number. + + If you use a named reference in a condition test (see the section about + conditions below), either to check whether a capture group has matched, + or to check for recursion, all groups with the same name are tested. If + the condition is true for any one of them, the overall condition is + true. This is the same behaviour as testing by number. For further de- + tails of the interfaces for handling named capture groups, see the + pcre2api documentation. + + +REPETITION + + Repetition is specified by quantifiers, which may follow any one of + these items: + + a literal data character + the dot metacharacter + the \C escape sequence + the \R escape sequence + the \X escape sequence + any escape sequence that matches a single character + a character class + a backreference + a parenthesized group (including lookaround assertions) + a subroutine call (recursive or otherwise) + + If a quantifier does not follow a repeatable item, an error occurs.
The + general repetition quantifier specifies a minimum and maximum number of + permitted matches by giving two numbers in curly brackets (braces), + separated by a comma. The numbers must be less than 65536, and the + first must be less than or equal to the second. For example, + + z{2,4} + + matches "zz", "zzz", or "zzzz". A closing brace on its own is not a + special character. If the second number is omitted, but the comma is + present, there is no upper limit; if the second number and the comma + are both omitted, the quantifier specifies an exact number of required + matches. Thus + + [aeiou]{3,} + + matches at least 3 successive vowels, but may match many more, whereas + + \d{8} + + matches exactly 8 digits. If the first number is omitted, the lower + limit is taken as zero; in this case the upper limit must be present. + + X{,4} is interpreted as X{0,4} + + This is a change in behaviour that happened in Perl 5.34.0 and PCRE2 + 10.43. In earlier versions such a sequence was not interpreted as a + quantifier. Other regular expression engines may behave either way. + + If the characters that follow an opening brace do not match the syntax + of a quantifier, the brace is taken as a literal character. In particu- + lar, this means that {,} is a literal string of three characters. + + Note that not every opening brace is potentially the start of a quanti- + fier because braces are used in other items such as \N{U+345} or + \k{name}. + + In UTF modes, quantifiers apply to characters rather than to individual + code units. Thus, for example, \x{100}{2} matches two characters, each + of which is represented by a two-byte sequence in a UTF-8 string. Simi- + larly, \X{3} matches three Unicode extended grapheme clusters, each of + which may be several code units long (and they may be of different + lengths). + + The quantifier {0} is permitted, causing the expression to behave as if + the previous item and the quantifier were not present. 
This may be use- + ful for capture groups that are referenced as subroutines from else- + where in the pattern (but see also the section entitled "Defining cap- + ture groups for use by reference only" below). Except for parenthesized + groups, items that have a {0} quantifier are omitted from the compiled + pattern. + + For convenience, the three most common quantifiers have single-charac- + ter abbreviations: + + * is equivalent to {0,} + + is equivalent to {1,} + ? is equivalent to {0,1} + + It is possible to construct infinite loops by following a group that + can match no characters with a quantifier that has no upper limit, for + example: + + (a?)* + + Earlier versions of Perl and PCRE1 used to give an error at compile + time for such patterns. However, because there are cases where this can + be useful, such patterns are now accepted, but whenever an iteration of + such a group matches no characters, matching moves on to the next item + in the pattern instead of repeatedly matching an empty string. This + does not prevent backtracking into any of the iterations if a subse- + quent item fails to match. + + By default, quantifiers are "greedy", that is, they match as much as + possible (up to the maximum number of permitted repetitions), without + causing the rest of the pattern to fail. The classic example of where + this gives problems is in trying to match comments in C programs. These + appear between /* and */ and within the comment, individual * and / + characters may appear. An attempt to match C comments by applying the + pattern + + /\*.*\*/ + + to the string + + /* first comment */ not comment /* second comment */ + + fails, because it matches the entire string owing to the greediness of + the .* item. However, if a quantifier is followed by a question mark, + it ceases to be greedy, and instead matches the minimum number of times + possible, so the pattern + + /\*.*?\*/ + + does the right thing with C comments. 
The meaning of the various quan- + tifiers is not otherwise changed, just the preferred number of matches. + Do not confuse this use of question mark with its use as a quantifier + in its own right. Because it has two uses, it can sometimes appear + doubled, as in + + \d??\d + + which matches one digit by preference, but can match two if that is the + only way the rest of the pattern matches. + + If the PCRE2_UNGREEDY option is set (an option that is not available in + Perl), the quantifiers are not greedy by default, but individual ones + can be made greedy by following them with a question mark. In other + words, it inverts the default behaviour. + + When a parenthesized group is quantified with a minimum repeat count + that is greater than 1 or with a limited maximum, more memory is re- + quired for the compiled pattern, in proportion to the size of the mini- + mum or maximum. + + If a pattern starts with .* or .{0,} and the PCRE2_DOTALL option + (equivalent to Perl's /s) is set, thus allowing the dot to match new- + lines, the pattern is implicitly anchored, because whatever follows + will be tried against every character position in the subject string, + so there is no point in retrying the overall match at any position af- + ter the first. PCRE2 normally treats such a pattern as though it were + preceded by \A. + + In cases where it is known that the subject string contains no new- + lines, it is worth setting PCRE2_DOTALL in order to obtain this opti- + mization, or alternatively, using ^ to indicate anchoring explicitly. + + However, there are some cases where the optimization cannot be used. + When .* is inside capturing parentheses that are the subject of a + backreference elsewhere in the pattern, a match at the start may fail + where a later one succeeds. Consider, for example: + + (.*)abc\1 + + If the subject is "xyz123abc123" the match point is the fourth charac- + ter. For this reason, such a pattern is not implicitly anchored. 
+ + Another case where implicit anchoring is not applied is when the lead- + ing .* is inside an atomic group. Once again, a match at the start may + fail where a later one succeeds. Consider this pattern: + + (?>.*?a)b + + It matches "ab" in the subject "aab". The use of the backtracking con- + trol verbs (*PRUNE) and (*SKIP) also disables this optimization, and + there is an option, PCRE2_NO_DOTSTAR_ANCHOR, to do so explicitly. + + When a capture group is repeated, the value captured is the substring + that matched the final iteration. For example, after + + (tweedle[dume]{3}\s*)+ + + has matched "tweedledum tweedledee" the value of the captured substring + is "tweedledee". However, if there are nested capture groups, the cor- + responding captured values may have been set in previous iterations. + For example, after + + (a|(b))+ + + matches "aba" the value of the second captured substring is "b". + + +ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS + + With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy") + repetition, failure of what follows normally causes the repeated item + to be re-evaluated to see if a different number of repeats allows the + rest of the pattern to match. Sometimes it is useful to prevent this, + either to change the nature of the match, or to cause it to fail earlier + than it otherwise might, when the author of the pattern knows there is + no point in carrying on. + + Consider, for example, the pattern \d+foo when applied to the subject + line + + 123456bar + + After matching all 6 digits and then failing to match "foo", the normal + action of the matcher is to try again with only 5 digits matching the + \d+ item, and then with 4, and so on, before ultimately failing. + "Atomic grouping" (a term taken from Jeffrey Friedl's book) provides + the means for specifying that once a group has matched, it is not to be + re-evaluated in this way.
+ + If we use atomic grouping for the previous example, the matcher gives + up immediately on failing to match "foo" the first time. The notation + is a kind of special parenthesis, starting with (?> as in this example: + + (?>\d+)foo + + Perl 5.28 introduced an experimental alphabetic form starting with (* + which may be easier to remember: + + (*atomic:\d+)foo + + This kind of parenthesized group "locks up" the part of the pattern it + contains once it has matched, and a failure further into the pattern is + prevented from backtracking into it. Backtracking past it to previous + items, however, works as normal. + + An alternative description is that a group of this type matches exactly + the string of characters that an identical standalone pattern would + match, if anchored at the current point in the subject string. + + Atomic groups are not capture groups. Simple cases such as the above + example can be thought of as a maximizing repeat that must swallow + everything it can. So, while both \d+ and \d+? are prepared to adjust + the number of digits they match in order to make the rest of the pat- + tern match, (?>\d+) can only match an entire sequence of digits. + + Atomic groups in general can of course contain arbitrarily complicated + expressions, and can be nested. However, when the contents of an atomic + group is just a single repeated item, as in the example above, a sim- + pler notation, called a "possessive quantifier" can be used. This con- + sists of an additional + character following a quantifier. Using this + notation, the previous example can be rewritten as + + \d++foo + + Note that a possessive quantifier can be used with an entire group, for + example: + + (abc|xyz){2,3}+ + + Possessive quantifiers are always greedy; the setting of the PCRE2_UN- + GREEDY option is ignored. They are a convenient notation for the sim- + pler forms of atomic group. 
However, there is no difference in the + meaning of a possessive quantifier and the equivalent atomic group, + though there may be a performance difference; possessive quantifiers + should be slightly faster. + + The possessive quantifier syntax is an extension to the Perl 5.8 syn- + tax. Jeffrey Friedl originated the idea (and the name) in the first + edition of his book. Mike McCloskey liked it, so implemented it when he + built Sun's Java package, and PCRE1 copied it from there. It found its + way into Perl at release 5.10. + + PCRE2 has an optimization that automatically "possessifies" certain + simple pattern constructs. For example, the sequence A+B is treated as + A++B because there is no point in backtracking into a sequence of A's + when B must follow. This feature can be disabled by the PCRE2_NO_AUTO- + POSSESS option, or starting the pattern with (*NO_AUTO_POSSESS). + + When a pattern contains an unlimited repeat inside a group that can it- + self be repeated an unlimited number of times, the use of an atomic + group is the only way to avoid some failing matches taking a very long + time indeed. The pattern + + (\D+|<\d+>)*[!?] + + matches an unlimited number of substrings that either consist of non- + digits, or digits enclosed in <>, followed by either ! or ?. When it + matches, it runs quickly. However, if it is applied to + + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + + it takes a long time before reporting failure. This is because the + string can be divided between the internal \D+ repeat and the external + * repeat in a large number of ways, and all have to be tried. (The ex- + ample uses [!?] rather than a single character at the end, because both + PCRE2 and Perl have an optimization that allows for fast failure when a + single character is used. They remember the last single character that + is required for a match, and fail early if it is not present in the + string.) 
If the pattern is changed so that it uses an atomic group, + like this: + + ((?>\D+)|<\d+>)*[!?] + + sequences of non-digits cannot be broken, and failure happens quickly. + + +BACKREFERENCES + + Outside a character class, a backslash followed by a digit greater than + 0 (and possibly further digits) is a backreference to a capture group + earlier (that is, to its left) in the pattern, provided there have been + that many previous capture groups. + + However, if the decimal number following the backslash is less than 8, + it is always taken as a backreference, and causes an error only if + there are not that many capture groups in the entire pattern. In other + words, the group that is referenced need not be to the left of the ref- + erence for numbers less than 8. A "forward backreference" of this type + can make sense when a repetition is involved and the group to the right + has participated in an earlier iteration. + + It is not possible to have a numerical "forward backreference" to a + group whose number is 8 or more using this syntax because a sequence + such as \50 is interpreted as a character defined in octal. See the + subsection entitled "Non-printing characters" above for further details + of the handling of digits following a backslash. Other forms of back- + referencing do not suffer from this restriction. In particular, there + is no problem when named capture groups are used (see below). + + Another way of avoiding the ambiguity inherent in the use of digits + following a backslash is to use the \g escape sequence. This escape + must be followed by a signed or unsigned number, optionally enclosed in + braces. These examples are all identical: + + (ring), \1 + (ring), \g1 + (ring), \g{1} + + An unsigned number specifies an absolute reference without the ambigu- + ity that is present in the older syntax. It is also useful when literal + digits follow the reference. A signed number is a relative reference. 
+ Consider this example: + + (abc(def)ghi)\g{-1} + + The sequence \g{-1} is a reference to the capture group whose number is + one less than the number of the next group to be started, so in this + example (where the next group would be numbered 3) it is equivalent to + \2, and \g{-2} would be equivalent to \1. Note that if this construct + is inside a capture group, that group is included in the count, so in + this example \g{-2} also refers to group 1: + + (A)(\g{-2}B) + + The use of relative references can be helpful in long patterns, and + also in patterns that are created by joining together fragments that + contain references within themselves. + + The sequence \g{+1} is a reference to the next capture group that is + started after this item, and \g{+2} refers to the one after that, and + so on. This kind of forward reference can be useful in patterns that + repeat. Perl does not support the use of + in this way. + + A backreference matches whatever actually most recently matched the + capture group in the current subject string, rather than anything at + all that matches the group (see "Groups as subroutines" below for a way + of doing that). So the pattern + + (sens|respons)e and \1ibility + + matches "sense and sensibility" and "response and responsibility", but + not "sense and responsibility". If caseful matching is in force at the + time of the backreference, the case of letters is relevant. For exam- + ple, + + ((?i)rah)\s+\1 + + matches "rah rah" and "RAH RAH", but not "RAH rah", even though the + original capture group is matched caselessly. + + There are several different ways of writing backreferences to named + capture groups. The .NET syntax is \k{name}, the Python syntax is + (?P=name), and the original Perl syntax is \k<name> or \k'name'. All of + these are now supported by both Perl and PCRE2. Perl 5.10's unified + backreference syntax, in which \g can be used for both numeric and + named references, is also supported by PCRE2. 
We could rewrite the + above example in any of the following ways: + + (?<p1>(?i)rah)\s+\k<p1> + (?'p1'(?i)rah)\s+\k{p1} + (?P<p1>(?i)rah)\s+(?P=p1) + (?<p1>(?i)rah)\s+\g{p1} + + A capture group that is referenced by name may appear in the pattern + before or after the reference. + + There may be more than one backreference to the same group. If a group + has not actually been used in a particular match, backreferences to it + always fail by default. For example, the pattern + + (a|(bc))\2 + + always fails if it starts to match "a" rather than "bc". However, if + the PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a backref- + erence to an unset value matches an empty string. + + Because there may be many capture groups in a pattern, all digits fol- + lowing a backslash are taken as part of a potential backreference num- + ber. If the pattern continues with a digit character, some delimiter + must be used to terminate the backreference. If the PCRE2_EXTENDED or + PCRE2_EXTENDED_MORE option is set, this can be white space. Otherwise, + the \g{} syntax or an empty comment (see "Comments" below) can be used. + + Recursive backreferences + + A backreference that occurs inside the group to which it refers fails + when the group is first used, so, for example, (a\1) never matches. + However, such references can be useful inside repeated groups. For ex- + ample, the pattern + + (a|b\1)+ + + matches any number of "a"s and also "aba", "ababbaa" etc. At each iter- + ation of the group, the backreference matches the character string cor- + responding to the previous iteration. In order for this to work, the + pattern must be such that the first iteration does not need to match + the backreference. This can be done using alternation, as in the exam- + ple above, or by a quantifier with a minimum of zero. + + For versions of PCRE2 less than 10.25, backreferences of this type used + to cause the group that they reference to be treated as an atomic + group. 
This restriction no longer applies, and backtracking into such + groups can occur as normal. + + +ASSERTIONS + + An assertion is a test on the characters following or preceding the + current matching point that does not consume any characters. The simple + assertions coded as \b, \B, \A, \G, \Z, \z, ^ and $ are described + above. + + More complicated assertions are coded as parenthesized groups. There + are two kinds: those that look ahead of the current position in the + subject string, and those that look behind it, and in each case an as- + sertion may be positive (must match for the assertion to be true) or + negative (must not match for the assertion to be true). An assertion + group is matched in the normal way, and if it is true, matching contin- + ues after it, but with the matching position in the subject string re- + set to what it was before the assertion was processed. + + The Perl-compatible lookaround assertions are atomic. If an assertion + is true, but there is a subsequent matching failure, there is no back- + tracking into the assertion. However, there are some cases where non- + atomic assertions can be useful. PCRE2 has some support for these, de- + scribed in the section entitled "Non-atomic assertions" below, but they + are not Perl-compatible. + + A lookaround assertion may appear as the condition in a conditional + group (see below). In this case, the result of matching the assertion + determines which branch of the condition is followed. + + Assertion groups are not capture groups. If an assertion contains cap- + ture groups within it, these are counted for the purposes of numbering + the capture groups in the whole pattern. Within each branch of an as- + sertion, locally captured substrings may be referenced in the usual + way. For example, a sequence such as (.)\g{-1} can be used to check + that two adjacent characters are the same. 
+ + When a branch within an assertion fails to match, any substrings that + were captured are discarded (as happens with any pattern branch that + fails to match). A negative assertion is true only when all its + branches fail to match; this means that no captured substrings are ever + retained after a successful negative assertion. When an assertion con- + tains a matching branch, what happens depends on the type of assertion. + + For a positive assertion, internally captured substrings in the suc- + cessful branch are retained, and matching continues with the next pat- + tern item after the assertion. For a negative assertion, a matching + branch means that the assertion is not true. If such an assertion is + being used as a condition in a conditional group (see below), captured + substrings are retained, because matching continues with the "no" + branch of the condition. For other failing negative assertions, control + passes to the previous backtracking point, thus discarding any captured + strings within the assertion. + + Most assertion groups may be repeated; though it makes no sense to as- + sert the same thing several times, the side effect of capturing in pos- + itive assertions may occasionally be useful. However, an assertion that + forms the condition for a conditional group may not be quantified. + PCRE2 used to restrict the repetition of assertions, but from release + 10.35 the only restriction is that an unlimited maximum repetition is + changed to be one more than the minimum. For example, {3,} is treated + as {3,4}. + + Alphabetic assertion names + + Traditionally, symbolic sequences such as (?= and (?<= have been used + to specify lookaround assertions. Perl 5.28 introduced some experimen- + tal alphabetic alternatives which might be easier to remember. They all + start with (* instead of (? and must be written using lower case let- + ters. 
PCRE2 supports the following synonyms: + + (*positive_lookahead: or (*pla: is the same as (?= + (*negative_lookahead: or (*nla: is the same as (?! + (*positive_lookbehind: or (*plb: is the same as (?<= + (*negative_lookbehind: or (*nlb: is the same as (?<! + + +NON-ATOMIC ASSERTIONS + + A non-atomic positive lookahead is written (*napla: and, unlike the + traditional atomic assertions, it can be backtracked into. Consider the + problem of finding the right-most word in a string that also appears + earlier in the string, that is, it must appear at least twice in total. + This pattern returns the required result as captured substring 1: + + ^(?x)(*napla: .* \b(\w++)) (?> .*? \b\1\b ){2} + + For a subject such as "word1 word2 word3 word2 word3 word4" the result + is "word3". How does it work? At the start, ^(?x) anchors the pattern + and sets the "x" option, which causes white space (introduced for read- + ability) to be ignored. Inside the assertion, the greedy .* at first + consumes the entire string, but then has to backtrack until the rest of + the assertion can match a word, which is captured by group 1. In other + words, when the assertion first succeeds, it captures the right-most + word in the string. + + The current matching point is then reset to the start of the subject, + and the rest of the pattern match checks for two occurrences of the + captured word, using an ungreedy .*? to scan from the left. If this + succeeds, we are done, but if the last word in the string does not oc- + cur twice, this part of the pattern fails. If a traditional atomic + lookahead (?= or (*pla: had been used, the assertion could not be re- + entered, and the whole match would fail. The pattern would succeed only + if the very last word in the subject was found twice. + + Using a non-atomic lookahead, however, means that when the last word + does not occur twice in the string, the lookahead can backtrack and + find the second-last word, and so on, until either the match succeeds, + or all words have been tested. + + Two conditions must be met for a non-atomic assertion to be useful: the + contents of one or more capturing groups must change after a backtrack + into the assertion, and there must be a backreference to a changed + group later in the pattern. 
If this is not the case, the rest of the + pattern match fails exactly as before because nothing has changed, so + using a non-atomic assertion just wastes resources. + + There is one exception to backtracking into a non-atomic assertion. If + an (*ACCEPT) control verb is triggered, the assertion succeeds atomi- + cally. That is, a subsequent match failure cannot backtrack into the + assertion. + + Non-atomic assertions are not supported by the alternative matching + function pcre2_dfa_match(). They are supported by JIT, but only if they + do not contain any control verbs such as (*ACCEPT). (This may change in + future). Note that assertions that appear as conditions for conditional + groups (see below) must be atomic. + + +SCRIPT RUNS + + In concept, a script run is a sequence of characters that are all from + the same Unicode script such as Latin or Greek. However, because some + scripts are commonly used together, and because some diacritical and + other marks are used with multiple scripts, it is not that simple. + There is a full description of the rules that PCRE2 uses in the section + entitled "Script Runs" in the pcre2unicode documentation. + + If part of a pattern is enclosed between (*script_run: or (*sr: and a + closing parenthesis, it fails if the sequence of characters that it + matches are not a script run. After a failure, normal backtracking oc- + curs. Script runs can be used to detect spoofing attacks using charac- + ters that look the same, but are from different scripts. The string + "paypal.com" is an infamous example, where the letters could be a mix- + ture of Latin and Cyrillic. 
This pattern ensures that the matched char- + acters in a sequence of non-spaces that follow white space are a script + run: + + \s+(*sr:\S+) + + To be sure that they are all from the Latin script (for example), a + lookahead can be used: + + \s+(?=\p{Latin})(*sr:\S+) + + This works as long as the first character is expected to be a character + in that script, and not (for example) punctuation, which is allowed + with any script. If this is not the case, a more creative lookahead is + needed. For example, if digits, underscore, and dots are permitted at + the start: + + \s+(?=[0-9_.]*\p{Latin})(*sr:\S+) + + + In many cases, backtracking into a script run pattern fragment is not + desirable. The script run can employ an atomic group to prevent this. + Because this is a common requirement, a shorthand notation is provided + by (*atomic_script_run: or (*asr: + + (*asr:...) is the same as (*sr:(?>...)) + + Note that the atomic group is inside the script run. Putting it outside + would not prevent backtracking into the script run pattern. + + Support for script runs is not available if PCRE2 is compiled without + Unicode support. A compile-time error is given if any of the above con- + structs is encountered. Script runs are not supported by the alternate + matching function, pcre2_dfa_match() because they use the same mecha- + nism as capturing parentheses. + + Warning: The (*ACCEPT) control verb (see below) should not be used + within a script run group, because it causes an immediate exit from the + group, bypassing the script run checking. + + +CONDITIONAL GROUPS + + It is possible to cause the matching process to obey a pattern fragment + conditionally or to choose between two alternative fragments, depending + on the result of an assertion, or whether a specific capture group has + already been matched. 
The two possible forms of conditional group are: + + (?(condition)yes-pattern) + (?(condition)yes-pattern|no-pattern) + + If the condition is satisfied, the yes-pattern is used; otherwise the + no-pattern (if present) is used. An absent no-pattern is equivalent to + an empty string (it always matches). If there are more than two alter- + natives in the group, a compile-time error occurs. Each of the two al- + ternatives may itself contain nested groups of any form, including con- + ditional groups; the restriction to two alternatives applies only at + the level of the condition itself. This pattern fragment is an example + where the alternatives are complex: + + (?(1) (A|B|C) | (D | (?(2)E|F) | E) ) + + + There are five kinds of condition: references to capture groups, refer- + ences to recursion, two pseudo-conditions called DEFINE and VERSION, + and assertions. + + Checking for a used capture group by number + + If the text between the parentheses consists of a sequence of digits, + the condition is true if a capture group of that number has previously + matched. If there is more than one capture group with the same number + (see the earlier section about duplicate group numbers), the condition + is true if any of them have matched. An alternative notation, which is + a PCRE2 extension, not supported by Perl, is to precede the digits with + a plus or minus sign. In this case, the group number is relative rather + than absolute. The most recently opened capture group (which could be + enclosing this condition) can be referenced by (?(-1), the next most + recent by (?(-2), and so on. Inside loops it can also make sense to re- + fer to subsequent groups. The next capture group to be opened can be + referenced as (?(+1), and so on. The value zero in any of these forms + is not used; it provokes a compile-time error. 
+ + Consider the following pattern, which contains non-significant white + space to make it more readable (assume the PCRE2_EXTENDED option) and + to divide it into three parts for ease of discussion: + + ( \( )? [^()]+ (?(1) \) ) + + The first part matches an optional opening parenthesis, and if that + character is present, sets it as the first captured substring. The sec- + ond part matches one or more characters that are not parentheses. The + third part is a conditional group that tests whether or not the first + capture group matched. If it did, that is, if the subject started with an + opening parenthesis, the condition is true, and so the yes-pattern is + executed and a closing parenthesis is required. Otherwise, since no- + pattern is not present, the conditional group matches nothing. In other + words, this pattern matches a sequence of non-parentheses, optionally + enclosed in parentheses. + + If you were embedding this pattern in a larger one, you could use a + relative reference: + + ...other stuff... ( \( )? [^()]+ (?(-1) \) ) ... + + This makes the fragment independent of the parentheses in the larger + pattern. + + Checking for a used capture group by name + + Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a + used capture group by name. For compatibility with earlier versions of + PCRE1, which had this facility before Perl, the syntax (?(name)...) is + also recognized. Note, however, that undelimited names consisting of + the letter R followed by digits are ambiguous (see the following sec- + tion). Rewriting the above example to use a named group gives this: + + (?<OPEN> \( )? [^()]+ (?(<OPEN>) \) ) + + If the name used in a condition of this kind is a duplicate, the test + is applied to all groups of the same name, and is true if any one of + them has matched. + + Checking for pattern recursion + + "Recursion" in this sense refers to any subroutine-like call from one + part of the pattern to another, whether or not it is actually recur- + sive. 
See the sections entitled "Recursive patterns" and "Groups as + subroutines" below for details of recursion and subroutine calls. + + If a condition is the string (R), and there is no capture group with + the name R, the condition is true if matching is currently in a recur- + sion or subroutine call to the whole pattern or any capture group. If + digits follow the letter R, and there is no group with that name, the + condition is true if the most recent call is into a group with the + given number, which must exist somewhere in the overall pattern. This + is a contrived example that is equivalent to a+b: + + ((?(R1)a+|(?1)b)) + + However, in both cases, if there is a capture group with a matching + name, the condition tests for its being set, as described in the sec- + tion above, instead of testing for recursion. For example, creating a + group with the name R1 by adding (?<R1>) to the above pattern com- + pletely changes its meaning. + + If a name preceded by ampersand follows the letter R, for example: + + (?(R&name)...) + + the condition is true if the most recent recursion is into a group of + that name (which must exist within the pattern). + + This condition does not check the entire recursion stack. It tests only + the current level. If the name used in a condition of this kind is a + duplicate, the test is applied to all groups of the same name, and is + true if any one of them is the most recent recursion. + + At "top level", all these recursion test conditions are false. + + Defining capture groups for use by reference only + + If the condition is the string (DEFINE), the condition is always false, + even if there is a group with the name DEFINE. In this case, there may + be only one alternative in the rest of the conditional group. It is al- + ways skipped if control reaches this point in the pattern; the idea of + DEFINE is that it can be used to define subroutines that can be refer- + enced from elsewhere. (The use of subroutines is described below.) 
For + example, a pattern to match an IPv4 address such as "192.168.23.245" + could be written like this (ignore white space and line breaks): + + (?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) ) + \b (?&byte) (\.(?&byte)){3} \b + + The first part of the pattern is a DEFINE group inside which another + group named "byte" is defined. This matches an individual component of + an IPv4 address (a number less than 256). When matching takes place, + this part of the pattern is skipped because DEFINE acts like a false + condition. The rest of the pattern uses references to the named group + to match the four dot-separated components of an IPv4 address, insist- + ing on a word boundary at each end. + + Checking the PCRE2 version + + Programs that link with a PCRE2 library can check the version by call- + ing pcre2_config() with appropriate arguments. Users of applications + that do not have access to the underlying code cannot do this. A spe- + cial "condition" called VERSION exists to allow such users to discover + which version of PCRE2 they are dealing with by using this condition to + match a string such as "yesno". VERSION must be followed either by "=" + or ">=" and a version number. For example: + + (?(VERSION>=10.4)yes|no) + + This pattern matches "yes" if the PCRE2 version is greater than or equal to + 10.4, or "no" otherwise. The fractional part of the version number may + not contain more than two digits. + + Assertion conditions + + If the condition is not in any of the above formats, it must be a + parenthesized assertion. This may be a positive or negative lookahead + or lookbehind assertion. However, it must be a traditional atomic as- + sertion, not one of the non-atomic assertions. 
+ + Consider this pattern, again containing non-significant white space, + and with the two alternatives on the second line: + + (?(?=[^a-z]*[a-z]) + \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) + + The condition is a positive lookahead assertion that matches an op- + tional sequence of non-letters followed by a letter. In other words, it + tests for the presence of at least one letter in the subject. If a let- + ter is found, the subject is matched against the first alternative; + otherwise it is matched against the second. This pattern matches + strings in one of the two forms dd-aaa-dd or dd-dd-dd, where aaa are + letters and dd are digits. + + When an assertion that is a condition contains capture groups, any cap- + turing that occurs in a matching branch is retained afterwards, for + both positive and negative assertions, because matching always contin- + ues after the assertion, whether it succeeds or fails. (Compare non- + conditional assertions, for which captures are retained only for posi- + tive assertions that succeed.) + + +COMMENTS + + There are two ways of including comments in patterns that are processed + by PCRE2. In both cases, the start of the comment must not be in a + character class, nor in the middle of any other sequence of related + characters such as (?: or a group name or number. The characters that + make up a comment play no part in the pattern matching. + + The sequence (?# marks the start of a comment that continues up to the + next closing parenthesis. Nested parentheses are not permitted. If the + PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, an unescaped # + character also introduces a comment, which in this case continues to + immediately after the next newline character or character sequence in + the pattern. 
Which characters are interpreted as newlines is controlled + by an option passed to the compiling function or by a special sequence + at the start of the pattern, as described in the section entitled "New- + line conventions" above. Note that the end of this type of comment is a + literal newline sequence in the pattern; escape sequences that happen + to represent a newline do not count. For example, consider this pattern + when PCRE2_EXTENDED is set, and the default newline convention (a sin- + gle linefeed character) is in force: + + abc #comment \n still comment + + On encountering the # character, pcre2_compile() skips along, looking + for a newline in the pattern. The sequence \n is still literal at this + stage, so it does not terminate the comment. Only an actual character + with the code value 0x0a (the default newline) does so. + + +RECURSIVE PATTERNS + + Consider the problem of matching a string in parentheses, allowing for + unlimited nested parentheses. Without the use of recursion, the best + that can be done is to use a pattern that matches up to some fixed + depth of nesting. It is not possible to handle an arbitrary nesting + depth. + + For some time, Perl has provided a facility that allows regular expres- + sions to recurse (amongst other things). It does this by interpolating + Perl code in the expression at run time, and the code can refer to the + expression itself. A Perl pattern using code interpolation to solve the + parentheses problem can be created like this: + + $re = qr{\( (?: (?>[^()]+) | (?p{$re}) )* \)}x; + + The (?p{...}) item interpolates Perl code at run time, and in this case + refers recursively to the pattern in which it appears. + + Obviously, PCRE2 cannot support the interpolation of Perl code. In- + stead, it supports special syntax for recursion of the entire pattern, + and also for individual capture group recursion. 
After its introduction + in PCRE1 and Python, this kind of recursion was subsequently introduced + into Perl at release 5.10. + + A special item that consists of (? followed by a number greater than + zero and a closing parenthesis is a recursive subroutine call of the + capture group of the given number, provided that it occurs inside that + group. (If not, it is a non-recursive subroutine call, which is de- + scribed in the next section.) The special item (?R) or (?0) is a recur- + sive call of the entire regular expression. + + This PCRE2 pattern solves the nested parentheses problem (assume the + PCRE2_EXTENDED option is set so that white space is ignored): + + \( ( [^()]++ | (?R) )* \) + + First it matches an opening parenthesis. Then it matches any number of + substrings which can either be a sequence of non-parentheses, or a re- + cursive match of the pattern itself (that is, a correctly parenthesized + substring). Finally there is a closing parenthesis. Note the use of a + possessive quantifier to avoid backtracking into sequences of non- + parentheses. + + If this were part of a larger pattern, you would not want to recurse + the entire pattern, so instead you could use this: + + ( \( ( [^()]++ | (?1) )* \) ) + + We have put the pattern into parentheses, and caused the recursion to + refer to them instead of the whole pattern. + + In a larger pattern, keeping track of parenthesis numbers can be + tricky. This is made easier by the use of relative references. Instead + of (?1) in the pattern above you can write (?-2) to refer to the second + most recently opened parentheses preceding the recursion. In other + words, a negative number counts capturing parentheses leftwards from + the point at which it is encountered. + + Be aware however, that if duplicate capture group numbers are in use, + relative references refer to the earliest group with the appropriate + number. 
Consider, for example: + + (?|(a)|(b)) (c) (?-2) + + The first two capture groups (a) and (b) are both numbered 1, and group + (c) is number 2. When the reference (?-2) is encountered, the second + most recently opened parentheses has the number 1, but it is the first + such group (the (a) group) to which the recursion refers. This would be + the same if an absolute reference (?1) was used. In other words, rela- + tive references are just a shorthand for computing a group number. + + It is also possible to refer to subsequent capture groups, by writing + references such as (?+2). However, these cannot be recursive because + the reference is not inside the parentheses that are referenced. They + are always non-recursive subroutine calls, as described in the next + section. + + An alternative approach is to use named parentheses. The Perl syntax + for this is (?&name); PCRE1's earlier syntax (?P>name) is also sup- + ported. We could rewrite the above example as follows: + + (?<pn> \( ( [^()]++ | (?&pn) )* \) ) + + If there is more than one group with the same name, the earliest one is + used. + + The example pattern that we have been looking at contains nested unlim- + ited repeats, and so the use of a possessive quantifier for matching + strings of non-parentheses is important when applying the pattern to + strings that do not match. For example, when this pattern is applied to + + (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa() + + it yields "no match" quickly. However, if a possessive quantifier is + not used, the match runs for a very long time indeed because there are + so many different ways the + and * repeats can carve up the subject, + and all have to be tested before failure can be reported. + + At the end of a match, the values of capturing parentheses are those + from the outermost level. If you want to obtain intermediate values, a + callout function can be used (see below and the pcre2callout documenta- + tion). 
If the pattern above is matched against + + (ab(cd)ef) + + the value for the inner capturing parentheses (numbered 2) is "ef", + which is the last value taken on at the top level. If a capture group + is not matched at the top level, its final captured value is unset, + even if it was (temporarily) set at a deeper level during the matching + process. + + Do not confuse the (?R) item with the condition (R), which tests for + recursion. Consider this pattern, which matches text in angle brack- + ets, allowing for arbitrary nesting. Only digits are allowed in nested + brackets (that is, when recursing), whereas any characters are permit- + ted at the outer level. + + < (?: (?(R) \d++ | [^<>]*+) | (?R)) * > + + In this pattern, (?(R) is the start of a conditional group, with two + different alternatives for the recursive and non-recursive cases. The + (?R) item is the actual recursive call. + + Differences in recursion processing between PCRE2 and Perl + + Some former differences between PCRE2 and Perl no longer exist. + + Before release 10.30, recursion processing in PCRE2 differed from Perl + in that a recursive subroutine call was always treated as an atomic + group. That is, once it had matched some of the subject string, it was + never re-entered, even if it contained untried alternatives and there + was a subsequent matching failure. (Historical note: PCRE implemented + recursion before Perl did.) + + Starting with release 10.30, recursive subroutine calls are no longer + treated as atomic. That is, they can be re-entered to try unused alter- + natives if there is a matching failure later in the pattern. This is + now compatible with the way Perl works. If you want a subroutine call + to be atomic, you must explicitly enclose it in an atomic group. + + Supporting backtracking into recursions simplifies certain types of re- + cursive pattern. 
For example, this pattern matches palindromic strings: + + ^((.)(?1)\2|.?)$ + + The second branch in the group matches a single central character in + the palindrome when there are an odd number of characters, or nothing + when there are an even number of characters, but in order to work it + has to be able to try the second case when the rest of the pattern + match fails. If you want to match typical palindromic phrases, the pat- + tern has to ignore all non-word characters, which can be done like + this: + + ^\W*+((.)\W*+(?1)\W*+\2|\W*+.?)\W*+$ + + If run with the PCRE2_CASELESS option, this pattern matches phrases + such as "A man, a plan, a canal: Panama!". Note the use of the posses- + sive quantifier *+ to avoid backtracking into sequences of non-word + characters. Without this, PCRE2 takes a great deal longer (ten times or + more) to match typical phrases, and Perl takes so long that you think + it has gone into a loop. + + Another way in which PCRE2 and Perl used to differ in their recursion + processing is in the handling of captured values. Formerly in Perl, + when a group was called recursively or as a subroutine (see the next + section), it had no access to any values that were captured outside the + recursion, whereas in PCRE2 these values can be referenced. Consider + this pattern: + + ^(.)(\1|a(?2)) + + This pattern matches "bab". The first capturing parentheses match "b", + then in the second group, when the backreference \1 fails to match "b", + the second alternative matches "a" and then recurses. In the recursion, + \1 does now match "b" and so the whole match succeeds. This match used + to fail in Perl, but in later versions (I tried 5.024) it now works. + + +GROUPS AS SUBROUTINES + + If the syntax for a recursive group call (either by number or by name) + is used outside the parentheses to which it refers, it operates a bit + like a subroutine in a programming language. 
More accurately, PCRE2 + treats the referenced group as an independent subpattern which it tries + to match at the current matching position. The called group may be de- + fined before or after the reference. A numbered reference can be ab- + solute or relative, as in these examples: + + (...(absolute)...)...(?2)... + (...(relative)...)...(?-1)... + (...(?+1)...(relative)... + + An earlier example pointed out that the pattern + + (sens|respons)e and \1ibility + + matches "sense and sensibility" and "response and responsibility", but + not "sense and responsibility". If instead the pattern + + (sens|respons)e and (?1)ibility + + is used, it does match "sense and responsibility" as well as the other + two strings. Another example is given in the discussion of DEFINE + above. + + Like recursions, subroutine calls used to be treated as atomic, but + this changed at PCRE2 release 10.30, so backtracking into subroutine + calls can now occur. However, any capturing parentheses that are set + during the subroutine call revert to their previous values afterwards. + + Processing options such as case-independence are fixed when a group is + defined, so if it is used as a subroutine, such options cannot be + changed for different calls. For example, consider this pattern: + + (abc)(?i:(?-1)) + + It matches "abcabc". It does not match "abcABC" because the change of + processing option does not affect the called group. + + The behaviour of backtracking control verbs in groups when called as + subroutines is described in the section entitled "Backtracking verbs in + subroutines" below. + + +ONIGURUMA SUBROUTINE SYNTAX + + For compatibility with Oniguruma, the non-Perl syntax \g followed by a + name or a number enclosed either in angle brackets or single quotes, is + an alternative syntax for calling a group as a subroutine, possibly re- + cursively. Here are two of the examples used above, rewritten using + this syntax: + + (?<pn> 
\( ( (?>[^()]+) | \g )* \) ) + (sens|respons)e and \g'1'ibility + + PCRE2 supports an extension to Oniguruma: if a number is preceded by a + plus or a minus sign it is taken as a relative reference. For example: + + (abc)(?i:\g<-1>) + + Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not + synonymous. The former is a backreference; the latter is a subroutine + call. + + +CALLOUTS + + Perl has a feature whereby using the sequence (?{...}) causes arbitrary + Perl code to be obeyed in the middle of matching a regular expression. + This makes it possible, amongst other things, to extract different sub- + strings that match the same pair of parentheses when there is a repeti- + tion. + + PCRE2 provides a similar feature, but of course it cannot obey arbi- + trary Perl code. The feature is called "callout". The caller of PCRE2 + provides an external function by putting its entry point in a match + context using the function pcre2_set_callout(), and then passing that + context to pcre2_match() or pcre2_dfa_match(). If no match context is + passed, or if the callout entry point is set to NULL, callouts are dis- + abled. + + Within a regular expression, (?C) indicates a point at which the + external function is to be called. There are two kinds of callout: + those with a numerical argument and those with a string argument. (?C) + on its own with no argument is treated as (?C0). A numerical argument + allows the application to distinguish between different callouts. + String arguments were added for release 10.20 to make it possible for + script languages that use PCRE2 to embed short scripts within patterns + in a similar way to Perl. + + During matching, when PCRE2 reaches a callout point, the external func- + tion is called. It is provided with the number or string argument of + the callout, the position in the pattern, and one item of data that is + also set in the match block. 
The callout function may cause matching to + proceed, to backtrack, or to fail. + + By default, PCRE2 implements a number of optimizations at matching + time, and one side-effect is that sometimes callouts are skipped. If + you need all possible callouts to happen, you need to set options that + disable the relevant optimizations. More details, including a complete + description of the programming interface to the callout function, are + given in the pcre2callout documentation. + + Callouts with numerical arguments + + If you just want to have a means of identifying different callout + points, put a number less than 256 after the letter C. For example, + this pattern has two callout points: + + (?C1)abc(?C2)def + + If the PCRE2_AUTO_CALLOUT flag is passed to pcre2_compile(), numerical + callouts are automatically installed before each item in the pattern. + They are all numbered 255. If there is a conditional group in the pat- + tern whose condition is an assertion, an additional callout is inserted + just before the condition. An explicit callout may also be set at this + position, as in this example: + + (?(?C9)(?=a)abc|def) + + Note that this applies only to assertion conditions, not to other types + of condition. + + Callouts with string arguments + + A delimited string may be used instead of a number as a callout argu- + ment. The starting delimiter must be one of ` ' " ^ % # $ { and the + ending delimiter is the same as the start, except for {, where the end- + ing delimiter is }. If the ending delimiter is needed within the + string, it must be doubled. For example: + + (?C'ab ''c'' d')xyz(?C{any text})pqr + + The doubling is removed before the string is passed to the callout + function. + + +BACKTRACKING CONTROL + + There are a number of special "Backtracking Control Verbs" (to use + Perl's terminology) that modify the behaviour of backtracking during + matching. They are generally of the form (*VERB) or (*VERB:NAME). 
Some + verbs take either form, and may behave differently depending on whether + or not a name argument is present. The names are not required to be + unique within the pattern. + + By default, for compatibility with Perl, a name is any sequence of + characters that does not include a closing parenthesis. The name is not + processed in any way, and it is not possible to include a closing + parenthesis in the name. This can be changed by setting the + PCRE2_ALT_VERBNAMES option, but the result is no longer Perl-compati- + ble. + + When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to + verb names and only an unescaped closing parenthesis terminates the + name. However, the only backslash items that are permitted are \Q, \E, + and sequences such as \x{100} that define character code points. Char- + acter type escapes such as \d are faulted. + + A closing parenthesis can be included in a name either as \) or between + \Q and \E. In addition to backslash processing, if the PCRE2_EXTENDED + or PCRE2_EXTENDED_MORE option is also set, unescaped whitespace in verb + names is skipped, and #-comments are recognized, exactly as in the rest + of the pattern. PCRE2_EXTENDED and PCRE2_EXTENDED_MORE do not affect + verb names unless PCRE2_ALT_VERBNAMES is also set. + + The maximum length of a name is 255 in the 8-bit library and 65535 in + the 16-bit and 32-bit libraries. If the name is empty, that is, if the + closing parenthesis immediately follows the colon, the effect is as if + the colon were not there. Any number of these verbs may occur in a pat- + tern. Except for (*ACCEPT), they may not be quantified. + + Since these verbs are specifically related to backtracking, most of + them can be used only when the pattern is to be matched using the tra- + ditional matching function, because that uses a backtracking algorithm. 
+ With the exception of (*FAIL), which behaves like a failing negative + assertion, the backtracking control verbs cause an error if encountered + by the DFA matching function. + + The behaviour of these verbs in repeated groups, assertions, and in + capture groups called as subroutines (whether or not recursively) is + documented below. + + Optimizations that affect backtracking verbs + + PCRE2 contains some optimizations that are used to speed up matching by + running some checks at the start of each match attempt. For example, it + may know the minimum length of matching subject, or that a particular + character must be present. When one of these optimizations bypasses the + running of a match, any included backtracking verbs will not, of + course, be processed. You can suppress the start-of-match optimizations + by setting the PCRE2_NO_START_OPTIMIZE option when calling pcre2_com- + pile(), or by starting the pattern with (*NO_START_OPT). There is more + discussion of this option in the section entitled "Compiling a pattern" + in the pcre2api documentation. + + Experiments with Perl suggest that it too has similar optimizations, + and like PCRE2, turning them off can change the result of a match. + + Verbs that act immediately + + The following verbs act as soon as they are encountered. + + (*ACCEPT) or (*ACCEPT:NAME) + + This verb causes the match to end successfully, skipping the remainder + of the pattern. However, when it is inside a capture group that is + called as a subroutine, only that group is ended successfully. Matching + then continues at the outer level. If (*ACCEPT) is triggered in a posi- + tive assertion, the assertion succeeds; in a negative assertion, the + assertion fails. + + If (*ACCEPT) is inside capturing parentheses, the data so far is cap- + tured. For example: + + A((?:A|B(*ACCEPT)|C)D) + + This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is cap- + tured by the outer parentheses.
+ + (*ACCEPT) is the only backtracking verb that is allowed to be quanti- + fied because an ungreedy quantification with a minimum of zero acts + only when a backtrack happens. Consider, for example, + + (A(*ACCEPT)??B)C + + where A, B, and C may be complex expressions. After matching "A", the + matcher processes "BC"; if that fails, causing a backtrack, (*ACCEPT) + is triggered and the match succeeds. In both cases, all but C is cap- + tured. Whereas (*COMMIT) (see below) means "fail on backtrack", a re- + peated (*ACCEPT) of this type means "succeed on backtrack". + + Warning: (*ACCEPT) should not be used within a script run group, be- + cause it causes an immediate exit from the group, bypassing the script + run checking. + + (*FAIL) or (*FAIL:NAME) + + This verb causes a matching failure, forcing backtracking to occur. It + may be abbreviated to (*F). It is equivalent to (?!) but easier to + read. The Perl documentation notes that it is probably useful only when + combined with (?{}) or (??{}). Those are, of course, Perl features that + are not present in PCRE2. The nearest equivalent is the callout fea- + ture, as for example in this pattern: + + a+(?C)(*FAIL) + + A match with the string "aaaa" always fails, but the callout is taken + before each backtrack happens (in this example, 10 times). + + (*ACCEPT:NAME) and (*FAIL:NAME) behave the same as (*MARK:NAME)(*AC- + CEPT) and (*MARK:NAME)(*FAIL), respectively, that is, a (*MARK) is + recorded just before the verb acts. + + Recording which path was taken + + There is one verb whose main purpose is to track how a match was ar- + rived at, though it also has a secondary use in conjunction with ad- + vancing the match starting point (see (*SKIP) below). + + (*MARK:NAME) or (*:NAME) + + A name is always required with this verb. For all the other backtrack- + ing control verbs, a NAME argument is optional. 
+ + When a match succeeds, the name of the last-encountered mark name on + the matching path is passed back to the caller as described in the sec- + tion entitled "Other information about the match" in the pcre2api docu- + mentation. This applies to all instances of (*MARK) and other verbs, + including those inside assertions and atomic groups. However, there are + differences in those cases when (*MARK) is used in conjunction with + (*SKIP) as described below. + + The mark name that was last encountered on the matching path is passed + back. A verb without a NAME argument is ignored for this purpose. Here + is an example of pcre2test output, where the "mark" modifier requests + the retrieval and outputting of (*MARK) data: + + re> /X(*MARK:A)Y|X(*MARK:B)Z/mark + data> XY + 0: XY + MK: A + XZ + 0: XZ + MK: B + + The (*MARK) name is tagged with "MK:" in this output, and in this exam- + ple it indicates which of the two alternatives matched. This is a more + efficient way of obtaining this information than putting each alterna- + tive in its own capturing parentheses. + + If a verb with a name is encountered in a positive assertion that is + true, the name is recorded and passed back if it is the last-encoun- + tered. This does not happen for negative assertions or failing positive + assertions. + + After a partial match or a failed match, the last encountered name in + the entire match process is returned. For example: + + re> /X(*MARK:A)Y|X(*MARK:B)Z/mark + data> XP + No match, mark = B + + Note that in this unanchored example the mark is retained from the + match attempt that started at the letter "X" in the subject. Subsequent + match attempts starting at "P" and then with an empty string do not get + as far as the (*MARK) item, but nevertheless do not reset it. + + If you are interested in (*MARK) values after failed matches, you + should probably set the PCRE2_NO_START_OPTIMIZE option (see above) to + ensure that the match is always attempted. 
+ + Verbs that act after backtracking + + The following verbs do nothing when they are encountered. Matching con- + tinues with what follows, but if there is a subsequent match failure, + causing a backtrack to the verb, a failure is forced. That is, back- + tracking cannot pass to the left of the verb. However, when one of + these verbs appears inside an atomic group or in a lookaround assertion + that is true, its effect is confined to that group, because once the + group has been matched, there is never any backtracking into it. Back- + tracking from beyond an assertion or an atomic group ignores the entire + group, and seeks a preceding backtracking point. + + These verbs differ in exactly what kind of failure occurs when back- + tracking reaches them. The behaviour described below is what happens + when the verb is not in a subroutine or an assertion. Subsequent sec- + tions cover these special cases. + + (*COMMIT) or (*COMMIT:NAME) + + This verb causes the whole match to fail outright if there is a later + matching failure that causes backtracking to reach it. Even if the pat- + tern is unanchored, no further attempts to find a match by advancing + the starting point take place. If (*COMMIT) is the only backtracking + verb that is encountered, once it has been passed pcre2_match() is com- + mitted to finding a match at the current starting point, or not at all. + For example: + + a+(*COMMIT)b + + This matches "xxaab" but not "aacaab". It can be thought of as a kind + of dynamic anchor, or "I've started, so I must finish." + + The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COM- + MIT). It is like (*MARK:NAME) in that the name is remembered for pass- + ing back to the caller. However, (*SKIP:NAME) searches only for names + that are set with (*MARK), ignoring those set by any of the other back- + tracking verbs. 
+ + If there is more than one backtracking verb in a pattern, a different + one that follows (*COMMIT) may be triggered first, so merely passing + (*COMMIT) during a match does not always guarantee that a match must be + at this starting point. + + Note that (*COMMIT) at the start of a pattern is not the same as an an- + chor, unless PCRE2's start-of-match optimizations are turned off, as + shown in this output from pcre2test: + + re> /(*COMMIT)abc/ + data> xyzabc + 0: abc + data> + re> /(*COMMIT)abc/no_start_optimize + data> xyzabc + No match + + For the first pattern, PCRE2 knows that any match must start with "a", + so the optimization skips along the subject to "a" before applying the + pattern to the first set of data. The match attempt then succeeds. The + second pattern disables the optimization that skips along to the first + character. The pattern is now applied starting at "x", and so the + (*COMMIT) causes the match to fail without trying any other starting + points. + + (*PRUNE) or (*PRUNE:NAME) + + This verb causes the match to fail at the current starting position in + the subject if there is a later matching failure that causes backtrack- + ing to reach it. If the pattern is unanchored, the normal "bumpalong" + advance to the next starting character then happens. Backtracking can + occur as usual to the left of (*PRUNE), before it is reached, or when + matching to the right of (*PRUNE), but if there is no match to the + right, backtracking cannot cross (*PRUNE). In simple cases, the use of + (*PRUNE) is just an alternative to an atomic group or possessive quan- + tifier, but there are some uses of (*PRUNE) that cannot be expressed in + any other way. In an anchored pattern (*PRUNE) has the same effect as + (*COMMIT). + + The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). + It is like (*MARK:NAME) in that the name is remembered for passing back + to the caller. 
However, (*SKIP:NAME) searches only for names set with + (*MARK), ignoring those set by other backtracking verbs. + + (*SKIP) + + This verb, when given without a name, is like (*PRUNE), except that if + the pattern is unanchored, the "bumpalong" advance is not to the next + character, but to the position in the subject where (*SKIP) was encoun- + tered. (*SKIP) signifies that whatever text was matched leading up to + it cannot be part of a successful match if there is a later mismatch. + Consider: + + a+(*SKIP)b + + If the subject is "aaaac...", after the first match attempt fails + (starting at the first character in the string), the starting point + skips on to start the next attempt at "c". Note that a possessive quan- + tifier does not have the same effect as this example; although it would + suppress backtracking during the first match attempt, the second at- + tempt would start at the second character instead of skipping on to + "c". + + If (*SKIP) is used to specify a new starting position that is the same + as the starting position of the current match, or (by being inside a + lookbehind) earlier, the position specified by (*SKIP) is ignored, and + instead the normal "bumpalong" occurs. + + (*SKIP:NAME) + + When (*SKIP) has an associated name, its behaviour is modified. When + such a (*SKIP) is triggered, the previous path through the pattern is + searched for the most recent (*MARK) that has the same name. If one is + found, the "bumpalong" advance is to the subject position that corre- + sponds to that (*MARK) instead of to where (*SKIP) was encountered. If + no (*MARK) with a matching name is found, the (*SKIP) is ignored. + + The search for a (*MARK) name uses the normal backtracking mechanism, + which means that it does not see (*MARK) settings that are inside + atomic groups or assertions, because they are never re-entered by back- + tracking. 
Compare the following pcre2test examples: + + re> /a(?>(*MARK:X))(*SKIP:X)(*F)|(.)/ + data> abc + 0: a + 1: a + data> + re> /a(?:(*MARK:X))(*SKIP:X)(*F)|(.)/ + data> abc + 0: b + 1: b + + In the first example, the (*MARK) setting is in an atomic group, so it + is not seen when (*SKIP:X) triggers, causing the (*SKIP) to be ignored. + This allows the second branch of the pattern to be tried at the first + character position. In the second example, the (*MARK) setting is not + in an atomic group. This allows (*SKIP:X) to find the (*MARK) when it + backtracks, and this causes a new matching attempt to start at the sec- + ond character. This time, the (*MARK) is never seen because "a" does + not match "b", so the matcher immediately jumps to the second branch of + the pattern. + + Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It + ignores names that are set by other backtracking verbs. + + (*THEN) or (*THEN:NAME) + + This verb causes a skip to the next innermost alternative when back- + tracking reaches it. That is, it cancels any further backtracking + within the current alternative. Its name comes from the observation + that it can be used for a pattern-based if-then-else block: + + ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ... + + If the COND1 pattern matches, FOO is tried (and possibly further items + after the end of the group if FOO succeeds); on failure, the matcher + skips to the second alternative and tries COND2, without backtracking + into COND1. If that succeeds and BAR fails, COND3 is tried. If subse- + quently BAZ fails, there are no more alternatives, so there is a back- + track to whatever came before the entire group. If (*THEN) is not in- + side an alternation, it acts like (*PRUNE). + + The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). + It is like (*MARK:NAME) in that the name is remembered for passing back + to the caller.
However, (*SKIP:NAME) searches only for names set with + (*MARK), ignoring those set by other backtracking verbs. + + A group that does not contain a | character is just a part of the en- + closing alternative; it is not a nested alternation with only one al- + ternative. The effect of (*THEN) extends beyond such a group to the en- + closing alternative. Consider this pattern, where A, B, etc. are com- + plex pattern fragments that do not contain any | characters at this + level: + + A (B(*THEN)C) | D + + If A and B are matched, but there is a failure in C, matching does not + backtrack into A; instead it moves to the next alternative, that is, D. + However, if the group containing (*THEN) is given an alternative, it + behaves differently: + + A (B(*THEN)C | (*FAIL)) | D + + The effect of (*THEN) is now confined to the inner group. After a fail- + ure in C, matching moves to (*FAIL), which causes the whole group to + fail because there are no more alternatives to try. In this case, + matching does backtrack into A. + + Note that a conditional group is not considered as having two alterna- + tives, because only one is ever used. In other words, the | character + in a conditional group has a different meaning. Ignoring white space, + consider: + + ^.*? (?(?=a) a | b(*THEN)c ) + + If the subject is "ba", this pattern does not match. Because .*? is un- + greedy, it initially matches zero characters. The condition (?=a) then + fails, the character "b" is matched, but "c" is not. At this point, + matching does not backtrack to .*? as might perhaps be expected from + the presence of the | character. The conditional group is part of the + single alternative that comprises the whole pattern, and so the match + fails. (If there was a backtrack into .*?, allowing it to match "b", + the match would succeed.) + + The verbs just described provide four different "strengths" of control + when subsequent matching fails. 
(*THEN) is the weakest, carrying on the + match at the next alternative. (*PRUNE) comes next, failing the match + at the current starting position, but allowing an advance to the next + character (for an unanchored pattern). (*SKIP) is similar, except that + the advance may be more than one character. (*COMMIT) is the strongest, + causing the entire match to fail. + + More than one backtracking verb + + If more than one backtracking verb is present in a pattern, the one + that is backtracked onto first acts. For example, consider this pat- + tern, where A, B, etc. are complex pattern fragments: + + (A(*COMMIT)B(*THEN)C|ABD) + + If A matches but B fails, the backtrack to (*COMMIT) causes the entire + match to fail. However, if A and B match, but C fails, the backtrack to + (*THEN) causes the next alternative (ABD) to be tried. This behaviour + is consistent, but is not always the same as Perl's. It means that if + two or more backtracking verbs appear in succession, all but the last + of them has no effect. Consider this example: + + ...(*COMMIT)(*PRUNE)... + + If there is a matching failure to the right, backtracking onto (*PRUNE) + causes it to be triggered, and its action is taken. There can never be + a backtrack onto (*COMMIT). + + Backtracking verbs in repeated groups + + PCRE2 sometimes differs from Perl in its handling of backtracking verbs + in repeated groups. For example, consider: + + /(a(*COMMIT)b)+ac/ + + If the subject is "abac", Perl matches unless its optimizations are + disabled, but PCRE2 always fails because the (*COMMIT) in the second + repeat of the group acts. + + Backtracking verbs in assertions + + (*FAIL) in any assertion has its normal effect: it forces an immediate + backtrack. The behaviour of the other backtracking verbs depends on + whether or not the assertion is standalone or acting as the condition + in a conditional group. 
+ + (*ACCEPT) in a standalone positive assertion causes the assertion to + succeed without any further processing; captured strings and a mark + name (if set) are retained. In a standalone negative assertion, (*AC- + CEPT) causes the assertion to fail without any further processing; cap- + tured substrings and any mark name are discarded. + + If the assertion is a condition, (*ACCEPT) causes the condition to be + true for a positive assertion and false for a negative one; captured + substrings are retained in both cases. + + The remaining verbs act only when a later failure causes a backtrack to + reach them. This means that, for the Perl-compatible assertions, their + effect is confined to the assertion, because Perl lookaround assertions + are atomic. A backtrack that occurs after such an assertion is complete + does not jump back into the assertion. Note in particular that a + (*MARK) name that is set in an assertion is not "seen" by an instance + of (*SKIP:NAME) later in the pattern. + + PCRE2 now supports non-atomic positive assertions, as described in the + section entitled "Non-atomic assertions" above. These assertions must + be standalone (not used as conditions). They are not Perl-compatible. + For these assertions, a later backtrack does jump back into the asser- + tion, and therefore verbs such as (*COMMIT) can be triggered by back- + tracks from later in the pattern. + + The effect of (*THEN) is not allowed to escape beyond an assertion. If + there are no more branches to try, (*THEN) causes a positive assertion + to be false, and a negative assertion to be true. + + The other backtracking verbs are not treated specially if they appear + in a standalone positive assertion. In a conditional positive asser- + tion, backtracking (from within the assertion) into (*COMMIT), (*SKIP), + or (*PRUNE) causes the condition to be false. 
However, for both stand- + alone and conditional negative assertions, backtracking into (*COMMIT), + (*SKIP), or (*PRUNE) causes the assertion to be true, without consider- + ing any further alternative branches. + + Backtracking verbs in subroutines + + These behaviours occur whether or not the group is called recursively. + + (*ACCEPT) in a group called as a subroutine causes the subroutine match + to succeed without any further processing. Matching then continues af- + ter the subroutine call. Perl documents this behaviour. Perl's treat- + ment of the other verbs in subroutines is different in some cases. + + (*FAIL) in a group called as a subroutine has its normal effect: it + forces an immediate backtrack. + + (*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail + when triggered by being backtracked to in a group called as a subrou- + tine. There is then a backtrack at the outer level. + + (*THEN), when triggered, skips to the next alternative in the innermost + enclosing group that has alternatives (its normal behaviour). However, + if there is no such group within the subroutine's group, the subroutine + match fails and there is a backtrack at the outer level. + + +SEE ALSO + + pcre2api(3), pcre2callout(3), pcre2matching(3), pcre2syntax(3), + pcre2(3). + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 04 June 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.44 04 June 2024 PCRE2PATTERN(3) +------------------------------------------------------------------------------ + + + +PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +PCRE2 PERFORMANCE + + Two aspects of performance are discussed below: memory usage and pro- + cessing time. The way you express your pattern as a regular expression + can affect both of them. 
+ + +COMPILED PATTERN MEMORY USAGE + + Patterns are compiled by PCRE2 into a reasonably efficient interpretive + code, so that most simple patterns do not use much memory for storing + the compiled version. However, there is one case where the memory usage + of a compiled pattern can be unexpectedly large. If a parenthesized + group has a quantifier with a minimum greater than 1 and/or a limited + maximum, the whole group is repeated in the compiled code. For example, + the pattern + + (abc|def){2,4} + + is compiled as if it were + + (abc|def)(abc|def)((abc|def)(abc|def)?)? + + (Technical aside: It is done this way so that backtrack points within + each of the repetitions can be independently maintained.) + + For regular expressions whose quantifiers use only small numbers, this + is not usually a problem. However, if the numbers are large, and par- + ticularly if such repetitions are nested, the memory usage can become + an embarrassment. For example, the very simple pattern + + ((ab){1,1000}c){1,3} + + uses over 50KiB when compiled using the 8-bit library. When PCRE2 is + compiled with its default internal pointer size of two bytes, the size + limit on a compiled pattern is 65535 code units in the 8-bit and 16-bit + libraries, and this is reached with the above pattern if the outer rep- + etition is increased from 3 to 4. PCRE2 can be compiled to use larger + internal pointers and thus handle larger compiled patterns, but it is + better to try to rewrite your pattern to use less memory if you can. + + One way of reducing the memory usage for such patterns is to make use + of PCRE2's "subroutine" facility. Re-writing the above pattern as + + ((ab)(?2){0,999}c)(?1){0,2} + + reduces the memory requirements to around 16KiB, and indeed it remains + under 20KiB even with the outer repetition increased to 100. However, + this kind of pattern is not always exactly equivalent, because any cap- + tures within subroutine calls are lost when the subroutine completes. 
+ If this is not a problem, this kind of rewriting will allow you to + process patterns that PCRE2 cannot otherwise handle. The matching per- + formance of the two different versions of the pattern are roughly the + same. (This applies from release 10.30 - things were different in ear- + lier releases.) + + +STACK AND HEAP USAGE AT RUN TIME + + From release 10.30, the interpretive (non-JIT) version of pcre2_match() + uses very little system stack at run time. In earlier releases recur- + sive function calls could use a great deal of stack, and this could + cause problems, but this usage has been eliminated. Backtracking posi- + tions are now explicitly remembered in memory frames controlled by the + code. + + The size of each frame depends on the size of pointer variables and the + number of capturing parenthesized groups in the pattern being matched. + On a 64-bit system the frame size for a pattern with no captures is 128 + bytes. For each capturing group the size increases by 16 bytes. + + Until release 10.41, an initial 20KiB frames vector was allocated on + the system stack, but this still caused some issues for multi-thread + applications where each thread has a very small stack. From release + 10.41 backtracking memory frames are always held in heap memory. An + initial heap allocation is obtained the first time any match data block + is passed to pcre2_match(). This is remembered with the match data + block and re-used if that block is used for another match. It is freed + when the match data block itself is freed. + + The size of the initial block is the larger of 20KiB or ten times the + pattern's frame size, unless the heap limit is less than this, in which + case the heap limit is used. If the initial block proves to be too + small during matching, it is replaced by a larger block, subject to the + heap limit. The heap limit is checked only when a new block is to be + allocated. 
Reducing the heap limit between calls to pcre2_match() with + the same match data block does not affect the saved block. + + In contrast to pcre2_match(), pcre2_dfa_match() does use recursive + function calls, but only for processing atomic groups, lookaround as- + sertions, and recursion within the pattern. The original version of the + code used to allocate quite large internal workspace vectors on the + stack, which caused some problems for some patterns in environments + with small stacks. From release 10.32 the code for pcre2_dfa_match() + has been re-factored to use heap memory when necessary for internal + workspace when recursing, though recursive function calls are still + used. + + The "match depth" parameter can be used to limit the depth of function + recursion, and the "match heap" parameter to limit heap memory in + pcre2_dfa_match(). + + +PROCESSING TIME + + Certain items in regular expression patterns are processed more effi- + ciently than others. It is more efficient to use a character class like + [aeiou] than a set of single-character alternatives such as + (a|e|i|o|u). In general, the simplest construction that provides the + required behaviour is usually the most efficient. Jeffrey Friedl's book + contains a lot of useful general discussion about optimizing regular + expressions for efficient performance. This document contains a few ob- + servations about PCRE2. + + Using Unicode character properties (the \p, \P, and \X escapes) is + slow, because PCRE2 has to use a multi-stage table lookup whenever it + needs a character's property. If you can find an alternative pattern + that does not use character properties, it will probably be faster. + + By default, the escape sequences \b, \d, \s, and \w, and the POSIX + character classes such as [:alpha:] do not use Unicode properties, + partly for backwards compatibility, and partly for performance reasons. 
+ However, you can set the PCRE2_UCP option or start the pattern with + (*UCP) if you want Unicode character properties to be used. This can + double the matching time for items such as \d, when matched with + pcre2_match(); the performance loss is less with a DFA matching func- + tion, and in both cases there is not much difference for \b. + + When a pattern begins with .* not in atomic parentheses, nor in paren- + theses that are the subject of a backreference, and the PCRE2_DOTALL + option is set, the pattern is implicitly anchored by PCRE2, since it + can match only at the start of a subject string. If the pattern has + multiple top-level branches, they must all be anchorable. The optimiza- + tion can be disabled by the PCRE2_NO_DOTSTAR_ANCHOR option, and is au- + tomatically disabled if the pattern contains (*PRUNE) or (*SKIP). + + If PCRE2_DOTALL is not set, PCRE2 cannot make this optimization, be- + cause the dot metacharacter does not then match a newline, and if the + subject string contains newlines, the pattern may match from the char- + acter immediately following one of them instead of from the very start. + For example, the pattern + + .*second + + matches the subject "first\nand second" (where \n stands for a newline + character), with the match starting at the seventh character. In order + to do this, PCRE2 has to retry the match starting after every newline + in the subject. + + If you are using such a pattern with subject strings that do not con- + tain newlines, the best performance is obtained by setting + PCRE2_DOTALL, or starting the pattern with ^.* or ^.*? to indicate ex- + plicit anchoring. That saves PCRE2 from having to scan along the sub- + ject looking for a newline to restart at. + + Beware of patterns that contain nested indefinite repeats. These can + take a long time to run when applied to a string that does not match. 
+ Consider the pattern fragment + + ^(a+)* + + This can match "aaaa" in 16 different ways, and this number increases + very rapidly as the string gets longer. (The * repeat can match 0, 1, + 2, 3, or 4 times, and for each of those cases other than 0 or 4, the + + repeats can match different numbers of times.) When the remainder of + the pattern is such that the entire match is going to fail, PCRE2 has + in principle to try every possible variation, and this can take an ex- + tremely long time, even for relatively short strings. + + An optimization catches some of the more simple cases such as + + (a+)*b + + where a literal character follows. Before embarking on the standard + matching procedure, PCRE2 checks that there is a "b" later in the sub- + ject string, and if there is not, it fails the match immediately. How- + ever, when there is no following literal this optimization cannot be + used. You can see the difference by comparing the behaviour of + + (a+)*\d + + with the pattern above. The former gives a failure almost instantly + when applied to a whole line of "a" characters, whereas the latter + takes an appreciable time with strings longer than about 20 characters. + + In many cases, the solution to this kind of performance issue is to use + an atomic group or a possessive quantifier. This can often reduce mem- + ory requirements as well. As another example, consider this pattern: + + ([^<]|<(?!inet))+ + + It matches from wherever it starts until it encounters "<inet" or the + end of the data. + + +------------------------------------------------------------------------------ + + + +PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +SYNOPSIS + + #include <pcre2posix.h> + + int pcre2_regcomp(regex_t *preg, const char *pattern, + int cflags); + + int pcre2_regexec(const regex_t *preg, const char *string, + size_t nmatch, regmatch_t pmatch[], int eflags); + + size_t pcre2_regerror(int errcode, const regex_t *preg, + char *errbuf, size_t errbuf_size); + + void pcre2_regfree(regex_t *preg); + + +DESCRIPTION + + This set of functions provides a POSIX-style API for the PCRE2 regular + expression 8-bit library. 
There are no POSIX-style wrappers for PCRE2's + 16-bit and 32-bit libraries. See the pcre2api documentation for a de- + scription of PCRE2's native API, which contains much additional func- + tionality. + + IMPORTANT NOTE: The functions described here are NOT thread-safe, and + should not be used in multi-threaded applications. They are also lim- + ited to processing subjects that are not bigger than 2GB. Use the na- + tive API instead. + + These functions are wrapper functions that ultimately call the PCRE2 + native API. Their prototypes are defined in the pcre2posix.h header + file, and they all have unique names starting with pcre2_. However, the + pcre2posix.h header also contains macro definitions that convert the + standard POSIX names such as regcomp() into pcre2_regcomp() etc. This + means that a program can use the usual POSIX names without running the + risk of accidentally linking with POSIX functions from a different li- + brary. + + On Unix-like systems the PCRE2 POSIX library is called libpcre2-posix, + so can be accessed by adding -lpcre2-posix to the command for linking + an application. Because the POSIX functions call the native ones, it is + also necessary to add -lpcre2-8. + + On Windows systems, if you are linking to a DLL version of the library, + it is recommended that PCRE2POSIX_SHARED is defined before including + the pcre2posix.h header, as it will allow for a more efficient way to + invoke the functions by adding the __declspec(dllimport) decorator. + + Although they were not defined as prototypes in pcre2posix.h, releases + 10.33 to 10.36 of the library contained functions with the POSIX names + regcomp() etc. These simply passed their arguments to the PCRE2 func- + tions. These functions were provided for backwards compatibility with + earlier versions of PCRE2, which had only POSIX names. 
However, this + has proved troublesome in situations where a program links with several + libraries, some of which use PCRE2's POSIX interface while others use + the real POSIX functions. For this reason, the POSIX names have been + removed since release 10.37. + + Calling the header file pcre2posix.h avoids any conflict with other + POSIX libraries. It can, of course, be renamed or aliased as regex.h, + which is the "correct" name, if there is no clash. It provides two + structure types, regex_t for compiled internal forms, and regmatch_t + for returning captured substrings. It also defines some constants whose + names start with "REG_"; these are used for setting options and identi- + fying error codes. + + +USING THE POSIX FUNCTIONS + + Note that these functions are just POSIX-style wrappers for PCRE2's na- + tive API. They do not give POSIX regular expression behaviour, and + they are not thread-safe or even POSIX compatible. + + Those POSIX option bits that can reasonably be mapped to PCRE2 native + options have been implemented. In addition, the option REG_EXTENDED is + defined with the value zero. This has no effect, but since programs + that are written to the POSIX interface often use it, this makes it + easier to slot in PCRE2 as a replacement library. Other POSIX options + are not even defined. + + There are also some options that are not defined by POSIX. These have + been added at the request of users who want to make use of certain + PCRE2-specific features via the POSIX calling interface or to add BSD + or GNU functionality. + + When PCRE2 is called via these functions, it is only the API that is + POSIX-like in style. The syntax and semantics of the regular expres- + sions themselves are still those of Perl, subject to the setting of + various PCRE2 options, as described below. 
"POSIX-like in style" means + that the API approximates to the POSIX definition; it is not fully + POSIX-compatible, and in multi-unit encoding domains it is probably + even less compatible. + + The descriptions below use the actual names of the functions, but, as + described above, the standard POSIX names (without the pcre2_ prefix) + may also be used. + + +COMPILING A PATTERN + + The function pcre2_regcomp() is called to compile a pattern into an in- + ternal form. By default, the pattern is a C string terminated by a bi- + nary zero (but see REG_PEND below). The preg argument is a pointer to a + regex_t structure that is used as a base for storing information about + the compiled regular expression. It is also used for input when + REG_PEND is set. The regex_t structure used by pcre2_regcomp() is de- + fined in pcre2posix.h and is not the same as the structure used by + other libraries that provide POSIX-style matching. + + The argument cflags is either zero, or contains one or more of the bits + defined by the following macros: + + REG_DOTALL + + The PCRE2_DOTALL option is set when the regular expression is passed + for compilation to the native function. Note that REG_DOTALL is not + part of the POSIX standard. + + REG_ICASE + + The PCRE2_CASELESS option is set when the regular expression is passed + for compilation to the native function. + + REG_NEWLINE + + The PCRE2_MULTILINE option is set when the regular expression is passed + for compilation to the native function. Note that this does not mimic + the defined POSIX behaviour for REG_NEWLINE (see the following sec- + tion). + + REG_NOSPEC + + The PCRE2_LITERAL option is set when the regular expression is passed + for compilation to the native function. This disables all meta charac- + ters in the pattern, causing it to be treated as a literal string. The + only other options that are allowed with REG_NOSPEC are REG_ICASE, + REG_NOSUB, REG_PEND, and REG_UTF. 
Note that REG_NOSPEC is not part of + the POSIX standard. + + REG_NOSUB + + When a pattern that is compiled with this flag is passed to + pcre2_regexec() for matching, the nmatch and pmatch arguments are ig- + nored, and no captured strings are returned. Versions of the PCRE2 li- + brary prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile op- + tion, but this no longer happens because it disables the use of back- + references. + + REG_PEND + + If this option is set, the re_endp field in the preg structure (which + has the type const char *) must be set to point to the character beyond + the end of the pattern before calling pcre2_regcomp(). The pattern it- + self may now contain binary zeros, which are treated as data charac- + ters. Without REG_PEND, a binary zero terminates the pattern and the + re_endp field is ignored. This is a GNU extension to the POSIX standard + and should be used with caution in software intended to be portable to + other systems. + + REG_UCP + + The PCRE2_UCP option is set when the regular expression is passed for + compilation to the native function. This causes PCRE2 to use Unicode + properties when matching \d, \w, etc., instead of just recognizing + ASCII values. Note that REG_UCP is not part of the POSIX standard. + + REG_UNGREEDY + + The PCRE2_UNGREEDY option is set when the regular expression is passed + for compilation to the native function. Note that REG_UNGREEDY is not + part of the POSIX standard. + + REG_UTF + + The PCRE2_UTF option is set when the regular expression is passed for + compilation to the native function. This causes the pattern itself and + all data strings used for matching it to be treated as UTF-8 strings. + Note that REG_UTF is not part of the POSIX standard. + + In the absence of these flags, no options are passed to the native + function. This means that the regex is compiled with PCRE2 default se- + mantics. 
In particular, the way it handles newline characters in the + subject string is the Perl way, not the POSIX way. Note that setting + PCRE2_MULTILINE has only some of the effects specified for REG_NEWLINE. + It does not affect the way newlines are matched by the dot metacharac- + ter (they are not) or by a negative class such as [^a] (they are). + + The yield of pcre2_regcomp() is zero on success, and non-zero other- + wise. The preg structure is filled in on success, and one other member + of the structure (as well as re_endp) is public: re_nsub contains the + number of capturing subpatterns in the regular expression. Various er- + ror codes are defined in the header file. + + NOTE: If the yield of pcre2_regcomp() is non-zero, you must not attempt + to use the contents of the preg structure. If, for example, you pass it + to pcre2_regexec(), the result is undefined and your program is likely + to crash. + + +MATCHING NEWLINE CHARACTERS + + This area is not simple, because POSIX and Perl take different views of + things. It is not possible to get PCRE2 to obey POSIX semantics, but + then PCRE2 was never intended to be a POSIX engine. The following table + lists the different possibilities for matching newline characters in + Perl and PCRE2: + + Default Change with + + . matches newline no PCRE2_DOTALL + newline matches [^a] yes not changeable + $ matches \n at end yes PCRE2_DOLLAR_ENDONLY + $ matches \n in middle no PCRE2_MULTILINE + ^ matches \n in middle no PCRE2_MULTILINE + + This is the equivalent table for a POSIX-compatible pattern matcher: + + Default Change with + + . matches newline yes REG_NEWLINE + newline matches [^a] yes REG_NEWLINE + $ matches \n at end no REG_NEWLINE + $ matches \n in middle no REG_NEWLINE + ^ matches \n in middle no REG_NEWLINE + + This behaviour is not what happens when PCRE2 is called via its POSIX + API. By default, PCRE2's behaviour is the same as Perl's, except that + there is no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. 
In both PCRE2 + and Perl, there is no way to stop newline from matching [^a]. + + Default POSIX newline handling can be obtained by setting PCRE2_DOTALL + and PCRE2_DOLLAR_ENDONLY when calling pcre2_compile() directly, but + there is no way to make PCRE2 behave exactly as for the REG_NEWLINE ac- + tion. When using the POSIX API, passing REG_NEWLINE to PCRE2's + pcre2_regcomp() function causes PCRE2_MULTILINE to be passed to + pcre2_compile(), and REG_DOTALL passes PCRE2_DOTALL. There is no way to + pass PCRE2_DOLLAR_ENDONLY. + + +MATCHING A PATTERN + + The function pcre2_regexec() is called to match a compiled pattern preg + against a given string, which is by default terminated by a zero byte + (but see REG_STARTEND below), subject to the options in eflags. These + can be: + + REG_NOTBOL + + The PCRE2_NOTBOL option is set when calling the underlying PCRE2 match- + ing function. + + REG_NOTEMPTY + + The PCRE2_NOTEMPTY option is set when calling the underlying PCRE2 + matching function. Note that REG_NOTEMPTY is not part of the POSIX + standard. However, setting this option can give more POSIX-like behav- + iour in some situations. + + REG_NOTEOL + + The PCRE2_NOTEOL option is set when calling the underlying PCRE2 match- + ing function. + + REG_STARTEND + + When this option is set, the subject string starts at string + + pmatch[0].rm_so and ends at string + pmatch[0].rm_eo, which should + point to the first character beyond the string. There may be binary ze- + ros within the subject string, and indeed, using REG_STARTEND is the + only way to pass a subject string that contains a binary zero. + + Whatever the value of pmatch[0].rm_so, the offsets of the matched + string and any captured substrings are still given relative to the + start of string itself. (Before PCRE2 release 10.30 these were given + relative to string + pmatch[0].rm_so, but this differs from other im- + plementations.) 
+ + This is a BSD extension, compatible with but not specified by IEEE + Standard 1003.2 (POSIX.2), and should be used with caution in software + intended to be portable to other systems. Note that a non-zero rm_so + does not imply REG_NOTBOL; REG_STARTEND affects only the location and + length of the string, not how it is matched. Setting REG_STARTEND and + passing pmatch as NULL are mutually exclusive; the error REG_INVARG is + returned. + + If the pattern was compiled with the REG_NOSUB flag, no data about any + matched strings is returned. The nmatch and pmatch arguments of + pcre2_regexec() are ignored (except possibly as input for REG_STAR- + TEND). + + The value of nmatch may be zero, and the value pmatch may be NULL (un- + less REG_STARTEND is set); in both these cases no data about any + matched strings is returned. + + Otherwise, the portion of the string that was matched, and also any + captured substrings, are returned via the pmatch argument, which points + to an array of nmatch structures of type regmatch_t, containing the + members rm_so and rm_eo. These contain the byte offset to the first + character of each substring and the offset to the first character after + the end of each substring, respectively. The 0th element of the vector + relates to the entire portion of string that was matched; subsequent + elements relate to the capturing subpatterns of the regular expression. + Unused entries in the array have both structure members set to -1. + + regmatch_t as well as the regoff_t typedef it uses are defined in + pcre2posix.h and are not warranted to have the same size or layout as + other similarly named types from other libraries that provide POSIX- + style matching. + + A successful match yields a zero return; various error codes are de- + fined in the header file, of which REG_NOMATCH is the "expected" fail- + ure code. 
+ + +ERROR MESSAGES + + The pcre2_regerror() function maps a non-zero errorcode from either + pcre2_regcomp() or pcre2_regexec() to a printable message. If preg is + not NULL, the error should have arisen from the use of that structure. + A message terminated by a binary zero is placed in errbuf. If the + buffer is too short, only the first errbuf_size - 1 characters of the + error message are used. The yield of the function is the size of buffer + needed to hold the whole message, including the terminating zero. This + value is greater than errbuf_size if the message was truncated. + + +MEMORY USAGE + + Compiling a regular expression causes memory to be allocated and asso- + ciated with the preg structure. The function pcre2_regfree() frees all + such memory, after which preg may no longer be used as a compiled ex- + pression. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 19 January 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.43 19 January 2024 PCRE2POSIX(3) +------------------------------------------------------------------------------ + + + +PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +PCRE2 SAMPLE PROGRAM + + A simple, complete demonstration program to get you started with using + PCRE2 is supplied in the file pcre2demo.c in the src directory in the + PCRE2 distribution. A listing of this program is given in the pcre2demo + documentation. If you do not have a copy of the PCRE2 distribution, you + can save this listing to re-create the contents of pcre2demo.c. + + The demonstration program compiles the regular expression that is its + first argument, and matches it against the subject string in its second + argument. No PCRE2 options are set, and default character tables are + used. 
If matching succeeds, the program outputs the portion of the sub- + ject that matched, together with the contents of any captured sub- + strings. + + If the -g option is given on the command line, the program then goes on + to check for further matches of the same regular expression in the same + subject string. The logic is a little bit tricky because of the possi- + bility of matching an empty string. Comments in the code explain what + is going on. + + The code in pcre2demo.c is an 8-bit program that uses the PCRE2 8-bit + library. It handles strings and characters that are stored in 8-bit + code units. By default, one character corresponds to one code unit, + but if the pattern starts with "(*UTF)", both it and the subject are + treated as UTF-8 strings, where characters may occupy multiple code + units. + + If PCRE2 is installed in the standard include and library directories + for your operating system, you should be able to compile the demonstra- + tion program using a command like this: + + cc -o pcre2demo pcre2demo.c -lpcre2-8 + + If PCRE2 is installed elsewhere, you may need to add additional options + to the command line. For example, on a Unix-like system that has PCRE2 + installed in /usr/local, you can compile the demonstration program us- + ing a command like this: + + cc -o pcre2demo -I/usr/local/include pcre2demo.c \ + -L/usr/local/lib -lpcre2-8 + + Once you have built the demonstration program, you can run simple tests + like this: + + ./pcre2demo 'cat|dog' 'the cat sat on the mat' + ./pcre2demo -g 'cat|dog' 'the dog sat on the cat' + + Note that there is a much more comprehensive test program, called + pcre2test, which supports many more facilities for testing regular ex- + pressions using all three PCRE2 libraries (8-bit, 16-bit, and 32-bit, + though not all three need be installed). The pcre2demo program is pro- + vided as a relatively simple coding example. 
+ + If you try to run pcre2demo when PCRE2 is not installed in the standard + library directory, you may get an error like this on some operating + systems (e.g. Solaris): + + ld.so.1: pcre2demo: fatal: libpcre2-8.so.0: open failed: No such file + or directory + + This is caused by the way shared library support works on those sys- + tems. You need to add + + -R/usr/local/lib + + (for example) to the compile command to get round this problem. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 02 February 2016 + Copyright (c) 1997-2016 University of Cambridge. + + +PCRE2 10.22 02 February 2016 PCRE2SAMPLE(3) +------------------------------------------------------------------------------ + +PCRE2SERIALIZE(3) Library Functions Manual PCRE2SERIALIZE(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS + + int32_t pcre2_serialize_decode(pcre2_code **codes, + int32_t number_of_codes, const uint8_t *bytes, + pcre2_general_context *gcontext); + + int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, + PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext); + + void pcre2_serialize_free(uint8_t *bytes); + + int32_t pcre2_serialize_get_number_of_codes(const uint8_t *bytes); + + If you are running an application that uses a large number of regular + expression patterns, it may be useful to store them in a precompiled + form instead of having to compile them every time the application is + run. However, if you are using the just-in-time optimization feature, + it is not possible to save and reload the JIT data, because it is posi- + tion-dependent. The host on which the patterns are reloaded must be + running the same version of PCRE2, with the same code unit width, and + must also have the same endianness, pointer width and PCRE2_SIZE type. 
+ For example, patterns compiled on a 32-bit system using PCRE2's 16-bit + library cannot be reloaded on a 64-bit system, nor can they be reloaded + using the 8-bit library. + + Note that "serialization" in PCRE2 does not convert compiled patterns + to an abstract format like Java or .NET serialization. The serialized + output is really just a bytecode dump, which is why it can only be re- + loaded in the same environment as the one that created it. Hence the + restrictions mentioned above. Applications that are not statically + linked with a fixed version of PCRE2 must be prepared to recompile pat- + terns from their sources, in order to be immune to PCRE2 upgrades. + + +SECURITY CONCERNS + + The facility for saving and restoring compiled patterns is intended for + use within individual applications. As such, the data supplied to + pcre2_serialize_decode() is expected to be trusted data, not data from + arbitrary external sources. There is only some simple consistency + checking, not complete validation of what is being re-loaded. Corrupted + data may cause undefined results. For example, if the length field of a + pattern in the serialized data is corrupted, the deserializing code may + read beyond the end of the byte stream that is passed to it. + + +SAVING COMPILED PATTERNS + + Before compiled patterns can be saved they must be serialized, which in + PCRE2 means converting the pattern to a stream of bytes. A single byte + stream may contain any number of compiled patterns, but they must all + use the same character tables. A single copy of the tables is included + in the byte stream (its size is 1088 bytes). For more details of char- + acter tables, see the section on locale support in the pcre2api docu- + mentation. + + The function pcre2_serialize_encode() creates a serialized byte stream + from a list of compiled patterns. Its first two arguments specify the + list, being a pointer to a vector of pointers to compiled patterns, and + the length of the vector. 
The third and fourth arguments point to vari- + ables which are set to point to the created byte stream and its length, + respectively. The final argument is a pointer to a general context, + which can be used to specify custom memory management functions. If + this argument is NULL, malloc() is used to obtain memory for the byte + stream. The yield of the function is the number of serialized patterns, + or one of the following negative error codes: + + PCRE2_ERROR_BADDATA the number of patterns is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in one of the patterns + PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables + PCRE2_ERROR_NULL the 1st, 3rd, or 4th argument is NULL + + PCRE2_ERROR_BADMAGIC means either that a pattern's code has been cor- + rupted, or that a slot in the vector does not point to a compiled pat- + tern. + + Once a set of patterns has been serialized you can save the data in any + appropriate manner. Here is sample code that compiles two patterns and + writes them to a file. It assumes that the variable fd refers to a file + that is open for output. The error checking that should be present in a + real application has been omitted for simplicity. + + int errorcode; + uint8_t *bytes; + PCRE2_SIZE erroroffset; + PCRE2_SIZE bytescount; + pcre2_code *list_of_codes[2]; + list_of_codes[0] = pcre2_compile("first pattern", + PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL); + list_of_codes[1] = pcre2_compile("second pattern", + PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL); + errorcode = pcre2_serialize_encode(list_of_codes, 2, &bytes, + &bytescount, NULL); + errorcode = fwrite(bytes, 1, bytescount, fd); + + Note that the serialized data is binary data that may contain any of + the 256 possible byte values. On systems that make a distinction be- + tween binary and non-binary data, be sure that the file is opened for + binary output. 
+ + Serializing a set of patterns leaves the original data untouched, so + they can still be used for matching. Their memory must eventually be + freed in the usual way by calling pcre2_code_free(). When you have fin- + ished with the byte stream, it too must be freed by calling pcre2_seri- + alize_free(). If this function is called with a NULL argument, it re- + turns immediately without doing anything. + + +RE-USING PRECOMPILED PATTERNS + + In order to re-use a set of saved patterns you must first make the se- + rialized byte stream available in main memory (for example, by reading + from a file). The management of this memory block is up to the applica- + tion. You can use the pcre2_serialize_get_number_of_codes() function to + find out how many compiled patterns are in the serialized data without + actually decoding the patterns: + + uint8_t *bytes = <serialized data>; + int32_t number_of_codes = pcre2_serialize_get_number_of_codes(bytes); + + The pcre2_serialize_decode() function reads a byte stream and recreates + the compiled patterns in new memory blocks, setting pointers to them in + a vector. The first two arguments are a pointer to a suitable vector + and its length, and the third argument points to a byte stream. The fi- + nal argument is a pointer to a general context, which can be used to + specify custom memory management functions for the decoded patterns. If + this argument is NULL, malloc() and free() are used. After deserializa- + tion, the byte stream is no longer needed and can be discarded. + + pcre2_code *list_of_codes[2]; + uint8_t *bytes = <serialized data>; + int32_t number_of_codes = + pcre2_serialize_decode(list_of_codes, 2, bytes, NULL); + + If the vector is not large enough for all the patterns in the byte + stream, it is filled with those that fit, and the remainder are ig- + nored. 
The yield of the function is the number of decoded patterns, or + one of the following negative error codes: + + PCRE2_ERROR_BADDATA second argument is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in the data + PCRE2_ERROR_BADMODE mismatch of code unit size or PCRE2 version + PCRE2_ERROR_BADSERIALIZEDDATA other sanity check failure + PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_NULL first or third argument is NULL + + PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was + compiled on a system with different endianness. + + Decoded patterns can be used for matching in the usual way, and must be + freed by calling pcre2_code_free(). However, be aware that there is a + potential race issue if you are using multiple patterns that were de- + coded from a single byte stream in a multithreaded application. A sin- + gle copy of the character tables is used by all the decoded patterns + and a reference count is used to arrange for its memory to be automati- + cally freed when the last pattern is freed, but there is no locking on + this reference count. Therefore, if you want to call pcre2_code_free() + for these patterns in different threads, you must arrange your own + locking, and ensure that pcre2_code_free() cannot be called by two + threads at the same time. + + If a pattern was processed by pcre2_jit_compile() before being serial- + ized, the JIT data is discarded and so is no longer available after a + save/restore cycle. You can, however, process a restored pattern with + pcre2_jit_compile() if you wish. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 27 June 2018 + Copyright (c) 1997-2018 University of Cambridge. 
+ + +PCRE2 10.32 27 June 2018 PCRE2SERIALIZE(3) +------------------------------------------------------------------------------ + + + +PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY + + The full syntax and semantics of the regular expressions that are sup- + ported by PCRE2 are described in the pcre2pattern documentation. This + document contains a quick-reference summary of the syntax. + + +QUOTING + + \x where x is non-alphanumeric is a literal x + \Q...\E treat enclosed characters as literal + + Note that white space inside \Q...\E is always treated as literal, even + if PCRE2_EXTENDED is set, causing most other white space to be ignored. + + +BRACED ITEMS + + With one exception, wherever brace characters { and } are required to + enclose data for constructions such as \g{2} or \k{name}, space and/or + horizontal tab characters that follow { or precede } are allowed and + are ignored. In the case of quantifiers, they may also appear before or + after the comma. The exception is \u{...} which is not Perl-compatible + and is recognized only when PCRE2_EXTRA_ALT_BSUX is set. This is an EC- + MAScript compatibility feature, and follows ECMAScript's behaviour. + + +ESCAPED CHARACTERS + + This table applies to ASCII and Unicode environments. An unrecognized + escape sequence causes an error. + + \a alarm, that is, the BEL character (hex 07) + \cx "control-x", where x is a non-control ASCII character + \e escape (hex 1B) + \f form feed (hex 0C) + \n newline (hex 0A) + \r carriage return (hex 0D) + \t tab (hex 09) + \0dd character with octal code 0dd + \ddd character with octal code ddd, or backreference + \o{ddd..} character with octal code ddd.. + \N{U+hh..} character with Unicode code point hh.. (Unicode mode only) + \xhh character with hex code hh + \x{hh..} character with hex code hh.. 
+ + If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the + following are also recognized: + + \U the character "U" + \uhhhh character with hex code hhhh + \u{hh..} character with hex code hh.. but only for EXTRA_ALT_BSUX + + When \x is not followed by {, from zero to two hexadecimal digits are + read, but in ALT_BSUX mode \x must be followed by two hexadecimal dig- + its to be recognized as a hexadecimal escape; otherwise it matches a + literal "x". Likewise, if \u (in ALT_BSUX mode) is not followed by + four hexadecimal digits or (in EXTRA_ALT_BSUX mode) a sequence of hex + digits in curly brackets, it matches a literal "u". + + Note that \0dd is always an octal code. The treatment of backslash fol- + lowed by a non-zero digit is complicated; for details see the section + "Non-printing characters" in the pcre2pattern documentation, where de- + tails of escape processing in EBCDIC environments are also given. + \N{U+hh..} is synonymous with \x{hh..} in PCRE2 but is not supported in + EBCDIC environments. Note that \N not followed by an opening curly + bracket has a different meaning (see below). + + +CHARACTER TYPES + + . any character except newline; + in dotall mode, any character whatsoever + \C one code unit, even in UTF mode (best avoided) + \d a decimal digit + \D a character that is not a decimal digit + \h a horizontal white space character + \H a character that is not a horizontal white space character + \N a character that is not a newline + \p{xx} a character with the xx property + \P{xx} a character without the xx property + \R a newline sequence + \s a white space character + \S a character that is not a white space character + \v a vertical white space character + \V a character that is not a vertical white space character + \w a "word" character + \W a "non-word" character + \X a Unicode extended grapheme cluster + + \C is dangerous because it may leave the current matching point in the + middle of a UTF-8 or UTF-16 character. 
The application can lock out the + use of \C by setting the PCRE2_NEVER_BACKSLASH_C option. It is also + possible to build PCRE2 with the use of \C permanently disabled. + + By default, \d, \s, and \w match only ASCII characters, even in UTF-8 + mode or in the 16-bit and 32-bit libraries. However, if locale-specific + matching is happening, \s and \w may also match characters with code + points in the range 128-255. If the PCRE2_UCP option is set, the behav- + iour of these escape sequences is changed to use Unicode properties and + they match many more characters, but there are some option settings + that can restrict individual sequences to matching only ASCII charac- + ters. + + Property descriptions in \p and \P are matched caselessly; hyphens, un- + derscores, and white space are ignored, in accordance with Unicode's + "loose matching" rules. + + +GENERAL CATEGORY PROPERTIES FOR \p and \P + + C Other + Cc Control + Cf Format + Cn Unassigned + Co Private use + Cs Surrogate + + L Letter + Ll Lower case letter + Lm Modifier letter + Lo Other letter + Lt Title case letter + Lu Upper case letter + Lc Ll, Lu, or Lt + L& Ll, Lu, or Lt + + M Mark + Mc Spacing mark + Me Enclosing mark + Mn Non-spacing mark + + N Number + Nd Decimal number + Nl Letter number + No Other number + + P Punctuation + Pc Connector punctuation + Pd Dash punctuation + Pe Close punctuation + Pf Final punctuation + Pi Initial punctuation + Po Other punctuation + Ps Open punctuation + + S Symbol + Sc Currency symbol + Sk Modifier symbol + Sm Mathematical symbol + So Other symbol + + Z Separator + Zl Line separator + Zp Paragraph separator + Zs Space separator + + +PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P + + Xan Alphanumeric: union of properties L and N + Xps POSIX space: property Z or tab, NL, VT, FF, CR + Xsp Perl space: property Z or tab, NL, VT, FF, CR + Xuc Universally-named character: one that can be + represented by a Universal Character Name + Xwd Perl word: property Xan or 
underscore + + Perl and POSIX space are now the same. Perl added VT to its space char- + acter set at release 5.18. + + +BINARY PROPERTIES FOR \p AND \P + + Unicode defines a number of binary properties, that is, properties + whose only values are true or false. You can obtain a list of those + that are recognized by \p and \P, along with their abbreviations, by + running this command: + + pcre2test -LP + + +SCRIPT MATCHING WITH \p AND \P + + Many script names and their 4-letter abbreviations are recognized in + \p{sc:...} or \p{scx:...} items, or on their own with \p (and also \P + of course). You can obtain a list of these scripts by running this com- + mand: + + pcre2test -LS + + +THE BIDI_CLASS PROPERTY FOR \p AND \P + + \p{Bidi_Class:<class>} matches a character with the given class + \p{BC:<class>} matches a character with the given class + + The recognized classes are: + + AL Arabic letter + AN Arabic number + B paragraph separator + BN boundary neutral + CS common separator + EN European number + ES European separator + ET European terminator + FSI first strong isolate + L left-to-right + LRE left-to-right embedding + LRI left-to-right isolate + LRO left-to-right override + NSM non-spacing mark + ON other neutral + PDF pop directional format + PDI pop directional isolate + R right-to-left + RLE right-to-left embedding + RLI right-to-left isolate + RLO right-to-left override + S segment separator + WS white space + + +CHARACTER CLASSES + + [...] positive character class + [^...]
negative character class + [x-y] range (can be used for hex characters) + [[:xxx:]] positive POSIX named set + [[:^xxx:]] negative POSIX named set + + alnum alphanumeric + alpha alphabetic + ascii 0-127 + blank space or tab + cntrl control character + digit decimal digit + graph printing, excluding space + lower lower case letter + print printing, including space + punct printing, excluding alphanumeric + space white space + upper upper case letter + word same as \w + xdigit hexadecimal digit + + In PCRE2, POSIX character set names recognize only ASCII characters by + default, but some of them use Unicode properties if PCRE2_UCP is set. + You can use \Q...\E inside a character class. + + +QUANTIFIERS + + ? 0 or 1, greedy + ?+ 0 or 1, possessive + ?? 0 or 1, lazy + * 0 or more, greedy + *+ 0 or more, possessive + *? 0 or more, lazy + + 1 or more, greedy + ++ 1 or more, possessive + +? 1 or more, lazy + {n} exactly n + {n,m} at least n, no more than m, greedy + {n,m}+ at least n, no more than m, possessive + {n,m}? at least n, no more than m, lazy + {n,} n or more, greedy + {n,}+ n or more, possessive + {n,}? n or more, lazy + {,m} zero up to m, greedy + {,m}+ zero up to m, possessive + {,m}? zero up to m, lazy + + +ANCHORS AND SIMPLE ASSERTIONS + + \b word boundary + \B not a word boundary + ^ start of subject + also after an internal newline in multiline mode + (after any newline if PCRE2_ALT_CIRCUMFLEX is set) + \A start of subject + $ end of subject + also before newline at end of subject + also before internal newline in multiline mode + \Z end of subject + also before newline at end of subject + \z end of subject + \G first matching position in subject + + +REPORTED MATCH POINT SETTING + + \K set reported start of match + + From release 10.38 \K is not permitted by default in lookaround asser- + tions, for compatibility with Perl. However, if the PCRE2_EXTRA_AL- + LOW_LOOKAROUND_BSK option is set, the previous behaviour is re-enabled. 
+ When this option is set, \K is honoured in positive assertions, but ig- + nored in negative ones. + + +ALTERNATION + + expr|expr|expr... + + +CAPTURING + + (...) capture group + (?<name>...) named capture group (Perl) + (?'name'...) named capture group (Perl) + (?P<name>...) named capture group (Python) + (?:...) non-capture group + (?|...) non-capture group; reset group numbers for + capture groups in each alternative + + In non-UTF modes, names may contain underscores and ASCII letters and + digits; in UTF modes, any Unicode letters and Unicode decimal digits + are permitted. In both cases, a name must not start with a digit. + + +ATOMIC GROUPS + + (?>...) atomic non-capture group + (*atomic:...) atomic non-capture group + + +COMMENT + + (?#....) comment (not nestable) + + +OPTION SETTING + Changes of these options within a group are automatically cancelled at + the end of the group. + + (?a) all ASCII options + (?aD) restrict \d to ASCII in UCP mode + (?aS) restrict \s to ASCII in UCP mode + (?aW) restrict \w to ASCII in UCP mode + (?aP) restrict all POSIX classes to ASCII in UCP mode + (?aT) restrict POSIX digit classes to ASCII in UCP mode + (?i) caseless + (?J) allow duplicate named groups + (?m) multiline + (?n) no auto capture + (?r) restrict caseless to either ASCII or non-ASCII + (?s) single line (dotall) + (?U) default ungreedy (lazy) + (?x) ignore white space except in classes or \Q...\E + (?xx) as (?x) but also ignore space and tab in classes + (?-...) unset the given option(s) + (?^) unset imnrsx options + + (?aP) implies (?aT) as well, though this has no additional effect. How- + ever, it means that (?-aP) is really (?-PT) which disables all ASCII + restrictions for POSIX classes. + + Unsetting x or xx unsets both. Several options may be set at once, and + a mixture of setting and unsetting such as (?i-x) is allowed, but there + may be only one hyphen. Setting (but not unsetting) is allowed after (?^ + for example (?^in).
An option setting may appear at the start of a non- + capture group, for example (?i:...). + + The following are recognized only at the very start of a pattern or af- + ter one of the newline or \R options with similar syntax. More than one + of them may appear. For the first three, d is a decimal number. + + (*LIMIT_DEPTH=d) set the backtracking limit to d + (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes + (*LIMIT_MATCH=d) set the match limit to d + (*NOTEMPTY) set PCRE2_NOTEMPTY when matching + (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching + (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS) + (*NO_DOTSTAR_ANCHOR) no .* anchoring (PCRE2_NO_DOTSTAR_ANCHOR) + (*NO_JIT) disable JIT optimization + (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE) + (*UTF) set appropriate UTF mode for the library in use + (*UCP) set PCRE2_UCP (use Unicode properties for \d etc) + + Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the + value of the limits set by the caller of pcre2_match() or + pcre2_dfa_match(), not increase them. LIMIT_RECURSION is an obsolete + synonym for LIMIT_DEPTH. The application can lock out the use of (*UTF) + and (*UCP) by setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, + respectively, at compile time. + + +NEWLINE CONVENTION + + These are recognized only at the very start of the pattern or after op- + tion settings with a similar syntax. + + (*CR) carriage return only + (*LF) linefeed only + (*CRLF) carriage return followed by linefeed + (*ANYCRLF) all three of the above + (*ANY) any Unicode newline sequence + (*NUL) the NUL character (binary zero) + + +WHAT \R MATCHES + + These are recognized only at the very start of the pattern or after op- + tion setting with a similar syntax. + + (*BSR_ANYCRLF) CR, LF, or CRLF + (*BSR_UNICODE) any Unicode newline sequence + + +LOOKAHEAD AND LOOKBEHIND ASSERTIONS + + (?=...) ) + (*pla:...) 
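) positive lookahead + (*positive_lookahead:...) ) + + (?!...) ) + (*nla:...) ) negative lookahead + (*negative_lookahead:...) ) + + (?<=...) ) + (*plb:...) ) positive lookbehind + (*positive_lookbehind:...) ) + + (?<!...) ) + (*nlb:...) ) negative lookbehind + (*negative_lookbehind:...) ) + + Each top-level branch of a lookbehind must have a limit for the number + of characters it matches. If any branch can match a variable number of + characters, the maximum for each branch is limited to a value set by + the caller of pcre2_compile() or defaulted. The default is set when + PCRE2 is built (ultimate default 255). If every branch matches a fixed + number of characters, the limit for each branch is 65535. + + +NON-ATOMIC LOOKAROUND ASSERTIONS + + These assertions are specific to PCRE2 and are not Perl-compatible. + + (?*...) ) + (*napla:...) ) synonyms + (*non_atomic_positive_lookahead:...) ) + + (?<*...) ) + (*naplb:...) ) synonyms + (*non_atomic_positive_lookbehind:...) ) + + +SCRIPT RUNS + + (*script_run:...) ) script run, can be backtracked into + (*sr:...) ) + + (*atomic_script_run:...) ) atomic script run + (*asr:...) ) + + +BACKREFERENCES + + \n reference by number (can be ambiguous) + \gn reference by number + \g{n} reference by number + \g+n relative reference by number (PCRE2 extension) + \g-n relative reference by number + \g{+n} relative reference by number (PCRE2 extension) + \g{-n} relative reference by number + \k<name> reference by name (Perl) + \k'name' reference by name (Perl) + \g{name} reference by name (Perl) + \k{name} reference by name (.NET) + (?P=name) reference by name (Python) + + +SUBROUTINE REFERENCES (POSSIBLY RECURSIVE) + + (?R) recurse whole pattern + (?n) call subroutine by absolute number + (?+n) call subroutine by relative number + (?-n) call subroutine by relative number + (?&name) call subroutine by name (Perl) + (?P>name) call subroutine by name (Python) + \g<name> call subroutine by name (Oniguruma) + \g'name' call subroutine by name (Oniguruma) + \g<n> call subroutine by absolute number (Oniguruma) + \g'n' call subroutine by absolute number (Oniguruma) + \g<+n> call subroutine by relative number (PCRE2 extension) + \g'+n' call subroutine by relative number (PCRE2 extension) + \g<-n> call subroutine by relative number (PCRE2 extension) + \g'-n' call subroutine by relative number (PCRE2 extension) + + +CONDITIONAL PATTERNS + + (?(condition)yes-pattern) + (?(condition)yes-pattern|no-pattern) + + (?(n) absolute reference condition + (?(+n) relative reference condition (PCRE2 extension) + (?(-n) relative reference condition (PCRE2 extension) + (?(<name>) named reference condition (Perl) + (?('name') named reference condition (Perl) + (?(name) named reference condition (PCRE2, deprecated) + (?(R) overall recursion condition + (?(Rn) specific numbered group recursion condition + (?(R&name) specific named group recursion condition + (?(DEFINE) define groups for reference + (?(VERSION[>]=n.m) test PCRE2 version + (?(assert) assertion condition + + Note the ambiguity of (?(R) and (?(Rn) which might be named reference + conditions or recursion tests. Such a condition is interpreted as a + reference condition if the relevant named group exists.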
+ + +BACKTRACKING CONTROL + + All backtracking control verbs may be in the form (*VERB:NAME). For + (*MARK) the name is mandatory, for the others it is optional. (*SKIP) + changes its behaviour if :NAME is present. The others just set a name + for passing back to the caller, but this is not a name that (*SKIP) can + see. The following act immediately they are reached: + + (*ACCEPT) force successful match + (*FAIL) force backtrack; synonym (*F) + (*MARK:NAME) set name to be passed back; synonym (*:NAME) + + The following act only when a subsequent match failure causes a back- + track to reach them. They all force a match failure, but they differ in + what happens afterwards. Those that advance the start-of-match point do + so only if the pattern is not anchored. + + (*COMMIT) overall failure, no advance of starting point + (*PRUNE) advance to next starting character + (*SKIP) advance to current matching position + (*SKIP:NAME) advance to position corresponding to an earlier + (*MARK:NAME); if not found, the (*SKIP) is ignored + (*THEN) local failure, backtrack to next alternation + + The effect of one of these verbs in a group called as a subroutine is + confined to the subroutine call. + + +CALLOUTS + + (?C) callout (assumed number 0) + (?Cn) callout with numerical data n + (?C"text") callout with string data + + The allowed string delimiters are ` ' " ^ % # $ (which are the same for + the start and the end), and the starting delimiter { matched with the + ending delimiter }. To encode the ending delimiter within the string, + double it. + + +SEE ALSO + + pcre2pattern(3), pcre2api(3), pcre2callout(3), pcre2matching(3), + pcre2(3). + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 12 October 2023 + Copyright (c) 1997-2023 University of Cambridge. 
+ + +PCRE2 10.43 12 October 2023 PCRE2SYNTAX(3) +------------------------------------------------------------------------------ + + + +PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +UNICODE AND UTF SUPPORT + + PCRE2 is normally built with Unicode support, though if you do not need + it, you can build it without, in which case the library will be + smaller. With Unicode support, PCRE2 has knowledge of Unicode character + properties and can process strings of text in UTF-8, UTF-16, and UTF-32 + format (depending on the code unit width), but this is not the default. + Unless specifically requested, PCRE2 treats each code unit in a string + as one character. + + There are two ways of telling PCRE2 to switch to UTF mode, where char- + acters may consist of more than one code unit and the range of values + is constrained. The program can call pcre2_compile() with the PCRE2_UTF + option, or the pattern may start with the sequence (*UTF). However, + the latter facility can be locked out by the PCRE2_NEVER_UTF option. + That is, the programmer can prevent the supplier of the pattern from + switching to UTF mode. + + Note that the PCRE2_MATCH_INVALID_UTF option (see below) forces + PCRE2_UTF to be set. + + In UTF mode, both the pattern and any subject strings that are matched + against it are treated as UTF strings instead of strings of individual + one-code-unit characters. There are also some other changes to the way + characters are handled, as documented below. + + +UNICODE PROPERTY SUPPORT + + When PCRE2 is built with Unicode support, the escape sequences \p{..}, + \P{..}, and \X can be used. This is not dependent on the PCRE2_UTF set- + ting. The Unicode properties that can be tested are a subset of those + that Perl supports.
Currently they are limited to the general category + properties such as Lu for an upper case letter or Nd for a decimal num- + ber, the derived properties Any and LC (synonym L&), the Unicode script + names such as Arabic or Han, Bidi_Class, Bidi_Control, and a few binary + properties. + + The full lists are given in the pcre2pattern and pcre2syntax documenta- + tion. In general, only the short names for properties are supported. + For example, \p{L} matches a letter. Its longer synonym, \p{Letter}, is + not supported. Furthermore, in Perl, many properties may optionally be + prefixed by "Is", for compatibility with Perl 5.6. PCRE2 does not sup- + port this. + + +WIDE CHARACTERS AND UTF MODES + + Code points less than 256 can be specified in patterns by either braced + or unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3). + Larger values have to use braced sequences. Unbraced octal code points + up to \777 are also recognized; larger ones can be coded using \o{...}. + + The escape sequence \N{U+<hex digits>} is recognized as another way of + specifying a Unicode character by code point in a UTF mode. It is not + allowed in non-UTF mode. + + In UTF mode, repeat quantifiers apply to complete UTF characters, not + to individual code units. + + In UTF mode, the dot metacharacter matches one UTF character instead of + a single code unit. + + In UTF mode, capture group names are not restricted to ASCII, and may + contain any Unicode letters and decimal digits, as well as underscore. + + The escape sequence \C can be used to match a single code unit in UTF + mode, but its use can lead to some strange effects because it breaks up + multi-unit characters (see the description of \C in the pcre2pattern + documentation). For this reason, there is a build-time option that dis- + ables support for \C completely. There is also a less draconian com- + pile-time option for locking out the use of \C when a pattern is com- + piled.
+ + The use of \C is not supported by the alternative matching function + pcre2_dfa_match() when in UTF-8 or UTF-16 mode, that is, when a charac- + ter may consist of more than one code unit. The use of \C in these + modes provokes a match-time error. Also, the JIT optimization does not + support \C in these modes. If JIT optimization is requested for a UTF-8 + or UTF-16 pattern that contains \C, it will not succeed, and so when + pcre2_match() is called, the matching will be carried out by the inter- + pretive function. + + The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test + characters of any code value, but, by default, the characters that + PCRE2 recognizes as digits, spaces, or word characters remain the same + set as in non-UTF mode, all with code points less than 256. This re- + mains true even when PCRE2 is built to include Unicode support, because + to do otherwise would slow down matching in many common cases. Note + that this also applies to \b and \B, because they are defined in terms + of \w and \W. If you want to test for a wider sense of, say, "digit", + you can use explicit Unicode property tests such as \p{Nd}. Alterna- + tively, if you set the PCRE2_UCP option, the way that the character es- + capes work is changed so that Unicode properties are used to determine + which characters match, though there are some options that suppress + this for individual escapes. For details see the section on generic + character types in the pcre2pattern documentation. + + Like the escapes, characters that match the POSIX named character + classes are all low-valued characters unless the PCRE2_UCP option is + set, but there is an option to override this. + + In contrast to the character escapes and character classes, the special + horizontal and vertical white space escapes (\h, \H, \v, and \V) do + match all the appropriate Unicode characters, whether or not PCRE2_UCP + is set. 
+ + +UNICODE CASE-EQUIVALENCE + + If either PCRE2_UTF or PCRE2_UCP is set, upper/lower case processing + makes use of Unicode properties except for characters whose code points + are less than 128 and that have at most two case-equivalent values. For + these, a direct table lookup is used for speed. A few Unicode charac- + ters such as Greek sigma have more than two code points that are case- + equivalent, and these are treated specially. Setting PCRE2_UCP without + PCRE2_UTF allows Unicode-style case processing for non-UTF character + encodings such as UCS-2. + + There are two ASCII characters (S and K) that, in addition to their + ASCII lower case equivalents, have a non-ASCII one as well (long S and + Kelvin sign). Recognition of these non-ASCII characters as case-equiv- + alent to their ASCII counterparts can be disabled by setting the + PCRE2_EXTRA_CASELESS_RESTRICT option. When this is set, all characters + in a case equivalence must either be ASCII or non-ASCII; there can be + no mixing. + + +SCRIPT RUNS + + The pattern constructs (*script_run:...) and (*atomic_script_run:...), + with synonyms (*sr:...) and (*asr:...), verify that the string matched + within the parentheses is a script run. In concept, a script run is a + sequence of characters that are all from the same Unicode script. How- + ever, because some scripts are commonly used together, and because some + diacritical and other marks are used with multiple scripts, it is not + that simple. + + Every Unicode character has a Script property, mostly with a value cor- + responding to the name of a script, such as Latin, Greek, or Cyrillic. + There are also three special values: + + "Unknown" is used for code points that have not been assigned, and also + for the surrogate code points. In the PCRE2 32-bit library, characters + whose code points are greater than the Unicode maximum (U+10FFFF), + which are accessible only in non-UTF mode, are assigned the Unknown + script. 
+ + "Common" is used for characters that are used with many scripts. These + include punctuation, emoji, mathematical, musical, and currency sym- + bols, and the ASCII digits 0 to 9. + + "Inherited" is used for characters such as diacritical marks that mod- + ify a previous character. These are considered to take on the script of + the character that they modify. + + Some Inherited characters are used with many scripts, but many of them + are only normally used with a small number of scripts. For example, + U+102E0 (Coptic Epact thousands mark) is used only with Arabic and Cop- + tic. In order to make it possible to check this, a Unicode property + called Script Extension exists. Its value is a list of scripts that ap- + ply to the character. For the majority of characters, the list contains + just one script, the same one as the Script property. However, for + characters such as U+102E0 more than one Script is listed. There are + also some Common characters that have a single, non-Common script in + their Script Extension list. + + The next section describes the basic rules for deciding whether a given + string of characters is a script run. Note, however, that there are + some special cases involving the Chinese Han script, and an additional + constraint for decimal digits. These are covered in subsequent sec- + tions. + + Basic script run rules + + A string that is less than two characters long is a script run. This is + the only case in which an Unknown character can be part of a script + run. Longer strings are checked using only the Script Extensions prop- + erty, not the basic Script property. + + If a character's Script Extension property is the single value "Inher- + ited", it is always accepted as part of a script run. This is also true + for the property "Common", subject to the checking of decimal digits + described below. All the remaining characters in a script run must have + at least one script in common in their Script Extension lists. 
In set- + theoretic terminology, the intersection of all the sets of scripts must + not be empty. + + A simple example is an Internet name such as "google.com". The letters + are all in the Latin script, and the dot is Common, so this string is a + script run. However, the Cyrillic letter "o" looks exactly the same as + the Latin "o"; a string that looks the same, but with Cyrillic "o"s is + not a script run. + + More interesting examples involve characters with more than one script + in their Script Extension. Consider the following characters: + + U+060C Arabic comma + U+06D4 Arabic full stop + + The first has the Script Extension list Arabic, Hanifi Rohingya, Syr- + iac, and Thaana; the second has just Arabic and Hanifi Rohingya. Both + of them could appear in script runs of either Arabic or Hanifi Ro- + hingya. The first could also appear in Syriac or Thaana script runs, + but the second could not. + + The Chinese Han script + + The Chinese Han script is commonly used in conjunction with other + scripts for writing certain languages. Japanese uses the Hiragana and + Katakana scripts together with Han; Korean uses Hangul and Han; Tai- + wanese Mandarin uses Bopomofo and Han. These three combinations are + treated as special cases when checking script runs and are, in effect, + "virtual scripts". Thus, a script run may contain a mixture of Hira- + gana, Katakana, and Han, or a mixture of Hangul and Han, or a mixture + of Bopomofo and Han, but not, for example, a mixture of Hangul and + Bopomofo and Han. PCRE2 (like Perl) follows Unicode's Technical Stan- + dard 39 ("Unicode Security Mechanisms", http://unicode.org/re- + ports/tr39/) in allowing such mixtures. + + Decimal digits + + Unicode contains many sets of 10 decimal digits in different scripts, + and some scripts (including the Common script) contain more than one + set. Some of these decimal digits are visually indistinguishable + from the common ASCII digits.
In addition to the script checking de- + scribed above, if a script run contains any decimal digits, they must + all come from the same set of 10 adjacent characters. + + +VALIDITY OF UTF STRINGS + + When the PCRE2_UTF option is set, the strings passed as patterns and + subjects are (by default) checked for validity on entry to the relevant + functions. If an invalid UTF string is passed, a negative error code is + returned. The code unit offset to the offending character can be ex- + tracted from the match data block by calling pcre2_get_startchar(), + which is used for this purpose after a UTF error. + + In some situations, you may already know that your strings are valid, + and therefore want to skip these checks in order to improve perfor- + mance, for example in the case of a long subject string that is being + scanned repeatedly. If you set the PCRE2_NO_UTF_CHECK option at com- + pile time or at match time, PCRE2 assumes that the pattern or subject + it is given (respectively) contains only valid UTF code unit sequences. + + If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the + result is undefined and your program may crash or loop indefinitely or + give incorrect results. There is, however, one mode of matching that + can handle invalid UTF subject strings. This is enabled by passing + PCRE2_MATCH_INVALID_UTF to pcre2_compile() and is discussed below in + the next section. The rest of this section covers the case when + PCRE2_MATCH_INVALID_UTF is not set. + + Passing PCRE2_NO_UTF_CHECK to pcre2_compile() just disables the UTF + check for the pattern; it does not also apply to subject strings. If + you want to disable the check for a subject string you must pass this + same option to pcre2_match() or pcre2_dfa_match(). + + UTF-16 and UTF-32 strings can indicate their endianness by a special + code known as a byte-order mark (BOM). The PCRE2 functions do not handle + this, expecting strings to be in host byte order.
+ + Unless PCRE2_NO_UTF_CHECK is set, a UTF string is checked before any + other processing takes place. In the case of pcre2_match() and + pcre2_dfa_match() calls with a non-zero starting offset, the check is + applied only to that part of the subject that could be inspected during + matching, and there is a check that the starting offset points to the + first code unit of a character or to the end of the subject. If there + are no lookbehind assertions in the pattern, the check starts at the + starting offset. Otherwise, it starts at the length of the longest + lookbehind before the starting offset, or at the start of the subject + if there are not that many characters before the starting offset. Note + that the sequences \b and \B are one-character lookbehinds. + + In addition to checking the format of the string, there is a check to + ensure that all code points lie in the range U+0 to U+10FFFF, excluding + the surrogate area. The so-called "non-character" code points are not + excluded because Unicode corrigendum #9 makes it clear that they should + not be. + + Characters in the "Surrogate Area" of Unicode are reserved for use by + UTF-16, where they are used in pairs to encode code points with values + greater than 0xFFFF. The code points that are encoded by UTF-16 pairs + are available independently in the UTF-8 and UTF-32 encodings. (In + other words, the whole surrogate thing is a fudge for UTF-16 which un- + fortunately messes up UTF-8 and UTF-32.) + + Setting PCRE2_NO_UTF_CHECK at compile time does not disable the error + that is given if an escape sequence for an invalid Unicode code point + is encountered in the pattern. If you want to allow escape sequences + such as \x{d800} (a surrogate code point) you can set the PCRE2_EX- + TRA_ALLOW_SURROGATE_ESCAPES extra option. However, this is possible + only in UTF-8 and UTF-32 modes, because these values are not repre- + sentable in UTF-16. 
+ + Errors in UTF-8 strings + + The following negative error codes are given for invalid UTF-8 strings: + + PCRE2_ERROR_UTF8_ERR1 + PCRE2_ERROR_UTF8_ERR2 + PCRE2_ERROR_UTF8_ERR3 + PCRE2_ERROR_UTF8_ERR4 + PCRE2_ERROR_UTF8_ERR5 + + The string ends with a truncated UTF-8 character; the code specifies + how many bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 + characters to be no longer than 4 bytes, the encoding scheme (origi- + nally defined by RFC 2279) allows for up to 6 bytes, and this is + checked first; hence the possibility of 4 or 5 missing bytes. + + PCRE2_ERROR_UTF8_ERR6 + PCRE2_ERROR_UTF8_ERR7 + PCRE2_ERROR_UTF8_ERR8 + PCRE2_ERROR_UTF8_ERR9 + PCRE2_ERROR_UTF8_ERR10 + + The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of + the character do not have the binary value 0b10 (that is, either the + most significant bit is 0, or the next bit is 1). + + PCRE2_ERROR_UTF8_ERR11 + PCRE2_ERROR_UTF8_ERR12 + + A character that is valid by the RFC 2279 rules is either 5 or 6 bytes + long; these code points are excluded by RFC 3629. + + PCRE2_ERROR_UTF8_ERR13 + + A 4-byte character has a value greater than 0x10ffff; these code points + are excluded by RFC 3629. + + PCRE2_ERROR_UTF8_ERR14 + + A 3-byte character has a value in the range 0xd800 to 0xdfff; this + range of code points are reserved by RFC 3629 for use with UTF-16, and + so are excluded from UTF-8. + + PCRE2_ERROR_UTF8_ERR15 + PCRE2_ERROR_UTF8_ERR16 + PCRE2_ERROR_UTF8_ERR17 + PCRE2_ERROR_UTF8_ERR18 + PCRE2_ERROR_UTF8_ERR19 + + A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes + for a value that can be represented by fewer bytes, which is invalid. + For example, the two bytes 0xc0, 0xae give the value 0x2e, whose cor- + rect coding uses just one byte. + + PCRE2_ERROR_UTF8_ERR20 + + The two most significant bits of the first byte of a character have the + binary value 0b10 (that is, the most significant bit is 1 and the sec- + ond is 0). 
Such a byte can only validly occur as the second or subse- + quent byte of a multi-byte character. + + PCRE2_ERROR_UTF8_ERR21 + + The first byte of a character has the value 0xfe or 0xff. These values + can never occur in a valid UTF-8 string. + + Errors in UTF-16 strings + + The following negative error codes are given for invalid UTF-16 + strings: + + PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at end of string + PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate follows high surrogate + PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate + + + Errors in UTF-32 strings + + The following negative error codes are given for invalid UTF-32 + strings: + + PCRE2_ERROR_UTF32_ERR1 Surrogate character (0xd800 to 0xdfff) + PCRE2_ERROR_UTF32_ERR2 Code point is greater than 0x10ffff + + +MATCHING IN INVALID UTF STRINGS + + You can run pattern matches on subject strings that may contain invalid + UTF sequences if you call pcre2_compile() with the PCRE2_MATCH_IN- + VALID_UTF option. This is supported by pcre2_match(), including JIT + matching, but not by pcre2_dfa_match(). When PCRE2_MATCH_INVALID_UTF is + set, it forces PCRE2_UTF to be set as well. Note, however, that the + pattern itself must be a valid UTF string. + + If you do not set PCRE2_MATCH_INVALID_UTF when calling pcre2_compile, + and you are not certain that your subject strings are valid UTF se- + quences, you should not make use of the JIT "fast path" function + pcre2_jit_match() because it bypasses sanity checks, including the one + for UTF validity. An invalid string may cause undefined behaviour, in- + cluding looping, crashing, or giving the wrong answer. + + Setting PCRE2_MATCH_INVALID_UTF does not affect what pcre2_compile() + generates, but if pcre2_jit_compile() is subsequently called, it does + generate different code. If JIT is not used, the option affects the be- + haviour of the interpretive code in pcre2_match(). 
When PCRE2_MATCH_IN- + VALID_UTF is set at compile time, PCRE2_NO_UTF_CHECK is ignored at + match time. + + In this mode, an invalid code unit sequence in the subject never + matches any pattern item. It does not match dot, it does not match + \p{Any}, it does not even match negative items such as [^X]. A lookbe- + hind assertion fails if it encounters an invalid sequence while moving + the current point backwards. In other words, an invalid UTF code unit + sequence acts as a barrier which no match can cross. + + You can also think of this as the subject being split up into fragments + of valid UTF, delimited internally by invalid code unit sequences. The + pattern is matched fragment by fragment. The result of a successful + match, however, is given as code unit offsets in the entire subject + string in the usual way. There are a few points to consider: + + The internal boundaries are not interpreted as the beginnings or ends + of lines and so do not match circumflex or dollar characters in the + pattern. + + If pcre2_match() is called with an offset that points to an invalid + UTF-sequence, that sequence is skipped, and the match starts at the + next valid UTF character, or the end of the subject. + + At internal fragment boundaries, \b and \B behave in the same way as at + the beginning and end of the subject. For example, a sequence such as + \bWORD\b would match an instance of WORD that is surrounded by invalid + UTF code units. + + Using PCRE2_MATCH_INVALID_UTF, an application can run matches on arbi- + trary data, knowing that any matched strings that are returned are + valid UTF. This can be useful when searching for UTF text in executable + or other binary files. + + Note, however, that the 16-bit and 32-bit PCRE2 libraries process + strings as sequences of uint16_t or uint32_t code points. They cannot + find valid UTF sequences within an arbitrary string of bytes unless + such sequences are suitably aligned. 
+ + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 12 October 2023 + Copyright (c) 1997-2023 University of Cambridge. + + +PCRE2 10.43 04 February 2023 PCRE2UNICODE(3) +------------------------------------------------------------------------------ + + diff --git a/doc/pcre2_callout_enumerate.3 b/doc/pcre2_callout_enumerate.3 new file mode 100644 index 0000000..109c9be --- /dev/null +++ b/doc/pcre2_callout_enumerate.3 @@ -0,0 +1,51 @@ +.TH PCRE2_CALLOUT_ENUMERATE 3 "23 March 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP, +.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *)," +.B " void *\fIcallout_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function scans a compiled regular expression and calls the \fIcallback()\fP +function for each callout within the pattern. The yield of the function is zero +for success and non-zero otherwise. The arguments are: +.sp + \fIcode\fP Points to the compiled pattern + \fIcallback\fP The callback function + \fIcallout_data\fP User data that is passed to the callback +.sp +The \fIcallback()\fP function is passed a pointer to a data block containing +the following fields (not necessarily in this order): +.sp + uint32_t \fIversion\fP Block version number + uint32_t \fIcallout_number\fP Number for numbered callouts + PCRE2_SIZE \fIpattern_position\fP Offset to next item in pattern + PCRE2_SIZE \fInext_item_length\fP Length of next item in pattern + PCRE2_SIZE \fIcallout_string_offset\fP Offset to string within pattern + PCRE2_SIZE \fIcallout_string_length\fP Length of callout string + PCRE2_SPTR \fIcallout_string\fP Points to callout string or is NULL +.sp +The second argument passed to the \fBcallback()\fP function is the callout data +that was passed to \fBpcre2_callout_enumerate()\fP.
The \fBcallback()\fP +function must return zero for success. Any other value causes the pattern scan +to stop, with the value being passed back as the result of +\fBpcre2_callout_enumerate()\fP. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_code_copy.3 b/doc/pcre2_code_copy.3 new file mode 100644 index 0000000..09b4705 --- /dev/null +++ b/doc/pcre2_code_copy.3 @@ -0,0 +1,31 @@ +.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.23" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function makes a copy of the memory used for a compiled pattern, excluding +any memory used by the JIT compiler. Without a subsequent call to +\fBpcre2_jit_compile()\fP, the copy can be used only for non-JIT matching. The +pointer to the character tables is copied, not the tables themselves (see +\fBpcre2_code_copy_with_tables()\fP). The yield of the function is NULL if +\fIcode\fP is NULL or if sufficient memory cannot be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_code_copy_with_tables.3 b/doc/pcre2_code_copy_with_tables.3 new file mode 100644 index 0000000..cfbddb3 --- /dev/null +++ b/doc/pcre2_code_copy_with_tables.3 @@ -0,0 +1,32 @@ +.TH PCRE2_CODE_COPY_WITH_TABLES 3 "22 November 2016" "PCRE2 10.23" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP); +.fi +.
+.SH DESCRIPTION +.rs +.sp +This function makes a copy of the memory used for a compiled pattern, excluding +any memory used by the JIT compiler. Without a subsequent call to +\fBpcre2_jit_compile()\fP, the copy can be used only for non-JIT matching. +Unlike \fBpcre2_code_copy()\fP, a separate copy of the character tables is also +made, with the new code pointing to it. This memory will be automatically freed +when \fBpcre2_code_free()\fP is called. The yield of the function is NULL if +\fIcode\fP is NULL or if sufficient memory cannot be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_code_free.3 b/doc/pcre2_code_free.3 new file mode 100644 index 0000000..9e0ad3c --- /dev/null +++ b/doc/pcre2_code_free.3 @@ -0,0 +1,30 @@ +.TH PCRE2_CODE_FREE 3 "28 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_code_free(pcre2_code *\fIcode\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +If \fIcode\fP is NULL, this function does nothing. Otherwise, \fIcode\fP must +point to a compiled pattern. This function frees its memory, including any +memory used by the JIT compiler. If the compiled pattern was created by a call +to \fBpcre2_code_copy_with_tables()\fP, the memory for the character tables is +also freed. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_compile.3 b/doc/pcre2_compile.3 new file mode 100644 index 0000000..151a703 --- /dev/null +++ b/doc/pcre2_compile.3 @@ -0,0 +1,106 @@ +.TH PCRE2_COMPILE 3 "19 January 2024" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B pcre2_code *pcre2_compile(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP, +.B " uint32_t \fIoptions\fP, int *\fIerrorcode\fP, PCRE2_SIZE *\fIerroroffset\fP," +.B " pcre2_compile_context *\fIccontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function compiles a regular expression pattern into an internal form. Its +arguments are: +.sp + \fIpattern\fP A string containing the expression to be compiled + \fIlength\fP The length of the string or PCRE2_ZERO_TERMINATED + \fIoptions\fP Primary option bits + \fIerrorcode\fP Where to put an error code + \fIerroroffset\fP Where to put an error offset + \fIccontext\fP Pointer to a compile context or NULL +.sp +The length of the pattern and any error offset that is returned are in code +units, not characters. A NULL pattern with zero length is treated as an empty +string. A compile context is needed only if you want to provide custom memory +allocation functions, or to provide an external function for system stack size +checking (see \fBpcre2_set_compile_recursion_guard()\fP), or to change one or +more of these parameters: +.sp + What \eR matches (Unicode newlines, or CR, LF, CRLF only); + PCRE2's character tables; + The newline character sequence; + The compile time nested parentheses limit; + The maximum pattern length (in code units) that is allowed; + The additional options bits.
+.sp +The primary option bits are: +.sp + PCRE2_ANCHORED Force pattern anchoring + PCRE2_ALLOW_EMPTY_CLASS Allow empty classes + PCRE2_ALT_BSUX Alternative handling of \eu, \eU, and \ex + PCRE2_ALT_CIRCUMFLEX Alternative handling of ^ in multiline mode + PCRE2_ALT_VERBNAMES Process backslashes in verb names + PCRE2_AUTO_CALLOUT Compile automatic callouts + PCRE2_CASELESS Do caseless matching + PCRE2_DOLLAR_ENDONLY $ not to match newline at end + PCRE2_DOTALL . matches anything including NL + PCRE2_DUPNAMES Allow duplicate names for subpatterns + PCRE2_ENDANCHORED Pattern can match only at end of subject + PCRE2_EXTENDED Ignore white space and # comments + PCRE2_FIRSTLINE Force matching to be before newline + PCRE2_LITERAL Pattern characters are all literal + PCRE2_MATCH_INVALID_UTF Enable support for matching invalid UTF + PCRE2_MATCH_UNSET_BACKREF Match unset backreferences + PCRE2_MULTILINE ^ and $ match newlines within data + PCRE2_NEVER_BACKSLASH_C Lock out the use of \eC in patterns + PCRE2_NEVER_UCP Lock out PCRE2_UCP, e.g. via (*UCP) + PCRE2_NEVER_UTF Lock out PCRE2_UTF, e.g. via (*UTF) + PCRE2_NO_AUTO_CAPTURE Disable numbered capturing paren- + theses (named ones available) + PCRE2_NO_AUTO_POSSESS Disable auto-possessification + PCRE2_NO_DOTSTAR_ANCHOR Disable automatic anchoring for .* + PCRE2_NO_START_OPTIMIZE Disable match-time start optimizations + PCRE2_NO_UTF_CHECK Do not check the pattern for UTF validity + (only relevant if PCRE2_UTF is set) + PCRE2_UCP Use Unicode properties for \ed, \ew, etc. + PCRE2_UNGREEDY Invert greediness of quantifiers + PCRE2_USE_OFFSET_LIMIT Enable offset limit for unanchored matching + PCRE2_UTF Treat pattern and subjects as UTF strings +.sp +PCRE2 must be built with Unicode support (the default) in order to use +PCRE2_UTF, PCRE2_UCP and related options. +.P +Additional options may be set in the compile context via the +.\" HREF +\fBpcre2_set_compile_extra_options\fP +.\" +function. 
+.P +If either of \fIerrorcode\fP or \fIerroroffset\fP is NULL, the function returns +NULL immediately. Otherwise, the yield of this function is a pointer to a +private data structure that contains the compiled pattern, or NULL if an error +was detected. In the error case, a text error message can be obtained by +passing the value returned via the \fIerrorcode\fP argument to the +\fBpcre2_get_error_message()\fP function. The offset (in code units) where the +error was encountered is returned via the \fIerroroffset\fP argument. +.P +If there is no error, the value passed via \fIerrorcode\fP returns the message +"no error" if passed to \fBpcre2_get_error_message()\fP, and the value passed +via \fIerroroffset\fP is zero. +.P +There is a complete description of the PCRE2 native API, with more detail on +each option, in the +.\" HREF +\fBpcre2api\fP +.\" +page, and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_compile_context_copy.3 b/doc/pcre2_compile_context_copy.3 new file mode 100644 index 0000000..aea1187 --- /dev/null +++ b/doc/pcre2_compile_context_copy.3 @@ -0,0 +1,29 @@ +.TH PCRE2_COMPILE_CONTEXT_COPY 3 "22 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_compile_context *pcre2_compile_context_copy( +.B " pcre2_compile_context *\fIccontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function makes a new copy of a compile context, using the memory +allocation function that was used for the original context. The result is NULL +if the memory cannot be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_compile_context_create.3 b/doc/pcre2_compile_context_create.3 new file mode 100644 index 0000000..3053df4 --- /dev/null +++ b/doc/pcre2_compile_context_create.3 @@ -0,0 +1,30 @@ +.TH PCRE2_COMPILE_CONTEXT_CREATE 3 "22 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_compile_context *pcre2_compile_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function creates and initializes a new compile context. If its argument is +NULL, \fBmalloc()\fP is used to get the necessary memory; otherwise the memory +allocation function within the general context is used. The result is NULL if +the memory could not be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_compile_context_free.3 b/doc/pcre2_compile_context_free.3 new file mode 100644 index 0000000..e90d744 --- /dev/null +++ b/doc/pcre2_compile_context_free.3 @@ -0,0 +1,29 @@ +.TH PCRE2_COMPILE_CONTEXT_FREE 3 "29 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function frees the memory occupied by a compile context, using the memory +freeing function from the general context with which it was created, or +\fBfree()\fP if that was not set. If the argument is NULL, the function returns +immediately without doing anything. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_config.3 b/doc/pcre2_config.3 new file mode 100644 index 0000000..ab9623d --- /dev/null +++ b/doc/pcre2_config.3 @@ -0,0 +1,76 @@ +.TH PCRE2_CONFIG 3 "16 September 2017" "PCRE2 10.31" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.SM +.B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP); +. +.SH DESCRIPTION +.rs +.sp +This function makes it possible for a client program to find out which optional +features are available in the version of the PCRE2 library it is using. The +arguments are as follows: +.sp + \fIwhat\fP A code specifying what information is required + \fIwhere\fP Points to where to put the information +.sp +If \fIwhere\fP is NULL, the function returns the amount of memory needed for +the requested information. When the information is a string, the value is in +code units; for other types of data it is in bytes. +.P +If \fIwhere\fP is not NULL, for PCRE2_CONFIG_JITTARGET, +PCRE2_CONFIG_UNICODE_VERSION, and PCRE2_CONFIG_VERSION it must point to a +buffer that is large enough to hold the string. For all other codes it must +point to a uint32_t integer variable.
The available codes are: +.sp + PCRE2_CONFIG_BSR Indicates what \eR matches by default: + PCRE2_BSR_UNICODE + PCRE2_BSR_ANYCRLF + PCRE2_CONFIG_COMPILED_WIDTHS Which of 8/16/32 support was compiled + PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit + PCRE2_CONFIG_HEAPLIMIT Default heap memory limit +.\" JOIN + PCRE2_CONFIG_JIT Availability of just-in-time compiler + support (1=yes 0=no) +.\" JOIN + PCRE2_CONFIG_JITTARGET Information (a string) about the target + architecture for the JIT compiler + PCRE2_CONFIG_LINKSIZE Configured internal link size (2, 3, 4) + PCRE2_CONFIG_MATCHLIMIT Default internal resource limit + PCRE2_CONFIG_NEVER_BACKSLASH_C Whether or not \eC is disabled + PCRE2_CONFIG_NEWLINE Code for the default newline sequence: + PCRE2_NEWLINE_CR + PCRE2_NEWLINE_LF + PCRE2_NEWLINE_CRLF + PCRE2_NEWLINE_ANY + PCRE2_NEWLINE_ANYCRLF + PCRE2_NEWLINE_NUL + PCRE2_CONFIG_PARENSLIMIT Default parentheses nesting limit + PCRE2_CONFIG_RECURSIONLIMIT Obsolete: use PCRE2_CONFIG_DEPTHLIMIT + PCRE2_CONFIG_STACKRECURSE Obsolete: always returns 0 +.\" JOIN + PCRE2_CONFIG_UNICODE Availability of Unicode support (1=yes + 0=no) + PCRE2_CONFIG_UNICODE_VERSION The Unicode version (a string) + PCRE2_CONFIG_VERSION The PCRE2 version (a string) +.sp +The function yields a non-negative value on success or the negative value +PCRE2_ERROR_BADOPTION otherwise. This is also the result for the +PCRE2_CONFIG_JITTARGET code if JIT support is not available. When a string is +requested, the function returns the number of code units used, including the +terminating zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_convert_context_copy.3 b/doc/pcre2_convert_context_copy.3 new file mode 100644 index 0000000..827c3e9 --- /dev/null +++ b/doc/pcre2_convert_context_copy.3 @@ -0,0 +1,26 @@ +.TH PCRE2_CONVERT_CONTEXT_COPY 3 "10 July 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_convert_context *pcre2_convert_context_copy( +.B " pcre2_convert_context *\fIcvcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. +It makes a new copy of a convert context, using the memory allocation function +that was used for the original context. The result is NULL if the memory cannot +be obtained. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. diff --git a/doc/pcre2_convert_context_create.3 b/doc/pcre2_convert_context_create.3 new file mode 100644 index 0000000..91c17fb --- /dev/null +++ b/doc/pcre2_convert_context_create.3 @@ -0,0 +1,27 @@ +.TH PCRE2_CONVERT_CONTEXT_CREATE 3 "10 July 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_convert_context *pcre2_convert_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. +It creates and initializes a new convert context. If its argument is +NULL, \fBmalloc()\fP is used to get the necessary memory; otherwise the memory +allocation function within the general context is used. The result is NULL if +the memory could not be obtained. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. 
diff --git a/doc/pcre2_convert_context_free.3 b/doc/pcre2_convert_context_free.3 new file mode 100644 index 0000000..3fd5783 --- /dev/null +++ b/doc/pcre2_convert_context_free.3 @@ -0,0 +1,26 @@ +.TH PCRE2_CONVERT_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. +It frees the memory occupied by a convert context, using the memory +freeing function from the general context with which it was created, or +\fBfree()\fP if that was not set. If the argument is NULL, the function returns +immediately without doing anything. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. diff --git a/doc/pcre2_converted_pattern_free.3 b/doc/pcre2_converted_pattern_free.3 new file mode 100644 index 0000000..b0645b5 --- /dev/null +++ b/doc/pcre2_converted_pattern_free.3 @@ -0,0 +1,26 @@ +.TH PCRE2_CONVERTED_PATTERN_FREE 3 "28 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. +It frees the memory occupied by a converted pattern that was obtained by +calling \fBpcre2_pattern_convert()\fP with arguments that caused it to place +the converted pattern into newly obtained heap memory. If the argument is NULL, +the function returns immediately without doing anything. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. 
diff --git a/doc/pcre2_dfa_match.3 b/doc/pcre2_dfa_match.3 new file mode 100644 index 0000000..ea87aac --- /dev/null +++ b/doc/pcre2_dfa_match.3 @@ -0,0 +1,86 @@ +.TH PCRE2_DFA_MATCH 3 "28 August 2021" "PCRE2 10.38" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_dfa_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP," +.B " int *\fIworkspace\fP, PCRE2_SIZE \fIwscount\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function matches a compiled regular expression against a given subject +string, using an alternative matching algorithm that scans the subject string +just once (except when processing lookaround assertions). This function is +\fInot\fP Perl-compatible (the Perl-compatible matching function is +\fBpcre2_match()\fP). The arguments for this function are: +.sp + \fIcode\fP Points to the compiled pattern + \fIsubject\fP Points to the subject string + \fIlength\fP Length of the subject string + \fIstartoffset\fP Offset in the subject at which to start matching + \fIoptions\fP Option bits + \fImatch_data\fP Points to a match data block, for results + \fImcontext\fP Points to a match context, or is NULL + \fIworkspace\fP Points to a vector of ints used as working space + \fIwscount\fP Number of elements in the vector +.sp +The size of the output vector needed to contain all the results depends on the +number of simultaneous matches, not on the number of parentheses in the +pattern. Using \fBpcre2_match_data_create_from_pattern()\fP to create the match +data block is therefore not advisable when using this function. +.P +A match context is needed only if you want to set up a callout function or +specify the heap limit, the match limit, or the recursion depth limit.
The +\fIlength\fP and \fIstartoffset\fP values are code units, not characters. The +options are: +.sp + PCRE2_ANCHORED Match only at the first position + PCRE2_COPY_MATCHED_SUBJECT + On success, make a private subject copy + PCRE2_ENDANCHORED Pattern can match only at end of subject + PCRE2_NOTBOL Subject is not the beginning of a line + PCRE2_NOTEOL Subject is not the end of a line + PCRE2_NOTEMPTY An empty string is not a valid match +.\" JOIN + PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject + is not a valid match +.\" JOIN + PCRE2_NO_UTF_CHECK Do not check the subject for UTF + validity (only relevant if PCRE2_UTF + was set at compile time) +.\" JOIN + PCRE2_PARTIAL_HARD Return PCRE2_ERROR_PARTIAL for a partial + match even if there is a full match +.\" JOIN + PCRE2_PARTIAL_SOFT Return PCRE2_ERROR_PARTIAL for a partial + match if no full matches are found + PCRE2_DFA_RESTART Restart after a partial match + PCRE2_DFA_SHORTEST Return only the shortest match +.sp +There are restrictions on what may appear in a pattern when using this matching +function. Details are given in the +.\" HREF +\fBpcre2matching\fP +.\" +documentation. For details of partial matching, see the +.\" HREF +\fBpcre2partial\fP +.\" +page. There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_general_context_copy.3 b/doc/pcre2_general_context_copy.3 new file mode 100644 index 0000000..637e565 --- /dev/null +++ b/doc/pcre2_general_context_copy.3 @@ -0,0 +1,30 @@ +.TH PCRE2_GENERAL_CONTEXT_COPY 3 "22 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_general_context *pcre2_general_context_copy( +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +. 
+.SH DESCRIPTION +.rs +.sp +This function makes a new copy of a general context, using the memory +allocation functions in the context, if set, to get the necessary memory. +Otherwise \fBmalloc()\fP is used. The result is NULL if the memory cannot be +obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_general_context_create.3 b/doc/pcre2_general_context_create.3 new file mode 100644 index 0000000..e204a95 --- /dev/null +++ b/doc/pcre2_general_context_create.3 @@ -0,0 +1,32 @@ +.TH PCRE2_GENERAL_CONTEXT_CREATE 3 "22 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_general_context *pcre2_general_context_create( +.B " void *(*\fIprivate_malloc\fP)(size_t, void *)," +.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function creates and initializes a general context. The arguments define +custom memory management functions and a data value that is passed to them when +they are called. The \fBprivate_malloc()\fP function is used to get memory for +the context. If either of the first two arguments is NULL, the system memory +management function is used. The result is NULL if no memory could be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_general_context_free.3 b/doc/pcre2_general_context_free.3 new file mode 100644 index 0000000..df1aa1f --- /dev/null +++ b/doc/pcre2_general_context_free.3 @@ -0,0 +1,28 @@ +.TH PCRE2_GENERAL_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_general_context_free(pcre2_general_context *\fIgcontext\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function frees the memory occupied by a general context, using the memory +freeing function within the context, if set. If the argument is NULL, the +function returns immediately without doing anything. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_get_error_message.3 b/doc/pcre2_get_error_message.3 new file mode 100644 index 0000000..3d3e0de --- /dev/null +++ b/doc/pcre2_get_error_message.3 @@ -0,0 +1,39 @@ +.TH PCRE2_GET_ERROR_MESSAGE 3 "24 March 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_get_error_message(int \fIerrorcode\fP, PCRE2_UCHAR *\fIbuffer\fP, +.B " PCRE2_SIZE \fIbufflen\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function provides a textual error message for each PCRE2 error code. +Compilation errors are positive numbers; UTF formatting errors and matching +errors are negative numbers. The arguments are: +.sp + \fIerrorcode\fP an error code (positive or negative) + \fIbuffer\fP where to put the message + \fIbufflen\fP the length of the buffer (code units) +.sp +The function returns the length of the message in code units, excluding the +trailing zero, or the negative error code PCRE2_ERROR_NOMEMORY if the buffer is +too small. 
In this case, the returned message is truncated (but still with a +trailing zero). If \fIerrorcode\fP does not contain a recognized error code +number, the negative value PCRE2_ERROR_BADDATA is returned. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_get_mark.3 b/doc/pcre2_get_mark.3 new file mode 100644 index 0000000..dce377d --- /dev/null +++ b/doc/pcre2_get_mark.3 @@ -0,0 +1,34 @@ +.TH PCRE2_GET_MARK 3 "13 October 2017" "PCRE2 10.31" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B PCRE2_SPTR pcre2_get_mark(pcre2_match_data *\fImatch_data\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +After a call of \fBpcre2_match()\fP that was passed the match block that is +this function's argument, this function returns a pointer to the last (*MARK), +(*PRUNE), or (*THEN) name that was encountered during the matching process. The +name is zero-terminated, and is within the compiled pattern. The length of the +name is in the preceding code unit. If no name is available, NULL is returned. +.P +After a successful match, the name that is returned is the last one on the +matching path. After a failed match or a partial match, the last encountered +name is returned. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_get_match_data_heapframes_size.3 b/doc/pcre2_get_match_data_heapframes_size.3 new file mode 100644 index 0000000..1eba42b --- /dev/null +++ b/doc/pcre2_get_match_data_heapframes_size.3 @@ -0,0 +1,28 @@ +.TH PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE 3 "13 January 2023" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B PCRE2_SIZE pcre2_get_match_data_heapframes_size( +.B " pcre2_match_data *\fImatch_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns the size, in bytes, of the heapframes data block that is +owned by its argument. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_get_match_data_size.3 b/doc/pcre2_get_match_data_size.3 new file mode 100644 index 0000000..cf5fa5e --- /dev/null +++ b/doc/pcre2_get_match_data_size.3 @@ -0,0 +1,27 @@ +.TH PCRE2_GET_MATCH_DATA_SIZE 3 "16 July 2019" "PCRE2 10.34" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *\fImatch_data\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns the size, in bytes, of the match data block that is its +argument. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_get_ovector_count.3 b/doc/pcre2_get_ovector_count.3 new file mode 100644 index 0000000..3f6d748 --- /dev/null +++ b/doc/pcre2_get_ovector_count.3 @@ -0,0 +1,27 @@ +.TH PCRE2_GET_OVECTOR_COUNT 3 "24 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B uint32_t pcre2_get_ovector_count(pcre2_match_data *\fImatch_data\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns the number of pairs of offsets in the ovector that forms +part of the given match data block. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_get_ovector_pointer.3 b/doc/pcre2_get_ovector_pointer.3 new file mode 100644 index 0000000..261d652 --- /dev/null +++ b/doc/pcre2_get_ovector_pointer.3 @@ -0,0 +1,28 @@ +.TH PCRE2_GET_OVECTOR_POINTER 3 "24 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *\fImatch_data\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns a pointer to the vector of offsets that forms part of the +given match data block. The number of pairs can be found by calling +\fBpcre2_get_ovector_count()\fP. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_get_startchar.3 b/doc/pcre2_get_startchar.3 new file mode 100644 index 0000000..c6ac8b0 --- /dev/null +++ b/doc/pcre2_get_startchar.3 @@ -0,0 +1,32 @@ +.TH PCRE2_GET_STARTCHAR 3 "24 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +After a successful call of \fBpcre2_match()\fP that was passed the match block +that is this function's argument, this function returns the code unit offset of +the character at which the successful match started. For a non-partial match, +this can be different to the value of \fIovector[0]\fP if the pattern contains +the \eK escape sequence. After a partial match, however, this value is always +the same as \fIovector[0]\fP because \eK does not affect the result of a +partial match. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_jit_compile.3 b/doc/pcre2_jit_compile.3 new file mode 100644 index 0000000..6cc1788 --- /dev/null +++ b/doc/pcre2_jit_compile.3 @@ -0,0 +1,51 @@ +.TH PCRE2_JIT_COMPILE 3 "29 July 2019" "PCRE2 10.34" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_jit_compile(pcre2_code *\fIcode\fP, uint32_t \fIoptions\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function requests JIT compilation, which, if the just-in-time compiler is +available, further processes a compiled pattern into machine code that executes +much faster than the \fBpcre2_match()\fP interpretive matching function. Full +details are given in the +.\" HREF +\fBpcre2jit\fP +.\" +documentation. 
+.P +The first argument is a pointer that was returned by a successful call to +\fBpcre2_compile()\fP, and the second must contain one or more of the following +bits: +.sp + PCRE2_JIT_COMPLETE compile code for full matching + PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching + PCRE2_JIT_PARTIAL_HARD compile code for hard partial matching +.sp +There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been +superseded by the \fBpcre2_compile()\fP option PCRE2_MATCH_INVALID_UTF. The old +option is deprecated and may be removed in the future. +.P +The yield of the function is 0 for success, or a negative error code otherwise. +In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or +if an unknown bit is set in \fIoptions\fP. The function can also return +PCRE2_ERROR_NOMEMORY if JIT is unable to allocate executable memory for the +compiler, even if it was because of a system security restriction. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_jit_free_unused_memory.3 b/doc/pcre2_jit_free_unused_memory.3 new file mode 100644 index 0000000..183bba0 --- /dev/null +++ b/doc/pcre2_jit_free_unused_memory.3 @@ -0,0 +1,31 @@ +.TH PCRE2_JIT_FREE_UNUSED_MEMORY 3 "27 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function frees unused JIT executable memory. The argument is a general +context, for custom memory management, or NULL for standard memory management. +JIT memory allocation retains some memory in order to improve future JIT +compilation speed. 
In low memory conditions, +\fBpcre2_jit_free_unused_memory()\fP can be used to cause this memory to be +freed. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_jit_match.3 b/doc/pcre2_jit_match.3 new file mode 100644 index 0000000..72c4bc1 --- /dev/null +++ b/doc/pcre2_jit_match.3 @@ -0,0 +1,58 @@ +.TH PCRE2_JIT_MATCH 3 "20 January 2023" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_jit_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function matches a compiled regular expression that has been successfully +processed by the JIT compiler against a given subject string, using a matching +algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and +it bypasses some of the sanity checks that \fBpcre2_match()\fP applies. +.P +In UTF mode, the subject string is not checked for UTF validity. Unless +PCRE2_MATCH_INVALID_UTF was set when the pattern was compiled, passing an +invalid UTF string results in undefined behaviour. Your program may crash or +loop or give wrong results. In the absence of PCRE2_MATCH_INVALID_UTF you +should only call \fBpcre2_jit_match()\fP in UTF mode if you are sure the +subject is valid. +.P +The arguments for \fBpcre2_jit_match()\fP are exactly the same as for +.\" HREF +\fBpcre2_match()\fP, +.\" +except that the subject string must be specified with a length; +PCRE2_ZERO_TERMINATED is not supported. +.P +The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, +PCRE2_NOTEMPTY_ATSTART, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. 
Unsupported +options are ignored. +.P +The return values are the same as for \fBpcre2_match()\fP plus +PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested +that was not compiled. For details of partial matching, see the +.\" HREF +\fBpcre2partial\fP +.\" +page. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the JIT API in the +.\" HREF +\fBpcre2jit\fP +.\" +page. diff --git a/doc/pcre2_jit_stack_assign.3 b/doc/pcre2_jit_stack_assign.3 new file mode 100644 index 0000000..33d2e1c --- /dev/null +++ b/doc/pcre2_jit_stack_assign.3 @@ -0,0 +1,59 @@ +.TH PCRE2_JIT_STACK_ASSIGN 3 "28 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_jit_stack_assign(pcre2_match_context *\fImcontext\fP, +.B " pcre2_jit_callback \fIcallback_function\fP, void *\fIcallback_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function provides control over the memory used by JIT as a run-time stack +when \fBpcre2_match()\fP or \fBpcre2_jit_match()\fP is called with a pattern +that has been successfully processed by the JIT compiler. The information that +determines which stack is used is put into a match context that is subsequently +passed to a matching function. The arguments of this function are: +.sp + mcontext a pointer to a match context + callback a callback function + callback_data a JIT stack or a value to be passed to the callback +.P +If \fImcontext\fP is NULL, the function returns immediately, without doing +anything. +.P +If \fIcallback\fP is NULL and \fIcallback_data\fP is NULL, an internal 32KiB +block on the machine stack is used. +.P +If \fIcallback\fP is NULL and \fIcallback_data\fP is not NULL, +\fIcallback_data\fP must be a valid JIT stack, the result of calling +\fBpcre2_jit_stack_create()\fP. 
+.P +If \fIcallback\fP is not NULL, it is called with \fIcallback_data\fP as an +argument at the start of matching, in order to set up a JIT stack. If the +result is NULL, the internal 32KiB stack is used; otherwise the return value +must be a valid JIT stack, the result of calling +\fBpcre2_jit_stack_create()\fP. +.P +You may safely use the same JIT stack for multiple patterns, as long as they +are all matched in the same thread. In a multithread application, each thread +must use its own JIT stack. For more details, see the +.\" HREF +\fBpcre2jit\fP +.\" +page. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_jit_stack_create.3 b/doc/pcre2_jit_stack_create.3 new file mode 100644 index 0000000..74cdb1d --- /dev/null +++ b/doc/pcre2_jit_stack_create.3 @@ -0,0 +1,40 @@ +.TH PCRE2_JIT_STACK_CREATE 3 "24 March 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B pcre2_jit_stack *pcre2_jit_stack_create(size_t \fIstartsize\fP, +.B " size_t \fImaxsize\fP, pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is used to create a stack for use by the code compiled by the JIT +compiler. The first two arguments are a starting size for the stack, and a +maximum size to which it is allowed to grow. The final argument is a general +context, for memory allocation functions, or NULL for standard memory +allocation. The result can be passed to the JIT run-time code by calling +\fBpcre2_jit_stack_assign()\fP to associate the stack with a compiled pattern, +which can then be processed by \fBpcre2_match()\fP or \fBpcre2_jit_match()\fP. +A maximum stack size of 512KiB to 1MiB should be more than enough for any +pattern. 
If the stack couldn't be allocated or the values passed were not +reasonable, NULL will be returned. For more details, see the +.\" HREF +\fBpcre2jit\fP +.\" +page. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_jit_stack_free.3 b/doc/pcre2_jit_stack_free.3 new file mode 100644 index 0000000..2131a79 --- /dev/null +++ b/doc/pcre2_jit_stack_free.3 @@ -0,0 +1,32 @@ +.TH PCRE2_JIT_STACK_FREE 3 "28 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.SM +.B void pcre2_jit_stack_free(pcre2_jit_stack *\fIjit_stack\fP); +. +.SH DESCRIPTION +.rs +.sp +This function is used to free a JIT stack that was created by +\fBpcre2_jit_stack_create()\fP when it is no longer needed. If the argument is +NULL, the function returns immediately without doing anything. For more +details, see the +.\" HREF +\fBpcre2jit\fP +.\" +page. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_maketables.3 b/doc/pcre2_maketables.3 new file mode 100644 index 0000000..7dc8438 --- /dev/null +++ b/doc/pcre2_maketables.3 @@ -0,0 +1,36 @@ +.TH PCRE2_MAKETABLES 3 "17 April 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.SM +.B const uint8_t *pcre2_maketables(pcre2_general_context *\fIgcontext\fP); +. +.SH DESCRIPTION +.rs +.sp +This function builds a set of character tables for character code points that +are less than 256. 
These can be passed to \fBpcre2_compile()\fP in a compile +context in order to override the internal, built-in tables (which were either +defaulted or made by \fBpcre2_maketables()\fP when PCRE2 was compiled). See the +.\" HREF +\fBpcre2_set_character_tables()\fP +.\" +page. You might want to do this if you are using a non-standard locale. +.P +If the argument is NULL, \fBmalloc()\fP is used to get memory for the tables. +Otherwise it must point to a general context, which can supply pointers to a +custom memory manager. The function yields a pointer to the tables. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_maketables_free.3 b/doc/pcre2_maketables_free.3 new file mode 100644 index 0000000..07986b9 --- /dev/null +++ b/doc/pcre2_maketables_free.3 @@ -0,0 +1,31 @@ +.TH PCRE2_MAKETABLES_FREE 3 "02 September 2019" "PCRE2 10.34" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP, +.B " const uint8_t *\fItables\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function discards a set of character tables that were created by a call +to +.\" HREF +\fBpcre2_maketables()\fP. +.\" +.P +The \fIgcontext\fP parameter should match what was used in that call to +account for any custom allocators that might be in use; if it is NULL +the system \fBfree()\fP is used. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page. 
diff --git a/doc/pcre2_match.3 b/doc/pcre2_match.3 new file mode 100644 index 0000000..efdd892 --- /dev/null +++ b/doc/pcre2_match.3 @@ -0,0 +1,86 @@ +.TH PCRE2_MATCH 3 "27 January 2024" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function matches a compiled regular expression against a given subject +string, using a matching algorithm that is similar to Perl's. It returns +offsets to what it has matched and to captured substrings via the +\fBmatch_data\fP block, which can be processed by functions with names that +start with \fBpcre2_get_ovector_...()\fP or \fBpcre2_substring_...()\fP. The +return from \fBpcre2_match()\fP is one more than the highest numbered capturing +pair that has been set (for example, 1 if there are no captures), zero if the +vector of offsets is too small, or a negative error code for no match and other +errors. The function arguments are: +.sp + \fIcode\fP Points to the compiled pattern + \fIsubject\fP Points to the subject string + \fIlength\fP Length of the subject string + \fIstartoffset\fP Offset in the subject at which to start matching + \fIoptions\fP Option bits + \fImatch_data\fP Points to a match data block, for results + \fImcontext\fP Points to a match context, or is NULL +.sp +A match context is needed only if you want to: +.sp + Set up a callout function + Set a matching offset limit + Change the heap memory limit + Change the backtracking match limit + Change the backtracking depth limit + Set custom memory management specifically for the match +.sp +The \fIlength\fP and \fIstartoffset\fP values are code units, not characters. 
+The length may be given as PCRE2_ZERO_TERMINATED for a subject that is +terminated by a binary zero code unit. The options are: +.sp + PCRE2_ANCHORED Match only at the first position + PCRE2_COPY_MATCHED_SUBJECT + On success, make a private subject copy + PCRE2_DISABLE_RECURSELOOP_CHECK + Only useful in rare cases; use with care + PCRE2_ENDANCHORED Pattern can match only at end of subject + PCRE2_NOTBOL Subject string is not the beginning of a line + PCRE2_NOTEOL Subject string is not the end of a line + PCRE2_NOTEMPTY An empty string is not a valid match +.\" JOIN + PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject + is not a valid match + PCRE2_NO_JIT Do not use JIT matching +.\" JOIN + PCRE2_NO_UTF_CHECK Do not check the subject for UTF + validity (only relevant if PCRE2_UTF + was set at compile time) +.\" JOIN + PCRE2_PARTIAL_HARD Return PCRE2_ERROR_PARTIAL for a partial + match even if there is a full match +.\" JOIN + PCRE2_PARTIAL_SOFT Return PCRE2_ERROR_PARTIAL for a partial + match if no full matches are found +.sp +For details of partial matching, see the +.\" HREF +\fBpcre2partial\fP +.\" +page. There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_match_context_copy.3 b/doc/pcre2_match_context_copy.3 new file mode 100644 index 0000000..26c33a6 --- /dev/null +++ b/doc/pcre2_match_context_copy.3 @@ -0,0 +1,29 @@ +.TH PCRE2_MATCH_CONTEXT_COPY 3 "22 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_match_context *pcre2_match_context_copy( +.B " pcre2_match_context *\fImcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function makes a new copy of a match context, using the memory +allocation function that was used for the original context. 
The result is NULL +if the memory cannot be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_match_context_create.3 b/doc/pcre2_match_context_create.3 new file mode 100644 index 0000000..d4a2665 --- /dev/null +++ b/doc/pcre2_match_context_create.3 @@ -0,0 +1,30 @@ +.TH PCRE2_MATCH_CONTEXT_CREATE 3 "22 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_match_context *pcre2_match_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function creates and initializes a new match context. If its argument is +NULL, \fBmalloc()\fP is used to get the necessary memory; otherwise the memory +allocation function within the general context is used. The result is NULL if +the memory could not be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_match_context_free.3 b/doc/pcre2_match_context_free.3 new file mode 100644 index 0000000..7d19f98 --- /dev/null +++ b/doc/pcre2_match_context_free.3 @@ -0,0 +1,29 @@ +.TH PCRE2_MATCH_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function frees the memory occupied by a match context, using the memory +freeing function from the general context with which it was created, or +\fBfree()\fP if that was not set. If the argument is NULL, the function returns +immediately without doing anything. 
+.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_match_data_create.3 b/doc/pcre2_match_data_create.3 new file mode 100644 index 0000000..439dea3 --- /dev/null +++ b/doc/pcre2_match_data_create.3 @@ -0,0 +1,37 @@ +.TH PCRE2_MATCH_DATA_CREATE 3 "28 August 2021" "PCRE2 10.38" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_match_data *pcre2_match_data_create(uint32_t \fIovecsize\fP, +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function creates a new match data block, which is used for holding the +result of a match. The first argument specifies the number of pairs of offsets +that are required. These form the "output vector" (ovector) within the match +data block, and are used to identify the matched string and any captured +substrings when matching with \fBpcre2_match()\fP, or a number of different +matches at the same point when used with \fBpcre2_dfa_match()\fP. There is +always one pair of offsets; if \fBovecsize\fP is zero, it is treated as one. +.P +The second argument points to a general context, for custom memory management, +or is NULL for system memory management. The result of the function is NULL if +the memory for the block could not be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_match_data_create_from_pattern.3 b/doc/pcre2_match_data_create_from_pattern.3 new file mode 100644 index 0000000..829bf6c --- /dev/null +++ b/doc/pcre2_match_data_create_from_pattern.3 @@ -0,0 +1,40 @@ +.TH PCRE2_MATCH_DATA_CREATE_FROM_PATTERN 3 "28 August 2021" "PCRE2 10.38" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_match_data *pcre2_match_data_create_from_pattern( +.B " const pcre2_code *\fIcode\fP, pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function creates a new match data block for holding the result of a match. +The first argument points to a compiled pattern. The number of capturing +parentheses within the pattern is used to compute the number of pairs of +offsets that are required in the match data block. These form the "output +vector" (ovector) within the match data block, and are used to identify the +matched string and any captured substrings when matching with +\fBpcre2_match()\fP. If you are using \fBpcre2_dfa_match()\fP, which uses the +output vector in a different way, you should use \fBpcre2_match_data_create()\fP +instead of this function. +.P +The second argument points to a general context, for custom memory management, +or is NULL to use the same memory allocator as was used for the compiled +pattern. The result of the function is NULL if the memory for the block could +not be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_match_data_free.3 b/doc/pcre2_match_data_free.3 new file mode 100644 index 0000000..202035a --- /dev/null +++ b/doc/pcre2_match_data_free.3 @@ -0,0 +1,35 @@ +.TH PCRE2_MATCH_DATA_FREE 3 "18 January 2023" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +If \fImatch_data\fP is NULL, this function does nothing. Otherwise, +\fImatch_data\fP must point to a match data block, which this function frees, +using the memory freeing function from the general context or compiled pattern +with which it was created, or \fBfree()\fP if that was not set. If the match +data block was previously passed to \fBpcre2_match()\fP, it will have an +attached heapframe vector; this is also freed. +.P +If the PCRE2_COPY_MATCHED_SUBJECT option was used for a successful match using this +match data block, the copy of the subject that was referenced within the block +is also freed. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_pattern_convert.3 b/doc/pcre2_pattern_convert.3 new file mode 100644 index 0000000..b72acb7 --- /dev/null +++ b/doc/pcre2_pattern_convert.3 @@ -0,0 +1,55 @@ +.TH PCRE2_PATTERN_CONVERT 3 "11 July 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP, +.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP," +.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. 
+It converts a foreign pattern (for example, a glob) into a PCRE2 regular +expression pattern. Its arguments are: +.sp + \fIpattern\fP The foreign pattern + \fIlength\fP The length of the input pattern or PCRE2_ZERO_TERMINATED + \fIoptions\fP Option bits + \fIbuffer\fP Pointer to pointer to output buffer, or NULL + \fIblength\fP Pointer to output length field + \fIcvcontext\fP Pointer to a convert context or NULL +.sp +The length of the converted pattern (excluding the terminating zero) is +returned via \fIblength\fP. If \fIbuffer\fP is NULL, the function just returns +the output length. If \fIbuffer\fP points to a NULL pointer, heap memory is +obtained for the converted pattern, using the allocator in the context if +present (or else \fBmalloc()\fP), and the field pointed to by \fIbuffer\fP is +updated. If \fIbuffer\fP points to a non-NULL field, that must point to a +buffer whose size is in the variable pointed to by \fIblength\fP. This value is +updated. +.P +The option bits are: +.sp + PCRE2_CONVERT_UTF Input is UTF + PCRE2_CONVERT_NO_UTF_CHECK Do not check UTF validity + PCRE2_CONVERT_POSIX_BASIC Convert POSIX basic pattern + PCRE2_CONVERT_POSIX_EXTENDED Convert POSIX extended pattern + PCRE2_CONVERT_GLOB ) Convert + PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR ) various types + PCRE2_CONVERT_GLOB_NO_STARSTAR ) of glob +.sp +The return value from \fBpcre2_pattern_convert()\fP is zero on success or a +non-zero PCRE2 error code. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. 
diff --git a/doc/pcre2_pattern_info.3 b/doc/pcre2_pattern_info.3 new file mode 100644 index 0000000..edd8989 --- /dev/null +++ b/doc/pcre2_pattern_info.3 @@ -0,0 +1,108 @@ +.TH PCRE2_PATTERN_INFO 3 "14 February 2019" "PCRE2 10.33" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, +.B " void *\fIwhere\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns information about a compiled pattern. Its arguments are: +.sp + \fIcode\fP Pointer to a compiled regular expression pattern + \fIwhat\fP What information is required + \fIwhere\fP Where to put the information +.sp +The recognized values for the \fIwhat\fP argument, and the information they +request are as follows: +.sp + PCRE2_INFO_ALLOPTIONS Final options after compiling + PCRE2_INFO_ARGOPTIONS Options passed to \fBpcre2_compile()\fP + PCRE2_INFO_BACKREFMAX Number of highest backreference + PCRE2_INFO_BSR What \eR matches: + PCRE2_BSR_UNICODE: Unicode line endings + PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only + PCRE2_INFO_CAPTURECOUNT Number of capturing subpatterns +.\" JOIN + PCRE2_INFO_DEPTHLIMIT Backtracking depth limit if set, + otherwise PCRE2_ERROR_UNSET + PCRE2_INFO_EXTRAOPTIONS Extra options that were passed in the + compile context + PCRE2_INFO_FIRSTBITMAP Bitmap of first code units, or NULL + PCRE2_INFO_FIRSTCODETYPE Type of start-of-match information + 0 nothing set + 1 first code unit is set + 2 start of string or after newline + PCRE2_INFO_FIRSTCODEUNIT First code unit when type is 1 + PCRE2_INFO_FRAMESIZE Size of backtracking frame + PCRE2_INFO_HASBACKSLASHC Return 1 if pattern contains \eC +.\" JOIN + PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches + exist in the pattern +.\" JOIN + PCRE2_INFO_HEAPLIMIT Heap memory limit if set, + otherwise PCRE2_ERROR_UNSET + PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used + PCRE2_INFO_JITSIZE Size 
of JIT compiled code, or 0 + PCRE2_INFO_LASTCODETYPE Type of must-be-present information + 0 nothing set + 1 code unit is set + PCRE2_INFO_LASTCODEUNIT Last code unit when type is 1 +.\" JOIN + PCRE2_INFO_MATCHEMPTY 1 if the pattern can match an + empty string, 0 otherwise +.\" JOIN + PCRE2_INFO_MATCHLIMIT Match limit if set, + otherwise PCRE2_ERROR_UNSET +.\" JOIN + PCRE2_INFO_MAXLOOKBEHIND Length (in characters) of the longest + lookbehind assertion + PCRE2_INFO_MINLENGTH Lower bound length of matching strings + PCRE2_INFO_NAMECOUNT Number of named subpatterns + PCRE2_INFO_NAMEENTRYSIZE Size of name table entries + PCRE2_INFO_NAMETABLE Pointer to name table + PCRE2_INFO_NEWLINE Code for the newline sequence: + PCRE2_NEWLINE_CR + PCRE2_NEWLINE_LF + PCRE2_NEWLINE_CRLF + PCRE2_NEWLINE_ANY + PCRE2_NEWLINE_ANYCRLF + PCRE2_NEWLINE_NUL + PCRE2_INFO_RECURSIONLIMIT Obsolete synonym for PCRE2_INFO_DEPTHLIMIT + PCRE2_INFO_SIZE Size of compiled pattern +.sp +If \fIwhere\fP is NULL, the function returns the amount of memory needed for +the requested information, in bytes. Otherwise, the \fIwhere\fP argument must +point to an unsigned 32-bit integer (uint32_t variable), except for the +following \fIwhat\fP values, when it must point to a variable of the type +shown: +.sp + PCRE2_INFO_FIRSTBITMAP const uint8_t * + PCRE2_INFO_JITSIZE size_t + PCRE2_INFO_NAMETABLE PCRE2_SPTR + PCRE2_INFO_SIZE size_t +.sp +The yield of the function is zero on success or: +.sp + PCRE2_ERROR_NULL the argument \fIcode\fP is NULL + PCRE2_ERROR_BADMAGIC the "magic number" was not found + PCRE2_ERROR_BADOPTION the value of \fIwhat\fP is invalid + PCRE2_ERROR_BADMODE the pattern was compiled in the wrong mode + PCRE2_ERROR_UNSET the requested information is not set +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_serialize_decode.3 b/doc/pcre2_serialize_decode.3 new file mode 100644 index 0000000..611113f --- /dev/null +++ b/doc/pcre2_serialize_decode.3 @@ -0,0 +1,53 @@ +.TH PCRE2_SERIALIZE_DECODE 3 "27 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int32_t pcre2_serialize_decode(pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, const uint8_t *\fIbytes\fP," +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function decodes a serialized set of compiled patterns back into a list of +individual patterns. This is possible only on a host that is running the same +version of PCRE2, with the same code unit width, and the host must also have +the same endianness, pointer width and PCRE2_SIZE type. The arguments for +\fBpcre2_serialize_decode()\fP are: +.sp + \fIcodes\fP pointer to a vector in which to build the list + \fInumber_of_codes\fP number of slots in the vector + \fIbytes\fP the serialized byte stream + \fIgcontext\fP pointer to a general context or NULL +.sp +The \fIbytes\fP argument must point to a block of data that was originally +created by \fBpcre2_serialize_encode()\fP, though it may have been saved on +disc or elsewhere in the meantime. If there are more codes in the serialized +data than slots in the list, only those compiled patterns that will fit are +decoded. The yield of the function is the number of decoded patterns, or one of +the following negative error codes: +.sp + PCRE2_ERROR_BADDATA \fInumber_of_codes\fP is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in \fIbytes\fP + PCRE2_ERROR_BADMODE mismatch of variable unit size or PCRE version + PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_NULL \fIcodes\fP or \fIbytes\fP is NULL +.sp +PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +on a system with different endianness. 
+.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the serialization functions in the +.\" HREF +\fBpcre2serialize\fP +.\" +page. diff --git a/doc/pcre2_serialize_encode.3 b/doc/pcre2_serialize_encode.3 new file mode 100644 index 0000000..d529360 --- /dev/null +++ b/doc/pcre2_serialize_encode.3 @@ -0,0 +1,54 @@ +.TH PCRE2_SERIALIZE_ENCODE 3 "27 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int32_t pcre2_serialize_encode(const pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, uint8_t **\fIserialized_bytes\fP," +.B " PCRE2_SIZE *\fIserialized_size\fP, pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function encodes a list of compiled patterns into a byte stream that can +be saved on disc or elsewhere. Note that this is not an abstract format like +Java or .NET. Conversion of the byte stream back into usable compiled patterns +can only happen on a host that is running the same version of PCRE2, with the +same code unit width, and the host must also have the same endianness, pointer +width and PCRE2_SIZE type. The arguments for \fBpcre2_serialize_encode()\fP +are: +.sp + \fIcodes\fP pointer to a vector containing the list + \fInumber_of_codes\fP number of slots in the vector + \fIserialized_bytes\fP set to point to the serialized byte stream + \fIserialized_size\fP set to the number of bytes in the byte stream + \fIgcontext\fP pointer to a general context or NULL +.sp +The context argument is used to obtain memory for the byte stream. When the +serialized data is no longer needed, it must be freed by calling +\fBpcre2_serialize_free()\fP. 
The yield of the function is the number of +serialized patterns, or one of the following negative error codes: +.sp + PCRE2_ERROR_BADDATA \fInumber_of_codes\fP is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in one of the patterns + PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables + PCRE2_ERROR_NULL an argument other than \fIgcontext\fP is NULL +.sp +PCRE2_ERROR_BADMAGIC means either that a pattern's code has been corrupted, or +that a slot in the vector does not point to a compiled pattern. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the serialization functions in the +.\" HREF +\fBpcre2serialize\fP +.\" +page. diff --git a/doc/pcre2_serialize_free.3 b/doc/pcre2_serialize_free.3 new file mode 100644 index 0000000..2c43824 --- /dev/null +++ b/doc/pcre2_serialize_free.3 @@ -0,0 +1,29 @@ +.TH PCRE2_SERIALIZE_FREE 3 "27 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_serialize_free(uint8_t *\fIbytes\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function frees the memory that was obtained by +\fBpcre2_serialize_encode()\fP to hold a serialized byte stream. The argument +must point to such a byte stream or be NULL, in which case the function returns +without doing anything. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the serialization functions in the +.\" HREF +\fBpcre2serialize\fP +.\" +page. 
diff --git a/doc/pcre2_serialize_get_number_of_codes.3 b/doc/pcre2_serialize_get_number_of_codes.3 new file mode 100644 index 0000000..f5eea54 --- /dev/null +++ b/doc/pcre2_serialize_get_number_of_codes.3 @@ -0,0 +1,37 @@ +.TH PCRE2_SERIALIZE_GET_NUMBER_OF_CODES 3 "27 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int32_t pcre2_serialize_get_number_of_codes(const uint8_t *\fIbytes\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +The \fIbytes\fP argument must point to a serialized byte stream that was +originally created by \fBpcre2_serialize_encode()\fP (though it may have been +saved on disc or elsewhere in the meantime). The function returns the number of +serialized patterns in the byte stream, or one of the following negative error +codes: +.sp + PCRE2_ERROR_BADMAGIC mismatch of id bytes in \fIbytes\fP + PCRE2_ERROR_BADMODE mismatch of variable unit size or PCRE version + PCRE2_ERROR_NULL the argument is NULL +.sp +PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +on a system with different endianness. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the serialization functions in the +.\" HREF +\fBpcre2serialize\fP +.\" +page. diff --git a/doc/pcre2_set_bsr.3 b/doc/pcre2_set_bsr.3 new file mode 100644 index 0000000..ecf2437 --- /dev/null +++ b/doc/pcre2_set_bsr.3 @@ -0,0 +1,30 @@ +.TH PCRE2_SET_BSR 3 "22 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_bsr(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the convention for processing \eR within a compile context. +The second argument must be one of PCRE2_BSR_ANYCRLF or PCRE2_BSR_UNICODE. 
The +result is zero for success or PCRE2_ERROR_BADDATA if the second argument is +invalid. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_callout.3 b/doc/pcre2_set_callout.3 new file mode 100644 index 0000000..cb48e14 --- /dev/null +++ b/doc/pcre2_set_callout.3 @@ -0,0 +1,31 @@ +.TH PCRE2_SET_CALLOUT 3 "21 March 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_callout_block *)," +.B " void *\fIcallout_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the callout fields in a match context (the first argument). +The second argument specifies a callout function, and the third argument is an +opaque data item that is passed to it. The result of this function is always +zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_character_tables.3 b/doc/pcre2_set_character_tables.3 new file mode 100644 index 0000000..1ca4134 --- /dev/null +++ b/doc/pcre2_set_character_tables.3 @@ -0,0 +1,35 @@ +.TH PCRE2_SET_CHARACTER_TABLES 3 "20 March 2020" "PCRE2 10.35" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP, +.B " const uint8_t *\fItables\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets a pointer to custom character tables within a compile +context. The second argument must point to a set of PCRE2 character tables or +be NULL to request the default tables. The result is always zero. 
Character +tables can be created by calling \fBpcre2_maketables()\fP or by running the +\fBpcre2_dftables\fP maintenance command in binary mode (see the +.\" HREF +\fBpcre2build\fP +.\" +documentation). +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_compile_extra_options.3 b/doc/pcre2_set_compile_extra_options.3 new file mode 100644 index 0000000..a1e07e9 --- /dev/null +++ b/doc/pcre2_set_compile_extra_options.3 @@ -0,0 +1,55 @@ +.TH PCRE2_SET_COMPILE_EXTRA_OPTIONS 3 "03 February 2023" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_compile_extra_options(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIextra_options\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets additional option bits for \fBpcre2_compile()\fP that are +housed in a compile context. It completely replaces all the bits. 
The extra +options are: +.sp + PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK Allow \eK in lookarounds +.\" JOIN + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \ex{d800} to \ex{dfff} + in UTF-8 and UTF-32 modes +.\" JOIN + PCRE2_EXTRA_ALT_BSUX Extended alternate \eu, \eU, and + \ex handling + PCRE2_EXTRA_ASCII_BSD \ed remains ASCII in UCP mode + PCRE2_EXTRA_ASCII_BSS \es remains ASCII in UCP mode + PCRE2_EXTRA_ASCII_BSW \ew remains ASCII in UCP mode +.\" JOIN + PCRE2_EXTRA_ASCII_DIGIT [:digit:] and [:xdigit:] POSIX classes + remain ASCII in UCP mode +.\" JOIN + PCRE2_EXTRA_ASCII_POSIX POSIX classes remain ASCII in + UCP mode +.\" JOIN + PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as + a literal following character +.\" JOIN + PCRE2_EXTRA_CASELESS_RESTRICT Disable mixed ASCII/non-ASCII + case folding + PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \er as \en + PCRE2_EXTRA_MATCH_LINE Pattern matches whole lines + PCRE2_EXTRA_MATCH_WORD Pattern matches "words" +.sp +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_compile_recursion_guard.3 b/doc/pcre2_set_compile_recursion_guard.3 new file mode 100644 index 0000000..0575f94 --- /dev/null +++ b/doc/pcre2_set_compile_recursion_guard.3 @@ -0,0 +1,34 @@ +.TH PCRE2_SET_COMPILE_RECURSION_GUARD 3 "22 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP, +.B " int (*\fIguard_function\fP)(uint32_t, void *), void *\fIuser_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function defines, within a compile context, a function that is called +whenever \fBpcre2_compile()\fP starts to compile a parenthesized part of a +pattern. 
The first argument to the function gives the current depth of +parenthesis nesting, and the second is user data that is supplied when the +function is set up. The callout function should return zero if all is well, or +non-zero to force an error. This feature is provided so that applications can +check the available system stack space, in order to avoid running out. The +result of \fBpcre2_set_compile_recursion_guard()\fP is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_depth_limit.3 b/doc/pcre2_set_depth_limit.3 new file mode 100644 index 0000000..62bc7fe --- /dev/null +++ b/doc/pcre2_set_depth_limit.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_DEPTH_LIMIT 3 "25 March 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_depth_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the backtracking depth limit field in a match context. The +result is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_glob_escape.3 b/doc/pcre2_set_glob_escape.3 new file mode 100644 index 0000000..d5637af --- /dev/null +++ b/doc/pcre2_set_glob_escape.3 @@ -0,0 +1,29 @@ +.TH PCRE2_SET_GLOB_ESCAPE 3 "11 July 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP, +.B " uint32_t \fIescape_char\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. 
+It sets the escape character that is used when converting globs. The second +argument must either be zero (meaning there is no escape character) or a +punctuation character whose code point is less than 256. The default is grave +accent if running under Windows, otherwise backslash. The result of the +function is zero for success or PCRE2_ERROR_BADDATA if the second argument is +invalid. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. diff --git a/doc/pcre2_set_glob_separator.3 b/doc/pcre2_set_glob_separator.3 new file mode 100644 index 0000000..5d78c09 --- /dev/null +++ b/doc/pcre2_set_glob_separator.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_GLOB_SEPARATOR 3 "11 July 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP, +.B " uint32_t \fIseparator_char\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. +It sets the component separator character that is used when converting globs. +The second argument must be one of the characters forward slash, backslash, or +dot. The default is backslash when running under Windows, otherwise forward +slash. The result of the function is zero for success or PCRE2_ERROR_BADDATA if +the second argument is invalid. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. 
diff --git a/doc/pcre2_set_heap_limit.3 b/doc/pcre2_set_heap_limit.3 new file mode 100644 index 0000000..7c155a2 --- /dev/null +++ b/doc/pcre2_set_heap_limit.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_HEAP_LIMIT 3 "11 April 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the backtracking heap limit field in a match context. The +result is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_match_limit.3 b/doc/pcre2_set_match_limit.3 new file mode 100644 index 0000000..523e97f --- /dev/null +++ b/doc/pcre2_set_match_limit.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_MATCH_LIMIT 3 "24 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the match limit field in a match context. The result is +always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_set_max_pattern_compiled_length.3 b/doc/pcre2_set_max_pattern_compiled_length.3 new file mode 100644 index 0000000..472a7bb --- /dev/null +++ b/doc/pcre2_set_max_pattern_compiled_length.3 @@ -0,0 +1,32 @@ +.TH PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH 3 "24 April 2024" "PCRE2 10.44" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_max_pattern_compiled_length( +.B " pcre2_compile_context *\fIccontext\fP, PCRE2_SIZE \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets, in a compile context, the maximum size (in bytes) for the +memory needed to hold the compiled version of a pattern that is compiled with +this context. The result is always zero. If a pattern that is passed to +\fBpcre2_compile()\fP with this context needs more memory, an error is +generated. The default is the largest number that a PCRE2_SIZE variable can +hold, which is effectively unlimited. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_max_pattern_length.3 b/doc/pcre2_set_max_pattern_length.3 new file mode 100644 index 0000000..7aa01c7 --- /dev/null +++ b/doc/pcre2_set_max_pattern_length.3 @@ -0,0 +1,31 @@ +.TH PCRE2_SET_MAX_PATTERN_LENGTH 3 "05 October 2016" "PCRE2 10.23" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets, in a compile context, the maximum text length (in code +units) of the pattern that can be compiled. The result is always zero. If a +longer pattern is passed to \fBpcre2_compile()\fP there is an immediate error +return. 
The default is effectively unlimited, being the largest value a +PCRE2_SIZE variable can hold. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_max_varlookbehind.3 b/doc/pcre2_set_max_varlookbehind.3 new file mode 100644 index 0000000..84639d9 --- /dev/null +++ b/doc/pcre2_set_max_varlookbehind.3 @@ -0,0 +1,30 @@ +.TH PCRE2_SET_MAX_VARLOOKBEHIND 3 "09 August 2023" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_max_varlookbehind(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This sets a maximum length for the number of characters matched by a +variable-length lookbehind assertion. The default is set when PCRE2 is built, +with the ultimate default being 255, the same as Perl. Lookbehind assertions +without a bounding length are not supported. The result is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_newline.3 b/doc/pcre2_set_newline.3 new file mode 100644 index 0000000..0bccfc7 --- /dev/null +++ b/doc/pcre2_set_newline.3 @@ -0,0 +1,39 @@ +.TH PCRE2_SET_NEWLINE 3 "26 May 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the newline convention within a compile context. This +specifies which character(s) are recognized as newlines when compiling and +matching patterns. 
The second argument must be one of: +.sp + PCRE2_NEWLINE_CR Carriage return only + PCRE2_NEWLINE_LF Linefeed only + PCRE2_NEWLINE_CRLF CR followed by LF only + PCRE2_NEWLINE_ANYCRLF Any of the above + PCRE2_NEWLINE_ANY Any Unicode newline sequence + PCRE2_NEWLINE_NUL The NUL character (binary zero) +.sp +The result is zero for success or PCRE2_ERROR_BADDATA if the second argument is +invalid. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_offset_limit.3 b/doc/pcre2_set_offset_limit.3 new file mode 100644 index 0000000..20fa104 --- /dev/null +++ b/doc/pcre2_set_offset_limit.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_OFFSET_LIMIT 3 "22 September 2015" "PCRE2 10.21" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the offset limit field in a match context. The result is +always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_parens_nest_limit.3 b/doc/pcre2_set_parens_nest_limit.3 new file mode 100644 index 0000000..0367619 --- /dev/null +++ b/doc/pcre2_set_parens_nest_limit.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_PARENS_NEST_LIMIT 3 "22 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_parens_nest_limit(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets, in a compile context, the maximum depth of nested +parentheses in a pattern. 
The result is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_recursion_limit.3 b/doc/pcre2_set_recursion_limit.3 new file mode 100644 index 0000000..26f4257 --- /dev/null +++ b/doc/pcre2_set_recursion_limit.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_RECURSION_LIMIT 3 "25 March 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_recursion_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is obsolete and should not be used in new code. Use +\fBpcre2_set_depth_limit()\fP instead. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_set_recursion_memory_management.3 b/doc/pcre2_set_recursion_memory_management.3 new file mode 100644 index 0000000..743c288 --- /dev/null +++ b/doc/pcre2_set_recursion_memory_management.3 @@ -0,0 +1,30 @@ +.TH PCRE2_SET_RECURSION_MEMORY_MANAGEMENT 3 "25 March 2017" "PCRE2 10.30" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_recursion_memory_management( +.B " pcre2_match_context *\fImcontext\fP," +.B " void *(*\fIprivate_malloc\fP)(size_t, void *)," +.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +From release 10.30 onwards, this function is obsolete and does nothing. The +result is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_set_substitute_callout.3 b/doc/pcre2_set_substitute_callout.3 new file mode 100644 index 0000000..cdd7ac6 --- /dev/null +++ b/doc/pcre2_set_substitute_callout.3 @@ -0,0 +1,31 @@ +.TH PCRE2_SET_SUBSTITUTE_CALLOUT 3 "12 November 2018" "PCRE2 10.33" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *)," +.B " void *\fIcallout_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the substitute callout fields in a match context (the first +argument). The second argument specifies a callout function, and the third +argument is an opaque data item that is passed to it. The result of this +function is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_substitute.3 b/doc/pcre2_substitute.3 new file mode 100644 index 0000000..7ee4b6a --- /dev/null +++ b/doc/pcre2_substitute.3 @@ -0,0 +1,110 @@ +.TH PCRE2_SUBSTITUTE 3 "22 January 2020" "PCRE2 10.35" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP," +.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP," +.B " PCRE2_SIZE *\fIoutlengthptr\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function matches a compiled regular expression against a given subject +string, using a matching algorithm that is similar to Perl's. 
It then makes a +copy of the subject, substituting a replacement string for what was matched. +Its arguments are: +.sp + \fIcode\fP Points to the compiled pattern + \fIsubject\fP Points to the subject string + \fIlength\fP Length of the subject string + \fIstartoffset\fP Offset in the subject at which to start matching + \fIoptions\fP Option bits + \fImatch_data\fP Points to a match data block, or is NULL + \fImcontext\fP Points to a match context, or is NULL + \fIreplacement\fP Points to the replacement string + \fIrlength\fP Length of the replacement string + \fIoutputbuffer\fP Points to the output buffer + \fIoutlengthptr\fP Points to the length of the output buffer +.sp +A match data block is needed only if you want to inspect the data from the +final match that is returned in that block or if PCRE2_SUBSTITUTE_MATCHED is +set. A match context is needed only if you want to: +.sp + Set up a callout function + Set a matching offset limit + Change the backtracking match limit + Change the backtracking depth limit + Set custom memory management in the match context +.sp +The \fIlength\fP, \fIstartoffset\fP and \fIrlength\fP values are code units, +not characters, as is the contents of the variable pointed at by +\fIoutlengthptr\fP. This variable must contain the length of the output buffer +when the function is called. If the function is successful, the value is +changed to the length of the new string, excluding the trailing zero that is +automatically added. +.P +The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for +zero-terminated strings. 
The options are: +.sp + PCRE2_ANCHORED Match only at the first position + PCRE2_ENDANCHORED Match only at end of subject +.\" JOIN + PCRE2_NOTBOL Subject is not the beginning of a + line + PCRE2_NOTEOL Subject is not the end of a line +.\" JOIN + PCRE2_NOTEMPTY An empty string is not a + valid match +.\" JOIN + PCRE2_NOTEMPTY_ATSTART An empty string at the start of + the subject is not a valid match + PCRE2_NO_JIT Do not use JIT matching +.\" JOIN + PCRE2_NO_UTF_CHECK Do not check for UTF validity in + the subject or replacement +.\" JOIN + (only relevant if PCRE2_UTF was + set at compile time) + PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing +.\" JOIN + PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the + subject + PCRE2_SUBSTITUTE_LITERAL The replacement string is literal +.\" JOIN + PCRE2_SUBSTITUTE_MATCHED Use pre-existing match data for + first match + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length + PCRE2_SUBSTITUTE_REPLACEMENT_ONLY Return only replacement string(s) + PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset + PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string +.sp +If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED, +PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored. +.P +If PCRE2_SUBSTITUTE_MATCHED is set, \fImatch_data\fP must be non-NULL; its +contents must be the result of a call to \fBpcre2_match()\fP using the same +pattern and subject. +.P +The function returns the number of substitutions, which may be zero if there +are no matches. The result may be greater than one only when +PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code +is returned. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_substring_copy_byname.3 b/doc/pcre2_substring_copy_byname.3 new file mode 100644 index 0000000..d2af63b --- /dev/null +++ b/doc/pcre2_substring_copy_byname.3 @@ -0,0 +1,46 @@ +.TH PCRE2_SUBSTRING_COPY_BYNAME 3 "21 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_substring_copy_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_UCHAR *\fIbuffer\fP, PCRE2_SIZE *\fIbufflen\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting a captured substring, identified +by name, into a given buffer. The arguments are: +.sp + \fImatch_data\fP The match data block for the match + \fIname\fP Name of the required substring + \fIbuffer\fP Buffer to receive the string + \fIbufflen\fP Length of buffer (code units) +.sp +The \fIbufflen\fP variable is updated to contain the length of the extracted +string, excluding the trailing zero. The yield of the function is zero for +success or one of the following error numbers: +.sp + PCRE2_ERROR_NOSUBSTRING there are no groups of that name + PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group + PCRE2_ERROR_UNSET the group did not participate in the match + PCRE2_ERROR_NOMEMORY the buffer is not big enough +.sp +If there is more than one group with the given name, the first one that is set +is returned. In this situation PCRE2_ERROR_UNSET means that no group with the +given name was set. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_substring_copy_bynumber.3 b/doc/pcre2_substring_copy_bynumber.3 new file mode 100644 index 0000000..4cee2b4 --- /dev/null +++ b/doc/pcre2_substring_copy_bynumber.3 @@ -0,0 +1,44 @@ +.TH PCRE2_SUBSTRING_COPY_BYNUMBER 3 "13 December 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_substring_copy_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_UCHAR *\fIbuffer\fP," +.B " PCRE2_SIZE *\fIbufflen\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting a captured substring into a given +buffer. The arguments are: +.sp + \fImatch_data\fP The match data block for the match + \fInumber\fP Number of the required substring + \fIbuffer\fP Buffer to receive the string + \fIbufflen\fP Length of buffer +.sp +The \fIbufflen\fP variable is updated with the length of the extracted string, +excluding the terminating zero. The yield of the function is zero for success +or one of the following error numbers: +.sp + PCRE2_ERROR_NOSUBSTRING there are no groups of that number + PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group + PCRE2_ERROR_UNSET the group did not participate in the match + PCRE2_ERROR_NOMEMORY the buffer is too small +.sp +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_substring_free.3 b/doc/pcre2_substring_free.3 new file mode 100644 index 0000000..6d0fd58 --- /dev/null +++ b/doc/pcre2_substring_free.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SUBSTRING_FREE 3 "28 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.SM +.B void pcre2_substring_free(PCRE2_UCHAR *\fIbuffer\fP); +. 
+.SH DESCRIPTION +.rs +.sp +This is a convenience function for freeing the memory obtained by a previous +call to \fBpcre2_substring_get_byname()\fP or +\fBpcre2_substring_get_bynumber()\fP. Its only argument is a pointer to the +string. If the argument is NULL, the function does nothing. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_substring_get_byname.3 b/doc/pcre2_substring_get_byname.3 new file mode 100644 index 0000000..6c3f7d5 --- /dev/null +++ b/doc/pcre2_substring_get_byname.3 @@ -0,0 +1,48 @@ +.TH PCRE2_SUBSTRING_GET_BYNAME 3 "21 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_substring_get_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_UCHAR **\fIbufferptr\fP, PCRE2_SIZE *\fIbufflen\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting a captured substring by name into +newly acquired memory. The arguments are: +.sp + \fImatch_data\fP The match data for the match + \fIname\fP Name of the required substring + \fIbufferptr\fP Where to put the string pointer + \fIbufflen\fP Where to put the string length +.sp +The memory in which the substring is placed is obtained by calling the same +memory allocation function that was used for the match data block. The +convenience function \fBpcre2_substring_free()\fP can be used to free it when +it is no longer needed. 
The yield of the function is zero for success or one of +the following error numbers: +.sp + PCRE2_ERROR_NOSUBSTRING there are no groups of that name + PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group + PCRE2_ERROR_UNSET the group did not participate in the match + PCRE2_ERROR_NOMEMORY memory could not be obtained +.sp +If there is more than one group with the given name, the first one that is set +is returned. In this situation PCRE2_ERROR_UNSET means that no group with the +given name was set. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_substring_get_bynumber.3 b/doc/pcre2_substring_get_bynumber.3 new file mode 100644 index 0000000..51b6a04 --- /dev/null +++ b/doc/pcre2_substring_get_bynumber.3 @@ -0,0 +1,45 @@ +.TH PCRE2_SUBSTRING_GET_BYNUMBER 3 "13 December 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_substring_get_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_UCHAR **\fIbufferptr\fP, PCRE2_SIZE *\fIbufflen\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting a captured substring by number +into newly acquired memory. The arguments are: +.sp + \fImatch_data\fP The match data for the match + \fInumber\fP Number of the required substring + \fIbufferptr\fP Where to put the string pointer + \fIbufflen\fP Where to put the string length +.sp +The memory in which the substring is placed is obtained by calling the same +memory allocation function that was used for the match data block. The +convenience function \fBpcre2_substring_free()\fP can be used to free it when +it is no longer needed. 
The yield of the function is zero for success or one of +the following error numbers: +.sp + PCRE2_ERROR_NOSUBSTRING there are no groups of that number + PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group + PCRE2_ERROR_UNSET the group did not participate in the match + PCRE2_ERROR_NOMEMORY memory could not be obtained +.sp +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_substring_length_byname.3 b/doc/pcre2_substring_length_byname.3 new file mode 100644 index 0000000..84cdc6a --- /dev/null +++ b/doc/pcre2_substring_length_byname.3 @@ -0,0 +1,34 @@ +.TH PCRE2_SUBSTRING_LENGTH_BYNAME 3 "21 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_substring_length_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_SIZE *\fIlength\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns the length of a matched substring, identified by name. +The arguments are: +.sp + \fImatch_data\fP The match data block for the match + \fIname\fP The substring name + \fIlength\fP Where to return the length +.sp +The yield is zero on success, or an error code if the substring is not found. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_substring_length_bynumber.3 b/doc/pcre2_substring_length_bynumber.3 new file mode 100644 index 0000000..12778d6 --- /dev/null +++ b/doc/pcre2_substring_length_bynumber.3 @@ -0,0 +1,36 @@ +.TH PCRE2_SUBSTRING_LENGTH_BYNUMBER 3 "22 December 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_substring_length_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_SIZE *\fIlength\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns the length of a matched substring, identified by number. +The arguments are: +.sp + \fImatch_data\fP The match data block for the match + \fInumber\fP The substring number + \fIlength\fP Where to return the length, or NULL +.sp +The third argument may be NULL if all you want to know is whether or not a +substring is set. The yield is zero on success, or a negative error code +otherwise. After a partial match, only substring 0 is available. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_substring_list_free.3 b/doc/pcre2_substring_list_free.3 new file mode 100644 index 0000000..2c6fb02 --- /dev/null +++ b/doc/pcre2_substring_list_free.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SUBSTRING_LIST_FREE 3 "02 December 2023" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.SM +.B void pcre2_substring_list_free(PCRE2_UCHAR **\fIlist\fP); +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for freeing the store obtained by a previous +call to \fBpcre2_substring_list_get()\fP. Its only argument is a pointer to +the list of string pointers. If the argument is NULL, the function returns +immediately, without doing anything. 
+.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_substring_list_get.3 b/doc/pcre2_substring_list_get.3 new file mode 100644 index 0000000..bdc400e --- /dev/null +++ b/doc/pcre2_substring_list_get.3 @@ -0,0 +1,44 @@ +.TH PCRE2_SUBSTRING_LIST_GET 3 "21 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_substring_list_get(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_UCHAR ***\fIlistptr\fP, PCRE2_SIZE **\fIlengthsptr\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting all the captured substrings after +a pattern match. It builds a list of pointers to the strings, and (optionally) +a second list that contains their lengths (in code units), excluding a +terminating zero that is added to each of them. All this is done in a single +block of memory that is obtained using the same memory allocation function that +was used to get the match data block. The convenience function +\fBpcre2_substring_list_free()\fP can be used to free it when it is no longer +needed. The arguments are: +.sp + \fImatch_data\fP The match data block + \fIlistptr\fP Where to put a pointer to the list + \fIlengthsptr\fP Where to put a pointer to the lengths, or NULL +.sp +A pointer to a list of pointers is put in the variable whose address is in +\fIlistptr\fP. The list is terminated by a NULL pointer. If \fIlengthsptr\fP is +not NULL, a matching list of lengths is created, and its address is placed in +\fIlengthsptr\fP. The yield of the function is zero on success or +PCRE2_ERROR_NOMEMORY if sufficient memory could not be obtained. 
+.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2_substring_nametable_scan.3 b/doc/pcre2_substring_nametable_scan.3 new file mode 100644 index 0000000..9ab58cd --- /dev/null +++ b/doc/pcre2_substring_nametable_scan.3 @@ -0,0 +1,41 @@ +.TH PCRE2_SUBSTRING_NAMETABLE_SCAN 3 "03 February 2019" "PCRE2 10.33" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_substring_nametable_scan(const pcre2_code *\fIcode\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_SPTR *\fIfirst\fP, PCRE2_SPTR *\fIlast\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This convenience function finds, for a compiled pattern, the first and last +entries for a given name in the table that translates capture group names into +numbers. +.sp + \fIcode\fP Compiled regular expression + \fIname\fP Name whose entries required + \fIfirst\fP Where to return a pointer to the first entry + \fIlast\fP Where to return a pointer to the last entry +.sp +When the name is found in the table, if \fIfirst\fP is NULL, the function +returns a group number, but if there is more than one matching entry, it is not +defined which one. Otherwise, when both pointers have been set, the yield of +the function is the length of each entry in code units. If the name is not +found, PCRE2_ERROR_NOSUBSTRING is returned. +.P +There is a complete description of the PCRE2 native API, including the format of +the table entries, in the +.\" HREF +\fBpcre2api\fP +.\" +page, and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/doc/pcre2_substring_number_from_name.3 b/doc/pcre2_substring_number_from_name.3 new file mode 100644 index 0000000..b077b1d --- /dev/null +++ b/doc/pcre2_substring_number_from_name.3 @@ -0,0 +1,38 @@ +.TH PCRE2_SUBSTRING_NUMBER_FROM_NAME 3 "21 October 2014" "PCRE2 10.00" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_substring_number_from_name(const pcre2_code *\fIcode\fP, +.B " PCRE2_SPTR \fIname\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This convenience function finds the number of a named substring capturing +parenthesis in a compiled pattern, provided that it is a unique name. The +function arguments are: +.sp + \fIcode\fP Compiled regular expression + \fIname\fP Name whose number is required +.sp +The yield of the function is the number of the parenthesis if the name is +found, or PCRE2_ERROR_NOSUBSTRING if it is not found. When duplicate names are +allowed (PCRE2_DUPNAMES is set), if the name is not unique, +PCRE2_ERROR_NOUNIQUESUBSTRING is returned. You can obtain the list of numbers +with the same name by calling \fBpcre2_substring_nametable_scan()\fP. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 new file mode 100644 index 0000000..6028d62 --- /dev/null +++ b/doc/pcre2api.3 @@ -0,0 +1,4187 @@ +.TH PCRE2API 3 "24 April 2024" "PCRE2 10.44" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.sp +.B #include +.sp +PCRE2 is a new API for PCRE, starting at release 10.0. This document contains a +description of all its native functions. See the +.\" HREF +\fBpcre2\fP +.\" +document for an overview of all the PCRE2 documentation. +. +. 
+.SH "PCRE2 NATIVE API BASIC FUNCTIONS" +.rs +.sp +.nf +.B pcre2_code *pcre2_compile(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP, +.B " uint32_t \fIoptions\fP, int *\fIerrorcode\fP, PCRE2_SIZE *\fIerroroffset,\fP" +.B " pcre2_compile_context *\fIccontext\fP);" +.sp +.B void pcre2_code_free(pcre2_code *\fIcode\fP); +.sp +.B pcre2_match_data *pcre2_match_data_create(uint32_t \fIovecsize\fP, +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_match_data *pcre2_match_data_create_from_pattern( +.B " const pcre2_code *\fIcode\fP, pcre2_general_context *\fIgcontext\fP);" +.sp +.B int pcre2_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP);" +.sp +.B int pcre2_dfa_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP," +.B " int *\fIworkspace\fP, PCRE2_SIZE \fIwscount\fP);" +.sp +.B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP); +.fi +. +. +.SH "PCRE2 NATIVE API AUXILIARY MATCH FUNCTIONS" +.rs +.sp +.nf +.B PCRE2_SPTR pcre2_get_mark(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE pcre2_get_match_data_heapframes_size( +.B " pcre2_match_data *\fImatch_data\fP);" +.sp +.B uint32_t pcre2_get_ovector_count(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP); +.fi +. +. 
+.SH "PCRE2 NATIVE API GENERAL CONTEXT FUNCTIONS" +.rs +.sp +.nf +.B pcre2_general_context *pcre2_general_context_create( +.B " void *(*\fIprivate_malloc\fP)(PCRE2_SIZE, void *)," +.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);" +.sp +.B pcre2_general_context *pcre2_general_context_copy( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B void pcre2_general_context_free(pcre2_general_context *\fIgcontext\fP); +.fi +. +. +.SH "PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS" +.rs +.sp +.nf +.B pcre2_compile_context *pcre2_compile_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_compile_context *pcre2_compile_context_copy( +.B " pcre2_compile_context *\fIccontext\fP);" +.sp +.B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP); +.sp +.B int pcre2_set_bsr(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP, +.B " const uint8_t *\fItables\fP);" +.sp +.B int pcre2_set_compile_extra_options(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIextra_options\fP);" +.sp +.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.sp +.B int pcre2_set_max_pattern_compiled_length( +.B " pcre2_compile_context *\fIccontext\fP, PCRE2_SIZE \fIvalue\fP);" +.sp +.B int pcre2_set_max_varlookbehind(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_parens_nest_limit(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP, +.B " int (*\fIguard_function\fP)(uint32_t, void *), void *\fIuser_data\fP);" +.fi +. +. 
+.SH "PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS" +.rs +.sp +.nf +.B pcre2_match_context *pcre2_match_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_match_context *pcre2_match_context_copy( +.B " pcre2_match_context *\fImcontext\fP);" +.sp +.B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP); +.sp +.B int pcre2_set_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_callout_block *, void *)," +.B " void *\fIcallout_data\fP);" +.sp +.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *)," +.B " void *\fIcallout_data\fP);" +.sp +.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.sp +.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_depth_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +. 
+.SH "PCRE2 NATIVE API STRING EXTRACTION FUNCTIONS" +.rs +.sp +.nf +.B int pcre2_substring_copy_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_UCHAR *\fIbuffer\fP, PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B int pcre2_substring_copy_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_UCHAR *\fIbuffer\fP," +.B " PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B void pcre2_substring_free(PCRE2_UCHAR *\fIbuffer\fP); +.sp +.B int pcre2_substring_get_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_UCHAR **\fIbufferptr\fP, PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B int pcre2_substring_get_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_UCHAR **\fIbufferptr\fP," +.B " PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B int pcre2_substring_length_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_SIZE *\fIlength\fP);" +.sp +.B int pcre2_substring_length_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_SIZE *\fIlength\fP);" +.sp +.B int pcre2_substring_nametable_scan(const pcre2_code *\fIcode\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_SPTR *\fIfirst\fP, PCRE2_SPTR *\fIlast\fP);" +.sp +.B int pcre2_substring_number_from_name(const pcre2_code *\fIcode\fP, +.B " PCRE2_SPTR \fIname\fP);" +.sp +.B void pcre2_substring_list_free(PCRE2_UCHAR **\fIlist\fP); +.sp +.B int pcre2_substring_list_get(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_UCHAR ***\fIlistptr\fP, PCRE2_SIZE **\fIlengthsptr\fP); +.fi +. +. 
+.SH "PCRE2 NATIVE API STRING SUBSTITUTION FUNCTION" +.rs +.sp +.nf +.B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP," +.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP," +.B " PCRE2_SIZE *\fIoutlengthptr\fP);" +.fi +. +. +.SH "PCRE2 NATIVE API JIT FUNCTIONS" +.rs +.sp +.nf +.B int pcre2_jit_compile(pcre2_code *\fIcode\fP, uint32_t \fIoptions\fP); +.sp +.B int pcre2_jit_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP);" +.sp +.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP); +.sp +.B pcre2_jit_stack *pcre2_jit_stack_create(size_t \fIstartsize\fP, +.B " size_t \fImaxsize\fP, pcre2_general_context *\fIgcontext\fP);" +.sp +.B void pcre2_jit_stack_assign(pcre2_match_context *\fImcontext\fP, +.B " pcre2_jit_callback \fIcallback_function\fP, void *\fIcallback_data\fP);" +.sp +.B void pcre2_jit_stack_free(pcre2_jit_stack *\fIjit_stack\fP); +.fi +. +. +.SH "PCRE2 NATIVE API SERIALIZATION FUNCTIONS" +.rs +.sp +.nf +.B int32_t pcre2_serialize_decode(pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, const uint8_t *\fIbytes\fP," +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B int32_t pcre2_serialize_encode(const pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, uint8_t **\fIserialized_bytes\fP," +.B " PCRE2_SIZE *\fIserialized_size\fP, pcre2_general_context *\fIgcontext\fP);" +.sp +.B void pcre2_serialize_free(uint8_t *\fIbytes\fP); +.sp +.B int32_t pcre2_serialize_get_number_of_codes(const uint8_t *\fIbytes\fP); +.fi +. +. 
+.SH "PCRE2 NATIVE API AUXILIARY FUNCTIONS" +.rs +.sp +.nf +.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP); +.sp +.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP); +.sp +.B int pcre2_get_error_message(int \fIerrorcode\fP, PCRE2_UCHAR *\fIbuffer\fP, +.B " PCRE2_SIZE \fIbufflen\fP);" +.sp +.B const uint8_t *pcre2_maketables(pcre2_general_context *\fIgcontext\fP); +.sp +.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP, +.B " const uint8_t *\fItables\fP);" +.sp +.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, +.B " void *\fIwhere\fP);" +.sp +.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP, +.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *)," +.B " void *\fIuser_data\fP);" +.sp +.B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP); +.fi +. +. +.SH "PCRE2 NATIVE API OBSOLETE FUNCTIONS" +.rs +.sp +.nf +.B int pcre2_set_recursion_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_recursion_memory_management( +.B " pcre2_match_context *\fImcontext\fP," +.B " void *(*\fIprivate_malloc\fP)(size_t, void *)," +.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);" +.fi +.sp +These functions became obsolete at release 10.30 and are retained only for +backward compatibility. They should not be used in new code. The first is +replaced by \fBpcre2_set_depth_limit()\fP; the second is no longer needed and +has no effect (it always returns zero). +. +. 
+.SH "PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS" +.rs +.sp +.nf +.B pcre2_convert_context *pcre2_convert_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_convert_context *pcre2_convert_context_copy( +.B " pcre2_convert_context *\fIcvcontext\fP);" +.sp +.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP); +.sp +.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP, +.B " uint32_t \fIescape_char\fP);" +.sp +.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP, +.B " uint32_t \fIseparator_char\fP);" +.sp +.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP, +.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP," +.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);" +.sp +.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP); +.fi +.sp +These functions provide a way of converting non-PCRE2 patterns into +patterns that can be processed by \fBpcre2_compile()\fP. This facility is +experimental and may be changed in future releases. At present, "globs" and +POSIX basic and extended patterns can be converted. Details are given in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. +. +. +.SH "PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES" +.rs +.sp +There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit code +units, respectively. However, there is just one header file, \fBpcre2.h\fP. +This contains the function prototypes and other definitions for all three +libraries. One, two, or all three can be installed simultaneously. On Unix-like +systems the libraries are called \fBlibpcre2-8\fP, \fBlibpcre2-16\fP, and +\fBlibpcre2-32\fP, and they can also co-exist with the original PCRE libraries. 
+Every PCRE2 function comes in three different forms, one for each library, for +example: +.sp + \fBpcre2_compile_8()\fP + \fBpcre2_compile_16()\fP + \fBpcre2_compile_32()\fP +.sp +There are also three different sets of data types: +.sp + \fBPCRE2_UCHAR8, PCRE2_UCHAR16, PCRE2_UCHAR32\fP + \fBPCRE2_SPTR8, PCRE2_SPTR16, PCRE2_SPTR32\fP +.sp +The UCHAR types define unsigned code units of the appropriate widths. +For example, PCRE2_UCHAR16 is usually defined as `uint16_t'. +The SPTR types are pointers to constants of the equivalent UCHAR types, +that is, they are pointers to vectors of unsigned code units. +.P +Character strings are passed to a PCRE2 library as sequences of unsigned +integers in code units of the appropriate width. The length of a string may +be given as a number of code units, or the string may be specified as +zero-terminated. +.P +Many applications use only one code unit width. For their convenience, macros +are defined whose names are the generic forms such as \fBpcre2_compile()\fP and +PCRE2_SPTR. These macros use the value of the macro PCRE2_CODE_UNIT_WIDTH to +generate the appropriate width-specific function and macro names. +PCRE2_CODE_UNIT_WIDTH is not defined by default. An application must define it +to be 8, 16, or 32 before including \fBpcre2.h\fP in order to make use of the +generic names. +.P +Applications that use more than one code unit width can be linked with more +than one PCRE2 library, but must define PCRE2_CODE_UNIT_WIDTH to be 0 before +including \fBpcre2.h\fP, and then use the real function names. Any code that is +to be included in an environment where the value of PCRE2_CODE_UNIT_WIDTH is +unknown should also use the real function names. (Unfortunately, it is not +possible in C code to save and restore the value of a macro.) +.P +If PCRE2_CODE_UNIT_WIDTH is not defined before including \fBpcre2.h\fP, a +compiler error occurs. 
+.P +When using multiple libraries in an application, you must take care when +processing any particular pattern to use only functions from a single library. +For example, if you want to run a match using a pattern that was compiled with +\fBpcre2_compile_16()\fP, you must do so with \fBpcre2_match_16()\fP, not +\fBpcre2_match_8()\fP or \fBpcre2_match_32()\fP. +.P +In the function summaries above, and in the rest of this document and other +PCRE2 documents, functions and data types are described using their generic +names, without the _8, _16, or _32 suffix. +. +. +.SH "PCRE2 API OVERVIEW" +.rs +.sp +PCRE2 has its own native API, which is described in this document. There are +also some wrapper functions for the 8-bit library that correspond to the +POSIX regular expression API, but they do not give access to all the +functionality of PCRE2 and they are not thread-safe. They are described in the +.\" HREF +\fBpcre2posix\fP +.\" +documentation. Both these APIs define a set of C function calls. +.P +The native API C data types, function prototypes, option values, and error +codes are defined in the header file \fBpcre2.h\fP, which also contains +definitions of PCRE2_MAJOR and PCRE2_MINOR, the major and minor release numbers +for the library. Applications can use these to include support for different +releases of PCRE2. +.P +In a Windows environment, if you want to statically link an application program +against a non-dll PCRE2 library, you must define PCRE2_STATIC before including +\fBpcre2.h\fP. +.P +The functions \fBpcre2_compile()\fP and \fBpcre2_match()\fP are used for +compiling and matching regular expressions in a Perl-compatible manner. A +sample program that demonstrates the simplest way of using them is provided in +the file called \fIpcre2demo.c\fP in the PCRE2 source distribution. 
A listing +of this program is given in the +.\" HREF +\fBpcre2demo\fP +.\" +documentation, and the +.\" HREF +\fBpcre2sample\fP +.\" +documentation describes how to compile and run it. +.P +The compiling and matching functions recognize various options that are passed +as bits in an options argument. There are also some more complicated parameters +such as custom memory management functions and resource limits that are passed +in "contexts" (which are just memory blocks, described below). Simple +applications do not need to make use of contexts. +.P +Just-in-time (JIT) compiler support is an optional feature of PCRE2 that can be +built in appropriate hardware environments. It greatly speeds up the matching +performance of many patterns. Programs can request that it be used if +available by calling \fBpcre2_jit_compile()\fP after a pattern has been +successfully compiled by \fBpcre2_compile()\fP. This does nothing if JIT +support is not available. +.P +More complicated programs might need to make use of the specialist functions +\fBpcre2_jit_stack_create()\fP, \fBpcre2_jit_stack_free()\fP, and +\fBpcre2_jit_stack_assign()\fP in order to control the JIT code's memory usage. +.P +JIT matching is automatically used by \fBpcre2_match()\fP if it is available, +unless the PCRE2_NO_JIT option is set. There is also a direct interface for JIT +matching, which gives improved performance at the expense of less sanity +checking. The JIT-specific functions are discussed in the +.\" HREF +\fBpcre2jit\fP +.\" +documentation. +.P +A second matching function, \fBpcre2_dfa_match()\fP, which is not +Perl-compatible, is also provided. This uses a different algorithm for the +matching. The alternative algorithm finds all possible matches (at a given +point in the subject), and scans the subject just once (unless there are +lookaround assertions). However, this algorithm does not return captured +substrings. 
A description of the two matching algorithms and their advantages +and disadvantages is given in the +.\" HREF +\fBpcre2matching\fP +.\" +documentation. There is no JIT support for \fBpcre2_dfa_match()\fP. +.P +In addition to the main compiling and matching functions, there are convenience +functions for extracting captured substrings from a subject string that has +been matched by \fBpcre2_match()\fP. They are: +.sp + \fBpcre2_substring_copy_byname()\fP + \fBpcre2_substring_copy_bynumber()\fP + \fBpcre2_substring_get_byname()\fP + \fBpcre2_substring_get_bynumber()\fP + \fBpcre2_substring_list_get()\fP + \fBpcre2_substring_length_byname()\fP + \fBpcre2_substring_length_bynumber()\fP + \fBpcre2_substring_nametable_scan()\fP + \fBpcre2_substring_number_from_name()\fP +.sp +\fBpcre2_substring_free()\fP and \fBpcre2_substring_list_free()\fP are also +provided, to free memory used for extracted strings. If either of these +functions is called with a NULL argument, the function returns immediately +without doing anything. +.P +The function \fBpcre2_substitute()\fP can be called to match a pattern and +return a copy of the subject string with substitutions for parts that were +matched. +.P +Functions whose names begin with \fBpcre2_serialize_\fP are used for saving +compiled patterns on disc or elsewhere, and reloading them later. +.P +Finally, there are functions for finding out information about a compiled +pattern (\fBpcre2_pattern_info()\fP) and about the configuration with which +PCRE2 was built (\fBpcre2_config()\fP). +.P +Functions with names ending with \fB_free()\fP are used for freeing memory +blocks of various sorts. In all cases, if one of these functions is called with +a NULL argument, it does nothing. +. +. +.SH "STRING LENGTHS AND OFFSETS" +.rs +.sp +The PCRE2 API uses string lengths and offsets into strings of code units in +several places. 
These values are always of type PCRE2_SIZE, which is an +unsigned integer type, currently always defined as \fIsize_t\fP. The largest +value that can be stored in such a type (that is ~(PCRE2_SIZE)0) is reserved +as a special indicator for zero-terminated strings and unset offsets. +Therefore, the longest string that can be handled is one less than this +maximum. Note that string lengths are always given in code units. Only in the +8-bit library is such a length the same as the number of bytes in the string. +. +. +.\" HTML +.SH NEWLINES +.rs +.sp +PCRE2 supports five different conventions for indicating line breaks in +strings: a single CR (carriage return) character, a single LF (linefeed) +character, the two-character sequence CRLF, any of the three preceding, or any +Unicode newline sequence. The Unicode newline sequences are the three just +mentioned, plus the single characters VT (vertical tab, U+000B), FF (form feed, +U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS +(paragraph separator, U+2029). +.P +Each of the first three conventions is used by at least one operating system as +its standard newline sequence. When PCRE2 is built, a default can be specified. +If it is not, the default is set to LF, which is the Unix standard. However, +the newline convention can be changed by an application when calling +\fBpcre2_compile()\fP, or it can be specified by special text at the start of +the pattern itself; this overrides any other settings. See the +.\" HREF +\fBpcre2pattern\fP +.\" +page for details of the special character sequences. +.P +In the PCRE2 documentation the word "newline" is used to mean "the character or +pair of characters that indicate a line break". The choice of newline +convention affects the handling of the dot, circumflex, and dollar +metacharacters, the handling of #-comments in /x mode, and, when CRLF is a +recognized line ending sequence, the match position advancement for a +non-anchored pattern. 
There is more detail about this in the +.\" HTML +.\" +section on \fBpcre2_match()\fP options +.\" +below. +.P +The choice of newline convention does not affect the interpretation of +the \en or \er escape sequences, nor does it affect what \eR matches; this has +its own separate convention. +. +. +.SH MULTITHREADING +.rs +.sp +In a multithreaded application it is important to keep thread-specific data +separate from data that can be shared between threads. The PCRE2 library code +itself is thread-safe: it contains no static or global variables. The API is +designed to be fairly simple for non-threaded applications while at the same +time ensuring that multithreaded applications can use it. +.P +There are several different blocks of data that are used to pass information +between the application and the PCRE2 libraries. +. +. +.SS "The compiled pattern" +.rs +.sp +A pointer to the compiled form of a pattern is returned to the user when +\fBpcre2_compile()\fP is successful. The data in the compiled pattern is fixed, +and does not change when the pattern is matched. Therefore, it is thread-safe, +that is, the same compiled pattern can be used by more than one thread +simultaneously. For example, an application can compile all its patterns at the +start, before forking off multiple threads that use them. However, if the +just-in-time (JIT) optimization feature is being used, it needs separate memory +stack areas for each thread. See the +.\" HREF +\fBpcre2jit\fP +.\" +documentation for more details. +.P +In a more complicated situation, where patterns are compiled only when they are +first needed, but are still shared between threads, pointers to compiled +patterns must be protected from simultaneous writing by multiple threads. This +is somewhat tricky to do correctly. 
If you know that writing to a pointer is +atomic in your environment, you can use logic like this: +.sp + Get a read-only (shared) lock (mutex) for pointer + if (pointer == NULL) + { + Get a write (unique) lock for pointer + if (pointer == NULL) pointer = pcre2_compile(... + } + Release the lock + Use pointer in pcre2_match() +.sp +Of course, testing for compilation errors should also be included in the code. +.P +The reason for checking the pointer a second time is as follows: Several +threads may have acquired the shared lock and tested the pointer for being +NULL, but only one of them will be given the write lock, with the rest kept +waiting. The winning thread will compile the pattern and store the result. +After this thread releases the write lock, another thread will get it, and if +it does not retest pointer for being NULL, will recompile the pattern and +overwrite the pointer, creating a memory leak and possibly causing other +issues. +.P +In an environment where writing to a pointer may not be atomic, the above logic +is not sufficient. The thread that is doing the compiling may be descheduled +after writing only part of the pointer, which could cause other threads to use +an invalid value. Instead of checking the pointer itself, a separate "pointer +is valid" flag (that can be updated atomically) must be used: +.sp + Get a read-only (shared) lock (mutex) for pointer + if (!pointer_is_valid) + { + Get a write (unique) lock for pointer + if (!pointer_is_valid) + { + pointer = pcre2_compile(... + pointer_is_valid = TRUE + } + } + Release the lock + Use pointer in pcre2_match() +.sp +If JIT is being used, but the JIT compilation is not being done immediately +(perhaps waiting to see if the pattern is used often enough), similar logic is +required. JIT compilation updates a value within the compiled code block, so a +thread must gain unique write access to the pointer before calling +\fBpcre2_jit_compile()\fP. 
Alternatively, \fBpcre2_code_copy()\fP or +\fBpcre2_code_copy_with_tables()\fP can be used to obtain a private copy of the +compiled code before calling the JIT compiler. +. +. +.SS "Context blocks" +.rs +.sp +The next main section below introduces the idea of "contexts" in which PCRE2 +functions are called. A context is nothing more than a collection of parameters +that control the way PCRE2 operates. Grouping a number of parameters together +in a context is a convenient way of passing them to a PCRE2 function without +using lots of arguments. The parameters that are stored in contexts are in some +sense "advanced features" of the API. Many straightforward applications will +not need to use contexts. +.P +In a multithreaded application, if the parameters in a context are values that +are never changed, the same context can be used by all the threads. However, if +any thread needs to change any value in a context, it must make its own +thread-specific copy. +. +. +.SS "Match blocks" +.rs +.sp +The matching functions need a block of memory for storing the results of a +match. This includes details of what was matched, as well as additional +information such as the name of a (*MARK) setting. Each thread must provide its +own copy of this memory. +. +. +.SH "PCRE2 CONTEXTS" +.rs +.sp +Some PCRE2 functions have a lot of parameters, many of which are used only by +specialist applications, for example, those that use custom memory management +or non-standard character tables. To keep function argument lists at a +reasonable size, and at the same time to keep the API extensible, "uncommon" +parameters are passed to certain functions in a \fBcontext\fP instead of +directly. A context is just a block of memory that holds the parameter values. +Applications that do not need to adjust any of the context parameters can pass +NULL when a context pointer is required. 
+.P +There are three different types of context: a general context that is relevant +for several PCRE2 operations, a compile-time context, and a match-time context. +. +. +.SS "The general context" +.rs +.sp +At present, this context just contains pointers to (and data for) external +memory management functions that are called from several places in the PCRE2 +library. The context is named `general' rather than specifically `memory' +because in future other fields may be added. If you do not want to supply your +own custom memory management functions, you do not need to bother with a +general context. A general context is created by: +.sp +.nf +.B pcre2_general_context *pcre2_general_context_create( +.B " void *(*\fIprivate_malloc\fP)(PCRE2_SIZE, void *)," +.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);" +.fi +.sp +The two function pointers specify custom memory management functions, whose +prototypes are: +.sp + \fBvoid *private_malloc(PCRE2_SIZE, void *);\fP + \fBvoid private_free(void *, void *);\fP +.sp +Whenever code in PCRE2 calls these functions, the final argument is the value +of \fImemory_data\fP. Either of the first two arguments of the creation +function may be NULL, in which case the system memory management functions +\fImalloc()\fP and \fIfree()\fP are used. (This is not currently useful, as +there are no other fields in a general context, but in future there might be.) +The \fIprivate_malloc()\fP function is used (if supplied) to obtain memory for +storing the context, and all three values are saved as part of the context. +.P +Whenever PCRE2 creates a data block of any kind, the block contains a pointer +to the \fIfree()\fP function that matches the \fImalloc()\fP function that was +used. When the time comes to free the block, this function is called. 
+.P +A general context can be copied by calling: +.sp +.nf +.B pcre2_general_context *pcre2_general_context_copy( +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +.sp +The memory used for a general context should be freed by calling: +.sp +.nf +.B void pcre2_general_context_free(pcre2_general_context *\fIgcontext\fP); +.fi +.sp +If this function is passed a NULL argument, it returns immediately without +doing anything. +. +. +.\" HTML +.SS "The compile context" +.rs +.sp +A compile context is required if you want to provide an external function for +stack checking during compilation or to change the default values of any of the +following compile-time parameters: +.sp + What \eR matches (Unicode newlines or CR, LF, CRLF only) + PCRE2's character tables + The newline character sequence + The compile time nested parentheses limit + The maximum length of the pattern string + The extra options bits (none set by default) +.sp +A compile context is also required if you are using custom memory management. +If none of these apply, just pass NULL as the context argument of +\fIpcre2_compile()\fP. +.P +A compile context is created, copied, and freed by the following functions: +.sp +.nf +.B pcre2_compile_context *pcre2_compile_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_compile_context *pcre2_compile_context_copy( +.B " pcre2_compile_context *\fIccontext\fP);" +.sp +.B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP); +.fi +.sp +A compile context is created with default values for its parameters. These can +be changed by calling the following functions, which return 0 on success, or +PCRE2_ERROR_BADDATA if invalid data is detected. +.sp +.nf +.B int pcre2_set_bsr(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +The value must be PCRE2_BSR_ANYCRLF, to specify that \eR matches only CR, LF, +or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line +ending sequence. 
The value is used by the JIT compiler and by the two +interpreted matching functions, \fIpcre2_match()\fP and +\fIpcre2_dfa_match()\fP. +.sp +.nf +.B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP, +.B " const uint8_t *\fItables\fP);" +.fi +.sp +The value must be the result of a call to \fBpcre2_maketables()\fP, whose only +argument is a general context. This function builds a set of character tables +in the current locale. +.sp +.nf +.B int pcre2_set_compile_extra_options(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIextra_options\fP);" +.fi +.sp +As PCRE2 has developed, almost all the 32 option bits that are available in +the \fIoptions\fP argument of \fBpcre2_compile()\fP have been used up. To avoid +running out, the compile context contains a set of extra option bits which are +used for some newer, assumed rarer, options. This function sets those bits. It +always sets all the bits (either on or off); that is, the new value replaces +the previous setting rather than being combined with it. The available options +are defined in the section entitled "Extra compile options" +.\" HTML +.\" +below. +.\" +.sp +.nf +.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.fi +.sp +This sets a maximum length, in code units, for any pattern string that is +compiled with this context. If the pattern is longer, an error is generated. +This facility is provided so that applications that accept patterns from +external sources can limit their size. The default is the largest number that a +PCRE2_SIZE variable can hold, which is effectively unlimited. +.sp +.nf +.B int pcre2_set_max_pattern_compiled_length( +.B " pcre2_compile_context *\fIccontext\fP, PCRE2_SIZE \fIvalue\fP);" +.fi +.sp +This sets a maximum size, in bytes, for the memory needed to hold the compiled +version of a pattern that is compiled with this context. If the pattern needs +more memory, an error is generated.
This facility is provided so that +applications that accept patterns from external sources can limit the amount of +memory they use. The default is the largest number that a PCRE2_SIZE variable +can hold, which is effectively unlimited. +.sp +.nf +.B int pcre2_set_max_varlookbehind(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +This sets a maximum length for the number of characters matched by a +variable-length lookbehind assertion. The default is set when PCRE2 is built, +with the ultimate default being 255, the same as Perl. Lookbehind assertions +without a bounding length are not supported. +.sp +.nf +.B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +This specifies which characters or character sequences are to be recognized as +newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only), +PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character +sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), +PCRE2_NEWLINE_ANY (any Unicode newline sequence), or PCRE2_NEWLINE_NUL (the +NUL character, that is a binary zero). +.P +A pattern can override the value set in the compile context by starting with a +sequence such as (*CRLF). See the +.\" HREF +\fBpcre2pattern\fP +.\" +page for details. +.P +When a pattern is compiled with the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE +option, the newline convention affects the recognition of the end of internal +comments starting with #. The value is saved with the compiled pattern for +subsequent use by the JIT compiler and by the two interpreted matching +functions, \fIpcre2_match()\fP and \fIpcre2_dfa_match()\fP. +.sp +.nf +.B int pcre2_set_parens_nest_limit(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +This parameter adjusts the limit, set when PCRE2 is built (default 250), on the +depth of parenthesis nesting in a pattern.
This limit stops rogue patterns +using up too much system stack when being compiled. The limit applies to +parentheses of all kinds, not just capturing parentheses. +.sp +.nf +.B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP, +.B " int (*\fIguard_function\fP)(uint32_t, void *), void *\fIuser_data\fP);" +.fi +.sp +There is at least one application that runs PCRE2 in threads with very limited +system stack, where running out of stack is to be avoided at all costs. The +parenthesis limit above cannot take account of how much stack is actually +available during compilation. For a finer control, you can supply a function +that is called whenever \fBpcre2_compile()\fP starts to compile a parenthesized +part of a pattern. This function can check the actual stack size (or anything +else that it wants to, of course). +.P +The first argument to the callout function gives the current depth of +nesting, and the second is user data that is set up by the last argument of +\fBpcre2_set_compile_recursion_guard()\fP. The callout function should return +zero if all is well, or non-zero to force an error. +. +. +.\" HTML +.SS "The match context" +.rs +.sp +A match context is required if you want to: +.sp + Set up a callout function + Set an offset limit for matching an unanchored pattern + Change the limit on the amount of heap used when matching + Change the backtracking match limit + Change the backtracking depth limit + Set custom memory management specifically for the match +.sp +If none of these apply, just pass NULL as the context argument of +\fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP, or \fBpcre2_jit_match()\fP. 
+.P +A match context is created, copied, and freed by the following functions: +.sp +.nf +.B pcre2_match_context *pcre2_match_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_match_context *pcre2_match_context_copy( +.B " pcre2_match_context *\fImcontext\fP);" +.sp +.B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP); +.fi +.sp +A match context is created with default values for its parameters. These can +be changed by calling the following functions, which return 0 on success, or +PCRE2_ERROR_BADDATA if invalid data is detected. +.sp +.nf +.B int pcre2_set_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_callout_block *, void *)," +.B " void *\fIcallout_data\fP);" +.fi +.sp +This sets up a callout function for PCRE2 to call at specified points +during a matching operation. Details are given in the +.\" HREF +\fBpcre2callout\fP +.\" +documentation. +.sp +.nf +.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *)," +.B " void *\fIcallout_data\fP);" +.fi +.sp +This sets up a callout function for PCRE2 to call after each substitution +made by \fBpcre2_substitute()\fP. Details are given in the section entitled +"Creating a new string with substitutions" +.\" HTML +.\" +below. +.\" +.sp +.nf +.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.fi +.sp +The \fIoffset_limit\fP parameter limits how far an unanchored search can +advance in the subject string. The default value is PCRE2_UNSET. The +\fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP functions return +PCRE2_ERROR_NOMATCH if a match with a starting point before or at the given +offset is not found. The \fBpcre2_substitute()\fP function makes no more +substitutions. 
+.P +For example, if the pattern /abc/ is matched against "123abc" with an offset +limit less than 3, the result is PCRE2_ERROR_NOMATCH. A match can never be +found if the \fIstartoffset\fP argument of \fBpcre2_match()\fP, +\fBpcre2_dfa_match()\fP, or \fBpcre2_substitute()\fP is greater than the offset +limit set in the match context. +.P +When using this facility, you must set the PCRE2_USE_OFFSET_LIMIT option when +calling \fBpcre2_compile()\fP so that when JIT is in use, different code can be +compiled. If a match is started with a non-default offset limit when +PCRE2_USE_OFFSET_LIMIT is not set, an error is generated. +.P +The offset limit facility can be used to track progress when searching large +subject strings or to limit the extent of global substitutions. See also the +PCRE2_FIRSTLINE option, which requires a match to start before or at the first +newline that follows the start of matching in the subject. If this is set with +an offset limit, a match must occur in the first line and also within the +offset limit. In other words, whichever limit comes first is used. +.sp +.nf +.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +The \fIheap_limit\fP parameter specifies, in units of kibibytes (1024 bytes), +the maximum amount of heap memory that \fBpcre2_match()\fP may use to hold +backtracking information when running an interpretive match. This limit also +applies to \fBpcre2_dfa_match()\fP, which may use the heap when processing +patterns with a lot of nested pattern recursion or lookarounds or atomic +groups. This limit does not apply to matching with the JIT optimization, which +has its own memory control arrangements (see the +.\" HREF +\fBpcre2jit\fP +.\" +documentation for more details). If the limit is reached, the negative error +code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2 +is built; if it is not, the default is set very large and is essentially +unlimited.
+.P +A value for the heap limit may also be supplied by an item at the start of a +pattern of the form +.sp + (*LIMIT_HEAP=ddd) +.sp +where ddd is a decimal number. However, such a setting is ignored unless ddd is +less than the limit set by the caller of \fBpcre2_match()\fP or, if no such +limit is set, less than the default. +.P +The \fBpcre2_match()\fP function always needs some heap memory, so setting a +value of zero guarantees a "heap limit exceeded" error. Details of how +\fBpcre2_match()\fP uses the heap are given in the +.\" HREF +\fBpcre2perform\fP +.\" +documentation. +.P +For \fBpcre2_dfa_match()\fP, a vector on the system stack is used when +processing pattern recursions, lookarounds, or atomic groups, and only if this +is not big enough is heap memory used. In this case, setting a value of zero +disables the use of the heap. +.sp +.nf +.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +The \fImatch_limit\fP parameter provides a means of preventing PCRE2 from using +up too many computing resources when processing patterns that are not going to +match, but which have a very large number of possibilities in their search +trees. The classic example is a pattern that uses nested unlimited repeats. +.P +There is an internal counter in \fBpcre2_match()\fP that is incremented each +time round its main matching loop. If this value reaches the match limit, +\fBpcre2_match()\fP returns the negative value PCRE2_ERROR_MATCHLIMIT. This has +the effect of limiting the amount of backtracking that can take place. For +patterns that are not anchored, the count restarts from zero for each position +in the subject string. This limit also applies to \fBpcre2_dfa_match()\fP, +though the counting is done in a different way. +.P +When \fBpcre2_match()\fP is called with a pattern that was successfully +processed by \fBpcre2_jit_compile()\fP, the way in which matching is executed +is entirely different. 
However, there is still the possibility of runaway +matching that goes on for a very long time, and so the \fImatch_limit\fP value +is also used in this case (but in a different way) to limit how long the +matching can continue. +.P +The default value for the limit can be set when PCRE2 is built; the default is +10 million, which handles all but the most extreme cases. A value for the match +limit may also be supplied by an item at the start of a pattern of the form +.sp + (*LIMIT_MATCH=ddd) +.sp +where ddd is a decimal number. However, such a setting is ignored unless ddd is +less than the limit set by the caller of \fBpcre2_match()\fP or +\fBpcre2_dfa_match()\fP or, if no such limit is set, less than the default. +.sp +.nf +.B int pcre2_set_depth_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +This parameter limits the depth of nested backtracking in \fBpcre2_match()\fP. +Each time a nested backtracking point is passed, a new memory frame is used +to remember the state of matching at that point. Thus, this parameter +indirectly limits the amount of memory that is used in a match. However, +because the size of each memory frame depends on the number of capturing +parentheses, the actual memory limit varies from pattern to pattern. This limit +was more useful in versions before 10.30, where function recursion was used for +backtracking. +.P +The depth limit is not relevant, and is ignored, when matching is done using +JIT compiled code. However, it is supported by \fBpcre2_dfa_match()\fP, which +uses it to limit the depth of nested internal recursive function calls that +implement atomic groups, lookaround assertions, and pattern recursions. This +limits, indirectly, the amount of system stack that is used. It was more useful +in versions before 10.32, when stack memory was used for local workspace +vectors for recursive function calls. 
From version 10.32, only local variables +are allocated on the stack and as each call uses only a few hundred bytes, even +a small stack can support quite a lot of recursion. +.P +If the depth of internal recursive function calls is great enough, local +workspace vectors are allocated on the heap from version 10.32 onwards, so the +depth limit also indirectly limits the amount of heap memory that is used. A +recursive pattern such as /(.(?2))((?1)|)/, when matched to a very long string +using \fBpcre2_dfa_match()\fP, can use a great deal of memory. However, it is +probably better to limit heap usage directly by calling +\fBpcre2_set_heap_limit()\fP. +.P +The default value for the depth limit can be set when PCRE2 is built; if it is +not, the default is set to the same value as the default for the match limit. +If the limit is exceeded, \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP +returns PCRE2_ERROR_DEPTHLIMIT. A value for the depth limit may also be +supplied by an item at the start of a pattern of the form +.sp + (*LIMIT_DEPTH=ddd) +.sp +where ddd is a decimal number. However, such a setting is ignored unless ddd is +less than the limit set by the caller of \fBpcre2_match()\fP or +\fBpcre2_dfa_match()\fP or, if no such limit is set, less than the default. +. +. +.SH "CHECKING BUILD-TIME OPTIONS" +.rs +.sp +.B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP); +.P +The function \fBpcre2_config()\fP makes it possible for a PCRE2 client to find +the value of certain configuration parameters and to discover which optional +features have been compiled into the PCRE2 library. The +.\" HREF +\fBpcre2build\fP +.\" +documentation has more details about these features. +.P +The first argument for \fBpcre2_config()\fP specifies which information is +required. The second argument is a pointer to memory into which the information +is placed. If NULL is passed, the function returns the amount of memory that is +needed for the requested information. 
For calls that return numerical values, +the value is in bytes; when requesting these values, \fIwhere\fP should point +to appropriately aligned memory. For calls that return strings, the required +length is given in code units, not counting the terminating zero. +.P +When requesting information, the returned value from \fBpcre2_config()\fP is +non-negative on success, or the negative error code PCRE2_ERROR_BADOPTION if +the value in the first argument is not recognized. The following information is +available: +.sp + PCRE2_CONFIG_BSR +.sp +The output is a uint32_t integer whose value indicates what character +sequences the \eR escape sequence matches by default. A value of +PCRE2_BSR_UNICODE means that \eR matches any Unicode line ending sequence; a +value of PCRE2_BSR_ANYCRLF means that \eR matches only CR, LF, or CRLF. The +default can be overridden when a pattern is compiled. +.sp + PCRE2_CONFIG_COMPILED_WIDTHS +.sp +The output is a uint32_t integer whose lower bits indicate which code unit +widths were selected when PCRE2 was built. The 1-bit indicates 8-bit support, +and the 2-bit and 4-bit indicate 16-bit and 32-bit support, respectively. +.sp + PCRE2_CONFIG_DEPTHLIMIT +.sp +The output is a uint32_t integer that gives the default limit for the depth of +nested backtracking in \fBpcre2_match()\fP or the depth of nested recursions, +lookarounds, and atomic groups in \fBpcre2_dfa_match()\fP. Further details are +given with \fBpcre2_set_depth_limit()\fP above. +.sp + PCRE2_CONFIG_HEAPLIMIT +.sp +The output is a uint32_t integer that gives, in kibibytes, the default limit +for the amount of heap memory used by \fBpcre2_match()\fP or +\fBpcre2_dfa_match()\fP. Further details are given with +\fBpcre2_set_heap_limit()\fP above. +.sp + PCRE2_CONFIG_JIT +.sp +The output is a uint32_t integer that is set to one if support for just-in-time +compiling is included in the library; otherwise it is set to zero. 
Note that +having the support in the library does not guarantee that JIT will be used for +any given match. See the +.\" HREF +\fBpcre2jit\fP +.\" +documentation for more details. +.sp + PCRE2_CONFIG_JITTARGET +.sp +The \fIwhere\fP argument should point to a buffer that is at least 48 code +units long. (The exact length required can be found by calling +\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with a +string that contains the name of the architecture for which the JIT compiler is +configured, for example "x86 32bit (little endian + unaligned)". If JIT support +is not available, PCRE2_ERROR_BADOPTION is returned, otherwise the number of +code units used is returned. This is the length of the string, plus one unit +for the terminating zero. +.sp + PCRE2_CONFIG_LINKSIZE +.sp +The output is a uint32_t integer that contains the number of bytes used for +internal linkage in compiled regular expressions. When PCRE2 is configured, the +value can be set to 2, 3, or 4, with the default being 2. This is the value +that is returned by \fBpcre2_config()\fP. However, when the 16-bit library is +compiled, a value of 3 is rounded up to 4, and when the 32-bit library is +compiled, internal linkages always use 4 bytes, so the configured value is not +relevant. +.P +The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all +but the most massive patterns, since it allows the size of the compiled pattern +to be up to 65535 code units. Larger values allow larger regular expressions to +be compiled by those two libraries, but at the expense of slower matching. +.sp + PCRE2_CONFIG_MATCHLIMIT +.sp +The output is a uint32_t integer that gives the default match limit for +\fBpcre2_match()\fP. Further details are given with +\fBpcre2_set_match_limit()\fP above. +.sp + PCRE2_CONFIG_NEWLINE +.sp +The output is a uint32_t integer whose value specifies the default character +sequence that is recognized as meaning "newline". 
The values are: +.sp + PCRE2_NEWLINE_CR Carriage return (CR) + PCRE2_NEWLINE_LF Linefeed (LF) + PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF) + PCRE2_NEWLINE_ANY Any Unicode line ending + PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF + PCRE2_NEWLINE_NUL The NUL character (binary zero) +.sp +The default should normally correspond to the standard sequence for your +operating system. +.sp + PCRE2_CONFIG_NEVER_BACKSLASH_C +.sp +The output is a uint32_t integer that is set to one if the use of \eC was +permanently disabled when PCRE2 was built; otherwise it is set to zero. +.sp + PCRE2_CONFIG_PARENSLIMIT +.sp +The output is a uint32_t integer that gives the maximum depth of nesting +of parentheses (of any kind) in a pattern. This limit is imposed to cap the +amount of system stack used when a pattern is compiled. It is specified when +PCRE2 is built; the default is 250. This limit does not take into account the +stack that may already be used by the calling application. For finer control +over compilation stack usage, see \fBpcre2_set_compile_recursion_guard()\fP. +.sp + PCRE2_CONFIG_STACKRECURSE +.sp +This parameter is obsolete and should not be used in new code. The output is a +uint32_t integer that is always set to zero. +.sp + PCRE2_CONFIG_TABLES_LENGTH +.sp +The output is a uint32_t integer that gives the length of PCRE2's character +processing tables in bytes. For details of these tables see the +.\" HTML +.\" +section on locale support +.\" +below. +.sp + PCRE2_CONFIG_UNICODE_VERSION +.sp +The \fIwhere\fP argument should point to a buffer that is at least 24 code +units long. (The exact length required can be found by calling +\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) If PCRE2 has been compiled +without Unicode support, the buffer is filled with the text "Unicode not +supported". Otherwise, the Unicode version string (for example, "8.0.0") is +inserted. The number of code units used is returned. 
This is the length of the +string plus one unit for the terminating zero. +.sp + PCRE2_CONFIG_UNICODE +.sp +The output is a uint32_t integer that is set to one if Unicode support is +available; otherwise it is set to zero. Unicode support implies UTF support. +.sp + PCRE2_CONFIG_VERSION +.sp +The \fIwhere\fP argument should point to a buffer that is at least 24 code +units long. (The exact length required can be found by calling +\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with +the PCRE2 version string, zero-terminated. The number of code units used is +returned. This is the length of the string plus one unit for the terminating +zero. +. +. +.\" HTML +.SH "COMPILING A PATTERN" +.rs +.sp +.nf +.B pcre2_code *pcre2_compile(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP, +.B " uint32_t \fIoptions\fP, int *\fIerrorcode\fP, PCRE2_SIZE *\fIerroroffset,\fP" +.B " pcre2_compile_context *\fIccontext\fP);" +.sp +.B void pcre2_code_free(pcre2_code *\fIcode\fP); +.sp +.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP); +.sp +.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP); +.fi +.P +The \fBpcre2_compile()\fP function compiles a pattern into an internal form. +The pattern is defined by a pointer to a string of code units and a length in +code units. If the pattern is zero-terminated, the length can be specified as +PCRE2_ZERO_TERMINATED. A NULL pattern pointer with a length of zero is treated +as an empty string (NULL with a non-zero length causes an error return). The +function returns a pointer to a block of memory that contains the compiled +pattern and related data, or NULL if an error occurred. +.P +If the compile context argument \fIccontext\fP is NULL, memory for the compiled +pattern is obtained by calling \fBmalloc()\fP. Otherwise, it is obtained from +the same memory function that was used for the compile context. 
The caller must +free the memory by calling \fBpcre2_code_free()\fP when it is no longer needed. +If \fBpcre2_code_free()\fP is called with a NULL argument, it returns +immediately, without doing anything. +.P +The function \fBpcre2_code_copy()\fP makes a copy of the compiled code in new +memory, using the same memory allocator as was used for the original. However, +if the code has been processed by the JIT compiler (see +.\" HTML +.\" +below), +.\" +the JIT information cannot be copied (because it is position-dependent). +The new copy can initially be used only for non-JIT matching, though it can be +passed to \fBpcre2_jit_compile()\fP if required. If \fBpcre2_code_copy()\fP is +called with a NULL argument, it returns NULL. +.P +The \fBpcre2_code_copy()\fP function provides a way for individual threads in a +multithreaded application to acquire a private copy of shared compiled code. +However, it does not make a copy of the character tables used by the compiled +pattern; the new pattern code points to the same tables as the original code. +(See +.\" HTML +.\" +"Locale Support" +.\" +below for details of these character tables.) In many applications the same +tables are used throughout, so this behaviour is appropriate. Nevertheless, +there are occasions when a copy of a compiled pattern and the relevant tables +are needed. The \fBpcre2_code_copy_with_tables()\fP provides this facility. +Copies of both the code and the tables are made, with the new code pointing to +the new tables. The memory for the new tables is automatically freed when +\fBpcre2_code_free()\fP is called for the new copy of the compiled code. If +\fBpcre2_code_copy_with_tables()\fP is called with a NULL argument, it returns +NULL. +.P +NOTE: When one of the matching functions is called, pointers to the compiled +pattern and the subject string are set in the match data block so that they can +be referenced by the substring extraction functions after a successful match. 
+After running a match, you must not free a compiled pattern or a subject string +until after all operations on the +.\" HTML +.\" +match data block +.\" +have taken place, unless, in the case of the subject string, you have used the +PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled +"Option bits for \fBpcre2_match()\fP" +.\" HTML +.\" +below. +.\" +.P +The \fIoptions\fP argument for \fBpcre2_compile()\fP contains various bit +settings that affect the compilation. It should be zero if none of them are +required. The available options are described below. Some of them (in +particular, those that are compatible with Perl, but some others as well) can +also be set and unset from within the pattern (see the detailed description in +the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation). +.P +For those options that can be different in different parts of the pattern, the +contents of the \fIoptions\fP argument specifies their settings at the start of +compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and PCRE2_NO_UTF_CHECK +options can be set at the time of matching as well as at compile time. +.P +Some additional options and less frequently required compile-time parameters +(for example, the newline setting) can be provided in a compile context (as +described +.\" HTML +.\" +above). +.\" +.P +If \fIerrorcode\fP or \fIerroroffset\fP is NULL, \fBpcre2_compile()\fP returns +NULL immediately. Otherwise, the variables to which these point are set to an +error code and an offset (number of code units) within the pattern, +respectively, when \fBpcre2_compile()\fP returns NULL because a compilation +error has occurred. +.P +There are nearly 100 positive error codes that \fBpcre2_compile()\fP may return +if it finds an error in the pattern. There are also some negative error codes +that are used for invalid UTF strings when validity checking is in force. 
These +are the same as given by \fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP, and +are described in the +.\" HREF +\fBpcre2unicode\fP +.\" +documentation. There is no separate documentation for the positive error codes, +because the textual error messages that are obtained by calling the +\fBpcre2_get_error_message()\fP function (see "Obtaining a textual error +message" +.\" HTML +.\" +below) +.\" +should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined +for both positive and negative error codes in \fBpcre2.h\fP. When compilation +is successful \fIerrorcode\fP is set to a value that returns the message "no +error" if passed to \fBpcre2_get_error_message()\fP. +.P +The value returned in \fIerroroffset\fP is an indication of where in the +pattern an error occurred. When there is no error, zero is returned. A non-zero +value is not necessarily the furthest point in the pattern that was read. For +example, after the error "lookbehind assertion is not fixed length", the error +offset points to the start of the failing assertion. For an invalid UTF-8 or +UTF-16 string, the offset is that of the first code unit of the failing +character. +.P +Some errors are not detected until the whole pattern has been scanned; in these +cases, the offset passed back is the length of the pattern. Note that the +offset is in code units, not characters, even in a UTF mode. It may sometimes +point into the middle of a UTF-8 or UTF-16 character. +.P +This code fragment shows a typical straightforward call to +\fBpcre2_compile()\fP: +.sp + pcre2_code *re; + PCRE2_SIZE erroffset; + int errorcode; + re = pcre2_compile( + "^A.*Z", /* the pattern */ + PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */ + 0, /* default options */ + &errorcode, /* for error code */ + &erroffset, /* for error offset */ + NULL); /* no compile context */ +.sp +. +. 
+.SS "Main compile options" +.rs +.sp +The following names for option bits are defined in the \fBpcre2.h\fP header +file: +.sp + PCRE2_ANCHORED +.sp +If this bit is set, the pattern is forced to be "anchored", that is, it is +constrained to match only at the first matching point in the string that is +being searched (the "subject string"). This effect can also be achieved by +appropriate constructs in the pattern itself, which is the only way to do it in +Perl. +.sp + PCRE2_ALLOW_EMPTY_CLASS +.sp +By default, for compatibility with Perl, a closing square bracket that +immediately follows an opening one is treated as a data character for the +class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which +therefore contains no characters and so can never match. +.sp + PCRE2_ALT_BSUX +.sp +This option requests alternative handling of three escape sequences, which +makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set: +.P +(1) \eU matches an upper case "U" character; by default \eU causes a compile +time error (Perl uses \eU to upper case subsequent characters). +.P +(2) \eu matches a lower case "u" character unless it is followed by four +hexadecimal digits, in which case the hexadecimal number defines the code point +to match. By default, \eu causes a compile time error (Perl uses it to upper +case the following character). +.P +(3) \ex matches a lower case "x" character unless it is followed by two +hexadecimal digits, in which case the hexadecimal number defines the code point +to match. By default, as in Perl, a hexadecimal number is always expected after +\ex, but it may have zero, one, or two digits (so, for example, \exz matches a +binary zero character followed by z). +.P +ECMAscript 6 added additional functionality to \eu. This can be accessed using +the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile options" +.\" HTML +.\" +below). +.\" +Note that this alternative escape handling applies only to patterns. 
Neither of +these options affects the processing of replacement strings passed to +\fBpcre2_substitute()\fP. +.sp + PCRE2_ALT_CIRCUMFLEX +.sp +In multiline mode (when PCRE2_MULTILINE is set), the circumflex metacharacter +matches at the start of the subject (unless PCRE2_NOTBOL is set), and also +after any internal newline. However, it does not match after a newline at the +end of the subject, for compatibility with Perl. If you want a multiline +circumflex also to match after a terminating newline, you must set +PCRE2_ALT_CIRCUMFLEX. +.sp + PCRE2_ALT_VERBNAMES +.sp +By default, for compatibility with Perl, the name in any verb sequence such as +(*MARK:NAME) is any sequence of characters that does not include a closing +parenthesis. The name is not processed in any way, and it is not possible to +include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES +option is set, normal backslash processing is applied to verb names and only an +unescaped closing parenthesis terminates the name. A closing parenthesis can be +included in a name either as \e) or between \eQ and \eE. If the PCRE2_EXTENDED +or PCRE2_EXTENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped +whitespace in verb names is skipped and #-comments are recognized, exactly as +in the rest of the pattern. +.sp + PCRE2_AUTO_CALLOUT +.sp +If this bit is set, \fBpcre2_compile()\fP automatically inserts callout items, +all with number 255, before each pattern item, except immediately before or +after an explicit callout in the pattern. For discussion of the callout +facility, see the +.\" HREF +\fBpcre2callout\fP +.\" +documentation. +.sp + PCRE2_CASELESS +.sp +If this bit is set, letters in the pattern match both upper and lower case +letters in the subject. It is equivalent to Perl's /i option, and it can be +changed within a pattern by a (?i) option setting. 
If either PCRE2_UTF or +PCRE2_UCP is set, Unicode properties are used for all characters with more than +one other case, and for all characters whose code points are greater than +U+007F. Note that there are two ASCII characters, K and S, that, in addition to +their lower case ASCII equivalents, are case-equivalent with U+212A (Kelvin +sign) and U+017F (long S) respectively. If you do not want this case +equivalence, you can suppress it by setting PCRE2_EXTRA_CASELESS_RESTRICT. +.P +For lower valued characters with only one other case, a lookup table is used +for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is used +for all code points less than 256, and higher code points (available only in +16-bit or 32-bit mode) are treated as not having another case. +.sp + PCRE2_DOLLAR_ENDONLY +.sp +If this bit is set, a dollar metacharacter in the pattern matches only at the +end of the subject string. Without this option, a dollar also matches +immediately before a newline at the end of the string (but not before any other +newlines). The PCRE2_DOLLAR_ENDONLY option is ignored if PCRE2_MULTILINE is +set. There is no equivalent to this option in Perl, and no way to set it within +a pattern. +.sp + PCRE2_DOTALL +.sp +If this bit is set, a dot metacharacter in the pattern matches any character, +including one that indicates a newline. However, it only ever matches one +character, even if newlines are coded as CRLF. Without this option, a dot does +not match when the current position in the subject is at a newline. This option +is equivalent to Perl's /s option, and it can be changed within a pattern by a +(?s) option setting. A negative class such as [^a] always matches newline +characters, and the \eN escape sequence always matches a non-newline character, +independent of the setting of PCRE2_DOTALL. +.sp + PCRE2_DUPNAMES +.sp +If this bit is set, names used to identify capture groups need not be unique. 
+This can be helpful for certain types of pattern when it is known that only one +instance of the named group can ever be matched. There are more details of +named capture groups below; see also the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. +.sp + PCRE2_ENDANCHORED +.sp +If this bit is set, the end of any pattern match must be right at the end of +the string being searched (the "subject string"). If the pattern match +succeeds by reaching (*ACCEPT), but does not reach the end of the subject, the +match fails at the current starting point. For unanchored patterns, a new match +is then tried at the next starting point. However, if the match succeeds by +reaching the end of the pattern, but not the end of the subject, backtracking +occurs and an alternative match may be found. Consider these two patterns: +.sp + .(*ACCEPT)|.. + .|.. +.sp +If matched against "abc" with PCRE2_ENDANCHORED set, the first matches "c" +whereas the second matches "bc". The effect of PCRE2_ENDANCHORED can also be +achieved by appropriate constructs in the pattern itself, which is the only way +to do it in Perl. +.P +For DFA matching with \fBpcre2_dfa_match()\fP, PCRE2_ENDANCHORED applies only +to the first (that is, the longest) matched string. Other parallel matches, +which are necessarily substrings of the first one, must obviously end before +the end of the subject. +.sp + PCRE2_EXTENDED +.sp +If this bit is set, most white space characters in the pattern are totally +ignored except when escaped, inside a character class, or inside a \eQ...\eE +sequence. However, white space is not allowed within sequences such as (?> that +introduce various parenthesized groups, nor within numerical quantifiers such +as {1,3}. Ignorable white space is permitted between an item and a following +quantifier and between a quantifier and a following + that indicates +possessiveness. PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be +changed within a pattern by a (?x) option setting. 
+.P +When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as +white space only those characters with code points less than 256 that are +flagged as white space in its low-character table. The table is normally +created by +.\" HREF +\fBpcre2_maketables()\fP, +.\" +which uses the \fBisspace()\fP function to identify space characters. In most +ASCII environments, the relevant characters are those with code points 0x0009 +(tab), 0x000A (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D +(carriage return), and 0x0020 (space). +.P +When PCRE2 is compiled with Unicode support, in addition to these characters, +five more Unicode "Pattern White Space" characters are recognized by +PCRE2_EXTENDED. These are U+0085 (next line), U+200E (left-to-right mark), +U+200F (right-to-left mark), U+2028 (line separator), and U+2029 (paragraph +separator). This set of characters is the same as recognized by Perl's /x +option. Note that the horizontal and vertical space characters that are matched +by the \eh and \ev escapes in patterns are a much bigger set. +.P +As well as ignoring most white space, PCRE2_EXTENDED also causes characters +between an unescaped # outside a character class and the next newline, +inclusive, to be ignored, which makes it possible to include comments inside +complicated patterns. Note that the end of this type of comment is a literal +newline sequence in the pattern; escape sequences that happen to represent a +newline do not count. +.P +Which characters are interpreted as newlines can be specified by a setting in +the compile context that is passed to \fBpcre2_compile()\fP or by a special +sequence at the start of the pattern, as described in the section entitled +.\" HTML +.\" +"Newline conventions" +.\" +in the \fBpcre2pattern\fP documentation. A default is defined when PCRE2 is +built. 
+.sp + PCRE2_EXTENDED_MORE +.sp +This option has the effect of PCRE2_EXTENDED, but, in addition, unescaped space +and horizontal tab characters are ignored inside a character class. Note: only +these two characters are ignored, not the full set of pattern white space +characters that are ignored outside a character class. PCRE2_EXTENDED_MORE is +equivalent to Perl's /xx option, and it can be changed within a pattern by a +(?xx) option setting. +.sp + PCRE2_FIRSTLINE +.sp +If this option is set, the start of an unanchored pattern match must be before +or at the first newline in the subject string following the start of matching, +though the matched text may continue over the newline. If \fIstartoffset\fP is +non-zero, the limiting newline is not necessarily the first newline in the +subject. For example, if the subject string is "abc\enxyz" (where \en +represents a single-character newline) a pattern match for "yz" succeeds with +PCRE2_FIRSTLINE if \fIstartoffset\fP is greater than 3. See also +PCRE2_USE_OFFSET_LIMIT, which provides a more general limiting facility. If +PCRE2_FIRSTLINE is set with an offset limit, a match must occur in the first +line and also within the offset limit. In other words, whichever limit comes +first is used. This option has no effect for anchored patterns. +.sp + PCRE2_LITERAL +.sp +If this option is set, all meta-characters in the pattern are disabled, and it +is treated as a literal string. Matching literal strings with a regular +expression engine is not the most efficient way of doing it. If you are doing a +lot of literal matching and are worried about efficiency, you should consider +using other approaches. The only other main options that are allowed with +PCRE2_LITERAL are: PCRE2_ANCHORED, PCRE2_ENDANCHORED, PCRE2_AUTO_CALLOUT, +PCRE2_CASELESS, PCRE2_FIRSTLINE, PCRE2_MATCH_INVALID_UTF, +PCRE2_NO_START_OPTIMIZE, PCRE2_NO_UTF_CHECK, PCRE2_UTF, and +PCRE2_USE_OFFSET_LIMIT. 
The extra options PCRE2_EXTRA_MATCH_LINE and +PCRE2_EXTRA_MATCH_WORD are also supported. Any other options cause an error. +.sp + PCRE2_MATCH_INVALID_UTF +.sp +This option forces PCRE2_UTF (see below) and also enables support for matching +by \fBpcre2_match()\fP in subject strings that contain invalid UTF sequences. +Note, however, that the 16-bit and 32-bit PCRE2 libraries process strings as +sequences of uint16_t or uint32_t code points. They cannot find valid UTF +sequences within an arbitrary string of bytes unless such sequences are +suitably aligned. This facility is not supported for DFA matching. For details, +see the +.\" HREF +\fBpcre2unicode\fP +.\" +documentation. +.sp + PCRE2_MATCH_UNSET_BACKREF +.sp +If this option is set, a backreference to an unset capture group matches an +empty string (by default this causes the current matching alternative to fail). +A pattern such as (\e1)(a) succeeds when this option is set (assuming it can +find an "a" in the subject), whereas it fails by default, for Perl +compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka +JavaScript). +.sp + PCRE2_MULTILINE +.sp +By default, for the purposes of matching "start of line" and "end of line", +PCRE2 treats the subject string as consisting of a single line of characters, +even if it actually contains newlines. The "start of line" metacharacter (^) +matches only at the start of the string, and the "end of line" metacharacter +($) matches only at the end of the string, or before a terminating newline +(except when PCRE2_DOLLAR_ENDONLY is set). Note, however, that unless +PCRE2_DOTALL is set, the "any character" metacharacter (.) does not match at a +newline. This behaviour (for ^, $, and dot) is the same as Perl. +.P +When PCRE2_MULTILINE is set, the "start of line" and "end of line" +constructs match immediately following or immediately before internal newlines +in the subject string, respectively, as well as at the very start and end. 
This +is equivalent to Perl's /m option, and it can be changed within a pattern by a +(?m) option setting. Note that the "start of line" metacharacter does not match +after a newline at the end of the subject, for compatibility with Perl. +However, you can change this by setting the PCRE2_ALT_CIRCUMFLEX option. If +there are no newlines in a subject string, or no occurrences of ^ or $ in a +pattern, setting PCRE2_MULTILINE has no effect. +.sp + PCRE2_NEVER_BACKSLASH_C +.sp +This option locks out the use of \eC in the pattern that is being compiled. +This escape can cause unpredictable behaviour in UTF-8 or UTF-16 modes, because +it may leave the current matching point in the middle of a multi-code-unit +character. This option may be useful in applications that process patterns from +external sources. Note that there is also a build-time option that permanently +locks out the use of \eC. +.sp + PCRE2_NEVER_UCP +.sp +This option locks out the use of Unicode properties for handling \eB, \eb, \eD, +\ed, \eS, \es, \eW, \ew, and some of the POSIX character classes, as described +for the PCRE2_UCP option below. In particular, it prevents the creator of the +pattern from enabling this facility by starting the pattern with (*UCP). This +option may be useful in applications that process patterns from external +sources. The option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error. +.sp + PCRE2_NEVER_UTF +.sp +This option locks out interpretation of the pattern as UTF-8, UTF-16, or +UTF-32, depending on which library is in use. In particular, it prevents the +creator of the pattern from switching to UTF interpretation by starting the +pattern with (*UTF). This option may be useful in applications that process +patterns from external sources. The combination of PCRE2_UTF and +PCRE2_NEVER_UTF causes an error. +.sp + PCRE2_NO_AUTO_CAPTURE +.sp +If this option is set, it disables the use of numbered capturing parentheses in +the pattern. 
Any opening parenthesis that is not followed by ? behaves as if it +were followed by ?: but named parentheses can still be used for capturing (and +they acquire numbers in the usual way). This is the same as Perl's /n option. +Note that, when this option is set, references to capture groups +(backreferences or recursion/subroutine calls) may only refer to named groups, +though the reference can be by name or by number. +.sp + PCRE2_NO_AUTO_POSSESS +.sp +If this option is set, it disables "auto-possessification", which is an +optimization that, for example, turns a+b into a++b in order to avoid +backtracks into a+ that can never be successful. However, if callouts are in +use, auto-possessification means that some callouts are never taken. You can +set this option if you want the matching functions to do a full unoptimized +search and run all the callouts, but it is mainly provided for testing +purposes. +.sp + PCRE2_NO_DOTSTAR_ANCHOR +.sp +If this option is set, it disables an optimization that is applied when .* is +the first significant item in a top-level branch of a pattern, and all the +other branches also start with .* or with \eA or \eG or ^. The optimization is +automatically disabled for .* if it is inside an atomic group or a capture +group that is the subject of a backreference, or if the pattern contains +(*PRUNE) or (*SKIP). When the optimization is not disabled, such a pattern is +automatically anchored if PCRE2_DOTALL is set for all the .* items and +PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any match +must start either at the start of the subject or following a newline is +remembered. Like other optimizations, this can cause callouts to be skipped. +.sp + PCRE2_NO_START_OPTIMIZE +.sp +This is an option whose main effect is at matching time. It does not change +what \fBpcre2_compile()\fP generates, but it does affect the output of the JIT +compiler. 
+.P +There are a number of optimizations that may occur at the start of a match, in +order to speed up the process. For example, if it is known that an unanchored +match must start with a specific code unit value, the matching code searches +the subject for that value, and fails immediately if it cannot find it, without +actually running the main matching function. This means that a special item +such as (*COMMIT) at the start of a pattern is not considered until after a +suitable starting point for the match has been found. Also, when callouts or +(*MARK) items are in use, these "start-up" optimizations can cause them to be +skipped if the pattern is never actually used. The start-up optimizations are +in effect a pre-scan of the subject that takes place before the pattern is run. +.P +The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations, +possibly causing performance to suffer, but ensuring that in cases where the +result is "no match", the callouts do occur, and that items such as (*COMMIT) +and (*MARK) are considered at every possible starting position in the subject +string. +.P +Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching operation. +Consider the pattern +.sp + (*COMMIT)ABC +.sp +When this is compiled, PCRE2 records the fact that a match must start with the +character "A". Suppose the subject string is "DEFABC". The start-up +optimization scans along the subject, finds "A" and runs the first match +attempt from there. The (*COMMIT) item means that the pattern must match the +current starting position, which in this case, it does. However, if the same +match is run with PCRE2_NO_START_OPTIMIZE set, the initial scan along the +subject string does not happen. The first match attempt is run starting from +"D" and when this fails, (*COMMIT) prevents any further matches being tried, so +the overall result is "no match". 
+.P +Another start-up optimization makes use of a minimum length for a matching +subject, which is recorded when possible. Consider the pattern +.sp + (*MARK:1)B(*MARK:2)(X|Y) +.sp +The minimum length for a match is two characters. If the subject is "XXBB", the +"starting character" optimization skips "XX", then tries to match "BB", which +is long enough. In the process, (*MARK:2) is encountered and remembered. When +the match attempt fails, the next "B" is found, but there is only one character +left, so there are no more attempts, and "no match" is returned with the "last +mark seen" set to "2". If PCRE2_NO_START_OPTIMIZE is set, however, matches are tried +at every possible starting position, including at the end of the subject, where +(*MARK:1) is encountered, but there is no "B", so the "last mark seen" that is +returned is "1". In this case, the optimizations do not affect the overall +match result, which is still "no match", but they do affect the auxiliary +information that is returned. +.sp + PCRE2_NO_UTF_CHECK +.sp +When PCRE2_UTF is set, the validity of the pattern as a UTF string is +automatically checked. There are discussions about the validity of +.\" HTML +.\" +UTF-8 strings, +.\" +.\" HTML +.\" +UTF-16 strings, +.\" +and +.\" HTML +.\" +UTF-32 strings +.\" +in the +.\" HREF +\fBpcre2unicode\fP +.\" +document. If an invalid UTF sequence is found, \fBpcre2_compile()\fP returns a +negative error code. +.P +If you know that your pattern is a valid UTF string, and you want to skip this +check for performance reasons, you can set the PCRE2_NO_UTF_CHECK option. When +it is set, the effect of passing an invalid UTF string as a pattern is +undefined. It may cause your program to crash or loop. +.P +Note that this option can also be passed to \fBpcre2_match()\fP and +\fBpcre2_dfa_match()\fP, to suppress UTF validity checking of the subject +string. 
+.P +Note also that setting PCRE2_NO_UTF_CHECK at compile time does not disable the +error that is given if an escape sequence for an invalid Unicode code point is +encountered in the pattern. In particular, the so-called "surrogate" code +points (0xd800 to 0xdfff) are invalid. If you want to allow escape sequences +such as \ex{d800} you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra +option, as described in the section entitled "Extra compile options" +.\" HTML +.\" +below. +.\" +However, this is possible only in UTF-8 and UTF-32 modes, because these values +are not representable in UTF-16. +.sp + PCRE2_UCP +.sp +This option has two effects. Firstly, it changes the way PCRE2 processes \eB, +\eb, \eD, \ed, \eS, \es, \eW, \ew, and some of the POSIX character classes. By +default, only ASCII characters are recognized, but if PCRE2_UCP is set, Unicode +properties are used to classify characters. There are some PCRE2_EXTRA +options (see below) that add finer control to this behaviour. More details are +given in the section on +.\" HTML +.\" +generic character types +.\" +in the +.\" HREF +\fBpcre2pattern\fP +.\" +page. +.P +The second effect of PCRE2_UCP is to force the use of Unicode properties for +upper/lower casing operations, even when PCRE2_UTF is not set. This makes it +possible to process strings in the 16-bit UCS-2 code. This option is available +only if PCRE2 has been compiled with Unicode support (which is the default). +The PCRE2_EXTRA_CASELESS_RESTRICT option (see below) restricts caseless +matching such that ASCII characters match only ASCII characters and non-ASCII +characters match only non-ASCII characters. +.sp + PCRE2_UNGREEDY +.sp +This option inverts the "greediness" of the quantifiers so that they are not +greedy by default, but become greedy if followed by "?". It is not compatible +with Perl. It can also be set by a (?U) option setting within the pattern. 
+.sp + PCRE2_USE_OFFSET_LIMIT +.sp +This option must be set for \fBpcre2_compile()\fP if +\fBpcre2_set_offset_limit()\fP is going to be used to set a non-default offset +limit in a match context for matches that use this pattern. An error is +generated if an offset limit is set without this option. For more details, see +the description of \fBpcre2_set_offset_limit()\fP in the +.\" HTML +.\" +section +.\" +that describes match contexts. See also the PCRE2_FIRSTLINE +option above. +.sp + PCRE2_UTF +.sp +This option causes PCRE2 to regard both the pattern and the subject strings +that are subsequently processed as strings of UTF characters instead of +single-code-unit strings. It is available when PCRE2 is built to include +Unicode support (which is the default). If Unicode support is not available, +the use of this option provokes an error. Details of how PCRE2_UTF changes the +behaviour of PCRE2 are given in the +.\" HREF +\fBpcre2unicode\fP +.\" +page. In particular, note that it changes the way PCRE2_CASELESS works. +. +. +.\" HTML +.SS "Extra compile options" +.rs +.sp +The option bits that can be set in a compile context by calling the +\fBpcre2_set_compile_extra_options()\fP function are as follows: +.sp + PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK +.sp +Since release 10.38 PCRE2 has forbidden the use of \eK within lookaround +assertions, following Perl's lead. This option is provided to re-enable the +previous behaviour (act in positive lookarounds, ignore in negative ones) in +case anybody is relying on it. +.sp + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES +.sp +This option applies when compiling a pattern in UTF-8 or UTF-32 mode. It is +forbidden in UTF-16 mode, and ignored in non-UTF modes. Unicode "surrogate" +code points in the range 0xd800 to 0xdfff are used in pairs in UTF-16 to encode +code points with values in the range 0x10000 to 0x10ffff. The surrogates cannot +therefore be represented in UTF-16. 
They can be represented in UTF-8 and +UTF-32, but are defined as invalid code points, and cause errors if encountered +in a UTF-8 or UTF-32 string that is being checked for validity by PCRE2. +.P +These values also cause errors if encountered in escape sequences such as +\ex{d912} within a pattern. However, it seems that some applications, when +using PCRE2 to check for unwanted characters in UTF-8 strings, explicitly test +for the surrogates using escape sequences. The PCRE2_NO_UTF_CHECK option does +not disable the error that occurs, because it applies only to the testing of +input strings for UTF validity. +.P +If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surrogate code +point values in UTF-8 and UTF-32 patterns no longer provoke errors and are +incorporated in the compiled pattern. However, they can only match subject +characters if the matching function is called with PCRE2_NO_UTF_CHECK set. +.sp + PCRE2_EXTRA_ALT_BSUX +.sp +The original option PCRE2_ALT_BSUX causes PCRE2 to process \eU, \eu, and \ex in +the way that ECMAscript (aka JavaScript) does. Additional functionality was +defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of +PCRE2_ALT_BSUX, but in addition it recognizes \eu{hhh..} as a hexadecimal +character code, where hhh.. is any number of hexadecimal digits. +.sp + PCRE2_EXTRA_ASCII_BSD +.sp +This option forces \ed to match only ASCII digits, even when PCRE2_UCP is set. +It can be changed within a pattern by means of the (?aD) option setting. +.sp + PCRE2_EXTRA_ASCII_BSS +.sp +This option forces \es to match only ASCII space characters, even when +PCRE2_UCP is set. It can be changed within a pattern by means of the (?aS) +option setting. +.sp + PCRE2_EXTRA_ASCII_BSW +.sp +This option forces \ew to match only ASCII word characters, even when PCRE2_UCP +is set. It can be changed within a pattern by means of the (?aW) option +setting. 
+.sp + PCRE2_EXTRA_ASCII_DIGIT +.sp +This option forces the POSIX character classes [:digit:] and [:xdigit:] to +match only ASCII digits, even when PCRE2_UCP is set. It can be changed within +a pattern by means of the (?aT) option setting. +.sp + PCRE2_EXTRA_ASCII_POSIX +.sp +This option forces all the POSIX character classes, including [:digit:] and +[:xdigit:], to match only ASCII characters, even when PCRE2_UCP is set. It can +be changed within a pattern by means of the (?aP) option setting, but note that +this also sets PCRE2_EXTRA_ASCII_DIGIT in order to ensure that (?-aP) unsets +all ASCII restrictions for POSIX classes. +.sp + PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL +.sp +This is a dangerous option. Use with care. By default, an unrecognized escape +such as \ej or a malformed one such as \ex{2z} causes a compile-time error when +detected by \fBpcre2_compile()\fP. Perl is somewhat inconsistent in handling +such items: for example, \ej is treated as a literal "j", and non-hexadecimal +digits in \ex{} are just ignored, though warnings are given in both cases if +Perl's warning switch is enabled. However, a malformed octal number after \eo{ +always causes an error in Perl. +.P +If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to +\fBpcre2_compile()\fP, all unrecognized or malformed escape sequences are +treated as single-character escapes. For example, \ej is a literal "j" and +\ex{2z} is treated as the literal string "x{2z}". Setting this option means +that typos in patterns may go undetected and have unexpected results. Also note +that a sequence such as [\eN{] is interpreted as a malformed attempt at +[\eN{...}] and so is treated as [N{] whereas [\eN] gives an error because an +unqualified \eN is a valid escape sequence but is not supported in a character +class. To reiterate: this is a dangerous option. Use with great care. 
+.sp + PCRE2_EXTRA_CASELESS_RESTRICT +.sp +When either PCRE2_UCP or PCRE2_UTF is set, caseless matching follows Unicode +rules, which allow for more than two cases per character. There are two +case-equivalent character sets that contain both ASCII and non-ASCII +characters. The ASCII letter S is case-equivalent to U+017f (long S) and the +ASCII letter K is case-equivalent to U+212a (Kelvin sign). This option disables +recognition of case-equivalences that cross the ASCII/non-ASCII boundary. In a +caseless match, both characters must either be ASCII or non-ASCII. The option +can be changed within a pattern by the (?r) option setting. +.sp + PCRE2_EXTRA_ESCAPED_CR_IS_LF +.sp +There are some legacy applications where the escape sequence \er in a pattern +is expected to match a newline. If this option is set, \er in a pattern is +converted to \en so that it matches a LF (linefeed) instead of a CR (carriage +return) character. The option does not affect a literal CR in the pattern, nor +does it affect CR specified as an explicit code point such as \ex{0D}. +.sp + PCRE2_EXTRA_MATCH_LINE +.sp +This option is provided for use by the \fB-x\fP option of \fBpcre2grep\fP. It +causes the pattern only to match complete lines. This is achieved by +automatically inserting the code for "^(?:" at the start of the compiled +pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set, the matched +line may be in the middle of the subject string. This option can be used with +PCRE2_LITERAL. +.sp + PCRE2_EXTRA_MATCH_WORD +.sp +This option is provided for use by the \fB-w\fP option of \fBpcre2grep\fP. It +causes the pattern only to match strings that have a word boundary at the start +and the end. This is achieved by automatically inserting the code for "\eb(?:" +at the start of the compiled pattern and ")\eb" at the end. The option may be +used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is +also set. +. +.
+.\" HTML +.SH "JUST-IN-TIME (JIT) COMPILATION" +.rs +.sp +.nf +.B int pcre2_jit_compile(pcre2_code *\fIcode\fP, uint32_t \fIoptions\fP); +.sp +.B int pcre2_jit_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP);" +.sp +.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP); +.sp +.B pcre2_jit_stack *pcre2_jit_stack_create(size_t \fIstartsize\fP, +.B " size_t \fImaxsize\fP, pcre2_general_context *\fIgcontext\fP);" +.sp +.B void pcre2_jit_stack_assign(pcre2_match_context *\fImcontext\fP, +.B " pcre2_jit_callback \fIcallback_function\fP, void *\fIcallback_data\fP);" +.sp +.B void pcre2_jit_stack_free(pcre2_jit_stack *\fIjit_stack\fP); +.fi +.P +These functions provide support for JIT compilation, which, if the just-in-time +compiler is available, further processes a compiled pattern into machine code +that executes much faster than the \fBpcre2_match()\fP interpretive matching +function. Full details are given in the +.\" HREF +\fBpcre2jit\fP +.\" +documentation. +.P +JIT compilation is a heavyweight optimization. It can take some time for +patterns to be analyzed, and for one-off matches and simple patterns the +benefit of faster execution might be offset by a much slower compilation time. +Most (but not all) patterns can be optimized by the JIT compiler. +. +. +.\" HTML +.SH "LOCALE SUPPORT" +.rs +.sp +.nf +.B const uint8_t *pcre2_maketables(pcre2_general_context *\fIgcontext\fP); +.sp +.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP, +.B " const uint8_t *\fItables\fP);" +.fi +.P +PCRE2 handles caseless matching, and determines whether characters are letters, +digits, or whatever, by reference to a set of tables, indexed by character code +point. However, this applies only to characters whose code points are less than +256. 
By default, higher-valued code points never match escapes such as \ew or +\ed. +.P +When PCRE2 is built with Unicode support (the default), certain Unicode +character properties can be tested with \ep and \eP, or, alternatively, the +PCRE2_UCP option can be set when a pattern is compiled; this causes \ew and +friends to use Unicode property support instead of the built-in tables. +PCRE2_UCP also causes upper/lower casing operations on characters with code +points greater than 127 to use Unicode properties. These effects apply even +when PCRE2_UTF is not set. There are, however, some PCRE2_EXTRA options (see +above) that can be used to modify or suppress them. +.P +The use of locales with Unicode is discouraged. If you are handling characters +with code points greater than 127, you should either use Unicode support, or +use locales, but not try to mix the two. +.P +PCRE2 contains a built-in set of character tables that are used by default. +These are sufficient for many applications. Normally, the internal tables +recognize only ASCII characters. However, when PCRE2 is built, it is possible +to cause the internal tables to be rebuilt in the default "C" locale of the +local system, which may cause them to be different. +.P +The built-in tables can be overridden by tables supplied by the application +that calls PCRE2. These may be created in a different locale from the default. +As more and more applications change to using Unicode, the need for this locale +support is expected to die away. +.P +External tables are built by calling the \fBpcre2_maketables()\fP function, in +the relevant locale. The only argument to this function is a general context, +which can be used to pass a custom memory allocator. If the argument is NULL, +the system \fBmalloc()\fP is used. The result can be passed to +\fBpcre2_compile()\fP as often as necessary, by creating a compile context and +calling \fBpcre2_set_character_tables()\fP to set the tables pointer therein. 
+.P +For example, to build and use tables that are appropriate for the French locale +(where accented characters with values greater than 127 are treated as +letters), the following code could be used: +.sp + setlocale(LC_CTYPE, "fr_FR"); + tables = pcre2_maketables(NULL); + ccontext = pcre2_compile_context_create(NULL); + pcre2_set_character_tables(ccontext, tables); + re = pcre2_compile(..., ccontext); +.sp +The locale name "fr_FR" is used on Linux and other Unix-like systems; if you +are using Windows, the name for the French locale is "french". +.P +The pointer that is passed (via the compile context) to \fBpcre2_compile()\fP +is saved with the compiled pattern, and the same tables are used by the +matching functions. Thus, for any single pattern, compilation and matching both +happen in the same locale, but different patterns can be processed in different +locales. +.P +It is the caller's responsibility to ensure that the memory containing the +tables remains available while they are still in use. When they are no longer +needed, you can discard them using \fBpcre2_maketables_free()\fP, which should +pass as its first parameter the same general context that was used to create the +tables. +. +. +.SS "Saving locale tables" +.rs +.sp +The tables described above are just a sequence of binary bytes, which makes +them independent of hardware characteristics such as endianness or whether the +processor is 32-bit or 64-bit. A copy of the result of \fBpcre2_maketables()\fP +can therefore be saved in a file or elsewhere and re-used later, even in a +different program or on another computer. The size of the tables (number of +bytes) must be obtained by calling \fBpcre2_config()\fP with the +PCRE2_CONFIG_TABLES_LENGTH option because \fBpcre2_maketables()\fP does not +return this value. Note that the \fBpcre2_dftables\fP program, which is part of +the PCRE2 build system, can be used stand-alone to create a file that contains +a set of binary tables.
See the +.\" HTML +.\" +\fBpcre2build\fP +.\" +documentation for details. +. +. +.\" HTML +.SH "INFORMATION ABOUT A COMPILED PATTERN" +.rs +.sp +.nf +.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, void *\fIwhere\fP); +.fi +.P +The \fBpcre2_pattern_info()\fP function returns general information about a +compiled pattern. For information about callouts, see the +.\" HTML +.\" +next section. +.\" +The first argument for \fBpcre2_pattern_info()\fP is a pointer to the compiled +pattern. The second argument specifies which piece of information is required, +and the third argument is a pointer to a variable to receive the data. If the +third argument is NULL, the first argument is ignored, and the function returns +the size in bytes of the variable that is required for the information +requested. Otherwise, the yield of the function is zero for success, or one of +the following negative numbers: +.sp + PCRE2_ERROR_NULL the argument \fIcode\fP was NULL + PCRE2_ERROR_BADMAGIC the "magic number" was not found + PCRE2_ERROR_BADOPTION the value of \fIwhat\fP was invalid + PCRE2_ERROR_UNSET the requested field is not set +.sp +The "magic number" is placed at the start of each compiled pattern as a simple +check against passing an arbitrary memory pointer. Here is a typical call of +\fBpcre2_pattern_info()\fP, to obtain the length of the compiled pattern: +.sp + int rc; + size_t length; + rc = pcre2_pattern_info( + re, /* result of pcre2_compile() */ + PCRE2_INFO_SIZE, /* what is required */ + &length); /* where to put the data */ +.sp +The possible values for the second argument are defined in \fBpcre2.h\fP, and +are as follows: +.sp + PCRE2_INFO_ALLOPTIONS + PCRE2_INFO_ARGOPTIONS + PCRE2_INFO_EXTRAOPTIONS +.sp +Return copies of the pattern's options. The third argument should point to a +\fBuint32_t\fP variable.
PCRE2_INFO_ARGOPTIONS returns exactly the options that +were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns +the compile options as modified by any top-level (*XXX) option settings such as +(*UTF) at the start of the pattern itself. PCRE2_INFO_EXTRAOPTIONS returns the +extra options that were set in the compile context by calling the +pcre2_set_compile_extra_options() function. +.P +For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EXTENDED +option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED and PCRE2_UTF. +Option settings such as (?i) that can change within a pattern do not affect the +result of PCRE2_INFO_ALLOPTIONS, even if they appear right at the start of the +pattern. (This was different in some earlier releases.) +.P +A pattern compiled without PCRE2_ANCHORED is automatically anchored by PCRE2 if +the first significant item in every top-level branch is one of the following: +.sp + ^ unless PCRE2_MULTILINE is set + \eA always + \eG always + .* sometimes - see below +.sp +When .* is the first significant item, anchoring is possible only when all the +following are true: +.sp + .* is not in an atomic group +.\" JOIN + .* is not in a capture group that is the subject + of a backreference + PCRE2_DOTALL is in force for .* + Neither (*PRUNE) nor (*SKIP) appears in the pattern + PCRE2_NO_DOTSTAR_ANCHOR is not set +.sp +For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in the +options returned for PCRE2_INFO_ALLOPTIONS. +.sp + PCRE2_INFO_BACKREFMAX +.sp +Return the number of the highest backreference in the pattern. The third +argument should point to a \fBuint32_t\fP variable. Named capture groups +acquire numbers as well as names, and these count towards the highest +backreference. Backreferences such as \e4 or \eg{12} match the captured +characters of the given group, but in addition, the check that a capture +group is set in a conditional group such as (?(3)a|b) is also a backreference. 
+Zero is returned if there are no backreferences. +.sp + PCRE2_INFO_BSR +.sp +The output is a uint32_t integer whose value indicates what character sequences +the \eR escape sequence matches. A value of PCRE2_BSR_UNICODE means that \eR +matches any Unicode line ending sequence; a value of PCRE2_BSR_ANYCRLF means +that \eR matches only CR, LF, or CRLF. +.sp + PCRE2_INFO_CAPTURECOUNT +.sp +Return the highest capture group number in the pattern. In patterns where (?| +is not used, this is also the total number of capture groups. The third +argument should point to a \fBuint32_t\fP variable. +.sp + PCRE2_INFO_DEPTHLIMIT +.sp +If the pattern set a backtracking depth limit by including an item of the form +(*LIMIT_DEPTH=nnnn) at the start, the value is returned. The third argument +should point to a uint32_t integer. If no such value has been set, the call to +\fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET. Note that this +limit will only be used during matching if it is less than the limit set or +defaulted by the caller of the match function. +.sp + PCRE2_INFO_FIRSTBITMAP +.sp +In the absence of a single first code unit for a non-anchored pattern, +\fBpcre2_compile()\fP may construct a 256-bit table that defines a fixed set of +values for the first code unit in any match. For example, a pattern that starts +with [abc] results in a table with three bits set. When code unit values +greater than 255 are supported, the flag bit for 255 means "any code unit of +value 255 or above". If such a table was constructed, a pointer to it is +returned. Otherwise NULL is returned. The third argument should point to a +\fBconst uint8_t *\fP variable. +.sp + PCRE2_INFO_FIRSTCODETYPE +.sp +Return information about the first code unit of any matched string, for a +non-anchored pattern. The third argument should point to a \fBuint32_t\fP +variable. 
If there is a fixed first value, for example, the letter "c" from a +pattern such as (cat|cow|coyote), 1 is returned, and the value can be retrieved +using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed first value, but it is +known that a match can occur only at the start of the subject or following a +newline in the subject, 2 is returned. Otherwise, and for anchored patterns, 0 +is returned. +.sp + PCRE2_INFO_FIRSTCODEUNIT +.sp +Return the value of the first code unit of any matched string for a pattern +where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0. The third +argument should point to a \fBuint32_t\fP variable. In the 8-bit library, the +value is always less than 256. In the 16-bit library the value can be up to +0xffff. In the 32-bit library in UTF-32 mode the value can be up to 0x10ffff, +and up to 0xffffffff when not using UTF-32 mode. +.sp + PCRE2_INFO_FRAMESIZE +.sp +Return the size (in bytes) of the data frames that are used to remember +backtracking positions when the pattern is processed by \fBpcre2_match()\fP +without the use of JIT. The third argument should point to a \fBsize_t\fP +variable. The frame size depends on the number of capturing parentheses in the +pattern. Each additional capture group adds two PCRE2_SIZE variables. +.sp + PCRE2_INFO_HASBACKSLASHC +.sp +Return 1 if the pattern contains any instances of \eC, otherwise 0. The third +argument should point to a \fBuint32_t\fP variable. +.sp + PCRE2_INFO_HASCRORLF +.sp +Return 1 if the pattern contains any explicit matches for CR or LF characters, +otherwise 0. The third argument should point to a \fBuint32_t\fP variable. An +explicit match is either a literal CR or LF character, or \er or \en or one of +the equivalent hexadecimal or octal escape sequences. +.sp + PCRE2_INFO_HEAPLIMIT +.sp +If the pattern set a heap memory limit by including an item of the form +(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument +should point to a uint32_t integer. 
If no such value has been set, the call to +\fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET. Note that this +limit will only be used during matching if it is less than the limit set or +defaulted by the caller of the match function. +.sp + PCRE2_INFO_JCHANGED +.sp +Return 1 if the (?J) or (?-J) option setting is used in the pattern, otherwise +0. The third argument should point to a \fBuint32_t\fP variable. (?J) and +(?-J) set and unset the local PCRE2_DUPNAMES option, respectively. +.sp + PCRE2_INFO_JITSIZE +.sp +If the compiled pattern was successfully processed by +\fBpcre2_jit_compile()\fP, return the size of the JIT compiled code, otherwise +return zero. The third argument should point to a \fBsize_t\fP variable. +.sp + PCRE2_INFO_LASTCODETYPE +.sp +Returns 1 if there is a rightmost literal code unit that must exist in any +matched string, other than at its start. The third argument should point to a +\fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is +returned, the code unit value itself can be retrieved using +PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is +recorded only if it follows something of variable length. For example, for the +pattern /^a\ed+z\ed+/ the returned value is 1 (with "z" returned from +PCRE2_INFO_LASTCODEUNIT), but for /^a\edz\ed/ the returned value is 0. +.sp + PCRE2_INFO_LASTCODEUNIT +.sp +Return the value of the rightmost literal code unit that must exist in any +matched string, other than at its start, for a pattern where +PCRE2_INFO_LASTCODETYPE returns 1. Otherwise, return 0. The third argument +should point to a \fBuint32_t\fP variable. +.sp + PCRE2_INFO_MATCHEMPTY +.sp +Return 1 if the pattern might match an empty string, otherwise 0. The third +argument should point to a \fBuint32_t\fP variable. When a pattern contains +recursive subroutine calls it is not always possible to determine whether or +not it can match an empty string. 
PCRE2 takes a cautious approach and returns 1 +in such cases. +.sp + PCRE2_INFO_MATCHLIMIT +.sp +If the pattern set a match limit by including an item of the form +(*LIMIT_MATCH=nnnn) at the start, the value is returned. The third argument +should point to a uint32_t integer. If no such value has been set, the call to +\fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET. Note that this +limit will only be used during matching if it is less than the limit set or +defaulted by the caller of the match function. +.sp + PCRE2_INFO_MAXLOOKBEHIND +.sp +A lookbehind assertion moves back a certain number of characters (not code +units) when it starts to process each of its branches. This request returns the +largest of these backward moves. The third argument should point to a uint32_t +integer. The simple assertions \eb and \eB require a one-character lookbehind +and cause PCRE2_INFO_MAXLOOKBEHIND to return 1 in the absence of anything +longer. \eA also registers a one-character lookbehind, though it does not +actually inspect the previous character. +.P +Note that this information is useful for multi-segment matching only +if the pattern contains no nested lookbehinds. For example, the pattern +(?<=a(?<=ba)c) returns a maximum lookbehind of 2, but when it is processed, the +first lookbehind moves back by two characters, matches one character, then the +nested lookbehind also moves back by two characters. This puts the matching +point three characters earlier than it was at the start. +PCRE2_INFO_MAXLOOKBEHIND is really only useful as a debugging tool. See the +.\" HREF +\fBpcre2partial\fP +.\" +documentation for a discussion of multi-segment matching. +.sp + PCRE2_INFO_MINLENGTH +.sp +If a minimum length for matching subject strings was computed, its value is +returned. Otherwise the returned value is 0. This value is not computed when +PCRE2_NO_START_OPTIMIZE is set. 
The value is a number of characters, which in +UTF mode may be different from the number of code units. The third argument +should point to a \fBuint32_t\fP variable. The value is a lower bound to the +length of any matching string. There may not be any strings of that length that +do actually match, but every string that does match is at least that long. +.sp + PCRE2_INFO_NAMECOUNT + PCRE2_INFO_NAMEENTRYSIZE + PCRE2_INFO_NAMETABLE +.sp +PCRE2 supports the use of named as well as numbered capturing parentheses. The +names are just an additional way of identifying the parentheses, which still +acquire numbers. Several convenience functions such as +\fBpcre2_substring_get_byname()\fP are provided for extracting captured +substrings by name. It is also possible to extract the data directly, by first +converting the name to a number in order to access the correct pointers in the +output vector (described with \fBpcre2_match()\fP below). To do the conversion, +you need to use the name-to-number map, which is described by these three +values. +.P +The map consists of a number of fixed-size entries. PCRE2_INFO_NAMECOUNT gives +the number of entries, and PCRE2_INFO_NAMEENTRYSIZE gives the size of each +entry in code units; both of these return a \fBuint32_t\fP value. The entry +size depends on the length of the longest name. +.P +PCRE2_INFO_NAMETABLE returns a pointer to the first entry of the table. This is +a PCRE2_SPTR pointer to a block of code units. In the 8-bit library, the first +two bytes of each entry are the number of the capturing parenthesis, most +significant byte first. In the 16-bit library, the pointer points to 16-bit +code units, the first of which contains the parenthesis number. In the 32-bit +library, the pointer points to 32-bit code units, the first of which contains +the parenthesis number. The rest of the entry is the corresponding name, zero +terminated. +.P +The names are in alphabetical order. 
If (?| is used to create multiple capture +groups with the same number, as described in the +.\" HTML +.\" +section on duplicate group numbers +.\" +in the +.\" HREF +\fBpcre2pattern\fP +.\" +page, the groups may be given the same name, but there is only one entry in the +table. Different names for groups of the same number are not permitted. +.P +Duplicate names for capture groups with different numbers are permitted, but +only if PCRE2_DUPNAMES is set. They appear in the table in the order in which +they were found in the pattern. In the absence of (?| this is the order of +increasing number; when (?| is used this is not necessarily the case because +later capture groups may have lower numbers. +.P +As a simple example of the name/number table, consider the following pattern +after compilation by the 8-bit library (assume PCRE2_EXTENDED is set, so white +space - including newlines - is ignored): +.sp +.\" JOIN + (?<date> (?<year>(\ed\ed)?\ed\ed) - + (?<month>\ed\ed) - (?<day>\ed\ed) ) +.sp +There are four named capture groups, so the table has four entries, and each +entry in the table is eight bytes long. The table is as follows, with +non-printing bytes shown in hexadecimal, and undefined bytes shown as ??: +.sp + 00 01 d a t e 00 ?? + 00 05 d a y 00 ?? ?? + 00 04 m o n t h 00 + 00 02 y e a r 00 ?? +.sp +When writing code to extract data from named capture groups using the +name-to-number map, remember that the length of the entries is likely to be +different for each compiled pattern. +.sp + PCRE2_INFO_NEWLINE +.sp +The output is one of the following \fBuint32_t\fP values: +.sp + PCRE2_NEWLINE_CR Carriage return (CR) + PCRE2_NEWLINE_LF Linefeed (LF) + PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF) + PCRE2_NEWLINE_ANY Any Unicode line ending + PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF + PCRE2_NEWLINE_NUL The NUL character (binary zero) +.sp +This identifies the character sequence that will be recognized as meaning +"newline" while matching.
+.sp + PCRE2_INFO_SIZE +.sp +Return the size of the compiled pattern in bytes (for all three libraries). The +third argument should point to a \fBsize_t\fP variable. This value includes the +size of the general data block that precedes the code units of the compiled +pattern itself. The value that is used when \fBpcre2_compile()\fP is getting +memory in which to place the compiled pattern may be slightly larger than the +value returned by this option, because there are cases where the code that +calculates the size has to over-estimate. Processing a pattern with the JIT +compiler does not alter the value returned by this option. +. +. +.\" HTML +.SH "INFORMATION ABOUT A PATTERN'S CALLOUTS" +.rs +.sp +.nf +.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP, +.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *)," +.B " void *\fIuser_data\fP);" +.fi +.sp +A script language that supports the use of string arguments in callouts might +like to scan all the callouts in a pattern before running the match. This can +be done by calling \fBpcre2_callout_enumerate()\fP. The first argument is a +pointer to a compiled pattern, the second points to a callback function, and +the third is arbitrary user data. The callback function is called for every +callout in the pattern in the order in which they appear. Its first argument is +a pointer to a callout enumeration block, and its second argument is the +\fIuser_data\fP value that was passed to \fBpcre2_callout_enumerate()\fP. The +contents of the callout enumeration block are described in the +.\" HREF +\fBpcre2callout\fP +.\" +documentation, which also gives further details about callouts. +. +. +.SH "SERIALIZATION AND PRECOMPILING" +.rs +.sp +It is possible to save compiled patterns on disc or elsewhere, and reload them +later, subject to a number of restrictions. 
The host on which the patterns are +reloaded must be running the same version of PCRE2, with the same code unit +width, and must also have the same endianness, pointer width, and PCRE2_SIZE +type. Before compiled patterns can be saved, they must be converted to a +"serialized" form, which in the case of PCRE2 is really just a bytecode dump. +The functions whose names begin with \fBpcre2_serialize_\fP are used for +converting to and from the serialized form. They are described in the +.\" HREF +\fBpcre2serialize\fP +.\" +documentation. Note that PCRE2 serialization does not convert compiled patterns +to an abstract format like Java or .NET serialization. +. +. +.\" HTML +.SH "THE MATCH DATA BLOCK" +.rs +.sp +.nf +.B pcre2_match_data *pcre2_match_data_create(uint32_t \fIovecsize\fP, +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_match_data *pcre2_match_data_create_from_pattern( +.B " const pcre2_code *\fIcode\fP, pcre2_general_context *\fIgcontext\fP);" +.sp +.B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP); +.fi +.P +Information about a successful or unsuccessful match is placed in a match +data block, which is an opaque structure that is accessed by function calls. In +particular, the match data block contains a vector of offsets into the subject +string that define the matched parts of the subject. This is known as the +\fIovector\fP. +.P +Before calling \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP, or +\fBpcre2_jit_match()\fP you must create a match data block by calling one of +the creation functions above. For \fBpcre2_match_data_create()\fP, the first +argument is the number of pairs of offsets in the \fIovector\fP. +.P +When using \fBpcre2_match()\fP, one pair of offsets is required to identify the +string that matched the whole pattern, with an additional pair for each +captured substring. For example, a value of 4 creates enough space to record +the matched portion of the subject plus three captured substrings. 
+.P +When using \fBpcre2_dfa_match()\fP there may be multiple matched substrings of +different lengths at the same point in the subject. The ovector should be made +large enough to hold as many as are expected. +.P +A minimum of at least 1 pair is imposed by \fBpcre2_match_data_create()\fP, so +it is always possible to return the overall matched string in the case of +\fBpcre2_match()\fP or the longest match in the case of +\fBpcre2_dfa_match()\fP. The maximum number of pairs is 65535; if the first +argument of \fBpcre2_match_data_create()\fP is greater than this, 65535 is +used. +.P +The second argument of \fBpcre2_match_data_create()\fP is a pointer to a +general context, which can specify custom memory management for obtaining the +memory for the match data block. If you are not using custom memory management, +pass NULL, which causes \fBmalloc()\fP to be used. +.P +For \fBpcre2_match_data_create_from_pattern()\fP, the first argument is a +pointer to a compiled pattern. The ovector is created to be exactly the right +size to hold all the substrings a pattern might capture when matched using +\fBpcre2_match()\fP. You should not use this call when matching with +\fBpcre2_dfa_match()\fP. The second argument is again a pointer to a general +context, but in this case if NULL is passed, the memory is obtained using the +same allocator that was used for the compiled pattern (custom or default). +.P +A match data block can be used many times, with the same or different compiled +patterns. You can extract information from a match data block after a match +operation has finished, using functions that are described in the sections on +.\" HTML +.\" +matched strings +.\" +and +.\" HTML +.\" +other match data +.\" +below. +.P +When a call of \fBpcre2_match()\fP fails, valid data is available in the match +block only when the error is PCRE2_ERROR_NOMATCH, PCRE2_ERROR_PARTIAL, or one +of the error codes for an invalid UTF string. 
Exactly what is available depends +on the error, and is detailed below. +.P +When one of the matching functions is called, pointers to the compiled pattern +and the subject string are set in the match data block so that they can be +referenced by the extraction functions after a successful match. After running +a match, you must not free a compiled pattern or a subject string until after +all operations on the match data block (for that match) have taken place, +unless, in the case of the subject string, you have used the +PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled +"Option bits for \fBpcre2_match()\fP" +.\" HTML +.\" +below. +.\" +.P +When a match data block itself is no longer needed, it should be freed by +calling \fBpcre2_match_data_free()\fP. If this function is called with a NULL +argument, it returns immediately, without doing anything. +. +. +.SH "MEMORY USE FOR MATCH DATA BLOCKS" +.rs +.sp +.nf +.B PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE pcre2_get_match_data_heapframes_size( +.B " pcre2_match_data *\fImatch_data\fP);" +.fi +.P +The size of a match data block depends on the size of the ovector that it +contains. The function \fBpcre2_get_match_data_size()\fP returns the size, in +bytes, of the block that is its argument. +.P +When \fBpcre2_match()\fP runs interpretively (that is, without using JIT), it +makes use of a vector of data frames for remembering backtracking positions. +The size of each individual frame depends on the number of capturing +parentheses in the pattern and can be obtained by calling +\fBpcre2_pattern_info()\fP with the PCRE2_INFO_FRAMESIZE option (see the +section entitled "Information about a compiled pattern" +.\" HTML +.\" +above). +.\" +.P +Heap memory is used for the frames vector; if the initial memory block turns +out to be too small during matching, it is automatically expanded. 
When +\fBpcre2_match()\fP returns, the memory is not freed, but remains attached to +the match data block, for use by any subsequent matches that use the same +block. It is automatically freed when the match data block itself is freed. +.P +You can find the current size of the frames vector that a match data block owns +by calling \fBpcre2_get_match_data_heapframes_size()\fP. For a newly created +match data block the size will be zero. Some types of match may require a lot +of frames and thus a large vector; applications that run in environments where +memory is constrained can check this and free the match data block if the heap +frames vector has become too big. +. +. +.SH "MATCHING A PATTERN: THE TRADITIONAL FUNCTION" +.rs +.sp +.nf +.B int pcre2_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP);" +.fi +.P +The function \fBpcre2_match()\fP is called to match a subject string against a +compiled pattern, which is passed in the \fIcode\fP argument. You can call +\fBpcre2_match()\fP with the same \fIcode\fP argument as many times as you +like, in order to find multiple matches in the subject string or to match +different subject strings with the same pattern. +.P +This function is the main matching facility of the library, and it operates in +a Perl-like manner. For specialist use there is also an alternative matching +function, which is described +.\" HTML +.\" +below +.\" +in the section about the \fBpcre2_dfa_match()\fP function. 
+.P +Here is an example of a simple call to \fBpcre2_match()\fP: +.sp + pcre2_match_data *md = pcre2_match_data_create(4, NULL); + int rc = pcre2_match( + re, /* result of pcre2_compile() */ + "some string", /* the subject string */ + 11, /* the length of the subject string */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + md, /* the match data block */ + NULL); /* a match context; NULL means use defaults */ +.sp +If the subject string is zero-terminated, the length can be given as +PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common +matching parameters are to be changed. For details, see the section on +.\" HTML +.\" +the match context +.\" +above. +. +. +.SS "The string to be matched by \fBpcre2_match()\fP" +.rs +.sp +The subject string is passed to \fBpcre2_match()\fP as a pointer in +\fIsubject\fP, a length in \fIlength\fP, and a starting offset in +\fIstartoffset\fP. The length and offset are in code units, not characters. +That is, they are in bytes for the 8-bit library, 16-bit code units for the +16-bit library, and 32-bit code units for the 32-bit library, whether or not +UTF processing is enabled. As a special case, if \fIsubject\fP is NULL and +\fIlength\fP is zero, the subject is assumed to be an empty string. If +\fIlength\fP is non-zero, an error occurs if \fIsubject\fP is NULL. +.P +If \fIstartoffset\fP is greater than the length of the subject, +\fBpcre2_match()\fP returns PCRE2_ERROR_BADOFFSET. When the starting offset is +zero, the search for a match starts at the beginning of the subject, and this +is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset +must point to the start of a character, or to the end of the subject (in UTF-32 +mode, one code unit equals one character, so all offsets are valid). Like the +pattern string, the subject may contain binary zeros. 
+.P +A non-zero starting offset is useful when searching for another match in the +same subject by calling \fBpcre2_match()\fP again after a previous success. +Setting \fIstartoffset\fP differs from passing over a shortened string and +setting PCRE2_NOTBOL in the case of a pattern that begins with any kind of +lookbehind. For example, consider the pattern +.sp + \eBiss\eB +.sp +which finds occurrences of "iss" in the middle of words. (\eB matches only if +the current position in the subject is not a word boundary.) When applied to +the string "Mississippi" the first call to \fBpcre2_match()\fP finds the first +occurrence. If \fBpcre2_match()\fP is called again with just the remainder of +the subject, namely "issippi", it does not match, because \eB is always false +at the start of the subject, which is deemed to be a word boundary. However, if +\fBpcre2_match()\fP is passed the entire string again, but with +\fIstartoffset\fP set to 4, it finds the second occurrence of "iss" because it +is able to look behind the starting point to discover that it is preceded by a +letter. +.P +Finding all the matches in a subject is tricky when the pattern can match an +empty string. It is possible to emulate Perl's /g behaviour by first trying the +match again at the same offset, with the PCRE2_NOTEMPTY_ATSTART and +PCRE2_ANCHORED options, and then if that fails, advancing the starting offset +and trying an ordinary match again. There is some code that demonstrates how to +do this in the +.\" HREF +\fBpcre2demo\fP +.\" +sample program. In the most general case, you have to check to see if the +newline convention recognizes CRLF as a newline, and if so, and the current +character is CR followed by LF, advance the starting offset by two characters +instead of one. +.P +If a non-zero starting offset is passed when the pattern is anchored, a single +attempt to match at the given offset is made. 
This can only succeed if the +pattern does not require the match to be at the start of the subject. In other +words, the anchoring must be the result of setting the PCRE2_ANCHORED option or +the use of .* with PCRE2_DOTALL, not by starting the pattern with ^ or \eA. +. +. +.\" HTML +.SS "Option bits for \fBpcre2_match()\fP" +.rs +.sp +The unused bits of the \fIoptions\fP argument for \fBpcre2_match()\fP must be +zero. The only bits that may be set are PCRE2_ANCHORED, +PCRE2_COPY_MATCHED_SUBJECT, PCRE2_DISABLE_RECURSELOOP_CHECK, PCRE2_ENDANCHORED, +PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, +PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. +Their action is described below. +.P +Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not supported by +the just-in-time (JIT) compiler. If it is set, JIT matching is disabled and the +interpretive code in \fBpcre2_match()\fP is run. +PCRE2_DISABLE_RECURSELOOP_CHECK is ignored by JIT, but apart from PCRE2_NO_JIT +(obviously), the remaining options are supported for JIT matching. +.sp + PCRE2_ANCHORED +.sp +The PCRE2_ANCHORED option limits \fBpcre2_match()\fP to matching at the first +matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out +to be anchored by virtue of its contents, it cannot be made unanchored at +matching time. Note that setting the option at match time disables JIT +matching. +.sp + PCRE2_COPY_MATCHED_SUBJECT +.sp +By default, a pointer to the subject is remembered in the match data block so +that, after a successful match, it can be referenced by the substring +extraction functions. This means that the subject's memory must not be freed +until all such operations are complete. For some applications where the +lifetime of the subject string is not guaranteed, it may be necessary to make a +copy of the subject string, but it is wasteful to do this unless the match is +successful. 
After a successful match, if PCRE2_COPY_MATCHED_SUBJECT is set, the +subject is copied and the new pointer is remembered in the match data block +instead of the original subject pointer. The memory allocator that was used for +the match block itself is used. The copy is automatically freed when +\fBpcre2_match_data_free()\fP is called to free the match data block. It is also +automatically freed if the match data block is re-used for another match +operation. +.sp + PCRE2_DISABLE_RECURSELOOP_CHECK +.sp +This option is relevant only to \fBpcre2_match()\fP for interpretive matching. +It is ignored when JIT is used, and is forbidden for \fBpcre2_dfa_match()\fP. +.P +The use of recursion in patterns can lead to infinite loops. In the +interpretive matcher these would be eventually caught by the match or heap +limits, but this could take a long time and/or use a lot of memory if the +limits are large. There is therefore a check at the start of each recursion. +If the same group is still active from a previous call, and the current subject +pointer is the same as it was at the start of that group, and the furthest +inspected character of the subject has not changed, an error is generated. +.P +There are rare cases of matches that would complete, but nevertheless trigger +this error. This option disables the check. It is provided mainly for testing +when comparing JIT and interpretive behaviour. +.sp + PCRE2_ENDANCHORED +.sp +If the PCRE2_ENDANCHORED option is set, any string that \fBpcre2_match()\fP +matches must be right at the end of the subject string. Note that setting the +option at match time disables JIT matching. +.sp + PCRE2_NOTBOL +.sp +This option specifies that the first character of the subject string is not the +beginning of a line, so the circumflex metacharacter should not match before +it. Setting this without having set PCRE2_MULTILINE at compile time causes +circumflex never to match. This option affects only the behaviour of the +circumflex metacharacter. 
It does not affect \eA. +.sp + PCRE2_NOTEOL +.sp +This option specifies that the end of the subject string is not the end of a +line, so the dollar metacharacter should not match it nor (except in multiline +mode) a newline immediately before it. Setting this without having set +PCRE2_MULTILINE at compile time causes dollar never to match. This option +affects only the behaviour of the dollar metacharacter. It does not affect \eZ +or \ez. +.sp + PCRE2_NOTEMPTY +.sp +An empty string is not considered to be a valid match if this option is set. If +there are alternatives in the pattern, they are tried. If all the alternatives +match the empty string, the entire match fails. For example, if the pattern +.sp + a?b? +.sp +is applied to a string not beginning with "a" or "b", it matches an empty +string at the start of the subject. With PCRE2_NOTEMPTY set, this match is not +valid, so \fBpcre2_match()\fP searches further into the string for occurrences +of "a" or "b". +.sp + PCRE2_NOTEMPTY_ATSTART +.sp +This is like PCRE2_NOTEMPTY, except that it locks out an empty string match +only at the first matching position, that is, at the start of the subject plus +the starting offset. An empty string match later in the subject is permitted. +If the pattern is anchored, such a match can occur only if the pattern contains +\eK. +.sp + PCRE2_NO_JIT +.sp +By default, if a pattern has been successfully processed by +\fBpcre2_jit_compile()\fP, JIT is automatically used when \fBpcre2_match()\fP +is called with options that JIT supports. Setting PCRE2_NO_JIT disables the use +of JIT; it forces matching to be done by the interpreter. +.sp + PCRE2_NO_UTF_CHECK +.sp +When PCRE2_UTF is set at compile time, the validity of the subject as a UTF +string is checked unless PCRE2_NO_UTF_CHECK is passed to \fBpcre2_match()\fP or +PCRE2_MATCH_INVALID_UTF was passed to \fBpcre2_compile()\fP. The latter special +case is discussed in detail in the +.\" HREF +\fBpcre2unicode\fP +.\" +documentation. 
+.P +In the default case, if a non-zero starting offset is given, the check is +applied only to that part of the subject that could be inspected during +matching, and there is a check that the starting offset points to the first +code unit of a character or to the end of the subject. If there are no +lookbehind assertions in the pattern, the check starts at the starting offset. +Otherwise, it starts at the length of the longest lookbehind before the +starting offset, or at the start of the subject if there are not that many +characters before the starting offset. Note that the sequences \eb and \eB are +one-character lookbehinds. +.P +The check is carried out before any other processing takes place, and a +negative error code is returned if the check fails. There are several UTF error +codes for each code unit width, corresponding to different problems with the +code unit sequence. There are discussions about the validity of +.\" HTML +.\" +UTF-8 strings, +.\" +.\" HTML +.\" +UTF-16 strings, +.\" +and +.\" HTML +.\" +UTF-32 strings +.\" +in the +.\" HREF +\fBpcre2unicode\fP +.\" +documentation. +.P +If you know that your subject is valid, and you want to skip this check for +performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling +\fBpcre2_match()\fP. You might want to do this for the second and subsequent +calls to \fBpcre2_match()\fP if you are making repeated calls to find multiple +matches in the same subject string. +.P +\fBWarning:\fP Unless PCRE2_MATCH_INVALID_UTF was set at compile time, when +PCRE2_NO_UTF_CHECK is set at match time the effect of passing an invalid +string as a subject, or an invalid value of \fIstartoffset\fP, is undefined. +Your program may crash or loop indefinitely or give wrong results. +.sp + PCRE2_PARTIAL_HARD + PCRE2_PARTIAL_SOFT +.sp +These options turn on the partial matching feature. 
A partial match occurs if +the end of the subject string is reached successfully, but there are not enough +subject characters to complete the match. In addition, either at least one +character must have been inspected or the pattern must contain a lookbehind, or +the pattern must be one that could match an empty string. +.P +If this situation arises when PCRE2_PARTIAL_SOFT (but not PCRE2_PARTIAL_HARD) +is set, matching continues by testing any remaining alternatives. Only if no +complete match can be found is PCRE2_ERROR_PARTIAL returned instead of +PCRE2_ERROR_NOMATCH. In other words, PCRE2_PARTIAL_SOFT specifies that the +caller is prepared to handle a partial match, but only if no complete match can +be found. +.P +If PCRE2_PARTIAL_HARD is set, it overrides PCRE2_PARTIAL_SOFT. In this case, if +a partial match is found, \fBpcre2_match()\fP immediately returns +PCRE2_ERROR_PARTIAL, without considering any other alternatives. In other +words, when PCRE2_PARTIAL_HARD is set, a partial match is considered to be more +important than an alternative complete match. +.P +There is a more detailed discussion of partial and multi-segment matching, with +examples, in the +.\" HREF +\fBpcre2partial\fP +.\" +documentation. +. +. +. +.SH "NEWLINE HANDLING WHEN MATCHING" +.rs +.sp +When PCRE2 is built, a default newline convention is set; this is usually the +standard convention for the operating system. The default can be overridden in +a +.\" HTML +.\" +compile context +.\" +by calling \fBpcre2_set_newline()\fP. It can also be overridden by starting a +pattern string with, for example, (*CRLF), as described in the +.\" HTML +.\" +section on newline conventions +.\" +in the +.\" HREF +\fBpcre2pattern\fP +.\" +page. During matching, the newline choice affects the behaviour of the dot, +circumflex, and dollar metacharacters. It may also alter the way the match +starting position is advanced after a match failure for an unanchored pattern. 
+.P +When PCRE2_NEWLINE_CRLF, PCRE2_NEWLINE_ANYCRLF, or PCRE2_NEWLINE_ANY is set as +the newline convention, and a match attempt for an unanchored pattern fails +when the current starting position is at a CRLF sequence, and the pattern +contains no explicit matches for CR or LF characters, the match position is +advanced by two characters instead of one, in other words, to after the CRLF. +.P +The above rule is a compromise that makes the most common cases work as +expected. For example, if the pattern is .+A (and the PCRE2_DOTALL option is +not set), it does not match the string "\er\enA" because, after failing at the +start, it skips both the CR and the LF before retrying. However, the pattern +[\er\en]A does match that string, because it contains an explicit CR or LF +reference, and so advances only by one character after the first failure. +.P +An explicit match for CR or LF is either a literal appearance of one of those +characters in the pattern, or one of the \er or \en or equivalent octal or +hexadecimal escape sequences. Implicit matches such as [^X] do not count, nor +does \es, even though it includes CR and LF in the characters that it matches. +.P +Notwithstanding the above, anomalous effects may still occur when CRLF is a +valid newline sequence and explicit \er or \en escapes appear in the pattern. +. +. +.\" HTML +.SH "HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS" +.rs +.sp +.nf +.B uint32_t pcre2_get_ovector_count(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *\fImatch_data\fP); +.fi +.P +In general, a pattern matches a certain portion of the subject, and in +addition, further substrings from the subject may be picked out by +parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's +book, this is called "capturing" in what follows, and the phrase "capture +group" (Perl terminology) is used for a fragment of a pattern that picks out a +substring. 
PCRE2 supports several other kinds of parenthesized group that do +not cause substrings to be captured. The \fBpcre2_pattern_info()\fP function +can be used to find out how many capture groups there are in a compiled +pattern. +.P +You can use auxiliary functions for accessing captured substrings +.\" HTML +.\" +by number +.\" +or +.\" HTML +.\" +by name, +.\" +as described in sections below. +.P +Alternatively, you can make direct use of the vector of PCRE2_SIZE values, +called the \fBovector\fP, which contains the offsets of captured strings. It is +part of the +.\" HTML +.\" +match data block. +.\" +The function \fBpcre2_get_ovector_pointer()\fP returns the address of the +ovector, and \fBpcre2_get_ovector_count()\fP returns the number of pairs of +values it contains. +.P +Within the ovector, the first in each pair of values is set to the offset of +the first code unit of a substring, and the second is set to the offset of the +first code unit after the end of a substring. These values are always code unit +offsets, not character offsets. That is, they are byte offsets in the 8-bit +library, 16-bit offsets in the 16-bit library, and 32-bit offsets in the 32-bit +library. +.P +After a partial match (error return PCRE2_ERROR_PARTIAL), only the first pair +of offsets (that is, \fIovector[0]\fP and \fIovector[1]\fP) are set. They +identify the part of the subject that was partially matched. See the +.\" HREF +\fBpcre2partial\fP +.\" +documentation for details of partial matching. +.P +After a fully successful match, the first pair of offsets identifies the +portion of the subject string that was matched by the entire pattern. The next +pair is used for the first captured substring, and so on. The value returned by +\fBpcre2_match()\fP is one more than the highest numbered pair that has been +set. For example, if two substrings have been captured, the returned value is +3. 
If there are no captured substrings, the return value from a successful +match is 1, indicating that just the first pair of offsets has been set. +.P +If a pattern uses the \eK escape sequence within a positive assertion, the +reported start of a successful match can be greater than the end of the match. +For example, if the pattern (?=ab\eK) is matched against "ab", the start and +end offset values for the match are 2 and 0. +.P +If a capture group is matched repeatedly within a single match operation, it is +the last portion of the subject that it matched that is returned. +.P +If the ovector is too small to hold all the captured substring offsets, as much +as possible is filled in, and the function returns a value of zero. If captured +substrings are not of interest, \fBpcre2_match()\fP may be called with a match +data block whose ovector is of minimum length (that is, one pair). +.P +It is possible for capture group number \fIn+1\fP to match some part of the +subject when group \fIn\fP has not been used at all. For example, if the string +"abc" is matched against the pattern (a|(z))(bc) the return from the function +is 4, and groups 1 and 3 are matched, but 2 is not. When this happens, both +values in the offset pairs corresponding to unused groups are set to +PCRE2_UNSET. +.P +Offset values that correspond to unused groups at the end of the expression are +also set to PCRE2_UNSET. For example, if the string "abc" is matched against +the pattern (abc)(x(yz)?)? groups 2 and 3 are not matched. The return from the +function is 2, because the highest used capture group number is 1. The offsets +for the second and third capture groups (assuming the vector is large enough, +of course) are set to PCRE2_UNSET. +.P +Elements in the ovector that do not correspond to capturing parentheses in the +pattern are never changed. That is, if a pattern contains \fIn\fP capturing +parentheses, no more than \fIovector[0]\fP to \fIovector[2n+1]\fP are set by +\fBpcre2_match()\fP. 
The other elements retain whatever values they previously +had. After a failed match attempt, the contents of the ovector are unchanged. +. +. +.\" HTML +.SH "OTHER INFORMATION ABOUT A MATCH" +.rs +.sp +.nf +.B PCRE2_SPTR pcre2_get_mark(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP); +.fi +.P +As well as the offsets in the ovector, other information about a match is +retained in the match data block and can be retrieved by the above functions in +appropriate circumstances. If they are called at other times, the result is +undefined. +.P +After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure +to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function +\fBpcre2_get_mark()\fP can be called to access this name, which can be +specified in the pattern by any of the backtracking control verbs, not just +(*MARK). The same function applies to all the verbs. It returns a pointer to +the zero-terminated name, which is within the compiled pattern. If no name is +available, NULL is returned. The length of the name (excluding the terminating +zero) is stored in the code unit that precedes the name. You should use this +length instead of relying on the terminating zero if the name might contain a +binary zero. +.P +After a successful match, the name that is returned is the last mark name +encountered on the matching path through the pattern. Instances of backtracking +verbs without names do not count. Thus, for example, if the matching path +contains (*MARK:A)(*PRUNE), the name "A" is returned. After a "no match" or a +partial match, the last encountered name is returned. For example, consider +this pattern: +.sp + ^(*MARK:A)((*MARK:B)a|b)c +.sp +When it matches "bc", the returned name is A. The B mark is "seen" in the first +branch of the group, but it is not on the matching path. On the other hand, +when this pattern fails to match "bx", the returned name is B. 
+.P +\fBWarning:\fP By default, certain start-of-match optimizations are used to +give a fast "no match" result in some situations. For example, if the anchoring +is removed from the pattern above, there is an initial check for the presence +of "c" in the subject before running the matching engine. This check fails for +"bx", causing a match failure without seeing any marks. You can disable the +start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option for +\fBpcre2_compile()\fP or by starting the pattern with (*NO_START_OPT). +.P +After a successful match, a partial match, or one of the invalid UTF errors +(for example, PCRE2_ERROR_UTF8_ERR5), \fBpcre2_get_startchar()\fP can be +called. After a successful or partial match it returns the code unit offset of +the character at which the match started. For a non-partial match, this can be +different to the value of \fIovector[0]\fP if the pattern contains the \eK +escape sequence. After a partial match, however, this value is always the same +as \fIovector[0]\fP because \eK does not affect the result of a partial match. +.P +After a UTF check failure, \fBpcre2_get_startchar()\fP can be used to obtain +the code unit offset of the invalid UTF character. Details are given in the +.\" HREF +\fBpcre2unicode\fP +.\" +page. +. +. +.\" HTML +.SH "ERROR RETURNS FROM \fBpcre2_match()\fP" +.rs +.sp +If \fBpcre2_match()\fP fails, it returns a negative number. This can be +converted to a text string by calling the \fBpcre2_get_error_message()\fP +function (see "Obtaining a textual error message" +.\" HTML +.\" +below). +.\" +Negative error codes are also returned by other functions, and are documented +with them. The codes are given names in the header file. If UTF checking is in +force and an invalid UTF subject string is detected, one of a number of +UTF-specific negative error codes is returned. Details are given in the +.\" HREF +\fBpcre2unicode\fP +.\" +page. 
The following are the other errors that may be returned by +\fBpcre2_match()\fP: +.sp + PCRE2_ERROR_NOMATCH +.sp +The subject string did not match the pattern. +.sp + PCRE2_ERROR_PARTIAL +.sp +The subject string did not match, but it did match partially. See the +.\" HREF +\fBpcre2partial\fP +.\" +documentation for details of partial matching. +.sp + PCRE2_ERROR_BADMAGIC +.sp +PCRE2 stores a 4-byte "magic number" at the start of the compiled code, to +catch the case when it is passed a junk pointer. This is the error that is +returned when the magic number is not present. +.sp + PCRE2_ERROR_BADMODE +.sp +This error is given when a compiled pattern is passed to a function in a +library of a different code unit width, for example, a pattern compiled by +the 8-bit library is passed to a 16-bit or 32-bit library function. +.sp + PCRE2_ERROR_BADOFFSET +.sp +The value of \fIstartoffset\fP was greater than the length of the subject. +.sp + PCRE2_ERROR_BADOPTION +.sp +An unrecognized bit was set in the \fIoptions\fP argument. +.sp + PCRE2_ERROR_BADUTFOFFSET +.sp +The UTF code unit sequence that was passed as a subject was checked and found +to be valid (the PCRE2_NO_UTF_CHECK option was not set), but the value of +\fIstartoffset\fP did not point to the beginning of a UTF character or the end +of the subject. +.sp + PCRE2_ERROR_CALLOUT +.sp +This error is never generated by \fBpcre2_match()\fP itself. It is provided for +use by callout functions that want to cause \fBpcre2_match()\fP or +\fBpcre2_callout_enumerate()\fP to return a distinctive error code. See the +.\" HREF +\fBpcre2callout\fP +.\" +documentation for details. +.sp + PCRE2_ERROR_DEPTHLIMIT +.sp +The nested backtracking depth limit was reached. +.sp + PCRE2_ERROR_HEAPLIMIT +.sp +The heap limit was reached. +.sp + PCRE2_ERROR_INTERNAL +.sp +An unexpected internal error has occurred. This error could be caused by a bug +in PCRE2 or by overwriting of the compiled pattern. 
+.sp + PCRE2_ERROR_JIT_STACKLIMIT +.sp +This error is returned when a pattern that was successfully studied using JIT +is being matched, but the memory available for the just-in-time processing +stack is not large enough. See the +.\" HREF +\fBpcre2jit\fP +.\" +documentation for more details. +.sp + PCRE2_ERROR_MATCHLIMIT +.sp +The backtracking match limit was reached. +.sp + PCRE2_ERROR_NOMEMORY +.sp +Heap memory is used to remember backtracking points. This error is given when +the memory allocation function (default or custom) fails. Note that a different +error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds +the heap limit. PCRE2_ERROR_NOMEMORY is also returned if +PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails. +.sp + PCRE2_ERROR_NULL +.sp +Either the \fIcode\fP, \fIsubject\fP, or \fImatch_data\fP argument was passed +as NULL. +.sp + PCRE2_ERROR_RECURSELOOP +.sp +This error is returned when \fBpcre2_match()\fP detects a recursion loop within +the pattern. Specifically, it means that either the whole pattern or a +capture group has been called recursively for the second time at the same +position in the subject string. Some simple patterns that might do this are +detected and faulted at compile time, but more complicated cases, in particular +mutual recursions between two different groups, cannot be detected until +matching is attempted. +. +. +.\" HTML +.SH "OBTAINING A TEXTUAL ERROR MESSAGE" +.rs +.sp +.nf +.B int pcre2_get_error_message(int \fIerrorcode\fP, PCRE2_UCHAR *\fIbuffer\fP, +.B " PCRE2_SIZE \fIbufflen\fP);" +.fi +.P +A text message for an error code from any PCRE2 function (compile, match, or +auxiliary) can be obtained by calling \fBpcre2_get_error_message()\fP. The code +is passed as the first argument, with the remaining two arguments specifying a +code unit buffer and its length in code units, into which the text message is +placed. 
The message is returned in code units of the appropriate width for the +library that is being used. +.P +The returned message is terminated with a trailing zero, and the function +returns the number of code units used, excluding the trailing zero. If the +error number is unknown, the negative error code PCRE2_ERROR_BADDATA is +returned. If the buffer is too small, the message is truncated (but still with +a trailing zero), and the negative error code PCRE2_ERROR_NOMEMORY is returned. +None of the messages are very long; a buffer size of 120 code units is ample. +. +. +.\" HTML +.SH "EXTRACTING CAPTURED SUBSTRINGS BY NUMBER" +.rs +.sp +.nf +.B int pcre2_substring_length_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_SIZE *\fIlength\fP);" +.sp +.B int pcre2_substring_copy_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_UCHAR *\fIbuffer\fP," +.B " PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B int pcre2_substring_get_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_UCHAR **\fIbufferptr\fP," +.B " PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B void pcre2_substring_free(PCRE2_UCHAR *\fIbuffer\fP); +.fi +.P +Captured substrings can be accessed directly by using the ovector as described +.\" HTML +.\" +above. +.\" +For convenience, auxiliary functions are provided for extracting captured +substrings as new, separate, zero-terminated strings. A substring that contains +a binary zero is correctly extracted and has a further zero added on the end, +but the result is not, of course, a C string. +.P +The functions in this section identify substrings by number. The number zero +refers to the entire matched substring, with higher numbers referring to +substrings captured by parenthesized groups. After a partial match, only +substring zero is available. An attempt to extract any other substring gives +the error PCRE2_ERROR_PARTIAL. 
The next section describes similar functions for +extracting captured substrings by name. +.P +If a pattern uses the \eK escape sequence within a positive assertion, the +reported start of a successful match can be greater than the end of the match. +For example, if the pattern (?=ab\eK) is matched against "ab", the start and +end offset values for the match are 2 and 0. In this situation, calling these +functions with a zero substring number extracts a zero-length empty string. +.P +You can find the length in code units of a captured substring without +extracting it by calling \fBpcre2_substring_length_bynumber()\fP. The first +argument is a pointer to the match data block, the second is the group number, +and the third is a pointer to a variable into which the length is placed. If +you just want to know whether or not the substring has been captured, you can +pass the third argument as NULL. +.P +The \fBpcre2_substring_copy_bynumber()\fP function copies a captured substring +into a supplied buffer, whereas \fBpcre2_substring_get_bynumber()\fP copies it +into new memory, obtained using the same memory allocation function that was +used for the match data block. The first two arguments of these functions are a +pointer to the match data block and a capture group number. +.P +The final arguments of \fBpcre2_substring_copy_bynumber()\fP are a pointer to +the buffer and a pointer to a variable that contains its length in code units. +This is updated to contain the actual number of code units used for the +extracted substring, excluding the terminating zero. +.P +For \fBpcre2_substring_get_bynumber()\fP the third and fourth arguments point +to variables that are updated with a pointer to the new memory and the number +of code units that comprise the substring, again excluding the terminating +zero. When the substring is no longer needed, the memory should be freed by +calling \fBpcre2_substring_free()\fP. 
+.P +The return value from all these functions is zero for success, or a negative +error code. If the pattern match failed, the match failure code is returned. +If a substring number greater than zero is used after a partial match, +PCRE2_ERROR_PARTIAL is returned. Other possible error codes are: +.sp + PCRE2_ERROR_NOMEMORY +.sp +The buffer was too small for \fBpcre2_substring_copy_bynumber()\fP, or the +attempt to get memory failed for \fBpcre2_substring_get_bynumber()\fP. +.sp + PCRE2_ERROR_NOSUBSTRING +.sp +There is no substring with that number in the pattern, that is, the number is +greater than the number of capturing parentheses. +.sp + PCRE2_ERROR_UNAVAILABLE +.sp +The substring number, though not greater than the number of captures in the +pattern, is greater than the number of slots in the ovector, so the substring +could not be captured. +.sp + PCRE2_ERROR_UNSET +.sp +The substring did not participate in the match. For example, if the pattern is +(abc)|(def) and the subject is "def", and the ovector contains at least two +capturing slots, substring number 1 is unset. +. +. +.SH "EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS" +.rs +.sp +.nf +.B int pcre2_substring_list_get(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_UCHAR ***\fIlistptr\fP, PCRE2_SIZE **\fIlengthsptr\fP); +.sp +.B void pcre2_substring_list_free(PCRE2_UCHAR **\fIlist\fP); +.fi +.P +The \fBpcre2_substring_list_get()\fP function extracts all available substrings +and builds a list of pointers to them. It also (optionally) builds a second +list that contains their lengths (in code units), excluding a terminating zero +that is added to each of them. All this is done in a single block of memory +that is obtained using the same memory allocation function that was used to get +the match data block. +.P +This function must be called only after a successful match. If called after a +partial match, the error code PCRE2_ERROR_PARTIAL is returned. 
+.P +The address of the memory block is returned via \fIlistptr\fP, which is also +the start of the list of string pointers. The end of the list is marked by a +NULL pointer. The address of the list of lengths is returned via +\fIlengthsptr\fP. If your strings do not contain binary zeros and you do not +therefore need the lengths, you may supply NULL as the \fIlengthsptr\fP +argument to disable the creation of a list of lengths. The yield of the +function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block +could not be obtained. When the list is no longer needed, it should be freed by +calling \fBpcre2_substring_list_free()\fP. +.P +If this function encounters a substring that is unset, which can happen when +capture group number \fIn+1\fP matches some part of the subject, but group +\fIn\fP has not been used at all, it returns an empty string. This can be +distinguished from a genuine zero-length substring by inspecting the +appropriate offset in the ovector, which contains PCRE2_UNSET for unset +substrings, or by calling \fBpcre2_substring_length_bynumber()\fP. +. +. +.\" HTML +.SH "EXTRACTING CAPTURED SUBSTRINGS BY NAME" +.rs +.sp +.nf +.B int pcre2_substring_number_from_name(const pcre2_code *\fIcode\fP, +.B " PCRE2_SPTR \fIname\fP);" +.sp +.B int pcre2_substring_length_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_SIZE *\fIlength\fP);" +.sp +.B int pcre2_substring_copy_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_UCHAR *\fIbuffer\fP, PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B int pcre2_substring_get_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_UCHAR **\fIbufferptr\fP, PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B void pcre2_substring_free(PCRE2_UCHAR *\fIbuffer\fP); +.fi +.P +To extract a substring by name, you first have to find the associated number. +For example, for this pattern: +.sp + (a+)b(?<xxx>\ed+)... +.sp +the number of the capture group called "xxx" is 2.
If the name is known to be +unique (PCRE2_DUPNAMES was not set), you can find the number from the name by +calling \fBpcre2_substring_number_from_name()\fP. The first argument is the +compiled pattern, and the second is the name. The yield of the function is the +group number, PCRE2_ERROR_NOSUBSTRING if there is no group with that name, or +PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one group with that name. +Given the number, you can extract the substring directly from the ovector, or +use one of the "bynumber" functions described above. +.P +For convenience, there are also "byname" functions that correspond to the +"bynumber" functions, the only difference being that the second argument is a +name instead of a number. If PCRE2_DUPNAMES is set and there are duplicate +names, these functions scan all the groups with the given name, and return the +captured substring from the first named group that is set. +.P +If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is +returned. If all groups with the name have numbers that are greater than the +number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is returned. If there +is at least one group with a slot in the ovector, but no group is found to be +set, PCRE2_ERROR_UNSET is returned. +.P +\fBWarning:\fP If the pattern uses the (?| feature to set up multiple +capture groups with the same number, as described in the +.\" HTML +.\" +section on duplicate group numbers +.\" +in the +.\" HREF +\fBpcre2pattern\fP +.\" +page, you cannot use names to distinguish the different capture groups, because +names are not included in the compiled code. The matching process uses only +numbers. For this reason, the use of different names for groups with the +same number causes an error at compile time. +. +. 
+.\" HTML +.SH "CREATING A NEW STRING WITH SUBSTITUTIONS" +.rs +.sp +.nf +.B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP," +.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP," +.B " PCRE2_SIZE *\fIoutlengthptr\fP);" +.fi +.P +This function optionally calls \fBpcre2_match()\fP and then makes a copy of the +subject string in \fIoutputbuffer\fP, replacing parts that were matched with +the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP, which +can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a +special case, if \fIreplacement\fP is NULL and \fIrlength\fP is zero, the +replacement is assumed to be an empty string. If \fIrlength\fP is non-zero, an +error occurs if \fIreplacement\fP is NULL. +.P +There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just +the replacement string(s). The default action is to perform just one +replacement if the pattern matches, but there is an option that requests +multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below). +.P +If successful, \fBpcre2_substitute()\fP returns the number of substitutions +that were carried out. This may be zero if no match was found, and is never +greater than one unless PCRE2_SUBSTITUTE_GLOBAL is set. A negative value is +returned if an error is detected. +.P +Matches in which a \eK item in a lookahead in the pattern causes the match to +end before it starts are not supported, and give rise to an error return. For +global replacements, matches in which \eK in a lookbehind causes the match to +start earlier than the point that was reached in the previous iteration are +also not supported. 
+.P +The first seven arguments of \fBpcre2_substitute()\fP are the same as for +\fBpcre2_match()\fP, except that the partial matching options are not +permitted, and \fImatch_data\fP may be passed as NULL, in which case a match +data block is obtained and freed within this function, using memory management +functions from the match context, if provided, or else those that were used to +allocate memory for the compiled code. +.P +If \fImatch_data\fP is not NULL and PCRE2_SUBSTITUTE_MATCHED is not set, the +provided block is used for all calls to \fBpcre2_match()\fP, and its contents +afterwards are the result of the final call. For global changes, this will +always be a no-match error. The contents of the ovector within the match data +block may or may not have been changed. +.P +As well as the usual options for \fBpcre2_match()\fP, a number of additional +options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP. +One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external +\fImatch_data\fP block must be provided, and it must have already been used for +an external call to \fBpcre2_match()\fP with the same pattern and subject +arguments. The data in the \fImatch_data\fP block (return code, offset vector) +is then used for the first substitution instead of calling \fBpcre2_match()\fP +from within \fBpcre2_substitute()\fP. This allows an application to check for a +match before choosing to substitute, without having to repeat the match. +.P +The contents of the externally supplied match data block are not changed when +PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set, +\fBpcre2_match()\fP is called after the first substitution to check for further +matches, but this is done using an internally obtained match data block, thus +always leaving the external block unchanged. 
+.P +The \fIcode\fP argument is not used for matching before the first substitution +when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, even when +PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains information such as the +UTF setting and the number of capturing parentheses in the pattern. +.P +The default action of \fBpcre2_substitute()\fP is to return a copy of the +subject string with matched substrings replaced. However, if +PCRE2_SUBSTITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are +returned. In the global case, multiple replacements are concatenated in the +output buffer. Substitution callouts (see +.\" HTML +.\" +below) +.\" +can be used to separate them if necessary. +.P +The \fIoutlengthptr\fP argument of \fBpcre2_substitute()\fP must point to a +variable that contains the length, in code units, of the output buffer. If the +function is successful, the value is updated to contain the length in code +units of the new string, excluding the trailing zero that is automatically +added. +.P +If the function is not successful, the value set via \fIoutlengthptr\fP depends +on the type of error. For syntax errors in the replacement string, the value is +the offset in the replacement string where the error was detected. For other +errors, the value is PCRE2_UNSET by default. This includes the case of the +output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set. +.P +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is +too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If +this option is set, however, \fBpcre2_substitute()\fP continues to go through +the motions of matching and substituting (without, of course, writing anything) +in order to compute the size of buffer that is needed. This value is passed +back via the \fIoutlengthptr\fP variable, with the result of the function still +being PCRE2_ERROR_NOMEMORY. 
+.P +Passing a buffer size of zero is a permitted way of finding out how much memory +is needed for a given substitution. However, this does mean that the entire +operation is carried out twice. Depending on the application, it may be more +efficient to allocate a large buffer and free the excess afterwards, instead of +using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH. +.P +The replacement string, which is interpreted as a UTF string in UTF mode, is +checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An invalid UTF +replacement string causes an immediate return with the relevant UTF error code. +.P +If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted +in any way. By default, however, a dollar character is an escape character that +can specify the insertion of characters from capture groups and names from +(*MARK) or other control verbs in the pattern. Dollar is the only escape +character (backslash is treated as literal). The following forms are always +recognized: +.sp + $$ insert a dollar character + $<n> or ${<n>} insert the contents of group <n> + $*MARK or ${*MARK} insert a control verb name +.sp +Either a group number or a group name can be given for <n>. Curly brackets are +required only if the following character would be interpreted as part of the +number or name. The number may be zero to include the entire matched string. +For example, if the pattern a(b)c is matched with "=abc=" and the replacement +string "+$1$0$1+", the result is "=+babcb+=". +.P +$*MARK inserts the name from the last encountered backtracking control verb on +the matching path that has a name. (*MARK) must always include a name, but the +other verbs need not. For example, in the case of (*MARK:A)(*PRUNE) the name +inserted is "A", but for (*MARK:A)(*PRUNE:B) the relevant name is "B".
This +facility can be used to perform simple simultaneous substitutions, as this +\fBpcre2test\fP example shows: +.sp + /(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK} + apple lemon + 2: pear orange +.sp +PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject string, +replacing every matching substring. If this option is not set, only the first +matching substring is replaced. The search for matches takes place in the +original subject string (that is, previous replacements do not affect it). +Iteration is implemented by advancing the \fIstartoffset\fP value for each +search, which is always passed the entire subject string. If an offset limit is +set in the match context, searching stops when that limit is reached. +.P +You can restrict the effect of a global substitution to a portion of the +subject string by setting either or both of \fIstartoffset\fP and an offset +limit. Here is a \fBpcre2test\fP example: +.sp + /B/g,replace=!,use_offset_limit + ABC ABC ABC ABC\e=offset=3,offset_limit=12 + 2: ABC A!C A!C ABC +.sp +When continuing with global substitutions after matching a substring with zero +length, an attempt to find a non-empty match at the same offset is performed. +If this is not successful, the offset is advanced by one character except when +CRLF is a valid newline sequence and the next two characters are CR, LF. In +this case, the offset is advanced by two characters. +.P +PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that do +not appear in the pattern to be treated as unset groups. This option should be +used with care, because it means that a typo in a group name or number no +longer causes the PCRE2_ERROR_NOSUBSTRING error. +.P +PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including unknown +groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated as empty +strings when inserted as described above. 
If this option is not set, an attempt +to insert an unset group causes the PCRE2_ERROR_UNSET error. This option does +not influence the extended substitution syntax described below. +.P +PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the +replacement string. Without this option, only the dollar character is special, +and only the group insertion forms listed above are valid. When +PCRE2_SUBSTITUTE_EXTENDED is set, two things change: +.P +Firstly, backslash in a replacement string is interpreted as an escape +character. The usual forms such as \en or \ex{ddd} can be used to specify +particular character codes, and backslash followed by any non-alphanumeric +character quotes that character. Extended quoting can be coded using \eQ...\eE, +exactly as in pattern strings. +.P +There are also four escape sequences for forcing the case of inserted letters. +The insertion mechanism has three states: no case forcing, force upper case, +and force lower case. The escape sequences change the current state: \eU and +\eL change to upper or lower case forcing, respectively, and \eE (when not +terminating a \eQ quoted sequence) reverts to no case forcing. The sequences +\eu and \el force the next character (if it is a letter) to upper or lower +case, respectively, and then the state automatically reverts to no case +forcing. Case forcing applies to all inserted characters, including those from +capture groups and letters within \eQ...\eE quoted sequences. If either +PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode +properties are used for case forcing characters whose code points are greater +than 127. +.P +Note that case forcing sequences such as \eU...\eE do not nest. For example, +the result of processing "\eUaa\eLBB\eEcc\eE" is "AAbbcc"; the final \eE has no +effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do +not apply to replacement strings. 
+.P +The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more +flexibility to capture group substitution. The syntax is similar to that used +by Bash: +.sp + ${<n>:-<string>} + ${<n>:+<string1>:<string2>} +.sp +As before, <n> may be a group number or a name. The first form specifies a +default value. If group <n> is set, its value is inserted; if not, <string> is +expanded and the result inserted. The second form specifies strings that are +expanded and inserted when group <n> is set or unset, respectively. The first +form is just a convenient shorthand for +.sp + ${<n>:+${<n>}:<string>} +.sp +Backslash can be used to escape colons and closing curly brackets in the +replacement strings. A change of the case forcing state within a replacement +string remains in force afterwards, as shown in this \fBpcre2test\fP example: +.sp + /(some)?(body)/substitute_extended,replace=${1:+\eU:\eL}HeLLo + body + 1: hello + somebody + 1: HELLO +.sp +The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended +substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown +groups in the extended syntax forms to be treated as unset. +.P +If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, +PCRE2_SUBSTITUTE_UNSET_EMPTY, and PCRE2_SUBSTITUTE_EXTENDED are irrelevant and +are ignored. +. +. +.SS "Substitution errors" +.rs +.sp +In the event of an error, \fBpcre2_substitute()\fP returns a negative error +code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors from +\fBpcre2_match()\fP are passed straight back. +.P +PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring insertion, +unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set. +.P +PCRE2_ERROR_UNSET is returned for an unset substring insertion (including an +unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) when the simple +(non-extended) syntax is used and PCRE2_SUBSTITUTE_UNSET_EMPTY is not set. +.P +PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough.
If the +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size of buffer that is +needed is returned via \fIoutlengthptr\fP. Note that this does not happen by +default. +.P +PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the +\fImatch_data\fP argument is NULL or if the \fIsubject\fP or \fIreplacement\fP +arguments are NULL. For backward compatibility reasons an exception is made for +the \fIreplacement\fP argument if the \fIrlength\fP argument is also 0. +.P +PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the +replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE +(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket +not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group +substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before +it started or the match started earlier than the current position in the +subject, which can happen if \eK is used in an assertion). +.P +As for all PCRE2 errors, a text message that describes the error can be +obtained by calling the \fBpcre2_get_error_message()\fP function (see +"Obtaining a textual error message" +.\" HTML +.\" +above). +.\" +. +. +.\" HTML +.SS "Substitution callouts" +.rs +.sp +.nf +.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *)," +.B " void *\fIcallout_data\fP);" +.fi +.sp +The \fBpcre2_set_substitute_callout()\fP function can be used to specify a +callout function for \fBpcre2_substitute()\fP. This information is passed in +a match context. The callout function is called after each substitution has +been processed, but it can cause the replacement not to happen. The callout +function is not called for simulated substitutions that happen as a result of +the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
+.P +The first argument of the callout function is a pointer to a substitute callout +block structure, which contains the following fields, not necessarily in this +order: +.sp + uint32_t \fIversion\fP; + uint32_t \fIsubscount\fP; + PCRE2_SPTR \fIinput\fP; + PCRE2_SPTR \fIoutput\fP; + PCRE2_SIZE \fI*ovector\fP; + uint32_t \fIoveccount\fP; + PCRE2_SIZE \fIoutput_offsets[2]\fP; +.sp +The \fIversion\fP field contains the version number of the block format. The +current version is 0. The version number will increase in future if more fields +are added, but the intention is never to remove any of the existing fields. +.P +The \fIsubscount\fP field is the number of the current match. It is 1 for the +first callout, 2 for the second, and so on. The \fIinput\fP and \fIoutput\fP +pointers are copies of the values passed to \fBpcre2_substitute()\fP. +.P +The \fIovector\fP field points to the ovector, which contains the result of the +most recent match. The \fIoveccount\fP field contains the number of pairs that +are set in the ovector, and is always greater than zero. +.P +The \fIoutput_offsets\fP vector contains the offsets of the replacement in the +output string. This has already been processed for dollar and (if requested) +backslash substitutions as described above. +.P +The second argument of the callout function is the value passed as +\fIcallout_data\fP when the function was registered. The value returned by the +callout function is interpreted as follows: +.P +If the value is zero, the replacement is accepted, and, if +PCRE2_SUBSTITUTE_GLOBAL is set, processing continues with a search for the next +match. If the value is not zero, the current replacement is not accepted. If +the value is greater than zero, processing continues when +PCRE2_SUBSTITUTE_GLOBAL is set. 
Otherwise (the value is less than zero or +PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is copied to the +output and the call to \fBpcre2_substitute()\fP exits, returning the number of +matches so far. +. +. +.SH "DUPLICATE CAPTURE GROUP NAMES" +.rs +.sp +.nf +.B int pcre2_substring_nametable_scan(const pcre2_code *\fIcode\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_SPTR *\fIfirst\fP, PCRE2_SPTR *\fIlast\fP);" +.fi +.P +When a pattern is compiled with the PCRE2_DUPNAMES option, names for capture +groups are not required to be unique. Duplicate names are always allowed for +groups with the same number, created by using the (?| feature. Indeed, if such +groups are named, they are required to use the same names. +.P +Normally, patterns that use duplicate names are such that in any one match, +only one of each set of identically-named groups participates. An example is +shown in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. +.P +When duplicates are present, \fBpcre2_substring_copy_byname()\fP and +\fBpcre2_substring_get_byname()\fP return the first substring corresponding to +the given name that is set. Only if none are set is PCRE2_ERROR_UNSET +returned. The \fBpcre2_substring_number_from_name()\fP function returns the +error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate names. +.P +If you want to get full details of all captured substrings for a given name, +you must use the \fBpcre2_substring_nametable_scan()\fP function. The first +argument is the compiled pattern, and the second is the name. If the third and +fourth arguments are NULL, the function returns a group number for a unique +name, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise. +.P +When the third and fourth arguments are not NULL, they must be pointers to +variables that are updated by the function. After it has run, they point to the +first and last entries in the name-to-number table for the given name, and the +function returns the length of each entry in code units.
In both cases, +PCRE2_ERROR_NOSUBSTRING is returned if there are no entries for the given name. +.P +The format of the name table is described +.\" HTML +.\" +above +.\" +in the section entitled \fIInformation about a pattern\fP. Given all the +relevant entries for the name, you can extract each of their numbers, and hence +the captured data. +. +. +.SH "FINDING ALL POSSIBLE MATCHES AT ONE POSITION" +.rs +.sp +The traditional matching function uses a similar algorithm to Perl, which stops +when it finds the first match at a given point in the subject. If you want to +find all possible matches, or the longest possible match at a given position, +consider using the alternative matching function (see below) instead. If you +cannot use the alternative function, you can kludge it up by making use of the +callout facility, which is described in the +.\" HREF +\fBpcre2callout\fP +.\" +documentation. +.P +What you have to do is to insert a callout right at the end of the pattern. +When your callout function is called, extract and save the current matched +substring. Then return 1, which forces \fBpcre2_match()\fP to backtrack and try +other alternatives. Ultimately, when it runs out of matches, +\fBpcre2_match()\fP will yield PCRE2_ERROR_NOMATCH. +. +. +.\" HTML +.SH "MATCHING A PATTERN: THE ALTERNATIVE FUNCTION" +.rs +.sp +.nf +.B int pcre2_dfa_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP," +.B " int *\fIworkspace\fP, PCRE2_SIZE \fIwscount\fP);" +.fi +.P +The function \fBpcre2_dfa_match()\fP is called to match a subject string +against a compiled pattern, using a matching algorithm that scans the subject +string just once (not counting lookaround assertions), and does not backtrack +(except when processing lookaround assertions). 
This has different +characteristics to the normal algorithm, and is not compatible with Perl. Some +of the features of PCRE2 patterns are not supported. Nevertheless, there are +times when this kind of matching can be useful. For a discussion of the two +matching algorithms, and a list of features that \fBpcre2_dfa_match()\fP does +not support, see the +.\" HREF +\fBpcre2matching\fP +.\" +documentation. +.P +The arguments for the \fBpcre2_dfa_match()\fP function are the same as for +\fBpcre2_match()\fP, plus two extras. The ovector within the match data block +is used in a different way, and this is described below. The other common +arguments are used in the same way as for \fBpcre2_match()\fP, so their +description is not repeated here. +.P +The two additional arguments provide workspace for the function. The workspace +vector should contain at least 20 elements. It is used for keeping track of +multiple paths through the pattern tree. More workspace is needed for patterns +and subjects where there are a lot of potential matches. +.P +Here is an example of a simple call to \fBpcre2_dfa_match()\fP: +.sp + int wspace[20]; + pcre2_match_data *md = pcre2_match_data_create(4, NULL); + int rc = pcre2_dfa_match( + re, /* result of pcre2_compile() */ + "some string", /* the subject string */ + 11, /* the length of the subject string */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + md, /* the match data block */ + NULL, /* a match context; NULL means use defaults */ + wspace, /* working space vector */ + 20); /* number of elements (NOT size in bytes) */ +. +.SS "Option bits for \fBpcre2_dfa_match()\fP" +.rs +.sp +The unused bits of the \fIoptions\fP argument for \fBpcre2_dfa_match()\fP must +be zero. 
The only bits that may be set are PCRE2_ANCHORED, +PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL, +PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, +PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last +four of these are exactly the same as for \fBpcre2_match()\fP, so their +description is not repeated here. +.sp + PCRE2_PARTIAL_HARD + PCRE2_PARTIAL_SOFT +.sp +These have the same general effect as they do for \fBpcre2_match()\fP, but the +details are slightly different. When PCRE2_PARTIAL_HARD is set for +\fBpcre2_dfa_match()\fP, it returns PCRE2_ERROR_PARTIAL if the end of the +subject is reached and there is still at least one matching possibility that +requires additional characters. This happens even if some complete matches have +already been found. When PCRE2_PARTIAL_SOFT is set, the return code +PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL if the end of the +subject is reached, there have been no complete matches, but there is still at +least one matching possibility. The portion of the string that was inspected +when the longest partial match was found is set as the first matching string in +both cases. There is a more detailed discussion of partial and multi-segment +matching, with examples, in the +.\" HREF +\fBpcre2partial\fP +.\" +documentation. +.sp + PCRE2_DFA_SHORTEST +.sp +Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to stop as +soon as it has found one match. Because of the way the alternative algorithm +works, this is necessarily the shortest possible match at the first possible +matching point in the subject string. +.sp + PCRE2_DFA_RESTART +.sp +When \fBpcre2_dfa_match()\fP returns a partial match, it is possible to call it +again, with additional subject characters, and have it continue with the same +match. 
The PCRE2_DFA_RESTART option requests this action; when it is set, the +\fIworkspace\fP and \fIwscount\fP arguments must reference the same vector as +before because data about the match so far is left in them after a partial +match. There is more discussion of this facility in the +.\" HREF +\fBpcre2partial\fP +.\" +documentation. +. +. +.SS "Successful returns from \fBpcre2_dfa_match()\fP" +.rs +.sp +When \fBpcre2_dfa_match()\fP succeeds, it may have matched more than one +substring in the subject. Note, however, that all the matches from one run of +the function start at the same point in the subject. The shorter matches are +all initial substrings of the longer matches. For example, if the pattern +.sp + <.*> +.sp +is matched against the string +.sp + This is <something> <something else> <something further> no more +.sp +the three matched strings are +.sp + <something> <something else> <something further> + <something> <something else> + <something> +.sp +On success, the yield of the function is a number greater than zero, which is +the number of matched substrings. The offsets of the substrings are returned in +the ovector, and can be extracted by number in the same way as for +\fBpcre2_match()\fP, but the numbers bear no relation to any capture groups +that may exist in the pattern, because DFA matching does not support capturing. +.P +Calls to the convenience functions that extract substrings by name +return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used after a +DFA match. The convenience functions that extract substrings by number never +return PCRE2_ERROR_NOSUBSTRING. +.P +The matched strings are stored in the ovector in reverse order of length; that +is, the longest matching string is first. If there were too many matches to fit +into the ovector, the yield of the function is zero, and the vector is filled +with the longest matches. +.P +NOTE: PCRE2's "auto-possessification" optimization usually applies to character +repeats at the end of a pattern (as well as internally). For example, the +pattern "a\ed+" is compiled as if it were "a\ed++". 
For DFA matching, this +means that only one possible match is found. If you really do want multiple +matches in such cases, either use an ungreedy repeat such as "a\ed+?" or set +the PCRE2_NO_AUTO_POSSESS option when compiling. +. +. +.SS "Error returns from \fBpcre2_dfa_match()\fP" +.rs +.sp +The \fBpcre2_dfa_match()\fP function returns a negative number when it fails. +Many of the errors are the same as for \fBpcre2_match()\fP, as described +.\" HTML +.\" +above. +.\" +There are in addition the following errors that are specific to +\fBpcre2_dfa_match()\fP: +.sp + PCRE2_ERROR_DFA_UITEM +.sp +This return is given if \fBpcre2_dfa_match()\fP encounters an item in the +pattern that it does not support, for instance, the use of \eC in a UTF mode or +a backreference. +.sp + PCRE2_ERROR_DFA_UCOND +.sp +This return is given if \fBpcre2_dfa_match()\fP encounters a condition item +that uses a backreference for the condition, or a test for recursion in a +specific capture group. These are not supported. +.sp + PCRE2_ERROR_DFA_UINVALID_UTF +.sp +This return is given if \fBpcre2_dfa_match()\fP is called for a pattern that +was compiled with PCRE2_MATCH_INVALID_UTF. This is not supported for DFA +matching. +.sp + PCRE2_ERROR_DFA_WSSIZE +.sp +This return is given if \fBpcre2_dfa_match()\fP runs out of space in the +\fIworkspace\fP vector. +.sp + PCRE2_ERROR_DFA_RECURSE +.sp +When a recursion or subroutine call is processed, the matching function calls +itself recursively, using private memory for the ovector and \fIworkspace\fP. +This error is given if the internal ovector is not large enough. This should be +extremely rare, as a vector of size 1000 is used. +.sp + PCRE2_ERROR_DFA_BADRESTART +.sp +When \fBpcre2_dfa_match()\fP is called with the \fBPCRE2_DFA_RESTART\fP option, +some plausibility checks are made on the contents of the workspace, which +should contain data about the previous partial match. If any of these checks +fail, this error is given. +. +. 
+.SH "SEE ALSO" +.rs +.sp +\fBpcre2build\fP(3), \fBpcre2callout\fP(3), \fBpcre2demo(3)\fP, +\fBpcre2matching\fP(3), \fBpcre2partial\fP(3), \fBpcre2posix\fP(3), +\fBpcre2sample\fP(3), \fBpcre2unicode\fP(3). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 24 April 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/doc/pcre2build.3 b/doc/pcre2build.3 new file mode 100644 index 0000000..1df4ebd --- /dev/null +++ b/doc/pcre2build.3 @@ -0,0 +1,665 @@ +.TH PCRE2BUILD 3 "15 April 2024" "PCRE2 10.44" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +. +. +.SH "BUILDING PCRE2" +.rs +.sp +PCRE2 is distributed with a \fBconfigure\fP script that can be used to build +the library in Unix-like environments using the applications known as +Autotools. Also in the distribution are files to support building using +\fBCMake\fP instead of \fBconfigure\fP. The text file +.\" HTML +.\" +\fBREADME\fP +.\" +contains general information about building with Autotools (some of which is +repeated below), and also has some comments about building on various operating +systems. The files in the \fBvms\fP directory support building under OpenVMS. +There is a lot more information about building PCRE2 without using +Autotools (including information about using \fBCMake\fP and building "by +hand") in the text file called +.\" HTML +.\" +\fBNON-AUTOTOOLS-BUILD\fP. +.\" +You should consult this file as well as the +.\" HTML +.\" +\fBREADME\fP +.\" +file if you are building in a non-Unix-like environment. +. +. +.SH "PCRE2 BUILD-TIME OPTIONS" +.rs +.sp +The rest of this document describes the optional features of PCRE2 that can be +selected when the library is compiled. 
It assumes use of the \fBconfigure\fP +script, where the optional features are selected or deselected by providing +options to \fBconfigure\fP before running the \fBmake\fP command. However, the +same options can be selected in both Unix-like and non-Unix-like environments +if you are using \fBCMake\fP instead of \fBconfigure\fP to build PCRE2. +.P +If you are not using Autotools or \fBCMake\fP, option selection can be done by +editing the \fBconfig.h\fP file, or by passing parameter settings to the +compiler, as described in +.\" HTML +.\" +\fBNON-AUTOTOOLS-BUILD\fP. +.\" +.P +The complete list of options for \fBconfigure\fP (which includes the standard +ones such as the selection of the installation directory) can be obtained by +running +.sp + ./configure --help +.sp +The following sections include descriptions of "on/off" options whose names +begin with --enable or --disable. Because of the way that \fBconfigure\fP +works, --enable and --disable always come in pairs, so the complementary option +always exists as well, but as it specifies the default, it is not described. +Options that specify values have names that start with --with. At the end of a +\fBconfigure\fP run, a summary of the configuration is output. +. +. +.SH "BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES" +.rs +.sp +By default, a library called \fBlibpcre2-8\fP is built, containing functions +that take string arguments contained in arrays of bytes, interpreted either as +single-byte characters, or UTF-8 strings. You can also build two other +libraries, called \fBlibpcre2-16\fP and \fBlibpcre2-32\fP, which process +strings that are contained in arrays of 16-bit and 32-bit code units, +respectively. These can be interpreted either as single-unit characters or +UTF-16/UTF-32 strings. 
To build these additional libraries, add one or both of +the following to the \fBconfigure\fP command: +.sp + --enable-pcre2-16 + --enable-pcre2-32 +.sp +If you do not want the 8-bit library, add +.sp + --disable-pcre2-8 +.sp +as well. At least one of the three libraries must be built. Note that the POSIX +wrapper is for the 8-bit library only, and that \fBpcre2grep\fP is an 8-bit +program. Neither of these are built if you select only the 16-bit or 32-bit +libraries. +. +. +.SH "BUILDING SHARED AND STATIC LIBRARIES" +.rs +.sp +The Autotools PCRE2 building process uses \fBlibtool\fP to build both shared +and static libraries by default. You can suppress an unwanted library by adding +one of +.sp + --disable-shared + --disable-static +.sp +to the \fBconfigure\fP command. Setting --disable-shared ensures that PCRE2 +libraries are built as static libraries. The binaries that are then created as +part of the build process (for example, \fBpcre2test\fP and \fBpcre2grep\fP) +are linked statically with one or more PCRE2 libraries, but may also be +dynamically linked with other libraries such as \fBlibc\fP. If you want these +binaries to be fully statically linked, you can set LDFLAGS like this: +.sp +LDFLAGS=--static ./configure --disable-shared +.sp +Note the two hyphens in --static. Of course, this works only if static versions +of all the relevant libraries are available for linking. +. +. +.SH "UNICODE AND UTF SUPPORT" +.rs +.sp +By default, PCRE2 is built with support for Unicode and UTF character strings. +To build it without Unicode support, add +.sp + --disable-unicode +.sp +to the \fBconfigure\fP command. This setting applies to all three libraries. It +is not possible to build one library with Unicode support and another without +in the same configuration. +.P +Of itself, Unicode support does not make PCRE2 treat strings as UTF-8, UTF-16 +or UTF-32. 
To do that, applications that use the library can set the PCRE2_UTF +option when they call \fBpcre2_compile()\fP to compile a pattern. +Alternatively, patterns may be started with (*UTF) unless the application has +locked this out by setting PCRE2_NEVER_UTF. +.P +UTF support allows the libraries to process character code points up to +0x10ffff in the strings that they handle. Unicode support also gives access to +the Unicode properties of characters, using pattern escapes such as \eP, \ep, +and \eX. Only the general category properties such as \fILu\fP and \fINd\fP, +script names, and some bi-directional properties are supported. Details are +given in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. +.P +Pattern escapes such as \ed and \ew do not by default make use of Unicode +properties. The application can request that they do by setting the PCRE2_UCP +option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also +request this by starting with (*UCP). +. +. +.SH "DISABLING THE USE OF \eC" +.rs +.sp +The \eC escape sequence, which matches a single code unit, even in a UTF mode, +can cause unpredictable behaviour because it may leave the current matching +point in the middle of a multi-code-unit character. The application can lock it +out by setting the PCRE2_NEVER_BACKSLASH_C option when calling +\fBpcre2_compile()\fP. There is also a build-time option +.sp + --enable-never-backslash-C +.sp +(note the upper case C) which locks out the use of \eC entirely. +. +. +.SH "JUST-IN-TIME COMPILER SUPPORT" +.rs +.sp +Just-in-time (JIT) compiler support is included in the build by specifying +.sp + --enable-jit +.sp +This support is available only for certain hardware architectures. If this +option is set for an unsupported architecture, a building error occurs. +If in doubt, use +.sp + --enable-jit=auto +.sp +which enables JIT only if the current hardware is supported. 
You can check +if JIT is enabled in the configuration summary that is output at the end of a +\fBconfigure\fP run. If you are enabling JIT under SELinux you may also want to +add +.sp + --enable-jit-sealloc +.sp +which enables the use of an execmem allocator in JIT that is compatible with +SELinux. This has no effect if JIT is not enabled. See the +.\" HREF +\fBpcre2jit\fP +.\" +documentation for a discussion of JIT usage. When JIT support is enabled, +\fBpcre2grep\fP automatically makes use of it, unless you add +.sp + --disable-pcre2grep-jit +.sp +to the \fBconfigure\fP command. +. +. +.SH "NEWLINE RECOGNITION" +.rs +.sp +By default, PCRE2 interprets the linefeed (LF) character as indicating the end +of a line. This is the normal newline character on Unix-like systems. You can +compile PCRE2 to use carriage return (CR) instead, by adding +.sp + --enable-newline-is-cr +.sp +to the \fBconfigure\fP command. There is also an --enable-newline-is-lf option, +which explicitly specifies linefeed as the newline character. +.P +Alternatively, you can specify that line endings are to be indicated by the +two-character sequence CRLF (CR immediately followed by LF). If you want this, +add +.sp + --enable-newline-is-crlf +.sp +to the \fBconfigure\fP command. There is a fourth option, specified by +.sp + --enable-newline-is-anycrlf +.sp +which causes PCRE2 to recognize any of the three sequences CR, LF, or CRLF as +indicating a line ending. A fifth option, specified by +.sp + --enable-newline-is-any +.sp +causes PCRE2 to recognize any Unicode newline sequence. The Unicode newline +sequences are the three just mentioned, plus the single characters VT (vertical +tab, U+000B), FF (form feed, U+000C), NEL (next line, U+0085), LS (line +separator, U+2028), and PS (paragraph separator, U+2029). The final option is +.sp + --enable-newline-is-nul +.sp +which causes NUL (binary zero) to be set as the default line-ending character. 
+.P +Whatever default line ending convention is selected when PCRE2 is built can be +overridden by applications that use the library. At build time it is +recommended to use the standard for your operating system. +. +. +.SH "WHAT \eR MATCHES" +.rs +.sp +By default, the sequence \eR in a pattern matches any Unicode newline sequence, +independently of what has been selected as the line ending sequence. If you +specify +.sp + --enable-bsr-anycrlf +.sp +the default is changed so that \eR matches only CR, LF, or CRLF. Whatever is +selected when PCRE2 is built can be overridden by applications that use the +library. +. +. +.SH "HANDLING VERY LARGE PATTERNS" +.rs +.sp +Within a compiled pattern, offset values are used to point from one part to +another (for example, from an opening parenthesis to an alternation +metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values +are used for these offsets, leading to a maximum size for a compiled pattern of +around 64 thousand code units. This is sufficient to handle all but the most +gigantic patterns. Nevertheless, some people do want to process truly enormous +patterns, so it is possible to compile PCRE2 to use three-byte or four-byte +offsets by adding a setting such as +.sp + --with-link-size=3 +.sp +to the \fBconfigure\fP command. The value given must be 2, 3, or 4. For the +16-bit library, a value of 3 is rounded up to 4. In these libraries, using +longer offsets slows down the operation of PCRE2 because it has to load +additional data when handling them. For the 32-bit library the value is always +4 and cannot be overridden; the value of --with-link-size is ignored. +. +. +.SH "LIMITING PCRE2 RESOURCE USAGE" +.rs +.sp +The \fBpcre2_match()\fP function increments a counter each time it goes round +its main loop. Putting a limit on this counter controls the amount of computing +resource used by a single call to \fBpcre2_match()\fP. 
The limit can be changed +at run time, as described in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. The default is 10 million, but this can be changed by adding a +setting such as +.sp + --with-match-limit=500000 +.sp +to the \fBconfigure\fP command. This setting also applies to the +\fBpcre2_dfa_match()\fP matching function, and to JIT matching (though the +counting is done differently). +.P +The \fBpcre2_match()\fP function uses heap memory to record backtracking +points. The more nested backtracking points there are (that is, the deeper the +search tree), the more memory is needed. There is an upper limit, specified in +kibibytes (units of 1024 bytes). This limit can be changed at run time, as +described in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. The default limit (in effect unlimited) is 20 million. You can +change this by a setting such as +.sp + --with-heap-limit=500 +.sp +which limits the amount of heap to 500 KiB. This limit applies only to +interpretive matching in \fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP, which +may also use the heap for internal workspace when processing complicated +patterns. This limit does not apply when JIT (which has its own memory +arrangements) is used. +.P +You can also explicitly limit the depth of nested backtracking in the +\fBpcre2_match()\fP interpreter. This limit defaults to the value that is set +for --with-match-limit. You can set a lower default limit by adding, for +example, +.sp + --with-match-limit-depth=10000 +.sp +to the \fBconfigure\fP command. This value can be overridden at run time. This +depth limit indirectly limits the amount of heap memory that is used, but +because the size of each backtracking "frame" depends on the number of +capturing parentheses in a pattern, the amount of heap that is used before the +limit is reached varies from pattern to pattern. This limit was more useful in +versions before 10.30, where function recursion was used for backtracking. 
+.P +As well as applying to \fBpcre2_match()\fP, the depth limit also controls +the depth of recursive function calls in \fBpcre2_dfa_match()\fP. These are +used for lookaround assertions, atomic groups, and recursion within patterns. +The limit does not apply to JIT matching. +. +. +.SH "LIMITING VARIABLE-LENGTH LOOKBEHIND ASSERTIONS" +.rs +.sp +Lookbehind assertions in which one or more branches can match a variable number +of characters are supported only if there is a maximum matching length for each +top-level branch. There is a limit to this maximum that defaults to 255 +characters. You can alter this default by a setting such as +.sp + --with-max-varlookbehind=100 +.sp +The limit can be changed at runtime by calling +\fBpcre2_set_max_varlookbehind()\fP. Lookbehind assertions in which every +branch matches a fixed number of characters (not necessarily all the same) are +not constrained by this limit. +. +. +.\" HTML +.SH "CREATING CHARACTER TABLES AT BUILD TIME" +.rs +.sp +PCRE2 uses fixed tables for processing characters whose code points are less +than 256. By default, PCRE2 is built with a set of tables that are distributed +in the file \fIsrc/pcre2_chartables.c.dist\fP. These tables are for ASCII codes +only. If you add +.sp + --enable-rebuild-chartables +.sp +to the \fBconfigure\fP command, the distributed tables are no longer used. +Instead, a program called \fBpcre2_dftables\fP is compiled and run. This +outputs the source for a new set of tables, created in the default locale of your +C run-time system. This method of replacing the tables does not work if you are +cross compiling, because \fBpcre2_dftables\fP needs to be run on the local +host and therefore cannot be compiled with the cross compiler. +.P +If you need to create alternative tables when cross compiling, you will have to +do so "by hand". There may also be other reasons for creating tables manually. 
+To cause \fBpcre2_dftables\fP to be built on the local host, run a normal +compiling command, and then run the program with the output file as its +argument, for example: +.sp + cc src/pcre2_dftables.c -o pcre2_dftables + ./pcre2_dftables src/pcre2_chartables.c +.sp +This builds the tables in the default locale of the local host. If you want to +specify a locale, you must use the -L option: +.sp + LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c +.sp +You can also specify -b (with or without -L). This causes the tables to be +written in binary instead of as source code. A set of binary tables can be +loaded into memory by an application and passed to \fBpcre2_compile()\fP in the +same way as tables created by calling \fBpcre2_maketables()\fP. The tables are +just a string of bytes, independent of hardware characteristics such as +endianness. This means they can be bundled with an application that runs in +different environments, to ensure consistent behaviour. +. +. +.SH "USING EBCDIC CODE" +.rs +.sp +PCRE2 assumes by default that it will run in an environment where the character +code is ASCII or Unicode, which is a superset of ASCII. This is the case for +most computer operating systems. PCRE2 can, however, be compiled to run in an +8-bit EBCDIC environment by adding +.sp + --enable-ebcdic --disable-unicode +.sp +to the \fBconfigure\fP command. This setting implies +--enable-rebuild-chartables. You should only use it if you know that you are in +an EBCDIC environment (for example, an IBM mainframe operating system). +.P +It is not possible to support both EBCDIC and UTF-8 codes in the same version +of the library. Consequently, --enable-unicode and --enable-ebcdic are mutually +exclusive. +.P +The EBCDIC character that corresponds to an ASCII LF is assumed to have the +value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. 
In +such an environment you should use +.sp + --enable-ebcdic-nl25 +.sp +as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR has the +same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is \fInot\fP +chosen as LF is made to correspond to the Unicode NEL character (which, in +Unicode, is 0x85). +.P +The options that select newline behaviour, such as --enable-newline-is-cr, +and equivalent run-time options, refer to these character values in an EBCDIC +environment. +. +. +.SH "PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS" +.rs +.sp +By default \fBpcre2grep\fP supports the use of callouts with string arguments +within the patterns it is matching. There are two kinds: one that generates +output using local code, and another that calls an external program or script. +If --disable-pcre2grep-callout-fork is added to the \fBconfigure\fP command, +only the first kind of callout is supported; if --disable-pcre2grep-callout is +used, all callouts are completely ignored. For more details of \fBpcre2grep\fP +callouts, see the +.\" HREF +\fBpcre2grep\fP +.\" +documentation. +. +. +.SH "PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT" +.rs +.sp +By default, \fBpcre2grep\fP reads all files as plain text. You can build it so +that it recognizes files whose names end in \fB.gz\fP or \fB.bz2\fP, and reads +them with \fBlibz\fP or \fBlibbz2\fP, respectively, by adding one or both of +.sp + --enable-pcre2grep-libz + --enable-pcre2grep-libbz2 +.sp +to the \fBconfigure\fP command. These options naturally require that the +relevant libraries are installed on your system. Configuration will fail if +they are not. +. +. +.SH "PCRE2GREP BUFFER SIZE" +.rs +.sp +\fBpcre2grep\fP uses an internal buffer to hold a "window" on the file it is +scanning, in order to be able to output "before" and "after" lines when it +finds a match. The default starting size of the buffer is 20KiB. 
The buffer +itself is three times this size, but because of the way it is used for holding +"before" lines, the longest line that is guaranteed to be processable is the +notional buffer size. If a longer line is encountered, \fBpcre2grep\fP +automatically expands the buffer, up to a specified maximum size, whose default +is 1MiB or the starting size, whichever is the larger. You can change the +default parameter values by adding, for example, +.sp + --with-pcre2grep-bufsize=51200 + --with-pcre2grep-max-bufsize=2097152 +.sp +to the \fBconfigure\fP command. The caller of \fBpcre2grep\fP can override +these values by using --buffer-size and --max-buffer-size on the command line. +. +. +.SH "PCRE2TEST OPTION FOR LIBREADLINE SUPPORT" +.rs +.sp +If you add one of +.sp + --enable-pcre2test-libreadline + --enable-pcre2test-libedit +.sp +to the \fBconfigure\fP command, \fBpcre2test\fP is linked with the +\fBlibreadline\fP or \fBlibedit\fP library, respectively, and when its input is +from a terminal, it reads it using the \fBreadline()\fP function. This provides +line-editing and history facilities. Note that \fBlibreadline\fP is +GPL-licensed, so if you distribute a binary of \fBpcre2test\fP linked in this +way, there may be licensing issues. These can be avoided by linking instead +with \fBlibedit\fP, which has a BSD licence. +.P +Setting --enable-pcre2test-libreadline causes the \fB-lreadline\fP option to be +added to the \fBpcre2test\fP build. In many operating environments with a +system-installed readline library this is sufficient. However, in some +environments (e.g. if an unmodified distribution version of readline is in +use), some extra configuration may be necessary. The INSTALL file for +\fBlibreadline\fP says this: +.sp + "Readline uses the termcap functions, but does not link with + the termcap or curses library itself, allowing applications + which link with readline the to choose an appropriate library." 
+.sp +If your environment has not been set up so that an appropriate library is +automatically included, you may need to add something like +.sp + LIBS="-lncurses" +.sp +immediately before the \fBconfigure\fP command. +. +. +.SH "INCLUDING DEBUGGING CODE" +.rs +.sp +If you add +.sp + --enable-debug +.sp +to the \fBconfigure\fP command, additional debugging code is included in the +build. This feature is intended for use by the PCRE2 maintainers. +. +. +.SH "DEBUGGING WITH VALGRIND SUPPORT" +.rs +.sp +If you add +.sp + --enable-valgrind +.sp +to the \fBconfigure\fP command, PCRE2 will use valgrind annotations to mark +certain memory regions as unaddressable. This allows it to detect invalid +memory accesses, and is mostly useful for debugging PCRE2 itself. +. +. +.SH "CODE COVERAGE REPORTING" +.rs +.sp +If your C compiler is gcc, you can build a version of PCRE2 that can generate a +code coverage report for its test suite. To enable this, you must install +\fBlcov\fP version 1.6 or above. Then specify +.sp + --enable-coverage +.sp +to the \fBconfigure\fP command and build PCRE2 in the usual way. +.P +Note that using \fBccache\fP (a caching C compiler) is incompatible with code +coverage reporting. If you have configured \fBccache\fP to run automatically +on your system, you must set the environment variable +.sp + CCACHE_DISABLE=1 +.sp +before running \fBmake\fP to build PCRE2, so that \fBccache\fP is not used. +.P +When --enable-coverage is used, the following additional targets are added to the +\fIMakefile\fP: +.sp + make coverage +.sp +This creates a fresh coverage report for the PCRE2 test suite. It is equivalent +to running "make coverage-reset", "make coverage-baseline", "make check", and +then "make coverage-report". +.sp + make coverage-reset +.sp +This zeroes the coverage counters, but does nothing else. +.sp + make coverage-baseline +.sp +This captures baseline coverage information. +.sp + make coverage-report +.sp +This creates the coverage report. 
+.sp + make coverage-clean-report +.sp +This removes the generated coverage report without cleaning the coverage data +itself. +.sp + make coverage-clean-data +.sp +This removes the captured coverage data without removing the coverage files +created at compile time (*.gcno). +.sp + make coverage-clean +.sp +This cleans all coverage data including the generated coverage report. For more +information about code coverage, see the \fBgcov\fP and \fBlcov\fP +documentation. +. +. +.SH "DISABLING THE Z AND T FORMATTING MODIFIERS" +.rs +.sp +The C99 standard defines formatting modifiers z and t for size_t and +ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in +environments other than old versions of Microsoft Visual Studio when +__STDC_VERSION__ is defined and has a value greater than or equal to 199901L +(indicating support for C99). +However, there is at least one environment that claims to be C99 but does not +support these modifiers. If +.sp + --disable-percent-zt +.sp +is specified, no use is made of the z or t modifiers. Instead of %td or %zu, +a suitable format is used depending on the size of long for the platform. +. +. +.SH "SUPPORT FOR FUZZERS" +.rs +.sp +There is a special option for use by people who want to run fuzzing tests on +PCRE2: +.sp + --enable-fuzz-support +.sp +At present this applies only to the 8-bit library. If set, it causes an extra +library called libpcre2-fuzzsupport.a to be built, but not installed. This +contains a single function called LLVMFuzzerTestOneInput() whose arguments are +a pointer to a string and the length of the string. When called, this function +tries to compile the string as a pattern, and if that succeeds, to match it. +This is done both with no options and with some random option bits that are +generated from the string. +.P +Setting --enable-fuzz-support also causes a binary called \fBpcre2fuzzcheck\fP +to be created. 
This is normally run under valgrind or used when PCRE2 is +compiled with address sanitizing enabled. It calls the fuzzing function and +outputs information about what it is doing. The input strings are specified by +arguments: if an argument starts with "=" the rest of it is a literal input +string. Otherwise, it is assumed to be a file name, and the contents of the +file are the test string. +. +. +.SH "OBSOLETE OPTION" +.rs +.sp +In versions of PCRE2 prior to 10.30, there were two ways of handling +backtracking in the \fBpcre2_match()\fP function. The default was to use the +system stack, but if +.sp + --disable-stack-for-recursion +.sp +was set, memory on the heap was used. From release 10.30 onwards this has +changed (the stack is no longer used) and this option now does nothing except +give a warning. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2api\fP(3), \fBpcre2-config\fP(3). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 15 April 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/doc/pcre2callout.3 b/doc/pcre2callout.3 new file mode 100644 index 0000000..86a1c54 --- /dev/null +++ b/doc/pcre2callout.3 @@ -0,0 +1,457 @@ +.TH PCRE2CALLOUT 3 "19 January 2024" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.SM +.nf +.B int (*pcre2_callout)(pcre2_callout_block *, void *); +.sp +.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP, +.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *)," +.B " void *\fIuser_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +PCRE2 provides a feature called "callout", which is a means of temporarily +passing control to the caller of PCRE2 in the middle of pattern matching. 
The +caller of PCRE2 provides an external function by putting its entry point in +a match context (see \fBpcre2_set_callout()\fP in the +.\" HREF +\fBpcre2api\fP +.\" +documentation). +.P +When using the \fBpcre2_substitute()\fP function, an additional callout feature +is available. This does a callout after each change to the subject string and +is described in the +.\" HREF +\fBpcre2api\fP +.\" +documentation; the rest of this document is concerned with callouts during +pattern matching. +.P +Within a regular expression, (?C) indicates a point at which the external +function is to be called. Different callout points can be identified by putting +a number less than 256 after the letter C. The default value is zero. +Alternatively, the argument may be a delimited string. The starting delimiter +must be one of ` ' " ^ % # $ { and the ending delimiter is the same as the +start, except for {, where the ending delimiter is }. If the ending delimiter +is needed within the string, it must be doubled. For example, this pattern has +two callout points: +.sp + (?C1)abc(?C"some ""arbitrary"" text")def +.sp +If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2 +automatically inserts callouts, all with number 255, before each item in the +pattern except for immediately before or after an explicit callout. For +example, if PCRE2_AUTO_CALLOUT is used with the pattern +.sp + A(?C3)B +.sp +it is processed as if it were +.sp + (?C255)A(?C3)B(?C255) +.sp +Here is a more complicated example: +.sp + A(\ed{2}|--) +.sp +With PCRE2_AUTO_CALLOUT, this pattern is processed as if it were +.sp + (?C255)A(?C255)((?C255)\ed{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255) +.sp +Notice that there is a callout before and after each parenthesis and +alternation bar. If the pattern contains a conditional group whose condition is +an assertion, an automatic callout is inserted immediately before the +condition. 
Such a callout may also be inserted explicitly, for example: +.sp + (?(?C9)(?=a)ab|de) (?(?C%text%)(?!=d)ab|de) +.sp +This applies only to assertion conditions (because they are themselves +independent groups). +.P +Callouts can be useful for tracking the progress of pattern matching. The +.\" HREF +\fBpcre2test\fP +.\" +program has a pattern qualifier (/auto_callout) that sets automatic callouts. +When any callouts are present, the output from \fBpcre2test\fP indicates how +the pattern is being matched. This is useful information when you are trying to +optimize the performance of a particular pattern. +. +. +.SH "MISSING CALLOUTS" +.rs +.sp +You should be aware that, because of optimizations in the way PCRE2 compiles +and matches patterns, callouts sometimes do not happen exactly as you might +expect. +. +. +.SS "Auto-possessification" +.rs +.sp +At compile time, PCRE2 "auto-possessifies" repeated items when it knows that +what follows cannot be part of the repeat. For example, a+[bc] is compiled as +if it were a++[bc]. The \fBpcre2test\fP output when this pattern is compiled +with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied to the string +"aaaa" is: +.sp + --->aaaa + +0 ^ a+ + +2 ^ ^ [bc] + No match +.sp +This indicates that when matching [bc] fails, there is no backtracking into a+ +(because it is being treated as a++) and therefore the callouts that would be +taken for the backtracks do not occur. You can disable the auto-possessify +feature by passing PCRE2_NO_AUTO_POSSESS to \fBpcre2_compile()\fP, or starting +the pattern with (*NO_AUTO_POSSESS). In this case, the output changes to this: +.sp + --->aaaa + +0 ^ a+ + +2 ^ ^ [bc] + +2 ^ ^ [bc] + +2 ^ ^ [bc] + +2 ^^ [bc] + No match +.sp +This time, when matching [bc] fails, the matcher backtracks into a+ and tries +again, repeatedly, until a+ itself fails. +. +. +.SS "Automatic .* anchoring" +.rs +.sp +By default, an optimization is applied when .* is the first significant item in +a pattern. 
If PCRE2_DOTALL is set, so that the dot can match any character, the +pattern is automatically anchored. If PCRE2_DOTALL is not set, a match can +start only after an internal newline or at the beginning of the subject, and +\fBpcre2_compile()\fP remembers this. If a pattern has more than one top-level +branch, automatic anchoring occurs if all branches are anchorable. +.P +This optimization is disabled, however, if .* is in an atomic group or if there +is a backreference to the capture group in which it appears. It is also +disabled if the pattern contains (*PRUNE) or (*SKIP). However, the presence of +callouts does not affect it. +.P +For example, if the pattern .*\ed is compiled with PCRE2_AUTO_CALLOUT and +applied to the string "aa", the \fBpcre2test\fP output is: +.sp + --->aa + +0 ^ .* + +2 ^ ^ \ed + +2 ^^ \ed + +2 ^ \ed + No match +.sp +This shows that all match attempts start at the beginning of the subject. In +other words, the pattern is anchored. You can disable this optimization by +passing PCRE2_NO_DOTSTAR_ANCHOR to \fBpcre2_compile()\fP, or starting the +pattern with (*NO_DOTSTAR_ANCHOR). In this case, the output changes to: +.sp + --->aa + +0 ^ .* + +2 ^ ^ \ed + +2 ^^ \ed + +2 ^ \ed + +0 ^ .* + +2 ^^ \ed + +2 ^ \ed + No match +.sp +This shows more match attempts, starting at the second subject character. +Another optimization, described in the next section, means that there is no +subsequent attempt to match with an empty subject. +. +. +.SS "Other optimizations" +.rs +.sp +Other optimizations that provide fast "no match" results also affect callouts. +For example, if the pattern is +.sp + ab(?C4)cd +.sp +PCRE2 knows that any matching string must contain the letter "d". If the +subject string is "abyz", the lack of "d" means that matching doesn't ever +start, and the callout is never reached. However, with "abyd", though the +result is still no match, the callout is obeyed. 
+.P +For most patterns PCRE2 also knows the minimum length of a matching string, and +will immediately give a "no match" return without actually running a match if +the subject is not long enough, or, for unanchored patterns, if it has been +scanned far enough. +.P +You can disable these optimizations by passing the PCRE2_NO_START_OPTIMIZE +option to \fBpcre2_compile()\fP, or by starting the pattern with +(*NO_START_OPT). This slows down the matching process, but does ensure that +callouts such as the example above are obeyed. +. +. +.\" HTML +.SH "THE CALLOUT INTERFACE" +.rs +.sp +During matching, when PCRE2 reaches a callout point, if an external function is +provided in the match context, it is called. This applies to both normal, +DFA, and JIT matching. The first argument to the callout function is a pointer +to a \fBpcre2_callout\fP block. The second argument is the void * callout data +that was supplied when the callout was set up by calling +\fBpcre2_set_callout()\fP (see the +.\" HREF +\fBpcre2api\fP +.\" +documentation). The callout block structure contains the following fields, not +necessarily in this order: +.sp + uint32_t \fIversion\fP; + uint32_t \fIcallout_number\fP; + uint32_t \fIcapture_top\fP; + uint32_t \fIcapture_last\fP; + uint32_t \fIcallout_flags\fP; + PCRE2_SIZE *\fIoffset_vector\fP; + PCRE2_SPTR \fImark\fP; + PCRE2_SPTR \fIsubject\fP; + PCRE2_SIZE \fIsubject_length\fP; + PCRE2_SIZE \fIstart_match\fP; + PCRE2_SIZE \fIcurrent_position\fP; + PCRE2_SIZE \fIpattern_position\fP; + PCRE2_SIZE \fInext_item_length\fP; + PCRE2_SIZE \fIcallout_string_offset\fP; + PCRE2_SIZE \fIcallout_string_length\fP; + PCRE2_SPTR \fIcallout_string\fP; +.sp +The \fIversion\fP field contains the version number of the block format. The +current version is 2; the three callout string fields were added for version 1, +and the \fIcallout_flags\fP field for version 2. 
If you are writing an +application that might use an earlier release of PCRE2, you should check the +version number before accessing any of these fields. The version number will +increase in future if more fields are added, but the intention is never to +remove any of the existing fields. +. +. +.SS "Fields for numerical callouts" +.rs +.sp +For a numerical callout, \fIcallout_string\fP is NULL, and \fIcallout_number\fP +contains the number of the callout, in the range 0-255. This is the number +that follows (?C for callouts that are part of the pattern; it is 255 for +automatically generated callouts. +. +. +.SS "Fields for string callouts" +.rs +.sp +For callouts with string arguments, \fIcallout_number\fP is always zero, and +\fIcallout_string\fP points to the string that is contained within the compiled +pattern. Its length is given by \fIcallout_string_length\fP. Duplicated ending +delimiters that were present in the original pattern string have been turned +into single characters, but there is no other processing of the callout string +argument. An additional code unit containing binary zero is present after the +string, but is not included in the length. The delimiter that was used to start +the string is also stored within the pattern, immediately before the string +itself. You can access this delimiter as \fIcallout_string\fP[-1] if you need +it. +.P +The \fIcallout_string_offset\fP field is the code unit offset to the start of +the callout argument string within the original pattern string. This is +provided for the benefit of applications such as script languages that might +need to report errors in the callout string within the pattern. +. +. +.SS "Fields for all callouts" +.rs +.sp +The remaining fields in the callout block are the same for both kinds of +callout. +.P +The \fIoffset_vector\fP field is a pointer to a vector of capturing offsets +(the "ovector"). You may read the elements in this vector, but you must not +change any of them. 
+.P +For calls to \fBpcre2_match()\fP, the \fIoffset_vector\fP field is not (since +release 10.30) a pointer to the actual ovector that was passed to the matching +function in the match data block. Instead it points to an internal ovector of a +size large enough to hold all possible captured substrings in the pattern. Note +that whenever a recursion or subroutine call within a pattern completes, the +capturing state is reset to what it was before. +.P +The \fIcapture_last\fP field contains the number of the most recently captured +substring, and the \fIcapture_top\fP field contains one more than the number of +the highest numbered captured substring so far. If no substrings have yet been +captured, the value of \fIcapture_last\fP is 0 and the value of +\fIcapture_top\fP is 1. The values of these fields do not always differ by one; +for example, when the callout in the pattern ((a)(b))(?C2) is taken, +\fIcapture_last\fP is 1 but \fIcapture_top\fP is 4. +.P +The contents of ovector[2] to ovector[<capture_top>*2-1] can be inspected in +order to extract substrings that have been matched so far, in the same way as +extracting substrings after a match has completed. The values in ovector[0] and +ovector[1] are always PCRE2_UNSET because the match is by definition not +complete. Substrings that have not been captured but whose numbers are less +than \fIcapture_top\fP also have both of their ovector slots set to +PCRE2_UNSET. +.P +For DFA matching, the \fIoffset_vector\fP field points to the ovector that was +passed to the matching function in the match data block for callouts at the top +level, but to an internal ovector during the processing of pattern recursions, +lookarounds, and atomic groups. However, these ovectors hold no useful +information because \fBpcre2_dfa_match()\fP does not support substring +capturing. The value of \fIcapture_top\fP is always 1 and the value of +\fIcapture_last\fP is always 0 for DFA matching. 
+.P +The \fIsubject\fP and \fIsubject_length\fP fields contain copies of the values +that were passed to the matching function. +.P +The \fIstart_match\fP field normally contains the offset within the subject at +which the current match attempt started. However, if the escape sequence \eK +has been encountered, this value is changed to reflect the modified starting +point. If the pattern is not anchored, the callout function may be called +several times from the same point in the pattern for different starting points +in the subject. +.P +The \fIcurrent_position\fP field contains the offset within the subject of the +current match pointer. +.P +The \fIpattern_position\fP field contains the offset in the pattern string to +the next item to be matched. +.P +The \fInext_item_length\fP field contains the length of the next item to be +processed in the pattern string. When the callout is at the end of the pattern, +the length is zero. When the callout precedes an opening parenthesis, the +length includes meta characters that follow the parenthesis. For example, in a +callout before an assertion such as (?=ab) the length is 3. For an alternation +bar or a closing parenthesis, the length is one, unless a closing parenthesis +is followed by a quantifier, in which case its length is included. (This +changed in release 10.23. In earlier releases, before an opening parenthesis +the length was that of the entire group, and before an alternation bar or a +closing parenthesis the length was zero.) +.P +The \fIpattern_position\fP and \fInext_item_length\fP fields are intended to +help in distinguishing between different automatic callouts, which all have the +same callout number. However, they are set for all callouts, and are used by +\fBpcre2test\fP to show the next item to be matched when displaying callout +information. 
+.P +In callouts from \fBpcre2_match()\fP the \fImark\fP field contains a pointer to +the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or +(*THEN) item in the match, or NULL if no such items have been passed. Instances +of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In +callouts from the DFA matching function this field always contains NULL. +.P +The \fIcallout_flags\fP field is always zero in callouts from +\fBpcre2_dfa_match()\fP or when JIT is being used. When \fBpcre2_match()\fP +without JIT is used, the following bits may be set: +.sp + PCRE2_CALLOUT_STARTMATCH +.sp +This is set for the first callout after the start of matching for each new +starting position in the subject. +.sp + PCRE2_CALLOUT_BACKTRACK +.sp +This is set if there has been a matching backtrack since the previous callout, +or since the start of matching if this is the first callout from a +\fBpcre2_match()\fP run. +.P +Both bits are set when a backtrack has caused a "bumpalong" to a new starting +position in the subject. Output from \fBpcre2test\fP does not indicate the +presence of these bits unless the \fBcallout_extra\fP modifier is set. +.P +The information in the \fBcallout_flags\fP field is provided so that +applications can track and tell their users how matching with backtracking is +done. This can be useful when trying to optimize patterns, or just to +understand how PCRE2 works. There is no support in \fBpcre2_dfa_match()\fP +because there is no backtracking in DFA matching, and there is no support in +JIT because JIT is all about maximizing matching performance. In both these +cases the \fBcallout_flags\fP field is always zero. +. +. +.SH "RETURN VALUES FROM CALLOUTS" +.rs +.sp +The external callout function returns an integer to PCRE2. If the value is +zero, matching proceeds as normal. 
If the value is greater than zero, matching +fails at the current point, but the testing of other matching possibilities +goes ahead, just as if a lookahead assertion had failed. If the value is less +than zero, the match is abandoned, and the matching function returns the +negative value. +.P +Negative values should normally be chosen from the set of PCRE2_ERROR_xxx +values. In particular, PCRE2_ERROR_NOMATCH forces a standard "no match" +failure. The error number PCRE2_ERROR_CALLOUT is reserved for use by callout +functions; it will never be used by PCRE2 itself. +. +. +.SH "CALLOUT ENUMERATION" +.rs +.sp +.nf +.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP, +.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *)," +.B " void *\fIuser_data\fP);" +.fi +.sp +A script language that supports the use of string arguments in callouts might +like to scan all the callouts in a pattern before running the match. This can +be done by calling \fBpcre2_callout_enumerate()\fP. The first argument is a +pointer to a compiled pattern, the second points to a callback function, and +the third is arbitrary user data. The callback function is called for every +callout in the pattern in the order in which they appear. Its first argument is +a pointer to a callout enumeration block, and its second argument is the +\fIuser_data\fP value that was passed to \fBpcre2_callout_enumerate()\fP. The +data block contains the following fields: +.sp + \fIversion\fP Block version number + \fIpattern_position\fP Offset to next item in pattern + \fInext_item_length\fP Length of next item in pattern + \fIcallout_number\fP Number for numbered callouts + \fIcallout_string_offset\fP Offset to string within pattern + \fIcallout_string_length\fP Length of callout string + \fIcallout_string\fP Points to callout string or is NULL +.sp +The version number is currently 0. It will increase if new fields are ever +added to the block. 
The remaining fields are the same as their namesakes in the +\fBpcre2_callout\fP block that is used for callouts during matching, as +described +.\" HTML +.\" +above. +.\" +.P +Note that the value of \fIpattern_position\fP is unique for each callout. +However, if a callout occurs inside a group that is quantified with a non-zero +minimum or a fixed maximum, the group is replicated inside the compiled +pattern. For example, a pattern such as /(a){2}/ is compiled as if it were +/(a)(a)/. This means that the callout will be enumerated more than once, but +with the same value for \fIpattern_position\fP in each case. +.P +The callback function should normally return zero. If it returns a non-zero +value, scanning the pattern stops, and that value is returned from +\fBpcre2_callout_enumerate()\fP. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 19 January 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/doc/pcre2compat.3 b/doc/pcre2compat.3 new file mode 100644 index 0000000..8313e03 --- /dev/null +++ b/doc/pcre2compat.3 @@ -0,0 +1,236 @@ +.TH PCRE2COMPAT 3 "30 November 2023" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "DIFFERENCES BETWEEN PCRE2 AND PERL" +.rs +.sp +This document describes some of the known differences in the ways that PCRE2 +and Perl handle regular expressions. The differences described here are with +respect to Perl version 5.38.0, but as both Perl and PCRE2 are continually +changing, the information may at times be out of date. +.P +1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the +behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the +next character unless it is the start of a newline sequence. This means that, +if the newline setting is CR, CRLF, or NUL, '.' 
will match the code point LF +(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using +EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline +indicator. +.P +2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does +have are given in the +.\" HREF +\fBpcre2unicode\fP +.\" +page. +.P +3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but +they do not mean what you might think. For example, (?!a){3} does not assert +that the next three characters are not "a". It just asserts that the next +character is not "a" three times (in principle; PCRE2 optimizes this to run the +assertion just once). Perl allows some repeat quantifiers on other assertions, +for example, \eb* , but these do not seem to have any use. PCRE2 does not allow +any kind of quantifier on non-lookaround assertions. +.P +4. If a braced quantifier such as {1,2} appears where there is nothing to +repeat (for example, at the start of a branch), PCRE2 raises an error whereas +Perl treats the quantifier characters as literal. +.P +5. Capture groups that occur inside negative lookaround assertions are counted, +but their entries in the offsets vector are set only when a negative assertion +is a condition that has a matching branch (that is, the condition is false). +Perl may set such capture groups in other circumstances. +.P +6. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu, +\eU, and \eN when followed by a character name. \eN on its own, matching a +non-newline character, and \eN{U+dd..}, matching a Unicode code point, are +supported. The escapes that modify the case of following letters are +implemented by Perl's general string-handling and are not part of its pattern +matching engine. If any of these are encountered by PCRE2, an error is +generated by default. 
However, if either of the PCRE2_ALT_BSUX or +PCRE2_EXTRA_ALT_BSUX options is set, \eU and \eu are interpreted as ECMAScript +interprets them. +.P +7. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is +built with Unicode support (the default). The properties that can be tested +with \ep and \eP are limited to the general category properties such as Lu and +Nd, the derived properties Any and LC (synonym L&), script names such as Greek +or Han, Bidi_Class, Bidi_Control, and a few binary properties. Both PCRE2 and +Perl support the Cs (surrogate) property, but in PCRE2 its use is limited. See +the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation for details. The long synonyms for property names that Perl +supports (such as \ep{Letter}) are not supported by PCRE2, nor is it permitted +to prefix any of these properties with "Is". +.P +8. PCRE2 supports the \eQ...\eE escape for quoting substrings. Characters +in between are treated as literals. However, this is slightly different from +Perl in that $ and @ are also handled as literals inside the quotes. In Perl, +they cause variable interpolation (PCRE2 does not have variables). Also, Perl +does "double-quotish backslash interpolation" on any backslashes between \eQ +and \eE which, its documentation says, "may lead to confusing results". PCRE2 +treats a backslash between \eQ and \eE just like any other character. Note the +following examples: +.sp + Pattern PCRE2 matches Perl matches +.sp +.\" JOIN + \eQabc$xyz\eE abc$xyz abc followed by the + contents of $xyz + \eQabc\e$xyz\eE abc\e$xyz abc\e$xyz + \eQabc\eE\e$\eQxyz\eE abc$xyz abc$xyz + \eQA\eB\eE A\eB A\eB + \eQ\e\eE \e \e\eE +.sp +The \eQ...\eE sequence is recognized both inside and outside character classes +by both PCRE2 and Perl. +.P +9. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) +constructions. However, PCRE2 does have a "callout" feature, which allows an +external function to be called during pattern matching. 
See the +.\" HREF +\fBpcre2callout\fP +.\" +documentation for details. +.P +10. Subroutine calls (whether recursive or not) were treated as atomic groups +up to PCRE2 release 10.23, but from release 10.30 this changed, and +backtracking into subroutine calls is now supported, as in Perl. +.P +11. In PCRE2, if any of the backtracking control verbs are used in a group that +is called as a subroutine (whether or not recursively), their effect is +confined to that group; it does not extend to the surrounding pattern. This is +not always the case in Perl. In particular, if (*THEN) is present in a group +that is called as a subroutine, its action is limited to that group, even if +the group does not contain any | characters. Note that such groups are +processed as anchored at the point where they are tested. +.P +12. If a pattern contains more than one backtracking control verb, the first +one that is backtracked onto acts. For example, in the pattern +A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C +triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the +same as PCRE2, but there are cases where it differs. +.P +13. There are some differences that are concerned with the settings of captured +strings when part of a pattern is repeated. For example, matching "aba" against +the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to +"b". +.P +14. PCRE2's handling of duplicate capture group numbers and names is not as +general as Perl's. This is a consequence of the fact that PCRE2 works internally +just with numbers, using an external table to translate between numbers and +names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)), where the two +capture groups have the same number but different names, is not supported, and +causes an error at compile time. If it were allowed, it would not be possible +to distinguish which group matched, because both names map to capture group +number 1. 
To avoid this confusing situation, an error is given at compile time. +.P +15. Perl used to recognize comments in some places that PCRE2 does not, for +example, between the ( and ? at the start of a group. If the /x modifier is +set, Perl allowed white space between ( and ? though the latest Perls give an +error (for a while it was just deprecated). There may still be some cases where +Perl behaves differently. +.P +16. Perl, when in warning mode, gives warnings for character classes such as +[A-\ed] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no +warning features, so it gives an error in these cases because they are almost +certainly user mistakes. +.P +17. In PCRE2, the upper/lower case character properties Lu and Ll are not +affected when case-independent matching is specified. For example, \ep{Lu} +always matches an upper case letter. I think Perl has changed in this respect; +in the release at the time of writing (5.38), \ep{Lu} and \ep{Ll} match all +letters, regardless of case, when case independence is specified. +.P +18. From release 5.32.0, Perl locks out the use of \eK in lookaround +assertions. From release 10.38 PCRE2 does the same by default. However, there +is an option for re-enabling the previous behaviour. When this option is set, +\eK is acted on when it occurs in positive assertions, but is ignored in +negative assertions. +.P +19. PCRE2 provides some extensions to the Perl regular expression facilities. +Perl 5.10 included new features that were not in earlier versions of Perl, some +of which (such as named parentheses) were in PCRE2 for some time before. This +list is with respect to Perl 5.38: +.sp +(a) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $ +meta-character matches only at the very end of the string. +.sp +(b) A backslash followed by a letter with no special meaning is faulted. (Perl +can be made to issue a warning.) 
+.sp +(c) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is +inverted, that is, by default they are not greedy, but if followed by a +question mark they are. +.sp +(d) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried +only at the first matching position in the subject string. +.sp +(e) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART +options have no Perl equivalents. +.sp +(f) The \eR escape sequence can be restricted to match only CR, LF, or CRLF +by the PCRE2_BSR_ANYCRLF option. +.sp +(g) The callout facility is PCRE2-specific. Perl supports codeblocks and +variable interpolation, but not general hooks on every match. +.sp +(h) The partial matching facility is PCRE2-specific. +.sp +(i) The alternative matching function (\fBpcre2_dfa_match()\fP) matches in a +different way and is not Perl-compatible. +.sp +(j) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) at +the start of a pattern. These set overall options that cannot be changed within +the pattern. +.sp +(k) PCRE2 supports non-atomic positive lookaround assertions. This is an +extension to the lookaround facilities. The default, Perl-compatible +lookarounds are atomic. +.sp +(l) There are three syntactical items in patterns that can refer to a capturing +group by number: back references such as \eg{2}, subroutine calls such as (?3), +and condition references such as (?(4)...). PCRE2 supports relative group +numbers such as +2 and -4 in all three cases. Perl supports both plus and minus +for subroutine calls, but only minus for back references, and no relative +numbering at all for conditions. +.P +20. Perl has different limits than PCRE2. See the +.\" HREF +\fBpcre2limit\fP +.\" +documentation for details. At release 5.10, Perl changed from recursion to iteration, +keeping the intermediate matches on the heap, which is ~10% slower but does not +fall into any stack-overflow limit. 
PCRE2 made a similar change at release +10.30, and also has many build-time and run-time customizable limits. +.P +21. Unlike Perl, PCRE2 doesn't have character set modifiers and, in particular, has no way +to set characters by context as Perl's "/d" does. A regular expression using +PCRE2_UTF and PCRE2_UCP will use similar rules to Perl's "/u"; something closer +to "/a" could be selected by adding other PCRE2_EXTRA_ASCII* options on top. +.P +22. Some recursive patterns that Perl diagnoses as infinite recursions can be +handled by PCRE2, either by the interpreter or the JIT. An example is +/(?:|(?0)abcd)(?(R)|\ez)/, which matches a sequence of any number of repeated +"abcd" substrings at the end of the subject. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 30 November 2023 +Copyright (c) 1997-2023 University of Cambridge. +.fi diff --git a/doc/pcre2convert.3 b/doc/pcre2convert.3 new file mode 100644 index 0000000..62c7ebb --- /dev/null +++ b/doc/pcre2convert.3 @@ -0,0 +1,164 @@ +.TH PCRE2CONVERT 3 "28 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "EXPERIMENTAL PATTERN CONVERSION FUNCTIONS" +.rs +.sp +This document describes a set of functions that can be used to convert +"foreign" patterns into PCRE2 regular expressions. This facility is currently +experimental, and may be changed in future releases. Two kinds of pattern, +globs and POSIX patterns, are supported. +. +. 
+.SH "THE CONVERT CONTEXT" +.rs +.sp +.nf +.B pcre2_convert_context *pcre2_convert_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_convert_context *pcre2_convert_context_copy( +.B " pcre2_convert_context *\fIcvcontext\fP);" +.sp +.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP); +.sp +.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP, +.B " uint32_t \fIescape_char\fP);" +.sp +.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP, +.B " uint32_t \fIseparator_char\fP);" +.fi +.sp +A convert context is used to hold parameters that affect the way that pattern +conversion works. Like all PCRE2 contexts, you need to use a context only if +you want to override the defaults. There are the usual create, copy, and free +functions. If custom memory management functions are set in a general context +that is passed to \fBpcre2_convert_context_create()\fP, they are used for all +memory management within the conversion functions. +.P +There are only two parameters in the convert context at present. Both apply +only to glob conversions. The escape character defaults to grave accent under +Windows, otherwise backslash. It can be set to zero, meaning no escape +character, or to any punctuation character with a code point less than 256. +The separator character defaults to backslash under Windows, otherwise forward +slash. It can be set to forward slash, backslash, or dot. +.P +The two setting functions return zero on success, or PCRE2_ERROR_BADDATA if +their second argument is invalid. +. +. 
+.SH "THE CONVERSION FUNCTION" +.rs +.sp +.nf +.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP, +.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP," +.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);" +.sp +.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP); +.fi +.sp +The first two arguments of \fBpcre2_pattern_convert()\fP define the foreign +pattern that is to be converted. The length may be given as +PCRE2_ZERO_TERMINATED. The \fBoptions\fP argument defines how the pattern is to +be processed. If the input is UTF, the PCRE2_CONVERT_UTF option should be set. +PCRE2_CONVERT_NO_UTF_CHECK may also be set if you are sure the input is valid. +One or more of the glob options, or one of the following POSIX options must be +set to define the type of conversion that is required: +.sp + PCRE2_CONVERT_GLOB + PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR + PCRE2_CONVERT_GLOB_NO_STARSTAR + PCRE2_CONVERT_POSIX_BASIC + PCRE2_CONVERT_POSIX_EXTENDED +.sp +Details of the conversions are given below. The \fBbuffer\fP and \fBblength\fP +arguments define how the output is handled: +.P +If \fBbuffer\fP is NULL, the function just returns the length of the converted +pattern via \fBblength\fP. This is one less than the length of buffer needed, +because a terminating zero is always added to the output. +.P +If \fBbuffer\fP points to a NULL pointer, an output buffer is obtained using +the allocator in the context or \fBmalloc()\fP if no context is supplied. A +pointer to this buffer is placed in the variable to which \fBbuffer\fP points. +When no longer needed the output buffer must be freed by calling +\fBpcre2_converted_pattern_free()\fP. If this function is called with a NULL +argument, it returns immediately without doing anything. +.P +If \fBbuffer\fP points to a non-NULL pointer, \fBblength\fP must be set to the +actual length of the buffer provided (in code units). 
+.P +In all cases, after successful conversion, the variable pointed to by +\fBblength\fP is updated to the length actually used (in code units), excluding +the terminating zero that is always added. +.P +If an error occurs, the length (via \fBblength\fP) is set to the offset +within the input pattern where the error was detected. Only gross syntax errors +are caught; there are plenty of errors that will get passed on for +\fBpcre2_compile()\fP to discover. +.P +The return from \fBpcre2_pattern_convert()\fP is zero on success or a non-zero +PCRE2 error code. Note that PCRE2 error codes may be positive or negative: +\fBpcre2_compile()\fP uses mostly positive codes and \fBpcre2_match()\fP +negative ones; \fBpcre2_pattern_convert()\fP uses existing codes of both kinds. A +textual error message can be obtained by calling +\fBpcre2_get_error_message()\fP. +. +. +.SH "CONVERTING GLOBS" +.rs +.sp +Globs are used to match file names, and consequently have the concept of a +"path separator", which defaults to backslash under Windows and forward slash +otherwise. If PCRE2_CONVERT_GLOB is set, the wildcards * and ? are not +permitted to match separator characters, but the double-star (**) feature +(which does match separators) is supported. +.P +PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to +match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with +the double-star feature disabled. These options may be given together. +. +. +.SH "CONVERTING POSIX PATTERNS" +.rs +.sp +POSIX defines two kinds of regular expression pattern: basic and extended. +These can be processed by setting PCRE2_CONVERT_POSIX_BASIC or +PCRE2_CONVERT_POSIX_EXTENDED, respectively. +.P +In POSIX patterns, backslash is not special in a character class. Unmatched +closing parentheses are treated as literals. +.P +In basic patterns, ? + | {} and () must be escaped to be recognized +as metacharacters outside a character class. 
If the first character in the +pattern is * it is treated as a literal. ^ is a metacharacter only at the start +of a branch. +.P +In extended patterns, a backslash not in a character class always +makes the next character literal, whatever it is. There are no backreferences. +.P +Note: POSIX mandates that the longest possible match at the first matching +position must be found. This is not what \fBpcre2_match()\fP does; it yields +the first match that is found. An application can use \fBpcre2_dfa_match()\fP +to find the longest match, but that does not support backreferences (but then +neither do POSIX extended patterns). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 28 June 2018 +Copyright (c) 1997-2018 University of Cambridge. +.fi diff --git a/doc/pcre2demo.3 b/doc/pcre2demo.3 new file mode 100644 index 0000000..0453a94 --- /dev/null +++ b/doc/pcre2demo.3 @@ -0,0 +1,526 @@ +.TH PCRE2DEMO 3 " 7 June 2024" "PCRE2 10.44" +.\"AUTOMATICALLY GENERATED BY PrepareRelease - do not EDIT! +.SH NAME +PCRE2DEMO - A demonstration C program for PCRE2 +.SH "SOURCE CODE" +.rs +.sp +.\" Start example. +.de EX +. do ds mF \\n[.fam] +. nr mE \\n(.f +. nf +. nh +. do fam C +. ft CW +.. +. +. +.\" End example. +.de EE +. do fam \\*(mF +. ft \\n(mE +. fi +. hy \\n(HY +.. +. +.RS -7 +.EX +/************************************************* +* PCRE2 DEMONSTRATION PROGRAM * +*************************************************/ + +/* This is a demonstration program to illustrate a straightforward way of +using the PCRE2 regular expression library from a C program. See the +pcre2sample documentation for a short discussion ("man pcre2sample" if you have +the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is +incompatible with the original PCRE API. + +There are actually three libraries, each supporting a different code unit +width. 
This demonstration program uses the 8-bit library. The default is to +process each code unit as a separate character, but if the pattern begins with +"(*UTF)", both it and the subject are treated as UTF-8 strings, where +characters may occupy multiple code units. + +In Unix-like environments, if PCRE2 is installed in your standard system +libraries, you should be able to compile this program using this command: + +cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo + +If PCRE2 is not installed in a standard place, it is likely to be installed +with support for the pkg-config mechanism. If you have pkg-config, you can +compile this program using this command: + +cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo + +If you do not have pkg-config, you may have to use something like this: + +cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e + -R/usr/local/lib -lpcre2-8 -o pcre2demo + +Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and +library files for PCRE2 are installed on your system. Only some operating +systems (Solaris is one) use the -R option. + +Building under Windows: + +If you want to statically link this program against a non-dll .a file, you must +define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment +the following line. */ + +/* #define PCRE2_STATIC */ + +/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h. +For a program that uses only one code unit width, setting it to 8, 16, or 32 +makes it possible to use generic function names such as pcre2_compile(). Note +that just changing 8 to 16 (for example) is not sufficient to convert this +program to process 16-bit characters. Even in a fully 16-bit environment, where +string-handling functions such as strcmp() and printf() work with 16-bit +characters, the code for handling the table of named substrings will still need +to be modified. 
*/ + +#define PCRE2_CODE_UNIT_WIDTH 8 + +#include <stdio.h> +#include <string.h> +#include <pcre2.h> + + +/************************************************************************** +* Here is the program. The API includes the concept of "contexts" for * +* setting up unusual interface requirements for compiling and matching, * +* such as custom memory managers and non-standard newline definitions. * +* This program does not do any of this, so it makes no use of contexts, * +* always passing NULL where a context could be given. * +**************************************************************************/ + +int main(int argc, char **argv) +{ +pcre2_code *re; +PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */ +PCRE2_SPTR subject; /* the appropriate width (in this case, 8 bits). */ +PCRE2_SPTR name_table; + +int crlf_is_newline; +int errornumber; +int find_all; +int i; +int rc; +int utf8; + +uint32_t option_bits; +uint32_t namecount; +uint32_t name_entry_size; +uint32_t newline; + +PCRE2_SIZE erroroffset; +PCRE2_SIZE *ovector; +PCRE2_SIZE subject_length; + +pcre2_match_data *match_data; + + +/************************************************************************** +* First, sort out the command line. There is only one possible option at * +* the moment, "-g" to request repeated matching to find all occurrences, * +* like Perl's /g option. We set the variable find_all to a non-zero value * +* if the -g option is present. * +**************************************************************************/ + +find_all = 0; +for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-g") == 0) find_all = 1; + else if (argv[i][0] == '-') + { + printf("Unrecognised option %s\en", argv[i]); + return 1; + } + else break; + } + +/* After the options, we require exactly two arguments, which are the pattern, +and the subject string. 
*/ + +if (argc - i != 2) + { + printf("Exactly two arguments required: a regex and a subject string\en"); + return 1; + } + +/* Pattern and subject are char arguments, so they can be straightforwardly +cast to PCRE2_SPTR because we are working in 8-bit code units. The subject +length is cast to PCRE2_SIZE for completeness, though PCRE2_SIZE is in fact +defined to be size_t. */ + +pattern = (PCRE2_SPTR)argv[i]; +subject = (PCRE2_SPTR)argv[i+1]; +subject_length = (PCRE2_SIZE)strlen((char *)subject); + + +/************************************************************************* +* Now we are going to compile the regular expression pattern, and handle * +* any errors that are detected. * +*************************************************************************/ + +re = pcre2_compile( + pattern, /* the pattern */ + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ + 0, /* default options */ + &errornumber, /* for error number */ + &erroroffset, /* for error offset */ + NULL); /* use default compile context */ + +/* Compilation failed: print the error message and exit. */ + +if (re == NULL) + { + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); + printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset, + buffer); + return 1; + } + + +/************************************************************************* +* If the compilation succeeded, we call PCRE2 again, in order to do a * +* pattern match against the subject string. This does just ONE match. If * +* further matching is needed, it will be done below. Before running the * +* match we must set up a match_data block for holding the result. Using * +* pcre2_match_data_create_from_pattern() ensures that the block is * +* exactly the right size for the number of capturing parentheses in the * +* pattern. 
If you need to know the actual size of a match_data block as * +* a number of bytes, you can find it like this: * +* * +* PCRE2_SIZE match_data_size = pcre2_get_match_data_size(match_data); * +*************************************************************************/ + +match_data = pcre2_match_data_create_from_pattern(re, NULL); + +/* Now run the match. */ + +rc = pcre2_match( + re, /* the compiled pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + match_data, /* block for storing the result */ + NULL); /* use default match context */ + +/* Matching failed: handle error cases */ + +if (rc < 0) + { + switch(rc) + { + case PCRE2_ERROR_NOMATCH: printf("No match\en"); break; + /* + Handle other special cases if you like + */ + default: printf("Matching error %d\en", rc); break; + } + pcre2_match_data_free(match_data); /* Release memory used for the match */ + pcre2_code_free(re); /* data and the compiled pattern. */ + return 1; + } + +/* Match succeeded. Get a pointer to the output vector, where string offsets +are stored. */ + +ovector = pcre2_get_ovector_pointer(match_data); +printf("Match succeeded at offset %d\en", (int)ovector[0]); + + +/************************************************************************* +* We have found the first match within the subject string. If the output * +* vector wasn't big enough, say so. Then output any substrings that were * +* captured. * +*************************************************************************/ + +/* The output vector wasn't big enough. This should not happen, because we used +pcre2_match_data_create_from_pattern() above. */ + +if (rc == 0) + printf("ovector was not big enough for all the captured substrings\en"); + +/* Since release 10.38 PCRE2 has locked out the use of \eK in lookaround +assertions. However, there is an option to re-enable the old behaviour. 
If that +is set, it is possible to run patterns such as /(?=.\eK)/ that use \eK in an +assertion to set the start of a match later than its end. In this demonstration +program, we show how to detect this case, but it shouldn't arise because the +option is never set. */ + +if (ovector[0] > ovector[1]) + { + printf("\e\eK was used in an assertion to set the match start after its end.\en" + "From end to start the match was: %.*s\en", (int)(ovector[0] - ovector[1]), + (char *)(subject + ovector[1])); + printf("Run abandoned\en"); + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return 1; + } + +/* Show substrings stored in the output vector by number. Obviously, in a real +application you might want to do things other than print them. */ + +for (i = 0; i < rc; i++) + { + PCRE2_SPTR substring_start = subject + ovector[2*i]; + PCRE2_SIZE substring_length = ovector[2*i+1] - ovector[2*i]; + printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start); + } + + +/************************************************************************** +* That concludes the basic part of this demonstration program. We have * +* compiled a pattern, and performed a single match. The code that follows * +* shows first how to access named substrings, and then how to code for * +* repeated matches on the same subject. * +**************************************************************************/ + +/* See if there are any named substrings, and if so, show them by name. First +we have to extract the count of named parentheses from the pattern. 
*/ + +(void)pcre2_pattern_info( + re, /* the compiled pattern */ + PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */ + &namecount); /* where to put the answer */ + +if (namecount == 0) printf("No named substrings\en"); else + { + PCRE2_SPTR tabptr; + printf("Named substrings\en"); + + /* Before we can access the substrings, we must extract the table for + translating names to numbers, and the size of each entry in the table. */ + + (void)pcre2_pattern_info( + re, /* the compiled pattern */ + PCRE2_INFO_NAMETABLE, /* address of the table */ + &name_table); /* where to put the answer */ + + (void)pcre2_pattern_info( + re, /* the compiled pattern */ + PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */ + &name_entry_size); /* where to put the answer */ + + /* Now we can scan the table and, for each entry, print the number, the name, + and the substring itself. In the 8-bit library the number is held in two + bytes, most significant first. */ + + tabptr = name_table; + for (i = 0; i < namecount; i++) + { + int n = (tabptr[0] << 8) | tabptr[1]; + printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2, + (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]); + tabptr += name_entry_size; + } + } + + +/************************************************************************* +* If the "-g" option was given on the command line, we want to continue * +* to search for additional matches in the subject string, in a similar * +* way to the /g option in Perl. This turns out to be trickier than you * +* might think because of the possibility of matching an empty string. * +* What happens is as follows: * +* * +* If the previous match was NOT for an empty string, we can just start * +* the next match at the end of the previous one. * +* * +* If the previous match WAS for an empty string, we can't do that, as it * +* would lead to an infinite loop. 
Instead, a call of pcre2_match() is * +* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The * +* first of these tells PCRE2 that an empty string at the start of the * +* subject is not a valid match; other possibilities must be tried. The * +* second flag restricts PCRE2 to one match attempt at the initial string * +* position. If this match succeeds, an alternative to the empty string * +* match has been found, and we can print it and proceed round the loop, * +* advancing by the length of whatever was found. If this match does not * +* succeed, we still stay in the loop, advancing by just one character. * +* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be * +* more than one byte. * +* * +* However, there is a complication concerned with newlines. When the * +* newline convention is such that CRLF is a valid newline, we must * +* advance by two characters rather than one. The newline convention can * +* be set in the regex by (*CR), etc.; if not, we must find the default. * +*************************************************************************/ + +if (!find_all) /* Check for -g */ + { + pcre2_match_data_free(match_data); /* Release the memory that was used */ + pcre2_code_free(re); /* for the match data and the pattern. */ + return 0; /* Exit the program. */ + } + +/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline +sequence. First, find the options with which the regex was compiled and extract +the UTF state. */ + +(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits); +utf8 = (option_bits & PCRE2_UTF) != 0; + +/* Now find the newline convention and see whether CRLF is a valid newline +sequence. 
*/ + +(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline); +crlf_is_newline = newline == PCRE2_NEWLINE_ANY || + newline == PCRE2_NEWLINE_CRLF || + newline == PCRE2_NEWLINE_ANYCRLF; + +/* Loop for second and subsequent matches */ + +for (;;) + { + uint32_t options = 0; /* Normally no options */ + PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ + + /* If the previous match was for an empty string, we are finished if we are + at the end of the subject. Otherwise, arrange to run another match at the + same point to see if a non-empty match can be found. */ + + if (ovector[0] == ovector[1]) + { + if (ovector[0] == subject_length) break; + options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + } + + /* If the previous match was not an empty string, there is one tricky case to + consider. If a pattern contains \eK within a lookbehind assertion at the + start, the end of the matched string can be at the offset where the match + started. Without special action, this leads to a loop that keeps on matching + the same substring. We must detect this case and arrange to move the start on + by one character. The pcre2_get_startchar() function returns the starting + offset that was passed to pcre2_match(). */ + + else + { + PCRE2_SIZE startchar = pcre2_get_startchar(match_data); + if (start_offset <= startchar) + { + if (startchar >= subject_length) break; /* Reached end of subject. */ + start_offset = startchar + 1; /* Advance by one character. */ + if (utf8) /* If UTF-8, it may be more */ + { /* than one code unit. 
*/ + for (; start_offset < subject_length; start_offset++) + if ((subject[start_offset] & 0xc0) != 0x80) break; + } + } + } + + /* Run the next matching operation */ + + rc = pcre2_match( + re, /* the compiled pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + start_offset, /* starting offset in the subject */ + options, /* options */ + match_data, /* block for storing the result */ + NULL); /* use default match context */ + + /* This time, a result of NOMATCH isn't an error. If the value in "options" + is zero, it just means we have found all possible matches, so the loop ends. + Otherwise, it means we have failed to find a non-empty-string match at a + point where there was a previous empty-string match. In this case, we do what + Perl does: advance the matching position by one character, and continue. We + do this by setting the "end of previous match" offset, because that is picked + up at the top of the loop as the point at which to start again. + + There are two complications: (a) When CRLF is a valid newline sequence, and + the current position is just before it, advance by an extra byte. (b) + Otherwise we must ensure that we skip an entire UTF character if we are in + UTF mode. */ + + if (rc == PCRE2_ERROR_NOMATCH) + { + if (options == 0) break; /* All matches found */ + ovector[1] = start_offset + 1; /* Advance one code unit */ + if (crlf_is_newline && /* If CRLF is a newline & */ + start_offset < subject_length - 1 && /* we are at CRLF, */ + subject[start_offset] == '\er' && + subject[start_offset + 1] == '\en') + ovector[1] += 1; /* Advance by one more. */ + else if (utf8) /* Otherwise, ensure we */ + { /* advance a whole UTF-8 */ + while (ovector[1] < subject_length) /* character. */ + { + if ((subject[ovector[1]] & 0xc0) != 0x80) break; + ovector[1] += 1; + } + } + continue; /* Go round the loop again */ + } + + /* Other matching errors are not recoverable. 
*/ + + if (rc < 0) + { + printf("Matching error %d\en", rc); + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return 1; + } + + /* Match succeeded */ + + printf("\enMatch succeeded again at offset %d\en", (int)ovector[0]); + + /* The match succeeded, but the output vector wasn't big enough. This + should not happen. */ + + if (rc == 0) + printf("ovector was not big enough for all the captured substrings\en"); + + /* We must guard against patterns such as /(?=.\eK)/ that use \eK in an + assertion to set the start of a match later than its end. In this + demonstration program, we just detect this case and give up. */ + + if (ovector[0] > ovector[1]) + { + printf("\e\eK was used in an assertion to set the match start after its end.\en" + "From end to start the match was: %.*s\en", (int)(ovector[0] - ovector[1]), + (char *)(subject + ovector[1])); + printf("Run abandoned\en"); + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return 1; + } + + /* As before, show substrings stored in the output vector by number, and then + also any named substrings. 
*/ + + for (i = 0; i < rc; i++) + { + PCRE2_SPTR substring_start = subject + ovector[2*i]; + size_t substring_length = ovector[2*i+1] - ovector[2*i]; + printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start); + } + + if (namecount == 0) printf("No named substrings\en"); else + { + PCRE2_SPTR tabptr = name_table; + printf("Named substrings\en"); + for (i = 0; i < namecount; i++) + { + int n = (tabptr[0] << 8) | tabptr[1]; + printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2, + (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]); + tabptr += name_entry_size; + } + } + } /* End of loop to find second and subsequent matches */ + +printf("\en"); +pcre2_match_data_free(match_data); +pcre2_code_free(re); +return 0; +} + +/* End of pcre2demo.c */ +.EE diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1 new file mode 100644 index 0000000..ffe9d39 --- /dev/null +++ b/doc/pcre2grep.1 @@ -0,0 +1,1018 @@ +.TH PCRE2GREP 1 "22 December 2023" "PCRE2 10.43" +.SH NAME +pcre2grep - a grep with Perl-compatible regular expressions. +.SH SYNOPSIS +.B pcre2grep [options] [long options] [pattern] [path1 path2 ...] +. +.SH DESCRIPTION +.rs +.sp +\fBpcre2grep\fP searches files for character patterns, in the same way as other +grep commands do, but it uses the PCRE2 regular expression library to support +patterns that are compatible with the regular expressions of Perl 5. See +.\" HREF +\fBpcre2syntax\fP(3) +.\" +for a quick-reference summary of pattern syntax, or +.\" HREF +\fBpcre2pattern\fP(3) +.\" +for a full description of the syntax and semantics of the regular expressions +that PCRE2 supports. +.P +Patterns, whether supplied on the command line or in a separate file, are given +without delimiters. For example: +.sp + pcre2grep Thursday /etc/motd +.sp +If you attempt to use delimiters (for example, by surrounding a pattern with +slashes, as is common in Perl scripts), they are interpreted as part of the +pattern. 
Quotes can of course be used to delimit patterns on the command line +because they are interpreted by the shell, and indeed quotes are required if a +pattern contains white space or shell metacharacters. +.P +The first argument that follows any option settings is treated as the single +pattern to be matched when neither \fB-e\fP nor \fB-f\fP is present. +Conversely, when one or both of these options are used to specify patterns, all +arguments are treated as path names. At least one of \fB-e\fP, \fB-f\fP, or an +argument pattern must be provided. +.P +If no files are specified, \fBpcre2grep\fP reads the standard input. The +standard input can also be referenced by a name consisting of a single hyphen. +For example: +.sp + pcre2grep some-pattern file1 - file3 +.sp +By default, input files are searched line by line, so pattern assertions about +the beginning and end of a subject string (^, $, \eA, \eZ, and \ez) match at +the beginning and end of each line. When a line matches a pattern, it is copied +to the standard output, and if there is more than one file, the file name is +output at the start of each line, followed by a colon. However, there are +options that can change how \fBpcre2grep\fP behaves. For example, the \fB-M\fP +option makes it possible to search for strings that span line boundaries. What +defines a line boundary is controlled by the \fB-N\fP (\fB--newline\fP) option. +The \fB-h\fP and \fB-H\fP options control whether or not file names are shown, +and the \fB-Z\fP option changes the file name terminator to a zero byte. +.P +The amount of memory used for buffering files that are being scanned is +controlled by parameters that can be set by the \fB--buffer-size\fP and +\fB--max-buffer-size\fP options. The first of these sets the size of buffer +that is obtained at the start of processing. 
If an input file contains very +long lines, a larger buffer may be needed; this is handled by automatically +extending the buffer, up to the limit specified by \fB--max-buffer-size\fP. The +default values for these parameters can be set when \fBpcre2grep\fP is +built; if nothing is specified, the defaults are set to 20KiB and 1MiB +respectively. An error occurs if a line is too long and the buffer can no +longer be expanded. +.P +The block of memory that is actually used is three times the "buffer size", to +allow for buffering "before" and "after" lines. If the buffer size is too +small, fewer than requested "before" and "after" lines may be output. +.P +When matching with a multiline pattern, the size of the buffer must be at least +half of the maximum match expected or the pattern might fail to match. +.P +Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the greater. +BUFSIZ is defined in \fB<stdio.h>\fP. When there is more than one pattern +(specified by the use of \fB-e\fP and/or \fB-f\fP), each pattern is applied to +each line in the order in which they are defined, except that all the \fB-e\fP +patterns are tried before the \fB-f\fP patterns. +.P +By default, as soon as one pattern matches a line, no further patterns are +considered. However, if \fB--colour\fP (or \fB--color\fP) is used to colour the +matching substrings, or if \fB--only-matching\fP, \fB--file-offsets\fP, +\fB--line-offsets\fP, or \fB--output\fP is used to output only the part of the +line that matched (either shown literally, or as an offset), the behaviour is +different. In this situation, all the patterns are applied to the line. If +there is more than one match, the one that begins nearest to the start of the +subject is processed; if there is more than one match at that position, the one +with the longest matching substring is processed; if the matching substrings +are equal, the first match found is processed. 
+.P +Scanning with all the patterns resumes immediately following the match, so that +later matches on the same line can be found. Note, however, that an overlapping +match that starts in the middle of another match will not be processed. +.P +The above behaviour was changed at release 10.41 to be more compatible with GNU +grep. In earlier releases, \fBpcre2grep\fP did not recognize matches from +later patterns that were earlier in the subject. +.P +Patterns that can match an empty string are accepted, but empty string +matches are never recognized. An example is the pattern "(super)?(man)?", in +which all components are optional. This pattern finds all occurrences of both +"super" and "man"; the output differs from matching with "super|man" when only +the matching substrings are being shown. +.P +If the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variable is set, +\fBpcre2grep\fP uses the value to set a locale when calling the PCRE2 library. +The \fB--locale\fP option can be used to override this. +. +. +.SH "SUPPORT FOR COMPRESSED FILES" +.rs +.sp +Compile-time options for \fBpcre2grep\fP can set it up to use \fBlibz\fP or +\fBlibbz2\fP for reading compressed files whose names end in \fB.gz\fP or +\fB.bz2\fP, respectively. You can find out whether your \fBpcre2grep\fP binary +has support for one or both of these file types by running it with the +\fB--help\fP option. If the appropriate support is not present, all files are +treated as plain text. The standard input is always so treated. If a file with +a \fB.gz\fP or \fB.bz2\fP extension is not in fact compressed, it is read as a +plain text file. When input is from a compressed .gz or .bz2 file, the +\fB--line-buffered\fP option is ignored. +. +. +.SH "BINARY FILES" +.rs +.sp +By default, a file that contains a binary zero byte within the first 1024 bytes +is identified as a binary file, and is processed specially. 
However, if the +newline type is specified as NUL, that is, the line terminator is a binary +zero, the test for a binary file is not applied. See the \fB--binary-files\fP +option for a means of changing the way binary files are handled. +. +. +.SH "BINARY ZEROS IN PATTERNS" +.rs +.sp +Patterns passed from the command line are strings that are terminated by a +binary zero, so cannot contain internal zeros. However, patterns that are read +from a file via the \fB-f\fP option may contain binary zeros. +. +. +.SH OPTIONS +.rs +.sp +The order in which some of the options appear can affect the output. For +example, both the \fB-H\fP and \fB-l\fP options affect the printing of file +names. Whichever comes later in the command line will be the one that takes +effect. Similarly, except where noted below, if an option is given twice, the +later setting is used. Numerical values for options may be followed by K or M, +to signify multiplication by 1024 or 1024*1024 respectively. +.TP 10 +\fB--\fP +This terminates the list of options. It is useful if the next item on the +command line starts with a hyphen but is not an option. This allows for the +processing of patterns and file names that start with hyphens. +.TP +\fB-A\fP \fInumber\fP, \fB--after-context=\fP\fInumber\fP +Output up to \fInumber\fP lines of context after each matching line. Fewer +lines are output if the next match or the end of the file is reached, or if the +processing buffer size has been set too small. If file names and/or line +numbers are being output, a hyphen separator is used instead of a colon for the +context lines (the \fB-Z\fP option can be used to change the file name +terminator to a zero byte). A line containing "--" is output between each group +of lines, unless they are in fact contiguous in the input file. The value of +\fInumber\fP is expected to be relatively small. When \fB-c\fP is used, +\fB-A\fP is ignored. +.TP +\fB-a\fP, \fB--text\fP +Treat binary files as text. 
This is equivalent to +\fB--binary-files\fP=\fItext\fP. +.TP +\fB--allow-lookaround-bsk\fP +PCRE2 now forbids the use of \eK in lookarounds by default, in line with Perl. +This option causes \fBpcre2grep\fP to set the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK +option, which enables this somewhat dangerous usage. +.TP +\fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP +Output up to \fInumber\fP lines of context before each matching line. Fewer +lines are output if the previous match or the start of the file is within +\fInumber\fP lines, or if the processing buffer size has been set too small. If +file names and/or line numbers are being output, a hyphen separator is used +instead of a colon for the context lines (the \fB-Z\fP option can be used to +change the file name terminator to a zero byte). A line containing "--" is +output between each group of lines, unless they are in fact contiguous in the +input file. The value of \fInumber\fP is expected to be relatively small. When +\fB-c\fP is used, \fB-B\fP is ignored. +.TP +\fB--binary-files=\fP\fIword\fP +Specify how binary files are to be processed. If the word is "binary" (the +default), pattern matching is performed on binary files, but the only output is +"Binary file <name> matches" when a match succeeds. If the word is "text", +which is equivalent to the \fB-a\fP or \fB--text\fP option, binary files are +processed in the same way as any other file. In this case, when a match +succeeds, the output may be binary garbage, which can have nasty effects if +sent to a terminal. If the word is "without-match", which is equivalent to the +\fB-I\fP option, binary files are not processed at all; they are assumed not to +be of interest and are skipped without causing any output or affecting the +return code. +.TP +\fB--buffer-size=\fP\fInumber\fP +Set the parameter that controls how much memory is obtained at the start of +processing for buffering files that are being scanned. See also +\fB--max-buffer-size\fP below. 
+.TP +\fB-C\fP \fInumber\fP, \fB--context=\fP\fInumber\fP +Output \fInumber\fP lines of context both before and after each matching line. +This is equivalent to setting both \fB-A\fP and \fB-B\fP to the same value. +.TP +\fB-c\fP, \fB--count\fP +Do not output lines from the files that are being scanned; instead output the +number of lines that would have been shown, either because they matched, or, if +\fB-v\fP is set, because they failed to match. By default, this count is +exactly the same as the number of lines that would have been output, but if the +\fB-M\fP (multiline) option is used (without \fB-v\fP), there may be more +suppressed lines than the count (that is, the number of matches). +.sp +If no lines are selected, the number zero is output. If several files are +being scanned, a count is output for each of them and the \fB-t\fP option can +be used to cause a total to be output at the end. However, if the +\fB--files-with-matches\fP option is also used, only those files whose counts +are greater than zero are listed. When \fB-c\fP is used, the \fB-A\fP, +\fB-B\fP, and \fB-C\fP options are ignored. +.TP +\fB--colour\fP, \fB--color\fP +If this option is given without any data, it is equivalent to "--colour=auto". +If data is required, it must be given in the same shell item, separated by an +equals sign. +.TP +\fB--colour=\fP\fIvalue\fP, \fB--color=\fP\fIvalue\fP +This option specifies under what circumstances the parts of a line that matched +a pattern should be coloured in the output. It is ignored if +\fB--file-offsets\fP, \fB--line-offsets\fP, or \fB--output\fP is set. By +default, output is not coloured. The value for the \fB--colour\fP option (which +is optional, see above) may be "never", "always", or "auto". In the latter +case, colouring happens only if the standard output is connected to a terminal. 
+More resources are used when colouring is enabled, because \fBpcre2grep\fP has +to search for all possible matches in a line, not just one, in order to colour +them all. +.sp +The colour that is used can be specified by setting one of the environment +variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or +PCREGREP_COLOR, which are checked in that order. If none of these are set, +\fBpcre2grep\fP looks for GREP_COLORS or GREP_COLOR (in that order). The value +of the variable should be a string of two numbers, separated by a semicolon, +except in the case of GREP_COLORS, which must start with "ms=" or "mt=" +followed by two semicolon-separated colours, terminated by the end of the +string or by a colon. If GREP_COLORS does not start with "ms=" or "mt=" it is +ignored, and GREP_COLOR is checked. +.sp +If the string obtained from one of the above variables contains any characters +other than semicolon or digits, the setting is ignored and the default colour +is used. The string is copied directly into the control string for setting +colour on a terminal, so it is your responsibility to ensure that the values +make sense. If no relevant environment variable is set, the default is "1;31", +which gives red. +.TP +\fB-D\fP \fIaction\fP, \fB--devices=\fP\fIaction\fP +If an input path is not a regular file or a directory, "action" specifies how +it is to be processed. Valid values are "read" (the default) or "skip" +(silently skip the path). +.TP +\fB-d\fP \fIaction\fP, \fB--directories=\fP\fIaction\fP +If an input path is a directory, "action" specifies how it is to be processed. +Valid values are "read" (the default in non-Windows environments, for +compatibility with GNU grep), "recurse" (equivalent to the \fB-r\fP option), or +"skip" (silently skip the path, the default in Windows environments). In the +"read" case, directories are read as if they were ordinary files. 
In some +operating systems the effect of reading a directory like this is an immediate +end-of-file; in others it may provoke an error. +.TP +\fB--depth-limit\fP=\fInumber\fP +See \fB--match-limit\fP below. +.TP +\fB-E\fP, \fB--case-restrict\fP +When case distinctions are being ignored in Unicode mode, two ASCII letters (K +and S) will by default match Unicode characters U+212A (Kelvin sign) and U+017F +(long S) respectively, as well as their lower case ASCII counterparts. When +this option is set, case equivalences are restricted such that no ASCII +character matches a non-ASCII character, and vice versa. +.TP +\fB-e\fP \fIpattern\fP, \fB--regex=\fP\fIpattern\fP, \fB--regexp=\fP\fIpattern\fP +Specify a pattern to be matched. This option can be used multiple times in +order to specify several patterns. It can also be used as a way of specifying a +single pattern that starts with a hyphen. When \fB-e\fP is used, no argument +pattern is taken from the command line; all arguments are treated as file +names. There is no limit to the number of patterns. They are applied to each +line in the order in which they are defined. +.sp +If \fB-f\fP is used with \fB-e\fP, the command line patterns are matched first, +followed by the patterns from the file(s), independent of the order in which +these options are specified. +.TP +\fB--exclude\fP=\fIpattern\fP +Files (but not directories) whose names match the pattern are skipped without +being processed. This applies to all files, whether listed on the command line, +obtained from \fB--file-list\fP, or by scanning a directory. The pattern is a +PCRE2 regular expression, and is matched against the final component of the +file name, not the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do +not apply to this pattern. The option may be given any number of times in order +to specify multiple patterns. If a file name matches both an \fB--include\fP +and an \fB--exclude\fP pattern, it is excluded. 
There is no short form for this +option. +.TP +\fB--exclude-from=\fP\fIfilename\fP +Treat each non-empty line of the file as the data for an \fB--exclude\fP +option. What constitutes a newline when reading the file is the operating +system's default. The \fB--newline\fP option has no effect on this option. This +option may be given more than once in order to specify a number of files to +read. +.TP +\fB--exclude-dir\fP=\fIpattern\fP +Directories whose names match the pattern are skipped without being processed, +whatever the setting of the \fB--recursive\fP option. This applies to all +directories, whether listed on the command line, obtained from +\fB--file-list\fP, or by scanning a parent directory. The pattern is a PCRE2 +regular expression, and is matched against the final component of the directory +name, not the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do not +apply to this pattern. The option may be given any number of times in order to +specify more than one pattern. If a directory matches both \fB--include-dir\fP +and \fB--exclude-dir\fP, it is excluded. There is no short form for this +option. +.TP +\fB-F\fP, \fB--fixed-strings\fP +Interpret each data-matching pattern as a list of fixed strings, separated by +newlines, instead of as a regular expression. What constitutes a newline for +this purpose is controlled by the \fB--newline\fP option. The \fB-w\fP (match +as a word) and \fB-x\fP (match whole line) options can be used with \fB-F\fP. +They apply to each of the fixed strings. A line is selected if any of the fixed +strings are found in it (subject to \fB-w\fP or \fB-x\fP, if present). This +option applies only to the patterns that are matched against the contents of +files; it does not apply to patterns specified by any of the \fB--include\fP or +\fB--exclude\fP options. +.TP +\fB-f\fP \fIfilename\fP, \fB--file=\fP\fIfilename\fP +Read patterns from the file, one per line. 
As is the case with patterns on the +command line, no delimiters should be used. What constitutes a newline when +reading the file is the operating system's default interpretation of \en. The +\fB--newline\fP option has no effect on this option. Trailing white space is +removed from each line, and blank lines are ignored. An empty file contains no +patterns and therefore matches nothing. Patterns read from a file in this way +may contain binary zeros, which are treated as ordinary data characters. +.sp +If this option is given more than once, all the specified files are read. A +data line is output if any of the patterns match it. A file name can be given +as "-" to refer to the standard input. When \fB-f\fP is used, patterns +specified on the command line using \fB-e\fP may also be present; they are +matched before the file's patterns. However, no pattern is taken from the +command line; all arguments are treated as the names of paths to be searched. +.TP +\fB--file-list\fP=\fIfilename\fP +Read a list of files and/or directories that are to be scanned from the given +file, one per line. What constitutes a newline when reading the file is the +operating system's default. Trailing white space is removed from each line, and +blank lines are ignored. These paths are processed before any that are listed +on the command line. The file name can be given as "-" to refer to the standard +input. If \fB--file\fP and \fB--file-list\fP are both specified as "-", +patterns are read first. This is useful only when the standard input is a +terminal, from which further lines (the list of files) can be read after an +end-of-file indication. If this option is given more than once, all the +specified files are read. +.TP +\fB--file-offsets\fP +Instead of showing lines or parts of lines that match, show each match as an +offset from the start of the file and a length, separated by a comma. In this +mode, \fB--colour\fP has no effect, and no context is shown. 
That is, the +\fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is more than one +match in a line, each of them is shown separately. This option is mutually +exclusive with \fB--output\fP, \fB--line-offsets\fP, and \fB--only-matching\fP. +.TP +\fB--group-separator\fP=\fItext\fP +Output this text string instead of two hyphens between groups of lines when +\fB-A\fP, \fB-B\fP, or \fB-C\fP is in use. See also \fB--no-group-separator\fP. +.TP +\fB-H\fP, \fB--with-filename\fP +Force the inclusion of the file name at the start of output lines when +searching a single file. The file name is not normally shown in this case. +By default, for matching lines, the file name is followed by a colon; for +context lines, a hyphen separator is used. The \fB-Z\fP option can be used to +change the terminator to a zero byte. If a line number is also being output, +it follows the file name. When the \fB-M\fP option causes a pattern to match +more than one line, only the first is preceded by the file name. This option +overrides any previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options. +.TP +\fB-h\fP, \fB--no-filename\fP +Suppress the output file names when searching multiple files. File names are +normally shown when multiple files are searched. By default, for matching +lines, the file name is followed by a colon; for context lines, a hyphen +separator is used. The \fB-Z\fP option can be used to change the terminator to +a zero byte. If a line number is also being output, it follows the file name. +This option overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options. +.TP +\fB--heap-limit\fP=\fInumber\fP +See \fB--match-limit\fP below. +.TP +\fB--help\fP +Output a help message, giving brief details of the command options and file +type support, and then exit. Anything else on the command line is +ignored. +.TP +\fB-I\fP +Ignore binary files. This is equivalent to +\fB--binary-files\fP=\fIwithout-match\fP. 
+.TP +\fB-i\fP, \fB--ignore-case\fP +Ignore upper/lower case distinctions when pattern matching. This applies when +matching path names for inclusion or exclusion as well as when matching lines +in files. +.TP +\fB--include\fP=\fIpattern\fP +If any \fB--include\fP patterns are specified, the only files that are +processed are those whose names match one of the patterns and do not match an +\fB--exclude\fP pattern. This option does not affect directories, but it +applies to all files, whether listed on the command line, obtained from +\fB--file-list\fP, or by scanning a directory. The pattern is a PCRE2 regular +expression, and is matched against the final component of the file name, not +the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do not apply to +this pattern. The option may be given any number of times. If a file name +matches both an \fB--include\fP and an \fB--exclude\fP pattern, it is excluded. +There is no short form for this option. +.TP +\fB--include-from=\fP\fIfilename\fP +Treat each non-empty line of the file as the data for an \fB--include\fP +option. What constitutes a newline for this purpose is the operating system's +default. The \fB--newline\fP option has no effect on this option. This option +may be given any number of times; all the files are read. +.TP +\fB--include-dir\fP=\fIpattern\fP +If any \fB--include-dir\fP patterns are specified, the only directories that +are processed are those whose names match one of the patterns and do not match +an \fB--exclude-dir\fP pattern. This applies to all directories, whether listed +on the command line, obtained from \fB--file-list\fP, or by scanning a parent +directory. The pattern is a PCRE2 regular expression, and is matched against +the final component of the directory name, not the entire path. The \fB-F\fP, +\fB-w\fP, and \fB-x\fP options do not apply to this pattern. The option may be +given any number of times. 
If a directory matches both \fB--include-dir\fP and +\fB--exclude-dir\fP, it is excluded. There is no short form for this option. +.TP +\fB-L\fP, \fB--files-without-match\fP +Instead of outputting lines from the files, just output the names of the files +that do not contain any lines that would have been output. Each file name is +output once, on a separate line by default, but if the \fB-Z\fP option is set, +they are separated by zero bytes instead of newlines. This option overrides any +previous \fB-H\fP, \fB-h\fP, or \fB-l\fP options. +.TP +\fB-l\fP, \fB--files-with-matches\fP +Instead of outputting lines from the files, just output the names of the files +containing lines that would have been output. Each file name is output once, on +a separate line, but if the \fB-Z\fP option is set, they are separated by zero +bytes instead of newlines. Searching normally stops as soon as a matching line +is found in a file. However, if the \fB-c\fP (count) option is also used, +matching continues in order to obtain the correct count, and those files that +have at least one match are listed along with their counts. Using this option +with \fB-c\fP is a way of suppressing the listing of files with no matches that +occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP, +\fB-h\fP, or \fB-L\fP options. +.TP +\fB--label\fP=\fIname\fP +This option supplies a name to be used for the standard input when file names +are being output. If not supplied, "(standard input)" is used. There is no +short form for this option. +.TP +\fB--line-buffered\fP +When this option is given, non-compressed input is read and processed line by +line, and the output is flushed after each write. By default, input is read in +large chunks, unless \fBpcre2grep\fP can determine that it is reading from a +terminal, which is currently possible only in Unix-like environments or +Windows. Output to terminal is normally automatically flushed by the operating +system. 
This option can be useful when the input or output is attached to a +pipe and you do not want \fBpcre2grep\fP to buffer up large amounts of data. +However, its use will affect performance, and the \fB-M\fP (multiline) option +ceases to work. When input is from a compressed .gz or .bz2 file, +\fB--line-buffered\fP is ignored. +.TP +\fB--line-offsets\fP +Instead of showing lines or parts of lines that match, show each match as a +line number, the offset from the start of the line, and a length. The line +number is terminated by a colon (as usual; see the \fB-n\fP option), and the +offset and length are separated by a comma. In this mode, \fB--colour\fP has no +effect, and no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP +options are ignored. If there is more than one match in a line, each of them is +shown separately. This option is mutually exclusive with \fB--output\fP, +\fB--file-offsets\fP, and \fB--only-matching\fP. +.TP +\fB--locale\fP=\fIlocale-name\fP +This option specifies a locale to be used for pattern matching. It overrides +the value in the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variables. If no +locale is specified, the PCRE2 library's default (usually the "C" locale) is +used. There is no short form for this option. +.TP +\fB-M\fP, \fB--multiline\fP +Allow patterns to match more than one line. When this option is set, the PCRE2 +library is called in "multiline" mode, and a match is allowed to continue past +the end of the initial line and onto one or more subsequent lines. +.sp +Patterns used with \fB-M\fP may usefully contain literal newline characters and +internal occurrences of ^ and $ characters, because in multiline mode these can +match at internal newlines. Because \fBpcre2grep\fP is scanning multiple lines, +the \eZ and \ez assertions match only at the end of the last line in the file. +The \eA assertion matches at the start of the first line of a match. 
This can +be any line in the file; it is not anchored to the first line. +.sp +The output for a successful match may consist of more than one line. The first +line is the line in which the match started, and the last line is the line in +which the match ended. If the matched string ends with a newline sequence, the +output ends at the end of that line. If \fB-v\fP is set, none of the lines in a +multi-line match are output. Once a match has been handled, scanning restarts +at the beginning of the line after the one in which the match ended. +.sp +The newline sequence that separates multiple lines must be matched as part of +the pattern. For example, to find the phrase "regular expression" in a file +where "regular" might be at the end of a line and "expression" at the start of +the next line, you could use this command: +.sp + pcre2grep -M 'regular\es+expression' +.sp +The \es escape sequence matches any white space character, including newlines, +and is followed by + so as to match trailing white space on the first line as +well as possibly handling a two-character newline sequence. +.sp +There is a limit to the number of lines that can be matched, imposed by the way +that \fBpcre2grep\fP buffers the input file as it scans it. With a sufficiently +large processing buffer, this should not be a problem. +.sp +The \fB-M\fP option does not work when input is read line by line (see +\fB--line-buffered\fP.) +.TP +\fB-m\fP \fInumber\fP, \fB--max-count\fP=\fInumber\fP +Stop processing after finding \fInumber\fP matching lines, or non-matching +lines if \fB-v\fP is also set. Any trailing context lines are output after the +final match. In multiline mode, each multiline match counts as just one line +for this purpose. If this limit is reached when reading the standard input from +a regular file, the file is left positioned just after the last matching line. +If \fB-c\fP is also set, the count that is output is never greater than +\fInumber\fP. 
This option has no effect if used with \fB-L\fP, \fB-l\fP, or +\fB-q\fP, or when just checking for a match in a binary file. +.TP +\fB--match-limit\fP=\fInumber\fP +Processing some regular expression patterns may take a very long time to search +for all possible matching strings. Others may require a very large amount of +memory. There are three options that set resource limits for matching. +.sp +The \fB--match-limit\fP option provides a means of limiting computing resource +usage when processing patterns that are not going to match, but which have a +very large number of possibilities in their search trees. The classic example +is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a +counter that is incremented each time around its main processing loop. If the +value set by \fB--match-limit\fP is reached, an error occurs. +.sp +The \fB--heap-limit\fP option specifies, as a number of kibibytes (units of +1024 bytes), the maximum amount of heap memory that may be used for matching. +.sp +The \fB--depth-limit\fP option limits the depth of nested backtracking points, +which indirectly limits the amount of memory that is used. The amount of memory +needed for each backtracking point depends on the number of capturing +parentheses in the pattern, so the amount of memory that is used before this +limit acts varies from pattern to pattern. This limit is of use only if it is +set smaller than \fB--match-limit\fP. +.sp +There are no short forms for these options. The default limits can be set +when the PCRE2 library is compiled; if they are not specified, the defaults +are very large and so effectively unlimited. +.TP +\fB--max-buffer-size\fP=\fInumber\fP +This limits the expansion of the processing buffer, whose initial size can be +set by \fB--buffer-size\fP. The maximum buffer size is silently forced to be no +smaller than the starting buffer size. 
+.TP +\fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP +Six different conventions for indicating the ends of lines in scanned files are +supported. For example: +.sp + pcre2grep -N CRLF 'some pattern' +.sp +The newline type may be specified in upper, lower, or mixed case. If the +newline type is NUL, lines are separated by binary zero characters. The other +types are the single-character sequences CR (carriage return) and LF +(linefeed), the two-character sequence CRLF, an "anycrlf" type, which +recognizes any of the preceding three types, and an "any" type, for which any +Unicode line ending sequence is assumed to end a line. The Unicode sequences +are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed, +U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS +(paragraph separator, U+2029). +.sp +When the PCRE2 library is built, a default line-ending sequence is specified. +This is normally the standard sequence for the operating system. Unless +otherwise specified by this option, \fBpcre2grep\fP uses the library's default. +.sp +This option makes it possible to use \fBpcre2grep\fP to scan files that have +come from other environments without having to modify their line endings. If +the data that is being scanned does not agree with the convention set by this +option, \fBpcre2grep\fP may behave in strange ways. Note that this option does +not apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or +\fB--include-from\fP options, which are expected to use the operating system's +standard newline sequence. +.TP +\fB-n\fP, \fB--line-number\fP +Precede each output line by its line number in the file, followed by a colon +for matching lines or a hyphen for context lines. If the file name is also +being output, it precedes the line number. When the \fB-M\fP option causes a +pattern to match more than one line, only the first is preceded by its line +number. This option is forced if \fB--line-offsets\fP is used. 
+.TP +\fB--no-group-separator\fP +Do not output a separator between groups of lines when \fB-A\fP, \fB-B\fP, or +\fB-C\fP is in use. The default is to output a line containing two hyphens. See +also \fB--group-separator\fP. +.TP +\fB--no-jit\fP +If the PCRE2 library is built with support for just-in-time compiling (which +speeds up matching), \fBpcre2grep\fP automatically makes use of this, unless it +was explicitly disabled at build time. This option can be used to disable the +use of JIT at run time. It is provided for testing and working around problems. +It should never be needed in normal use. +.TP +\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP +When there is a match, instead of outputting the line that matched, output just +the text specified in this option, followed by an operating-system standard +newline. In this mode, \fB--colour\fP has no effect, and no context is shown. +That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. The +\fB--newline\fP option has no effect on this option, which is mutually +exclusive with \fB--only-matching\fP, \fB--file-offsets\fP, and +\fB--line-offsets\fP. However, like \fB--only-matching\fP, if there is more +than one match in a line, each of them causes a line of output. +.sp +Escape sequences starting with a dollar character may be used to insert the +contents of the matched part of the line and/or captured substrings into the +text. +.sp +$<digits> or ${<digits>} is replaced by the captured substring of the given +decimal number; zero substitutes the whole match. If the number is greater than +the number of capturing substrings, or if the capture is unset, the replacement +is empty. +.sp +$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by +newline; $r by carriage return; $t by tab; $v by vertical tab. +.sp +$o<digits> or $o{<digits>} is replaced by the character whose code point is the +given octal number. In the first form, up to three octal digits are processed.
+When more digits are needed in Unicode mode to specify a wide character, the +second form must be used. +.sp +$x<digits> or $x{<digits>} is replaced by the character represented by the +given hexadecimal number. In the first form, up to two hexadecimal digits are +processed. When more digits are needed in Unicode mode to specify a wide +character, the second form must be used. +.sp +Any other character is substituted by itself. In particular, $$ is replaced by +a single dollar. +.TP +\fB-o\fP, \fB--only-matching\fP +Show only the part of the line that matched a pattern instead of the whole +line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and +\fB-C\fP options are ignored. If there is more than one match in a line, each +of them is shown separately, on a separate line of output. If \fB-o\fP is +combined with \fB-v\fP (invert the sense of the match to find non-matching +lines), no output is generated, but the return code is set appropriately. If +the matched portion of the line is empty, nothing is output unless the file +name or line number are being printed, in which case they are shown on an +otherwise empty line. This option is mutually exclusive with \fB--output\fP, +\fB--file-offsets\fP and \fB--line-offsets\fP. +.TP +\fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP +Show only the part of the line that matched the capturing parentheses of the +given number. Up to 50 capturing parentheses are supported by default. This +limit can be changed via the \fB--om-capture\fP option. A pattern may contain +any number of capturing parentheses, but only those whose number is within the +limit can be accessed by \fB-o\fP. An error occurs if the number specified by +\fB-o\fP is greater than the limit. +.sp +-o0 is the same as \fB-o\fP without a number. Because these options can be +given without an argument (see above), if an argument is present, it must be +given in the same shell item, for example, -o3 or --only-matching=2.
The +comments given for the non-argument case above also apply to this option. If +the specified capturing parentheses do not exist in the pattern, or were not +set in the match, nothing is output unless the file name or line number are +being output. +.sp +If this option is given multiple times, multiple substrings are output for each +match, in the order the options are given, and all on one line. For example, +-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and +then 3 again to be output. By default, there is no separator (but see the next +but one option). +.TP +\fB--om-capture\fP=\fInumber\fP +Set the number of capturing parentheses that can be accessed by \fB-o\fP. The +default is 50. +.TP +\fB--om-separator\fP=\fItext\fP +Specify a separating string for multiple occurrences of \fB-o\fP. The default +is an empty string. Separating strings are never coloured. +.TP +\fB-P\fP, \fB--no-ucp\fP +Starting from release 10.43, when UTF/Unicode mode is specified with \fB-u\fP +or \fB-U\fP, the PCRE2_UCP option is used by default. This means that the +POSIX classes in patterns match more than just ASCII characters. For example, +[:digit:] matches any Unicode decimal digit. The \fB--no-ucp\fP option +suppresses PCRE2_UCP, thus restricting the POSIX classes to ASCII characters, +as was the case in earlier releases. Note that there are now more fine-grained +option settings within patterns that affect individual classes. For example, +when in UCP mode, the sequence (?aP) restricts [:word:] to ASCII letters, while +allowing \ew to match Unicode letters and digits. +.TP +\fB-q\fP, \fB--quiet\fP +Work quietly, that is, display nothing except error messages. The exit +status indicates whether or not any matches were found. +.TP +\fB-r\fP, \fB--recursive\fP +If any given path is a directory, recursively scan the files it contains, +taking note of any \fB--include\fP and \fB--exclude\fP settings. 
By default, a +directory is read as a normal file; in some operating systems this gives an +immediate end-of-file. This option is a shorthand for setting the \fB-d\fP +option to "recurse". +.TP +\fB--recursion-limit\fP=\fInumber\fP +This is an obsolete synonym for \fB--depth-limit\fP. See \fB--match-limit\fP +above for details. +.TP +\fB-s\fP, \fB--no-messages\fP +Suppress error messages about non-existent or unreadable files. Such files are +quietly skipped. However, the return code is still 2, even if matches were +found in other files. +.TP +\fB-t\fP, \fB--total-count\fP +This option is useful when scanning more than one file. If used on its own, +\fB-t\fP suppresses all output except for a grand total number of matching +lines (or non-matching lines if \fB-v\fP is used) in all the files. If \fB-t\fP +is used with \fB-c\fP, a grand total is output except when the previous output +is just one line. In other words, it is not output when just one file's count +is listed. If file names are being output, the grand total is preceded by +"TOTAL:". Otherwise, it appears as just another number. The \fB-t\fP option is +ignored when used with \fB-L\fP (list files without matches), because the grand +total would always be zero. +.TP +\fB-u\fP, \fB--utf\fP +Operate in UTF/Unicode mode. This option is available only if PCRE2 has been +compiled with UTF-8 support. All patterns (including those for any +\fB--exclude\fP and \fB--include\fP options) and all lines that are scanned +must be valid strings of UTF-8 characters. If an invalid UTF-8 string is +encountered, an error occurs. +.TP +\fB-U\fP, \fB--utf-allow-invalid\fP +As \fB--utf\fP, but in addition subject lines may contain invalid UTF-8 code +unit sequences. These can never form part of any pattern match. Patterns +themselves, however, must still be valid UTF-8 strings. This facility allows +valid UTF-8 strings to be sought within arbitrary byte sequences in executable +or other binary files. 
For more details about matching in non-valid UTF-8 +strings, see the +.\" HREF +\fBpcre2unicode\fP(3) +.\" +documentation. +.TP +\fB-V\fP, \fB--version\fP +Write the version numbers of \fBpcre2grep\fP and the PCRE2 library to the +standard output and then exit. Anything else on the command line is +ignored. +.TP +\fB-v\fP, \fB--invert-match\fP +Invert the sense of the match, so that lines which do \fInot\fP match any of +the patterns are the ones that are found. When this option is set, options such +as \fB--only-matching\fP and \fB--output\fP, which specify parts of a match +that are to be output, are ignored. +.TP +\fB-w\fP, \fB--word-regex\fP, \fB--word-regexp\fP +Force the patterns only to match "words". That is, there must be a word +boundary at the start and end of each matched string. This is equivalent to +having "\eb(?:" at the start of each pattern, and ")\eb" at the end. This +option applies only to the patterns that are matched against the contents of +files; it does not apply to patterns specified by any of the \fB--include\fP or +\fB--exclude\fP options. +.TP +\fB-x\fP, \fB--line-regex\fP, \fB--line-regexp\fP +Force the patterns to start matching only at the beginnings of lines, and in +addition, require them to match entire lines. In multiline mode the match may +be more than one line. This is equivalent to having "^(?:" at the start of each +pattern and ")$" at the end. This option applies only to the patterns that are +matched against the contents of files; it does not apply to patterns specified +by any of the \fB--include\fP or \fB--exclude\fP options. +.TP +\fB-Z\fP, \fB--null\fP +Terminate file names in the regular output with a zero byte (the NUL +character) instead of what would normally appear. This is useful when file +names contain unusual characters such as colons, hyphens, or even newlines. The +option does not apply to file names in error messages. +. +. 
+.SH "ENVIRONMENT VARIABLES" +.rs +.sp +The environment variables \fBLC_ALL\fP and \fBLC_CTYPE\fP are examined, in that +order, for a locale. The first one that is set is used. This can be overridden +by the \fB--locale\fP option. If no locale is set, the PCRE2 library's default +(usually the "C" locale) is used. +. +. +.SH "NEWLINES" +.rs +.sp +The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with +newline conventions that differ from the default. This option affects only the +way scanned files are processed. It does not affect the interpretation of files +specified by the \fB-f\fP, \fB--file-list\fP, \fB--exclude-from\fP, or +\fB--include-from\fP options. +.P +Any parts of the scanned input files that are written to the standard output +are copied with whatever newline sequences they have in the input. However, if +the final line of a file is output, and it does not end with a newline +sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF +or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a +single NL is used. +.P +The newline setting does not affect the way in which \fBpcre2grep\fP writes +newlines in informational messages to the standard output and error streams. +Under Windows, the standard output is set to be binary, so that "\er\en" at the +ends of output lines that are copied from the input is not converted to +"\er\er\en" by the C I/O library. This means that any messages written to the +standard output must end with "\er\en". For all other operating systems, and +for all messages to the standard error stream, "\en" is used. +. +. +.SH "OPTIONS COMPATIBILITY WITH GNU GREP" +.rs +.sp +Many of the short and long forms of \fBpcre2grep\fP's options are the same as +in the GNU \fBgrep\fP program. Any long option of the form \fB--xxx-regexp\fP +(GNU terminology) is also available as \fB--xxx-regex\fP (PCRE2 terminology). 
+However, the \fB--case-restrict\fP, \fB--depth-limit\fP, \fB-E\fP, +\fB--file-list\fP, \fB--file-offsets\fP, \fB--heap-limit\fP, +\fB--include-dir\fP, \fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP, +\fB-M\fP, \fB--multiline\fP, \fB-N\fP, \fB--newline\fP, \fB--no-ucp\fP, +\fB--om-separator\fP, \fB--output\fP, \fB-P\fP, \fB-u\fP, \fB--utf\fP, +\fB-U\fP, and \fB--utf-allow-invalid\fP options are specific to +\fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option with a +capturing parentheses number. +.P +Although most of the common options work the same way, a few are different in +\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob +for GNU \fBgrep\fP, but in \fBpcre2grep\fP it is a regular expression to which +the \fB-i\fP option applies. If both the \fB-c\fP and \fB-l\fP options are +given, GNU grep lists only file names, without counts, but \fBpcre2grep\fP +gives the counts as well. +. +. +.SH "OPTIONS WITH DATA" +.rs +.sp +There are four different ways in which an option with data can be specified. +If a short form option is used, the data may follow immediately, or (with one +exception) in the next command line item. For example: +.sp + -f/some/file + -f /some/file +.sp +The exception is the \fB-o\fP option, which may appear with or without data. +Because of this, if data is present, it must follow immediately in the same +item, for example -o3. +.P +If a long form option is used, the data may appear in the same command line +item, separated by an equals character, or (with two exceptions) it may appear +in the next command line item. For example: +.sp + --file=/some/file + --file /some/file +.sp +Note, however, that if you want to supply a file name beginning with ~ as data +in a shell command, and have the shell expand ~ to a home directory, you must +separate the file name from the option, because the shell does not treat ~ +specially unless it is at the start of an item. 
+.P +The exceptions to the above are the \fB--colour\fP (or \fB--color\fP) and +\fB--only-matching\fP options, for which the data is optional. If one of these +options does have data, it must be given in the first form, using an equals +character. Otherwise \fBpcre2grep\fP will assume that it has no data. +. +. +.SH "USING PCRE2'S CALLOUT FACILITY" +.rs +.sp +\fBpcre2grep\fP has, by default, support for calling external programs or +scripts or echoing specific strings during matching by making use of PCRE2's +callout facility. However, this support can be completely or partially disabled +when \fBpcre2grep\fP is built. You can find out whether your binary has support +for callouts by running it with the \fB--help\fP option. If callout support is +completely disabled, all callouts in patterns are ignored by \fBpcre2grep\fP. +If the facility is partially disabled, calling external programs is not +supported, and callouts that request it are ignored. +.P +A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is +either a number or a quoted string (see the +.\" HREF +\fBpcre2callout\fP +.\" +documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP; +only callouts with string arguments are useful. +. +. +.SS "Echoing a specific string" +.rs +.sp +Starting the callout string with a pipe character invokes an echoing facility +that avoids calling an external program or script. This facility is always +available, provided that callouts were not completely disabled when +\fBpcre2grep\fP was built. The rest of the callout string is processed as a +zero-terminated string, which means it should not contain any internal binary +zeros. It is written to the output, having first been passed through the same +escape processing as text from the \fB--output\fP (\fB-O\fP) option (see +above). However, $0 cannot be used to insert a matched substring because the +match is still in progress. Instead, the single character '0' is inserted. 
Any +syntax errors in the string (for example, a dollar not followed by another +character) cause the callout to be ignored. No terminator is added to the +output string, so if you want a newline, you must include it explicitly using +the escape $n. For example: +.sp + pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' +.sp +Matching continues normally after the string is output. If you want to see only +the callout output but not any output from an actual match, you should end the +pattern with (*FAIL). +. +. +.SS "Calling external programs or scripts" +.rs +.sp +This facility can be independently disabled when \fBpcre2grep\fP is built. It +is supported for Windows, where a call to \fB_spawnvp()\fP is used, for VMS, +where \fBlib$spawn()\fP is used, and for any Unix-like environment where +\fBfork()\fP and \fBexecv()\fP are available. +.P +If the callout string does not start with a pipe (vertical bar) character, it +is parsed into a list of substrings separated by pipe characters. The first +substring must be an executable name, with the following substrings specifying +arguments: +.sp + executable_name|arg1|arg2|... +.sp +Any substring (including the executable name) may contain escape sequences +started by a dollar character. These are the same as for the \fB--output\fP +(\fB-O\fP) option documented above, except that $0 cannot insert the matched +string because the match is still in progress. Instead, the character '0' +is inserted. If you need a literal dollar or pipe character in any +substring, use $$ or $| respectively. Here is an example: +.sp + echo -e "abcde\en12345" | pcre2grep \e + '(?x)(.)(..(.)) + (?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' - +.sp + Output: +.sp + Arg1: [a] [bcd] [d] Arg2: |a| () + abcde + Arg1: [1] [234] [4] Arg2: |1| () + 12345 +.sp +The parameters for the system call that is used to run the program or script +are zero-terminated strings. 
This means that binary zero characters in the +callout argument will cause premature termination of their substrings, and +therefore should not be present. Any syntax errors in the string (for example, +a dollar not followed by another character) cause the callout to be ignored. +If running the program fails for any reason (including the non-existence of the +executable), a local matching failure occurs and the matcher backtracks in the +normal way. +. +. +.SH "MATCHING ERRORS" +.rs +.sp +It is possible to supply a regular expression that takes a very long time to +fail to match certain lines. Such patterns normally involve nested indefinite +repeats, for example: (a+)*\ed when matched against a line of a's with no final +digit. The PCRE2 matching function has a resource limit that causes it to abort +in these circumstances. If this happens, \fBpcre2grep\fP outputs an error +message and the line that caused the problem to the standard error stream. If +there are more than 20 such errors, \fBpcre2grep\fP gives up. +.P +The \fB--match-limit\fP option of \fBpcre2grep\fP can be used to set the +overall resource limit. There are also other limits that affect the amount of +memory used during matching; see the discussion of \fB--heap-limit\fP and +\fB--depth-limit\fP above. +. +. +.SH DIAGNOSTICS +.rs +.sp +Exit status is 0 if any matches were found, 1 if no matches were found, and 2 +for syntax errors, overlong lines, non-existent or inaccessible files (even if +matches were found in other files) or too many matching errors. Using the +\fB-s\fP option to suppress error messages about inaccessible files does not +affect the return code. +.P +When run under VMS, the return code is placed in the symbol PCRE2GREP_RC +because VMS does not distinguish between exit(0) and exit(1). +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2callout\fP(3), +\fBpcre2unicode\fP(3). +. +. 
+.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 22 December 2023 +Copyright (c) 1997-2023 University of Cambridge. +.fi diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt new file mode 100644 index 0000000..7914c45 --- /dev/null +++ b/doc/pcre2grep.txt @@ -0,0 +1,1095 @@ + +PCRE2GREP(1) General Commands Manual PCRE2GREP(1) + + +NAME + pcre2grep - a grep with Perl-compatible regular expressions. + + +SYNOPSIS + pcre2grep [options] [long options] [pattern] [path1 path2 ...] + + +DESCRIPTION + + pcre2grep searches files for character patterns, in the same way as + other grep commands do, but it uses the PCRE2 regular expression li- + brary to support patterns that are compatible with the regular expres- + sions of Perl 5. See pcre2syntax(3) for a quick-reference summary of + pattern syntax, or pcre2pattern(3) for a full description of the syntax + and semantics of the regular expressions that PCRE2 supports. + + Patterns, whether supplied on the command line or in a separate file, + are given without delimiters. For example: + + pcre2grep Thursday /etc/motd + + If you attempt to use delimiters (for example, by surrounding a pattern + with slashes, as is common in Perl scripts), they are interpreted as + part of the pattern. Quotes can of course be used to delimit patterns + on the command line because they are interpreted by the shell, and in- + deed quotes are required if a pattern contains white space or shell + metacharacters. + + The first argument that follows any option settings is treated as the + single pattern to be matched when neither -e nor -f is present. Con- + versely, when one or both of these options are used to specify pat- + terns, all arguments are treated as path names. At least one of -e, -f, + or an argument pattern must be provided. + + If no files are specified, pcre2grep reads the standard input. 
The + standard input can also be referenced by a name consisting of a single + hyphen. For example: + + pcre2grep some-pattern file1 - file3 + + By default, input files are searched line by line, so pattern asser- + tions about the beginning and end of a subject string (^, $, \A, \Z, + and \z) match at the beginning and end of each line. When a line + matches a pattern, it is copied to the standard output, and if there is + more than one file, the file name is output at the start of each line, + followed by a colon. However, there are options that can change how + pcre2grep behaves. For example, the -M option makes it possible to + search for strings that span line boundaries. What defines a line + boundary is controlled by the -N (--newline) option. The -h and -H op- + tions control whether or not file names are shown, and the -Z option + changes the file name terminator to a zero byte. + + The amount of memory used for buffering files that are being scanned is + controlled by parameters that can be set by the --buffer-size and + --max-buffer-size options. The first of these sets the size of buffer + that is obtained at the start of processing. If an input file contains + very long lines, a larger buffer may be needed; this is handled by au- + tomatically extending the buffer, up to the limit specified by --max- + buffer-size. The default values for these parameters can be set when + pcre2grep is built; if nothing is specified, the defaults are set to + 20KiB and 1MiB respectively. An error occurs if a line is too long and + the buffer can no longer be expanded. + + The block of memory that is actually used is three times the "buffer + size", to allow for buffering "before" and "after" lines. If the buffer + size is too small, fewer than requested "before" and "after" lines may + be output. + + When matching with a multiline pattern, the size of the buffer must be + at least half of the maximum match expected or the pattern might fail + to match. 
+ + Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the + greater. BUFSIZ is defined in <stdio.h>. When there is more than one + pattern (specified by the use of -e and/or -f), each pattern is applied + to each line in the order in which they are defined, except that all + the -e patterns are tried before the -f patterns. + + By default, as soon as one pattern matches a line, no further patterns + are considered. However, if --colour (or --color) is used to colour the + matching substrings, or if --only-matching, --file-offsets, --line-off- + sets, or --output is used to output only the part of the line that + matched (either shown literally, or as an offset), the behaviour is + different. In this situation, all the patterns are applied to the line. + If there is more than one match, the one that begins nearest to the + start of the subject is processed; if there is more than one match at + that position, the one with the longest matching substring is + processed; if the matching substrings are equal, the first match found + is processed. + + Scanning with all the patterns resumes immediately following the match, + so that later matches on the same line can be found. Note, however, + that an overlapping match that starts in the middle of another match + will not be processed. + + The above behaviour was changed at release 10.41 to be more compatible + with GNU grep. In earlier releases, pcre2grep did not recognize matches + from later patterns that were earlier in the subject. + + Patterns that can match an empty string are accepted, but empty string + matches are never recognized. An example is the pattern "(su- + per)?(man)?", in which all components are optional. This pattern finds + all occurrences of both "super" and "man"; the output differs from + matching with "super|man" when only the matching substrings are being + shown. 
+ + If the LC_ALL or LC_CTYPE environment variable is set, pcre2grep uses + the value to set a locale when calling the PCRE2 library. The --locale + option can be used to override this. + + +SUPPORT FOR COMPRESSED FILES + + Compile-time options for pcre2grep can set it up to use libz or libbz2 + for reading compressed files whose names end in .gz or .bz2, respec- + tively. You can find out whether your pcre2grep binary has support for + one or both of these file types by running it with the --help option. + If the appropriate support is not present, all files are treated as + plain text. The standard input is always so treated. If a file with a + .gz or .bz2 extension is not in fact compressed, it is read as a plain + text file. When input is from a compressed .gz or .bz2 file, the + --line-buffered option is ignored. + + +BINARY FILES + + By default, a file that contains a binary zero byte within the first + 1024 bytes is identified as a binary file, and is processed specially. + However, if the newline type is specified as NUL, that is, the line + terminator is a binary zero, the test for a binary file is not applied. + See the --binary-files option for a means of changing the way binary + files are handled. + + +BINARY ZEROS IN PATTERNS + + Patterns passed from the command line are strings that are terminated + by a binary zero, so cannot contain internal zeros. However, patterns + that are read from a file via the -f option may contain binary zeros. + + +OPTIONS + + The order in which some of the options appear can affect the output. + For example, both the -H and -l options affect the printing of file + names. Whichever comes later in the command line will be the one that + takes effect. Similarly, except where noted below, if an option is + given twice, the later setting is used. Numerical values for options + may be followed by K or M, to signify multiplication by 1024 or + 1024*1024 respectively. + + -- This terminates the list of options. 
It is useful if the next + item on the command line starts with a hyphen but is not an + option. This allows for the processing of patterns and file + names that start with hyphens. + + -A number, --after-context=number + Output up to number lines of context after each matching + line. Fewer lines are output if the next match or the end of + the file is reached, or if the processing buffer size has + been set too small. If file names and/or line numbers are be- + ing output, a hyphen separator is used instead of a colon for + the context lines (the -Z option can be used to change the + file name terminator to a zero byte). A line containing "--" + is output between each group of lines, unless they are in + fact contiguous in the input file. The value of number is ex- + pected to be relatively small. When -c is used, -A is ig- + nored. + + -a, --text + Treat binary files as text. This is equivalent to --binary- + files=text. + + --allow-lookaround-bsk + PCRE2 now forbids the use of \K in lookarounds by default, in + line with Perl. This option causes pcre2grep to set the + PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option, which enables this + somewhat dangerous usage. + + -B number, --before-context=number + Output up to number lines of context before each matching + line. Fewer lines are output if the previous match or the + start of the file is within number lines, or if the process- + ing buffer size has been set too small. If file names and/or + line numbers are being output, a hyphen separator is used in- + stead of a colon for the context lines (the -Z option can be + used to change the file name terminator to a zero byte). A + line containing "--" is output between each group of lines, + unless they are in fact contiguous in the input file. The + value of number is expected to be relatively small. When -c + is used, -B is ignored. + + --binary-files=word + Specify how binary files are to be processed. 
If the word is + "binary" (the default), pattern matching is performed on bi- + nary files, but the only output is "Binary file <name> + matches" when a match succeeds. If the word is "text", which + is equivalent to the -a or --text option, binary files are + processed in the same way as any other file. In this case, + when a match succeeds, the output may be binary garbage, + which can have nasty effects if sent to a terminal. If the + word is "without-match", which is equivalent to the -I op- + tion, binary files are not processed at all; they are assumed + not to be of interest and are skipped without causing any + output or affecting the return code. + + --buffer-size=number + Set the parameter that controls how much memory is obtained + at the start of processing for buffering files that are being + scanned. See also --max-buffer-size below. + + -C number, --context=number + Output number lines of context both before and after each + matching line. This is equivalent to setting both -A and -B + to the same value. + + -c, --count + Do not output lines from the files that are being scanned; + instead output the number of lines that would have been + shown, either because they matched, or, if -v is set, because + they failed to match. By default, this count is exactly the + same as the number of lines that would have been output, but + if the -M (multiline) option is used (without -v), there may + be more suppressed lines than the count (that is, the number + of matches). + + If no lines are selected, the number zero is output. If sev- + eral files are being scanned, a count is output for each of + them and the -t option can be used to cause a total to be + output at the end. However, if the --files-with-matches op- + tion is also used, only those files whose counts are greater + than zero are listed. When -c is used, the -A, -B, and -C op- + tions are ignored. + + --colour, --color + If this option is given without any data, it is equivalent to + "--colour=auto". 
If data is required, it must be given in + the same shell item, separated by an equals sign. + + --colour=value, --color=value + This option specifies under what circumstances the parts of a + line that matched a pattern should be coloured in the output. + It is ignored if --file-offsets, --line-offsets, or --output + is set. By default, output is not coloured. The value for the + --colour option (which is optional, see above) may be + "never", "always", or "auto". In the latter case, colouring + happens only if the standard output is connected to a termi- + nal. More resources are used when colouring is enabled, be- + cause pcre2grep has to search for all possible matches in a + line, not just one, in order to colour them all. + + The colour that is used can be specified by setting one of + the environment variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, + PCREGREP_COLOUR, or PCREGREP_COLOR, which are checked in that + order. If none of these are set, pcre2grep looks for + GREP_COLORS or GREP_COLOR (in that order). The value of the + variable should be a string of two numbers, separated by a + semicolon, except in the case of GREP_COLORS, which must + start with "ms=" or "mt=" followed by two semicolon-separated + colours, terminated by the end of the string or by a colon. + If GREP_COLORS does not start with "ms=" or "mt=" it is ig- + nored, and GREP_COLOR is checked. + + If the string obtained from one of the above variables con- + tains any characters other than semicolon or digits, the set- + ting is ignored and the default colour is used. The string is + copied directly into the control string for setting colour on + a terminal, so it is your responsibility to ensure that the + values make sense. If no relevant environment variable is + set, the default is "1;31", which gives red. + + -D action, --devices=action + If an input path is not a regular file or a directory, "ac- + tion" specifies how it is to be processed. 
Valid values are + "read" (the default) or "skip" (silently skip the path). + + -d action, --directories=action + If an input path is a directory, "action" specifies how it is + to be processed. Valid values are "read" (the default in + non-Windows environments, for compatibility with GNU grep), + "recurse" (equivalent to the -r option), or "skip" (silently + skip the path, the default in Windows environments). In the + "read" case, directories are read as if they were ordinary + files. In some operating systems the effect of reading a di- + rectory like this is an immediate end-of-file; in others it + may provoke an error. + + --depth-limit=number + See --match-limit below. + + -E, --case-restrict + When case distinctions are being ignored in Unicode mode, two + ASCII letters (K and S) will by default match Unicode charac- + ters U+212A (Kelvin sign) and U+017F (long S) respectively, + as well as their lower case ASCII counterparts. When this op- + tion is set, case equivalences are restricted such that no + ASCII character matches a non-ASCII character, and vice + versa. + + -e pattern, --regex=pattern, --regexp=pattern + Specify a pattern to be matched. This option can be used mul- + tiple times in order to specify several patterns. It can also + be used as a way of specifying a single pattern that starts + with a hyphen. When -e is used, no argument pattern is taken + from the command line; all arguments are treated as file + names. There is no limit to the number of patterns. They are + applied to each line in the order in which they are defined. + + If -f is used with -e, the command line patterns are matched + first, followed by the patterns from the file(s), independent + of the order in which these options are specified. + + --exclude=pattern + Files (but not directories) whose names match the pattern are + skipped without being processed. 
This applies to all files, + whether listed on the command line, obtained from --file- + list, or by scanning a directory. The pattern is a PCRE2 reg- + ular expression, and is matched against the final component + of the file name, not the entire path. The -F, -w, and -x op- + tions do not apply to this pattern. The option may be given + any number of times in order to specify multiple patterns. If + a file name matches both an --include and an --exclude pat- + tern, it is excluded. There is no short form for this option. + + --exclude-from=filename + Treat each non-empty line of the file as the data for an + --exclude option. What constitutes a newline when reading the + file is the operating system's default. The --newline option + has no effect on this option. This option may be given more + than once in order to specify a number of files to read. + + --exclude-dir=pattern + Directories whose names match the pattern are skipped without + being processed, whatever the setting of the --recursive op- + tion. This applies to all directories, whether listed on the + command line, obtained from --file-list, or by scanning a + parent directory. The pattern is a PCRE2 regular expression, + and is matched against the final component of the directory + name, not the entire path. The -F, -w, and -x options do not + apply to this pattern. The option may be given any number of + times in order to specify more than one pattern. If a direc- + tory matches both --include-dir and --exclude-dir, it is ex- + cluded. There is no short form for this option. + + -F, --fixed-strings + Interpret each data-matching pattern as a list of fixed + strings, separated by newlines, instead of as a regular ex- + pression. What constitutes a newline for this purpose is con- + trolled by the --newline option. The -w (match as a word) and + -x (match whole line) options can be used with -F. They ap- + ply to each of the fixed strings. 
A line is selected if any + of the fixed strings are found in it (subject to -w or -x, if + present). This option applies only to the patterns that are + matched against the contents of files; it does not apply to + patterns specified by any of the --include or --exclude op- + tions. + + -f filename, --file=filename + Read patterns from the file, one per line. As is the case + with patterns on the command line, no delimiters should be + used. What constitutes a newline when reading the file is the + operating system's default interpretation of \n. The --new- + line option has no effect on this option. Trailing white + space is removed from each line, and blank lines are ignored. + An empty file contains no patterns and therefore matches + nothing. Patterns read from a file in this way may contain + binary zeros, which are treated as ordinary data characters. + + If this option is given more than once, all the specified + files are read. A data line is output if any of the patterns + match it. A file name can be given as "-" to refer to the + standard input. When -f is used, patterns specified on the + command line using -e may also be present; they are matched + before the file's patterns. However, no pattern is taken from + the command line; all arguments are treated as the names of + paths to be searched. + + --file-list=filename + Read a list of files and/or directories that are to be + scanned from the given file, one per line. What constitutes a + newline when reading the file is the operating system's de- + fault. Trailing white space is removed from each line, and + blank lines are ignored. These paths are processed before any + that are listed on the command line. The file name can be + given as "-" to refer to the standard input. If --file and + --file-list are both specified as "-", patterns are read + first. 
This is useful only when the standard input is a ter- + minal, from which further lines (the list of files) can be + read after an end-of-file indication. If this option is given + more than once, all the specified files are read. + + --file-offsets + Instead of showing lines or parts of lines that match, show + each match as an offset from the start of the file and a + length, separated by a comma. In this mode, --colour has no + effect, and no context is shown. That is, the -A, -B, and -C + options are ignored. If there is more than one match in a + line, each of them is shown separately. This option is mutu- + ally exclusive with --output, --line-offsets, and --only- + matching. + + --group-separator=text + Output this text string instead of two hyphens between groups + of lines when -A, -B, or -C is in use. See also --no-group- + separator. + + -H, --with-filename + Force the inclusion of the file name at the start of output + lines when searching a single file. The file name is not nor- + mally shown in this case. By default, for matching lines, + the file name is followed by a colon; for context lines, a + hyphen separator is used. The -Z option can be used to change + the terminator to a zero byte. If a line number is also being + output, it follows the file name. When the -M option causes a + pattern to match more than one line, only the first is pre- + ceded by the file name. This option overrides any previous + -h, -l, or -L options. + + -h, --no-filename + Suppress the output file names when searching multiple files. + File names are normally shown when multiple files are + searched. By default, for matching lines, the file name is + followed by a colon; for context lines, a hyphen separator is + used. The -Z option can be used to change the terminator to a + zero byte. If a line number is also being output, it follows + the file name. This option overrides any previous -H, -L, or + -l options. + + --heap-limit=number + See --match-limit below. 
+ + --help Output a help message, giving brief details of the command + options and file type support, and then exit. Anything else + on the command line is ignored. + + -I Ignore binary files. This is equivalent to --binary- + files=without-match. + + -i, --ignore-case + Ignore upper/lower case distinctions when pattern matching. + This applies when matching path names for inclusion or exclu- + sion as well as when matching lines in files. + + --include=pattern + If any --include patterns are specified, the only files that + are processed are those whose names match one of the patterns + and do not match an --exclude pattern. This option does not + affect directories, but it applies to all files, whether + listed on the command line, obtained from --file-list, or by + scanning a directory. The pattern is a PCRE2 regular expres- + sion, and is matched against the final component of the file + name, not the entire path. The -F, -w, and -x options do not + apply to this pattern. The option may be given any number of + times. If a file name matches both an --include and an --ex- + clude pattern, it is excluded. There is no short form for + this option. + + --include-from=filename + Treat each non-empty line of the file as the data for an + --include option. What constitutes a newline for this purpose + is the operating system's default. The --newline option has + no effect on this option. This option may be given any number + of times; all the files are read. + + --include-dir=pattern + If any --include-dir patterns are specified, the only direc- + tories that are processed are those whose names match one of + the patterns and do not match an --exclude-dir pattern. This + applies to all directories, whether listed on the command + line, obtained from --file-list, or by scanning a parent di- + rectory. The pattern is a PCRE2 regular expression, and is + matched against the final component of the directory name, + not the entire path. 
The -F, -w, and -x options do not apply + to this pattern. The option may be given any number of times. + If a directory matches both --include-dir and --exclude-dir, + it is excluded. There is no short form for this option. + + -L, --files-without-match + Instead of outputting lines from the files, just output the + names of the files that do not contain any lines that would + have been output. Each file name is output once, on a sepa- + rate line by default, but if the -Z option is set, they are + separated by zero bytes instead of newlines. This option + overrides any previous -H, -h, or -l options. + + -l, --files-with-matches + Instead of outputting lines from the files, just output the + names of the files containing lines that would have been out- + put. Each file name is output once, on a separate line, but + if the -Z option is set, they are separated by zero bytes in- + stead of newlines. Searching normally stops as soon as a + matching line is found in a file. However, if the -c (count) + option is also used, matching continues in order to obtain + the correct count, and those files that have at least one + match are listed along with their counts. Using this option + with -c is a way of suppressing the listing of files with no + matches that occurs with -c on its own. This option overrides + any previous -H, -h, or -L options. + + --label=name + This option supplies a name to be used for the standard input + when file names are being output. If not supplied, "(standard + input)" is used. There is no short form for this option. + + --line-buffered + When this option is given, non-compressed input is read and + processed line by line, and the output is flushed after each + write. By default, input is read in large chunks, unless + pcre2grep can determine that it is reading from a terminal, + which is currently possible only in Unix-like environments or + Windows. Output to terminal is normally automatically flushed + by the operating system. 
This option can be useful when the + input or output is attached to a pipe and you do not want + pcre2grep to buffer up large amounts of data. However, its + use will affect performance, and the -M (multiline) option + ceases to work. When input is from a compressed .gz or .bz2 + file, --line-buffered is ignored. + + --line-offsets + Instead of showing lines or parts of lines that match, show + each match as a line number, the offset from the start of the + line, and a length. The line number is terminated by a colon + (as usual; see the -n option), and the offset and length are + separated by a comma. In this mode, --colour has no effect, + and no context is shown. That is, the -A, -B, and -C options + are ignored. If there is more than one match in a line, each + of them is shown separately. This option is mutually exclu- + sive with --output, --file-offsets, and --only-matching. + + --locale=locale-name + This option specifies a locale to be used for pattern match- + ing. It overrides the value in the LC_ALL or LC_CTYPE envi- + ronment variables. If no locale is specified, the PCRE2 li- + brary's default (usually the "C" locale) is used. There is no + short form for this option. + + -M, --multiline + Allow patterns to match more than one line. When this option + is set, the PCRE2 library is called in "multiline" mode, and + a match is allowed to continue past the end of the initial + line and onto one or more subsequent lines. + + Patterns used with -M may usefully contain literal newline + characters and internal occurrences of ^ and $ characters, + because in multiline mode these can match at internal new- + lines. Because pcre2grep is scanning multiple lines, the \Z + and \z assertions match only at the end of the last line in + the file. The \A assertion matches at the start of the first + line of a match. This can be any line in the file; it is not + anchored to the first line. + + The output for a successful match may consist of more than + one line. 
The first line is the line in which the match + started, and the last line is the line in which the match + ended. If the matched string ends with a newline sequence, + the output ends at the end of that line. If -v is set, none + of the lines in a multi-line match are output. Once a match + has been handled, scanning restarts at the beginning of the + line after the one in which the match ended. + + The newline sequence that separates multiple lines must be + matched as part of the pattern. For example, to find the + phrase "regular expression" in a file where "regular" might + be at the end of a line and "expression" at the start of the + next line, you could use this command: + + pcre2grep -M 'regular\s+expression' + + The \s escape sequence matches any white space character, in- + cluding newlines, and is followed by + so as to match trail- + ing white space on the first line as well as possibly han- + dling a two-character newline sequence. + + There is a limit to the number of lines that can be matched, + imposed by the way that pcre2grep buffers the input file as + it scans it. With a sufficiently large processing buffer, + this should not be a problem. + + The -M option does not work when input is read line by line + (see --line-buffered.) + + -m number, --max-count=number + Stop processing after finding number matching lines, or non- + matching lines if -v is also set. Any trailing context lines + are output after the final match. In multiline mode, each + multiline match counts as just one line for this purpose. If + this limit is reached when reading the standard input from a + regular file, the file is left positioned just after the last + matching line. If -c is also set, the count that is output + is never greater than number. This option has no effect if + used with -L, -l, or -q, or when just checking for a match in + a binary file. 
+ + --match-limit=number + Processing some regular expression patterns may take a very + long time to search for all possible matching strings. Others + may require a very large amount of memory. There are three + options that set resource limits for matching. + + The --match-limit option provides a means of limiting comput- + ing resource usage when processing patterns that are not go- + ing to match, but which have a very large number of possibil- + ities in their search trees. The classic example is a pattern + that uses nested unlimited repeats. Internally, PCRE2 has a + counter that is incremented each time around its main pro- + cessing loop. If the value set by --match-limit is reached, + an error occurs. + + The --heap-limit option specifies, as a number of kibibytes + (units of 1024 bytes), the maximum amount of heap memory that + may be used for matching. + + The --depth-limit option limits the depth of nested back- + tracking points, which indirectly limits the amount of memory + that is used. The amount of memory needed for each backtrack- + ing point depends on the number of capturing parentheses in + the pattern, so the amount of memory that is used before this + limit acts varies from pattern to pattern. This limit is of + use only if it is set smaller than --match-limit. + + There are no short forms for these options. The default lim- + its can be set when the PCRE2 library is compiled; if they + are not specified, the defaults are very large and so effec- + tively unlimited. + + --max-buffer-size=number + This limits the expansion of the processing buffer, whose + initial size can be set by --buffer-size. The maximum buffer + size is silently forced to be no smaller than the starting + buffer size. + + -N newline-type, --newline=newline-type + Six different conventions for indicating the ends of lines in + scanned files are supported. 
For example: + + pcre2grep -N CRLF 'some pattern' + + The newline type may be specified in upper, lower, or mixed + case. If the newline type is NUL, lines are separated by bi- + nary zero characters. The other types are the single-charac- + ter sequences CR (carriage return) and LF (linefeed), the + two-character sequence CRLF, an "anycrlf" type, which recog- + nizes any of the preceding three types, and an "any" type, + for which any Unicode line ending sequence is assumed to end + a line. The Unicode sequences are the three just mentioned, + plus VT (vertical tab, U+000B), FF (form feed, U+000C), NEL + (next line, U+0085), LS (line separator, U+2028), and PS + (paragraph separator, U+2029). + + When the PCRE2 library is built, a default line-ending se- + quence is specified. This is normally the standard sequence + for the operating system. Unless otherwise specified by this + option, pcre2grep uses the library's default. + + This option makes it possible to use pcre2grep to scan files + that have come from other environments without having to mod- + ify their line endings. If the data that is being scanned + does not agree with the convention set by this option, + pcre2grep may behave in strange ways. Note that this option + does not apply to files specified by the -f, --exclude-from, + or --include-from options, which are expected to use the op- + erating system's standard newline sequence. + + -n, --line-number + Precede each output line by its line number in the file, fol- + lowed by a colon for matching lines or a hyphen for context + lines. If the file name is also being output, it precedes the + line number. When the -M option causes a pattern to match + more than one line, only the first is preceded by its line + number. This option is forced if --line-offsets is used. + + --no-group-separator + Do not output a separator between groups of lines when -A, + -B, or -C is in use. The default is to output a line contain- + ing two hyphens. 
See also --group-separator. + + --no-jit If the PCRE2 library is built with support for just-in-time + compiling (which speeds up matching), pcre2grep automatically + makes use of this, unless it was explicitly disabled at build + time. This option can be used to disable the use of JIT at + run time. It is provided for testing and working around prob- + lems. It should never be needed in normal use. + + -O text, --output=text + When there is a match, instead of outputting the line that + matched, output just the text specified in this option, fol- + lowed by an operating-system standard newline. In this mode, + --colour has no effect, and no context is shown. That is, + the -A, -B, and -C options are ignored. The --newline option + has no effect on this option, which is mutually exclusive + with --only-matching, --file-offsets, and --line-offsets. + However, like --only-matching, if there is more than one + match in a line, each of them causes a line of output. + + Escape sequences starting with a dollar character may be used + to insert the contents of the matched part of the line and/or + captured substrings into the text. + + $<digits> or ${<digits>} is replaced by the captured sub- + string of the given decimal number; zero substitutes the + whole match. If the number is greater than the number of cap- + turing substrings, or if the capture is unset, the replace- + ment is empty. + + $a is replaced by bell; $b by backspace; $e by escape; $f by + form feed; $n by newline; $r by carriage return; $t by tab; + $v by vertical tab. + + $o<digits> or $o{<digits>} is replaced by the character whose + code point is the given octal number. In the first form, up + to three octal digits are processed. When more digits are + needed in Unicode mode to specify a wide character, the sec- + ond form must be used. + + $x<digits> or $x{<digits>} is replaced by the character rep- + resented by the given hexadecimal number. In the first form, + up to two hexadecimal digits are processed.
When more digits + are needed in Unicode mode to specify a wide character, the + second form must be used. + + Any other character is substituted by itself. In particular, + $$ is replaced by a single dollar. + + -o, --only-matching + Show only the part of the line that matched a pattern instead + of the whole line. In this mode, no context is shown. That + is, the -A, -B, and -C options are ignored. If there is more + than one match in a line, each of them is shown separately, + on a separate line of output. If -o is combined with -v (in- + vert the sense of the match to find non-matching lines), no + output is generated, but the return code is set appropri- + ately. If the matched portion of the line is empty, nothing + is output unless the file name or line number are being + printed, in which case they are shown on an otherwise empty + line. This option is mutually exclusive with --output, + --file-offsets and --line-offsets. + + -onumber, --only-matching=number + Show only the part of the line that matched the capturing + parentheses of the given number. Up to 50 capturing parenthe- + ses are supported by default. This limit can be changed via + the --om-capture option. A pattern may contain any number of + capturing parentheses, but only those whose number is within + the limit can be accessed by -o. An error occurs if the num- + ber specified by -o is greater than the limit. + + -o0 is the same as -o without a number. Because these options + can be given without an argument (see above), if an argument + is present, it must be given in the same shell item, for ex- + ample, -o3 or --only-matching=2. The comments given for the + non-argument case above also apply to this option. If the + specified capturing parentheses do not exist in the pattern, + or were not set in the match, nothing is output unless the + file name or line number are being output. 
+ + If this option is given multiple times, multiple substrings + are output for each match, in the order the options are + given, and all on one line. For example, -o3 -o1 -o3 causes + the substrings matched by capturing parentheses 3 and 1 and + then 3 again to be output. By default, there is no separator + (but see the next but one option). + + --om-capture=number + Set the number of capturing parentheses that can be accessed + by -o. The default is 50. + + --om-separator=text + Specify a separating string for multiple occurrences of -o. + The default is an empty string. Separating strings are never + coloured. + + -P, --no-ucp + Starting from release 10.43, when UTF/Unicode mode is speci- + fied with -u or -U, the PCRE2_UCP option is used by default. + This means that the POSIX classes in patterns match more than + just ASCII characters. For example, [:digit:] matches any + Unicode decimal digit. The --no-ucp option suppresses + PCRE2_UCP, thus restricting the POSIX classes to ASCII char- + acters, as was the case in earlier releases. Note that there + are now more fine-grained option settings within patterns + that affect individual classes. For example, when in UCP + mode, the sequence (?aP) restricts [:word:] to ASCII letters, + while allowing \w to match Unicode letters and digits. + + -q, --quiet + Work quietly, that is, display nothing except error messages. + The exit status indicates whether or not any matches were + found. + + -r, --recursive + If any given path is a directory, recursively scan the files + it contains, taking note of any --include and --exclude set- + tings. By default, a directory is read as a normal file; in + some operating systems this gives an immediate end-of-file. + This option is a shorthand for setting the -d option to "re- + curse". + + --recursion-limit=number + This is an obsolete synonym for --depth-limit. See --match- + limit above for details. 
+ + -s, --no-messages + Suppress error messages about non-existent or unreadable + files. Such files are quietly skipped. However, the return + code is still 2, even if matches were found in other files. + + -t, --total-count + This option is useful when scanning more than one file. If + used on its own, -t suppresses all output except for a grand + total number of matching lines (or non-matching lines if -v + is used) in all the files. If -t is used with -c, a grand to- + tal is output except when the previous output is just one + line. In other words, it is not output when just one file's + count is listed. If file names are being output, the grand + total is preceded by "TOTAL:". Otherwise, it appears as just + another number. The -t option is ignored when used with -L + (list files without matches), because the grand total would + always be zero. + + -u, --utf Operate in UTF/Unicode mode. This option is available only if + PCRE2 has been compiled with UTF-8 support. All patterns (in- + cluding those for any --exclude and --include options) and + all lines that are scanned must be valid strings of UTF-8 + characters. If an invalid UTF-8 string is encountered, an er- + ror occurs. + + -U, --utf-allow-invalid + As --utf, but in addition subject lines may contain invalid + UTF-8 code unit sequences. These can never form part of any + pattern match. Patterns themselves, however, must still be + valid UTF-8 strings. This facility allows valid UTF-8 strings + to be sought within arbitrary byte sequences in executable or + other binary files. For more details about matching in non- + valid UTF-8 strings, see the pcre2unicode(3) documentation. + + -V, --version + Write the version numbers of pcre2grep and the PCRE2 library + to the standard output and then exit. Anything else on the + command line is ignored. + + -v, --invert-match + Invert the sense of the match, so that lines which do not + match any of the patterns are the ones that are found. 
When + this option is set, options such as --only-matching and + --output, which specify parts of a match that are to be out- + put, are ignored. + + -w, --word-regex, --word-regexp + Force the patterns only to match "words". That is, there must + be a word boundary at the start and end of each matched + string. This is equivalent to having "\b(?:" at the start of + each pattern, and ")\b" at the end. This option applies only + to the patterns that are matched against the contents of + files; it does not apply to patterns specified by any of the + --include or --exclude options. + + -x, --line-regex, --line-regexp + Force the patterns to start matching only at the beginnings + of lines, and in addition, require them to match entire + lines. In multiline mode the match may be more than one line. + This is equivalent to having "^(?:" at the start of each pat- + tern and ")$" at the end. This option applies only to the + patterns that are matched against the contents of files; it + does not apply to patterns specified by any of the --include + or --exclude options. + + -Z, --null + Terminate file names in the regular output with a zero byte + (the NUL character) instead of what would normally appear. + This is useful when file names contain unusual characters + such as colons, hyphens, or even newlines. The option does + not apply to file names in error messages. + + +ENVIRONMENT VARIABLES + + The environment variables LC_ALL and LC_CTYPE are examined, in that or- + der, for a locale. The first one that is set is used. This can be over- + ridden by the --locale option. If no locale is set, the PCRE2 library's + default (usually the "C" locale) is used. + + +NEWLINES + + The -N (--newline) option allows pcre2grep to scan files with newline + conventions that differ from the default. This option affects only the + way scanned files are processed.
It does not affect the interpretation + of files specified by the -f, --file-list, --exclude-from, or --in- + clude-from options. + + Any parts of the scanned input files that are written to the standard + output are copied with whatever newline sequences they have in the in- + put. However, if the final line of a file is output, and it does not + end with a newline sequence, a newline sequence is added. If the new- + line setting is CR, LF, CRLF or NUL, that line ending is output; for + the other settings (ANYCRLF or ANY) a single NL is used. + + The newline setting does not affect the way in which pcre2grep writes + newlines in informational messages to the standard output and error + streams. Under Windows, the standard output is set to be binary, so + that "\r\n" at the ends of output lines that are copied from the input + is not converted to "\r\r\n" by the C I/O library. This means that any + messages written to the standard output must end with "\r\n". For all + other operating systems, and for all messages to the standard error + stream, "\n" is used. + + +OPTIONS COMPATIBILITY WITH GNU GREP + + Many of the short and long forms of pcre2grep's options are the same as + in the GNU grep program. Any long option of the form --xxx-regexp (GNU + terminology) is also available as --xxx-regex (PCRE2 terminology). + However, the --case-restrict, --depth-limit, -E, --file-list, --file- + offsets, --heap-limit, --include-dir, --line-offsets, --locale, + --match-limit, -M, --multiline, -N, --newline, --no-ucp, --om-separa- + tor, --output, -P, -u, --utf, -U, and --utf-allow-invalid options are + specific to pcre2grep, as is the use of the --only-matching option with + a capturing parentheses number. + + Although most of the common options work the same way, a few are dif- + ferent in pcre2grep. For example, the --include option's argument is a + glob for GNU grep, but in pcre2grep it is a regular expression to which + the -i option applies. 
If both the -c and -l options are given, GNU + grep lists only file names, without counts, but pcre2grep gives the + counts as well. + + +OPTIONS WITH DATA + + There are four different ways in which an option with data can be spec- + ified. If a short form option is used, the data may follow immedi- + ately, or (with one exception) in the next command line item. For exam- + ple: + + -f/some/file + -f /some/file + + The exception is the -o option, which may appear with or without data. + Because of this, if data is present, it must follow immediately in the + same item, for example -o3. + + If a long form option is used, the data may appear in the same command + line item, separated by an equals character, or (with two exceptions) + it may appear in the next command line item. For example: + + --file=/some/file + --file /some/file + + Note, however, that if you want to supply a file name beginning with ~ + as data in a shell command, and have the shell expand ~ to a home di- + rectory, you must separate the file name from the option, because the + shell does not treat ~ specially unless it is at the start of an item. + + The exceptions to the above are the --colour (or --color) and --only- + matching options, for which the data is optional. If one of these op- + tions does have data, it must be given in the first form, using an + equals character. Otherwise pcre2grep will assume that it has no data. + + +USING PCRE2'S CALLOUT FACILITY + + pcre2grep has, by default, support for calling external programs or + scripts or echoing specific strings during matching by making use of + PCRE2's callout facility. However, this support can be completely or + partially disabled when pcre2grep is built. You can find out whether + your binary has support for callouts by running it with the --help op- + tion. If callout support is completely disabled, all callouts in pat- + terns are ignored by pcre2grep. 
If the facility is partially disabled, + calling external programs is not supported, and callouts that request + it are ignored. + + A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu- + ment <arg> is either a number or a quoted string (see the pcre2callout docu- + mentation for details). Numbered callouts are ignored by pcre2grep; + only callouts with string arguments are useful. + + Echoing a specific string + + Starting the callout string with a pipe character invokes an echoing + facility that avoids calling an external program or script. This facil- + ity is always available, provided that callouts were not completely + disabled when pcre2grep was built. The rest of the callout string is + processed as a zero-terminated string, which means it should not con- + tain any internal binary zeros. It is written to the output, having + first been passed through the same escape processing as text from the + --output (-O) option (see above). However, $0 cannot be used to insert + a matched substring because the match is still in progress. Instead, + the single character '0' is inserted. Any syntax errors in the string + (for example, a dollar not followed by another character) causes the + callout to be ignored. No terminator is added to the output string, so + if you want a newline, you must include it explicitly using the escape + $n. For example: + + pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' + + Matching continues normally after the string is output. If you want to + see only the callout output but not any output from an actual match, + you should end the pattern with (*FAIL). + + Calling external programs or scripts + + This facility can be independently disabled when pcre2grep is built. It + is supported for Windows, where a call to _spawnvp() is used, for VMS, + where lib$spawn() is used, and for any Unix-like environment where + fork() and execv() are available.
+ + If the callout string does not start with a pipe (vertical bar) charac- + ter, it is parsed into a list of substrings separated by pipe charac- + ters. The first substring must be an executable name, with the follow- + ing substrings specifying arguments: + + executable_name|arg1|arg2|... + + Any substring (including the executable name) may contain escape se- + quences started by a dollar character. These are the same as for the + --output (-O) option documented above, except that $0 cannot insert the + matched string because the match is still in progress. Instead, the + character '0' is inserted. If you need a literal dollar or pipe charac- + ter in any substring, use $$ or $| respectively. Here is an example: + + echo -e "abcde\n12345" | pcre2grep \ + '(?x)(.)(..(.)) + (?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' - + + Output: + + Arg1: [a] [bcd] [d] Arg2: |a| () + abcde + Arg1: [1] [234] [4] Arg2: |1| () + 12345 + + The parameters for the system call that is used to run the program or + script are zero-terminated strings. This means that binary zero charac- + ters in the callout argument will cause premature termination of their + substrings, and therefore should not be present. Any syntax errors in + the string (for example, a dollar not followed by another character) + causes the callout to be ignored. If running the program fails for any + reason (including the non-existence of the executable), a local match- + ing failure occurs and the matcher backtracks in the normal way. + + +MATCHING ERRORS + + It is possible to supply a regular expression that takes a very long + time to fail to match certain lines. Such patterns normally involve + nested indefinite repeats, for example: (a+)*\d when matched against a + line of a's with no final digit. The PCRE2 matching function has a re- + source limit that causes it to abort in these circumstances. 
If this + happens, pcre2grep outputs an error message and the line that caused + the problem to the standard error stream. If there are more than 20 + such errors, pcre2grep gives up. + + The --match-limit option of pcre2grep can be used to set the overall + resource limit. There are also other limits that affect the amount of + memory used during matching; see the discussion of --heap-limit and + --depth-limit above. + + +DIAGNOSTICS + + Exit status is 0 if any matches were found, 1 if no matches were found, + and 2 for syntax errors, overlong lines, non-existent or inaccessible + files (even if matches were found in other files) or too many matching + errors. Using the -s option to suppress error messages about inaccessi- + ble files does not affect the return code. + + When run under VMS, the return code is placed in the symbol + PCRE2GREP_RC because VMS does not distinguish between exit(0) and + exit(1). + + +SEE ALSO + + pcre2pattern(3), pcre2syntax(3), pcre2callout(3), pcre2unicode(3). + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 22 December 2023 + Copyright (c) 1997-2023 University of Cambridge. + + +PCRE2 10.43 22 December 2023 PCRE2GREP(1) diff --git a/doc/pcre2jit.3 b/doc/pcre2jit.3 new file mode 100644 index 0000000..8798089 --- /dev/null +++ b/doc/pcre2jit.3 @@ -0,0 +1,481 @@ +.TH PCRE2JIT 3 "21 February 2024" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT" +.rs +.sp +Just-in-time compiling is a heavyweight optimization that can greatly speed up +pattern matching. However, it comes at the cost of extra processing before the +match is performed, so it is of most benefit when the same pattern is going to +be matched many times. 
This does not necessarily mean many calls of a matching +function; if the pattern is not anchored, matching attempts may take place many +times at various positions in the subject, even for a single call. Therefore, +if the subject string is very long, it may still pay to use JIT even for +one-off matches. JIT support is available for all of the 8-bit, 16-bit and +32-bit PCRE2 libraries. +.P +JIT support applies only to the traditional Perl-compatible matching function. +It does not apply when the DFA matching function is being used. The code for +JIT support was written by Zoltan Herczeg. +. +. +.SH "AVAILABILITY OF JIT SUPPORT" +.rs +.sp +JIT support is an optional feature of PCRE2. The "configure" option +--enable-jit (or equivalent CMake option) must be set when PCRE2 is built if +you want to use JIT. The support is limited to the following hardware +platforms: +.sp + ARM 32-bit (v7, and Thumb2) + ARM 64-bit + IBM s390x 64 bit + Intel x86 32-bit and 64-bit + LoongArch 64 bit + MIPS 32-bit and 64-bit + Power PC 32-bit and 64-bit + RISC-V 32-bit and 64-bit +.sp +If --enable-jit is set on an unsupported platform, compilation fails. +.P +A client program can tell if JIT support is available by calling +\fBpcre2_config()\fP with the PCRE2_CONFIG_JIT option. The result is one if +PCRE2 was built with JIT support, and zero otherwise. However, having the JIT +code available does not guarantee that it will be used for any particular +match. One reason for this is that there are a number of options and pattern +items that are +.\" HTML +.\" +not supported by JIT +.\" +(see below). Another reason is that in some environments JIT is unable to get +memory in which to build its compiled code. The only guarantee from +\fBpcre2_config()\fP is that if it returns zero, JIT will definitely \fInot\fP +be used. +.P +A simple program does not need to check availability in order to use JIT when +possible. 
The API is implemented in a way that falls back to the interpretive +code if JIT is not available or cannot be used for a given match. For programs +that need the best possible performance, there is a +.\" HTML +.\" +"fast path" +.\" +API that is JIT-specific. +. +. +.SH "SIMPLE USE OF JIT" +.rs +.sp +To make use of the JIT support in the simplest way, all you have to do is to +call \fBpcre2_jit_compile()\fP after successfully compiling a pattern with +\fBpcre2_compile()\fP. This function has two arguments: the first is the +compiled pattern pointer that was returned by \fBpcre2_compile()\fP, and the +second is zero or more of the following option bits: PCRE2_JIT_COMPLETE, +PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT. +.P +If JIT support is not available, a call to \fBpcre2_jit_compile()\fP does +nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled pattern +is passed to the JIT compiler, which turns it into machine code that executes +much faster than the normal interpretive code, but yields exactly the same +results. The returned value from \fBpcre2_jit_compile()\fP is zero on success, +or a negative error code. +.P +There is a limit to the size of pattern that JIT supports, imposed by the size +of machine stack that it uses. The exact rules are not documented because they +may change at any time, in particular, when new optimizations are introduced. +If a pattern is too big, a call to \fBpcre2_jit_compile()\fP returns +PCRE2_ERROR_NOMEMORY. +.P +PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for complete +matches. If you want to run partial matches using the PCRE2_PARTIAL_HARD or +PCRE2_PARTIAL_SOFT options of \fBpcre2_match()\fP, you should set one or both +of the other options as well as, or instead of PCRE2_JIT_COMPLETE. The JIT +compiler generates different optimized code for each of the three modes +(normal, soft partial, hard partial). When \fBpcre2_match()\fP is called, the +appropriate code is run if it is available. 
Otherwise, the pattern is matched +using interpretive code. +.P +You can call \fBpcre2_jit_compile()\fP multiple times for the same compiled +pattern. It does nothing if it has previously compiled code for any of the +option bits. For example, you can call it once with PCRE2_JIT_COMPLETE and +(perhaps later, when you find you need partial matching) again with +PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it will ignore +PCRE2_JIT_COMPLETE and just compile code for partial matching. If +\fBpcre2_jit_compile()\fP is called with no option bits set, it immediately +returns zero. This is an alternative way of testing whether JIT is available. +.P +At present, it is not possible to free JIT compiled code except when the entire +compiled pattern is freed by calling \fBpcre2_code_free()\fP. +.P +In some circumstances you may need to call additional functions. These are +described in the section entitled +.\" HTML +.\" +"Controlling the JIT stack" +.\" +below. +.P +There are some \fBpcre2_match()\fP options that are not supported by JIT, and +there are also some pattern items that JIT cannot handle. Details are given +.\" HTML +.\" +below. +.\" +In both cases, matching automatically falls back to the interpretive code. If +you want to know whether JIT was actually used for a particular match, you +should arrange for a JIT callback function to be set up as described in the +section entitled +.\" HTML +.\" +"Controlling the JIT stack" +.\" +below, even if you do not need to supply a non-default JIT stack. Such a +callback function is called whenever JIT code is about to be obeyed. If the +match-time options are not right for JIT execution, the callback function is +not obeyed. +.P +If the JIT compiler finds an unsupported item, no JIT data is generated. You +can find out if JIT compilation was successful for a compiled pattern by +calling \fBpcre2_pattern_info()\fP with the PCRE2_INFO_JITSIZE option. A +non-zero result means that JIT compilation was successful. 
A result of 0 means +that JIT support is not available, or the pattern was not processed by +\fBpcre2_jit_compile()\fP, or the JIT compiler was not able to handle the +pattern. Successful JIT compilation does not, however, guarantee the use of JIT +at match time because there are some match time options that are not supported +by JIT. +. +. +.SH "MATCHING SUBJECTS CONTAINING INVALID UTF" +.rs +.sp +When a pattern is compiled with the PCRE2_UTF option, subject strings are +normally expected to be a valid sequence of UTF code units. By default, this is +checked at the start of matching and an error is generated if invalid UTF is +detected. The PCRE2_NO_UTF_CHECK option can be passed to \fBpcre2_match()\fP to +skip the check (for improved performance) if you are sure that a subject string +is valid. If this option is used with an invalid string, the result is +undefined. The calling program may crash or loop or otherwise misbehave. +.P +However, a way of running matches on strings that may contain invalid UTF +sequences is available. Calling \fBpcre2_compile()\fP with the +PCRE2_MATCH_INVALID_UTF option has two effects: it tells the interpreter in +\fBpcre2_match()\fP to support invalid UTF, and, if \fBpcre2_jit_compile()\fP +is subsequently called, the compiled JIT code also supports invalid UTF. +Details of how this support works, in both the JIT and the interpretive cases, +are given in the +.\" HREF +\fBpcre2unicode\fP +.\" +documentation. +.P +There is also an obsolete option for \fBpcre2_jit_compile()\fP called +PCRE2_JIT_INVALID_UTF, which currently exists only for backward compatibility. +It is superseded by the \fBpcre2_compile()\fP option PCRE2_MATCH_INVALID_UTF +and should no longer be used. It may be removed in future. +. +.
+.\" HTML +.SH "UNSUPPORTED OPTIONS AND PATTERN ITEMS" +.rs +.sp +The \fBpcre2_match()\fP options that are supported for JIT matching are +PCRE2_COPY_MATCHED_SUBJECT, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, +PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and +PCRE2_PARTIAL_SOFT. The PCRE2_ANCHORED and PCRE2_ENDANCHORED options are not +supported at match time. +.P +If the PCRE2_NO_JIT option is passed to \fBpcre2_match()\fP it disables the +use of JIT, forcing matching by the interpreter code. +.P +The only unsupported pattern items are \eC (match a single data unit) when +running in a UTF mode, and a callout immediately before an assertion condition +in a conditional group. +. +. +.SH "RETURN VALUES FROM JIT MATCHING" +.rs +.sp +When a pattern is matched using JIT, the return values are the same as those +given by the interpretive \fBpcre2_match()\fP code, with the addition of one +new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the memory used for +the JIT stack was insufficient. See +.\" HTML +.\" +"Controlling the JIT stack" +.\" +below for a discussion of JIT stack usage. +.P +The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if searching +a very large pattern tree goes on for too long, as it is in the same +circumstance when JIT is not used, but the details of exactly what is counted +are not the same. The PCRE2_ERROR_DEPTHLIMIT error code is never returned +when JIT matching is used. +. +. +.\" HTML +.SH "CONTROLLING THE JIT STACK" +.rs +.sp +When the compiled JIT code runs, it needs a block of memory to use as a stack. +By default, it uses 32KiB on the machine stack. However, some large or +complicated patterns need more than this. The error PCRE2_ERROR_JIT_STACKLIMIT +is given when there is not enough stack. Three functions are provided for +managing blocks of memory for use as JIT stacks. 
There is further discussion +about the use of JIT stacks in the section entitled +.\" HTML +.\" +"JIT stack FAQ" +.\" +below. +.P +The \fBpcre2_jit_stack_create()\fP function creates a JIT stack. Its arguments +are a starting size, a maximum size, and a general context (for memory +allocation functions, or NULL for standard memory allocation). It returns a +pointer to an opaque structure of type \fBpcre2_jit_stack\fP, or NULL if there +is an error. The \fBpcre2_jit_stack_free()\fP function is used to free a stack +that is no longer needed. If its argument is NULL, this function returns +immediately, without doing anything. (For the technically minded: the address +space is allocated by mmap or VirtualAlloc.) A maximum stack size of 512KiB to +1MiB should be more than enough for any pattern. +.P +The \fBpcre2_jit_stack_assign()\fP function specifies which stack JIT code +should use. Its arguments are as follows: +.sp + pcre2_match_context *mcontext + pcre2_jit_callback callback + void *data +.sp +The first argument is a pointer to a match context. When this is subsequently +passed to a matching function, its information determines which JIT stack is +used. If this argument is NULL, the function returns immediately, without doing +anything. There are three cases for the values of the other two options: +.sp + (1) If \fIcallback\fP is NULL and \fIdata\fP is NULL, an internal 32KiB block + on the machine stack is used. This is the default when a match + context is created. +.sp + (2) If \fIcallback\fP is NULL and \fIdata\fP is not NULL, \fIdata\fP must be + a pointer to a valid JIT stack, the result of calling + \fBpcre2_jit_stack_create()\fP. +.sp + (3) If \fIcallback\fP is not NULL, it must point to a function that is + called with \fIdata\fP as an argument at the start of matching, in + order to set up a JIT stack. 
If the return from the callback + function is NULL, the internal 32KiB stack is used; otherwise the + return value must be a valid JIT stack, the result of calling + \fBpcre2_jit_stack_create()\fP. +.sp +A callback function is obeyed whenever JIT code is about to be run; it is not +obeyed when \fBpcre2_match()\fP is called with options that are incompatible +with JIT matching. A callback function can therefore be used to determine +whether a match operation was executed by JIT or by the interpreter. +.P +You may safely use the same JIT stack for more than one pattern (either by +assigning directly or by callback), as long as the patterns are matched +sequentially in the same thread. Currently, the only way to set up +non-sequential matches in one thread is to use callouts: if a callout function +starts another match, that match must use a different JIT stack to the one used +for currently suspended match(es). +.P +In a multithreaded application, if you do not specify a JIT stack, or if you +assign or pass back NULL from a callback, that is thread-safe, because each +thread has its own machine stack. However, if you assign or pass back a +non-NULL JIT stack, this must be a different stack for each thread so that the +application is thread-safe. +.P +Strictly speaking, even more is allowed. You can assign the same non-NULL stack +to a match context that is used by any number of patterns, as long as they are +not used for matching by multiple threads at the same time. For example, you +could use the same stack in all compiled patterns, with a global mutex in the +callback to wait until the stack is available for use. However, this is an +inefficient solution, and not recommended. +.P +This is a suggestion for how a multithreaded program that needs to set up +non-default JIT stacks might operate: +.sp + During thread initialization + thread_local_var = pcre2_jit_stack_create(...)
+.sp + During thread exit + pcre2_jit_stack_free(thread_local_var) +.sp + Use a one-line callback function + return thread_local_var +.sp +All the functions described in this section do nothing if JIT is not available. +. +. +.\" HTML +.SH "JIT STACK FAQ" +.rs +.sp +(1) Why do we need JIT stacks? +.sp +PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack where +the local data of the current node is pushed before checking its child nodes. +Allocating real machine stack on some platforms is difficult. For example, the +stack chain needs to be updated every time if we extend the stack on PowerPC. +Although it is possible, its updating time overhead decreases performance. So +we do the recursion in memory. +.P +(2) Why don't we simply allocate blocks of memory with \fBmalloc()\fP? +.sp +Modern operating systems have a nice feature: they can reserve an address space +instead of allocating memory. We can safely allocate memory pages inside this +address space, so the stack could grow without moving memory data (this is +important because of pointers). Thus we can allocate 1MiB address space, and +use only a single memory page (usually 4KiB) if that is enough. However, we can +still grow up to 1MiB anytime if needed. +.P +(3) Who "owns" a JIT stack? +.sp +The owner of the stack is the user program, not the JIT studied pattern or +anything else. The user program must ensure that if a stack is being used by +\fBpcre2_match()\fP, (that is, it is assigned to a match context that is passed +to the pattern currently running), that stack must not be used by any other +threads (to avoid overwriting the same memory area). The best practice for +multithreaded programs is to allocate a stack for each thread, and return this +stack through the JIT callback function. +.P +(4) When should a JIT stack be freed? +.sp +You can free a JIT stack at any time, as long as it will not be used by +\fBpcre2_match()\fP again. 
When you assign the stack to a match context, only a +pointer is set. There is no reference counting or any other magic. You can free +compiled patterns, contexts, and stacks in any order, anytime. +Just \fIdo not\fP call \fBpcre2_match()\fP with a match context pointing to an +already freed stack, as that will cause a SEGFAULT. (Also, do not free a stack +currently used by \fBpcre2_match()\fP in another thread). You can also replace +the stack in a context at any time when it is not in use. You should free the +previous stack before assigning a replacement. +.P +(5) Should I allocate/free a stack every time before/after calling +\fBpcre2_match()\fP? +.sp +No, because this is too costly in terms of resources. However, you could +implement some clever idea which releases the stack if it is not used in let's +say two minutes. The JIT callback can help to achieve this without keeping a +list of patterns. +.P +(6) OK, the stack is for long term memory allocation. But what happens if a +pattern causes stack overflow with a stack of 1MiB? Is that 1MiB kept until the +stack is freed? +.sp +Especially on embedded systems, it might be a good idea to release memory +sometimes without freeing the stack. There is no API for this at the moment. +Probably a function call which returns with the currently allocated memory for +any stack and another which allows releasing memory (shrinking the stack) would +be a good idea if someone needs this. +.P +(7) This is too much of a headache. Isn't there any better solution for JIT +stack handling? +.sp +No, thanks to Windows. If POSIX threads were used everywhere, we could throw +out this complicated API. +. +. +.SH "FREEING JIT SPECULATIVE MEMORY" +.rs +.sp +.nf +.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP); +.fi +.P +The JIT executable allocator does not free all memory when it is possible. It +expects new allocations, and keeps some free memory around to improve +allocation speed.
However, in low memory conditions, it might be better to free +all possible memory. You can cause this to happen by calling +pcre2_jit_free_unused_memory(). Its argument is a general context, for custom +memory management, or NULL for standard memory management. +. +. +.SH "EXAMPLE CODE" +.rs +.sp +This is a single-threaded example that specifies a JIT stack without using a +callback. A real program should include error checking after all the function +calls. +.sp + int rc; + pcre2_code *re; + pcre2_match_data *match_data; + pcre2_match_context *mcontext; + pcre2_jit_stack *jit_stack; +.sp + re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, + &errornumber, &erroffset, NULL); + rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE); + mcontext = pcre2_match_context_create(NULL); + jit_stack = pcre2_jit_stack_create(32*1024, 512*1024, NULL); + pcre2_jit_stack_assign(mcontext, NULL, jit_stack); + match_data = pcre2_match_data_create(re, 10); + rc = pcre2_match(re, subject, length, 0, 0, match_data, mcontext); + /* Process result */ +.sp + pcre2_code_free(re); + pcre2_match_data_free(match_data); + pcre2_match_context_free(mcontext); + pcre2_jit_stack_free(jit_stack); +.sp +. +. +.\" HTML +.SH "JIT FAST PATH API" +.rs +.sp +Because the API described above falls back to interpreted matching when JIT is +not available, it is convenient for programs that are written for general use +in many environments. However, calling JIT via \fBpcre2_match()\fP does have a +performance impact. Programs that are written for use where JIT is known to be +available, and which need the best possible performance, can instead use a +"fast path" API to call JIT matching directly instead of calling +\fBpcre2_match()\fP (obviously only for patterns that have been successfully +processed by \fBpcre2_jit_compile()\fP). +.P +The fast path function is called \fBpcre2_jit_match()\fP, and it takes exactly +the same arguments as \fBpcre2_match()\fP. 
However, the subject string must be +specified with a length; PCRE2_ZERO_TERMINATED is not supported. Unsupported +option bits (for example, PCRE2_ANCHORED and PCRE2_ENDANCHORED) are ignored, as +is the PCRE2_NO_JIT option. The return values are also the same as for +\fBpcre2_match()\fP, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial +or complete) is requested that was not compiled. +.P +When you call \fBpcre2_match()\fP, as well as testing for invalid options, a +number of other sanity checks are performed on the arguments. For example, if +the subject pointer is NULL but the length is non-zero, an immediate error is +given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested +for validity. In the interests of speed, these checks do not happen on the JIT +fast path. If invalid UTF data is passed when PCRE2_MATCH_INVALID_UTF was not +set for \fBpcre2_compile()\fP, the result is undefined. The program may crash +or loop or give wrong results. In the absence of PCRE2_MATCH_INVALID_UTF you +should call \fBpcre2_jit_match()\fP in UTF mode only if you are sure the +subject is valid. +.P +Bypassing the sanity checks and the \fBpcre2_match()\fP wrapping can give +speedups of more than 10%. +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2api\fP(3), \fBpcre2unicode\fP(3) +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel (FAQ by Zoltan Herczeg) +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 21 February 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/doc/pcre2limits.3 b/doc/pcre2limits.3 new file mode 100644 index 0000000..a1ea3ea --- /dev/null +++ b/doc/pcre2limits.3 @@ -0,0 +1,81 @@ +.TH PCRE2LIMITS 3 "1 August 2023" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "SIZE AND OTHER LIMITATIONS" +.rs +.sp +There are some size limitations in PCRE2 but it is hoped that they will never +in practice be relevant. 
+.P +The maximum size of a compiled pattern is approximately 64 thousand code units +for the 8-bit and 16-bit libraries if PCRE2 is compiled with the default +internal linkage size, which is 2 bytes for these libraries. If you want to +process regular expressions that are truly enormous, you can compile PCRE2 with +an internal linkage size of 3 or 4 (when building the 16-bit library, 3 is +rounded up to 4). See the \fBREADME\fP file in the source distribution and the +.\" HREF +\fBpcre2build\fP +.\" +documentation for details. In these cases the limit is substantially larger. +However, the speed of execution is slower. In the 32-bit library, the internal +linkage size is always 4. +.P +The maximum length of a source pattern string is essentially unlimited; it is +the largest number a PCRE2_SIZE variable can hold. However, the program that +calls \fBpcre2_compile()\fP can specify a smaller limit. +.P +The maximum length (in code units) of a subject string is one less than the +largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an unsigned +integer type, usually defined as size_t. Its maximum value (that is +~(PCRE2_SIZE)0) is reserved as a special indicator for zero-terminated strings +and unset offsets. +.P +All values in repeating quantifiers must be less than 65536. +.P +There are two different limits that apply to branches of lookbehind assertions. +If every branch in such an assertion matches a fixed number of characters, +the maximum length of any branch is 65535 characters. If any branch matches a +variable number of characters, then the maximum matching length for every +branch is limited. The default limit is set at compile time, defaulting to 255, +but can be changed by the calling program. +.P +There is no limit to the number of parenthesized groups, but there can be no +more than 65535 capture groups, and there is a limit to the depth of nesting of +parenthesized subpatterns of all kinds. 
This is imposed in order to limit the +amount of system stack used at compile time. The default limit can be specified +when PCRE2 is built; if not, the default is set to 250. An application can +change this limit by calling pcre2_set_parens_nest_limit() to set the limit in +a compile context. +.P +The maximum length of a name for a named capture group is 32 code units, and the +maximum number of such groups is 10000. +.P +The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb +is 255 code units for the 8-bit library and 65535 code units for the 16-bit and +32-bit libraries. +.P +The maximum length of a string argument to a callout is the largest number a +32-bit unsigned integer can hold. +.P +The maximum amount of heap memory used for matching is controlled by the heap +limit, which can be set in a pattern or in a match context. The default is a +very large number, effectively unlimited. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: August 2023 +Copyright (c) 1997-2023 University of Cambridge. +.fi diff --git a/doc/pcre2matching.3 b/doc/pcre2matching.3 new file mode 100644 index 0000000..96800ef --- /dev/null +++ b/doc/pcre2matching.3 @@ -0,0 +1,222 @@ +.TH PCRE2MATCHING 3 "19 January 2024" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "PCRE2 MATCHING ALGORITHMS" +.rs +.sp +This document describes the two different algorithms that are available in +PCRE2 for matching a compiled regular expression against a given subject +string. The "standard" algorithm is the one provided by the \fBpcre2_match()\fP +function. This works in the same way as Perl's matching function, and provides a +Perl-compatible matching operation. The just-in-time (JIT) optimization that is +described in the +.\" HREF +\fBpcre2jit\fP +.\" +documentation is compatible with this function.
+.P +An alternative algorithm is provided by the \fBpcre2_dfa_match()\fP function; +it operates in a different way, and is not Perl-compatible. This alternative +has advantages and disadvantages compared with the standard algorithm, and +these are described below. +.P +When there is only one possible way in which a given subject string can match a +pattern, the two algorithms give the same answer. A difference arises, however, +when there are multiple possibilities. For example, if the pattern +.sp + ^<.*> +.sp +is matched against the string +.sp + <something> <something else> <something further> +.sp +there are three possible answers. The standard algorithm finds only one of +them, whereas the alternative algorithm finds all three. +. +. +.SH "REGULAR EXPRESSIONS AS TREES" +.rs +.sp +The set of strings that are matched by a regular expression can be represented +as a tree structure. An unlimited repetition in the pattern makes the tree of +infinite size, but it is still a tree. Matching the pattern to a given subject +string (from a given starting point) can be thought of as a search of the tree. +There are two ways to search a tree: depth-first and breadth-first, and these +correspond to the two matching algorithms provided by PCRE2. +. +. +.SH "THE STANDARD MATCHING ALGORITHM" +.rs +.sp +In the terminology of Jeffrey Friedl's book "Mastering Regular Expressions", +the standard algorithm is an "NFA algorithm". It conducts a depth-first search +of the pattern tree. That is, it proceeds along a single path through the tree, +checking that the subject matches what is required. When there is a mismatch, +the algorithm tries any alternatives at the current point, and if they all +fail, it backs up to the previous branch point in the tree, and tries the next +alternative branch at that level. This often involves backing up (moving to the +left) in the subject string as well. The order in which repetition branches are +tried is controlled by the greedy or ungreedy nature of the quantifier.
+.P +If a leaf node is reached, a matching string has been found, and at that point +the algorithm stops. Thus, if there is more than one possible match, this +algorithm returns the first one that it finds. Whether this is the shortest, +the longest, or some intermediate length depends on the way the alternations +and the greedy or ungreedy repetition quantifiers are specified in the +pattern. +.P +Because it ends up with a single path through the tree, it is relatively +straightforward for this algorithm to keep track of the substrings that are +matched by portions of the pattern in parentheses. This provides support for +capturing parentheses and backreferences. +. +. +.SH "THE ALTERNATIVE MATCHING ALGORITHM" +.rs +.sp +This algorithm conducts a breadth-first search of the tree. Starting from the +first matching point in the subject, it scans the subject string from left to +right, once, character by character, and as it does this, it remembers all the +paths through the tree that represent valid matches. In Friedl's terminology, +this is a kind of "DFA algorithm", though it is not implemented as a +traditional finite state machine (it keeps multiple states active +simultaneously). +.P +Although the general principle of this matching algorithm is that it scans the +subject string only once, without backtracking, there is one exception: when a +lookaround assertion is encountered, the characters following or preceding the +current point have to be independently inspected. +.P +The scan continues until either the end of the subject is reached, or there are +no more unterminated paths. At this point, terminated paths represent the +different matching possibilities (if there are none, the match has failed). +Thus, if there is more than one possible match, this algorithm finds all of +them, and in particular, it finds the longest. The matches are returned in +the output vector in decreasing order of length. 
There is an option to stop the +algorithm after the first match (which is necessarily the shortest) is found. +.P +Note that the size of vector needed to contain all the results depends on the +number of simultaneous matches, not on the number of parentheses in the +pattern. Using \fBpcre2_match_data_create_from_pattern()\fP to create the match +data block is therefore not advisable when doing DFA matching. +.P +Note also that all the matches that are found start at the same point in the +subject. If the pattern +.sp + cat(er(pillar)?)? +.sp +is matched against the string "the caterpillar catchment", the result is the +three strings "caterpillar", "cater", and "cat" that start at the fifth +character of the subject. The algorithm does not automatically move on to find +matches that start at later positions. +.P +PCRE2's "auto-possessification" optimization usually applies to character +repeats at the end of a pattern (as well as internally). For example, the +pattern "a\ed+" is compiled as if it were "a\ed++" because there is no point +even considering the possibility of backtracking into the repeated digits. For +DFA matching, this means that only one possible match is found. If you really +do want multiple matches in such cases, either use an ungreedy repeat +("a\ed+?") or set the PCRE2_NO_AUTO_POSSESS option when compiling. +.P +There are a number of features of PCRE2 regular expressions that are not +supported or behave differently in the alternative matching function. Those +that are not supported cause an error if encountered. +.P +1. Because the algorithm finds all possible matches, the greedy or ungreedy +nature of repetition quantifiers is not relevant (though it may affect +auto-possessification, as just described). During matching, greedy and ungreedy +quantifiers are treated in exactly the same way. 
However, possessive +quantifiers can make a difference when what follows could also match what is +quantified, for example in a pattern like this: +.sp + ^a++\ew! +.sp +This pattern matches "aaab!" but not "aaa!", which would be matched by a +non-possessive quantifier. Similarly, if an atomic group is present, it is +matched as if it were a standalone pattern at the current point, and the +longest match is then "locked in" for the rest of the overall pattern. +.P +2. When dealing with multiple paths through the tree simultaneously, it is not +straightforward to keep track of captured substrings for the different matching +possibilities, and PCRE2's implementation of this algorithm does not attempt to +do this. This means that no captured substrings are available. +.P +3. Because no substrings are captured, backreferences within the pattern are +not supported. +.P +4. For the same reason, conditional expressions that use a backreference as the +condition or test for a specific group recursion are not supported. +.P +5. Again for the same reason, script runs are not supported. +.P +6. Because many paths through the tree may be active, the \eK escape sequence, +which resets the start of the match when encountered (but may be on some paths +and not on others), is not supported. +.P +7. Callouts are supported, but the value of the \fIcapture_top\fP field is +always 1, and the value of the \fIcapture_last\fP field is always 0. +.P +8. The \eC escape sequence, which (in the standard algorithm) always matches a +single code unit, even in a UTF mode, is not supported in these modes, because +the alternative algorithm moves through the subject string one character (not +code unit) at a time, for all active paths through the tree. +.P +9. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not +supported. (*FAIL) is supported, and behaves like a failing negative assertion. +.P +10. 
The PCRE2_MATCH_INVALID_UTF option for \fBpcre2_compile()\fP is not +supported by \fBpcre2_dfa_match()\fP. +. +. +.SH "ADVANTAGES OF THE ALTERNATIVE ALGORITHM" +.rs +.sp +The main advantage of the alternative algorithm is that all possible matches +(at a single point in the subject) are automatically found, and in particular, +the longest match is found. To find more than one match at the same point using +the standard algorithm, you have to do kludgy things with callouts. +.P +Partial matching is possible with this algorithm, though it has some +limitations. The +.\" HREF +\fBpcre2partial\fP +.\" +documentation gives details of partial matching and discusses multi-segment +matching. +. +. +.SH "DISADVANTAGES OF THE ALTERNATIVE ALGORITHM" +.rs +.sp +The alternative algorithm suffers from a number of disadvantages: +.P +1. It is substantially slower than the standard algorithm. This is partly +because it has to search for all possible matches, but is also because it is +less susceptible to optimization. +.P +2. Capturing parentheses, backreferences, script runs, and matching within +invalid UTF strings are not supported. +.P +3. Although atomic groups are supported, their use does not provide the +performance advantage that it does for the standard algorithm. +.P +4. JIT optimization is not supported. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 19 January 2024 +Copyright (c) 1997-2024 University of Cambridge.
+.fi diff --git a/doc/pcre2partial.3 b/doc/pcre2partial.3 new file mode 100644 index 0000000..cf8006d --- /dev/null +++ b/doc/pcre2partial.3 @@ -0,0 +1,373 @@ +.TH PCRE2PARTIAL 3 "04 September 2019" "PCRE2 10.34" +.SH NAME +PCRE2 - Perl-compatible regular expressions +.SH "PARTIAL MATCHING IN PCRE2" +.rs +.sp +In normal use of PCRE2, if there is a match up to the end of a subject string, +but more characters are needed to match the entire pattern, PCRE2_ERROR_NOMATCH +is returned, just like any other failing match. There are circumstances where +it might be helpful to distinguish this "partial match" case. +.P +One example is an application where the subject string is very long, and not +all available at once. The requirement here is to be able to do the matching +segment by segment, but special action is needed when a matched substring spans +the boundary between two segments. +.P +Another example is checking a user input string as it is typed, to ensure that +it conforms to a required format. Invalid characters can be immediately +diagnosed and rejected, giving instant feedback. +.P +Partial matching is a PCRE2-specific feature; it is not Perl-compatible. It is +requested by setting one of the PCRE2_PARTIAL_HARD or PCRE2_PARTIAL_SOFT +options when calling a matching function. The difference between the two +options is whether or not a partial match is preferred to an alternative +complete match, though the details differ between the two types of matching +function. If both options are set, PCRE2_PARTIAL_HARD takes precedence. +.P +If you want to use partial matching with just-in-time optimized code, as well +as setting a partial match option for the matching function, you must also call +\fBpcre2_jit_compile()\fP with one or both of these options: +.sp + PCRE2_JIT_PARTIAL_HARD + PCRE2_JIT_PARTIAL_SOFT +.sp +PCRE2_JIT_COMPLETE should also be set if you are going to run non-partial +matches on the same pattern. Separate code is compiled for each mode. 
If the +appropriate JIT mode has not been compiled, interpretive matching code is used. +.P +Setting a partial matching option disables two of PCRE2's standard +optimization hints. PCRE2 remembers the last literal code unit in a pattern, +and abandons matching immediately if it is not present in the subject string. +This optimization cannot be used for a subject string that might match only +partially. PCRE2 also remembers a minimum length of a matching string, and does +not bother to run the matching function on shorter strings. This optimization +is also disabled for partial matching. +. +. +.SH "REQUIREMENTS FOR A PARTIAL MATCH" +.rs +.sp +A possible partial match occurs during matching when the end of the subject +string is reached successfully, but either more characters are needed to +complete the match, or the addition of more characters might change what is +matched. +.P +Example 1: if the pattern is /abc/ and the subject is "ab", more characters are +definitely needed to complete a match. In this case both hard and soft matching +options yield a partial match. +.P +Example 2: if the pattern is /ab+/ and the subject is "ab", a complete match +can be found, but the addition of more characters might change what is +matched. In this case, only PCRE2_PARTIAL_HARD returns a partial match; +PCRE2_PARTIAL_SOFT returns the complete match. +.P +On reaching the end of the subject, when PCRE2_PARTIAL_HARD is set, if the next +pattern item is \ez, \eZ, \eb, \eB, or $ there is always a partial match. +Otherwise, for both options, the next pattern item must be one that inspects a +character, and at least one of the following must be true: +.P +(1) At least one character has already been inspected. An inspected character +need not form part of the final matched string; lookbehind assertions and the +\eK escape sequence provide ways of inspecting characters before the start of a +matched string. +.P +(2) The pattern contains one or more lookbehind assertions. 
This condition +exists in case there is a lookbehind that inspects characters before the start +of the match. +.P +(3) There is a special case when the whole pattern can match an empty string. +When the starting point is at the end of the subject, the empty string match is +a possibility, and if PCRE2_PARTIAL_SOFT is set and neither of the above +conditions is true, it is returned. However, because adding more characters +might result in a non-empty match, PCRE2_PARTIAL_HARD returns a partial match, +which in this case means "there is going to be a match at this point, but until +some more characters are added, we do not know if it will be an empty string or +something longer". +. +. +. +.SH "PARTIAL MATCHING USING pcre2_match()" +.rs +.sp +When a partial matching option is set, the result of calling +\fBpcre2_match()\fP can be one of the following: +.TP 2 +\fBA successful match\fP +A complete match has been found, starting and ending within this subject. +.TP +\fBPCRE2_ERROR_NOMATCH\fP +No match can start anywhere in this subject. +.TP +\fBPCRE2_ERROR_PARTIAL\fP +Adding more characters may result in a complete match that uses one or more +characters from the end of this subject. +.P +When a partial match is returned, the first two elements in the ovector point +to the portion of the subject that was matched, but the values in the rest of +the ovector are undefined. The appearance of \eK in the pattern has no effect +for a partial match. Consider this pattern: +.sp + /abc\eK123/ +.sp +If it is matched against "456abc123xyz" the result is a complete match, and the +ovector defines the matched string as "123", because \eK resets the "start of +match" point. However, if a partial match is requested and the subject string +is "456abc12", a partial match is found for the string "abc12", because all +these characters are needed for a subsequent re-match with additional +characters. 
+.P +If there is more than one partial match, the first one that was found provides +the data that is returned. Consider this pattern: +.sp + /123\ew+X|dogY/ +.sp +If this is matched against the subject string "abc123dog", both alternatives +fail to match, but the end of the subject is reached during matching, so +PCRE2_ERROR_PARTIAL is returned. The offsets are set to 3 and 9, identifying +"123dog" as the first partial match. (In this example, there are two partial +matches, because "dog" on its own partially matches the second alternative.) +. +. +.SS "How a partial match is processed by pcre2_match()" +.rs +.sp +What happens when a partial match is identified depends on which of the two +partial matching options is set. +.P +If PCRE2_PARTIAL_HARD is set, PCRE2_ERROR_PARTIAL is returned as soon as a +partial match is found, without continuing to search for possible complete +matches. This option is "hard" because it prefers an earlier partial match over +a later complete match. For this reason, the assumption is made that the end of +the supplied subject string is not the true end of the available data, which is +why \ez, \eZ, \eb, \eB, and $ always give a partial match. +.P +If PCRE2_PARTIAL_SOFT is set, the partial match is remembered, but matching +continues as normal, and other alternatives in the pattern are tried. If no +complete match can be found, PCRE2_ERROR_PARTIAL is returned instead of +PCRE2_ERROR_NOMATCH. This option is "soft" because it prefers a complete match +over a partial match. All the various matching items in a pattern behave as if +the subject string is potentially complete; \ez, \eZ, and $ match at the end of +the subject, as normal, and for \eb and \eB the end of the subject is treated +as a non-alphanumeric. 
+.P +The difference between the two partial matching options can be illustrated by a +pattern such as: +.sp + /dog(sbody)?/ +.sp +This matches either "dog" or "dogsbody", greedily (that is, it prefers the +longer string if possible). If it is matched against the string "dog" with +PCRE2_PARTIAL_SOFT, it yields a complete match for "dog". However, if +PCRE2_PARTIAL_HARD is set, the result is PCRE2_ERROR_PARTIAL. On the other +hand, if the pattern is made ungreedy the result is different: +.sp + /dog(sbody)??/ +.sp +In this case the result is always a complete match because that is found first, +and matching never continues after finding a complete match. It might be easier +to follow this explanation by thinking of the two patterns like this: +.sp + /dog(sbody)?/ is the same as /dogsbody|dog/ + /dog(sbody)??/ is the same as /dog|dogsbody/ +.sp +The second pattern will never match "dogsbody", because it will always find the +shorter match first. +. +. +.SS "Example of partial matching using pcre2test" +.rs +.sp +The \fBpcre2test\fP data modifiers \fBpartial_hard\fP (or \fBph\fP) and +\fBpartial_soft\fP (or \fBps\fP) set PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT, +respectively, when calling \fBpcre2_match()\fP. Here is a run of +\fBpcre2test\fP using a pattern that matches the whole subject in the form of a +date: +.sp + re> /^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/ + data> 25dec3\e=ph + Partial match: 25dec3 + data> 3ju\e=ph + Partial match: 3ju + data> 3juj\e=ph + No match +.sp +This example gives the same results for both hard and soft partial matching +options. Here is an example where there is a difference: +.sp + re> /^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/ + data> 25jun04\e=ps + 0: 25jun04 + 1: jun + data> 25jun04\e=ph + Partial match: 25jun04 +.sp +With PCRE2_PARTIAL_SOFT, the subject is matched completely. 
For +PCRE2_PARTIAL_HARD, however, the subject is assumed not to be complete, so +there is only a partial match. +. +. +. +.SH "MULTI-SEGMENT MATCHING WITH pcre2_match()" +.rs +.sp +PCRE was not originally designed with multi-segment matching in mind. However, +over time, features (including partial matching) that make multi-segment +matching possible have been added. A very long string can be searched segment +by segment by calling \fBpcre2_match()\fP repeatedly, with the aim of achieving +the same results that would happen if the entire string was available for +searching all the time. Normally, the strings that are being sought are much +shorter than each individual segment, and are in the middle of very long +strings, so the pattern is normally not anchored. +.P +Special logic must be implemented to handle a matched substring that spans a +segment boundary. PCRE2_PARTIAL_HARD should be used, because it returns a +partial match at the end of a segment whenever there is the possibility of +changing the match by adding more characters. The PCRE2_NOTBOL option should +also be set for all but the first segment. +.P +When a partial match occurs, the next segment must be added to the current +subject and the match re-run, using the \fIstartoffset\fP argument of +\fBpcre2_match()\fP to begin at the point where the partial match started. +For example: +.sp + re> /\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed/ + data> ...the date is 23ja\e=ph + Partial match: 23ja + data> ...the date is 23jan19 and on that day...\e=offset=15 + 0: 23jan19 + 1: jan +.sp +Note the use of the \fBoffset\fP modifier to start the new match where the +partial match was found. In this example, the next segment was added to the one +in which the partial match was found. This is the most straightforward +approach, typically using a memory buffer that is twice the size of each +segment. 
After a partial match, the first half of the buffer is discarded, the +second half is moved to the start of the buffer, and a new segment is added +before repeating the match as in the example above. After a no match, the +entire buffer can be discarded. +.P +If there are memory constraints, you may want to discard text that precedes a +partial match before adding the next segment. Unfortunately, this is not at +present straightforward. In cases such as the above, where the pattern does not +contain any lookbehinds, it is sufficient to retain only the partially matched +substring. However, if the pattern contains a lookbehind assertion, characters +that precede the start of the partial match may have been inspected during the +matching process. When \fBpcre2test\fP displays a partial match, it indicates +these characters with '<' if the \fBallusedtext\fP modifier is set: +.sp + re> "(?<=123)abc" + data> xx123ab\e=ph,allusedtext + Partial match: 123ab + <<< +.sp +However, the \fBallusedtext\fP modifier is not available for JIT matching, +because JIT matching does not record the first (or last) consulted characters. +For this reason, this information is not available via the API. It is therefore +not possible in general to obtain the exact number of characters that must be +retained in order to get the right match result. If you cannot retain the +entire segment, you must find some heuristic way of choosing. +.P +If you know the approximate length of the matching substrings, you can use that +to decide how much text to retain. The only lookbehind information that is +currently available via the API is the length of the longest individual +lookbehind in a pattern, but this can be misleading if there are nested +lookbehinds. The value returned by calling \fBpcre2_pattern_info()\fP with the +PCRE2_INFO_MAXLOOKBEHIND option is the maximum number of characters (not code +units) that any individual lookbehind moves back when it is processed. A +pattern such as "(?<=(? 
/^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/ + data> 23ja\e=dfa,ps + Partial match: 23ja + data> n05\e=dfa,dfa_restart + 0: n05 +.sp +The first call has "23ja" as the subject, and requests partial matching; the +second call has "n05" as the subject for the continued (restarted) match. +Notice that when the match is complete, only the last part is shown; PCRE2 does +not retain the previously partially-matched string. It is up to the calling +program to do that if it needs to. This means that, for an unanchored pattern, +if a continued match fails, it is not possible to try again at a new starting +point. All this facility is capable of doing is continuing with the previous +match attempt. For example, consider this pattern: +.sp + 1234|3789 +.sp +If the first part of the subject is "ABC123", a partial match of the first +alternative is found at offset 3. There is no partial match for the second +alternative, because such a match does not start at the same point in the +subject string. Attempting to continue with the string "7890" does not yield a +match because only those alternatives that match at one point in the subject +are remembered. Depending on the application, this may or may not be what you +want. +.P +If you do want to allow for starting again at the next character, one way of +doing it is to retain some or all of the segment and try a new complete match, +as described for \fBpcre2_match()\fP above. Another possibility is to work with +two buffers. If a partial match at offset \fIn\fP in the first buffer is +followed by "no match" when PCRE2_DFA_RESTART is used on the second buffer, you +can then try a new match starting at offset \fIn+1\fP in the first buffer. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 04 September 2019 +Copyright (c) 1997-2019 University of Cambridge. 
+.fi diff --git a/doc/pcre2pattern.3 b/doc/pcre2pattern.3 new file mode 100644 index 0000000..c3ccb0b --- /dev/null +++ b/doc/pcre2pattern.3 @@ -0,0 +1,3896 @@ +.TH PCRE2PATTERN 3 "04 June 2024" "PCRE2 10.44" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "PCRE2 REGULAR EXPRESSION DETAILS" +.rs +.sp +The syntax and semantics of the regular expressions that are supported by PCRE2 +are described in detail below. There is a quick-reference syntax summary in the +.\" HREF +\fBpcre2syntax\fP +.\" +page. PCRE2 tries to match Perl syntax and semantics as closely as it can. +PCRE2 also supports some alternative regular expression syntax (which does not +conflict with the Perl syntax) in order to provide some compatibility with +regular expressions in Python, .NET, and Oniguruma. +.P +Perl's regular expressions are described in its own documentation, and regular +expressions in general are covered in a number of books, some of which have +copious examples. Jeffrey Friedl's "Mastering Regular Expressions", published +by O'Reilly, covers regular expressions in great detail. This description of +PCRE2's regular expressions is intended as reference material. +.P +This document discusses the regular expression patterns that are supported by +PCRE2 when its main matching function, \fBpcre2_match()\fP, is used. PCRE2 also +has an alternative matching function, \fBpcre2_dfa_match()\fP, which matches +using a different algorithm that is not Perl-compatible. Some of the features +discussed below are not available when DFA matching is used. The advantages and +disadvantages of the alternative function, and how it differs from the normal +function, are discussed in the +.\" HREF +\fBpcre2matching\fP +.\" +page. +. +. +.SH "SPECIAL START-OF-PATTERN ITEMS" +.rs +.sp +A number of options that can be passed to \fBpcre2_compile()\fP can also be set +by special items at the start of a pattern. 
These are not Perl-compatible, but +are provided to make these options accessible to pattern writers who are not +able to change the program that processes the pattern. Any number of these +items may appear, but they must all be together right at the start of the +pattern string, and the letters must be in upper case. +. +. +.SS "UTF support" +.rs +.sp +In the 8-bit and 16-bit PCRE2 libraries, characters may be coded either as +single code units, or as multiple UTF-8 or UTF-16 code units. UTF-32 can be +specified for the 32-bit library, in which case it constrains the character +values to valid Unicode code points. To process UTF strings, PCRE2 must be +built to include Unicode support (which is the default). When using UTF strings +you must either call the compiling function with one or both of the PCRE2_UTF +or PCRE2_MATCH_INVALID_UTF options, or the pattern must start with the special +sequence (*UTF), which is equivalent to setting the relevant PCRE2_UTF. How +setting a UTF mode affects pattern matching is mentioned in several places +below. There is also a summary of features in the +.\" HREF +\fBpcre2unicode\fP +.\" +page. +.P +Some applications that allow their users to supply patterns may wish to +restrict them to non-UTF data for security reasons. If the PCRE2_NEVER_UTF +option is passed to \fBpcre2_compile()\fP, (*UTF) is not allowed, and its +appearance in a pattern causes an error. +. +. +.SS "Unicode property support" +.rs +.sp +Another special sequence that may appear at the start of a pattern is (*UCP). +This has the same effect as setting the PCRE2_UCP option: it causes sequences +such as \ed and \ew to use Unicode properties to determine character types, +instead of recognizing only characters with codes less than 256 via a lookup +table. It also causes upper/lower casing operations to use Unicode properties +for characters with code points greater than 127, even when UTF is not set. 
+These behaviours can be changed within the pattern; see the section entitled +.\" HTML +.\" +"Internal Option Setting" +.\" +below. +.P +Some applications that allow their users to supply patterns may wish to +restrict them for security reasons. If the PCRE2_NEVER_UCP option is passed to +\fBpcre2_compile()\fP, (*UCP) is not allowed, and its appearance in a pattern +causes an error. +. +. +.SS "Locking out empty string matching" +.rs +.sp +Starting a pattern with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) has the same effect +as passing the PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART option to whichever +matching function is subsequently called to match the pattern. These options +lock out the matching of empty strings, either entirely, or only at the start +of the subject. +. +. +.SS "Disabling auto-possessification" +.rs +.sp +If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as setting +the PCRE2_NO_AUTO_POSSESS option. This stops PCRE2 from making quantifiers +possessive when what follows cannot match the repeated item. For example, by +default a+b is treated as a++b. For more details, see the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +. +. +.SS "Disabling start-up optimizations" +.rs +.sp +If a pattern starts with (*NO_START_OPT), it has the same effect as setting the +PCRE2_NO_START_OPTIMIZE option. This disables several optimizations for quickly +reaching "no match" results. For more details, see the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +. +. +.SS "Disabling automatic anchoring" +.rs +.sp +If a pattern starts with (*NO_DOTSTAR_ANCHOR), it has the same effect as +setting the PCRE2_NO_DOTSTAR_ANCHOR option. This disables optimizations that +apply to patterns whose top-level branches all start with .* (match any number +of arbitrary characters). For more details, see the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +. +. 
+.SS "Disabling JIT compilation" +.rs +.sp +If a pattern that starts with (*NO_JIT) is successfully compiled, an attempt by +the application to apply the JIT optimization by calling +\fBpcre2_jit_compile()\fP is ignored. +. +. +.SS "Setting match resource limits" +.rs +.sp +The \fBpcre2_match()\fP function contains a counter that is incremented every +time it goes round its main loop. The caller of \fBpcre2_match()\fP can set a +limit on this counter, which therefore limits the amount of computing resource +used for a match. The maximum depth of nested backtracking can also be limited; +this indirectly restricts the amount of heap memory that is used, but there is +also an explicit memory limit that can be set. +.P +These facilities are provided to catch runaway matches that are provoked by +patterns with huge matching trees. A common example is a pattern with nested +unlimited repeats applied to a long string that does not match. When one of +these limits is reached, \fBpcre2_match()\fP gives an error return. The limits +can also be set by items at the start of the pattern of the form +.sp + (*LIMIT_HEAP=d) + (*LIMIT_MATCH=d) + (*LIMIT_DEPTH=d) +.sp +where d is any number of decimal digits. However, the value of the setting must +be less than the value set (or defaulted) by the caller of \fBpcre2_match()\fP +for it to have any effect. In other words, the pattern writer can lower the +limits set by the programmer, but not raise them. If there is more than one +setting of one of these limits, the lower value is used. The heap limit is +specified in kibibytes (units of 1024 bytes). +.P +Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is +still recognized for backwards compatibility. +.P +The heap limit applies only when the \fBpcre2_match()\fP or +\fBpcre2_dfa_match()\fP interpreters are used for matching. It does not apply +to JIT. 
The match limit is used (but in a different way) when JIT is being +used, or when \fBpcre2_dfa_match()\fP is called, to limit computing resource +usage by those matching functions. The depth limit is ignored by JIT but is +relevant for DFA matching, which uses function recursion for recursions within +the pattern and for lookaround assertions and atomic groups. In this case, the +depth limit controls the depth of such recursion. +. +. +.\" HTML +.SS "Newline conventions" +.rs +.sp +PCRE2 supports six different conventions for indicating line breaks in +strings: a single CR (carriage return) character, a single LF (linefeed) +character, the two-character sequence CRLF, any of the three preceding, any +Unicode newline sequence, or the NUL character (binary zero). The +.\" HREF +\fBpcre2api\fP +.\" +page has +.\" HTML +.\" +further discussion +.\" +about newlines, and shows how to set the newline convention when calling +\fBpcre2_compile()\fP. +.P +It is also possible to specify a newline convention by starting a pattern +string with one of the following sequences: +.sp + (*CR) carriage return + (*LF) linefeed + (*CRLF) carriage return, followed by linefeed + (*ANYCRLF) any of the three above + (*ANY) all Unicode newline sequences + (*NUL) the NUL character (binary zero) +.sp +These override the default and the options given to the compiling function. For +example, on a Unix system where LF is the default newline sequence, the pattern +.sp + (*CR)a.b +.sp +changes the convention to CR. That pattern matches "a\enb" because LF is no +longer a newline. If more than one of these settings is present, the last one +is used. +.P +The newline convention affects where the circumflex and dollar assertions are +true. It also affects the interpretation of the dot metacharacter when +PCRE2_DOTALL is not set, and the behaviour of \eN when not followed by an +opening brace. However, it does not affect what the \eR escape sequence +matches. 
By default, this is any Unicode newline sequence, for Perl +compatibility. However, this can be changed; see the next section and the +description of \eR in the section entitled +.\" HTML +.\" +"Newline sequences" +.\" +below. A change of \eR setting can be combined with a change of newline +convention. +. +. +.SS "Specifying what \eR matches" +.rs +.sp +It is possible to restrict \eR to match only CR, LF, or CRLF (instead of the +complete set of Unicode line endings) by setting the option PCRE2_BSR_ANYCRLF +at compile time. This effect can also be achieved by starting a pattern with +(*BSR_ANYCRLF). For completeness, (*BSR_UNICODE) is also recognized, +corresponding to PCRE2_BSR_UNICODE. +. +. +.SH "EBCDIC CHARACTER CODES" +.rs +.sp +PCRE2 can be compiled to run in an environment that uses EBCDIC as its +character code instead of ASCII or Unicode (typically a mainframe system). In +the sections below, character code values are ASCII or Unicode; in an EBCDIC +environment these characters may have different code values, and there are no +code points greater than 255. +. +. +.SH "CHARACTERS AND METACHARACTERS" +.rs +.sp +A regular expression is a pattern that is matched against a subject string from +left to right. Most characters stand for themselves in a pattern, and match the +corresponding characters in the subject. As a trivial example, the pattern +.sp + The quick brown fox +.sp +matches a portion of a subject string that is identical to itself. When +caseless matching is specified (the PCRE2_CASELESS option or (?i) within the +pattern), letters are matched independently of case. Note that there are two +ASCII characters, K and S, that, in addition to their lower case ASCII +equivalents, are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F +(long S) respectively when either PCRE2_UTF or PCRE2_UCP is set, unless the +PCRE2_EXTRA_CASELESS_RESTRICT option is in force (either passed to +\fBpcre2_compile()\fP or set by (?r) within the pattern). 
+.P +The power of regular expressions comes from the ability to include wild cards, +character classes, alternatives, and repetitions in the pattern. These are +encoded in the pattern by the use of \fImetacharacters\fP, which do not stand +for themselves but instead are interpreted in some special way. +.P +There are two different sets of metacharacters: those that are recognized +anywhere in the pattern except within square brackets, and those that are +recognized within square brackets. Outside square brackets, the metacharacters +are as follows: +.sp + \e general escape character with several uses + ^ assert start of string (or line, in multiline mode) + $ assert end of string (or line, in multiline mode) + . match any character except newline (by default) + [ start character class definition + | start of alternative branch + ( start group or control verb + ) end group or control verb + * 0 or more quantifier + + 1 or more quantifier; also "possessive quantifier" + ? 0 or 1 quantifier; also quantifier minimizer + { potential start of min/max quantifier +.sp +Brace characters { and } are also used to enclose data for constructions such +as \eg{2} or \ek{name}. In almost all uses of braces, space and/or horizontal +tab characters that follow { or precede } are allowed and are ignored. In the +case of quantifiers, they may also appear before or after the comma. The +exception to this is \eu{...} which is an ECMAScript compatibility feature +that is recognized only when the PCRE2_EXTRA_ALT_BSUX option is set. ECMAScript +does not ignore such white space; it causes the item to be interpreted as +literal. +.P +Part of a pattern that is in square brackets is called a "character class". 
In +a character class the only metacharacters are: +.sp + \e general escape character + ^ negate the class, but only if the first character + - indicates character range + [ POSIX character class (if followed by POSIX syntax) + ] terminates the character class +.sp +If a pattern is compiled with the PCRE2_EXTENDED option, most white space in +the pattern, other than in a character class, within a \eQ...\eE sequence, or +between a # outside a character class and the next newline, inclusive, are +ignored. An escaping backslash can be used to include a white space or a # +character as part of the pattern. If the PCRE2_EXTENDED_MORE option is set, the +same applies, but in addition unescaped space and horizontal tab characters are +ignored inside a character class. Note: only these two characters are ignored, +not the full set of pattern white space characters that are ignored outside a +character class. Option settings can be changed within a pattern; see the +section entitled +.\" HTML +.\" +"Internal Option Setting" +.\" +below. +.P +The following sections describe the use of each of the metacharacters. +. +. +.SH BACKSLASH +.rs +.sp +The backslash character has several uses. Firstly, if it is followed by a +character that is not a digit or a letter, it takes away any special meaning +that character may have. This use of backslash as an escape character applies +both inside and outside character classes. +.P +For example, if you want to match a * character, you must write \e* in the +pattern. This escaping action applies whether or not the following character +would otherwise be interpreted as a metacharacter, so it is always safe to +precede a non-alphanumeric with backslash to specify that it stands for itself. +In particular, if you want to match a backslash, you write \e\e. +.P +Only ASCII digits and letters have any special meaning after a backslash. All +other characters (in particular, those whose code points are greater than 127) +are treated as literals. 
+.P +If you want to treat all characters in a sequence as literals, you can do so by +putting them between \eQ and \eE. Note that this includes white space even when +the PCRE2_EXTENDED option is set so that most other white space is ignored. The +behaviour is different from Perl in that $ and @ are handled as literals in +\eQ...\eE sequences in PCRE2, whereas in Perl, $ and @ cause variable +interpolation. Also, Perl does "double-quotish backslash interpolation" on any +backslashes between \eQ and \eE which, its documentation says, "may lead to +confusing results". PCRE2 treats a backslash between \eQ and \eE just like any +other character. Note the following examples: +.sp + Pattern PCRE2 matches Perl matches +.sp +.\" JOIN + \eQabc$xyz\eE abc$xyz abc followed by the + contents of $xyz + \eQabc\e$xyz\eE abc\e$xyz abc\e$xyz + \eQabc\eE\e$\eQxyz\eE abc$xyz abc$xyz + \eQA\eB\eE A\eB A\eB + \eQ\e\eE \e \e\eE +.sp +The \eQ...\eE sequence is recognized both inside and outside character classes. +An isolated \eE that is not preceded by \eQ is ignored. If \eQ is not followed +by \eE later in the pattern, the literal interpretation continues to the end of +the pattern (that is, \eE is assumed at the end). If the isolated \eQ is inside +a character class, this causes an error, because the character class is then +not terminated by a closing square bracket. +. +. +.\" HTML +.SS "Non-printing characters" +.rs +.sp +A second use of backslash provides a way of encoding non-printing characters +in patterns in a visible manner. There is no restriction on the appearance of +non-printing characters in a pattern, but when a pattern is being prepared by +text editing, it is often easier to use one of the following escape sequences +instead of the binary character it represents. 
In an ASCII or Unicode +environment, these escapes are as follows: +.sp + \ea alarm, that is, the BEL character (hex 07) + \ecx "control-x", where x is a non-control ASCII character + \ee escape (hex 1B) + \ef form feed (hex 0C) + \en linefeed (hex 0A) + \er carriage return (hex 0D) (but see below) + \et tab (hex 09) + \e0dd character with octal code 0dd + \eddd character with octal code ddd, or backreference + \eo{ddd..} character with octal code ddd.. + \exhh character with hex code hh + \ex{hhh..} character with hex code hhh.. + \eN{U+hhh..} character with Unicode hex code point hhh.. +.sp +By default, after \ex that is not followed by {, from zero to two hexadecimal +digits are read (letters can be in upper or lower case). Any number of +hexadecimal digits may appear between \ex{ and }. If a character other than a +hexadecimal digit appears between \ex{ and }, or if there is no terminating }, +an error occurs. +.P +Characters whose code points are less than 256 can be defined by either of the +two syntaxes for \ex or by an octal sequence. There is no difference in the way +they are handled. For example, \exdc is exactly the same as \ex{dc} or \e334. +However, using the braced versions does make such sequences easier to read. +.P +Support is available for some ECMAScript (aka JavaScript) escape sequences via +two compile-time options. If PCRE2_ALT_BSUX is set, the sequence \ex followed +by { is not recognized. Only if \ex is followed by two hexadecimal digits is it +recognized as a character escape. Otherwise it is interpreted as a literal "x" +character. In this mode, support for code points greater than 255 is provided +by \eu, which must be followed by four hexadecimal digits; otherwise it is +interpreted as a literal "u" character. +.P +PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition, +\eu{hhh..} is recognized as the character specified by hexadecimal code point. 
+There may be any number of hexadecimal digits, but unlike other places that +also use curly brackets, spaces are not allowed and would result in the string +being interpreted as a literal. This syntax is from ECMAScript 6. +.P +The \eN{U+hhh..} escape sequence is recognized only when PCRE2 is operating in +UTF mode. Perl also uses \eN{name} to specify characters by Unicode name; PCRE2 +does not support this. Note that when \eN is not followed by an opening brace +(curly bracket) it has an entirely different meaning, matching any character +that is not a newline. +.P +There are some legacy applications where the escape sequence \er is expected to +match a newline. If the PCRE2_EXTRA_ESCAPED_CR_IS_LF option is set, \er in a +pattern is converted to \en so that it matches a LF (linefeed) instead of a CR +(carriage return) character. +.P +An error occurs if \ec is not followed by a character whose ASCII code point +is in the range 32 to 126. The precise effect of \ecx is as follows: if x is a +lower case letter, it is converted to upper case. Then bit 6 of the character +(hex 40) is inverted. Thus \ecA to \ecZ become hex 01 to hex 1A (A is 41, Z is +5A), but \ec{ becomes hex 3B ({ is 7B), and \ec; becomes hex 7B (; is 3B). If +the code unit following \ec has a code point less than 32 or greater than 126, +a compile-time error occurs. +.P +When PCRE2 is compiled in EBCDIC mode, \eN{U+hhh..} is not supported. \ea, \ee, +\ef, \en, \er, and \et generate the appropriate EBCDIC code values. The \ec +escape is processed as specified for Perl in the \fBperlebcdic\fP document. The +only characters that are allowed after \ec are A-Z, a-z, or one of @, [, \e, ], +^, _, or ?. Any other character provokes a compile-time error. The sequence +\ec@ encodes character code 0; after \ec the letters (in either case) encode +characters 1-26 (hex 01 to hex 1A); [, \e, ], ^, and _ encode characters 27-31 +(hex 1B to hex 1F), and \ec? becomes either 255 (hex FF) or 95 (hex 5F). 
+.P +Thus, apart from \ec?, these escapes generate the same character code values as +they do in an ASCII environment, though the meanings of the values mostly +differ. For example, \ecG always generates code value 7, which is BEL in ASCII +but DEL in EBCDIC. +.P +The sequence \ec? generates DEL (127, hex 7F) in an ASCII environment, but +because 127 is not a control character in EBCDIC, Perl makes it generate the +APC character. Unfortunately, there are several variants of EBCDIC. In most of +them the APC character has the value 255 (hex FF), but in the one Perl calls +POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC +values, PCRE2 makes \ec? generate 95; otherwise it generates 255. +.P +After \e0 up to two further octal digits are read. If there are fewer than two +digits, just those that are present are used. Thus the sequence \e0\ex\e015 +specifies two binary zeros followed by a CR character (code value 13). Make +sure you supply two digits after the initial zero if the pattern character that +follows is itself an octal digit. +.P +The escape \eo must be followed by a sequence of octal digits, enclosed in +braces. An error occurs if this is not the case. This escape is a recent +addition to Perl; it provides a way of specifying character code points as octal +numbers greater than 0777, and it also allows octal numbers and backreferences +to be unambiguously specified. +.P +For greater clarity and unambiguity, it is best to avoid following \e by a +digit greater than zero. Instead, use \eo{...} or \ex{...} to specify numerical +character code points, and \eg{...} to specify backreferences. The following +paragraphs describe the old, ambiguous syntax. +.P +The handling of a backslash followed by a digit other than 0 is complicated, +and Perl has changed over time, causing PCRE2 also to change. +.P +Outside a character class, PCRE2 reads the digit and any following digits as a +decimal number.
If the number is less than 10, begins with the digit 8 or 9, or +if there are at least that many previous capture groups in the expression, the +entire sequence is taken as a \fIbackreference\fP. A description of how this +works is given +.\" HTML +.\" +later, +.\" +following the discussion of +.\" HTML +.\" +parenthesized groups. +.\" +Otherwise, up to three octal digits are read to form a character code. +.P +Inside a character class, PCRE2 handles \e8 and \e9 as the literal characters +"8" and "9", and otherwise reads up to three octal digits following the +backslash, using them to generate a data character. Any subsequent digits stand +for themselves. For example, outside a character class: +.sp + \e040 is another way of writing an ASCII space +.\" JOIN + \e40 is the same, provided there are fewer than 40 + previous capture groups + \e7 is always a backreference +.\" JOIN + \e11 might be a backreference, or another way of + writing a tab + \e011 is always a tab + \e0113 is a tab followed by the character "3" +.\" JOIN + \e113 might be a backreference, otherwise the + character with octal code 113 +.\" JOIN + \e377 might be a backreference, otherwise + the value 255 (decimal) + \e81 is always a backreference +.sp +Note that octal values of 100 or greater that are specified using this syntax +must not be introduced by a leading zero, because no more than three octal +digits are ever read. +. +. +.SS "Constraints on character values" +.rs +.sp +Characters that are specified using octal or hexadecimal numbers are +limited to certain values, as follows: +.sp + 8-bit non-UTF mode no greater than 0xff + 16-bit non-UTF mode no greater than 0xffff + 32-bit non-UTF mode no greater than 0xffffffff + All UTF modes no greater than 0x10ffff and a valid code point +.sp +Invalid Unicode code points are all those in the range 0xd800 to 0xdfff (the +so-called "surrogate" code points). 
The check for these can be disabled by the +caller of \fBpcre2_compile()\fP by setting the option +PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. However, this is possible only in UTF-8 +and UTF-32 modes, because these values are not representable in UTF-16. +. +. +.SS "Escape sequences in character classes" +.rs +.sp +All the sequences that define a single character value can be used both inside +and outside character classes. In addition, inside a character class, \eb is +interpreted as the backspace character (hex 08). +.P +When not followed by an opening brace, \eN is not allowed in a character class. +\eB, \eR, and \eX are not special inside a character class. Like other +unrecognized alphabetic escape sequences, they cause an error. Outside a +character class, these sequences have different meanings. +. +. +.SS "Unsupported escape sequences" +.rs +.sp +In Perl, the sequences \eF, \el, \eL, \eu, and \eU are recognized by its string +handler and used to modify the case of following characters. By default, PCRE2 +does not support these escape sequences in patterns. However, if either of the +PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX options is set, \eU matches a "U" +character, and \eu can be used to define a character by code point, as +described above. +. +. +.SS "Absolute and relative backreferences" +.rs +.sp +The sequence \eg followed by a signed or unsigned number, optionally enclosed +in braces, is an absolute or relative backreference. A named backreference +can be coded as \eg{name}. Backreferences are discussed +.\" HTML +.\" +later, +.\" +following the discussion of +.\" HTML +.\" +parenthesized groups. +.\" +. +. +.SS "Absolute and relative subroutine calls" +.rs +.sp +For compatibility with Oniguruma, the non-Perl syntax \eg followed by a name or +a number enclosed either in angle brackets or single quotes, is an alternative +syntax for referencing a capture group as a subroutine. Details are discussed +.\" HTML +.\" +later. 
+.\" +Note that \eg{...} (Perl syntax) and \eg<...> (Oniguruma syntax) are \fInot\fP +synonymous. The former is a backreference; the latter is a +.\" HTML +.\" +subroutine +.\" +call. +. +. +.\" HTML +.SS "Generic character types" +.rs +.sp +Another use of backslash is for specifying generic character types: +.sp + \ed any decimal digit + \eD any character that is not a decimal digit + \eh any horizontal white space character + \eH any character that is not a horizontal white space character + \eN any character that is not a newline + \es any white space character + \eS any character that is not a white space character + \ev any vertical white space character + \eV any character that is not a vertical white space character + \ew any "word" character + \eW any "non-word" character +.sp +The \eN escape sequence has the same meaning as +.\" HTML +.\" +the "." metacharacter +.\" +when PCRE2_DOTALL is not set, but setting PCRE2_DOTALL does not change the +meaning of \eN. Note that when \eN is followed by an opening brace it has a +different meaning. See the section entitled +.\" HTML +.\" +"Non-printing characters" +.\" +above for details. Perl also uses \eN{name} to specify characters by Unicode +name; PCRE2 does not support this. +.P +Each pair of lower and upper case escape sequences partitions the complete set +of characters into two disjoint sets. Any given character matches one, and only +one, of each pair. The sequences can appear both inside and outside character +classes. They each match one character of the appropriate type. If the current +matching point is at the end of the subject string, all of them fail, because +there is no character to match. +.P +The default \es characters are HT (9), LF (10), VT (11), FF (12), CR (13), and +space (32), which are defined as white space in the "C" locale. This list may +vary if locale-specific matching is taking place. 
For example, in some locales +the "non-breaking space" character (\exA0) is recognized as white space, and in +others the VT character is not. +.P +A "word" character is an underscore or any character that is a letter or digit. +By default, the definition of letters and digits is controlled by PCRE2's +low-valued character tables, and may vary if locale-specific matching is taking +place (see +.\" HTML +.\" +"Locale support" +.\" +in the +.\" HREF +\fBpcre2api\fP +.\" +page). For example, in a French locale such as "fr_FR" in Unix-like systems, +or "french" in Windows, some character codes greater than 127 are used for +accented letters, and these are then matched by \ew. The use of locales with +Unicode is discouraged. +.P +By default, characters whose code points are greater than 127 never match \ed, +\es, or \ew, and always match \eD, \eS, and \eW, although this may be different +for characters in the range 128-255 when locale-specific matching is happening. +These escape sequences retain their original meanings from before Unicode +support was available, mainly for efficiency reasons. If the PCRE2_UCP option +is set, the behaviour is changed so that Unicode properties are used to +determine character types, as follows: +.sp + \ed any character that matches \ep{Nd} (decimal digit) + \es any character that matches \ep{Z} or \eh or \ev + \ew any character that matches \ep{L}, \ep{N}, \ep{Mn}, or \ep{Pc} +.sp +The addition of \ep{Mn} (non-spacing mark) and the replacement of an explicit +test for underscore with a test for \ep{Pc} (connector punctuation) happened in +PCRE2 release 10.43. This brings PCRE2 into line with Perl. +.P +The upper case escapes match the inverse sets of characters. Note that \ed +matches only decimal digits, whereas \ew matches any Unicode digit, as well as +other character categories. Note also that PCRE2_UCP affects \eb, and +\eB because they are defined in terms of \ew and \eW. 
Matching these sequences +is noticeably slower when PCRE2_UCP is set. +.P +The effect of PCRE2_UCP on any one of these escape sequences can be negated by +the options PCRE2_EXTRA_ASCII_BSD, PCRE2_EXTRA_ASCII_BSS, and +PCRE2_EXTRA_ASCII_BSW, respectively. These options can be set and reset within +a pattern by means of an internal option setting +.\" HTML +.\" +(see below). +.\" +.P +The sequences \eh, \eH, \ev, and \eV, in contrast to the other sequences, which +match only ASCII characters by default, always match a specific list of code +points, whether or not PCRE2_UCP is set. The horizontal space characters are: +.sp + U+0009 Horizontal tab (HT) + U+0020 Space + U+00A0 Non-break space + U+1680 Ogham space mark + U+180E Mongolian vowel separator + U+2000 En quad + U+2001 Em quad + U+2002 En space + U+2003 Em space + U+2004 Three-per-em space + U+2005 Four-per-em space + U+2006 Six-per-em space + U+2007 Figure space + U+2008 Punctuation space + U+2009 Thin space + U+200A Hair space + U+202F Narrow no-break space + U+205F Medium mathematical space + U+3000 Ideographic space +.sp +The vertical space characters are: +.sp + U+000A Linefeed (LF) + U+000B Vertical tab (VT) + U+000C Form feed (FF) + U+000D Carriage return (CR) + U+0085 Next line (NEL) + U+2028 Line separator + U+2029 Paragraph separator +.sp +In 8-bit, non-UTF-8 mode, only the characters with code points less than 256 +are relevant. +. +. +.\" HTML +.SS "Newline sequences" +.rs +.sp +Outside a character class, by default, the escape sequence \eR matches any +Unicode newline sequence. In 8-bit non-UTF-8 mode \eR is equivalent to the +following: +.sp + (?>\er\en|\en|\ex0b|\ef|\er|\ex85) +.sp +This is an example of an "atomic group", details of which are given +.\" HTML +.\" +below. 
+.\" +This particular group matches either the two-character sequence CR followed by +LF, or one of the single characters LF (linefeed, U+000A), VT (vertical tab, +U+000B), FF (form feed, U+000C), CR (carriage return, U+000D), or NEL (next +line, U+0085). Because this is an atomic group, the two-character sequence is +treated as a single unit that cannot be split. +.P +In other modes, two additional characters whose code points are greater than 255 +are added: LS (line separator, U+2028) and PS (paragraph separator, U+2029). +Unicode support is not needed for these characters to be recognized. +.P +It is possible to restrict \eR to match only CR, LF, or CRLF (instead of the +complete set of Unicode line endings) by setting the option PCRE2_BSR_ANYCRLF +at compile time. (BSR is an abbreviation for "backslash R".) This can be made +the default when PCRE2 is built; if this is the case, the other behaviour can +be requested via the PCRE2_BSR_UNICODE option. It is also possible to specify +these settings by starting a pattern string with one of the following +sequences: +.sp + (*BSR_ANYCRLF) CR, LF, or CRLF only + (*BSR_UNICODE) any Unicode newline sequence +.sp +These override the default and the options given to the compiling function. +Note that these special settings, which are not Perl-compatible, are recognized +only at the very start of a pattern, and that they must be in upper case. If +more than one of them is present, the last one is used. They can be combined +with a change of newline convention; for example, a pattern can start with: +.sp + (*ANY)(*BSR_ANYCRLF) +.sp +They can also be combined with the (*UTF) or (*UCP) special sequences. Inside a +character class, \eR is treated as an unrecognized escape sequence, and causes +an error. +. +. +.\" HTML +.SS Unicode character properties +.rs +.sp +When PCRE2 is built with Unicode support (the default), three additional escape +sequences that match characters with specific properties are available. 
They +can be used in any mode, though in 8-bit and 16-bit non-UTF modes these +sequences are of course limited to testing characters whose code points are +less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points +greater than 0x10ffff (the Unicode limit) may be encountered. These are all +treated as being in the Unknown script and with an unassigned type. +.P +Matching characters by Unicode property is not fast, because PCRE2 has to do a +multistage table lookup in order to find a character's property. That is why +the traditional escape sequences such as \ed and \ew do not use Unicode +properties in PCRE2 by default, though you can make them do so by setting the +PCRE2_UCP option or by starting the pattern with (*UCP). +.P +The extra escape sequences that provide property support are: +.sp + \ep{\fIxx\fP} a character with the \fIxx\fP property + \eP{\fIxx\fP} a character without the \fIxx\fP property + \eX a Unicode extended grapheme cluster +.sp +The property names represented by \fIxx\fP above are not case-sensitive, and in +accordance with Unicode's "loose matching" rules, spaces, hyphens, and +underscores are ignored. There is support for Unicode script names, Unicode +general category properties, "Any", which matches any character (including +newline), Bidi_Class, a number of binary (yes/no) properties, and some special +PCRE2 properties (described +.\" HTML +.\" +below). +.\" +Certain other Perl properties such as "InMusicalSymbols" are not supported by +PCRE2. Note that \eP{Any} does not match any characters, so always causes a +match failure. +. +. +. +.SS "Script properties for \ep and \eP" +.rs +.sp +There are three different syntax forms for matching a script. Each Unicode +character has a basic script and, optionally, a list of other scripts ("Script +Extensions") with which it is commonly used. 
Using the Adlam script as an +example, \ep{sc:Adlam} matches characters whose basic script is Adlam, whereas +\ep{scx:Adlam} matches, in addition, characters that have Adlam in their +extensions list. The full names "script" and "script extensions" for the +property types are recognized, and an equals sign is an alternative to the +colon. If a script name is given without a property type, for example, +\ep{Adlam}, it is treated as \ep{scx:Adlam}. Perl changed to this +interpretation at release 5.26 and PCRE2 changed at release 10.40. +.P +Unassigned characters (and in non-UTF 32-bit mode, characters with code points +greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not +part of an identified script are lumped together as "Common". The current list +of recognized script names and their 4-character abbreviations can be obtained +by running this command: +.sp + pcre2test -LS +.sp +. +. +. +.SS "The general category property for \ep and \eP" +.rs +.sp +Each character has exactly one Unicode general category property, specified by +a two-letter abbreviation. For compatibility with Perl, negation can be +specified by including a circumflex between the opening brace and the property +name. For example, \ep{^Lu} is the same as \eP{Lu}. +.P +If only one letter is specified with \ep or \eP, it includes all the general +category properties that start with that letter.
In this case, in the absence +of negation, the curly brackets in the escape sequence are optional; these two +examples have the same effect: +.sp + \ep{L} + \epL +.sp +The following general category property codes are supported: +.sp + C Other + Cc Control + Cf Format + Cn Unassigned + Co Private use + Cs Surrogate +.sp + L Letter + Ll Lower case letter + Lm Modifier letter + Lo Other letter + Lt Title case letter + Lu Upper case letter +.sp + M Mark + Mc Spacing mark + Me Enclosing mark + Mn Non-spacing mark +.sp + N Number + Nd Decimal number + Nl Letter number + No Other number +.sp + P Punctuation + Pc Connector punctuation + Pd Dash punctuation + Pe Close punctuation + Pf Final punctuation + Pi Initial punctuation + Po Other punctuation + Ps Open punctuation +.sp + S Symbol + Sc Currency symbol + Sk Modifier symbol + Sm Mathematical symbol + So Other symbol +.sp + Z Separator + Zl Line separator + Zp Paragraph separator + Zs Space separator +.sp +The special property LC, which has the synonym L&, is also supported: it +matches a character that has the Lu, Ll, or Lt property, in other words, a +letter that is not classified as a modifier or "other". +.P +The Cs (Surrogate) property applies only to characters whose code points are in +the range U+D800 to U+DFFF. These characters are no different to any other +character when PCRE2 is not in UTF mode (using the 16-bit or 32-bit library). +However, they are not valid in Unicode strings and so cannot be tested by PCRE2 +in UTF mode, unless UTF validity checking has been turned off (see the +discussion of PCRE2_NO_UTF_CHECK in the +.\" HREF +\fBpcre2api\fP +.\" +page). +.P +The long synonyms for property names that Perl supports (such as \ep{Letter}) +are not supported by PCRE2, nor is it permitted to prefix any of these +properties with "Is". +.P +No character that is in the Unicode table has the Cn (unassigned) property. +Instead, this property is assumed for any code point that is not in the +Unicode table. 
+.P +Specifying caseless matching does not affect these escape sequences. For +example, \ep{Lu} always matches only upper case letters. This is different from +the behaviour of current versions of Perl. +. +. +.SS "Binary (yes/no) properties for \ep and \eP" +.rs +.sp +Unicode defines a number of binary properties, that is, properties whose only +values are true or false. You can obtain a list of those that are recognized by +\ep and \eP, along with their abbreviations, by running this command: +.sp + pcre2test -LP +.sp +. +. +.SS "The Bidi_Class property for \ep and \eP" +.rs +.sp + \ep{Bidi_Class:<class>} matches a character with the given class + \ep{BC:<class>} matches a character with the given class +.sp +The recognized classes are: +.sp + AL Arabic letter + AN Arabic number + B paragraph separator + BN boundary neutral + CS common separator + EN European number + ES European separator + ET European terminator + FSI first strong isolate + L left-to-right + LRE left-to-right embedding + LRI left-to-right isolate + LRO left-to-right override + NSM non-spacing mark + ON other neutral + PDF pop directional format + PDI pop directional isolate + R right-to-left + RLE right-to-left embedding + RLI right-to-left isolate + RLO right-to-left override + S segment separator + WS white space +.sp +An equals sign may be used instead of a colon. The class names are +case-insensitive; only the short names listed above are recognized. +. +. +.SS Extended grapheme clusters +.rs +.sp +The \eX escape matches any number of Unicode characters that form an "extended +grapheme cluster", and treats the sequence as an atomic group +.\" HTML +.\" +(see below). +.\" +Unicode supports various kinds of composite character by giving each character +a grapheme breaking property, and having rules that use these properties to +define the boundaries of extended grapheme clusters. The rules are defined in +Unicode Standard Annex 29, "Unicode Text Segmentation".
Unicode 11.0.0 +abandoned the use of some previous properties that had been used for emojis. +Instead it introduced various emoji-specific properties. PCRE2 uses only the +Extended Pictographic property. +.P +\eX always matches at least one character. Then it decides whether to add +additional characters according to the following rules for ending a cluster: +.P +1. End at the end of the subject string. +.P +2. Do not end between CR and LF; otherwise end after any control character. +.P +3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters +are of five types: L, V, T, LV, and LVT. An L character may be followed by an +L, V, LV, or LVT character; an LV or V character may be followed by a V or T +character; an LVT or T character may be followed only by a T character. +.P +4. Do not end before extending characters or spacing marks or the zero-width +joiner (ZWJ) character. Characters with the "mark" property always have the +"extend" grapheme breaking property. +.P +5. Do not end after prepend characters. +.P +6. Do not end within emoji modifier sequences or emoji ZWJ (zero-width +joiner) sequences. An emoji ZWJ sequence consists of a character with the +Extended_Pictographic property, optionally followed by one or more characters +with the Extend property, followed by the ZWJ character, followed by another +Extended_Pictographic character. +.P +7. Do not break within emoji flag sequences. That is, do not break between +regional indicator (RI) characters if there are an odd number of RI characters +before the break point. +.P +8. Otherwise, end the cluster. +. +. +.\" HTML +.SS PCRE2's additional properties +.rs +.sp +As well as the standard Unicode properties described above, PCRE2 supports four +more that make it possible to convert traditional escape sequences such as \ew +and \es to use Unicode properties. PCRE2 uses these non-standard, non-Perl +properties internally when PCRE2_UCP is set. However, they may also be used +explicitly. 
These properties are: +.sp + Xan Any alphanumeric character + Xps Any POSIX space character + Xsp Any Perl space character + Xwd Any Perl "word" character +.sp +Xan matches characters that have either the L (letter) or the N (number) +property. Xps matches the characters tab, linefeed, vertical tab, form feed, or +carriage return, and any other character that has the Z (separator) property. +Xsp is the same as Xps; in PCRE1 it used to exclude vertical tab, for Perl +compatibility, but Perl changed. Xwd matches the same characters as Xan, plus +those that match Mn (non-spacing mark) or Pc (connector punctuation, which +includes underscore). +.P +There is another non-standard property, Xuc, which matches any character that +can be represented by a Universal Character Name in C++ and other programming +languages. These are the characters $, @, ` (grave accent), and all characters +with Unicode code points greater than or equal to U+00A0, except for the +surrogates U+D800 to U+DFFF. Note that most base (ASCII) characters are +excluded. (Universal Character Names are of the form \euHHHH or \eUHHHHHHHH +where H is a hexadecimal digit. Note that the Xuc property does not match these +sequences but the characters that they represent.) +. +. +.\" HTML +.SS "Resetting the match start" +.rs +.sp +In normal use, the escape sequence \eK causes any previously matched characters +not to be included in the final matched sequence that is returned. For example, +the pattern: +.sp + foo\eKbar +.sp +matches "foobar", but reports that it has matched "bar". \eK does not interact +with anchoring in any way. The pattern: +.sp + ^foo\eKbar +.sp +matches only when the subject begins with "foobar" (in single line mode), +though it again reports the matched string as "bar". 
This feature is similar to +a lookbehind assertion +.\" HTML +.\" +(described below), +.\" +but the part of the pattern that precedes \eK is not constrained to match a +limited number of characters, as is required for a lookbehind assertion. The +use of \eK does not interfere with the setting of +.\" HTML +.\" +captured substrings. +.\" +For example, when the pattern +.sp + (foo)\eKbar +.sp +matches "foobar", the first substring is still set to "foo". +.P +From version 5.32.0 Perl forbids the use of \eK in lookaround assertions. From +release 10.38 PCRE2 also forbids this by default. However, the +PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option can be used when calling +\fBpcre2_compile()\fP to re-enable the previous behaviour. When this option is +set, \eK is acted upon when it occurs inside positive assertions, but is +ignored in negative assertions. Note that when a pattern such as (?=ab\eK) +matches, the reported start of the match can be greater than the end of the +match. Using \eK in a lookbehind assertion at the start of a pattern can also +lead to odd effects. For example, consider this pattern: +.sp + (?<=\eKfoo)bar +.sp +If the subject is "foobar", a call to \fBpcre2_match()\fP with a starting +offset of 3 succeeds and reports the matching string as "foobar", that is, the +start of the reported match is earlier than where the match started. +. +. +.\" HTML +.SS "Simple assertions" +.rs +.sp +The final use of backslash is for certain simple assertions. An assertion +specifies a condition that has to be met at a particular point in a match, +without consuming any characters from the subject string. The use of +groups for more complicated assertions is described +.\" HTML +.\" +below. 
+.\" +The backslashed assertions are: +.sp + \eb matches at a word boundary + \eB matches when not at a word boundary + \eA matches at the start of the subject + \eZ matches at the end of the subject + also matches before a newline at the end of the subject + \ez matches only at the end of the subject + \eG matches at the first matching position in the subject +.sp +Inside a character class, \eb has a different meaning; it matches the backspace +character. If any other of these assertions appears in a character class, an +"invalid escape sequence" error is generated. +.P +A word boundary is a position in the subject string where the current character +and the previous character do not both match \ew or \eW (i.e. one matches +\ew and the other matches \eW), or the start or end of the string if the +first or last character matches \ew, respectively. When PCRE2 is built with +Unicode support, the meanings of \ew and \eW can be changed by setting the +PCRE2_UCP option. When this is done, it also affects \eb and \eB. Neither PCRE2 +nor Perl has a separate "start of word" or "end of word" metasequence. However, +whatever follows \eb normally determines which it is. For example, the fragment +\eba matches "a" at the start of a word. +.P +The \eA, \eZ, and \ez assertions differ from the traditional circumflex and +dollar (described in the next section) in that they only ever match at the very +start and end of the subject string, whatever options are set. Thus, they are +independent of multiline mode. These three assertions are not affected by the +PCRE2_NOTBOL or PCRE2_NOTEOL options, which affect only the behaviour of the +circumflex and dollar metacharacters. However, if the \fIstartoffset\fP +argument of \fBpcre2_match()\fP is non-zero, indicating that matching is to +start at a point other than the beginning of the subject, \eA can never match. 
+The difference between \eZ and \ez is that \eZ matches before a newline at the +end of the string as well as at the very end, whereas \ez matches only at the +end. +.P +The \eG assertion is true only when the current matching position is at the +start point of the matching process, as specified by the \fIstartoffset\fP +argument of \fBpcre2_match()\fP. It differs from \eA when the value of +\fIstartoffset\fP is non-zero. By calling \fBpcre2_match()\fP multiple times +with appropriate arguments, you can mimic Perl's /g option, and it is in this +kind of implementation where \eG can be useful. +.P +Note, however, that PCRE2's implementation of \eG, being true at the starting +character of the matching process, is subtly different from Perl's, which +defines it as true at the end of the previous match. In Perl, these can be +different when the previously matched string was empty. Because PCRE2 does just +one match at a time, it cannot reproduce this behaviour. +.P +If all the alternatives of a pattern begin with \eG, the expression is anchored +to the starting match position, and the "anchored" flag is set in the compiled +regular expression. +. +. +.SH "CIRCUMFLEX AND DOLLAR" +.rs +.sp +The circumflex and dollar metacharacters are zero-width assertions. That is, +they test for a particular condition being true without consuming any +characters from the subject string. These two metacharacters are concerned with +matching the starts and ends of lines. If the newline convention is set so that +only the two-character sequence CRLF is recognized as a newline, isolated CR +and LF characters are treated as ordinary data characters, and are not +recognized as newlines. +.P +Outside a character class, in the default matching mode, the circumflex +character is an assertion that is true only if the current matching point is at +the start of the subject string. 
If the \fIstartoffset\fP argument of +\fBpcre2_match()\fP is non-zero, or if PCRE2_NOTBOL is set, circumflex can +never match if the PCRE2_MULTILINE option is unset. Inside a character class, +circumflex has an entirely different meaning +.\" HTML +.\" +(see below). +.\" +.P +Circumflex need not be the first character of the pattern if a number of +alternatives are involved, but it should be the first thing in each alternative +in which it appears if the pattern is ever to match that branch. If all +possible alternatives start with a circumflex, that is, if the pattern is +constrained to match only at the start of the subject, it is said to be an +"anchored" pattern. (There are also other constructs that can cause a pattern +to be anchored.) +.P +The dollar character is an assertion that is true only if the current matching +point is at the end of the subject string, or immediately before a newline at +the end of the string (by default), unless PCRE2_NOTEOL is set. Note, however, +that it does not actually match the newline. Dollar need not be the last +character of the pattern if a number of alternatives are involved, but it +should be the last item in any branch in which it appears. Dollar has no +special meaning in a character class. +.P +The meaning of dollar can be changed so that it matches only at the very end of +the string, by setting the PCRE2_DOLLAR_ENDONLY option at compile time. This +does not affect the \eZ assertion. +.P +The meanings of the circumflex and dollar metacharacters are changed if the +PCRE2_MULTILINE option is set. When this is the case, a dollar character +matches before any newlines in the string, as well as at the very end, and a +circumflex matches immediately after internal newlines as well as at the start +of the subject string. It does not match after a newline that ends the string, +for compatibility with Perl. However, this can be changed by setting the +PCRE2_ALT_CIRCUMFLEX option. 
+.P +For example, the pattern /^abc$/ matches the subject string "def\enabc" (where +\en represents a newline) in multiline mode, but not otherwise. Consequently, +patterns that are anchored in single line mode because all branches start with +^ are not anchored in multiline mode, and a match for circumflex is possible +when the \fIstartoffset\fP argument of \fBpcre2_match()\fP is non-zero. The +PCRE2_DOLLAR_ENDONLY option is ignored if PCRE2_MULTILINE is set. +.P +When the newline convention (see +.\" HTML +.\" +"Newline conventions" +.\" +below) recognizes the two-character sequence CRLF as a newline, this is +preferred, even if the single characters CR and LF are also recognized as +newlines. For example, if the newline convention is "any", a multiline mode +circumflex matches before "xyz" in the string "abc\er\enxyz" rather than after +CR, even though CR on its own is a valid newline. (It also matches at the very +start of the string, of course.) +.P +Note that the sequences \eA, \eZ, and \ez can be used to match the start and +end of the subject in both modes, and if all branches of a pattern start with +\eA it is always anchored, whether or not PCRE2_MULTILINE is set. +. +. +.\" HTML +.SH "FULL STOP (PERIOD, DOT) AND \eN" +.rs +.sp +Outside a character class, a dot in the pattern matches any one character in +the subject string except (by default) a character that signifies the end of a +line. One or more characters may be specified as line terminators (see +.\" HTML +.\" +"Newline conventions" +.\" +above). +.P +Dot never matches a single line-ending character. When the two-character +sequence CRLF is the only line ending, dot does not match CR if it is +immediately followed by LF, but otherwise it matches all characters (including +isolated CRs and LFs). When ANYCRLF is selected for line endings, no occurrences +of CR or LF match dot. When all Unicode line endings are being recognized, dot +does not match CR or LF or any of the other line ending characters. 
+.P +The behaviour of dot with regard to newlines can be changed. If the +PCRE2_DOTALL option is set, a dot matches any one character, without exception. +If the two-character sequence CRLF is present in the subject string, it takes +two dots to match it. +.P +The handling of dot is entirely independent of the handling of circumflex and +dollar, the only relationship being that they both involve newlines. Dot has no +special meaning in a character class. +.P +The escape sequence \eN when not followed by an opening brace behaves like a +dot, except that it is not affected by the PCRE2_DOTALL option. In other words, +it matches any character except one that signifies the end of a line. +.P +When \eN is followed by an opening brace it has a different meaning. See the +section entitled +.\" HTML +.\" +"Non-printing characters" +.\" +above for details. Perl also uses \eN{name} to specify characters by Unicode +name; PCRE2 does not support this. +. +. +.SH "MATCHING A SINGLE CODE UNIT" +.rs +.sp +Outside a character class, the escape sequence \eC matches any one code unit, +whether or not a UTF mode is set. In the 8-bit library, one code unit is one +byte; in the 16-bit library it is a 16-bit unit; in the 32-bit library it is a +32-bit unit. Unlike a dot, \eC always matches line-ending characters. The +feature is provided in Perl in order to match individual bytes in UTF-8 mode, +but it is unclear how it can usefully be used. +.P +Because \eC breaks up characters into individual code units, matching one unit +with \eC in UTF-8 or UTF-16 mode means that the rest of the string may start +with a malformed UTF character. This has undefined results, because PCRE2 +assumes that it is matching character by character in a valid UTF string (by +default it checks the subject string's validity at the start of processing +unless the PCRE2_NO_UTF_CHECK or PCRE2_MATCH_INVALID_UTF option is used). 
+.P +An application can lock out the use of \eC by setting the +PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also possible to +build PCRE2 with the use of \eC permanently disabled. +.P +PCRE2 does not allow \eC to appear in lookbehind assertions +.\" HTML +.\" +(described below) +.\" +in UTF-8 or UTF-16 modes, because this would make it impossible to calculate +the length of the lookbehind. Neither the alternative matching function +\fBpcre2_dfa_match()\fP nor the JIT optimizer support \eC in these UTF modes. +The former gives a match-time error; the latter fails to optimize and so the +match is always run using the interpreter. +.P +In the 32-bit library, however, \eC is always supported (when not explicitly +locked out) because it always matches a single code unit, whether or not UTF-32 +is specified. +.P +In general, the \eC escape sequence is best avoided. However, one way of using +it that avoids the problem of malformed UTF-8 or UTF-16 characters is to use a +lookahead to check the length of the next character, as in this pattern, which +could be used with a UTF-8 string (ignore white space and line breaks): +.sp + (?| (?=[\ex00-\ex7f])(\eC) | + (?=[\ex80-\ex{7ff}])(\eC)(\eC) | + (?=[\ex{800}-\ex{ffff}])(\eC)(\eC)(\eC) | + (?=[\ex{10000}-\ex{1fffff}])(\eC)(\eC)(\eC)(\eC)) +.sp +In this example, a group that starts with (?| resets the capturing parentheses +numbers in each alternative (see +.\" HTML +.\" +"Duplicate Group Numbers" +.\" +below). The assertions at the start of each branch check the next UTF-8 +character for values whose encoding uses 1, 2, 3, or 4 bytes, respectively. The +character's individual bytes are then captured by the appropriate number of +\eC groups. +. +. +.\" HTML +.SH "SQUARE BRACKETS AND CHARACTER CLASSES" +.rs +.sp +An opening square bracket introduces a character class, terminated by a closing +square bracket. A closing square bracket on its own is not special by default. 
+If a closing square bracket is required as a member of the class, it should be +the first data character in the class (after an initial circumflex, if present) +or escaped with a backslash. This means that, by default, an empty class cannot +be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing +square bracket at the start does end the (empty) class. +.P +A character class matches a single character in the subject. A matched +character must be in the set of characters defined by the class, unless the +first character in the class definition is a circumflex, in which case the +subject character must not be in the set defined by the class. If a circumflex +is actually required as a member of the class, ensure it is not the first +character, or escape it with a backslash. +.P +For example, the character class [aeiou] matches any lower case vowel, while +[^aeiou] matches any character that is not a lower case vowel. Note that a +circumflex is just a convenient notation for specifying the characters that +are in the class by enumerating those that are not. A class that starts with a +circumflex is not an assertion; it still consumes a character from the subject +string, and therefore it fails if the current pointer is at the end of the +string. +.P +Characters in a class may be specified by their code points using \eo, \ex, or +\eN{U+hh..} in the usual way. When caseless matching is set, any letters in a +class represent both their upper case and lower case versions, so for example, +a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not +match "A", whereas a caseful version would. Note that there are two ASCII +characters, K and S, that, in addition to their lower case ASCII equivalents, +are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F (long S) +respectively when either PCRE2_UTF or PCRE2_UCP is set. 
+.P +Characters that might indicate line breaks are never treated in any special way +when matching character classes, whatever line-ending sequence is in use, and +whatever setting of the PCRE2_DOTALL and PCRE2_MULTILINE options is used. A +class such as [^a] always matches one of these characters. +.P +The generic character type escape sequences \ed, \eD, \eh, \eH, \ep, \eP, \es, +\eS, \ev, \eV, \ew, and \eW may appear in a character class, and add the +characters that they match to the class. For example, [\edABCDEF] matches any +hexadecimal digit. In UTF modes, the PCRE2_UCP option affects the meanings of +\ed, \es, \ew and their upper case partners, just as it does when they appear +outside a character class, as described in the section entitled +.\" HTML +.\" +"Generic character types" +.\" +above. The escape sequence \eb has a different meaning inside a character +class; it matches the backspace character. The sequences \eB, \eR, and \eX are +not special inside a character class. Like any other unrecognized escape +sequences, they cause an error. The same is true for \eN when not followed by +an opening brace. +.P +The minus (hyphen) character can be used to specify a range of characters in a +character class. For example, [d-m] matches any letter between d and m, +inclusive. If a minus character is required in a class, it must be escaped with +a backslash or appear in a position where it cannot be interpreted as +indicating a range, typically as the first or last character in the class, +or immediately after a range. For example, [b-d-z] matches letters in the range +b to d, a hyphen character, or z. +.P +Perl treats a hyphen as a literal if it appears before or after a POSIX class +(see below) or before or after a character type escape such as \ed or \eH. +However, unless the hyphen is the last character in the class, Perl outputs a +warning in its warning mode, as this is most likely a user error. 
As PCRE2 has +no facility for warning, an error is given in these cases. +.P +It is not possible to have the literal character "]" as the end character of a +range. A pattern such as [W-]46] is interpreted as a class of two characters +("W" and "-") followed by a literal string "46]", so it would match "W46]" or +"-46]". However, if the "]" is escaped with a backslash it is interpreted as +the end of range, so [W-\e]46] is interpreted as a class containing a range +followed by two other characters. The octal or hexadecimal representation of +"]" can also be used to end a range. +.P +Ranges normally include all code points between the start and end characters, +inclusive. They can also be used for code points specified numerically, for +example [\e000-\e037]. Ranges can include any characters that are valid for the +current mode. In any UTF mode, the so-called "surrogate" characters (those +whose code points lie between 0xd800 and 0xdfff inclusive) may not be specified +explicitly by default (the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables +this check). However, ranges such as [\ex{d7ff}-\ex{e000}], which include the +surrogates, are always permitted. +.P +There is a special case in EBCDIC environments for ranges whose end points are +both specified as literal letters in the same case. For compatibility with +Perl, EBCDIC code points within the range that are not letters are omitted. For +example, [h-k] matches only four characters, even though the codes for h and k +are 0x88 and 0x92, a range of 11 code points. However, if the range is +specified numerically, for example, [\ex88-\ex92] or [h-\ex92], all code points +are included. +.P +If a range that includes letters is used when caseless matching is set, it +matches the letters in either case. For example, [W-c] is equivalent to +[][\e\e^_`wxyzabc], matched caselessly, and in a non-UTF mode, if character +tables for a French locale are in use, [\exc8-\excb] matches accented E +characters in both cases. 
+.P +A circumflex can conveniently be used with the upper case character types to +specify a more restricted set of characters than the matching lower case type. +For example, the class [^\eW_] matches any letter or digit, but not underscore, +whereas [\ew] includes underscore. A positive character class should be read as +"something OR something OR ..." and a negative class as "NOT something AND NOT +something AND NOT ...". +.P +The only metacharacters that are recognized in character classes are backslash, +hyphen (only where it can be interpreted as specifying a range), circumflex +(only at the start), opening square bracket (only when it can be interpreted as +introducing a POSIX class name, or for a special compatibility feature - see +the next two sections), and the terminating closing square bracket. However, +escaping other non-alphanumeric characters does no harm. +. +. +.SH "POSIX CHARACTER CLASSES" +.rs +.sp +Perl supports the POSIX notation for character classes. This uses names +enclosed by [: and :] within the enclosing square brackets. PCRE2 also supports +this notation. For example, +.sp + [01[:alpha:]%] +.sp +matches "0", "1", any alphabetic character, or "%". The supported class names +are: +.sp + alnum letters and digits + alpha letters + ascii character codes 0 - 127 + blank space or tab only + cntrl control characters + digit decimal digits (same as \ed) + graph printing characters, excluding space + lower lower case letters + print printing characters, including space + punct printing characters, excluding letters and digits and space + space white space (the same as \es from PCRE 8.34) + upper upper case letters + word "word" characters (same as \ew) + xdigit hexadecimal digits +.sp +The default "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13), +and space (32). If locale-specific matching is taking place, the list of space +characters may be different; there may be fewer or more of them. 
"Space" and +\es match the same set of characters, as do "word" and \ew. +.P +The name "word" is a Perl extension, and "blank" is a GNU extension from Perl +5.8. Another Perl extension is negation, which is indicated by a ^ character +after the colon. For example, +.sp + [12[:^digit:]] +.sp +matches "1", "2", or any non-digit. PCRE2 (and Perl) also recognize the POSIX +syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not +supported, and an error is given if they are encountered. +.P +By default, characters with values greater than 127 do not match any of the +POSIX character classes, although this may be different for characters in the +range 128-255 when locale-specific matching is happening. However, in UCP mode, +unless certain options are set (see below), some of the classes are changed so +that Unicode character properties are used. This is achieved by replacing +POSIX classes with other sequences, as follows: +.sp + [:alnum:] becomes \ep{Xan} + [:alpha:] becomes \ep{L} + [:blank:] becomes \eh + [:cntrl:] becomes \ep{Cc} + [:digit:] becomes \ep{Nd} + [:lower:] becomes \ep{Ll} + [:space:] becomes \ep{Xps} + [:upper:] becomes \ep{Lu} + [:word:] becomes \ep{Xwd} +.sp +Negated versions, such as [:^alpha:] use \eP instead of \ep. Four other POSIX +classes are handled specially in UCP mode: +.TP 10 +[:graph:] +This matches characters that have glyphs that mark the page when printed. In +Unicode property terms, it matches all characters with the L, M, N, P, S, or Cf +properties, except for: +.sp + U+061C Arabic Letter Mark + U+180E Mongolian Vowel Separator + U+2066 - U+2069 Various "isolate"s +.sp +.TP 10 +[:print:] +This matches the same characters as [:graph:] plus space characters that are +not controls, that is, characters with the Zs property. +.TP 10 +[:punct:] +This matches all characters that have the Unicode P (punctuation) property, +plus those characters with code points less than 256 that have the S (Symbol) +property. 
+.TP 10 +[:xdigit:] +In addition to the ASCII hexadecimal digits, this also matches the "fullwidth" +versions of those characters, whose Unicode code points start at U+FF10. This +is a change that was made in PCRE2 release 10.43 for Perl compatibility. +.P +The other POSIX classes are unchanged by PCRE2_UCP, and match only characters +with code points less than 256. +.P +There are two options that can be used to restrict the POSIX classes to ASCII +characters when PCRE2_UCP is set. The option PCRE2_EXTRA_ASCII_DIGIT affects +just [:digit:] and [:xdigit:]. Within a pattern, this can be set and unset by +(?aT) and (?-aT). The PCRE2_EXTRA_ASCII_POSIX option disables UCP processing +for all POSIX classes, including [:digit:] and [:xdigit:]. Within a pattern, +(?aP) and (?-aP) set and unset both these options for consistency. +. +. +.SH "COMPATIBILITY FEATURE FOR WORD BOUNDARIES" +.rs +.sp +In the POSIX.2 compliant library that was included in 4.4BSD Unix, the ugly +syntax [[:<:]] and [[:>:]] is used for matching "start of word" and "end of +word". PCRE2 treats these items as follows: +.sp + [[:<:]] is converted to \eb(?=\ew) + [[:>:]] is converted to \eb(?<=\ew) +.sp +Only these exact character sequences are recognized. A sequence such as +[a[:<:]b] provokes an error for an unrecognized POSIX class name. This support is +not compatible with Perl. It is provided to help migrations from other +environments, and is best not used in any new patterns. Note that \eb matches +at the start and the end of a word (see +.\" HTML +.\" +"Simple assertions" +.\" +above), and in a Perl-style pattern the preceding or following character +normally shows which is wanted, without the need for the assertions that are +used above in order to give exactly the POSIX behaviour. Note also that the +PCRE2_UCP option changes the meaning of \ew (and therefore \eb) by default, so +it also affects these POSIX sequences. +. +. 
+.SH "VERTICAL BAR" +.rs +.sp +Vertical bar characters are used to separate alternative patterns. For example, +the pattern +.sp + gilbert|sullivan +.sp +matches either "gilbert" or "sullivan". Any number of alternatives may appear, +and an empty alternative is permitted (matching the empty string). The matching +process tries each alternative in turn, from left to right, and the first one +that succeeds is used. If the alternatives are within a group +.\" HTML +.\" +(defined below), +.\" +"succeeds" means matching the rest of the main pattern as well as the +alternative in the group. +. +. +.\" HTML +.SH "INTERNAL OPTION SETTING" +.rs +.sp +The settings of several options can be changed within a pattern by a sequence +of letters enclosed between "(?" and ")". The following are Perl-compatible, +and are described in detail in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. The option letters are: +.sp + i for PCRE2_CASELESS + m for PCRE2_MULTILINE + n for PCRE2_NO_AUTO_CAPTURE + s for PCRE2_DOTALL + x for PCRE2_EXTENDED + xx for PCRE2_EXTENDED_MORE +.sp +For example, (?im) sets caseless, multiline matching. It is also possible to +unset these options by preceding the relevant letters with a hyphen, for +example (?-im). The two "extended" options are not independent; unsetting +either one cancels the effects of both of them. +.P +A combined setting and unsetting such as (?im-sx), which sets PCRE2_CASELESS +and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and PCRE2_EXTENDED, is also +permitted. Only one hyphen may appear in the options string. If a letter +appears both before and after the hyphen, the option is unset. An empty options +setting "(?)" is allowed. Needless to say, it has no effect. +.P +If the first character following (? is a circumflex, it causes all of the above +options to be unset. Letters may follow the circumflex to cause some options to +be re-instated, but a hyphen may not appear. 
+.P +Some PCRE2-specific options can be changed by the same mechanism using these +pairs or individual letters: +.sp + aD for PCRE2_EXTRA_ASCII_BSD + aS for PCRE2_EXTRA_ASCII_BSS + aW for PCRE2_EXTRA_ASCII_BSW + aP for PCRE2_EXTRA_ASCII_POSIX and PCRE2_EXTRA_ASCII_DIGIT + aT for PCRE2_EXTRA_ASCII_DIGIT + r for PCRE2_EXTRA_CASELESS_RESTRICT + J for PCRE2_DUPNAMES + U for PCRE2_UNGREEDY +.sp +However, except for 'r', these are not unset by (?^), which is equivalent to +(?-imnrsx). If 'a' is not followed by any of the upper case letters shown +above, it sets (or unsets) all the ASCII options. +.P +PCRE2_EXTRA_ASCII_DIGIT has no additional effect when PCRE2_EXTRA_ASCII_POSIX +is set, but including it in (?aP) means that (?-aP) suppresses all ASCII +restrictions for POSIX classes. +.P +When one of these option changes occurs at top level (that is, not inside group +parentheses), the change applies until a subsequent change, or the end of the +pattern. An option change within a group (see below for a description of +groups) affects only that part of the group that follows it. At the end of the +group these options are reset to the state they were before the group. For +example, +.sp + (a(?i)b)c +.sp +matches abc and aBc and no other strings (assuming PCRE2_CASELESS is not set +externally). Any changes made in one alternative do carry on into subsequent +branches within the same group. For example, +.sp + (a(?i)b|c) +.sp +matches "ab", "aB", "c", and "C", even though when matching "C" the first +branch is abandoned before the option setting. This is because the effects of +option settings happen at compile time. There would be some very weird +behaviour otherwise. +.P +As a convenient shorthand, if any option settings are required at the start of +a non-capturing group (see the next section), the option letters may +appear between the "?" and the ":". Thus the two patterns +.sp + (?i:saturday|sunday) + (?:(?i)saturday|sunday) +.sp +match exactly the same set of strings. 
+.P +\fBNote:\fP There are other PCRE2-specific options, applying to the whole +pattern, which can be set by the application when the compiling function is +called. In addition, the pattern can contain special leading sequences such as +(*CRLF) to override what the application has set or what has been defaulted. +Details are given in the section entitled +.\" HTML +.\" +"Newline sequences" +.\" +above. There are also the (*UTF) and (*UCP) leading sequences that can be used +to set UTF and Unicode property modes; they are equivalent to setting the +PCRE2_UTF and PCRE2_UCP options, respectively. However, the application can set +the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, which lock out the use of the +(*UTF) and (*UCP) sequences. +. +. +.\" HTML +.SH GROUPS +.rs +.sp +Groups are delimited by parentheses (round brackets), which can be nested. +Turning part of a pattern into a group does two things: +.sp +1. It localizes a set of alternatives. For example, the pattern +.sp + cat(aract|erpillar|) +.sp +matches "cataract", "caterpillar", or "cat". Without the parentheses, it would +match "cataract", "erpillar" or an empty string. +.sp +2. It creates a "capture group". This means that, when the whole pattern +matches, the portion of the subject string that matched the group is passed +back to the caller, separately from the portion that matched the whole pattern. +(This applies only to the traditional matching function; the DFA matching +function does not support capturing.) +.P +Opening parentheses are counted from left to right (starting from 1) to obtain +numbers for capture groups. For example, if the string "the red king" is +matched against the pattern +.sp + the ((red|white) (king|queen)) +.sp +the captured substrings are "red king", "red", and "king", and are numbered 1, +2, and 3, respectively. +.P +The fact that plain parentheses fulfil two functions is not always helpful. +There are often times when grouping is required without capturing. 
If an +opening parenthesis is followed by a question mark and a colon, the group +does not do any capturing, and is not counted when computing the number of any +subsequent capture groups. For example, if the string "the white queen" +is matched against the pattern +.sp + the ((?:red|white) (king|queen)) +.sp +the captured substrings are "white queen" and "queen", and are numbered 1 and +2. The maximum number of capture groups is 65535. +.P +As a convenient shorthand, if any option settings are required at the start of +a non-capturing group, the option letters may appear between the "?" and the +":". Thus the two patterns +.sp + (?i:saturday|sunday) + (?:(?i)saturday|sunday) +.sp +match exactly the same set of strings. Because alternative branches are tried +from left to right, and options are not reset until the end of the group is +reached, an option setting in one branch does affect subsequent branches, so +the above patterns match "SUNDAY" as well as "Saturday". +. +. +.\" HTML +.SH "DUPLICATE GROUP NUMBERS" +.rs +.sp +Perl 5.10 introduced a feature whereby each alternative in a group uses the +same numbers for its capturing parentheses. Such a group starts with (?| and is +itself a non-capturing group. For example, consider this pattern: +.sp + (?|(Sat)ur|(Sun))day +.sp +Because the two alternatives are inside a (?| group, both sets of capturing +parentheses are numbered one. Thus, when the pattern matches, you can look +at captured substring number one, whichever alternative matched. This construct +is useful when you want to capture part, but not all, of one of a number of +alternatives. Inside a (?| group, parentheses are numbered as usual, but the +number is reset at the start of each branch. The numbers of any capturing +parentheses that follow the whole group start after the highest number used in +any branch. The following example is taken from the Perl documentation. The +numbers underneath show in which buffer the captured content will be stored. 
+.sp + # before ---------------branch-reset----------- after + / ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x + # 1 2 2 3 2 3 4 +.sp +A backreference to a capture group uses the most recent value that is set for +the group. The following pattern matches "abcabc" or "defdef": +.sp + /(?|(abc)|(def))\e1/ +.sp +In contrast, a subroutine call to a capture group always refers to the +first one in the pattern with the given number. The following pattern matches +"abcabc" or "defabc": +.sp + /(?|(abc)|(def))(?1)/ +.sp +A relative reference such as (?-1) is no different: it is just a convenient way +of computing an absolute group number. +.P +If a +.\" HTML +.\" +condition test +.\" +for a group's having matched refers to a non-unique number, the test is +true if any group with that number has matched. +.P +An alternative approach to using this "branch reset" feature is to use +duplicate named groups, as described in the next section. +. +. +.SH "NAMED CAPTURE GROUPS" +.rs +.sp +Identifying capture groups by number is simple, but it can be very hard to keep +track of the numbers in complicated patterns. Furthermore, if an expression is +modified, the numbers may change. To help with this difficulty, PCRE2 supports +the naming of capture groups. This feature was not added to Perl until release +5.10. Python had the feature earlier, and PCRE1 introduced it at release 4.0, +using the Python syntax. PCRE2 supports both the Perl and the Python syntax. +.P +In PCRE2, a capture group can be named in one of three ways: (?<name>...) or +(?'name'...) as in Perl, or (?P<name>...) as in Python. Names may be up to 128 +code units long. When PCRE2_UTF is not set, they may contain only ASCII +alphanumeric characters and underscores, but must start with a non-digit. When +PCRE2_UTF is set, the syntax of group names is extended to allow any Unicode +letter or Unicode decimal digit. 
In other words, group names must match one of +these patterns: +.sp + ^[_A-Za-z][_A-Za-z0-9]*\ez when PCRE2_UTF is not set + ^[_\ep{L}][_\ep{L}\ep{Nd}]*\ez when PCRE2_UTF is set +.sp +References to capture groups from other parts of the pattern, such as +.\" HTML +.\" +backreferences, +.\" +.\" HTML +.\" +recursion, +.\" +and +.\" HTML +.\" +conditions, +.\" +can all be made by name as well as by number. +.P +Named capture groups are allocated numbers as well as names, exactly as +if the names were not present. In both PCRE2 and Perl, capture groups +are primarily identified by numbers; any names are just aliases for these +numbers. The PCRE2 API provides function calls for extracting the complete +name-to-number translation table from a compiled pattern, as well as +convenience functions for extracting captured substrings by name. +.P +\fBWarning:\fP When more than one capture group has the same number, as +described in the previous section, a name given to one of them applies to all +of them. Perl allows identically numbered groups to have different names. +Consider this pattern, where there are two capture groups, both numbered 1: +.sp + (?|(?<AA>aa)|(?<BB>bb)) +.sp +Perl allows this, with both names AA and BB as aliases of group 1. Thus, after +a successful match, both names yield the same value (either "aa" or "bb"). +.P +In an attempt to reduce confusion, PCRE2 does not allow the same group number +to be associated with more than one name. The example above provokes a +compile-time error. However, there is still scope for confusion. Consider this +pattern: +.sp + (?|(?<AA>aa)|(bb)) +.sp +Although the second group number 1 is not explicitly named, the name AA is +still an alias for any group 1. Whether the pattern matches "aa" or "bb", a +reference by name to group AA yields the matched string. 
+.P +By default, a name must be unique within a pattern, except that duplicate names +are permitted for groups with the same number, for example: +.sp + (?|(?<AA>aa)|(?<AA>bb)) +.sp +The duplicate name constraint can be disabled by setting the PCRE2_DUPNAMES +option at compile time, or by the use of (?J) within the pattern, as described +in the section entitled +.\" HTML +.\" +"Internal Option Setting" +.\" +above. +.P +Duplicate names can be useful for patterns where only one instance of the named +capture group can match. Suppose you want to match the name of a weekday, +either as a 3-letter abbreviation or as the full name, and in both cases you +want to extract the abbreviation. This pattern (ignoring the line breaks) does +the job: +.sp + (?J) + (?<DN>Mon|Fri|Sun)(?:day)?| + (?<DN>Tue)(?:sday)?| + (?<DN>Wed)(?:nesday)?| + (?<DN>Thu)(?:rsday)?| + (?<DN>Sat)(?:urday)? +.sp +There are five capture groups, but only one is ever set after a match. The +convenience functions for extracting the data by name return the substring for +the first (and in this example, the only) group of that name that matched. This +saves searching to find which numbered group it was. (An alternative way of +solving this problem is to use a "branch reset" group, as described in the +previous section.) +.P +If you make a backreference to a non-unique named group from elsewhere in the +pattern, the groups to which the name refers are checked in the order in which +they appear in the overall pattern. The first one that is set is used for the +reference. For example, this pattern matches both "foofoo" and "barbar" but not +"foobar" or "barfoo": +.sp + (?J)(?:(?<n>foo)|(?<n>bar))\ek<n> +.sp +.P +If you make a subroutine call to a non-unique named group, the one that +corresponds to the first occurrence of the name is used. In the absence of +duplicate numbers this is the one with the lowest number. 
+.P +If you use a named reference in a condition +test (see the +.\" +.\" HTML +.\" +section about conditions +.\" +below), either to check whether a capture group has matched, or to check for +recursion, all groups with the same name are tested. If the condition is true +for any one of them, the overall condition is true. This is the same behaviour +as testing by number. For further details of the interfaces for handling named +capture groups, see the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +. +. +.SH REPETITION +.rs +.sp +Repetition is specified by quantifiers, which may follow any one of these +items: +.sp + a literal data character + the dot metacharacter + the \eC escape sequence + the \eR escape sequence + the \eX escape sequence + any escape sequence that matches a single character + a character class + a backreference + a parenthesized group (including lookaround assertions) + a subroutine call (recursive or otherwise) +.sp +If a quantifier does not follow a repeatable item, an error occurs. The +general repetition quantifier specifies a minimum and maximum number of +permitted matches by giving two numbers in curly brackets (braces), separated +by a comma. The numbers must be less than 65536, and the first must be less +than or equal to the second. For example, +.sp + z{2,4} +.sp +matches "zz", "zzz", or "zzzz". A closing brace on its own is not a special +character. If the second number is omitted, but the comma is present, there is +no upper limit; if the second number and the comma are both omitted, the +quantifier specifies an exact number of required matches. Thus +.sp + [aeiou]{3,} +.sp +matches at least 3 successive vowels, but may match many more, whereas +.sp + \ed{8} +.sp +matches exactly 8 digits. If the first number is omitted, the lower limit is +taken as zero; in this case the upper limit must be present. +.sp + X{,4} is interpreted as X{0,4} +.sp +This is a change in behaviour that happened in Perl 5.34.0 and PCRE2 10.43. 
In +earlier versions such a sequence was not interpreted as a quantifier. Other +regular expression engines may behave either way. +.P +If the characters that follow an opening brace do not match the syntax of a +quantifier, the brace is taken as a literal character. In particular, this +means that {,} is a literal string of three characters. +.P +Note that not every opening brace is potentially the start of a quantifier +because braces are used in other items such as \eN{U+345} or \ek{name}. +.P +In UTF modes, quantifiers apply to characters rather than to individual code +units. Thus, for example, \ex{100}{2} matches two characters, each of +which is represented by a two-byte sequence in a UTF-8 string. Similarly, +\eX{3} matches three Unicode extended grapheme clusters, each of which may be +several code units long (and they may be of different lengths). +.P +The quantifier {0} is permitted, causing the expression to behave as if the +previous item and the quantifier were not present. This may be useful for +capture groups that are referenced as +.\" HTML +.\" +subroutines +.\" +from elsewhere in the pattern (but see also the section entitled +.\" HTML +.\" +"Defining capture groups for use by reference only" +.\" +below). Except for parenthesized groups, items that have a {0} quantifier are +omitted from the compiled pattern. +.P +For convenience, the three most common quantifiers have single-character +abbreviations: +.sp + * is equivalent to {0,} + + is equivalent to {1,} + ? is equivalent to {0,1} +.sp +It is possible to construct infinite loops by following a group that can match +no characters with a quantifier that has no upper limit, for example: +.sp + (a?)* +.sp +Earlier versions of Perl and PCRE1 used to give an error at compile time for +such patterns. 
However, because there are cases where this can be useful, such +patterns are now accepted, but whenever an iteration of such a group matches no +characters, matching moves on to the next item in the pattern instead of +repeatedly matching an empty string. This does not prevent backtracking into +any of the iterations if a subsequent item fails to match. +.P +By default, quantifiers are "greedy", that is, they match as much as possible +(up to the maximum number of permitted repetitions), without causing the rest +of the pattern to fail. The classic example of where this gives problems is in +trying to match comments in C programs. These appear between /* and */ and +within the comment, individual * and / characters may appear. An attempt to +match C comments by applying the pattern +.sp + /\e*.*\e*/ +.sp +to the string +.sp + /* first comment */ not comment /* second comment */ +.sp +fails, because it matches the entire string owing to the greediness of the .* +item. However, if a quantifier is followed by a question mark, it ceases to be +greedy, and instead matches the minimum number of times possible, so the +pattern +.sp + /\e*.*?\e*/ +.sp +does the right thing with C comments. The meaning of the various quantifiers is +not otherwise changed, just the preferred number of matches. Do not confuse +this use of question mark with its use as a quantifier in its own right. +Because it has two uses, it can sometimes appear doubled, as in +.sp + \ed??\ed +.sp +which matches one digit by preference, but can match two if that is the only +way the rest of the pattern matches. +.P +If the PCRE2_UNGREEDY option is set (an option that is not available in Perl), +the quantifiers are not greedy by default, but individual ones can be made +greedy by following them with a question mark. In other words, it inverts the +default behaviour. 
+.P +When a parenthesized group is quantified with a minimum repeat count that +is greater than 1 or with a limited maximum, more memory is required for the +compiled pattern, in proportion to the size of the minimum or maximum. +.P +If a pattern starts with .* or .{0,} and the PCRE2_DOTALL option (equivalent +to Perl's /s) is set, thus allowing the dot to match newlines, the pattern is +implicitly anchored, because whatever follows will be tried against every +character position in the subject string, so there is no point in retrying the +overall match at any position after the first. PCRE2 normally treats such a +pattern as though it were preceded by \eA. +.P +In cases where it is known that the subject string contains no newlines, it is +worth setting PCRE2_DOTALL in order to obtain this optimization, or +alternatively, using ^ to indicate anchoring explicitly. +.P +However, there are some cases where the optimization cannot be used. When .* +is inside capturing parentheses that are the subject of a backreference +elsewhere in the pattern, a match at the start may fail where a later one +succeeds. Consider, for example: +.sp + (.*)abc\e1 +.sp +If the subject is "xyz123abc123" the match point is the fourth character. For +this reason, such a pattern is not implicitly anchored. +.P +Another case where implicit anchoring is not applied is when the leading .* is +inside an atomic group. Once again, a match at the start may fail where a later +one succeeds. Consider this pattern: +.sp + (?>.*?a)b +.sp +It matches "ab" in the subject "aab". The use of the backtracking control verbs +(*PRUNE) and (*SKIP) also disable this optimization, and there is an option, +PCRE2_NO_DOTSTAR_ANCHOR, to do so explicitly. +.P +When a capture group is repeated, the value captured is the substring that +matched the final iteration. For example, after +.sp + (tweedle[dume]{3}\es*)+ +.sp +has matched "tweedledum tweedledee" the value of the captured substring is +"tweedledee". 
However, if there are nested capture groups, the corresponding +captured values may have been set in previous iterations. For example, after +.sp + (a|(b))+ +.sp +matches "aba" the value of the second captured substring is "b". +. +. +.\" HTML +.SH "ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS" +.rs +.sp +With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy") +repetition, failure of what follows normally causes the repeated item to be +re-evaluated to see if a different number of repeats allows the rest of the +pattern to match. Sometimes it is useful to prevent this, either to change the +nature of the match, or to cause it to fail earlier than it otherwise might, when +the author of the pattern knows there is no point in carrying on. +.P +Consider, for example, the pattern \ed+foo when applied to the subject line +.sp + 123456bar +.sp +After matching all 6 digits and then failing to match "foo", the normal +action of the matcher is to try again with only 5 digits matching the \ed+ +item, and then with 4, and so on, before ultimately failing. "Atomic grouping" +(a term taken from Jeffrey Friedl's book) provides the means for specifying +that once a group has matched, it is not to be re-evaluated in this way. +.P +If we use atomic grouping for the previous example, the matcher gives up +immediately on failing to match "foo" the first time. The notation is a kind of +special parenthesis, starting with (?> as in this example: +.sp + (?>\ed+)foo +.sp +Perl 5.28 introduced an experimental alphabetic form starting with (* which may +be easier to remember: +.sp + (*atomic:\ed+)foo +.sp +This kind of parenthesized group "locks up" the part of the pattern it contains +once it has matched, and a failure further into the pattern is prevented from +backtracking into it. Backtracking past it to previous items, however, works as +normal. 
+.P +An alternative description is that a group of this type matches exactly the +string of characters that an identical standalone pattern would match, if +anchored at the current point in the subject string. +.P +Atomic groups are not capture groups. Simple cases such as the above example +can be thought of as a maximizing repeat that must swallow everything it can. +So, while both \ed+ and \ed+? are prepared to adjust the number of digits they +match in order to make the rest of the pattern match, (?>\ed+) can only match +an entire sequence of digits. +.P +Atomic groups in general can of course contain arbitrarily complicated +expressions, and can be nested. However, when the contents of an atomic +group is just a single repeated item, as in the example above, a simpler +notation, called a "possessive quantifier" can be used. This consists of an +additional + character following a quantifier. Using this notation, the +previous example can be rewritten as +.sp + \ed++foo +.sp +Note that a possessive quantifier can be used with an entire group, for +example: +.sp + (abc|xyz){2,3}+ +.sp +Possessive quantifiers are always greedy; the setting of the PCRE2_UNGREEDY +option is ignored. They are a convenient notation for the simpler forms of +atomic group. However, there is no difference in the meaning of a possessive +quantifier and the equivalent atomic group, though there may be a performance +difference; possessive quantifiers should be slightly faster. +.P +The possessive quantifier syntax is an extension to the Perl 5.8 syntax. +Jeffrey Friedl originated the idea (and the name) in the first edition of his +book. Mike McCloskey liked it, so implemented it when he built Sun's Java +package, and PCRE1 copied it from there. It found its way into Perl at release +5.10. +.P +PCRE2 has an optimization that automatically "possessifies" certain simple +pattern constructs. 
For example, the sequence A+B is treated as A++B because +there is no point in backtracking into a sequence of A's when B must follow. +This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting +the pattern with (*NO_AUTO_POSSESS). +.P +When a pattern contains an unlimited repeat inside a group that can itself be +repeated an unlimited number of times, the use of an atomic group is the only +way to avoid some failing matches taking a very long time indeed. The pattern +.sp + (\eD+|<\ed+>)*[!?] +.sp +matches an unlimited number of substrings that either consist of non-digits, or +digits enclosed in <>, followed by either ! or ?. When it matches, it runs +quickly. However, if it is applied to +.sp + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +.sp +it takes a long time before reporting failure. This is because the string can +be divided between the internal \eD+ repeat and the external * repeat in a +large number of ways, and all have to be tried. (The example uses [!?] rather +than a single character at the end, because both PCRE2 and Perl have an +optimization that allows for fast failure when a single character is used. They +remember the last single character that is required for a match, and fail early +if it is not present in the string.) If the pattern is changed so that it uses +an atomic group, like this: +.sp + ((?>\eD+)|<\ed+>)*[!?] +.sp +sequences of non-digits cannot be broken, and failure happens quickly. +. +. +.\" HTML +.SH "BACKREFERENCES" +.rs +.sp +Outside a character class, a backslash followed by a digit greater than 0 (and +possibly further digits) is a backreference to a capture group earlier (that +is, to its left) in the pattern, provided there have been that many previous +capture groups. +.P +However, if the decimal number following the backslash is less than 8, it is +always taken as a backreference, and causes an error only if there are not that +many capture groups in the entire pattern. 
In other words, the group that is +referenced need not be to the left of the reference for numbers less than 8. A +"forward backreference" of this type can make sense when a repetition is +involved and the group to the right has participated in an earlier iteration. +.P +It is not possible to have a numerical "forward backreference" to a group whose +number is 8 or more using this syntax because a sequence such as \e50 is +interpreted as a character defined in octal. See the subsection entitled +"Non-printing characters" +.\" HTML +.\" +above +.\" +for further details of the handling of digits following a backslash. Other +forms of backreferencing do not suffer from this restriction. In particular, +there is no problem when named capture groups are used (see below). +.P +Another way of avoiding the ambiguity inherent in the use of digits following a +backslash is to use the \eg escape sequence. This escape must be followed by a +signed or unsigned number, optionally enclosed in braces. These examples are +all identical: +.sp + (ring), \e1 + (ring), \eg1 + (ring), \eg{1} +.sp +An unsigned number specifies an absolute reference without the ambiguity that +is present in the older syntax. It is also useful when literal digits follow +the reference. A signed number is a relative reference. Consider this example: +.sp + (abc(def)ghi)\eg{-1} +.sp +The sequence \eg{-1} is a reference to the capture group whose number is one +less than the number of the next group to be started, so in this example (where +the next group would be numbered 3) it is equivalent to \e2, and \eg{-2} would +be equivalent to \e1. Note that if this construct is inside a capture group, +that group is included in the count, so in this example \eg{-2} also refers to +group 1: +.sp + (A)(\eg{-2}B) +.sp +The use of relative references can be helpful in long patterns, and also in +patterns that are created by joining together fragments that contain references +within themselves. 
+.P +The sequence \eg{+1} is a reference to the next capture group that is started +after this item, and \eg{+2} refers to the one after that, and so on. This kind +of forward reference can be useful in patterns that repeat. Perl does not +support the use of + in this way. +.P +A backreference matches whatever actually most recently matched the capture +group in the current subject string, rather than anything at all that matches +the group (see +.\" HTML +.\" +"Groups as subroutines" +.\" +below for a way of doing that). So the pattern +.sp + (sens|respons)e and \e1ibility +.sp +matches "sense and sensibility" and "response and responsibility", but not +"sense and responsibility". If caseful matching is in force at the time of the +backreference, the case of letters is relevant. For example, +.sp + ((?i)rah)\es+\e1 +.sp +matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original +capture group is matched caselessly. +.P +There are several different ways of writing backreferences to named capture +groups. The .NET syntax is \ek{name}, the Python syntax is (?P=name), and the +original Perl syntax is \ek<name> or \ek'name'. All of these are now supported +by both Perl and PCRE2. Perl 5.10's unified backreference syntax, in which \eg +can be used for both numeric and named references, is also supported by PCRE2. +We could rewrite the above example in any of the following ways: +.sp + (?<p1>(?i)rah)\es+\ek<p1> + (?'p1'(?i)rah)\es+\ek{p1} + (?P<p1>(?i)rah)\es+(?P=p1) + (?<p1>(?i)rah)\es+\eg{p1} +.sp +A capture group that is referenced by name may appear in the pattern before or +after the reference. +.P +There may be more than one backreference to the same group. If a group has not +actually been used in a particular match, backreferences to it always fail by +default. For example, the pattern +.sp + (a|(bc))\e2 +.sp +always fails if it starts to match "a" rather than "bc". 
However, if the +PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a backreference to an +unset value matches an empty string. +.P +Because there may be many capture groups in a pattern, all digits following a +backslash are taken as part of a potential backreference number. If the pattern +continues with a digit character, some delimiter must be used to terminate the +backreference. If the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, this +can be white space. Otherwise, the \eg{} syntax or an empty comment (see +.\" HTML +.\" +"Comments" +.\" +below) can be used. +. +. +.SS "Recursive backreferences" +.rs +.sp +A backreference that occurs inside the group to which it refers fails when the +group is first used, so, for example, (a\e1) never matches. However, such +references can be useful inside repeated groups. For example, the pattern +.sp + (a|b\e1)+ +.sp +matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of +the group, the backreference matches the character string corresponding to the +previous iteration. In order for this to work, the pattern must be such that +the first iteration does not need to match the backreference. This can be done +using alternation, as in the example above, or by a quantifier with a minimum +of zero. +.P +For versions of PCRE2 less than 10.25, backreferences of this type used to +cause the group that they reference to be treated as an +.\" HTML +.\" +atomic group. +.\" +This restriction no longer applies, and backtracking into such groups can occur +as normal. +. +. +.\" HTML +.SH ASSERTIONS +.rs +.sp +An assertion is a test on the characters following or preceding the current +matching point that does not consume any characters. The simple assertions +coded as \eb, \eB, \eA, \eG, \eZ, \ez, ^ and $ are described +.\" HTML +.\" +above. +.\" +.P +More complicated assertions are coded as parenthesized groups. 
There are two +kinds: those that look ahead of the current position in the subject string, and +those that look behind it, and in each case an assertion may be positive (must +match for the assertion to be true) or negative (must not match for the +assertion to be true). An assertion group is matched in the normal way, +and if it is true, matching continues after it, but with the matching position +in the subject string reset to what it was before the assertion was processed. +.P +The Perl-compatible lookaround assertions are atomic. If an assertion is true, +but there is a subsequent matching failure, there is no backtracking into the +assertion. However, there are some cases where non-atomic assertions can be +useful. PCRE2 has some support for these, described in the section entitled +.\" HTML +.\" +"Non-atomic assertions" +.\" +below, but they are not Perl-compatible. +.P +A lookaround assertion may appear as the condition in a +.\" HTML +.\" +conditional group +.\" +(see below). In this case, the result of matching the assertion determines +which branch of the condition is followed. +.P +Assertion groups are not capture groups. If an assertion contains capture +groups within it, these are counted for the purposes of numbering the capture +groups in the whole pattern. Within each branch of an assertion, locally +captured substrings may be referenced in the usual way. For example, a sequence +such as (.)\eg{-1} can be used to check that two adjacent characters are the +same. +.P +When a branch within an assertion fails to match, any substrings that were +captured are discarded (as happens with any pattern branch that fails to +match). A negative assertion is true only when all its branches fail to match; +this means that no captured substrings are ever retained after a successful +negative assertion. When an assertion contains a matching branch, what happens +depends on the type of assertion. 
+.P +For a positive assertion, internally captured substrings in the successful +branch are retained, and matching continues with the next pattern item after +the assertion. For a negative assertion, a matching branch means that the +assertion is not true. If such an assertion is being used as a condition in a +.\" HTML +.\" +conditional group +.\" +(see below), captured substrings are retained, because matching continues with +the "no" branch of the condition. For other failing negative assertions, +control passes to the previous backtracking point, thus discarding any captured +strings within the assertion. +.P +Most assertion groups may be repeated; though it makes no sense to assert the +same thing several times, the side effect of capturing in positive assertions +may occasionally be useful. However, an assertion that forms the condition for +a conditional group may not be quantified. PCRE2 used to restrict the +repetition of assertions, but from release 10.35 the only restriction is that +an unlimited maximum repetition is changed to be one more than the minimum. For +example, {3,} is treated as {3,4}. +. +. +.SS "Alphabetic assertion names" +.rs +.sp +Traditionally, symbolic sequences such as (?= and (?<= have been used to +specify lookaround assertions. Perl 5.28 introduced some experimental +alphabetic alternatives which might be easier to remember. They all start with +(* instead of (? and must be written using lower case letters. PCRE2 supports +the following synonyms: +.sp + (*positive_lookahead: or (*pla: is the same as (?= + (*negative_lookahead: or (*nla: is the same as (?! + (*positive_lookbehind: or (*plb: is the same as (?<= + (*negative_lookbehind: or (*nlb: is the same as (? +.SS "Lookbehind assertions" +.rs +.sp +Lookbehind assertions start with (?<= for positive assertions and (? +.\" +(see above) +.\" +can be used instead of a lookbehind assertion at the start of a pattern to get +round the length limit restriction. 
+.P +In UTF-8 and UTF-16 modes, PCRE2 does not allow the \eC escape (which matches a +single code unit even in a UTF mode) to appear in lookbehind assertions, +because it makes it impossible to calculate the length of the lookbehind. The +\eX and \eR escapes, which can match different numbers of code units, are never +permitted in lookbehinds. +.P +.\" HTML +.\" +"Subroutine" +.\" +calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long +as the called capture group matches a limited-length string. However, +.\" HTML +.\" +recursion, +.\" +that is, a "subroutine" call into a group that is already active, +is not supported. +.P +PCRE2 supports backreferences in lookbehinds, but only if certain conditions +are met. The PCRE2_MATCH_UNSET_BACKREF option must not be set, there must be no +use of (?| in the pattern (it creates duplicate group numbers), and if the +backreference is by name, the name must be unique. Of course, the referenced +group must itself match a limited length substring. The following pattern +matches words containing at least two characters that begin and end with the +same character: +.sp + \eb(\ew)\ew++(?<=\e1) +.P +Possessive quantifiers can be used in conjunction with lookbehind assertions to +specify efficient matching at the end of subject strings. Consider a simple +pattern such as +.sp + abcd$ +.sp +when applied to a long string that does not match. Because matching proceeds +from left to right, PCRE2 will look for each "a" in the subject and then see if +what follows matches the rest of the pattern. If the pattern is specified as +.sp + ^.*abcd$ +.sp +the initial .* matches the entire string at first, but when this fails (because +there is no following "a"), it backtracks to match all but the last character, +then all but the last two characters, and so on. Once again the search for "a" +covers the entire string, from right to left, so we are no better off. 
However, +if the pattern is written as +.sp + ^.*+(?<=abcd) +.sp +there can be no backtracking for the .*+ item because of the possessive +quantifier; it can match only the entire string. The subsequent lookbehind +assertion does a single test on the last four characters. If it fails, the +match fails immediately. For long strings, this approach makes a significant +difference to the processing time. +. +. +.SS "Using multiple assertions" +.rs +.sp +Several assertions (of any sort) may occur in succession. For example, +.sp + (?<=\ed{3})(? +.SH "NON-ATOMIC ASSERTIONS" +.rs +.sp +Traditional lookaround assertions are atomic. That is, if an assertion is true, +but there is a subsequent matching failure, there is no backtracking into the +assertion. However, there are some cases where non-atomic positive assertions +can be useful. PCRE2 provides these using the following syntax: +.sp + (*non_atomic_positive_lookahead: or (*napla: or (?* + (*non_atomic_positive_lookbehind: or (*naplb: or (?<* +.sp +Consider the problem of finding the right-most word in a string that also +appears earlier in the string, that is, it must appear at least twice in total. +This pattern returns the required result as captured substring 1: +.sp + ^(?x)(*napla: .* \eb(\ew++)) (?> .*? \eb\e1\eb ){2} +.sp +For a subject such as "word1 word2 word3 word2 word3 word4" the result is +"word3". How does it work? At the start, ^(?x) anchors the pattern and sets the +"x" option, which causes white space (introduced for readability) to be +ignored. Inside the assertion, the greedy .* at first consumes the entire +string, but then has to backtrack until the rest of the assertion can match a +word, which is captured by group 1. In other words, when the assertion first +succeeds, it captures the right-most word in the string. +.P +The current matching point is then reset to the start of the subject, and the +rest of the pattern match checks for two occurrences of the captured word, +using an ungreedy .*? 
to scan from the left. If this succeeds, we are done, but +if the last word in the string does not occur twice, this part of the pattern +fails. If a traditional atomic lookahead (?= or (*pla: had been used, the +assertion could not be re-entered, and the whole match would fail. The pattern +would succeed only if the very last word in the subject was found twice. +.P +Using a non-atomic lookahead, however, means that when the last word does not +occur twice in the string, the lookahead can backtrack and find the second-last +word, and so on, until either the match succeeds, or all words have been +tested. +.P +Two conditions must be met for a non-atomic assertion to be useful: the +contents of one or more capturing groups must change after a backtrack into the +assertion, and there must be a backreference to a changed group later in the +pattern. If this is not the case, the rest of the pattern match fails exactly +as before because nothing has changed, so using a non-atomic assertion just +wastes resources. +.P +There is one exception to backtracking into a non-atomic assertion. If an +(*ACCEPT) control verb is triggered, the assertion succeeds atomically. That +is, a subsequent match failure cannot backtrack into the assertion. +.P +Non-atomic assertions are not supported by the alternative matching function +\fBpcre2_dfa_match()\fP. They are supported by JIT, but only if they do not +contain any control verbs such as (*ACCEPT). (This may change in future). Note +that assertions that appear as conditions for +.\" HTML +.\" +conditional groups +.\" +(see below) must be atomic. +. +. +.SH "SCRIPT RUNS" +.rs +.sp +In concept, a script run is a sequence of characters that are all from the same +Unicode script such as Latin or Greek. However, because some scripts are +commonly used together, and because some diacritical and other marks are used +with multiple scripts, it is not that simple. 
There is a full description of +the rules that PCRE2 uses in the section entitled +.\" HTML +.\" +"Script Runs" +.\" +in the +.\" HREF +\fBpcre2unicode\fP +.\" +documentation. +.P +If part of a pattern is enclosed between (*script_run: or (*sr: and a closing +parenthesis, it fails if the sequence of characters that it matches is not a +script run. After a failure, normal backtracking occurs. Script runs can be +used to detect spoofing attacks using characters that look the same, but are +from different scripts. The string "paypal.com" is an infamous example, where +the letters could be a mixture of Latin and Cyrillic. This pattern ensures that +the matched characters in a sequence of non-spaces that follow white space are +a script run: +.sp + \es+(*sr:\eS+) +.sp +To be sure that they are all from the Latin script (for example), a lookahead +can be used: +.sp + \es+(?=\ep{Latin})(*sr:\eS+) +.sp +This works as long as the first character is expected to be a character in that +script, and not (for example) punctuation, which is allowed with any script. If +this is not the case, a more creative lookahead is needed. For example, if +digits, underscore, and dots are permitted at the start: +.sp + \es+(?=[0-9_.]*\ep{Latin})(*sr:\eS+) +.sp +.P +In many cases, backtracking into a script run pattern fragment is not +desirable. The script run can employ an atomic group to prevent this. Because +this is a common requirement, a shorthand notation is provided by +(*atomic_script_run: or (*asr: +.sp + (*asr:...) is the same as (*sr:(?>...)) +.sp +Note that the atomic group is inside the script run. Putting it outside would +not prevent backtracking into the script run pattern. +.P +Support for script runs is not available if PCRE2 is compiled without Unicode +support. A compile-time error is given if any of the above constructs is +encountered. 
Script runs are not supported by the alternate matching function, +\fBpcre2_dfa_match()\fP because they use the same mechanism as capturing +parentheses. +.P +\fBWarning:\fP The (*ACCEPT) control verb +.\" HTML +.\" +(see below) +.\" +should not be used within a script run group, because it causes an immediate +exit from the group, bypassing the script run checking. +. +. +.\" HTML +.SH "CONDITIONAL GROUPS" +.rs +.sp +It is possible to cause the matching process to obey a pattern fragment +conditionally or to choose between two alternative fragments, depending on +the result of an assertion, or whether a specific capture group has +already been matched. The two possible forms of conditional group are: +.sp + (?(condition)yes-pattern) + (?(condition)yes-pattern|no-pattern) +.sp +If the condition is satisfied, the yes-pattern is used; otherwise the +no-pattern (if present) is used. An absent no-pattern is equivalent to an empty +string (it always matches). If there are more than two alternatives in the +group, a compile-time error occurs. Each of the two alternatives may itself +contain nested groups of any form, including conditional groups; the +restriction to two alternatives applies only at the level of the condition +itself. This pattern fragment is an example where the alternatives are complex: +.sp + (?(1) (A|B|C) | (D | (?(2)E|F) | E) ) +.sp +.P +There are five kinds of condition: references to capture groups, references to +recursion, two pseudo-conditions called DEFINE and VERSION, and assertions. +. +. +.SS "Checking for a used capture group by number" +.rs +.sp +If the text between the parentheses consists of a sequence of digits, the +condition is true if a capture group of that number has previously matched. If +there is more than one capture group with the same number (see the earlier +.\" +.\" HTML +.\" +section about duplicate group numbers), +.\" +the condition is true if any of them have matched. 
An alternative notation, +which is a PCRE2 extension, not supported by Perl, is to precede the digits +with a plus or minus sign. In this case, the group number is relative rather +than absolute. The most recently opened capture group (which could be enclosing +this condition) can be referenced by (?(-1), the next most recent by (?(-2), +and so on. Inside loops it can also make sense to refer to subsequent groups. +The next capture group to be opened can be referenced as (?(+1), and so on. The +value zero in any of these forms is not used; it provokes a compile-time error. +.P +Consider the following pattern, which contains non-significant white space to +make it more readable (assume the PCRE2_EXTENDED option) and to divide it into +three parts for ease of discussion: +.sp + ( \e( )? [^()]+ (?(1) \e) ) +.sp +The first part matches an optional opening parenthesis, and if that +character is present, sets it as the first captured substring. The second part +matches one or more characters that are not parentheses. The third part is a +conditional group that tests whether or not the first capture group +matched. If it did, that is, if subject started with an opening parenthesis, +the condition is true, and so the yes-pattern is executed and a closing +parenthesis is required. Otherwise, since no-pattern is not present, the +conditional group matches nothing. In other words, this pattern matches a +sequence of non-parentheses, optionally enclosed in parentheses. +.P +If you were embedding this pattern in a larger one, you could use a relative +reference: +.sp + ...other stuff... ( \e( )? [^()]+ (?(-1) \e) ) ... +.sp +This makes the fragment independent of the parentheses in the larger pattern. +. +. +.SS "Checking for a used capture group by name" +.rs +.sp +Perl uses the syntax (?()...) or (?('name')...) to test for a used +capture group by name. For compatibility with earlier versions of PCRE1, which +had this facility before Perl, the syntax (?(name)...) 
is also recognized. +Note, however, that undelimited names consisting of the letter R followed by +digits are ambiguous (see the following section). Rewriting the above example +to use a named group gives this: +.sp + (?<OPEN> \e( )? [^()]+ (?(<OPEN>) \e) ) +.sp +If the name used in a condition of this kind is a duplicate, the test is +applied to all groups of the same name, and is true if any one of them has +matched. +. +. +.SS "Checking for pattern recursion" +.rs +.sp +"Recursion" in this sense refers to any subroutine-like call from one part of +the pattern to another, whether or not it is actually recursive. See the +sections entitled +.\" HTML +.\" +"Recursive patterns" +.\" +and +.\" HTML +.\" +"Groups as subroutines" +.\" +below for details of recursion and subroutine calls. +.P +If a condition is the string (R), and there is no capture group with the name +R, the condition is true if matching is currently in a recursion or subroutine +call to the whole pattern or any capture group. If digits follow the letter R, +and there is no group with that name, the condition is true if the most recent +call is into a group with the given number, which must exist somewhere in the +overall pattern. This is a contrived example that is equivalent to a+b: +.sp + ((?(R1)a+|(?1)b)) +.sp +However, in both cases, if there is a capture group with a matching name, the +condition tests for its being set, as described in the section above, instead +of testing for recursion. For example, creating a group with the name R1 by +adding (?<R1>) to the above pattern completely changes its meaning. +.P +If a name preceded by ampersand follows the letter R, for example: +.sp + (?(R&name)...) +.sp +the condition is true if the most recent recursion is into a group of that name +(which must exist within the pattern). +.P +This condition does not check the entire recursion stack. It tests only the +current level. 
If the name used in a condition of this kind is a duplicate, the +test is applied to all groups of the same name, and is true if any one of +them is the most recent recursion. +.P +At "top level", all these recursion test conditions are false. +. +. +.\" HTML +.SS "Defining capture groups for use by reference only" +.rs +.sp +If the condition is the string (DEFINE), the condition is always false, even if +there is a group with the name DEFINE. In this case, there may be only one +alternative in the rest of the conditional group. It is always skipped if +control reaches this point in the pattern; the idea of DEFINE is that it can be +used to define subroutines that can be referenced from elsewhere. (The use of +.\" HTML +.\" +subroutines +.\" +is described below.) For example, a pattern to match an IPv4 address such as +"192.168.23.245" could be written like this (ignore white space and line +breaks): +.sp + (?(DEFINE) (?<byte> 2[0-4]\ed | 25[0-5] | 1\ed\ed | [1-9]?\ed) ) + \eb (?&byte) (\e.(?&byte)){3} \eb +.sp +The first part of the pattern is a DEFINE group inside which another group +named "byte" is defined. This matches an individual component of an IPv4 +address (a number less than 256). When matching takes place, this part of the +pattern is skipped because DEFINE acts like a false condition. The rest of the +pattern uses references to the named group to match the four dot-separated +components of an IPv4 address, insisting on a word boundary at each end. +. +. +.SS "Checking the PCRE2 version" +.rs +.sp +Programs that link with a PCRE2 library can check the version by calling +\fBpcre2_config()\fP with appropriate arguments. Users of applications that do +not have access to the underlying code cannot do this. A special "condition" +called VERSION exists to allow such users to discover which version of PCRE2 +they are dealing with by using this condition to match a string such as +"yesno". VERSION must be followed either by "=" or ">=" and a version number. 
+For example: +.sp + (?(VERSION>=10.4)yes|no) +.sp +This pattern matches "yes" if the PCRE2 version is greater than or equal to 10.4, or +"no" otherwise. The fractional part of the version number may not contain more +than two digits. +. +. +.SS "Assertion conditions" +.rs +.sp +If the condition is not in any of the above formats, it must be a parenthesized +assertion. This may be a positive or negative lookahead or lookbehind +assertion. However, it must be a traditional atomic assertion, not one of the +.\" HTML +.\" +non-atomic assertions. +.\" +.P +Consider this pattern, again containing non-significant white space, and with +the two alternatives on the second line: +.sp + (?(?=[^a-z]*[a-z]) + \ed{2}-[a-z]{3}-\ed{2} | \ed{2}-\ed{2}-\ed{2} ) +.sp +The condition is a positive lookahead assertion that matches an optional +sequence of non-letters followed by a letter. In other words, it tests for the +presence of at least one letter in the subject. If a letter is found, the +subject is matched against the first alternative; otherwise it is matched +against the second. This pattern matches strings in one of the two forms +dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits. +.P +When an assertion that is a condition contains capture groups, any +capturing that occurs in a matching branch is retained afterwards, for both +positive and negative assertions, because matching always continues after the +assertion, whether it succeeds or fails. (Compare non-conditional assertions, +for which captures are retained only for positive assertions that succeed.) +. +. +.\" HTML +.SH COMMENTS +.rs +.sp +There are two ways of including comments in patterns that are processed by +PCRE2. In both cases, the start of the comment must not be in a character +class, nor in the middle of any other sequence of related characters such as +(?: or a group name or number. The characters that make up a comment play +no part in the pattern matching. 
+.P +The sequence (?# marks the start of a comment that continues up to the next +closing parenthesis. Nested parentheses are not permitted. If the +PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, an unescaped # character +also introduces a comment, which in this case continues to immediately after +the next newline character or character sequence in the pattern. Which +characters are interpreted as newlines is controlled by an option passed to the +compiling function or by a special sequence at the start of the pattern, as +described in the section entitled +.\" HTML +.\" +"Newline conventions" +.\" +above. Note that the end of this type of comment is a literal newline sequence +in the pattern; escape sequences that happen to represent a newline do not +count. For example, consider this pattern when PCRE2_EXTENDED is set, and the +default newline convention (a single linefeed character) is in force: +.sp + abc #comment \en still comment +.sp +On encountering the # character, \fBpcre2_compile()\fP skips along, looking for +a newline in the pattern. The sequence \en is still literal at this stage, so +it does not terminate the comment. Only an actual character with the code value +0x0a (the default newline) does so. +. +. +.\" HTML +.SH "RECURSIVE PATTERNS" +.rs +.sp +Consider the problem of matching a string in parentheses, allowing for +unlimited nested parentheses. Without the use of recursion, the best that can +be done is to use a pattern that matches up to some fixed depth of nesting. It +is not possible to handle an arbitrary nesting depth. +.P +For some time, Perl has provided a facility that allows regular expressions to +recurse (amongst other things). It does this by interpolating Perl code in the +expression at run time, and the code can refer to the expression itself. 
A Perl +pattern using code interpolation to solve the parentheses problem can be +created like this: +.sp + $re = qr{\e( (?: (?>[^()]+) | (?p{$re}) )* \e)}x; +.sp +The (?p{...}) item interpolates Perl code at run time, and in this case refers +recursively to the pattern in which it appears. +.P +Obviously, PCRE2 cannot support the interpolation of Perl code. Instead, it +supports special syntax for recursion of the entire pattern, and also for +individual capture group recursion. After its introduction in PCRE1 and Python, +this kind of recursion was subsequently introduced into Perl at release 5.10. +.P +A special item that consists of (? followed by a number greater than zero and a +closing parenthesis is a recursive subroutine call of the capture group of the +given number, provided that it occurs inside that group. (If not, it is a +.\" HTML +.\" +non-recursive subroutine +.\" +call, which is described in the next section.) The special item (?R) or (?0) is +a recursive call of the entire regular expression. +.P +This PCRE2 pattern solves the nested parentheses problem (assume the +PCRE2_EXTENDED option is set so that white space is ignored): +.sp + \e( ( [^()]++ | (?R) )* \e) +.sp +First it matches an opening parenthesis. Then it matches any number of +substrings which can either be a sequence of non-parentheses, or a recursive +match of the pattern itself (that is, a correctly parenthesized substring). +Finally there is a closing parenthesis. Note the use of a possessive quantifier +to avoid backtracking into sequences of non-parentheses. +.P +If this were part of a larger pattern, you would not want to recurse the entire +pattern, so instead you could use this: +.sp + ( \e( ( [^()]++ | (?1) )* \e) ) +.sp +We have put the pattern into parentheses, and caused the recursion to refer to +them instead of the whole pattern. +.P +In a larger pattern, keeping track of parenthesis numbers can be tricky. This +is made easier by the use of relative references. 
Instead of (?1) in the +pattern above you can write (?-2) to refer to the second most recently opened +parentheses preceding the recursion. In other words, a negative number counts +capturing parentheses leftwards from the point at which it is encountered. +.P +Be aware, however, that if +.\" HTML +.\" +duplicate capture group numbers +.\" +are in use, relative references refer to the earliest group with the +appropriate number. Consider, for example: +.sp + (?|(a)|(b)) (c) (?-2) +.sp +The first two capture groups (a) and (b) are both numbered 1, and group (c) +is number 2. When the reference (?-2) is encountered, the second most recently +opened parentheses has the number 1, but it is the first such group (the (a) +group) to which the recursion refers. This would be the same if an absolute +reference (?1) was used. In other words, relative references are just a +shorthand for computing a group number. +.P +It is also possible to refer to subsequent capture groups, by writing +references such as (?+2). However, these cannot be recursive because the +reference is not inside the parentheses that are referenced. They are always +.\" HTML +.\" +non-recursive subroutine +.\" +calls, as described in the next section. +.P +An alternative approach is to use named parentheses. The Perl syntax for this +is (?&name); PCRE1's earlier syntax (?P>name) is also supported. We could +rewrite the above example as follows: +.sp + (?<pn> \e( ( [^()]++ | (?&pn) )* \e) ) +.sp +If there is more than one group with the same name, the earliest one is +used. +.P +The example pattern that we have been looking at contains nested unlimited +repeats, and so the use of a possessive quantifier for matching strings of +non-parentheses is important when applying the pattern to strings that do not +match. For example, when this pattern is applied to +.sp + (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa() +.sp +it yields "no match" quickly. 
However, if a possessive quantifier is not used, +the match runs for a very long time indeed because there are so many different +ways the + and * repeats can carve up the subject, and all have to be tested +before failure can be reported. +.P +At the end of a match, the values of capturing parentheses are those from +the outermost level. If you want to obtain intermediate values, a callout +function can be used (see below and the +.\" HREF +\fBpcre2callout\fP +.\" +documentation). If the pattern above is matched against +.sp + (ab(cd)ef) +.sp +the value for the inner capturing parentheses (numbered 2) is "ef", which is +the last value taken on at the top level. If a capture group is not matched at +the top level, its final captured value is unset, even if it was (temporarily) +set at a deeper level during the matching process. +.P +Do not confuse the (?R) item with the condition (R), which tests for recursion. +Consider this pattern, which matches text in angle brackets, allowing for +arbitrary nesting. Only digits are allowed in nested brackets (that is, when +recursing), whereas any characters are permitted at the outer level. +.sp + < (?: (?(R) \ed++ | [^<>]*+) | (?R)) * > +.sp +In this pattern, (?(R) is the start of a conditional group, with two different +alternatives for the recursive and non-recursive cases. The (?R) item is the +actual recursive call. +. +. +.\" HTML +.SS "Differences in recursion processing between PCRE2 and Perl" +.rs +.sp +Some former differences between PCRE2 and Perl no longer exist. +.P +Before release 10.30, recursion processing in PCRE2 differed from Perl in that +a recursive subroutine call was always treated as an atomic group. That is, +once it had matched some of the subject string, it was never re-entered, even +if it contained untried alternatives and there was a subsequent matching +failure. (Historical note: PCRE implemented recursion before Perl did.) 
+.P +Starting with release 10.30, recursive subroutine calls are no longer treated +as atomic. That is, they can be re-entered to try unused alternatives if there +is a matching failure later in the pattern. This is now compatible with the way +Perl works. If you want a subroutine call to be atomic, you must explicitly +enclose it in an atomic group. +.P +Supporting backtracking into recursions simplifies certain types of recursive +pattern. For example, this pattern matches palindromic strings: +.sp + ^((.)(?1)\e2|.?)$ +.sp +The second branch in the group matches a single central character in the +palindrome when there are an odd number of characters, or nothing when there +are an even number of characters, but in order to work it has to be able to try +the second case when the rest of the pattern match fails. If you want to match +typical palindromic phrases, the pattern has to ignore all non-word characters, +which can be done like this: +.sp + ^\eW*+((.)\eW*+(?1)\eW*+\e2|\eW*+.?)\eW*+$ +.sp +If run with the PCRE2_CASELESS option, this pattern matches phrases such as "A +man, a plan, a canal: Panama!". Note the use of the possessive quantifier *+ to +avoid backtracking into sequences of non-word characters. Without this, PCRE2 +takes a great deal longer (ten times or more) to match typical phrases, and +Perl takes so long that you think it has gone into a loop. +.P +Another way in which PCRE2 and Perl used to differ in their recursion +processing is in the handling of captured values. Formerly in Perl, when a +group was called recursively or as a subroutine (see the next section), it +had no access to any values that were captured outside the recursion, whereas +in PCRE2 these values can be referenced. Consider this pattern: +.sp + ^(.)(\e1|a(?2)) +.sp +This pattern matches "bab". The first capturing parentheses match "b", then in +the second group, when the backreference \e1 fails to match "b", the second +alternative matches "a" and then recurses. 
In the recursion, \e1 does now match +"b" and so the whole match succeeds. This match used to fail in Perl, but in +later versions (I tried 5.024) it now works. +. +. +.\" HTML +.SH "GROUPS AS SUBROUTINES" +.rs +.sp +If the syntax for a recursive group call (either by number or by name) is used +outside the parentheses to which it refers, it operates a bit like a subroutine +in a programming language. More accurately, PCRE2 treats the referenced group +as an independent subpattern which it tries to match at the current matching +position. The called group may be defined before or after the reference. A +numbered reference can be absolute or relative, as in these examples: +.sp + (...(absolute)...)...(?2)... + (...(relative)...)...(?-1)... + (...(?+1)...(relative)... +.sp +An earlier example pointed out that the pattern +.sp + (sens|respons)e and \e1ibility +.sp +matches "sense and sensibility" and "response and responsibility", but not +"sense and responsibility". If instead the pattern +.sp + (sens|respons)e and (?1)ibility +.sp +is used, it does match "sense and responsibility" as well as the other two +strings. Another example is given in the discussion of DEFINE above. +.P +Like recursions, subroutine calls used to be treated as atomic, but this +changed at PCRE2 release 10.30, so backtracking into subroutine calls can now +occur. However, any capturing parentheses that are set during the subroutine +call revert to their previous values afterwards. +.P +Processing options such as case-independence are fixed when a group is +defined, so if it is used as a subroutine, such options cannot be changed for +different calls. For example, consider this pattern: +.sp + (abc)(?i:(?-1)) +.sp +It matches "abcabc". It does not match "abcABC" because the change of +processing option does not affect the called group. 
+.P +The behaviour of +.\" HTML +.\" +backtracking control verbs +.\" +in groups when called as subroutines is described in the section entitled +.\" HTML +.\" +"Backtracking verbs in subroutines" +.\" +below. +. +. +.\" HTML +.SH "ONIGURUMA SUBROUTINE SYNTAX" +.rs +.sp +For compatibility with Oniguruma, the non-Perl syntax \eg followed by a name or +a number enclosed either in angle brackets or single quotes, is an alternative +syntax for calling a group as a subroutine, possibly recursively. Here are two +of the examples used above, rewritten using this syntax: +.sp + (?<pn> \e( ( (?>[^()]+) | \eg<pn> )* \e) ) + (sens|respons)e and \eg'1'ibility +.sp +PCRE2 supports an extension to Oniguruma: if a number is preceded by a +plus or a minus sign it is taken as a relative reference. For example: +.sp + (abc)(?i:\eg<-1>) +.sp +Note that \eg{...} (Perl syntax) and \eg<...> (Oniguruma syntax) are \fInot\fP +synonymous. The former is a backreference; the latter is a subroutine call. +. +. +.SH CALLOUTS +.rs +.sp +Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl +code to be obeyed in the middle of matching a regular expression. This makes it +possible, amongst other things, to extract different substrings that match the +same pair of parentheses when there is a repetition. +.P +PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl +code. The feature is called "callout". The caller of PCRE2 provides an external +function by putting its entry point in a match context using the function +\fBpcre2_set_callout()\fP, and then passing that context to \fBpcre2_match()\fP +or \fBpcre2_dfa_match()\fP. If no match context is passed, or if the callout +entry point is set to NULL, callouts are disabled. +.P +Within a regular expression, (?C<arg>) indicates a point at which the external +function is to be called. There are two kinds of callout: those with a +numerical argument and those with a string argument. 
(?C) on its own with no +argument is treated as (?C0). A numerical argument allows the application to +distinguish between different callouts. String arguments were added for release +10.20 to make it possible for script languages that use PCRE2 to embed short +scripts within patterns in a similar way to Perl. +.P +During matching, when PCRE2 reaches a callout point, the external function is +called. It is provided with the number or string argument of the callout, the +position in the pattern, and one item of data that is also set in the match +block. The callout function may cause matching to proceed, to backtrack, or to +fail. +.P +By default, PCRE2 implements a number of optimizations at matching time, and +one side-effect is that sometimes callouts are skipped. If you need all +possible callouts to happen, you need to set options that disable the relevant +optimizations. More details, including a complete description of the +programming interface to the callout function, are given in the +.\" HREF +\fBpcre2callout\fP +.\" +documentation. +. +. +.SS "Callouts with numerical arguments" +.rs +.sp +If you just want to have a means of identifying different callout points, put a +number less than 256 after the letter C. For example, this pattern has two +callout points: +.sp + (?C1)abc(?C2)def +.sp +If the PCRE2_AUTO_CALLOUT flag is passed to \fBpcre2_compile()\fP, numerical +callouts are automatically installed before each item in the pattern. They are +all numbered 255. If there is a conditional group in the pattern whose +condition is an assertion, an additional callout is inserted just before the +condition. An explicit callout may also be set at this position, as in this +example: +.sp + (?(?C9)(?=a)abc|def) +.sp +Note that this applies only to assertion conditions, not to other types of +condition. +. +. +.SS "Callouts with string arguments" +.rs +.sp +A delimited string may be used instead of a number as a callout argument. 
The +starting delimiter must be one of ` ' " ^ % # $ { and the ending delimiter is +the same as the start, except for {, where the ending delimiter is }. If the +ending delimiter is needed within the string, it must be doubled. For +example: +.sp + (?C'ab ''c'' d')xyz(?C{any text})pqr +.sp +The doubling is removed before the string is passed to the callout function. +. +. +.\" HTML +.SH "BACKTRACKING CONTROL" +.rs +.sp +There are a number of special "Backtracking Control Verbs" (to use Perl's +terminology) that modify the behaviour of backtracking during matching. They +are generally of the form (*VERB) or (*VERB:NAME). Some verbs take either form, +and may behave differently depending on whether or not a name argument is +present. The names are not required to be unique within the pattern. +.P +By default, for compatibility with Perl, a name is any sequence of characters +that does not include a closing parenthesis. The name is not processed in +any way, and it is not possible to include a closing parenthesis in the name. +This can be changed by setting the PCRE2_ALT_VERBNAMES option, but the result +is no longer Perl-compatible. +.P +When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to verb names +and only an unescaped closing parenthesis terminates the name. However, the +only backslash items that are permitted are \eQ, \eE, and sequences such as +\ex{100} that define character code points. Character type escapes such as \ed +are faulted. +.P +A closing parenthesis can be included in a name either as \e) or between \eQ +and \eE. In addition to backslash processing, if the PCRE2_EXTENDED or +PCRE2_EXTENDED_MORE option is also set, unescaped whitespace in verb names is +skipped, and #-comments are recognized, exactly as in the rest of the pattern. +PCRE2_EXTENDED and PCRE2_EXTENDED_MORE do not affect verb names unless +PCRE2_ALT_VERBNAMES is also set. 
+.P +The maximum length of a name is 255 in the 8-bit library and 65535 in the +16-bit and 32-bit libraries. If the name is empty, that is, if the closing +parenthesis immediately follows the colon, the effect is as if the colon were +not there. Any number of these verbs may occur in a pattern. Except for +(*ACCEPT), they may not be quantified. +.P +Since these verbs are specifically related to backtracking, most of them can be +used only when the pattern is to be matched using the traditional matching +function, because that uses a backtracking algorithm. With the exception of +(*FAIL), which behaves like a failing negative assertion, the backtracking +control verbs cause an error if encountered by the DFA matching function. +.P +The behaviour of these verbs in +.\" HTML +.\" +repeated groups, +.\" +.\" HTML +.\" +assertions, +.\" +and in +.\" HTML +.\" +capture groups called as subroutines +.\" +(whether or not recursively) is documented below. +. +. +.\" HTML +.SS "Optimizations that affect backtracking verbs" +.rs +.sp +PCRE2 contains some optimizations that are used to speed up matching by running +some checks at the start of each match attempt. For example, it may know the +minimum length of matching subject, or that a particular character must be +present. When one of these optimizations bypasses the running of a match, any +included backtracking verbs will not, of course, be processed. You can suppress +the start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option +when calling \fBpcre2_compile()\fP, or by starting the pattern with +(*NO_START_OPT). There is more discussion of this option in the section +entitled +.\" HTML +.\" +"Compiling a pattern" +.\" +in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +.P +Experiments with Perl suggest that it too has similar optimizations, and like +PCRE2, turning them off can change the result of a match. +. +. 
+.\" HTML +.SS "Verbs that act immediately" +.rs +.sp +The following verbs act as soon as they are encountered. +.sp + (*ACCEPT) or (*ACCEPT:NAME) +.sp +This verb causes the match to end successfully, skipping the remainder of the +pattern. However, when it is inside a capture group that is called as a +subroutine, only that group is ended successfully. Matching then continues +at the outer level. If (*ACCEPT) is triggered in a positive assertion, the +assertion succeeds; in a negative assertion, the assertion fails. +.P +If (*ACCEPT) is inside capturing parentheses, the data so far is captured. For +example: +.sp + A((?:A|B(*ACCEPT)|C)D) +.sp +This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is captured by +the outer parentheses. +.P +(*ACCEPT) is the only backtracking verb that is allowed to be quantified +because an ungreedy quantification with a minimum of zero acts only when a +backtrack happens. Consider, for example, +.sp + (A(*ACCEPT)??B)C +.sp +where A, B, and C may be complex expressions. After matching "A", the matcher +processes "BC"; if that fails, causing a backtrack, (*ACCEPT) is triggered and +the match succeeds. In both cases, all but C is captured. Whereas (*COMMIT) +(see below) means "fail on backtrack", a repeated (*ACCEPT) of this type means +"succeed on backtrack". +.P +\fBWarning:\fP (*ACCEPT) should not be used within a script run group, because +it causes an immediate exit from the group, bypassing the script run checking. +.sp + (*FAIL) or (*FAIL:NAME) +.sp +This verb causes a matching failure, forcing backtracking to occur. It may be +abbreviated to (*F). It is equivalent to (?!) but easier to read. The Perl +documentation notes that it is probably useful only when combined with (?{}) or +(??{}). Those are, of course, Perl features that are not present in PCRE2. 
The +nearest equivalent is the callout feature, as for example in this pattern: +.sp + a+(?C)(*FAIL) +.sp +A match with the string "aaaa" always fails, but the callout is taken before +each backtrack happens (in this example, 10 times). +.P +(*ACCEPT:NAME) and (*FAIL:NAME) behave the same as (*MARK:NAME)(*ACCEPT) and +(*MARK:NAME)(*FAIL), respectively, that is, a (*MARK) is recorded just before +the verb acts. +. +. +.SS "Recording which path was taken" +.rs +.sp +There is one verb whose main purpose is to track how a match was arrived at, +though it also has a secondary use in conjunction with advancing the match +starting point (see (*SKIP) below). +.sp + (*MARK:NAME) or (*:NAME) +.sp +A name is always required with this verb. For all the other backtracking +control verbs, a NAME argument is optional. +.P +When a match succeeds, the name of the last-encountered mark name on the +matching path is passed back to the caller as described in the section entitled +.\" HTML +.\" +"Other information about the match" +.\" +in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. This applies to all instances of (*MARK) and other verbs, +including those inside assertions and atomic groups. However, there are +differences in those cases when (*MARK) is used in conjunction with (*SKIP) as +described below. +.P +The mark name that was last encountered on the matching path is passed back. A +verb without a NAME argument is ignored for this purpose. Here is an example of +\fBpcre2test\fP output, where the "mark" modifier requests the retrieval and +outputting of (*MARK) data: +.sp + re> /X(*MARK:A)Y|X(*MARK:B)Z/mark + data> XY + 0: XY + MK: A + XZ + 0: XZ + MK: B +.sp +The (*MARK) name is tagged with "MK:" in this output, and in this example it +indicates which of the two alternatives matched. This is a more efficient way +of obtaining this information than putting each alternative in its own +capturing parentheses. 
+.P +If a verb with a name is encountered in a positive assertion that is true, the +name is recorded and passed back if it is the last-encountered. This does not +happen for negative assertions or failing positive assertions. +.P +After a partial match or a failed match, the last encountered name in the +entire match process is returned. For example: +.sp + re> /X(*MARK:A)Y|X(*MARK:B)Z/mark + data> XP + No match, mark = B +.sp +Note that in this unanchored example the mark is retained from the match +attempt that started at the letter "X" in the subject. Subsequent match +attempts starting at "P" and then with an empty string do not get as far as the +(*MARK) item, but nevertheless do not reset it. +.P +If you are interested in (*MARK) values after failed matches, you should +probably set the PCRE2_NO_START_OPTIMIZE option +.\" HTML +.\" +(see above) +.\" +to ensure that the match is always attempted. +. +. +.SS "Verbs that act after backtracking" +.rs +.sp +The following verbs do nothing when they are encountered. Matching continues +with what follows, but if there is a subsequent match failure, causing a +backtrack to the verb, a failure is forced. That is, backtracking cannot pass +to the left of the verb. However, when one of these verbs appears inside an +atomic group or in a lookaround assertion that is true, its effect is confined +to that group, because once the group has been matched, there is never any +backtracking into it. Backtracking from beyond an assertion or an atomic group +ignores the entire group, and seeks a preceding backtracking point. +.P +These verbs differ in exactly what kind of failure occurs when backtracking +reaches them. The behaviour described below is what happens when the verb is +not in a subroutine or an assertion. Subsequent sections cover these special +cases. +.sp + (*COMMIT) or (*COMMIT:NAME) +.sp +This verb causes the whole match to fail outright if there is a later matching +failure that causes backtracking to reach it. 
Even if the pattern is +unanchored, no further attempts to find a match by advancing the starting point +take place. If (*COMMIT) is the only backtracking verb that is encountered, +once it has been passed \fBpcre2_match()\fP is committed to finding a match at +the current starting point, or not at all. For example: +.sp + a+(*COMMIT)b +.sp +This matches "xxaab" but not "aacaab". It can be thought of as a kind of +dynamic anchor, or "I've started, so I must finish." +.P +The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COMMIT). It is +like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names that are set with +(*MARK), ignoring those set by any of the other backtracking verbs. +.P +If there is more than one backtracking verb in a pattern, a different one that +follows (*COMMIT) may be triggered first, so merely passing (*COMMIT) during a +match does not always guarantee that a match must be at this starting point. +.P +Note that (*COMMIT) at the start of a pattern is not the same as an anchor, +unless PCRE2's start-of-match optimizations are turned off, as shown in this +output from \fBpcre2test\fP: +.sp + re> /(*COMMIT)abc/ + data> xyzabc + 0: abc + data> + re> /(*COMMIT)abc/no_start_optimize + data> xyzabc + No match +.sp +For the first pattern, PCRE2 knows that any match must start with "a", so the +optimization skips along the subject to "a" before applying the pattern to the +first set of data. The match attempt then succeeds. The second pattern disables +the optimization that skips along to the first character. The pattern is now +applied starting at "x", and so the (*COMMIT) causes the match to fail without +trying any other starting points. +.sp + (*PRUNE) or (*PRUNE:NAME) +.sp +This verb causes the match to fail at the current starting position in the +subject if there is a later matching failure that causes backtracking to reach +it. 
If the pattern is unanchored, the normal "bumpalong" advance to the next +starting character then happens. Backtracking can occur as usual to the left of +(*PRUNE), before it is reached, or when matching to the right of (*PRUNE), but +if there is no match to the right, backtracking cannot cross (*PRUNE). In +simple cases, the use of (*PRUNE) is just an alternative to an atomic group or +possessive quantifier, but there are some uses of (*PRUNE) that cannot be +expressed in any other way. In an anchored pattern (*PRUNE) has the same effect +as (*COMMIT). +.P +The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). It is +like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names set with (*MARK), +ignoring those set by other backtracking verbs. +.sp + (*SKIP) +.sp +This verb, when given without a name, is like (*PRUNE), except that if the +pattern is unanchored, the "bumpalong" advance is not to the next character, +but to the position in the subject where (*SKIP) was encountered. (*SKIP) +signifies that whatever text was matched leading up to it cannot be part of a +successful match if there is a later mismatch. Consider: +.sp + a+(*SKIP)b +.sp +If the subject is "aaaac...", after the first match attempt fails (starting at +the first character in the string), the starting point skips on to start the +next attempt at "c". Note that a possessive quantifier does not have the same +effect as this example; although it would suppress backtracking during the +first match attempt, the second attempt would start at the second character +instead of skipping on to "c". +.P +If (*SKIP) is used to specify a new starting position that is the same as the +starting position of the current match, or (by being inside a lookbehind) +earlier, the position specified by (*SKIP) is ignored, and instead the normal +"bumpalong" occurs. 
+.sp + (*SKIP:NAME) +.sp +When (*SKIP) has an associated name, its behaviour is modified. When such a +(*SKIP) is triggered, the previous path through the pattern is searched for the +most recent (*MARK) that has the same name. If one is found, the "bumpalong" +advance is to the subject position that corresponds to that (*MARK) instead of +to where (*SKIP) was encountered. If no (*MARK) with a matching name is found, +the (*SKIP) is ignored. +.P +The search for a (*MARK) name uses the normal backtracking mechanism, which +means that it does not see (*MARK) settings that are inside atomic groups or +assertions, because they are never re-entered by backtracking. Compare the +following \fBpcre2test\fP examples: +.sp + re> /a(?>(*MARK:X))(*SKIP:X)(*F)|(.)/ + data: abc + 0: a + 1: a + data: + re> /a(?:(*MARK:X))(*SKIP:X)(*F)|(.)/ + data: abc + 0: b + 1: b +.sp +In the first example, the (*MARK) setting is in an atomic group, so it is not +seen when (*SKIP:X) triggers, causing the (*SKIP) to be ignored. This allows +the second branch of the pattern to be tried at the first character position. +In the second example, the (*MARK) setting is not in an atomic group. This +allows (*SKIP:X) to find the (*MARK) when it backtracks, and this causes a new +matching attempt to start at the second character. This time, the (*MARK) is +never seen because "a" does not match "b", so the matcher immediately jumps to +the second branch of the pattern. +.P +Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores +names that are set by other backtracking verbs. +.sp + (*THEN) or (*THEN:NAME) +.sp +This verb causes a skip to the next innermost alternative when backtracking +reaches it. That is, it cancels any further backtracking within the current +alternative. Its name comes from the observation that it can be used for a +pattern-based if-then-else block: +.sp + ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ... 
+.sp +If the COND1 pattern matches, FOO is tried (and possibly further items after +the end of the group if FOO succeeds); on failure, the matcher skips to the +second alternative and tries COND2, without backtracking into COND1. If that +succeeds and BAR fails, COND3 is tried. If subsequently BAZ fails, there are no +more alternatives, so there is a backtrack to whatever came before the entire +group. If (*THEN) is not inside an alternation, it acts like (*PRUNE). +.P +The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). It is +like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names set with (*MARK), +ignoring those set by other backtracking verbs. +.P +A group that does not contain a | character is just a part of the enclosing +alternative; it is not a nested alternation with only one alternative. The +effect of (*THEN) extends beyond such a group to the enclosing alternative. +Consider this pattern, where A, B, etc. are complex pattern fragments that do +not contain any | characters at this level: +.sp + A (B(*THEN)C) | D +.sp +If A and B are matched, but there is a failure in C, matching does not +backtrack into A; instead it moves to the next alternative, that is, D. +However, if the group containing (*THEN) is given an alternative, it +behaves differently: +.sp + A (B(*THEN)C | (*FAIL)) | D +.sp +The effect of (*THEN) is now confined to the inner group. After a failure in C, +matching moves to (*FAIL), which causes the whole group to fail because there +are no more alternatives to try. In this case, matching does backtrack into A. +.P +Note that a conditional group is not considered as having two alternatives, +because only one is ever used. In other words, the | character in a conditional +group has a different meaning. Ignoring white space, consider: +.sp + ^.*? (?(?=a) a | b(*THEN)c ) +.sp +If the subject is "ba", this pattern does not match. Because .*? 
is ungreedy, +it initially matches zero characters. The condition (?=a) then fails, the +character "b" is matched, but "c" is not. At this point, matching does not +backtrack to .*? as might perhaps be expected from the presence of the | +character. The conditional group is part of the single alternative that +comprises the whole pattern, and so the match fails. (If there was a backtrack +into .*?, allowing it to match "b", the match would succeed.) +.P +The verbs just described provide four different "strengths" of control when +subsequent matching fails. (*THEN) is the weakest, carrying on the match at the +next alternative. (*PRUNE) comes next, failing the match at the current +starting position, but allowing an advance to the next character (for an +unanchored pattern). (*SKIP) is similar, except that the advance may be more +than one character. (*COMMIT) is the strongest, causing the entire match to +fail. +. +. +.SS "More than one backtracking verb" +.rs +.sp +If more than one backtracking verb is present in a pattern, the one that is +backtracked onto first acts. For example, consider this pattern, where A, B, +etc. are complex pattern fragments: +.sp + (A(*COMMIT)B(*THEN)C|ABD) +.sp +If A matches but B fails, the backtrack to (*COMMIT) causes the entire match to +fail. However, if A and B match, but C fails, the backtrack to (*THEN) causes +the next alternative (ABD) to be tried. This behaviour is consistent, but is +not always the same as Perl's. It means that if two or more backtracking verbs +appear in succession, all but the last of them has no effect. Consider this +example: +.sp + ...(*COMMIT)(*PRUNE)... +.sp +If there is a matching failure to the right, backtracking onto (*PRUNE) causes +it to be triggered, and its action is taken. There can never be a backtrack +onto (*COMMIT). +. +. +.\" HTML +.SS "Backtracking verbs in repeated groups" +.rs +.sp +PCRE2 sometimes differs from Perl in its handling of backtracking verbs in +repeated groups. 
For example, consider: +.sp + /(a(*COMMIT)b)+ac/ +.sp +If the subject is "abac", Perl matches unless its optimizations are disabled, +but PCRE2 always fails because the (*COMMIT) in the second repeat of the group +acts. +. +. +.\" HTML +.SS "Backtracking verbs in assertions" +.rs +.sp +(*FAIL) in any assertion has its normal effect: it forces an immediate +backtrack. The behaviour of the other backtracking verbs depends on whether or +not the assertion is standalone or acting as the condition in a conditional +group. +.P +(*ACCEPT) in a standalone positive assertion causes the assertion to succeed +without any further processing; captured strings and a mark name (if set) are +retained. In a standalone negative assertion, (*ACCEPT) causes the assertion to +fail without any further processing; captured substrings and any mark name are +discarded. +.P +If the assertion is a condition, (*ACCEPT) causes the condition to be true for +a positive assertion and false for a negative one; captured substrings are +retained in both cases. +.P +The remaining verbs act only when a later failure causes a backtrack to +reach them. This means that, for the Perl-compatible assertions, their effect +is confined to the assertion, because Perl lookaround assertions are atomic. A +backtrack that occurs after such an assertion is complete does not jump back +into the assertion. Note in particular that a (*MARK) name that is set in an +assertion is not "seen" by an instance of (*SKIP:NAME) later in the pattern. +.P +PCRE2 now supports non-atomic positive assertions, as described in the section +entitled +.\" HTML +.\" +"Non-atomic assertions" +.\" +above. These assertions must be standalone (not used as conditions). They are +not Perl-compatible. For these assertions, a later backtrack does jump back +into the assertion, and therefore verbs such as (*COMMIT) can be triggered by +backtracks from later in the pattern. +.P +The effect of (*THEN) is not allowed to escape beyond an assertion. 
If there +are no more branches to try, (*THEN) causes a positive assertion to be false, +and a negative assertion to be true. +.P +The other backtracking verbs are not treated specially if they appear in a +standalone positive assertion. In a conditional positive assertion, +backtracking (from within the assertion) into (*COMMIT), (*SKIP), or (*PRUNE) +causes the condition to be false. However, for both standalone and conditional +negative assertions, backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes +the assertion to be true, without considering any further alternative branches. +. +. +.\" HTML +.SS "Backtracking verbs in subroutines" +.rs +.sp +These behaviours occur whether or not the group is called recursively. +.P +(*ACCEPT) in a group called as a subroutine causes the subroutine match to +succeed without any further processing. Matching then continues after the +subroutine call. Perl documents this behaviour. Perl's treatment of the other +verbs in subroutines is different in some cases. +.P +(*FAIL) in a group called as a subroutine has its normal effect: it forces +an immediate backtrack. +.P +(*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail when +triggered by being backtracked to in a group called as a subroutine. There is +then a backtrack at the outer level. +.P +(*THEN), when triggered, skips to the next alternative in the innermost +enclosing group that has alternatives (its normal behaviour). However, if there +is no such group within the subroutine's group, the subroutine match fails and +there is a backtrack at the outer level. +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2api\fP(3), \fBpcre2callout\fP(3), \fBpcre2matching\fP(3), +\fBpcre2syntax\fP(3), \fBpcre2\fP(3). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 04 June 2024 +Copyright (c) 1997-2024 University of Cambridge. 
+.fi diff --git a/doc/pcre2perform.3 b/doc/pcre2perform.3 new file mode 100644 index 0000000..72aa67a --- /dev/null +++ b/doc/pcre2perform.3 @@ -0,0 +1,260 @@ +.TH PCRE2PERFORM 3 "27 July 2022" "PCRE2 10.41" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "PCRE2 PERFORMANCE" +.rs +.sp +Two aspects of performance are discussed below: memory usage and processing +time. The way you express your pattern as a regular expression can affect both +of them. +. +.SH "COMPILED PATTERN MEMORY USAGE" +.rs +.sp +Patterns are compiled by PCRE2 into a reasonably efficient interpretive code, +so that most simple patterns do not use much memory for storing the compiled +version. However, there is one case where the memory usage of a compiled +pattern can be unexpectedly large. If a parenthesized group has a quantifier +with a minimum greater than 1 and/or a limited maximum, the whole group is +repeated in the compiled code. For example, the pattern +.sp + (abc|def){2,4} +.sp +is compiled as if it were +.sp + (abc|def)(abc|def)((abc|def)(abc|def)?)? +.sp +(Technical aside: It is done this way so that backtrack points within each of +the repetitions can be independently maintained.) +.P +For regular expressions whose quantifiers use only small numbers, this is not +usually a problem. However, if the numbers are large, and particularly if such +repetitions are nested, the memory usage can become an embarrassment. For +example, the very simple pattern +.sp + ((ab){1,1000}c){1,3} +.sp +uses over 50KiB when compiled using the 8-bit library. When PCRE2 is +compiled with its default internal pointer size of two bytes, the size limit on +a compiled pattern is 65535 code units in the 8-bit and 16-bit libraries, and +this is reached with the above pattern if the outer repetition is increased +from 3 to 4. 
PCRE2 can be compiled to use larger internal pointers and thus +handle larger compiled patterns, but it is better to try to rewrite your +pattern to use less memory if you can. +.P +One way of reducing the memory usage for such patterns is to make use of +PCRE2's +.\" HTML +.\" +"subroutine" +.\" +facility. Re-writing the above pattern as +.sp + ((ab)(?2){0,999}c)(?1){0,2} +.sp +reduces the memory requirements to around 16KiB, and indeed it remains under +20KiB even with the outer repetition increased to 100. However, this kind of +pattern is not always exactly equivalent, because any captures within +subroutine calls are lost when the subroutine completes. If this is not a +problem, this kind of rewriting will allow you to process patterns that PCRE2 +cannot otherwise handle. The matching performance of the two different versions +of the pattern is roughly the same. (This applies from release 10.30 - things +were different in earlier releases.) +. +. +.SH "STACK AND HEAP USAGE AT RUN TIME" +.rs +.sp +From release 10.30, the interpretive (non-JIT) version of \fBpcre2_match()\fP +uses very little system stack at run time. In earlier releases recursive +function calls could use a great deal of stack, and this could cause problems, +but this usage has been eliminated. Backtracking positions are now explicitly +remembered in memory frames controlled by the code. +.P +The size of each frame depends on the size of pointer variables and the number +of capturing parenthesized groups in the pattern being matched. On a 64-bit +system the frame size for a pattern with no captures is 128 bytes. For each +capturing group the size increases by 16 bytes. +.P +Until release 10.41, an initial 20KiB frames vector was allocated on the system +stack, but this still caused some issues for multi-thread applications where +each thread has a very small stack. From release 10.41 backtracking memory +frames are always held in heap memory. 
An initial heap allocation is obtained +the first time any match data block is passed to \fBpcre2_match()\fP. This is +remembered with the match data block and re-used if that block is used for +another match. It is freed when the match data block itself is freed. +.P +The size of the initial block is the larger of 20KiB or ten times the pattern's +frame size, unless the heap limit is less than this, in which case the heap +limit is used. If the initial block proves to be too small during matching, it +is replaced by a larger block, subject to the heap limit. The heap limit is +checked only when a new block is to be allocated. Reducing the heap limit +between calls to \fBpcre2_match()\fP with the same match data block does not +affect the saved block. +.P +In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive +function calls, but only for processing atomic groups, lookaround assertions, +and recursion within the pattern. The original version of the code used to +allocate quite large internal workspace vectors on the stack, which caused some +problems for some patterns in environments with small stacks. From release +10.32 the code for \fBpcre2_dfa_match()\fP has been re-factored to use heap +memory when necessary for internal workspace when recursing, though recursive +function calls are still used. +.P +The "match depth" parameter can be used to limit the depth of function +recursion, and the "match heap" parameter to limit heap memory in +\fBpcre2_dfa_match()\fP. +. +. +.SH "PROCESSING TIME" +.rs +.sp +Certain items in regular expression patterns are processed more efficiently +than others. It is more efficient to use a character class like [aeiou] than a +set of single-character alternatives such as (a|e|i|o|u). In general, the +simplest construction that provides the required behaviour is usually the most +efficient. 
Jeffrey Friedl's book contains a lot of useful general discussion +about optimizing regular expressions for efficient performance. This document +contains a few observations about PCRE2. +.P +Using Unicode character properties (the \ep, \eP, and \eX escapes) is slow, +because PCRE2 has to use a multi-stage table lookup whenever it needs a +character's property. If you can find an alternative pattern that does not use +character properties, it will probably be faster. +.P +By default, the escape sequences \eb, \ed, \es, and \ew, and the POSIX +character classes such as [:alpha:] do not use Unicode properties, partly for +backwards compatibility, and partly for performance reasons. However, you can +set the PCRE2_UCP option or start the pattern with (*UCP) if you want Unicode +character properties to be used. This can double the matching time for items +such as \ed, when matched with \fBpcre2_match()\fP; the performance loss is +less with a DFA matching function, and in both cases there is not much +difference for \eb. +.P +When a pattern begins with .* not in atomic parentheses, nor in parentheses +that are the subject of a backreference, and the PCRE2_DOTALL option is set, +the pattern is implicitly anchored by PCRE2, since it can match only at the +start of a subject string. If the pattern has multiple top-level branches, they +must all be anchorable. The optimization can be disabled by the +PCRE2_NO_DOTSTAR_ANCHOR option, and is automatically disabled if the pattern +contains (*PRUNE) or (*SKIP). +.P +If PCRE2_DOTALL is not set, PCRE2 cannot make this optimization, because the +dot metacharacter does not then match a newline, and if the subject string +contains newlines, the pattern may match from the character immediately +following one of them instead of from the very start. For example, the pattern +.sp + .*second +.sp +matches the subject "first\enand second" (where \en stands for a newline +character), with the match starting at the seventh character. 
In order to do +this, PCRE2 has to retry the match starting after every newline in the subject. +.P +If you are using such a pattern with subject strings that do not contain +newlines, the best performance is obtained by setting PCRE2_DOTALL, or starting +the pattern with ^.* or ^.*? to indicate explicit anchoring. That saves PCRE2 +from having to scan along the subject looking for a newline to restart at. +.P +Beware of patterns that contain nested indefinite repeats. These can take a +long time to run when applied to a string that does not match. Consider the +pattern fragment +.sp + ^(a+)* +.sp +This can match "aaaa" in 16 different ways, and this number increases very +rapidly as the string gets longer. (The * repeat can match 0, 1, 2, 3, or 4 +times, and for each of those cases other than 0 or 4, the + repeats can match +different numbers of times.) When the remainder of the pattern is such that the +entire match is going to fail, PCRE2 has in principle to try every possible +variation, and this can take an extremely long time, even for relatively short +strings. +.P +An optimization catches some of the more simple cases such as +.sp + (a+)*b +.sp +where a literal character follows. Before embarking on the standard matching +procedure, PCRE2 checks that there is a "b" later in the subject string, and if +there is not, it fails the match immediately. However, when there is no +following literal this optimization cannot be used. You can see the difference +by comparing the behaviour of +.sp + (a+)*\ed +.sp +with the pattern above. The pattern above, (a+)*b, gives a failure almost +instantly when applied to a whole line of "a" characters, whereas (a+)*\ed +takes an appreciable time with strings longer than about 20 characters. +.P +In many cases, the solution to this kind of performance issue is to use an +atomic group or a possessive quantifier. This can often reduce memory +requirements as well. 
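As another example, consider this pattern: +.sp + ([^<]|<(?!inet))+ +.sp +It matches from wherever it starts until it encounters "<inet" or the end of +the data, and is the kind of pattern that might be used when processing an XML +file. Each iteration of the outer parentheses matches either one character that +is not "<" or a "<" that is not followed by "inet". However, each time a +parenthesis is processed, a backtracking position is passed, so this +formulation uses a memory frame for each matched character. For a long string, +a lot of memory is required. Consider now this rewritten pattern, which matches +exactly the same strings: +.sp + ([^<]++|<(?!inet))+ +.sp +This runs much faster, because sequences of characters that do not contain "<" +are "swallowed" in one item inside the parentheses, and a possessive quantifier +is used to stop any backtracking into the runs of non-"<" characters. This +version also uses a lot less memory because entry to a new set of parentheses +happens only when a "<" character that is not followed by "inet" is encountered +(and we assume this is relatively rare). +.P +This example shows that one way of optimizing performance when matching long +subject strings is to write repeated parenthesized subpatterns to match more +than one character whenever possible. +. +. +.SS "SETTING RESOURCE LIMITS" +.rs +.sp +You can set limits on the amount of processing that takes place when matching, +and on the amount of heap memory that is used. The default values of the limits +are very large, and unlikely ever to operate. They can be changed when PCRE2 is +built, and they can also be set when \fBpcre2_match()\fP or +\fBpcre2_dfa_match()\fP is called. For details of these interfaces, see the +.\" HREF +\fBpcre2build\fP +.\" +documentation and the section entitled +.\" HTML +.\" +"The match context" +.\" +in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +.P +The \fBpcre2test\fP test program has a modifier called "find_limits" which, if +applied to a subject line, causes it to find the smallest limits that allow a +pattern to match. This is done by repeatedly matching with different limits. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 27 July 2022 +Copyright (c) 1997-2022 University of Cambridge. +.fi diff --git a/doc/pcre2posix.3 b/doc/pcre2posix.3 new file mode 100644 index 0000000..3709299 --- /dev/null +++ b/doc/pcre2posix.3 @@ -0,0 +1,348 @@ +.TH PCRE2POSIX 3 "19 January 2024" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "SYNOPSIS" +.rs +.sp +.B #include <pcre2posix.h> +.PP +.nf +.B int pcre2_regcomp(regex_t *\fIpreg\fP, const char *\fIpattern\fP, +.B " int \fIcflags\fP);" +.sp +.B int pcre2_regexec(const regex_t *\fIpreg\fP, const char *\fIstring\fP, +.B " size_t \fInmatch\fP, regmatch_t \fIpmatch\fP[], int \fIeflags\fP);" +.sp +.B "size_t pcre2_regerror(int \fIerrcode\fP, const regex_t *\fIpreg\fP," +.B " char *\fIerrbuf\fP, size_t \fIerrbuf_size\fP);" +.sp +.B void pcre2_regfree(regex_t *\fIpreg\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This set of functions provides a POSIX-style API for the PCRE2 regular +expression 8-bit library. There are no POSIX-style wrappers for PCRE2's 16-bit +and 32-bit libraries. See the +.\" HREF +\fBpcre2api\fP +.\" +documentation for a description of PCRE2's native API, which contains much +additional functionality. +.P +\fBIMPORTANT NOTE\fP: The functions described here are NOT thread-safe, and +should not be used in multi-threaded applications. 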
They are also limited to +processing subjects that are not bigger than 2GB. Use the native API instead. +.P +These functions are wrapper functions that ultimately call the PCRE2 native +API. Their prototypes are defined in the \fBpcre2posix.h\fP header file, and +they all have unique names starting with \fBpcre2_\fP. However, the +\fBpcre2posix.h\fP header also contains macro definitions that convert the +standard POSIX names such as \fBregcomp()\fP into \fBpcre2_regcomp()\fP etc. This +means that a program can use the usual POSIX names without running the risk of +accidentally linking with POSIX functions from a different library. +.P +On Unix-like systems the PCRE2 POSIX library is called \fBlibpcre2-posix\fP, so +can be accessed by adding \fB-lpcre2-posix\fP to the command for linking an +application. Because the POSIX functions call the native ones, it is also +necessary to add \fB-lpcre2-8\fP. +.P +On Windows systems, if you are linking to a DLL version of the library, it is +recommended that \fBPCRE2POSIX_SHARED\fP is defined before including the +\fBpcre2posix.h\fP header, as it will allow for a more efficient way to +invoke the functions by adding the \fB__declspec(dllimport)\fP decorator. +.P +Although they were not defined as prototypes in \fBpcre2posix.h\fP, releases +10.33 to 10.36 of the library contained functions with the POSIX names +\fBregcomp()\fP etc. These simply passed their arguments to the PCRE2 +functions. These functions were provided for backwards compatibility with +earlier versions of PCRE2, which had only POSIX names. However, this has proved +troublesome in situations where a program links with several libraries, some of +which use PCRE2's POSIX interface while others use the real POSIX functions. +For this reason, the POSIX names have been removed since release 10.37. +.P +Calling the header file \fBpcre2posix.h\fP avoids any conflict with other POSIX +libraries. 
It can, of course, be renamed or aliased as \fBregex.h\fP, which is +the "correct" name, if there is no clash. It provides two structure types, +\fIregex_t\fP for compiled internal forms, and \fIregmatch_t\fP for returning +captured substrings. It also defines some constants whose names start with +"REG_"; these are used for setting options and identifying error codes. +. +. +.SH "USING THE POSIX FUNCTIONS" +.rs +.sp +Note that these functions are just POSIX-style wrappers for PCRE2's native API. +They do not give POSIX regular expression behaviour, and they are not +thread-safe or even POSIX compatible. +.P +Those POSIX option bits that can reasonably be mapped to PCRE2 native options +have been implemented. In addition, the option REG_EXTENDED is defined with the +value zero. This has no effect, but since programs that are written to the +POSIX interface often use it, this makes it easier to slot in PCRE2 as a +replacement library. Other POSIX options are not even defined. +.P +There are also some options that are not defined by POSIX. These have been +added at the request of users who want to make use of certain PCRE2-specific +features via the POSIX calling interface or to add BSD or GNU functionality. +.P +When PCRE2 is called via these functions, it is only the API that is POSIX-like +in style. The syntax and semantics of the regular expressions themselves are +still those of Perl, subject to the setting of various PCRE2 options, as +described below. "POSIX-like in style" means that the API approximates to the +POSIX definition; it is not fully POSIX-compatible, and in multi-unit encoding +domains it is probably even less compatible. +.P +The descriptions below use the actual names of the functions, but, as described +above, the standard POSIX names (without the \fBpcre2_\fP prefix) may also be +used. +. +. +.SH "COMPILING A PATTERN" +.rs +.sp +The function \fBpcre2_regcomp()\fP is called to compile a pattern into an +internal form. 
By default, the pattern is a C string terminated by a binary +zero (but see REG_PEND below). The \fIpreg\fP argument is a pointer to a +\fBregex_t\fP structure that is used as a base for storing information about +the compiled regular expression. It is also used for input when REG_PEND is +set. The \fBregex_t\fP structure used by \fBpcre2_regcomp()\fP is defined in +\fBpcre2posix.h\fP and is not the same as the structure used by other libraries +that provide POSIX-style matching. +.P +The argument \fIcflags\fP is either zero, or contains one or more of the bits +defined by the following macros: +.sp + REG_DOTALL +.sp +The PCRE2_DOTALL option is set when the regular expression is passed for +compilation to the native function. Note that REG_DOTALL is not part of the +POSIX standard. +.sp + REG_ICASE +.sp +The PCRE2_CASELESS option is set when the regular expression is passed for +compilation to the native function. +.sp + REG_NEWLINE +.sp +The PCRE2_MULTILINE option is set when the regular expression is passed for +compilation to the native function. Note that this does \fInot\fP mimic the +defined POSIX behaviour for REG_NEWLINE (see the following section). +.sp + REG_NOSPEC +.sp +The PCRE2_LITERAL option is set when the regular expression is passed for +compilation to the native function. This disables all meta characters in the +pattern, causing it to be treated as a literal string. The only other options +that are allowed with REG_NOSPEC are REG_ICASE, REG_NOSUB, REG_PEND, and +REG_UTF. Note that REG_NOSPEC is not part of the POSIX standard. +.sp + REG_NOSUB +.sp +When a pattern that is compiled with this flag is passed to +\fBpcre2_regexec()\fP for matching, the \fInmatch\fP and \fIpmatch\fP arguments +are ignored, and no captured strings are returned. Versions of the PCRE2 library +prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this +no longer happens because it disables the use of backreferences. 
+.sp + REG_PEND +.sp +If this option is set, the \fBre_endp\fP field in the \fIpreg\fP structure +(which has the type const char *) must be set to point to the character beyond +the end of the pattern before calling \fBpcre2_regcomp()\fP. The pattern itself +may now contain binary zeros, which are treated as data characters. Without +REG_PEND, a binary zero terminates the pattern and the \fBre_endp\fP field is +ignored. This is a GNU extension to the POSIX standard and should be used with +caution in software intended to be portable to other systems. +.sp + REG_UCP +.sp +The PCRE2_UCP option is set when the regular expression is passed for +compilation to the native function. This causes PCRE2 to use Unicode properties +when matching \ed, \ew, etc., instead of just recognizing ASCII values. Note +that REG_UCP is not part of the POSIX standard. +.sp + REG_UNGREEDY +.sp +The PCRE2_UNGREEDY option is set when the regular expression is passed for +compilation to the native function. Note that REG_UNGREEDY is not part of the +POSIX standard. +.sp + REG_UTF +.sp +The PCRE2_UTF option is set when the regular expression is passed for +compilation to the native function. This causes the pattern itself and all data +strings used for matching it to be treated as UTF-8 strings. Note that REG_UTF +is not part of the POSIX standard. +.P +In the absence of these flags, no options are passed to the native function. +This means that the regex is compiled with PCRE2 default semantics. In +particular, the way it handles newline characters in the subject string is the +Perl way, not the POSIX way. Note that setting PCRE2_MULTILINE has only +\fIsome\fP of the effects specified for REG_NEWLINE. It does not affect the way +newlines are matched by the dot metacharacter (they are not) or by a negative +class such as [^a] (they are). +.P +The yield of \fBpcre2_regcomp()\fP is zero on success, and non-zero otherwise. 
+The \fIpreg\fP structure is filled in on success, and one other member of the +structure (as well as \fIre_endp\fP) is public: \fIre_nsub\fP contains the +number of capturing subpatterns in the regular expression. Various error codes +are defined in the header file. +.P +NOTE: If the yield of \fBpcre2_regcomp()\fP is non-zero, you must not attempt +to use the contents of the \fIpreg\fP structure. If, for example, you pass it +to \fBpcre2_regexec()\fP, the result is undefined and your program is likely to +crash. +. +. +.SH "MATCHING NEWLINE CHARACTERS" +.rs +.sp +This area is not simple, because POSIX and Perl take different views of things. +It is not possible to get PCRE2 to obey POSIX semantics, but then PCRE2 was +never intended to be a POSIX engine. The following table lists the different +possibilities for matching newline characters in Perl and PCRE2: +.sp + Default Change with +.sp + . matches newline no PCRE2_DOTALL + newline matches [^a] yes not changeable + $ matches \en at end yes PCRE2_DOLLAR_ENDONLY + $ matches \en in middle no PCRE2_MULTILINE + ^ matches \en in middle no PCRE2_MULTILINE +.sp +This is the equivalent table for a POSIX-compatible pattern matcher: +.sp + Default Change with +.sp + . matches newline yes REG_NEWLINE + newline matches [^a] yes REG_NEWLINE + $ matches \en at end no REG_NEWLINE + $ matches \en in middle no REG_NEWLINE + ^ matches \en in middle no REG_NEWLINE +.sp +This behaviour is not what happens when PCRE2 is called via its POSIX +API. By default, PCRE2's behaviour is the same as Perl's, except that there is +no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2 and Perl, there +is no way to stop newline from matching [^a]. +.P +Default POSIX newline handling can be obtained by setting PCRE2_DOTALL and +PCRE2_DOLLAR_ENDONLY when calling \fBpcre2_compile()\fP directly, but there is +no way to make PCRE2 behave exactly as for the REG_NEWLINE action. 
When using +the POSIX API, passing REG_NEWLINE to PCRE2's \fBpcre2_regcomp()\fP function +causes PCRE2_MULTILINE to be passed to \fBpcre2_compile()\fP, and REG_DOTALL +passes PCRE2_DOTALL. There is no way to pass PCRE2_DOLLAR_ENDONLY. +. +. +.SH "MATCHING A PATTERN" +.rs +.sp +The function \fBpcre2_regexec()\fP is called to match a compiled pattern +\fIpreg\fP against a given \fIstring\fP, which is by default terminated by a +zero byte (but see REG_STARTEND below), subject to the options in \fIeflags\fP. +These can be: +.sp + REG_NOTBOL +.sp +The PCRE2_NOTBOL option is set when calling the underlying PCRE2 matching +function. +.sp + REG_NOTEMPTY +.sp +The PCRE2_NOTEMPTY option is set when calling the underlying PCRE2 matching +function. Note that REG_NOTEMPTY is not part of the POSIX standard. However, +setting this option can give more POSIX-like behaviour in some situations. +.sp + REG_NOTEOL +.sp +The PCRE2_NOTEOL option is set when calling the underlying PCRE2 matching +function. +.sp + REG_STARTEND +.sp +When this option is set, the subject string starts at \fIstring\fP + +\fIpmatch[0].rm_so\fP and ends at \fIstring\fP + \fIpmatch[0].rm_eo\fP, which +should point to the first character beyond the string. There may be binary +zeros within the subject string, and indeed, using REG_STARTEND is the only +way to pass a subject string that contains a binary zero. +.P +Whatever the value of \fIpmatch[0].rm_so\fP, the offsets of the matched string +and any captured substrings are still given relative to the start of +\fIstring\fP itself. (Before PCRE2 release 10.30 these were given relative to +\fIstring\fP + \fIpmatch[0].rm_so\fP, but this differs from other +implementations.) +.P +This is a BSD extension, compatible with but not specified by IEEE Standard +1003.2 (POSIX.2), and should be used with caution in software intended to be +portable to other systems. 
Note that a non-zero \fIrm_so\fP does not imply +REG_NOTBOL; REG_STARTEND affects only the location and length of the string, +not how it is matched. Setting REG_STARTEND and passing \fIpmatch\fP as NULL +are mutually exclusive; the error REG_INVARG is returned. +.P +If the pattern was compiled with the REG_NOSUB flag, no data about any matched +strings is returned. The \fInmatch\fP and \fIpmatch\fP arguments of +\fBpcre2_regexec()\fP are ignored (except possibly as input for REG_STARTEND). +.P +The value of \fInmatch\fP may be zero, and the value \fIpmatch\fP may be NULL +(unless REG_STARTEND is set); in both these cases no data about any matched +strings is returned. +.P +Otherwise, the portion of the string that was matched, and also any captured +substrings, are returned via the \fIpmatch\fP argument, which points to an +array of \fInmatch\fP structures of type \fIregmatch_t\fP, containing the +members \fIrm_so\fP and \fIrm_eo\fP. These contain the byte offset to the first +character of each substring and the offset to the first character after the end +of each substring, respectively. The 0th element of the vector relates to the +entire portion of \fIstring\fP that was matched; subsequent elements relate to +the capturing subpatterns of the regular expression. Unused entries in the +array have both structure members set to -1. +.P +\fIregmatch_t\fP as well as the \fIregoff_t\fP typedef it uses are defined in +\fBpcre2posix.h\fP and are not warranted to have the same size or layout as other +similarly named types from other libraries that provide POSIX-style matching. +.P +A successful match yields a zero return; various error codes are defined in the +header file, of which REG_NOMATCH is the "expected" failure code. +. +. +.SH "ERROR MESSAGES" +.rs +.sp +The \fBpcre2_regerror()\fP function maps a non-zero errorcode from either +\fBpcre2_regcomp()\fP or \fBpcre2_regexec()\fP to a printable message. 
If +\fIpreg\fP is not NULL, the error should have arisen from the use of that +structure. A message terminated by a binary zero is placed in \fIerrbuf\fP. If +the buffer is too short, only the first \fIerrbuf_size\fP - 1 characters of the +error message are used. The yield of the function is the size of buffer needed +to hold the whole message, including the terminating zero. This value is +greater than \fIerrbuf_size\fP if the message was truncated. +. +. +.SH MEMORY USAGE +.rs +.sp +Compiling a regular expression causes memory to be allocated and associated +with the \fIpreg\fP structure. The function \fBpcre2_regfree()\fP frees all +such memory, after which \fIpreg\fP may no longer be used as a compiled +expression. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 19 January 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/doc/pcre2sample.3 b/doc/pcre2sample.3 new file mode 100644 index 0000000..1e93ba4 --- /dev/null +++ b/doc/pcre2sample.3 @@ -0,0 +1,99 @@ +.TH PCRE2SAMPLE 3 "02 February 2016" "PCRE2 10.22" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "PCRE2 SAMPLE PROGRAM" +.rs +.sp +A simple, complete demonstration program to get you started with using PCRE2 is +supplied in the file \fIpcre2demo.c\fP in the \fBsrc\fP directory in the PCRE2 +distribution. A listing of this program is given in the +.\" HREF +\fBpcre2demo\fP +.\" +documentation. If you do not have a copy of the PCRE2 distribution, you can +save this listing to re-create the contents of \fIpcre2demo.c\fP. +.P +The demonstration program compiles the regular expression that is its +first argument, and matches it against the subject string in its second +argument. No PCRE2 options are set, and default character tables are used. 
If +matching succeeds, the program outputs the portion of the subject that matched, +together with the contents of any captured substrings. +.P +If the -g option is given on the command line, the program then goes on to +check for further matches of the same regular expression in the same subject +string. The logic is a little bit tricky because of the possibility of matching +an empty string. Comments in the code explain what is going on. +.P +The code in \fBpcre2demo.c\fP is an 8-bit program that uses the PCRE2 8-bit +library. It handles strings and characters that are stored in 8-bit code units. +By default, one character corresponds to one code unit, but if the pattern +starts with "(*UTF)", both it and the subject are treated as UTF-8 strings, +where characters may occupy multiple code units. +.P +If PCRE2 is installed in the standard include and library directories for your +operating system, you should be able to compile the demonstration program using +a command like this: +.sp + cc -o pcre2demo pcre2demo.c -lpcre2-8 +.sp +If PCRE2 is installed elsewhere, you may need to add additional options to the +command line. For example, on a Unix-like system that has PCRE2 installed in +\fI/usr/local\fP, you can compile the demonstration program using a command +like this: +.sp +.\" JOINSH + cc -o pcre2demo -I/usr/local/include pcre2demo.c \e + -L/usr/local/lib -lpcre2-8 +.sp +Once you have built the demonstration program, you can run simple tests like +this: +.sp + ./pcre2demo 'cat|dog' 'the cat sat on the mat' + ./pcre2demo -g 'cat|dog' 'the dog sat on the cat' +.sp +Note that there is a much more comprehensive test program, called +.\" HREF +\fBpcre2test\fP, +.\" +which supports many more facilities for testing regular expressions using all +three PCRE2 libraries (8-bit, 16-bit, and 32-bit, though not all three need be +installed). The +.\" HREF +\fBpcre2demo\fP +.\" +program is provided as a relatively simple coding example. 
+.P +If you try to run +.\" HREF +\fBpcre2demo\fP +.\" +when PCRE2 is not installed in the standard library directory, you may get an +error like this on some operating systems (e.g. Solaris): +.sp + ld.so.1: pcre2demo: fatal: libpcre2-8.so.0: open failed: No such file or directory +.sp +This is caused by the way shared library support works on those systems. You +need to add +.sp + -R/usr/local/lib +.sp +(for example) to the compile command to get round this problem. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 02 February 2016 +Copyright (c) 1997-2016 University of Cambridge. +.fi diff --git a/doc/pcre2serialize.3 b/doc/pcre2serialize.3 new file mode 100644 index 0000000..6fe2c64 --- /dev/null +++ b/doc/pcre2serialize.3 @@ -0,0 +1,198 @@ +.TH PCRE2SERIALIZE 3 "27 June 2018" "PCRE2 10.32" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS" +.rs +.sp +.nf +.B int32_t pcre2_serialize_decode(pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, const uint8_t *\fIbytes\fP," +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B int32_t pcre2_serialize_encode(const pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, uint8_t **\fIserialized_bytes\fP," +.B " PCRE2_SIZE *\fIserialized_size\fP, pcre2_general_context *\fIgcontext\fP);" +.sp +.B void pcre2_serialize_free(uint8_t *\fIbytes\fP); +.sp +.B int32_t pcre2_serialize_get_number_of_codes(const uint8_t *\fIbytes\fP); +.fi +.sp +If you are running an application that uses a large number of regular +expression patterns, it may be useful to store them in a precompiled form +instead of having to compile them every time the application is run. However, +if you are using the just-in-time optimization feature, it is not possible to +save and reload the JIT data, because it is position-dependent. 
The host on +which the patterns are reloaded must be running the same version of PCRE2, with +the same code unit width, and must also have the same endianness, pointer width +and PCRE2_SIZE type. For example, patterns compiled on a 32-bit system using +PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor can they be +reloaded using the 8-bit library. +.P +Note that "serialization" in PCRE2 does not convert compiled patterns to an +abstract format like Java or .NET serialization. The serialized output is +really just a bytecode dump, which is why it can only be reloaded in the same +environment as the one that created it. Hence the restrictions mentioned above. +Applications that are not statically linked with a fixed version of PCRE2 must +be prepared to recompile patterns from their sources, in order to be immune to +PCRE2 upgrades. +. +. +.SH "SECURITY CONCERNS" +.rs +.sp +The facility for saving and restoring compiled patterns is intended for use +within individual applications. As such, the data supplied to +\fBpcre2_serialize_decode()\fP is expected to be trusted data, not data from +arbitrary external sources. There is only some simple consistency checking, not +complete validation of what is being re-loaded. Corrupted data may cause +undefined results. For example, if the length field of a pattern in the +serialized data is corrupted, the deserializing code may read beyond the end of +the byte stream that is passed to it. +. +. +.SH "SAVING COMPILED PATTERNS" +.rs +.sp +Before compiled patterns can be saved they must be serialized, which in PCRE2 +means converting the pattern to a stream of bytes. A single byte stream may +contain any number of compiled patterns, but they must all use the same +character tables. A single copy of the tables is included in the byte stream +(its size is 1088 bytes). For more details of character tables, see the +.\" HTML +.\" +section on locale support +.\" +in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. 
+.P +The function \fBpcre2_serialize_encode()\fP creates a serialized byte stream +from a list of compiled patterns. Its first two arguments specify the list, +being a pointer to a vector of pointers to compiled patterns, and the length of +the vector. The third and fourth arguments point to variables which are set to +point to the created byte stream and its length, respectively. The final +argument is a pointer to a general context, which can be used to specify custom +memory management functions. If this argument is NULL, \fBmalloc()\fP is used +to obtain memory for the byte stream. The yield of the function is the number +of serialized patterns, or one of the following negative error codes: +.sp + PCRE2_ERROR_BADDATA the number of patterns is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in one of the patterns + PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables + PCRE2_ERROR_NULL the 1st, 3rd, or 4th argument is NULL +.sp +PCRE2_ERROR_BADMAGIC means either that a pattern's code has been corrupted, or +that a slot in the vector does not point to a compiled pattern. +.P +Once a set of patterns has been serialized you can save the data in any +appropriate manner. Here is sample code that compiles two patterns and writes +them to a file. It assumes that the variable \fIfd\fP refers to a file that is +open for output. The error checking that should be present in a real +application has been omitted for simplicity. 
+.sp + int errorcode; + uint8_t *bytes; + PCRE2_SIZE erroroffset; + PCRE2_SIZE bytescount; + pcre2_code *list_of_codes[2]; + list_of_codes[0] = pcre2_compile("first pattern", + PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL); + list_of_codes[1] = pcre2_compile("second pattern", + PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL); + errorcode = pcre2_serialize_encode(list_of_codes, 2, &bytes, + &bytescount, NULL); + errorcode = fwrite(bytes, 1, bytescount, fd); +.sp +Note that the serialized data is binary data that may contain any of the 256 +possible byte values. On systems that make a distinction between binary and +non-binary data, be sure that the file is opened for binary output. +.P +Serializing a set of patterns leaves the original data untouched, so they can +still be used for matching. Their memory must eventually be freed in the usual +way by calling \fBpcre2_code_free()\fP. When you have finished with the byte +stream, it too must be freed by calling \fBpcre2_serialize_free()\fP. If this +function is called with a NULL argument, it returns immediately without doing +anything. +. +. +.SH "RE-USING PRECOMPILED PATTERNS" +.rs +.sp +In order to re-use a set of saved patterns you must first make the serialized +byte stream available in main memory (for example, by reading from a file). The +management of this memory block is up to the application. You can use the +\fBpcre2_serialize_get_number_of_codes()\fP function to find out how many +compiled patterns are in the serialized data without actually decoding the +patterns: +.sp + uint8_t *bytes = <for example, read from a file>; + int32_t number_of_codes = pcre2_serialize_get_number_of_codes(bytes); +.sp +The \fBpcre2_serialize_decode()\fP function reads a byte stream and recreates +the compiled patterns in new memory blocks, setting pointers to them in a +vector. The first two arguments are a pointer to a suitable vector and its +length, and the third argument points to a byte stream.
The final argument is a +pointer to a general context, which can be used to specify custom memory +management functions for the decoded patterns. If this argument is NULL, +\fBmalloc()\fP and \fBfree()\fP are used. After deserialization, the byte +stream is no longer needed and can be discarded. +.sp + pcre2_code *list_of_codes[2]; + uint8_t *bytes = <for example, read from a file>; + int32_t number_of_codes = + pcre2_serialize_decode(list_of_codes, 2, bytes, NULL); +.sp +If the vector is not large enough for all the patterns in the byte stream, it +is filled with those that fit, and the remainder are ignored. The yield of the +function is the number of decoded patterns, or one of the following negative +error codes: +.sp + PCRE2_ERROR_BADDATA second argument is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in the data + PCRE2_ERROR_BADMODE mismatch of code unit size or PCRE2 version + PCRE2_ERROR_BADSERIALIZEDDATA other sanity check failure + PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_NULL first or third argument is NULL +.sp +PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +on a system with different endianness. +.P +Decoded patterns can be used for matching in the usual way, and must be freed +by calling \fBpcre2_code_free()\fP. However, be aware that there is a potential +race issue if you are using multiple patterns that were decoded from a single +byte stream in a multithreaded application. A single copy of the character +tables is used by all the decoded patterns and a reference count is used to +arrange for its memory to be automatically freed when the last pattern is +freed, but there is no locking on this reference count. Therefore, if you want +to call \fBpcre2_code_free()\fP for these patterns in different threads, you +must arrange your own locking, and ensure that \fBpcre2_code_free()\fP cannot +be called by two threads at the same time.
+.P +If a pattern was processed by \fBpcre2_jit_compile()\fP before being +serialized, the JIT data is discarded and so is no longer available after a +save/restore cycle. You can, however, process a restored pattern with +\fBpcre2_jit_compile()\fP if you wish. +. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 27 June 2018 +Copyright (c) 1997-2018 University of Cambridge. +.fi diff --git a/doc/pcre2syntax.3 b/doc/pcre2syntax.3 new file mode 100644 index 0000000..6f4f7aa --- /dev/null +++ b/doc/pcre2syntax.3 @@ -0,0 +1,618 @@ +.TH PCRE2SYNTAX 3 "12 October 2023" "PCRE2 10.43" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY" +.rs +.sp +The full syntax and semantics of the regular expressions that are supported by +PCRE2 are described in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. This document contains a quick-reference summary of the syntax. +. +. +.SH "QUOTING" +.rs +.sp + \ex where x is non-alphanumeric is a literal x + \eQ...\eE treat enclosed characters as literal +.sp +Note that white space inside \eQ...\eE is always treated as literal, even if +PCRE2_EXTENDED is set, causing most other white space to be ignored. +. +. +.SH "BRACED ITEMS" +.rs +.sp +With one exception, wherever brace characters { and } are required to enclose +data for constructions such as \eg{2} or \ek{name}, space and/or horizontal tab +characters that follow { or precede } are allowed and are ignored. In the case +of quantifiers, they may also appear before or after the comma. The exception +is \eu{...} which is not Perl-compatible and is recognized only when +PCRE2_EXTRA_ALT_BSUX is set. This is an ECMAScript compatibility feature, and +follows ECMAScript's behaviour. +. +. +.SH "ESCAPED CHARACTERS" +.rs +.sp +This table applies to ASCII and Unicode environments. 
An unrecognized escape +sequence causes an error. +.sp + \ea alarm, that is, the BEL character (hex 07) + \ecx "control-x", where x is a non-control ASCII character + \ee escape (hex 1B) + \ef form feed (hex 0C) + \en newline (hex 0A) + \er carriage return (hex 0D) + \et tab (hex 09) + \e0dd character with octal code 0dd + \eddd character with octal code ddd, or backreference + \eo{ddd..} character with octal code ddd.. + \eN{U+hh..} character with Unicode code point hh.. (Unicode mode only) + \exhh character with hex code hh + \ex{hh..} character with hex code hh.. +.sp +If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the +following are also recognized: +.sp + \eU the character "U" + \euhhhh character with hex code hhhh + \eu{hh..} character with hex code hh.. but only for EXTRA_ALT_BSUX +.sp +When \ex is not followed by {, from zero to two hexadecimal digits are read, +but in ALT_BSUX mode \ex must be followed by two hexadecimal digits to be +recognized as a hexadecimal escape; otherwise it matches a literal "x". +Likewise, if \eu (in ALT_BSUX mode) is not followed by four hexadecimal digits +or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it +matches a literal "u". +.P +Note that \e0dd is always an octal code. The treatment of backslash followed by +a non-zero digit is complicated; for details see the section +.\" HTML +.\" +"Non-printing characters" +.\" +in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation, where details of escape processing in EBCDIC environments are +also given. \eN{U+hh..} is synonymous with \ex{hh..} in PCRE2 but is not +supported in EBCDIC environments. Note that \eN not followed by an opening +curly bracket has a different meaning (see below). +. +. +.SH "CHARACTER TYPES" +.rs +.sp + . 
any character except newline; + in dotall mode, any character whatsoever + \eC one code unit, even in UTF mode (best avoided) + \ed a decimal digit + \eD a character that is not a decimal digit + \eh a horizontal white space character + \eH a character that is not a horizontal white space character + \eN a character that is not a newline + \ep{\fIxx\fP} a character with the \fIxx\fP property + \eP{\fIxx\fP} a character without the \fIxx\fP property + \eR a newline sequence + \es a white space character + \eS a character that is not a white space character + \ev a vertical white space character + \eV a character that is not a vertical white space character + \ew a "word" character + \eW a "non-word" character + \eX a Unicode extended grapheme cluster +.sp +\eC is dangerous because it may leave the current matching point in the middle +of a UTF-8 or UTF-16 character. The application can lock out the use of \eC by +setting the PCRE2_NEVER_BACKSLASH_C option. It is also possible to build PCRE2 +with the use of \eC permanently disabled. +.P +By default, \ed, \es, and \ew match only ASCII characters, even in UTF-8 mode +or in the 16-bit and 32-bit libraries. However, if locale-specific matching is +happening, \es and \ew may also match characters with code points in the range +128-255. If the PCRE2_UCP option is set, the behaviour of these escape +sequences is changed to use Unicode properties and they match many more +characters, but there are some option settings that can restrict individual +sequences to matching only ASCII characters. +.P +Property descriptions in \ep and \eP are matched caselessly; hyphens, +underscores, and white space are ignored, in accordance with Unicode's "loose +matching" rules. +. +. 
+.SH "GENERAL CATEGORY PROPERTIES FOR \ep and \eP" +.rs +.sp + C Other + Cc Control + Cf Format + Cn Unassigned + Co Private use + Cs Surrogate +.sp + L Letter + Ll Lower case letter + Lm Modifier letter + Lo Other letter + Lt Title case letter + Lu Upper case letter + Lc Ll, Lu, or Lt + L& Ll, Lu, or Lt +.sp + M Mark + Mc Spacing mark + Me Enclosing mark + Mn Non-spacing mark +.sp + N Number + Nd Decimal number + Nl Letter number + No Other number +.sp + P Punctuation + Pc Connector punctuation + Pd Dash punctuation + Pe Close punctuation + Pf Final punctuation + Pi Initial punctuation + Po Other punctuation + Ps Open punctuation +.sp + S Symbol + Sc Currency symbol + Sk Modifier symbol + Sm Mathematical symbol + So Other symbol +.sp + Z Separator + Zl Line separator + Zp Paragraph separator + Zs Space separator +. +. +.SH "PCRE2 SPECIAL CATEGORY PROPERTIES FOR \ep and \eP" +.rs +.sp + Xan Alphanumeric: union of properties L and N + Xps POSIX space: property Z or tab, NL, VT, FF, CR + Xsp Perl space: property Z or tab, NL, VT, FF, CR + Xuc Universally-named character: one that can be + represented by a Universal Character Name + Xwd Perl word: property Xan or underscore +.sp +Perl and POSIX space are now the same. Perl added VT to its space character set +at release 5.18. +. +. +.SH "BINARY PROPERTIES FOR \ep AND \eP" +.rs +.sp +Unicode defines a number of binary properties, that is, properties whose only +values are true or false. You can obtain a list of those that are recognized by +\ep and \eP, along with their abbreviations, by running this command: +.sp + pcre2test -LP +. +. +. +.SH "SCRIPT MATCHING WITH \ep AND \eP" +.rs +.sp +Many script names and their 4-letter abbreviations are recognized in +\ep{sc:...} or \ep{scx:...} items, or on their own with \ep (and also \eP of +course). You can obtain a list of these scripts by running this command: +.sp + pcre2test -LS +. +. +. 
+.SH "THE BIDI_CLASS PROPERTY FOR \ep AND \eP" +.rs +.sp + \ep{Bidi_Class:<class>} matches a character with the given class + \ep{BC:<class>} matches a character with the given class +.sp +The recognized classes are: +.sp + AL Arabic letter + AN Arabic number + B paragraph separator + BN boundary neutral + CS common separator + EN European number + ES European separator + ET European terminator + FSI first strong isolate + L left-to-right + LRE left-to-right embedding + LRI left-to-right isolate + LRO left-to-right override + NSM non-spacing mark + ON other neutral + PDF pop directional format + PDI pop directional isolate + R right-to-left + RLE right-to-left embedding + RLI right-to-left isolate + RLO right-to-left override + S segment separator + WS white space +. +. +.SH "CHARACTER CLASSES" +.rs +.sp + [...] positive character class + [^...] negative character class + [x-y] range (can be used for hex characters) + [[:xxx:]] positive POSIX named set + [[:^xxx:]] negative POSIX named set +.sp + alnum alphanumeric + alpha alphabetic + ascii 0-127 + blank space or tab + cntrl control character + digit decimal digit + graph printing, excluding space + lower lower case letter + print printing, including space + punct printing, excluding alphanumeric + space white space + upper upper case letter + word same as \ew + xdigit hexadecimal digit +.sp +In PCRE2, POSIX character set names recognize only ASCII characters by default, +but some of them use Unicode properties if PCRE2_UCP is set. You can use +\eQ...\eE inside a character class. +. +. +.SH "QUANTIFIERS" +.rs +.sp + ? 0 or 1, greedy + ?+ 0 or 1, possessive + ?? 0 or 1, lazy + * 0 or more, greedy + *+ 0 or more, possessive + *? 0 or more, lazy + + 1 or more, greedy + ++ 1 or more, possessive + +? 1 or more, lazy + {n} exactly n + {n,m} at least n, no more than m, greedy + {n,m}+ at least n, no more than m, possessive + {n,m}? at least n, no more than m, lazy + {n,} n or more, greedy + {n,}+ n or more, possessive + {n,}?
n or more, lazy + {,m} zero up to m, greedy + {,m}+ zero up to m, possessive + {,m}? zero up to m, lazy +. +. +.SH "ANCHORS AND SIMPLE ASSERTIONS" +.rs +.sp + \eb word boundary + \eB not a word boundary + ^ start of subject + also after an internal newline in multiline mode + (after any newline if PCRE2_ALT_CIRCUMFLEX is set) + \eA start of subject + $ end of subject + also before newline at end of subject + also before internal newline in multiline mode + \eZ end of subject + also before newline at end of subject + \ez end of subject + \eG first matching position in subject +. +. +.SH "REPORTED MATCH POINT SETTING" +.rs +.sp + \eK set reported start of match +.sp +From release 10.38 \eK is not permitted by default in lookaround assertions, +for compatibility with Perl. However, if the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK +option is set, the previous behaviour is re-enabled. When this option is set, +\eK is honoured in positive assertions, but ignored in negative ones. +. +. +.SH "ALTERNATION" +.rs +.sp + expr|expr|expr... +. +. +.SH "CAPTURING" +.rs +.sp + (...) capture group + (?<name>...) named capture group (Perl) + (?'name'...) named capture group (Perl) + (?P<name>...) named capture group (Python) + (?:...) non-capture group + (?|...) non-capture group; reset group numbers for + capture groups in each alternative +.sp +In non-UTF modes, names may contain underscores and ASCII letters and digits; +in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In +both cases, a name must not start with a digit. +. +. +.SH "ATOMIC GROUPS" +.rs +.sp + (?>...) atomic non-capture group + (*atomic:...) atomic non-capture group +. +. +.SH "COMMENT" +.rs +.sp + (?#....) comment (not nestable) +. +. +.SH "OPTION SETTING" +.rs +Changes of these options within a group are automatically cancelled at the end +of the group.
+.sp + (?a) all ASCII options + (?aD) restrict \ed to ASCII in UCP mode + (?aS) restrict \es to ASCII in UCP mode + (?aW) restrict \ew to ASCII in UCP mode + (?aP) restrict all POSIX classes to ASCII in UCP mode + (?aT) restrict POSIX digit classes to ASCII in UCP mode + (?i) caseless + (?J) allow duplicate named groups + (?m) multiline + (?n) no auto capture + (?r) restrict caseless to either ASCII or non-ASCII + (?s) single line (dotall) + (?U) default ungreedy (lazy) + (?x) ignore white space except in classes or \eQ...\eE + (?xx) as (?x) but also ignore space and tab in classes + (?-...) unset the given option(s) + (?^) unset imnrsx options +.sp +(?aP) implies (?aT) as well, though this has no additional effect. However, it +means that (?-aP) is really (?-PT) which disables all ASCII restrictions for +POSIX classes. +.P +Unsetting x or xx unsets both. Several options may be set at once, and a +mixture of setting and unsetting such as (?i-x) is allowed, but there may be +only one hyphen. Setting (but no unsetting) is allowed after (?^ for example +(?^in). An option setting may appear at the start of a non-capture group, for +example (?i:...). +.P +The following are recognized only at the very start of a pattern or after one +of the newline or \eR options with similar syntax. More than one of them may +appear. For the first three, d is a decimal number. 
+.sp + (*LIMIT_DEPTH=d) set the backtracking limit to d + (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes + (*LIMIT_MATCH=d) set the match limit to d + (*NOTEMPTY) set PCRE2_NOTEMPTY when matching + (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching + (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS) + (*NO_DOTSTAR_ANCHOR) no .* anchoring (PCRE2_NO_DOTSTAR_ANCHOR) + (*NO_JIT) disable JIT optimization + (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE) + (*UTF) set appropriate UTF mode for the library in use + (*UCP) set PCRE2_UCP (use Unicode properties for \ed etc) +.sp +Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the value of +the limits set by the caller of \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP, +not increase them. LIMIT_RECURSION is an obsolete synonym for LIMIT_DEPTH. The +application can lock out the use of (*UTF) and (*UCP) by setting the +PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at compile time. +. +. +.SH "NEWLINE CONVENTION" +.rs +.sp +These are recognized only at the very start of the pattern or after option +settings with a similar syntax. +.sp + (*CR) carriage return only + (*LF) linefeed only + (*CRLF) carriage return followed by linefeed + (*ANYCRLF) all three of the above + (*ANY) any Unicode newline sequence + (*NUL) the NUL character (binary zero) +. +. +.SH "WHAT \eR MATCHES" +.rs +.sp +These are recognized only at the very start of the pattern or after option +setting with a similar syntax. +.sp + (*BSR_ANYCRLF) CR, LF, or CRLF + (*BSR_UNICODE) any Unicode newline sequence +. +. +.SH "LOOKAHEAD AND LOOKBEHIND ASSERTIONS" +.rs +.sp + (?=...) ) + (*pla:...) ) positive lookahead + (*positive_lookahead:...) ) +.sp + (?!...) ) + (*nla:...) ) negative lookahead + (*negative_lookahead:...) ) +.sp + (?<=...) ) + (*plb:...) ) positive lookbehind + (*positive_lookbehind:...) ) +.sp + (? 
reference by name (Perl) + \ek'name' reference by name (Perl) + \eg{name} reference by name (Perl) + \ek{name} reference by name (.NET) + (?P=name) reference by name (Python) +. +. +.SH "SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)" +.rs +.sp + (?R) recurse whole pattern + (?n) call subroutine by absolute number + (?+n) call subroutine by relative number + (?-n) call subroutine by relative number + (?&name) call subroutine by name (Perl) + (?P>name) call subroutine by name (Python) + \eg<name> call subroutine by name (Oniguruma) + \eg'name' call subroutine by name (Oniguruma) + \eg<n> call subroutine by absolute number (Oniguruma) + \eg'n' call subroutine by absolute number (Oniguruma) + \eg<+n> call subroutine by relative number (PCRE2 extension) + \eg'+n' call subroutine by relative number (PCRE2 extension) + \eg<-n> call subroutine by relative number (PCRE2 extension) + \eg'-n' call subroutine by relative number (PCRE2 extension) +. +. +.SH "CONDITIONAL PATTERNS" +.rs +.sp + (?(condition)yes-pattern) + (?(condition)yes-pattern|no-pattern) +.sp + (?(n) absolute reference condition + (?(+n) relative reference condition (PCRE2 extension) + (?(-n) relative reference condition (PCRE2 extension) + (?(<name>) named reference condition (Perl) + (?('name') named reference condition (Perl) + (?(name) named reference condition (PCRE2, deprecated) + (?(R) overall recursion condition + (?(Rn) specific numbered group recursion condition + (?(R&name) specific named group recursion condition + (?(DEFINE) define groups for reference + (?(VERSION[>]=n.m) test PCRE2 version + (?(assert) assertion condition +.sp +Note the ambiguity of (?(R) and (?(Rn) which might be named reference +conditions or recursion tests. Such a condition is interpreted as a reference +condition if the relevant named group exists. +. +. +.SH "BACKTRACKING CONTROL" +.rs +.sp +All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the +name is mandatory, for the others it is optional.
(*SKIP) changes its behaviour +if :NAME is present. The others just set a name for passing back to the caller, +but this is not a name that (*SKIP) can see. The following act immediately they +are reached: +.sp + (*ACCEPT) force successful match + (*FAIL) force backtrack; synonym (*F) + (*MARK:NAME) set name to be passed back; synonym (*:NAME) +.sp +The following act only when a subsequent match failure causes a backtrack to +reach them. They all force a match failure, but they differ in what happens +afterwards. Those that advance the start-of-match point do so only if the +pattern is not anchored. +.sp + (*COMMIT) overall failure, no advance of starting point + (*PRUNE) advance to next starting character + (*SKIP) advance to current matching position + (*SKIP:NAME) advance to position corresponding to an earlier + (*MARK:NAME); if not found, the (*SKIP) is ignored + (*THEN) local failure, backtrack to next alternation +.sp +The effect of one of these verbs in a group called as a subroutine is confined +to the subroutine call. +. +. +.SH "CALLOUTS" +.rs +.sp + (?C) callout (assumed number 0) + (?Cn) callout with numerical data n + (?C"text") callout with string data +.sp +The allowed string delimiters are ` ' " ^ % # $ (which are the same for the +start and the end), and the starting delimiter { matched with the ending +delimiter }. To encode the ending delimiter within the string, double it. +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2pattern\fP(3), \fBpcre2api\fP(3), \fBpcre2callout\fP(3), +\fBpcre2matching\fP(3), \fBpcre2\fP(3). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 12 October 2023 +Copyright (c) 1997-2023 University of Cambridge. 
+.fi diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 new file mode 100644 index 0000000..c7df418 --- /dev/null +++ b/doc/pcre2test.1 @@ -0,0 +1,2186 @@ +.TH PCRE2TEST 1 "24 April 2024" "PCRE 10.44" +.SH NAME +pcre2test - a program for testing Perl-compatible regular expressions. +.SH SYNOPSIS +.rs +.sp +.B pcre2test "[options] [input file [output file]]" +.sp +\fBpcre2test\fP is a test program for the PCRE2 regular expression libraries, +but it can also be used for experimenting with regular expressions. This +document describes the features of the test program; for details of the regular +expressions themselves, see the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. For details of the PCRE2 library function calls and their +options, see the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +.P +The input for \fBpcre2test\fP is a sequence of regular expression patterns and +subject strings to be matched. There are also command lines for setting +defaults and controlling some special actions. The output shows the result of +each match attempt. Modifiers on external or internal command lines, the +patterns, and the subject lines specify PCRE2 function options, control how the +subject is processed, and what output is produced. +.P +There are many obscure modifiers, some of which are specifically designed for +use in conjunction with the test script and data files that are distributed as +part of PCRE2. All the modifiers are documented here, some without much +justification, but many of them are unlikely to be of use except when testing +the libraries. +. +. +.SH "PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES" +.rs +.sp +Different versions of the PCRE2 library can be built to support character +strings that are encoded in 8-bit, 16-bit, or 32-bit code units. One, two, or +all three of these libraries may be simultaneously installed. The +\fBpcre2test\fP program can be used to test all the libraries. However, its own +input and output are always in 8-bit format. 
When testing the 16-bit or 32-bit +libraries, patterns and subject strings are converted to 16-bit or 32-bit +format before being passed to the library functions. Results are converted back +to 8-bit code units for output. +.P +In the rest of this document, the names of library functions and structures +are given in generic form, for example, \fBpcre2_compile()\fP. The actual +names used in the libraries have a suffix _8, _16, or _32, as appropriate. +. +. +.\" HTML +.SH "INPUT ENCODING" +.rs +.sp +Input to \fBpcre2test\fP is processed line by line, either by calling the C +library's \fBfgets()\fP function, or via the \fBlibreadline\fP or \fBlibedit\fP +library. In some Windows environments character 26 (hex 1A) causes an immediate +end of file, and no further data is read, so this character should be avoided +unless you really want that action. +.P +The input is processed using C's string functions, so must not contain binary +zeros, even though in Unix-like environments, \fBfgets()\fP treats any bytes +other than newline as data characters. An error is generated if a binary zero +is encountered. By default subject lines are processed for backslash escapes, +which makes it possible to include any data value in strings that are passed to +the library for matching. For patterns, there is a facility for specifying some +or all of the 8-bit input characters as hexadecimal pairs, which makes it +possible to include binary zeros. +. +. +.SS "Input for the 16-bit and 32-bit libraries" +.rs +.sp +When testing the 16-bit or 32-bit libraries, there is a need to be able to +generate character code points greater than 255 in the strings that are passed +to the library. For subject lines, backslash escapes can be used. In addition, +when the \fButf\fP modifier (see +.\" HTML +.\" +"Setting compilation options" +.\" +below) is set, the pattern and any following subject lines are interpreted as +UTF-8 strings and translated to UTF-16 or UTF-32 as appropriate. 
+.P +For non-UTF testing of wide characters, the \fButf8_input\fP modifier can be +used. This is mutually exclusive with \fButf\fP, and is allowed only in 16-bit +or 32-bit mode. It causes the pattern and following subject lines to be treated +as UTF-8 according to the original definition (RFC 2279), which allows for +character values up to 0x7fffffff. Each character is placed in one 16-bit or +32-bit code unit (in the 16-bit case, values greater than 0xffff cause an error +to occur). +.P +UTF-8 (in its original definition) is not capable of encoding values greater +than 0x7fffffff, but such values can be handled by the 32-bit library. When +testing this library in non-UTF mode with \fButf8_input\fP set, if any +character is preceded by the byte 0xff (which is an invalid byte in UTF-8) +0x80000000 is added to the character's value. This is the only way of passing +such code points in a pattern string. For subject strings, using an escape +sequence is preferable. +. +. +.SH "COMMAND LINE OPTIONS" +.rs +.TP 10 +\fB-8\fP +If the 8-bit library has been built, this option causes it to be used (this is +the default). If the 8-bit library has not been built, this option causes an +error. +.TP 10 +\fB-16\fP +If the 16-bit library has been built, this option causes it to be used. If the +8-bit library has not been built, this is the default. If the 16-bit library +has not been built, this option causes an error. +.TP 10 +\fB-32\fP +If the 32-bit library has been built, this option causes it to be used. If no +other library has been built, this is the default. If the 32-bit library has +not been built, this option causes an error. +.TP 10 +\fB-ac\fP +Behave as if each pattern has the \fBauto_callout\fP modifier, that is, insert +automatic callouts into every pattern that is compiled. +.TP 10 +\fB-AC\fP +As for \fB-ac\fP, but in addition behave as if each subject line has the +\fBcallout_extra\fP modifier, that is, show additional information from +callouts. 
+.TP 10 +\fB-b\fP +Behave as if each pattern has the \fBfullbincode\fP modifier; the full +internal binary form of the pattern is output after compilation. +.TP 10 +\fB-C\fP +Output the version number of the PCRE2 library, and all available information +about the optional features that are included, and then exit with zero exit +code. All other options are ignored. If both -C and -LM are present, whichever +is first is recognized. +.TP 10 +\fB-C\fP \fIoption\fP +Output information about a specific build-time option, then exit. This +functionality is intended for use in scripts such as \fBRunTest\fP. The +following options output the value and set the exit code as indicated: +.sp + ebcdic-nl the code for LF (= NL) in an EBCDIC environment: + 0x15 or 0x25 + 0 if used in an ASCII environment + exit code is always 0 + linksize the configured internal link size (2, 3, or 4) + exit code is set to the link size + newline the default newline setting: + CR, LF, CRLF, ANYCRLF, ANY, or NUL + exit code is always 0 + bsr the default setting for what \eR matches: + ANYCRLF or ANY + exit code is always 0 +.sp +The following options output 1 for true or 0 for false, and set the exit code +to the same value: +.sp + backslash-C \eC is supported (not locked out) + ebcdic compiled for an EBCDIC environment + jit just-in-time support is available + pcre2-16 the 16-bit library was built + pcre2-32 the 32-bit library was built + pcre2-8 the 8-bit library was built + unicode Unicode support is available +.sp +If an unknown option is given, an error message is output; the exit code is 0. +.TP 10 +\fB-d\fP +Behave as if each pattern has the \fBdebug\fP modifier; the internal +form and information about the compiled pattern is output after compilation; +\fB-d\fP is equivalent to \fB-b -i\fP. +.TP 10 +\fB-dfa\fP +Behave as if each subject line has the \fBdfa\fP modifier; matching is done +using the \fBpcre2_dfa_match()\fP function instead of the default +\fBpcre2_match()\fP. 
+.TP 10 +\fB-error\fP \fInumber[,number,...]\fP +Call \fBpcre2_get_error_message()\fP for each of the error numbers in the +comma-separated list, display the resulting messages on the standard output, +then exit with zero exit code. The numbers may be positive or negative. This is +a convenience facility for PCRE2 maintainers. +.TP 10 +\fB-help\fP +Output a brief summary of these options and then exit. +.TP 10 +\fB-i\fP +Behave as if each pattern has the \fBinfo\fP modifier; information about the +compiled pattern is given after compilation. +.TP 10 +\fB-jit\fP +Behave as if each pattern line has the \fBjit\fP modifier; after successful +compilation, each pattern is passed to the just-in-time compiler, if available. +.TP 10 +\fB-jitfast\fP +Behave as if each pattern line has the \fBjitfast\fP modifier; after +successful compilation, each pattern is passed to the just-in-time compiler, if +available, and each subject line is passed directly to the JIT matcher via its +"fast path". +.TP 10 +\fB-jitverify\fP +Behave as if each pattern line has the \fBjitverify\fP modifier; after +successful compilation, each pattern is passed to the just-in-time compiler, if +available, and the use of JIT for matching is verified. +.TP 10 +\fB-LM\fP +List modifiers: write a list of available pattern and subject modifiers to the +standard output, then exit with zero exit code. All other options are ignored. +If both -C and any -Lx options are present, whichever is first is recognized. +.TP 10 +\fB-LP\fP +List properties: write a list of recognized Unicode properties to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. +.TP 10 +\fB-LS\fP +List scripts: write a list of recognized Unicode script names to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized.
+.TP 10 +\fB-pattern\fP \fImodifier-list\fP +Behave as if each pattern line contains the given modifiers. +.TP 10 +\fB-q\fP +Do not output the version number of \fBpcre2test\fP at the start of execution. +.TP 10 +\fB-S\fP \fIsize\fP +On Unix-like systems, set the size of the run-time stack to \fIsize\fP +mebibytes (units of 1024*1024 bytes). +.TP 10 +\fB-subject\fP \fImodifier-list\fP +Behave as if each subject line contains the given modifiers. +.TP 10 +\fB-t\fP +Run each compile and match many times with a timer, and output the resulting +times per compile or match. When JIT is used, separate times are given for the +initial compile and the JIT compile. You can control the number of iterations +that are used for timing by following \fB-t\fP with a number (as a separate +item on the command line). For example, "-t 1000" iterates 1000 times. The +default is to iterate 500,000 times. +.TP 10 +\fB-tm\fP +This is like \fB-t\fP except that it times only the matching phase, not the +compile phase. +.TP 10 +\fB-T\fP \fB-TM\fP +These behave like \fB-t\fP and \fB-tm\fP, but in addition, at the end of a run, +the total times for all compiles and matches are output. +.TP 10 +\fB-version\fP +Output the PCRE2 version number and then exit. +. +. +.SH "DESCRIPTION" +.rs +.sp +If \fBpcre2test\fP is given two filename arguments, it reads from the first and +writes to the second. If the first name is "-", input is taken from the +standard input. If \fBpcre2test\fP is given only one argument, it reads from +that file and writes to stdout. Otherwise, it reads from stdin and writes to +stdout. +.P +When \fBpcre2test\fP is built, a configuration option can specify that it +should be linked with the \fBlibreadline\fP or \fBlibedit\fP library. When this +is done, if the input is from a terminal, it is read using the \fBreadline()\fP +function. This provides line-editing and history facilities. The output from +the \fB-help\fP option states whether or not \fBreadline()\fP will be used. 
+.P +The program handles any number of tests, each of which consists of a set of +input lines. Each set starts with a regular expression pattern, followed by any +number of subject lines to be matched against that pattern. In between sets of +test data, command lines that begin with # may appear. This file format, with +some restrictions, can also be processed by the \fBperltest.sh\fP script that +is distributed with PCRE2 as a means of checking that the behaviour of PCRE2 +and Perl is the same. For a specification of \fBperltest.sh\fP, see the +comments near its beginning. See also the #perltest command below. +.P +When the input is a terminal, \fBpcre2test\fP prompts for each line of input, +using "re>" to prompt for regular expression patterns, and "data>" to prompt +for subject lines. Command lines starting with # can be entered only in +response to the "re>" prompt. +.P +Each subject line is matched separately and independently. If you want to do +multi-line matches, you have to use the \en escape sequence (or \er or \er\en, +etc., depending on the newline setting) in a single line of input to encode the +newline sequences. There is no limit on the length of subject lines; the input +buffer is automatically extended if it is too small. There are replication +features that make it possible to generate long repetitive pattern or subject +lines without having to supply them explicitly. +.P +An empty line or the end of the file signals the end of the subject lines for a +test, at which point a new pattern or command line is expected if there is +still input to be read. +. +. +.SH "COMMAND LINES" +.rs +.sp +In between sets of test data, a line that begins with # is interpreted as a +command line. If the first character is followed by white space or an +exclamation mark, the line is treated as a comment, and ignored.
Otherwise, the +following commands are recognized: +.sp + #forbid_utf +.sp +Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP +options set, which locks out the use of the PCRE2_UTF and PCRE2_UCP options and +the use of (*UTF) and (*UCP) at the start of patterns. This command also forces +an error if a subsequent pattern contains any occurrences of \eP, \ep, or \eX, +which are still supported when PCRE2_UTF is not set, but which require Unicode +property support to be included in the library. +.P +This is a trigger guard that is used in test files to ensure that UTF or +Unicode property tests are not accidentally added to files that are used when +Unicode support is not included in the library. Setting PCRE2_NEVER_UTF and +PCRE2_NEVER_UCP as a default can also be obtained by the use of \fB#pattern\fP; +the difference is that \fB#forbid_utf\fP cannot be unset, and the automatic +options are not displayed in pattern information, to avoid cluttering up test +output. +.sp + #load <filename> +.sp +This command is used to load a set of precompiled patterns from a file, as +described in the section entitled "Saving and restoring compiled patterns" +.\" HTML +.\" +below. +.\" +.sp + #loadtables <filename> +.sp +This command is used to load a set of binary character tables that can be +accessed by the tables=3 qualifier. Such tables can be created by the +\fBpcre2_dftables\fP program with the -b option. +.sp + #newline_default [<newline-list>] +.sp +When PCRE2 is built, a default newline convention can be specified. This +determines which characters and/or character pairs are recognized as indicating +a newline in a pattern or subject string. The default can be overridden when a +pattern is compiled. The standard test files contain tests of various newline +conventions, but the majority of the tests expect a single linefeed to be +recognized as a newline by default. Without special action the tests would fail +when PCRE2 is compiled with either CR or CRLF as the default newline.
+.P +The #newline_default command specifies a list of newline types that are +acceptable as the default. The types must be one of CR, LF, CRLF, ANYCRLF, +ANY, or NUL (in upper or lower case), for example: +.sp + #newline_default LF Any anyCRLF +.sp +If the default newline is in the list, this command has no effect. Otherwise, +except when testing the POSIX API, a \fBnewline\fP modifier that specifies the +first newline convention in the list (LF in the above example) is added to any +pattern that does not already have a \fBnewline\fP modifier. If the newline +list is empty, the feature is turned off. This command is present in a number +of the standard test input files. +.P +When the POSIX API is being tested there is no way to override the default +newline convention, though it is possible to set the newline convention from +within the pattern. A warning is given if the \fBposix\fP or \fBposix_nosub\fP +modifier is used when \fB#newline_default\fP would set a default for the +non-POSIX API. +.sp + #pattern +.sp +This command sets a default modifier list that applies to all subsequent +patterns. Modifiers on a pattern can change these settings. +.sp + #perltest +.sp +This line is used in test files that can also be processed by \fBperltest.sh\fP +to confirm that Perl gives the same results as PCRE2. Subsequent tests are +checked for the use of \fBpcre2test\fP features that are incompatible with the +\fBperltest.sh\fP script. +.P +Patterns must use '/' as their delimiter, and only certain modifiers are +supported. Comment lines, #pattern commands, and #subject commands that set or +unset "mark" are recognized and acted on. The #perltest, #forbid_utf, and +#newline_default commands, which are needed in the relevant pcre2test files, +are silently ignored. All other command lines are ignored, but give a warning +message. The \fB#perltest\fP command helps detect tests that are accidentally +put in the wrong file or use the wrong delimiter. 
For more details of the +\fBperltest.sh\fP script see the comments it contains. +.sp + #pop [<modifiers>] + #popcopy [<modifiers>] +.sp +These commands are used to manipulate the stack of compiled patterns, as +described in the section entitled "Saving and restoring compiled patterns" +.\" HTML +.\" +below. +.\" +.sp + #save <filename> +.sp +This command is used to save a set of compiled patterns to a file, as described +in the section entitled "Saving and restoring compiled patterns" +.\" HTML +.\" +below. +.\" +.sp + #subject <modifier-list> +.sp +This command sets a default modifier list that applies to all subsequent +subject lines. Modifiers on a subject line can change these settings. +. +. +.SH "MODIFIER SYNTAX" +.rs +.sp +Modifier lists are used with both pattern and subject lines. Items in a list +are separated by commas followed by optional white space. Trailing whitespace +in a modifier list is ignored. Some modifiers may be given for both patterns +and subject lines, whereas others are valid only for one or the other. Each +modifier has a long name, for example "anchored", and some of them must be +followed by an equals sign and a value, for example, "offset=12". Values cannot +contain comma characters, but may contain spaces. Modifiers that do not take +values may be preceded by a minus sign to turn off a previous setting. +.P +A few of the more common modifiers can also be specified as single letters, for +example "i" for "caseless". In documentation, following the Perl convention, +these are written with a slash ("the /i modifier") for clarity. Abbreviated +modifiers must all be concatenated in the first item of a modifier list. If the +first item is not recognized as a long modifier name, it is interpreted as a +sequence of these abbreviations. For example: +.sp + /abc/ig,newline=cr,jit=3 +.sp +This is a pattern line whose modifier list starts with two one-letter modifiers +(/i and /g). The lower-case abbreviated modifiers are the same as used in Perl. +. +.
+.SH "PATTERN SYNTAX" +.rs +.sp +A pattern line must start with one of the following characters (common symbols, +excluding pattern meta-characters): +.sp + / ! " ' ` - = _ : ; , % & @ ~ +.sp +This is interpreted as the pattern's delimiter. A regular expression may be +continued over several input lines, in which case the newline characters are +included within it. It is possible to include the delimiter as a literal within +the pattern by escaping it with a backslash, for example +.sp + /abc\e/def/ +.sp +If you do this, the escape and the delimiter form part of the pattern, but +since the delimiters are all non-alphanumeric, the inclusion of the backslash +does not affect the pattern's interpretation. Note, however, that this trick +does not work within \eQ...\eE literal bracketing because the backslash will +itself be interpreted as a literal. If the terminating delimiter is immediately +followed by a backslash, for example, +.sp + /abc/\e +.sp +a backslash is added to the end of the pattern. This is done to provide a way +of testing the error condition that arises if a pattern finishes with a +backslash, because +.sp + /abc\e/ +.sp +is interpreted as the first line of a pattern that starts with "abc/", causing +pcre2test to read the next line as a continuation of the regular expression. +.P +A pattern can be followed by a modifier list (details below). +. +. +.SH "SUBJECT LINE SYNTAX" +.rs +.sp +Before each subject line is passed to \fBpcre2_match()\fP, +\fBpcre2_dfa_match()\fP, or \fBpcre2_jit_match()\fP, leading and trailing white +space is removed, and the line is scanned for backslash escapes, unless the +\fBsubject_literal\fP modifier was set for the pattern. 
The following provide a +means of encoding non-printing characters in a visible way: +.sp + \ea alarm (BEL, \ex07) + \eb backspace (\ex08) + \ee escape (\ex1b) + \ef form feed (\ex0c) + \en newline (\ex0a) + \er carriage return (\ex0d) + \et tab (\ex09) + \ev vertical tab (\ex0b) + \ennn octal character (up to 3 octal digits); always + a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode + \eo{dd...} octal character (any number of octal digits) + \exhh hexadecimal byte (up to 2 hex digits) + \ex{hh...} hexadecimal character (any number of hex digits) +.sp +The use of \ex{hh...} is not dependent on the use of the \fButf\fP modifier on +the pattern. It is recognized always. There may be any number of hexadecimal +digits inside the braces; invalid values provoke error messages. +.P +Note that \exhh specifies one byte rather than one character in UTF-8 mode; +this makes it possible to construct invalid UTF-8 sequences for testing +purposes. On the other hand, \ex{hh} is interpreted as a UTF-8 character in +UTF-8 mode, generating more than one byte if the value is greater than 127. +When testing the 8-bit library not in UTF-8 mode, \ex{hh} generates one byte +for values less than 256, and causes an error for greater values. +.P +In UTF-16 mode, all 4-digit \ex{hhhh} values are accepted. This makes it +possible to construct invalid UTF-16 sequences for testing purposes. +.P +In UTF-32 mode, all 4- to 8-digit \ex{...} values are accepted. This makes it +possible to construct invalid UTF-32 sequences for testing purposes. +.P +There is a special backslash sequence that specifies replication of one or more +characters: +.sp + \e[<characters>]{<count>} +.sp +This makes it possible to test long strings without having to provide them as +part of the file. For example: +.sp + \e[abc]{4} +.sp +is converted to "abcabcabcabc". This feature does not support nesting. To +include a closing square bracket in the characters, code it as \ex5D.
+.P +A backslash followed by an equals sign marks the end of the subject string and +the start of a modifier list. For example: +.sp + abc\e=notbol,notempty +.sp +If the subject string is empty and \e= is followed by whitespace, the line is +treated as a comment line, and is not used for matching. For example: +.sp + \e= This is a comment. + abc\e= This is an invalid modifier list. +.sp +A backslash followed by any other non-alphanumeric character just escapes that +character. A backslash followed by anything else causes an error. However, if +the very last character in the line is a backslash (and there is no modifier +list), it is ignored. This gives a way of passing an empty line as data, since +a real empty line terminates the data input. +.P +If the \fBsubject_literal\fP modifier is set for a pattern, all subject lines +that follow are treated as literals, with no special treatment of backslashes. +No replication is possible, and any subject modifiers must be set as defaults +by a \fB#subject\fP command. +. +. +.SH "PATTERN MODIFIERS" +.rs +.sp +There are several types of modifier that can appear in pattern lines. Except +where noted below, they may also be used in \fB#pattern\fP commands. A +pattern's modifier list can add to or override default modifiers that were set +by a previous \fB#pattern\fP command. +. +. +.\" HTML +.SS "Setting compilation options" +.rs +.sp +The following modifiers set options for \fBpcre2_compile()\fP. Most of them set +bits in the options argument of that function, but those whose names start with +PCRE2_EXTRA are additional options that are set in the compile context. +Some of these options have single-letter abbreviations. There is special +handling for /x: if a second x is present, PCRE2_EXTENDED is converted into +PCRE2_EXTENDED_MORE as in Perl. A third appearance adds PCRE2_EXTENDED as well, +though this makes no difference to the way \fBpcre2_compile()\fP behaves. 
See +.\" HREF +\fBpcre2api\fP +.\" +for a description of the effects of these options. +.sp + allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS + allow_lookaround_bsk set PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK + allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES + alt_bsux set PCRE2_ALT_BSUX + alt_circumflex set PCRE2_ALT_CIRCUMFLEX + alt_verbnames set PCRE2_ALT_VERBNAMES + anchored set PCRE2_ANCHORED + /a ascii_all set all ASCII options + ascii_bsd set PCRE2_EXTRA_ASCII_BSD + ascii_bss set PCRE2_EXTRA_ASCII_BSS + ascii_bsw set PCRE2_EXTRA_ASCII_BSW + ascii_digit set PCRE2_EXTRA_ASCII_DIGIT + ascii_posix set PCRE2_EXTRA_ASCII_POSIX + auto_callout set PCRE2_AUTO_CALLOUT + bad_escape_is_literal set PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL + /i caseless set PCRE2_CASELESS + /r caseless_restrict set PCRE2_EXTRA_CASELESS_RESTRICT + dollar_endonly set PCRE2_DOLLAR_ENDONLY + /s dotall set PCRE2_DOTALL + dupnames set PCRE2_DUPNAMES + endanchored set PCRE2_ENDANCHORED + escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF + /x extended set PCRE2_EXTENDED + /xx extended_more set PCRE2_EXTENDED_MORE + extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX + firstline set PCRE2_FIRSTLINE + literal set PCRE2_LITERAL + match_line set PCRE2_EXTRA_MATCH_LINE + match_invalid_utf set PCRE2_MATCH_INVALID_UTF + match_unset_backref set PCRE2_MATCH_UNSET_BACKREF + match_word set PCRE2_EXTRA_MATCH_WORD + /m multiline set PCRE2_MULTILINE + never_backslash_c set PCRE2_NEVER_BACKSLASH_C + never_ucp set PCRE2_NEVER_UCP + never_utf set PCRE2_NEVER_UTF + /n no_auto_capture set PCRE2_NO_AUTO_CAPTURE + no_auto_possess set PCRE2_NO_AUTO_POSSESS + no_dotstar_anchor set PCRE2_NO_DOTSTAR_ANCHOR + no_start_optimize set PCRE2_NO_START_OPTIMIZE + no_utf_check set PCRE2_NO_UTF_CHECK + ucp set PCRE2_UCP + ungreedy set PCRE2_UNGREEDY + use_offset_limit set PCRE2_USE_OFFSET_LIMIT + utf set PCRE2_UTF +.sp +As well as turning on the PCRE2_UTF option, the \fButf\fP modifier causes all +non-printing characters in output strings to 
be printed using the \ex{hh...} +notation. Otherwise, those less than 0x100 are output in hex without the curly +brackets. Setting \fButf\fP in 16-bit or 32-bit mode also causes pattern and +subject strings to be translated to UTF-16 or UTF-32, respectively, before +being passed to library functions. +. +. +.\" HTML +.SS "Setting compilation controls" +.rs +.sp +The following modifiers affect the compilation process or request information +about the pattern. There are single-letter abbreviations for some that are +heavily used in the test files. +.sp + bsr=[anycrlf|unicode] specify \eR handling + /B bincode show binary code without lengths + callout_info show callout information + convert=<options> request foreign pattern conversion + convert_glob_escape=c set glob escape character + convert_glob_separator=c set glob separator character + convert_length set convert buffer length + debug same as info,fullbincode + framesize show matching frame size + fullbincode show binary code with lengths + /I info show info about compiled pattern + hex unquoted characters are hexadecimal + jit[=<number>] use JIT + jitfast use JIT fast path + jitverify verify JIT use + locale=<name> use this locale + max_pattern_compiled ) set maximum compiled pattern + _length=<n> ) length (bytes) + max_pattern_length=<n> set maximum pattern length (code units) + max_varlookbehind=<n> set maximum variable lookbehind length + memory show memory used + newline=<type> set newline type + null_context compile with a NULL context + null_pattern pass pattern as NULL + parens_nest_limit=<n> set maximum parentheses depth + posix use the POSIX API + posix_nosub use the POSIX API with REG_NOSUB + push push compiled pattern onto the stack + pushcopy push a copy onto the stack + stackguard=<number> test the stackguard feature + subject_literal treat all subject lines as literal + tables=[0|1|2|3] select internal tables + use_length do not zero-terminate the pattern + utf8_input treat input as UTF-8 +.sp +The effects of these modifiers are described in the
following sections. +. +. +.SS "Newline and \eR handling" +.rs +.sp +The \fBbsr\fP modifier specifies what \eR in a pattern should match. If it is +set to "anycrlf", \eR matches CR, LF, or CRLF only. If it is set to "unicode", +\eR matches any Unicode newline sequence. The default can be specified when +PCRE2 is built; if it is not, the default is set to Unicode. +.P +The \fBnewline\fP modifier specifies which characters are to be interpreted as +newlines, both in the pattern and in subject lines. The type must be one of CR, +LF, CRLF, ANYCRLF, ANY, or NUL (in upper or lower case). +. +. +.SS "Information about a pattern" +.rs +.sp +The \fBdebug\fP modifier is a shorthand for \fBinfo,fullbincode\fP, requesting +all available information. +.P +The \fBbincode\fP modifier causes a representation of the compiled code to be +output after compilation. This information does not contain length and offset +values, which ensures that the same output is generated for different internal +link sizes and different code unit widths. By using \fBbincode\fP, the same +regression tests can be used in different environments. +.P +The \fBfullbincode\fP modifier, by contrast, \fIdoes\fP include length and +offset values. This is used in a few special tests that run only for specific +code unit widths and link sizes, and is also useful for one-off tests. +.P +The \fBinfo\fP modifier requests information about the compiled pattern +(whether it is anchored, has a fixed first character, and so on). The +information is obtained from the \fBpcre2_pattern_info()\fP function. 
 Here are +some typical examples: +.sp + re> /(?i)(^a|^b)/m,info + Capture group count = 1 + Compile options: multiline + Overall options: caseless multiline + First code unit at start or follows newline + Subject length lower bound = 1 +.sp + re> /(?i)abc/info + Capture group count = 0 + Compile options: <none> + Overall options: caseless + First code unit = 'a' (caseless) + Last code unit = 'c' (caseless) + Subject length lower bound = 3 +.sp +"Compile options" are those specified by modifiers; "overall options" have +added options that are taken or deduced from the pattern. If both sets of +options are the same, just a single "options" line is output; if there are no +options, the line is omitted. "First code unit" is where any match must start; +if there is more than one they are listed as "starting code units". "Last code +unit" is the last literal code unit that must be present in any match. This is +not necessarily the last character. These lines are omitted if no starting or +ending code units are recorded. The subject length line is omitted when +\fBno_start_optimize\fP is set because the minimum length is not calculated +when it can never be used. +.P +The \fBframesize\fP modifier shows the size, in bytes, of each storage frame +used by \fBpcre2_match()\fP for handling backtracking. The size depends on the +number of capturing parentheses in the pattern. A vector of these frames is +used at matching time; its overall size is shown when the \fBheapframes_size\fP +subject modifier is set. +.P +The \fBcallout_info\fP modifier requests information about all the callouts in +the pattern. A list of them is output at the end of any other information that +is requested. For each callout, either its number or string is given, followed +by the item that follows it in the pattern. +. +. +.SS "Passing a NULL context" +.rs +.sp +Normally, \fBpcre2test\fP passes a context block to \fBpcre2_compile()\fP. If +the \fBnull_context\fP modifier is set, however, NULL is passed.
This is for +testing that \fBpcre2_compile()\fP behaves correctly in this case (it uses +default values). +. +. +.SS "Passing a NULL pattern" +.rs +.sp +The \fBnull_pattern\fP modifier is for testing the behaviour of +\fBpcre2_compile()\fP when the pattern argument is NULL. The length value +passed is the default PCRE2_ZERO_TERMINATED unless \fBuse_length\fP is set. +Any length other than zero causes an error. +. +. +.SS "Specifying pattern characters in hexadecimal" +.rs +.sp +The \fBhex\fP modifier specifies that the characters of the pattern, except for +substrings enclosed in single or double quotes, are to be interpreted as pairs +of hexadecimal digits. This feature is provided as a way of creating patterns +that contain binary zeros and other non-printing characters. White space is +permitted between pairs of digits. For example, this pattern contains three +characters: +.sp + /ab 32 59/hex +.sp +Parts of such a pattern are taken literally if quoted. This pattern contains +nine characters, only two of which are specified in hexadecimal: +.sp + /ab "literal" 32/hex +.sp +Either single or double quotes may be used. There is no way of including +the delimiter within a substring. The \fBhex\fP and \fBexpand\fP modifiers are +mutually exclusive. +. +. +.SS "Specifying the pattern's length" +.rs +.sp +By default, patterns are passed to the compiling functions as zero-terminated +strings but can be passed by length instead of being zero-terminated. The +\fBuse_length\fP modifier causes this to happen. Using a length happens +automatically (whether or not \fBuse_length\fP is set) when \fBhex\fP is set, +because patterns specified in hexadecimal may contain binary zeros. +.P +If \fBhex\fP or \fBuse_length\fP is used with the POSIX wrapper API (see +.\" HTML +.\" +"Using the POSIX wrapper API" +.\" +below), the REG_PEND extension is used to pass the pattern's length. +. +. 
+.SS "Specifying a maximum for variable lookbehinds" +.rs +.sp +Variable lookbehind assertions are supported only if, for each one, there is a +maximum length (in characters) that it can match. There is a limit on this, +whose default can be set at build time, with an ultimate default of 255. The +\fBmax_varlookbehind\fP modifier uses the \fBpcre2_set_max_varlookbehind()\fP +function to change the limit. Lookbehinds whose branches each match a fixed +length are limited to 65535 characters per branch. +. +. +.SS "Specifying wide characters in 16-bit and 32-bit modes" +.rs +.sp +In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 and +translated to UTF-16 or UTF-32 when the \fButf\fP modifier is set. For testing +the 16-bit and 32-bit libraries in non-UTF mode, the \fButf8_input\fP modifier +can be used. It is mutually exclusive with \fButf\fP. Input lines are +interpreted as UTF-8 as a means of specifying wide characters. More details are +given in +.\" HTML +.\" +"Input encoding" +.\" +above. +. +. +.SS "Generating long repetitive patterns" +.rs +.sp +Some tests use long patterns that are very repetitive. Instead of creating a +very long input line for such a pattern, you can use a special repetition +feature, similar to the one described for subject lines above. If the +\fBexpand\fP modifier is present on a pattern, parts of the pattern that have +the form +.sp + \e[]{} +.sp +are expanded before the pattern is passed to \fBpcre2_compile()\fP. For +example, \e[AB]{6000} is expanded to "ABAB..." 6000 times. This construction +cannot be nested. An initial "\e[" sequence is recognized only if "]{" followed +by decimal digits and "}" is found later in the pattern. If not, the characters +remain in the pattern unaltered. The \fBexpand\fP and \fBhex\fP modifiers are +mutually exclusive. 
+.P +If part of an expanded pattern looks like an expansion, but is really part of +the actual pattern, unwanted expansion can be avoided by giving two values in +the quantifier. For example, \e[AB]{6000,6000} is not recognized as an +expansion item. +.P +If the \fBinfo\fP modifier is set on an expanded pattern, the result of the +expansion is included in the information that is output. +. +. +.SS "JIT compilation" +.rs +.sp +Just-in-time (JIT) compiling is a heavyweight optimization that can greatly +speed up pattern matching. See the +.\" HREF +\fBpcre2jit\fP +.\" +documentation for details. JIT compiling happens, optionally, after a pattern +has been successfully compiled into an internal form. The JIT compiler converts +this to optimized machine code. It needs to know whether the match-time options +PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT are going to be used, because +different code is generated for the different cases. See the \fBpartial\fP +modifier in "Subject Modifiers" +.\" HTML +.\" +below +.\" +for details of how these options are specified for each match attempt. +.P +JIT compilation is requested by the \fBjit\fP pattern modifier, which may +optionally be followed by an equals sign and a number in the range 0 to 7. +The three bits that make up the number specify which of the three JIT operating +modes are to be compiled: +.sp + 1 compile JIT code for non-partial matching + 2 compile JIT code for soft partial matching + 4 compile JIT code for hard partial matching +.sp +The possible values for the \fBjit\fP modifier are therefore: +.sp + 0 disable JIT + 1 normal matching only + 2 soft partial matching only + 3 normal and soft partial matching + 4 hard partial matching only + 6 soft and hard partial matching only + 7 all three modes +.sp +If no number is given, 7 is assumed. The phrase "partial matching" means a call +to \fBpcre2_match()\fP with either the PCRE2_PARTIAL_SOFT or the +PCRE2_PARTIAL_HARD option set. 
Note that such a call may return a complete +match; the options enable the possibility of a partial match, but do not +require it. Note also that if you request JIT compilation only for partial +matching (for example, jit=2) but do not set the \fBpartial\fP modifier on a +subject line, that match will not use JIT code because none was compiled for +non-partial matching. +.P +If JIT compilation is successful, the compiled JIT code will automatically be +used when an appropriate type of match is run, except when incompatible +run-time options are specified. For more details, see the +.\" HREF +\fBpcre2jit\fP +.\" +documentation. See also the \fBjitstack\fP modifier below for a way of +setting the size of the JIT stack. +.P +If the \fBjitfast\fP modifier is specified, matching is done using the JIT +"fast path" interface, \fBpcre2_jit_match()\fP, which skips some of the sanity +checks that are done by \fBpcre2_match()\fP, and of course does not work when +JIT is not supported. If \fBjitfast\fP is specified without \fBjit\fP, jit=7 is +assumed. +.P +If the \fBjitverify\fP modifier is specified, information about the compiled +pattern shows whether JIT compilation was or was not successful. If +\fBjitverify\fP is specified without \fBjit\fP, jit=7 is assumed. If JIT +compilation is successful when \fBjitverify\fP is set, the text "(JIT)" is +added to the first output line after a match or non match when JIT-compiled +code was actually used in the match. +. +. +.SS "Setting a locale" +.rs +.sp +The \fBlocale\fP modifier must specify the name of a locale, for example: +.sp + /pattern/locale=fr_FR +.sp +The given locale is set, \fBpcre2_maketables()\fP is called to build a set of +character tables for the locale, and this is then passed to +\fBpcre2_compile()\fP when compiling the regular expression. The same tables +are used when matching the following subject lines. 
The \fBlocale\fP modifier +applies only to the pattern on which it appears, but can be given in a +\fB#pattern\fP command if a default is needed. Setting a locale and alternate +character tables are mutually exclusive. +. +. +.SS "Showing pattern memory" +.rs +.sp +The \fBmemory\fP modifier causes the size in bytes of the memory used to hold +the compiled pattern to be output. This does not include the size of the +\fBpcre2_code\fP block; it is just the actual compiled data. If the pattern is +subsequently passed to the JIT compiler, the size of the JIT compiled code is +also output. Here is an example: +.sp + re> /a(b)c/jit,memory + Memory allocation (code space): 21 + Memory allocation (JIT code): 1910 +.sp +. +. +.SS "Limiting nested parentheses" +.rs +.sp +The \fBparens_nest_limit\fP modifier sets a limit on the depth of nested +parentheses in a pattern. Breaching the limit causes a compilation error. +The default for the library is set when PCRE2 is built, but \fBpcre2test\fP +sets its own default of 220, which is required for running the standard test +suite. +. +. +.SS "Limiting the pattern length" +.rs +.sp +The \fBmax_pattern_length\fP modifier sets a limit, in code units, to the +length of pattern that \fBpcre2_compile()\fP will accept. Breaching the limit +causes a compilation error. The default is the largest number a PCRE2_SIZE +variable can hold (essentially unlimited). +. +. +.SS "Limiting the size of a compiled pattern" +.rs +.sp +The \fBmax_pattern_compiled_length\fP modifier sets a limit, in bytes, to the +amount of memory used by a compiled pattern. Breaching the limit causes a +compilation error. The default is the largest number a PCRE2_SIZE variable can +hold (essentially unlimited). +. +. +.\" HTML +.SS "Using the POSIX wrapper API" +.rs +.sp +The \fBposix\fP and \fBposix_nosub\fP modifiers cause \fBpcre2test\fP to call +PCRE2 via the POSIX wrapper API rather than its native API. 
When +\fBposix_nosub\fP is used, the POSIX option REG_NOSUB is passed to +\fBregcomp()\fP. The POSIX wrapper supports only the 8-bit library. Note that +it does not imply POSIX matching semantics; for more detail see the +.\" HREF +\fBpcre2posix\fP +.\" +documentation. The following pattern modifiers set options for the +\fBregcomp()\fP function: +.sp + caseless REG_ICASE + multiline REG_NEWLINE + dotall REG_DOTALL ) + ungreedy REG_UNGREEDY ) These options are not part of + ucp REG_UCP ) the POSIX standard + utf REG_UTF8 ) +.sp +The \fBregerror_buffsize\fP modifier specifies a size for the error buffer that +is passed to \fBregerror()\fP in the event of a compilation error. For example: +.sp + /abc/posix,regerror_buffsize=20 +.sp +This provides a means of testing the behaviour of \fBregerror()\fP when the +buffer is too small for the error message. If this modifier has not been set, a +large buffer is used. +.P +The \fBaftertext\fP and \fBallaftertext\fP subject modifiers work as described +below. All other modifiers are either ignored, with a warning message, or cause +an error. +.P +The pattern is passed to \fBregcomp()\fP as a zero-terminated string by +default, but if the \fBuse_length\fP or \fBhex\fP modifiers are set, the +REG_PEND extension is used to pass it by length. +. +. +.SS "Testing the stack guard feature" +.rs +.sp +The \fBstackguard\fP modifier is used to test the use of +\fBpcre2_set_compile_recursion_guard()\fP, a function that is provided to +enable stack availability to be checked during compilation (see the +.\" HREF +\fBpcre2api\fP +.\" +documentation for details). If the number specified by the modifier is greater +than zero, \fBpcre2_set_compile_recursion_guard()\fP is called to set up +callback from \fBpcre2_compile()\fP to a local function. The argument it +receives is the current nesting parenthesis depth; if this is greater than the +value given by the modifier, non-zero is returned, causing the compilation to +be aborted. +. +. 
+.SS "Using alternative character tables" +.rs +.sp +The value specified for the \fBtables\fP modifier must be one of the digits 0, +1, 2, or 3. It causes a specific set of built-in character tables to be passed +to \fBpcre2_compile()\fP. This is used in the PCRE2 tests to check behaviour +with different character tables. The digit specifies the tables as follows: +.sp + 0 do not pass any special character tables + 1 the default ASCII tables, as distributed in + pcre2_chartables.c.dist + 2 a set of tables defining ISO 8859 characters + 3 a set of tables loaded by the #loadtables command +.sp +In tables 2, some characters whose codes are greater than 128 are identified as +letters, digits, spaces, etc. Tables 3 can be used only after a +\fB#loadtables\fP command has loaded them from a binary file. Setting alternate +character tables and a locale are mutually exclusive. +. +. +.SS "Setting certain match controls" +.rs +.sp +The following modifiers are really subject modifiers, and are described under +"Subject Modifiers" below. However, they may be included in a pattern's +modifier list, in which case they are applied to every subject line that is +processed with that pattern. These modifiers do not affect the compilation +process. 
+.sp + aftertext show text after match + allaftertext show text after captures + allcaptures show all captures + allvector show the entire ovector + allusedtext show all consulted text + altglobal alternative global matching + /g global global matching + heapframes_size show match data heapframes size + jitstack= set size of JIT stack + mark show mark values + replace= specify a replacement string + startchar show starting character when relevant + substitute_callout use substitution callouts + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED + substitute_literal use PCRE2_SUBSTITUTE_LITERAL + substitute_matched use PCRE2_SUBSTITUTE_MATCHED + substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY + substitute_skip= skip substitution + substitute_stop= skip substitution and following + substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY +.sp +These modifiers may not appear in a \fB#pattern\fP command. If you want them as +defaults, set them in a \fB#subject\fP command. +. +. +.SS "Specifying literal subject lines" +.rs +.sp +If the \fBsubject_literal\fP modifier is present on a pattern, all the subject +lines that it matches are taken as literal strings, with no interpretation of +backslashes. It is not possible to set subject modifiers on such lines, but any +that are set as defaults by a \fB#subject\fP command are recognized. +. +. +.SS "Saving a compiled pattern" +.rs +.sp +When a pattern with the \fBpush\fP modifier is successfully compiled, it is +pushed onto a stack of compiled patterns, and \fBpcre2test\fP expects the next +line to contain a new pattern (or a command) instead of a subject line. This +facility is used when saving compiled patterns to a file, as described in the +section entitled "Saving and restoring compiled patterns" +.\" HTML +.\" +below. 
+.\" +If \fBpushcopy\fP is used instead of \fBpush\fP, a copy of the compiled +pattern is stacked, leaving the original as current, ready to match the +following input lines. This provides a way of testing the +\fBpcre2_code_copy()\fP function. +.\" +The \fBpush\fP and \fBpushcopy \fP modifiers are incompatible with compilation +modifiers such as \fBglobal\fP that act at match time. Any that are specified +are ignored (for the stacked copy), with a warning message, except for +\fBreplace\fP, which causes an error. Note that \fBjitverify\fP, which is +allowed, does not carry through to any subsequent matching that uses a stacked +pattern. +. +. +.SS "Testing foreign pattern conversion" +.rs +.sp +The experimental foreign pattern conversion functions in PCRE2 can be tested by +setting the \fBconvert\fP modifier. Its argument is a colon-separated list of +options, which set the equivalent option for the \fBpcre2_pattern_convert()\fP +function: +.sp + glob PCRE2_CONVERT_GLOB + glob_no_starstar PCRE2_CONVERT_GLOB_NO_STARSTAR + glob_no_wild_separator PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR + posix_basic PCRE2_CONVERT_POSIX_BASIC + posix_extended PCRE2_CONVERT_POSIX_EXTENDED + unset Unset all options +.sp +The "unset" value is useful for turning off a default that has been set by a +\fB#pattern\fP command. When one of these options is set, the input pattern is +passed to \fBpcre2_pattern_convert()\fP. If the conversion is successful, the +result is reflected in the output and then passed to \fBpcre2_compile()\fP. The +normal \fButf\fP and \fBno_utf_check\fP options, if set, cause the +PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be passed to +\fBpcre2_pattern_convert()\fP. +.P +By default, the conversion function is allowed to allocate a buffer for its +output. However, if the \fBconvert_length\fP modifier is set to a value greater +than zero, \fBpcre2test\fP passes a buffer of the given length. This makes it +possible to test the length check. 
 +.P +The \fBconvert_glob_escape\fP and \fBconvert_glob_separator\fP modifiers can be +used to specify the escape and separator characters for glob processing, +overriding the defaults, which are operating-system dependent. +. +. +.\" HTML +.SH "SUBJECT MODIFIERS" +.rs +.sp +The modifiers that can appear in subject lines and the \fB#subject\fP +command are of two types. +. +. +.SS "Setting match options" +.rs +.sp +The following modifiers set options for \fBpcre2_match()\fP or +\fBpcre2_dfa_match()\fP. See +.\" HREF +\fBpcre2api\fP +.\" +for a description of their effects. +.sp + anchored set PCRE2_ANCHORED + endanchored set PCRE2_ENDANCHORED + dfa_restart set PCRE2_DFA_RESTART + dfa_shortest set PCRE2_DFA_SHORTEST + disable_recurseloop_check set PCRE2_DISABLE_RECURSELOOP_CHECK + no_jit set PCRE2_NO_JIT + no_utf_check set PCRE2_NO_UTF_CHECK + notbol set PCRE2_NOTBOL + notempty set PCRE2_NOTEMPTY + notempty_atstart set PCRE2_NOTEMPTY_ATSTART + noteol set PCRE2_NOTEOL + partial_hard (or ph) set PCRE2_PARTIAL_HARD + partial_soft (or ps) set PCRE2_PARTIAL_SOFT +.sp +The partial matching modifiers are provided with abbreviations because they +appear frequently in tests. +.P +If the \fBposix\fP or \fBposix_nosub\fP modifier was present on the pattern, +causing the POSIX wrapper API to be used, the only option-setting modifiers +that have any effect are \fBnotbol\fP, \fBnotempty\fP, and \fBnoteol\fP, +causing REG_NOTBOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to +\fBregexec()\fP. The other modifiers are ignored, with a warning message. +.P +There is one additional modifier that can be used with the POSIX wrapper. It is +ignored (with a warning) if used for non-POSIX matching. +.sp + posix_startend=<n>[:<m>] +.sp +This causes the subject string to be passed to \fBregexec()\fP using the +REG_STARTEND option, which uses offsets to specify which part of the string is +searched. If only one number is given, the end offset is passed as the end of +the subject string.
For more detail of REG_STARTEND, see the +.\" HREF +\fBpcre2posix\fP +.\" +documentation. If the subject string contains binary zeros (coded as escapes +such as \ex{00} because \fBpcre2test\fP does not support actual binary zeros in +its input), you must use \fBposix_startend\fP to specify its length. +. +. +.SS "Setting match controls" +.rs +.sp +The following modifiers affect the matching process or request additional +information. Some of them may also be specified on a pattern line (see above), +in which case they apply to every subject line that is matched against that +pattern, but can be overridden by modifiers on the subject. +.sp + aftertext show text after match + allaftertext show text after captures + allcaptures show all captures + allvector show the entire ovector + allusedtext show all consulted text (non-JIT only) + altglobal alternative global matching + callout_capture show captures at callout time + callout_data= set a value to pass via callouts + callout_error=[:] control callout error + callout_extra show extra callout information + callout_fail=[:] control callout failure + callout_no_where do not show position of a callout + callout_none do not supply a callout function + copy= copy captured substring + depth_limit= set a depth limit + dfa use \fBpcre2_dfa_match()\fP + find_limits find heap, match and depth limits + find_limits_noheap find match and depth limits + get= extract captured substring + getall extract all captured substrings + /g global global matching + heapframes_size show match data heapframes size + heap_limit= set a limit on heap memory (Kbytes) + jitstack= set size of JIT stack + mark show mark values + match_limit= set a match limit + memory show heap memory usage + null_context match with a NULL context + null_replacement substitute with NULL replacement + null_subject match with NULL subject + offset= set starting offset + offset_limit= set offset limit + ovector= set size of output vector + recursion_limit= obsolete 
 synonym for depth_limit + replace=<string> specify a replacement string + startchar show startchar when relevant + startoffset=<n> same as offset=<n> + substitute_callout use substitution callouts + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED + substitute_literal use PCRE2_SUBSTITUTE_LITERAL + substitute_matched use PCRE2_SUBSTITUTE_MATCHED + substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY + substitute_skip=<n> skip substitution number n + substitute_stop=<n> skip substitution number n and greater + substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY + zero_terminate pass the subject as zero-terminated +.sp +The effects of these modifiers are described in the following sections. When +matching via the POSIX wrapper API, the \fBaftertext\fP, \fBallaftertext\fP, +and \fBovector\fP subject modifiers work as described below. All other +modifiers are either ignored, with a warning message, or cause an error. +. +. +.SS "Showing more text" +.rs +.sp +The \fBaftertext\fP modifier requests that as well as outputting the part of +the subject string that matched the entire pattern, \fBpcre2test\fP should in +addition output the remainder of the subject string. This is useful for tests +where the subject contains multiple copies of the same substring. The +\fBallaftertext\fP modifier requests the same action for captured substrings as +well as the main matched substring. In each case the remainder is output on the +following line with a plus character following the capture number. +.P +The \fBallusedtext\fP modifier requests that all the text that was consulted +during a successful pattern match by the interpreter should be shown, for both +full and partial matches. This feature is not supported for JIT matching, and +if requested with JIT it is ignored (with a warning message).
Setting this +modifier affects the output if there is a lookbehind at the start of a match, +or, for a complete match, a lookahead at the end, or if \eK is used in the +pattern. Characters that precede or follow the start and end of the actual +match are indicated in the output by '<' or '>' characters underneath them. +Here is an example: +.sp + re> /(?<=pqr)abc(?=xyz)/ + data> 123pqrabcxyz456\e=allusedtext + 0: pqrabcxyz + <<< >>> + data> 123pqrabcxy\e=ph,allusedtext + Partial match: pqrabcxy + <<< +.sp +The first, complete match shows that the matched string is "abc", with the +preceding and following strings "pqr" and "xyz" having been consulted during +the match (when processing the assertions). The partial match can indicate only +the preceding string. +.P +The \fBstartchar\fP modifier requests that the starting character for the match +be indicated, if it is different to the start of the matched string. The only +time when this occurs is when \eK has been processed as part of the match. In +this situation, the output for the matched string is displayed from the +starting character instead of from the match point, with circumflex characters +under the earlier characters. For example: +.sp + re> /abc\eKxyz/ + data> abcxyz\e=startchar + 0: abcxyz + ^^^ +.sp +Unlike \fBallusedtext\fP, the \fBstartchar\fP modifier can be used with JIT. +However, these two modifiers are mutually exclusive. +. +. +.SS "Showing the value of all capture groups" +.rs +.sp +The \fBallcaptures\fP modifier requests that the values of all potential +captured parentheses be output after a match. By default, only those up to the +highest one actually used in the match are output (corresponding to the return +code from \fBpcre2_match()\fP). Groups that did not take part in the match +are output as "". This modifier is not relevant for DFA matching (which +does no capturing) and does not apply when \fBreplace\fP is specified; it is +ignored, with a warning message, if present. +. +. 
+.SS "Showing the entire ovector, for all outcomes" +.rs +.sp +The \fBallvector\fP modifier requests that the entire ovector be shown, +whatever the outcome of the match. Compare \fBallcaptures\fP, which shows only +up to the maximum number of capture groups for the pattern, and then only for a +successful complete non-DFA match. This modifier, which acts after any match +result, and also for DFA matching, provides a means of checking that there are +no unexpected modifications to ovector fields. Before each match attempt, the +ovector is filled with a special value, and if this is found in both elements +of a capturing pair, "" is output. After a successful match, this +applies to all groups after the maximum capture group for the pattern. In other +cases it applies to the entire ovector. After a partial match, the first two +elements are the only ones that should be set. After a DFA match, the amount of +ovector that is used depends on the number of matches that were found. +. +. +.SS "Testing pattern callouts" +.rs +.sp +A callout function is supplied when \fBpcre2test\fP calls the library matching +functions, unless \fBcallout_none\fP is specified. Its behaviour can be +controlled by various modifiers listed above whose names begin with +\fBcallout_\fP. Details are given in the section entitled "Callouts" +.\" HTML +.\" +below. +.\" +Testing callouts from \fBpcre2_substitute()\fP is described separately in +"Testing the substitution function" +.\" HTML +.\" +below. +.\" +. +. +.SS "Finding all matches in a string" +.rs +.sp +Searching for all possible matches within a subject can be requested by the +\fBglobal\fP or \fBaltglobal\fP modifier. After finding a match, the matching +function is called again to search the remainder of the subject. 
The difference +between \fBglobal\fP and \fBaltglobal\fP is that the former uses the +\fIstart_offset\fP argument to \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP +to start searching at a new point within the entire string (which is what Perl +does), whereas the latter passes over a shortened subject. This makes a +difference to the matching process if the pattern begins with a lookbehind +assertion (including \eb or \eB). +.P +If an empty string is matched, the next match is done with the +PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search for +another, non-empty, match at the same point in the subject. If this match +fails, the start offset is advanced, and the normal match is retried. This +imitates the way Perl handles such cases when using the \fB/g\fP modifier or +the \fBsplit()\fP function. Normally, the start offset is advanced by one +character, but if the newline convention recognizes CRLF as a newline, and the +current character is CR followed by LF, an advance of two characters occurs. +. +. +.SS "Testing substring extraction functions" +.rs +.sp +The \fBcopy\fP and \fBget\fP modifiers can be used to test the +\fBpcre2_substring_copy_xxx()\fP and \fBpcre2_substring_get_xxx()\fP functions. +They can be given more than once, and each can specify a capture group name or +number, for example: +.sp + abcd\e=copy=1,copy=3,get=G1 +.sp +If the \fB#subject\fP command is used to set default copy and/or get lists, +these can be unset by specifying a negative number to cancel all numbered +groups and an empty name to cancel all named groups. +.P +The \fBgetall\fP modifier tests \fBpcre2_substring_list_get()\fP, which +extracts all captured substrings. +.P +If the subject line is successfully matched, the substrings extracted by the +convenience functions are output with C, G, or L after the string number +instead of a colon. This is in addition to the normal full list. 
 The string +length (that is, the return from the extraction function) is given in +parentheses after each substring, followed by the name when the extraction was +by name. +. +. +.\" HTML +.SS "Testing the substitution function" +.rs +.sp +If the \fBreplace\fP modifier is set, the \fBpcre2_substitute()\fP function is +called instead of one of the matching functions (or after one call of +\fBpcre2_match()\fP in the case of PCRE2_SUBSTITUTE_MATCHED). Note that +replacement strings cannot contain commas, because a comma signifies the end of +a modifier. This is not thought to be an issue in a test program. +.P +Specifying a completely empty replacement string disables this modifier. +However, it is possible to specify an empty replacement by providing a buffer +length, as described below, for an otherwise empty replacement. +.P +Unlike subject strings, \fBpcre2test\fP does not process replacement strings +for escape sequences. In UTF mode, a replacement string is checked to see if it +is a valid UTF-8 string. If so, it is correctly converted to a UTF string of +the appropriate code unit width. If it is not a valid UTF-8 string, the +individual code units are copied directly. This provides a means of passing an +invalid UTF-8 string for testing purposes. +.P +The following modifiers set options (in addition to the normal match options) +for \fBpcre2_substitute()\fP: +.sp + global PCRE2_SUBSTITUTE_GLOBAL + substitute_extended PCRE2_SUBSTITUTE_EXTENDED + substitute_literal PCRE2_SUBSTITUTE_LITERAL + substitute_matched PCRE2_SUBSTITUTE_MATCHED + substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY + substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY +.sp +See the +.\" HREF +\fBpcre2api\fP +.\" +documentation for details of these options.
+.P +After a successful substitution, the modified string is output, preceded by the +number of replacements. This may be zero if there were no matches. Here is a +simple example of a substitution test: +.sp + /abc/replace=xxx + =abc=abc= + 1: =xxx=abc= + =abc=abc=\e=global + 2: =xxx=xxx= +.sp +Subject and replacement strings should be kept relatively short (fewer than 256 +characters) for substitution tests, as fixed-size buffers are used. To make it +easy to test for buffer overflow, if the replacement string starts with a +number in square brackets, that number is passed to \fBpcre2_substitute()\fP as +the size of the output buffer, with the replacement string starting at the next +character. Here is an example that tests the edge case: +.sp + /abc/ + 123abc123\e=replace=[10]XYZ + 1: 123XYZ123 + 123abc123\e=replace=[9]XYZ + Failed: error -47: no more memory +.sp +The default action of \fBpcre2_substitute()\fP is to return +PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the +\fBsubstitute_overflow_length\fP modifier), \fBpcre2_substitute()\fP continues +to go through the motions of matching and substituting (but not doing any +callouts), in order to compute the size of buffer that is required. When this +happens, \fBpcre2test\fP shows the required buffer length (which includes space +for the trailing zero) as part of the error message. For example: +.sp + /abc/substitute_overflow_length + 123abc123\e=replace=[9]XYZ + Failed: error -47: no more memory: 10 code units are needed +.sp +A replacement string is ignored with POSIX and DFA matching. Specifying partial +matching provokes an error return ("bad option value") from +\fBpcre2_substitute()\fP. +. +. +.SS "Testing substitute callouts" +.rs +.sp +If the \fBsubstitute_callout\fP modifier is set, a substitution callout +function is set up. 
The \fBnull_context\fP modifier must not be set, because +the address of the callout function is passed in a match context. When the +callout function is called (after each substitution), details of the input +and output strings are output. For example: +.sp + /abc/g,replace=<$0>,substitute_callout + abcdefabcpqr + 1(1) Old 0 3 "abc" New 0 5 "<abc>" + 2(1) Old 6 9 "abc" New 8 13 "<abc>" + 2: <abc>def<abc>pqr +.sp +The first number on each callout line is the count of matches. The +parenthesized number is the number of pairs that are set in the ovector (that +is, one more than the number of capturing groups that were set). Then are +listed the offsets of the old substring, its contents, and the same for the +replacement. +.P +By default, the substitution callout function returns zero, which accepts the +replacement and causes matching to continue if /g was used. Two further +modifiers can be used to test other return values. If \fBsubstitute_skip\fP is +set to a value greater than zero the callout function returns +1 for the match +of that number, and similarly \fBsubstitute_stop\fP returns -1. These cause the +replacement to be rejected, and -1 causes no further matching to take place. If +either of them are set, \fBsubstitute_callout\fP is assumed. For example: +.sp + /abc/g,replace=<$0>,substitute_skip=1 + abcdefabcpqr + 1(1) Old 0 3 "abc" New 0 5 "<abc> SKIPPED" + 2(1) Old 6 9 "abc" New 6 11 "<abc>" + 2: abcdef<abc>pqr + abcdefabcpqr\e=substitute_stop=1 + 1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED" + 1: abcdefabcpqr +.sp +If both are set for the same number, stop takes precedence. Only a single skip +or stop is supported, which is sufficient for testing that the feature works. +. +. +.SS "Setting the JIT stack size" +.rs +.sp +The \fBjitstack\fP modifier provides a way of setting the maximum stack size +that is used by the just-in-time optimization code. It is ignored if JIT +optimization is not being used. The value is a number of kibibytes (units of +1024 bytes).
Setting zero reverts to the default of 32KiB. Providing a stack +that is larger than the default is necessary only for very complicated +patterns. If \fBjitstack\fP is set non-zero on a subject line it overrides any +value that was set on the pattern. +. +. +.SS "Setting heap, match, and depth limits" +.rs +.sp +The \fBheap_limit\fP, \fBmatch_limit\fP, and \fBdepth_limit\fP modifiers set +the appropriate limits in the match context. These values are ignored when the +\fBfind_limits\fP or \fBfind_limits_noheap\fP modifier is specified. +. +. +.SS "Finding minimum limits" +.rs +.sp +If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP +calls the relevant matching function several times, setting different values in +the match context via \fBpcre2_set_heap_limit()\fP, +\fBpcre2_set_match_limit()\fP, or \fBpcre2_set_depth_limit()\fP until it finds +the smallest value for each parameter that allows the match to complete without +a "limit exceeded" error. The match itself may succeed or fail. An alternative +modifier, \fBfind_limits_noheap\fP, omits the heap limit. This is used in the +standard tests, because the minimum heap limit varies between systems. If JIT +is being used, only the match limit is relevant, and the other two are +automatically omitted. +.P +When using this modifier, the pattern should not contain any limit settings +such as (*LIMIT_MATCH=...) within it. If such a setting is present and is +lower than the minimum matching value, the minimum value cannot be found +because \fBpcre2_set_match_limit()\fP etc. are only able to reduce the value of +an in-pattern limit; they cannot increase it. +.P +For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how +much nested backtracking happens (that is, how deeply the pattern's tree is +searched). 
In the case of DFA matching, \fIdepth_limit\fP controls the depth of +recursive calls of the internal function that is used for handling pattern +recursion, lookaround assertions, and atomic groups. +.P +For non-DFA matching, the \fImatch_limit\fP number is a measure of the amount +of backtracking that takes place, and learning the minimum value can be +instructive. For most simple matches, the number is quite small, but for +patterns with very large numbers of matching possibilities, it can become large +very quickly with increasing length of subject string. In the case of DFA +matching, \fImatch_limit\fP controls the total number of calls, both recursive +and non-recursive, to the internal matching function, thus controlling the +overall amount of computing resource that is used. +.P +For both kinds of matching, the \fIheap_limit\fP number, which is in kibibytes +(units of 1024 bytes), limits the amount of heap memory used for matching. +. +. +.SS "Showing MARK names" +.rs +.sp +.P +The \fBmark\fP modifier causes the names from backtracking control verbs that +are returned from calls to \fBpcre2_match()\fP to be displayed. If a mark is +returned for a match, non-match, or partial match, \fBpcre2test\fP shows it. +For a match, it is on a line by itself, tagged with "MK:". Otherwise, it +is added to the non-match message. +. +. +.SS "Showing memory usage" +.rs +.sp +The \fBmemory\fP modifier causes \fBpcre2test\fP to log the sizes of all heap +memory allocation and freeing calls that occur during a call to +\fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. In the latter case, heap memory +is used only when a match requires more internal workspace than the default +allocation on the stack, so in many cases there will be no output. No heap +memory is allocated during matching with JIT. For this modifier to work, the +\fBnull_context\fP modifier must not be set on both the pattern and the +subject, though it can be set on one or the other. +. +.
+.SS "Showing the heap frame overall vector size" +.rs +.sp +The \fBheapframes_size\fP modifier is relevant for matches using +\fBpcre2_match()\fP without JIT. After a match has run (whether successful or +not) the size, in bytes, of the allocated heap frames vector that is left +attached to the match data block is shown. If the matching action involved +several calls to \fBpcre2_match()\fP (for example, global matching or for +timing) only the final value is shown. +.P +This modifier is ignored, with a warning, for POSIX or DFA matching. JIT +matching does not use the heap frames vector, so the size is always zero, +unless there was a previous non-JIT match. Note that specifying a size of zero +for the output vector (see below) causes \fBpcre2test\fP to free its match data +block (and associated heap frames vector) and allocate a new one. +. +. +.SS "Setting a starting offset" +.rs +.sp +The \fBoffset\fP modifier sets an offset in the subject string at which +matching starts. Its value is a number of code units, not characters. +. +. +.SS "Setting an offset limit" +.rs +.sp +The \fBoffset_limit\fP modifier sets a limit for unanchored matches. If a match +cannot be found starting at or before this offset in the subject, a "no match" +return is given. The data value is a number of code units, not characters. When +this modifier is used, the \fBuse_offset_limit\fP modifier must have been set +for the pattern; if not, an error is generated. +. +. +.SS "Setting the size of the output vector" +.rs +.sp +The \fBovector\fP modifier applies only to the subject line in which it +appears, though of course it can also be used to set a default in a +\fB#subject\fP command. It specifies the number of pairs of offsets that are +available for storing matching information. The default is 15. +.P +A value of zero is useful when testing the POSIX API because it causes +\fBregexec()\fP to be called with a NULL capture vector.
When not testing the +POSIX API, a value of zero is used to cause +\fBpcre2_match_data_create_from_pattern()\fP to be called, in order to create a +new match block of exactly the right size for the pattern. (It is not possible +to create a match block with a zero-length ovector; there is always at least +one pair of offsets.) The old match data block is freed. +. +. +.SS "Passing the subject as zero-terminated" +.rs +.sp +By default, the subject string is passed to a native API matching function with +its correct length. In order to test the facility for passing a zero-terminated +string, the \fBzero_terminate\fP modifier is provided. It causes the length to +be passed as PCRE2_ZERO_TERMINATED. When matching via the POSIX interface, +this modifier is ignored, with a warning. +.P +When testing \fBpcre2_substitute()\fP, this modifier also has the effect of +passing the replacement string as zero-terminated. +. +. +.SS "Passing a NULL context, subject, or replacement" +.rs +.sp +Normally, \fBpcre2test\fP passes a context block to \fBpcre2_match()\fP, +\fBpcre2_dfa_match()\fP, \fBpcre2_jit_match()\fP or \fBpcre2_substitute()\fP. +If the \fBnull_context\fP modifier is set, however, NULL is passed. This is for +testing that the matching and substitution functions behave correctly in this +case (they use default values). This modifier cannot be used with the +\fBfind_limits\fP, \fBfind_limits_noheap\fP, or \fBsubstitute_callout\fP +modifiers. +.P +Similarly, for testing purposes, if the \fBnull_subject\fP or +\fBnull_replacement\fP modifier is set, the subject or replacement string +pointers are passed as NULL, respectively, to the relevant functions. +. +. +.SH "THE ALTERNATIVE MATCHING FUNCTION" +.rs +.sp +By default, \fBpcre2test\fP uses the standard PCRE2 matching function, +\fBpcre2_match()\fP to match each subject line. PCRE2 also supports an +alternative matching function, \fBpcre2_dfa_match()\fP, which operates in a +different way, and has some restrictions. 
The differences between the two +functions are described in the +.\" HREF +\fBpcre2matching\fP +.\" +documentation. +.P +If the \fBdfa\fP modifier is set, the alternative matching function is used. +This function finds all possible matches at a given point in the subject. If, +however, the \fBdfa_shortest\fP modifier is set, processing stops after the +first match is found. This is always the shortest possible match. +. +. +.SH "DEFAULT OUTPUT FROM pcre2test" +.rs +.sp +This section describes the output when the normal matching function, +\fBpcre2_match()\fP, is being used. +.P +When a match succeeds, \fBpcre2test\fP outputs the list of captured substrings, +starting with number 0 for the string that matched the whole pattern. +Otherwise, it outputs "No match" when the return is PCRE2_ERROR_NOMATCH, or +"Partial match:" followed by the partially matching substring when the +return is PCRE2_ERROR_PARTIAL. (Note that this is the +entire substring that was inspected during the partial match; it may include +characters before the actual match start if a lookbehind assertion, \eK, \eb, +or \eB was involved.) +.P +For any other return, \fBpcre2test\fP outputs the PCRE2 negative error number +and a short descriptive phrase. If the error is a failed UTF string check, the +code unit offset of the start of the failing character is also output. Here is +an example of an interactive \fBpcre2test\fP run. +.sp + $ pcre2test + PCRE2 version 10.22 2016-07-29 +.sp + re> /^abc(\ed+)/ + data> abc123 + 0: abc123 + 1: 123 + data> xyz + No match +.sp +Unset capturing substrings that are not followed by one that is set are not +shown by \fBpcre2test\fP unless the \fBallcaptures\fP modifier is specified. In +the following example, there are two capturing substrings, but when the first +data line is matched, the second, unset substring is not shown. An "internal" +unset substring is shown as "<unset>", as for the second data line.
+.sp + re> /(a)|(b)/ + data> a + 0: a + 1: a + data> b + 0: b + 1: <unset> + 2: b +.sp +If the strings contain any non-printing characters, they are output as \exhh +escapes if the value is less than 256 and UTF mode is not set. Otherwise they +are output as \ex{hh...} escapes. See below for the definition of non-printing +characters. If the \fBaftertext\fP modifier is set, the output for substring 0 +is followed by the rest of the subject string, identified by "0+" like this: +.sp + re> /cat/aftertext + data> cataract + 0: cat + 0+ aract +.sp +If global matching is requested, the results of successive matching attempts +are output in sequence, like this: +.sp + re> /\eBi(\ew\ew)/g + data> Mississippi + 0: iss + 1: ss + 0: iss + 1: ss + 0: ipp + 1: pp +.sp +"No match" is output only if the first match attempt fails. Here is an example +of a failure message (the offset 4 that is specified by the \fBoffset\fP +modifier is past the end of the subject string): +.sp + re> /xyz/ + data> xyz\e=offset=4 + Error -24 (bad offset value) +.P +Note that whereas patterns can be continued over several lines (a plain ">" +prompt is used for continuations), subject lines may not. However newlines can +be included in a subject by means of the \en escape (or \er, \er\en, etc., +depending on the newline sequence setting). +. +. +. +.SH "OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION" +.rs +.sp +When the alternative matching function, \fBpcre2_dfa_match()\fP, is used, the +output consists of a list of all the matches that start at the first point in +the subject where there is at least one match. For example: +.sp + re> /(tang|tangerine|tan)/ + data> yellow tangerine\e=dfa + 0: tangerine + 1: tang + 2: tan +.sp +Using the normal matching function on this data finds only "tang". The +longest matching string is always given first (and numbered zero). After a +PCRE2_ERROR_PARTIAL return, the output is "Partial match:", followed by the +partially matching substring.
Note that this is the entire substring that was +inspected during the partial match; it may include characters before the actual +match start if a lookbehind assertion, \eb, or \eB was involved. (\eK is not +supported for DFA matching.) +.P +If global matching is requested, the search for further matches resumes +at the end of the longest match. For example: +.sp + re> /(tang|tangerine|tan)/g + data> yellow tangerine and tangy sultana\e=dfa + 0: tangerine + 1: tang + 2: tan + 0: tang + 1: tan + 0: tan +.sp +The alternative matching function does not support substring capture, so the +modifiers that are concerned with captured substrings are not relevant. +. +. +.SH "RESTARTING AFTER A PARTIAL MATCH" +.rs +.sp +When the alternative matching function has given the PCRE2_ERROR_PARTIAL +return, indicating that the subject partially matched the pattern, you can +restart the match with additional subject data by means of the +\fBdfa_restart\fP modifier. For example: +.sp + re> /^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/ + data> 23ja\e=ps,dfa + Partial match: 23ja + data> n05\e=dfa,dfa_restart + 0: n05 +.sp +For further information about partial matching, see the +.\" HREF +\fBpcre2partial\fP +.\" +documentation. +. +. +.\" HTML +.SH CALLOUTS +.rs +.sp +If the pattern contains any callout requests, \fBpcre2test\fP's callout +function is called during matching unless \fBcallout_none\fP is specified. This +works with both matching functions, and with JIT, though there are some +differences in behaviour. The output for callouts with numerical arguments and +those with string arguments is slightly different. +. +. +.SS "Callouts with numerical arguments" +.rs +.sp +By default, the callout function displays the callout number, the start and +current positions in the subject text at the callout time, and the next pattern +item to be tested. 
For example: +.sp + --->pqrabcdef + 0 ^ ^ \ed +.sp +This output indicates that callout number 0 occurred for a match attempt +starting at the fourth character of the subject string, when the pointer was at +the seventh character, and when the next pattern item was \ed. Just +one circumflex is output if the start and current positions are the same, or if +the current position precedes the start position, which can happen if the +callout is in a lookbehind assertion. +.P +Callouts numbered 255 are assumed to be automatic callouts, inserted as a +result of the \fBauto_callout\fP pattern modifier. In this case, instead of +showing the callout number, the offset in the pattern, preceded by a plus, is +output. For example: +.sp + re> /\ed?[A-E]\e*/auto_callout + data> E* + --->E* + +0 ^ \ed? + +3 ^ [A-E] + +8 ^^ \e* + +10 ^ ^ + 0: E* +.sp +If a pattern contains (*MARK) items, an additional line is output whenever +a change of latest mark is passed to the callout function. For example: +.sp + re> /a(*MARK:X)bc/auto_callout + data> abc + --->abc + +0 ^ a + +1 ^^ (*MARK:X) + +10 ^^ b + Latest Mark: X + +11 ^ ^ c + +12 ^ ^ + 0: abc +.sp +The mark changes between matching "a" and "b", but stays the same for the rest +of the match, so nothing more is output. If, as a result of backtracking, the +mark reverts to being unset, the text "<unset>" is output. +. +. +.SS "Callouts with string arguments" +.rs +.sp +The output for a callout with a string argument is similar, except that instead +of outputting a callout number before the position indicators, the callout +string and its offset in the pattern string are output before the reflection of +the subject string, and the subject string is reflected for each callout. For +example: +.sp + re> /^ab(?C'first')cd(?C"second")ef/ + data> abcdefg + Callout (7): 'first' + --->abcdefg + ^ ^ c + Callout (20): "second" + --->abcdefg + ^ ^ e + 0: abcdef +.sp +. +.
+.SS "Callout modifiers" +.rs +.sp +The callout function in \fBpcre2test\fP returns zero (carry on matching) by +default, but you can use a \fBcallout_fail\fP modifier in a subject line to +change this and other parameters of the callout (see below). +.P +If the \fBcallout_capture\fP modifier is set, the current captured groups are +output when a callout occurs. This is useful only for non-DFA matching, as +\fBpcre2_dfa_match()\fP does not support capturing, so no captures are ever +shown. +.P +The normal callout output, showing the callout number or pattern offset (as +described above) is suppressed if the \fBcallout_no_where\fP modifier is set. +.P +When using the interpretive matching function \fBpcre2_match()\fP without JIT, +setting the \fBcallout_extra\fP modifier causes additional output from +\fBpcre2test\fP's callout function to be generated. For the first callout in a +match attempt at a new starting position in the subject, "New match attempt" is +output. If there has been a backtrack since the last callout (or start of +matching if this is the first callout), "Backtrack" is output, followed by "No +other matching paths" if the backtrack ended the previous match attempt. For +example: +.sp + re> /(a+)b/auto_callout,no_start_optimize,no_auto_possess + data> aac\e=callout_extra + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + +3 ^ ^ ) + +4 ^ ^ b + Backtrack + --->aac + +3 ^^ ) + +4 ^^ b + Backtrack + No other matching paths + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + +3 ^^ ) + +4 ^^ b + Backtrack + No other matching paths + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + Backtrack + No other matching paths + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + No match +.sp +Notice that various optimizations must be turned off if you want all possible +matching paths to be scanned. 
If \fBno_start_optimize\fP is not used, there is +an immediate "no match", without any callouts, because the starting +optimization fails to find "b" in the subject, which it knows must be present +for any match. If \fBno_auto_possess\fP is not used, the "a+" item is turned +into "a++", which reduces the number of backtracks. +.P +The \fBcallout_extra\fP modifier has no effect if used with the DFA matching +function, or with JIT. +. +. +.SS "Return values from callouts" +.rs +.sp +The default return from the callout function is zero, which allows matching to +continue. The \fBcallout_fail\fP modifier can be given one or two numbers. If +there is only one number, 1 is returned instead of 0 (causing matching to +backtrack) when a callout of that number is reached. If two numbers (<n>:<m>) +are given, 1 is returned when callout <n> is reached and there have been at +least <m> callouts. The \fBcallout_error\fP modifier is similar, except that +PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be +aborted. If both these modifiers are set for the same callout number, +\fBcallout_error\fP takes precedence. Note that callouts with string arguments +are always given the number zero. +.P +The \fBcallout_data\fP modifier can be given an unsigned or a negative number. +This is set as the "user data" that is passed to the matching function, and +passed back when the callout function is invoked. Any value other than zero is +used as a return from \fBpcre2test\fP's callout function. +.P +Inserting callouts can be helpful when using \fBpcre2test\fP to check +complicated regular expressions. For further information about callouts, see +the +.\" HREF +\fBpcre2callout\fP +.\" +documentation. +. +. +. +.SH "NON-PRINTING CHARACTERS" +.rs +.sp +When \fBpcre2test\fP is outputting text in the compiled version of a pattern, +bytes other than 32-126 are always treated as non-printing characters and are +therefore shown as hex escapes.
+.P +When \fBpcre2test\fP is outputting text that is a matched part of a subject +string, it behaves in the same way, unless a different locale has been set for +the pattern (using the \fBlocale\fP modifier). In this case, the +\fBisprint()\fP function is used to distinguish printing and non-printing +characters. +. +. +. +.\" HTML +.SH "SAVING AND RESTORING COMPILED PATTERNS" +.rs +.sp +It is possible to save compiled patterns on disc or elsewhere, and reload them +later, subject to a number of restrictions. JIT data cannot be saved. The host +on which the patterns are reloaded must be running the same version of PCRE2, +with the same code unit width, and must also have the same endianness, pointer +width and PCRE2_SIZE type. Before compiled patterns can be saved they must be +serialized, that is, converted to a stream of bytes. A single byte stream may +contain any number of compiled patterns, but they must all use the same +character tables. A single copy of the tables is included in the byte stream +(its size is 1088 bytes). +.P +The functions whose names begin with \fBpcre2_serialize_\fP are used +for serializing and de-serializing. They are described in the +.\" HREF +\fBpcre2serialize\fP +.\" +documentation. In this section we describe the features of \fBpcre2test\fP that +can be used to test these functions. +.P +Note that "serialization" in PCRE2 does not convert compiled patterns to an +abstract format like Java or .NET. It just makes a reloadable byte code stream. +Hence the restrictions on reloading mentioned above. +.P +In \fBpcre2test\fP, when a pattern with \fBpush\fP modifier is successfully +compiled, it is pushed onto a stack of compiled patterns, and \fBpcre2test\fP +expects the next line to contain a new pattern (or command) instead of a +subject line. By contrast, the \fBpushcopy\fP modifier causes a copy of the +compiled pattern to be stacked, leaving the original available for immediate +matching. 
By using \fBpush\fP and/or \fBpushcopy\fP, a number of patterns can +be compiled and retained. These modifiers are incompatible with \fBposix\fP, +and control modifiers that act at match time are ignored (with a message) for +the stacked patterns. The \fBjitverify\fP modifier applies only at compile +time. +.P +The command +.sp + #save <filename> +.sp +causes all the stacked patterns to be serialized and the result written to the +named file. Afterwards, all the stacked patterns are freed. The command +.sp + #load <filename> +.sp +reads the data in the file, and then arranges for it to be de-serialized, with +the resulting compiled patterns added to the pattern stack. The pattern on the +top of the stack can be retrieved by the #pop command, which must be followed +by lines of subjects that are to be matched with the pattern, terminated as +usual by an empty line or end of file. This command may be followed by a +modifier list containing only +.\" HTML +.\" +control modifiers +.\" +that act after a pattern has been compiled. In particular, \fBhex\fP, +\fBposix\fP, \fBposix_nosub\fP, \fBpush\fP, and \fBpushcopy\fP are not allowed, +nor are any +.\" HTML +.\" +option-setting modifiers. +.\" +The JIT modifiers are, however, permitted. Here is an example that saves and +reloads two patterns. +.sp + /abc/push + /xyz/push + #save tempfile + #load tempfile + #pop info + xyz +.sp + #pop jit,bincode + abc +.sp +If \fBjitverify\fP is used with #pop, it does not automatically imply +\fBjit\fP, which is different behaviour from when it is used on a pattern. +.P +The #popcopy command is analogous to the \fBpushcopy\fP modifier in that it +makes current a copy of the topmost stack pattern, leaving the original still +on the stack. +. +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2\fP(3), \fBpcre2api\fP(3), \fBpcre2callout\fP(3), +\fBpcre2jit\fP(3), \fBpcre2matching\fP(3), \fBpcre2partial\fP(3), +\fBpcre2pattern\fP(3), \fBpcre2serialize\fP(3). +. +.
+.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 24 April 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/doc/pcre2test.txt b/doc/pcre2test.txt new file mode 100644 index 0000000..ddb491d --- /dev/null +++ b/doc/pcre2test.txt @@ -0,0 +1,2014 @@ + +PCRE2TEST(1) General Commands Manual PCRE2TEST(1) + + +NAME + pcre2test - a program for testing Perl-compatible regular expressions. + + +SYNOPSIS + + pcre2test [options] [input file [output file]] + + pcre2test is a test program for the PCRE2 regular expression libraries, + but it can also be used for experimenting with regular expressions. + This document describes the features of the test program; for details + of the regular expressions themselves, see the pcre2pattern documenta- + tion. For details of the PCRE2 library function calls and their op- + tions, see the pcre2api documentation. + + The input for pcre2test is a sequence of regular expression patterns + and subject strings to be matched. There are also command lines for + setting defaults and controlling some special actions. The output shows + the result of each match attempt. Modifiers on external or internal + command lines, the patterns, and the subject lines specify PCRE2 func- + tion options, control how the subject is processed, and what output is + produced. + + There are many obscure modifiers, some of which are specifically de- + signed for use in conjunction with the test script and data files that + are distributed as part of PCRE2. All the modifiers are documented + here, some without much justification, but many of them are unlikely to + be of use except when testing the libraries. + + +PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES + + Different versions of the PCRE2 library can be built to support charac- + ter strings that are encoded in 8-bit, 16-bit, or 32-bit code units. 
+ One, two, or all three of these libraries may be simultaneously in- + stalled. The pcre2test program can be used to test all the libraries. + However, its own input and output are always in 8-bit format. When + testing the 16-bit or 32-bit libraries, patterns and subject strings + are converted to 16-bit or 32-bit format before being passed to the li- + brary functions. Results are converted back to 8-bit code units for + output. + + In the rest of this document, the names of library functions and struc- + tures are given in generic form, for example, pcre2_compile(). The ac- + tual names used in the libraries have a suffix _8, _16, or _32, as ap- + propriate. + + +INPUT ENCODING + + Input to pcre2test is processed line by line, either by calling the C + library's fgets() function, or via the libreadline or libedit library. + In some Windows environments character 26 (hex 1A) causes an immediate + end of file, and no further data is read, so this character should be + avoided unless you really want that action. + + The input is processed using C's string functions, so must not contain + binary zeros, even though in Unix-like environments, fgets() treats any + bytes other than newline as data characters. An error is generated if a + binary zero is encountered. By default subject lines are processed for + backslash escapes, which makes it possible to include any data value in + strings that are passed to the library for matching. For patterns, + there is a facility for specifying some or all of the 8-bit input char- + acters as hexadecimal pairs, which makes it possible to include binary + zeros. + + Input for the 16-bit and 32-bit libraries + + When testing the 16-bit or 32-bit libraries, there is a need to be able + to generate character code points greater than 255 in the strings that + are passed to the library. For subject lines, backslash escapes can be + used. 
In addition, when the utf modifier (see "Setting compilation op- + tions" below) is set, the pattern and any following subject lines are + interpreted as UTF-8 strings and translated to UTF-16 or UTF-32 as ap- + propriate. + + For non-UTF testing of wide characters, the utf8_input modifier can be + used. This is mutually exclusive with utf, and is allowed only in + 16-bit or 32-bit mode. It causes the pattern and following subject + lines to be treated as UTF-8 according to the original definition (RFC + 2279), which allows for character values up to 0x7fffffff. Each charac- + ter is placed in one 16-bit or 32-bit code unit (in the 16-bit case, + values greater than 0xffff cause an error to occur). + + UTF-8 (in its original definition) is not capable of encoding values + greater than 0x7fffffff, but such values can be handled by the 32-bit + library. When testing this library in non-UTF mode with utf8_input set, + if any character is preceded by the byte 0xff (which is an invalid byte + in UTF-8) 0x80000000 is added to the character's value. This is the + only way of passing such code points in a pattern string. For subject + strings, using an escape sequence is preferable. + + +COMMAND LINE OPTIONS + + -8 If the 8-bit library has been built, this option causes it to + be used (this is the default). If the 8-bit library has not + been built, this option causes an error. + + -16 If the 16-bit library has been built, this option causes it + to be used. If the 8-bit library has not been built, this is + the default. If the 16-bit library has not been built, this + option causes an error. + + -32 If the 32-bit library has been built, this option causes it + to be used. If no other library has been built, this is the + default. If the 32-bit library has not been built, this op- + tion causes an error. + + -ac Behave as if each pattern has the auto_callout modifier, that + is, insert automatic callouts into every pattern that is com- + piled. 
+ + -AC As for -ac, but in addition behave as if each subject line + has the callout_extra modifier, that is, show additional in- + formation from callouts. + + -b Behave as if each pattern has the fullbincode modifier; the + full internal binary form of the pattern is output after com- + pilation. + + -C Output the version number of the PCRE2 library, and all + available information about the optional features that are + included, and then exit with zero exit code. All other op- + tions are ignored. If both -C and -LM are present, whichever + is first is recognized. + + -C option Output information about a specific build-time option, then + exit. This functionality is intended for use in scripts such + as RunTest. The following options output the value and set + the exit code as indicated: + + ebcdic-nl the code for LF (= NL) in an EBCDIC environment: + 0x15 or 0x25 + 0 if used in an ASCII environment + exit code is always 0 + linksize the configured internal link size (2, 3, or 4) + exit code is set to the link size + newline the default newline setting: + CR, LF, CRLF, ANYCRLF, ANY, or NUL + exit code is always 0 + bsr the default setting for what \R matches: + ANYCRLF or ANY + exit code is always 0 + + The following options output 1 for true or 0 for false, and + set the exit code to the same value: + + backslash-C \C is supported (not locked out) + ebcdic compiled for an EBCDIC environment + jit just-in-time support is available + pcre2-16 the 16-bit library was built + pcre2-32 the 32-bit library was built + pcre2-8 the 8-bit library was built + unicode Unicode support is available + + If an unknown option is given, an error message is output; + the exit code is 0. + + -d Behave as if each pattern has the debug modifier; the inter- + nal form and information about the compiled pattern is output + after compilation; -d is equivalent to -b -i. 
+ + -dfa Behave as if each subject line has the dfa modifier; matching + is done using the pcre2_dfa_match() function instead of the + default pcre2_match(). + + -error number[,number,...] + Call pcre2_get_error_message() for each of the error numbers + in the comma-separated list, display the resulting messages + on the standard output, then exit with zero exit code. The + numbers may be positive or negative. This is a convenience + facility for PCRE2 maintainers. + + -help Output a brief summary of these options and then exit. + + -i Behave as if each pattern has the info modifier; information + about the compiled pattern is given after compilation. + + -jit Behave as if each pattern line has the jit modifier; after + successful compilation, each pattern is passed to the just- + in-time compiler, if available. + + -jitfast Behave as if each pattern line has the jitfast modifier; af- + ter successful compilation, each pattern is passed to the + just-in-time compiler, if available, and each subject line is + passed directly to the JIT matcher via its "fast path". + + -jitverify + Behave as if each pattern line has the jitverify modifier; + after successful compilation, each pattern is passed to the + just-in-time compiler, if available, and the use of JIT for + matching is verified. + + -LM List modifiers: write a list of available pattern and subject + modifiers to the standard output, then exit with zero exit + code. All other options are ignored. If both -C and any -Lx + options are present, whichever is first is recognized. + + -LP List properties: write a list of recognized Unicode proper- + ties to the standard output, then exit with zero exit code. + All other options are ignored. If both -C and any -Lx options + are present, whichever is first is recognized. + + -LS List scripts: write a list of recognized Unicode script names + to the standard output, then exit with zero exit code. All + other options are ignored. 
If both -C and any -Lx options are + present, whichever is first is recognized. + + -pattern modifier-list + Behave as if each pattern line contains the given modifiers. + + -q Do not output the version number of pcre2test at the start of + execution. + + -S size On Unix-like systems, set the size of the run-time stack to + size mebibytes (units of 1024*1024 bytes). + + -subject modifier-list + Behave as if each subject line contains the given modifiers. + + -t Run each compile and match many times with a timer, and out- + put the resulting times per compile or match. When JIT is + used, separate times are given for the initial compile and + the JIT compile. You can control the number of iterations + that are used for timing by following -t with a number (as a + separate item on the command line). For example, "-t 1000" + iterates 1000 times. The default is to iterate 500,000 times. + + -tm This is like -t except that it times only the matching phase, + not the compile phase. + + -T -TM These behave like -t and -tm, but in addition, at the end of + a run, the total times for all compiles and matches are out- + put. + + -version Output the PCRE2 version number and then exit. + + +DESCRIPTION + + If pcre2test is given two filename arguments, it reads from the first + and writes to the second. If the first name is "-", input is taken from + the standard input. If pcre2test is given only one argument, it reads + from that file and writes to stdout. Otherwise, it reads from stdin and + writes to stdout. + + When pcre2test is built, a configuration option can specify that it + should be linked with the libreadline or libedit library. When this is + done, if the input is from a terminal, it is read using the readline() + function. This provides line-editing and history facilities. The output + from the -help option states whether or not readline() will be used. + + The program handles any number of tests, each of which consists of a + set of input lines. 
Each set starts with a regular expression pattern, + followed by any number of subject lines to be matched against that pat- + tern. In between sets of test data, command lines that begin with # may + appear. This file format, with some restrictions, can also be processed + by the perltest.sh script that is distributed with PCRE2 as a means of + checking that the behaviour of PCRE2 and Perl is the same. For a speci- + fication of perltest.sh, see the comments near its beginning. See also + the #perltest command below. + + When the input is a terminal, pcre2test prompts for each line of input, + using "re>" to prompt for regular expression patterns, and "data>" to + prompt for subject lines. Command lines starting with # can be entered + only in response to the "re>" prompt. + + Each subject line is matched separately and independently. If you want + to do multi-line matches, you have to use the \n escape sequence (or \r + or \r\n, etc., depending on the newline setting) in a single line of + input to encode the newline sequences. There is no limit on the length + of subject lines; the input buffer is automatically extended if it is + too small. There are replication features that make it possible to + generate long repetitive pattern or subject lines without having to + supply them explicitly. + + An empty line or the end of the file signals the end of the subject + lines for a test, at which point a new pattern or command line is ex- + pected if there is still input to be read. + + +COMMAND LINES + + In between sets of test data, a line that begins with # is interpreted + as a command line. If the first character is followed by white space or + an exclamation mark, the line is treated as a comment, and ignored. 
+ Otherwise, the following commands are recognized: + + #forbid_utf + + Subsequent patterns automatically have the PCRE2_NEVER_UTF and + PCRE2_NEVER_UCP options set, which locks out the use of the PCRE2_UTF + and PCRE2_UCP options and the use of (*UTF) and (*UCP) at the start of + patterns. This command also forces an error if a subsequent pattern + contains any occurrences of \P, \p, or \X, which are still supported + when PCRE2_UTF is not set, but which require Unicode property support + to be included in the library. + + This is a trigger guard that is used in test files to ensure that UTF + or Unicode property tests are not accidentally added to files that are + used when Unicode support is not included in the library. Setting + PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as a default can also be obtained + by the use of #pattern; the difference is that #forbid_utf cannot be + unset, and the automatic options are not displayed in pattern informa- + tion, to avoid cluttering up test output. + + #load + + This command is used to load a set of precompiled patterns from a file, + as described in the section entitled "Saving and restoring compiled + patterns" below. + + #loadtables + + This command is used to load a set of binary character tables that can + be accessed by the tables=3 qualifier. Such tables can be created by + the pcre2_dftables program with the -b option. + + #newline_default [] + + When PCRE2 is built, a default newline convention can be specified. + This determines which characters and/or character pairs are recognized + as indicating a newline in a pattern or subject string. The default can + be overridden when a pattern is compiled. The standard test files con- + tain tests of various newline conventions, but the majority of the + tests expect a single linefeed to be recognized as a newline by de- + fault. Without special action the tests would fail when PCRE2 is com- + piled with either CR or CRLF as the default newline. 
+ + The #newline_default command specifies a list of newline types that are + acceptable as the default. The types must be one of CR, LF, CRLF, ANY- + CRLF, ANY, or NUL (in upper or lower case), for example: + + #newline_default LF Any anyCRLF + + If the default newline is in the list, this command has no effect. Oth- + erwise, except when testing the POSIX API, a newline modifier that + specifies the first newline convention in the list (LF in the above ex- + ample) is added to any pattern that does not already have a newline + modifier. If the newline list is empty, the feature is turned off. This + command is present in a number of the standard test input files. + + When the POSIX API is being tested there is no way to override the de- + fault newline convention, though it is possible to set the newline con- + vention from within the pattern. A warning is given if the posix or + posix_nosub modifier is used when #newline_default would set a default + for the non-POSIX API. + + #pattern + + This command sets a default modifier list that applies to all subse- + quent patterns. Modifiers on a pattern can change these settings. + + #perltest + + This line is used in test files that can also be processed by perl- + test.sh to confirm that Perl gives the same results as PCRE2. Subse- + quent tests are checked for the use of pcre2test features that are in- + compatible with the perltest.sh script. + + Patterns must use '/' as their delimiter, and only certain modifiers + are supported. Comment lines, #pattern commands, and #subject commands + that set or unset "mark" are recognized and acted on. The #perltest, + #forbid_utf, and #newline_default commands, which are needed in the + relevant pcre2test files, are silently ignored. All other command lines + are ignored, but give a warning message. The #perltest command helps + detect tests that are accidentally put in the wrong file or use the + wrong delimiter. 
For more details of the perltest.sh script see the + comments it contains. + + #pop [] + #popcopy [] + + These commands are used to manipulate the stack of compiled patterns, + as described in the section entitled "Saving and restoring compiled + patterns" below. + + #save + + This command is used to save a set of compiled patterns to a file, as + described in the section entitled "Saving and restoring compiled pat- + terns" below. + + #subject + + This command sets a default modifier list that applies to all subse- + quent subject lines. Modifiers on a subject line can change these set- + tings. + + +MODIFIER SYNTAX + + Modifier lists are used with both pattern and subject lines. Items in a + list are separated by commas followed by optional white space. Trailing + whitespace in a modifier list is ignored. Some modifiers may be given + for both patterns and subject lines, whereas others are valid only for + one or the other. Each modifier has a long name, for example "an- + chored", and some of them must be followed by an equals sign and a + value, for example, "offset=12". Values cannot contain comma charac- + ters, but may contain spaces. Modifiers that do not take values may be + preceded by a minus sign to turn off a previous setting. + + A few of the more common modifiers can also be specified as single let- + ters, for example "i" for "caseless". In documentation, following the + Perl convention, these are written with a slash ("the /i modifier") for + clarity. Abbreviated modifiers must all be concatenated in the first + item of a modifier list. If the first item is not recognized as a long + modifier name, it is interpreted as a sequence of these abbreviations. + For example: + + /abc/ig,newline=cr,jit=3 + + This is a pattern line whose modifier list starts with two one-letter + modifiers (/i and /g). The lower-case abbreviated modifiers are the + same as used in Perl. 
+ + +PATTERN SYNTAX + + A pattern line must start with one of the following characters (common + symbols, excluding pattern meta-characters): + + / ! " ' ` - = _ : ; , % & @ ~ + + This is interpreted as the pattern's delimiter. A regular expression + may be continued over several input lines, in which case the newline + characters are included within it. It is possible to include the delim- + iter as a literal within the pattern by escaping it with a backslash, + for example + + /abc\/def/ + + If you do this, the escape and the delimiter form part of the pattern, + but since the delimiters are all non-alphanumeric, the inclusion of the + backslash does not affect the pattern's interpretation. Note, however, + that this trick does not work within \Q...\E literal bracketing because + the backslash will itself be interpreted as a literal. If the terminat- + ing delimiter is immediately followed by a backslash, for example, + + /abc/\ + + a backslash is added to the end of the pattern. This is done to provide + a way of testing the error condition that arises if a pattern finishes + with a backslash, because + + /abc\/ + + is interpreted as the first line of a pattern that starts with "abc/", + causing pcre2test to read the next line as a continuation of the regu- + lar expression. + + A pattern can be followed by a modifier list (details below). + + +SUBJECT LINE SYNTAX + + Before each subject line is passed to pcre2_match(), pcre2_dfa_match(), + or pcre2_jit_match(), leading and trailing white space is removed, and + the line is scanned for backslash escapes, unless the subject_literal + modifier was set for the pattern. 
The following provide a means of en- + coding non-printing characters in a visible way: + + \a alarm (BEL, \x07) + \b backspace (\x08) + \e escape (\x1b) + \f form feed (\x0c) + \n newline (\x0a) + \r carriage return (\x0d) + \t tab (\x09) + \v vertical tab (\x0b) + \nnn octal character (up to 3 octal digits); always + a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode + \o{dd...} octal character (any number of octal digits) + \xhh hexadecimal byte (up to 2 hex digits) + \x{hh...} hexadecimal character (any number of hex digits) + + The use of \x{hh...} is not dependent on the use of the utf modifier on + the pattern. It is recognized always. There may be any number of hexa- + decimal digits inside the braces; invalid values provoke error mes- + sages. + + Note that \xhh specifies one byte rather than one character in UTF-8 + mode; this makes it possible to construct invalid UTF-8 sequences for + testing purposes. On the other hand, \x{hh} is interpreted as a UTF-8 + character in UTF-8 mode, generating more than one byte if the value is + greater than 127. When testing the 8-bit library not in UTF-8 mode, + \x{hh} generates one byte for values less than 256, and causes an error + for greater values. + + In UTF-16 mode, all 4-digit \x{hhhh} values are accepted. This makes it + possible to construct invalid UTF-16 sequences for testing purposes. + + In UTF-32 mode, all 4- to 8-digit \x{...} values are accepted. This + makes it possible to construct invalid UTF-32 sequences for testing + purposes. + + There is a special backslash sequence that specifies replication of one + or more characters: + + \[<characters>]{<count>} + + This makes it possible to test long strings without having to provide + them as part of the file. For example: + + \[abc]{4} + + is converted to "abcabcabcabc". This feature does not support nesting. + To include a closing square bracket in the characters, code it as \x5D. 
+ + A backslash followed by an equals sign marks the end of the subject + string and the start of a modifier list. For example: + + abc\=notbol,notempty + + If the subject string is empty and \= is followed by whitespace, the + line is treated as a comment line, and is not used for matching. For + example: + + \= This is a comment. + abc\= This is an invalid modifier list. + + A backslash followed by any other non-alphanumeric character just es- + capes that character. A backslash followed by anything else causes an + error. However, if the very last character in the line is a backslash + (and there is no modifier list), it is ignored. This gives a way of + passing an empty line as data, since a real empty line terminates the + data input. + + If the subject_literal modifier is set for a pattern, all subject lines + that follow are treated as literals, with no special treatment of back- + slashes. No replication is possible, and any subject modifiers must be + set as defaults by a #subject command. + + +PATTERN MODIFIERS + + There are several types of modifier that can appear in pattern lines. + Except where noted below, they may also be used in #pattern commands. A + pattern's modifier list can add to or override default modifiers that + were set by a previous #pattern command. + + Setting compilation options + + The following modifiers set options for pcre2_compile(). Most of them + set bits in the options argument of that function, but those whose + names start with PCRE2_EXTRA are additional options that are set in the + compile context. Some of these options have single-letter abbrevia- + tions. There is special handling for /x: if a second x is present, + PCRE2_EXTENDED is converted into PCRE2_EXTENDED_MORE as in Perl. A + third appearance adds PCRE2_EXTENDED as well, though this makes no dif- + ference to the way pcre2_compile() behaves. See pcre2api for a descrip- + tion of the effects of these options. 
+ + allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS + allow_lookaround_bsk set PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK + allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES + alt_bsux set PCRE2_ALT_BSUX + alt_circumflex set PCRE2_ALT_CIRCUMFLEX + alt_verbnames set PCRE2_ALT_VERBNAMES + anchored set PCRE2_ANCHORED + /a ascii_all set all ASCII options + ascii_bsd set PCRE2_EXTRA_ASCII_BSD + ascii_bss set PCRE2_EXTRA_ASCII_BSS + ascii_bsw set PCRE2_EXTRA_ASCII_BSW + ascii_digit set PCRE2_EXTRA_ASCII_DIGIT + ascii_posix set PCRE2_EXTRA_ASCII_POSIX + auto_callout set PCRE2_AUTO_CALLOUT + bad_escape_is_literal set PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL + /i caseless set PCRE2_CASELESS + /r caseless_restrict set PCRE2_EXTRA_CASELESS_RESTRICT + dollar_endonly set PCRE2_DOLLAR_ENDONLY + /s dotall set PCRE2_DOTALL + dupnames set PCRE2_DUPNAMES + endanchored set PCRE2_ENDANCHORED + escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF + /x extended set PCRE2_EXTENDED + /xx extended_more set PCRE2_EXTENDED_MORE + extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX + firstline set PCRE2_FIRSTLINE + literal set PCRE2_LITERAL + match_line set PCRE2_EXTRA_MATCH_LINE + match_invalid_utf set PCRE2_MATCH_INVALID_UTF + match_unset_backref set PCRE2_MATCH_UNSET_BACKREF + match_word set PCRE2_EXTRA_MATCH_WORD + /m multiline set PCRE2_MULTILINE + never_backslash_c set PCRE2_NEVER_BACKSLASH_C + never_ucp set PCRE2_NEVER_UCP + never_utf set PCRE2_NEVER_UTF + /n no_auto_capture set PCRE2_NO_AUTO_CAPTURE + no_auto_possess set PCRE2_NO_AUTO_POSSESS + no_dotstar_anchor set PCRE2_NO_DOTSTAR_ANCHOR + no_start_optimize set PCRE2_NO_START_OPTIMIZE + no_utf_check set PCRE2_NO_UTF_CHECK + ucp set PCRE2_UCP + ungreedy set PCRE2_UNGREEDY + use_offset_limit set PCRE2_USE_OFFSET_LIMIT + utf set PCRE2_UTF + + As well as turning on the PCRE2_UTF option, the utf modifier causes all + non-printing characters in output strings to be printed using the + \x{hh...} notation. 
Otherwise, those less than 0x100 are output in hex + without the curly brackets. Setting utf in 16-bit or 32-bit mode also + causes pattern and subject strings to be translated to UTF-16 or + UTF-32, respectively, before being passed to library functions. + + Setting compilation controls + + The following modifiers affect the compilation process or request in- + formation about the pattern. There are single-letter abbreviations for + some that are heavily used in the test files. + + bsr=[anycrlf|unicode] specify \R handling + /B bincode show binary code without lengths + callout_info show callout information + convert= request foreign pattern conversion + convert_glob_escape=c set glob escape character + convert_glob_separator=c set glob separator character + convert_length set convert buffer length + debug same as info,fullbincode + framesize show matching frame size + fullbincode show binary code with lengths + /I info show info about compiled pattern + hex unquoted characters are hexadecimal + jit[=] use JIT + jitfast use JIT fast path + jitverify verify JIT use + locale= use this locale + max_pattern_compiled ) set maximum compiled pattern + _length= ) length (bytes) + max_pattern_length= set maximum pattern length (code units) + max_varlookbehind= set maximum variable lookbehind length + memory show memory used + newline= set newline type + null_context compile with a NULL context + null_pattern pass pattern as NULL + parens_nest_limit= set maximum parentheses depth + posix use the POSIX API + posix_nosub use the POSIX API with REG_NOSUB + push push compiled pattern onto the stack + pushcopy push a copy onto the stack + stackguard= test the stackguard feature + subject_literal treat all subject lines as literal + tables=[0|1|2|3] select internal tables + use_length do not zero-terminate the pattern + utf8_input treat input as UTF-8 + + The effects of these modifiers are described in the following sections. 
+ + Newline and \R handling + + The bsr modifier specifies what \R in a pattern should match. If it is + set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to + "unicode", \R matches any Unicode newline sequence. The default can be + specified when PCRE2 is built; if it is not, the default is set to Uni- + code. + + The newline modifier specifies which characters are to be interpreted + as newlines, both in the pattern and in subject lines. The type must be + one of CR, LF, CRLF, ANYCRLF, ANY, or NUL (in upper or lower case). + + Information about a pattern + + The debug modifier is a shorthand for info,fullbincode, requesting all + available information. + + The bincode modifier causes a representation of the compiled code to be + output after compilation. This information does not contain length and + offset values, which ensures that the same output is generated for dif- + ferent internal link sizes and different code unit widths. By using + bincode, the same regression tests can be used in different environ- + ments. + + The fullbincode modifier, by contrast, does include length and offset + values. This is used in a few special tests that run only for specific + code unit widths and link sizes, and is also useful for one-off tests. + + The info modifier requests information about the compiled pattern + (whether it is anchored, has a fixed first character, and so on). The + information is obtained from the pcre2_pattern_info() function. 
Here + are some typical examples: + + re> /(?i)(^a|^b)/m,info + Capture group count = 1 + Compile options: multiline + Overall options: caseless multiline + First code unit at start or follows newline + Subject length lower bound = 1 + + re> /(?i)abc/info + Capture group count = 0 + Compile options: <none> + Overall options: caseless + First code unit = 'a' (caseless) + Last code unit = 'c' (caseless) + Subject length lower bound = 3 + + "Compile options" are those specified by modifiers; "overall options" + have added options that are taken or deduced from the pattern. If both + sets of options are the same, just a single "options" line is output; + if there are no options, the line is omitted. "First code unit" is + where any match must start; if there is more than one they are listed + as "starting code units". "Last code unit" is the last literal code + unit that must be present in any match. This is not necessarily the + last character. These lines are omitted if no starting or ending code + units are recorded. The subject length line is omitted when + no_start_optimize is set because the minimum length is not calculated + when it can never be used. + + The framesize modifier shows the size, in bytes, of each storage frame + used by pcre2_match() for handling backtracking. The size depends on + the number of capturing parentheses in the pattern. A vector of these + frames is used at matching time; its overall size is shown when the + heapframes_size subject modifier is set. + + The callout_info modifier requests information about all the callouts + in the pattern. A list of them is output at the end of any other infor- + mation that is requested. For each callout, either its number or string + is given, followed by the item that follows it in the pattern. + + Passing a NULL context + + Normally, pcre2test passes a context block to pcre2_compile(). If the + null_context modifier is set, however, NULL is passed. 
This is for + testing that pcre2_compile() behaves correctly in this case (it uses + default values). + + Passing a NULL pattern + + The null_pattern modifier is for testing the behaviour of pcre2_com- + pile() when the pattern argument is NULL. The length value passed is + the default PCRE2_ZERO_TERMINATED unless use_length is set. Any length + other than zero causes an error. + + Specifying pattern characters in hexadecimal + + The hex modifier specifies that the characters of the pattern, except + for substrings enclosed in single or double quotes, are to be inter- + preted as pairs of hexadecimal digits. This feature is provided as a + way of creating patterns that contain binary zeros and other non-print- + ing characters. White space is permitted between pairs of digits. For + example, this pattern contains three characters: + + /ab 32 59/hex + + Parts of such a pattern are taken literally if quoted. This pattern + contains nine characters, only two of which are specified in hexadeci- + mal: + + /ab "literal" 32/hex + + Either single or double quotes may be used. There is no way of includ- + ing the delimiter within a substring. The hex and expand modifiers are + mutually exclusive. + + Specifying the pattern's length + + By default, patterns are passed to the compiling functions as zero-ter- + minated strings but can be passed by length instead of being zero-ter- + minated. The use_length modifier causes this to happen. Using a length + happens automatically (whether or not use_length is set) when hex is + set, because patterns specified in hexadecimal may contain binary ze- + ros. + + If hex or use_length is used with the POSIX wrapper API (see "Using the + POSIX wrapper API" below), the REG_PEND extension is used to pass the + pattern's length. + + Specifying a maximum for variable lookbehinds + + Variable lookbehind assertions are supported only if, for each one, + there is a maximum length (in characters) that it can match. 
There is a + limit on this, whose default can be set at build time, with an ultimate + default of 255. The max_varlookbehind modifier uses the + pcre2_set_max_varlookbehind() function to change the limit. Lookbehinds + whose branches each match a fixed length are limited to 65535 charac- + ters per branch. + + Specifying wide characters in 16-bit and 32-bit modes + + In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 + and translated to UTF-16 or UTF-32 when the utf modifier is set. For + testing the 16-bit and 32-bit libraries in non-UTF mode, the utf8_input + modifier can be used. It is mutually exclusive with utf. Input lines + are interpreted as UTF-8 as a means of specifying wide characters. More + details are given in "Input encoding" above. + + Generating long repetitive patterns + + Some tests use long patterns that are very repetitive. Instead of cre- + ating a very long input line for such a pattern, you can use a special + repetition feature, similar to the one described for subject lines + above. If the expand modifier is present on a pattern, parts of the + pattern that have the form + + \[]{} + + are expanded before the pattern is passed to pcre2_compile(). For exam- + ple, \[AB]{6000} is expanded to "ABAB..." 6000 times. This construction + cannot be nested. An initial "\[" sequence is recognized only if "]{" + followed by decimal digits and "}" is found later in the pattern. If + not, the characters remain in the pattern unaltered. The expand and hex + modifiers are mutually exclusive. + + If part of an expanded pattern looks like an expansion, but is really + part of the actual pattern, unwanted expansion can be avoided by giving + two values in the quantifier. For example, \[AB]{6000,6000} is not rec- + ognized as an expansion item. + + If the info modifier is set on an expanded pattern, the result of the + expansion is included in the information that is output. 
+ + JIT compilation + + Just-in-time (JIT) compiling is a heavyweight optimization that can + greatly speed up pattern matching. See the pcre2jit documentation for + details. JIT compiling happens, optionally, after a pattern has been + successfully compiled into an internal form. The JIT compiler converts + this to optimized machine code. It needs to know whether the match-time + options PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT are going to be used, + because different code is generated for the different cases. See the + partial modifier in "Subject Modifiers" below for details of how these + options are specified for each match attempt. + + JIT compilation is requested by the jit pattern modifier, which may op- + tionally be followed by an equals sign and a number in the range 0 to + 7. The three bits that make up the number specify which of the three + JIT operating modes are to be compiled: + + 1 compile JIT code for non-partial matching + 2 compile JIT code for soft partial matching + 4 compile JIT code for hard partial matching + + The possible values for the jit modifier are therefore: + + 0 disable JIT + 1 normal matching only + 2 soft partial matching only + 3 normal and soft partial matching + 4 hard partial matching only + 6 soft and hard partial matching only + 7 all three modes + + If no number is given, 7 is assumed. The phrase "partial matching" + means a call to pcre2_match() with either the PCRE2_PARTIAL_SOFT or the + PCRE2_PARTIAL_HARD option set. Note that such a call may return a com- + plete match; the options enable the possibility of a partial match, but + do not require it. Note also that if you request JIT compilation only + for partial matching (for example, jit=2) but do not set the partial + modifier on a subject line, that match will not use JIT code because + none was compiled for non-partial matching. 
+ + If JIT compilation is successful, the compiled JIT code will automati- + cally be used when an appropriate type of match is run, except when in- + compatible run-time options are specified. For more details, see the + pcre2jit documentation. See also the jitstack modifier below for a way + of setting the size of the JIT stack. + + If the jitfast modifier is specified, matching is done using the JIT + "fast path" interface, pcre2_jit_match(), which skips some of the san- + ity checks that are done by pcre2_match(), and of course does not work + when JIT is not supported. If jitfast is specified without jit, jit=7 + is assumed. + + If the jitverify modifier is specified, information about the compiled + pattern shows whether JIT compilation was or was not successful. If + jitverify is specified without jit, jit=7 is assumed. If JIT compila- + tion is successful when jitverify is set, the text "(JIT)" is added to + the first output line after a match or non match when JIT-compiled code + was actually used in the match. + + Setting a locale + + The locale modifier must specify the name of a locale, for example: + + /pattern/locale=fr_FR + + The given locale is set, pcre2_maketables() is called to build a set of + character tables for the locale, and this is then passed to pcre2_com- + pile() when compiling the regular expression. The same tables are used + when matching the following subject lines. The locale modifier applies + only to the pattern on which it appears, but can be given in a #pattern + command if a default is needed. Setting a locale and alternate charac- + ter tables are mutually exclusive. + + Showing pattern memory + + The memory modifier causes the size in bytes of the memory used to hold + the compiled pattern to be output. This does not include the size of + the pcre2_code block; it is just the actual compiled data. If the pat- + tern is subsequently passed to the JIT compiler, the size of the JIT + compiled code is also output. 
Here is an example: + + re> /a(b)c/jit,memory + Memory allocation (code space): 21 + Memory allocation (JIT code): 1910 + + + Limiting nested parentheses + + The parens_nest_limit modifier sets a limit on the depth of nested + parentheses in a pattern. Breaching the limit causes a compilation er- + ror. The default for the library is set when PCRE2 is built, but + pcre2test sets its own default of 220, which is required for running + the standard test suite. + + Limiting the pattern length + + The max_pattern_length modifier sets a limit, in code units, to the + length of pattern that pcre2_compile() will accept. Breaching the limit + causes a compilation error. The default is the largest number a + PCRE2_SIZE variable can hold (essentially unlimited). + + Limiting the size of a compiled pattern + + The max_pattern_compiled_length modifier sets a limit, in bytes, to the + amount of memory used by a compiled pattern. Breaching the limit causes + a compilation error. The default is the largest number a PCRE2_SIZE + variable can hold (essentially unlimited). + + Using the POSIX wrapper API + + The posix and posix_nosub modifiers cause pcre2test to call PCRE2 via + the POSIX wrapper API rather than its native API. When posix_nosub is + used, the POSIX option REG_NOSUB is passed to regcomp(). The POSIX + wrapper supports only the 8-bit library. Note that it does not imply + POSIX matching semantics; for more detail see the pcre2posix documenta- + tion. The following pattern modifiers set options for the regcomp() + function: + + caseless REG_ICASE + multiline REG_NEWLINE + dotall REG_DOTALL ) + ungreedy REG_UNGREEDY ) These options are not part of + ucp REG_UCP ) the POSIX standard + utf REG_UTF8 ) + + The regerror_buffsize modifier specifies a size for the error buffer + that is passed to regerror() in the event of a compilation error. 
For + example: + + /abc/posix,regerror_buffsize=20 + + This provides a means of testing the behaviour of regerror() when the + buffer is too small for the error message. If this modifier has not + been set, a large buffer is used. + + The aftertext and allaftertext subject modifiers work as described be- + low. All other modifiers are either ignored, with a warning message, or + cause an error. + + The pattern is passed to regcomp() as a zero-terminated string by de- + fault, but if the use_length or hex modifiers are set, the REG_PEND ex- + tension is used to pass it by length. + + Testing the stack guard feature + + The stackguard modifier is used to test the use of pcre2_set_com- + pile_recursion_guard(), a function that is provided to enable stack + availability to be checked during compilation (see the pcre2api docu- + mentation for details). If the number specified by the modifier is + greater than zero, pcre2_set_compile_recursion_guard() is called to set + up callback from pcre2_compile() to a local function. The argument it + receives is the current nesting parenthesis depth; if this is greater + than the value given by the modifier, non-zero is returned, causing the + compilation to be aborted. + + Using alternative character tables + + The value specified for the tables modifier must be one of the digits + 0, 1, 2, or 3. It causes a specific set of built-in character tables to + be passed to pcre2_compile(). This is used in the PCRE2 tests to check + behaviour with different character tables. The digit specifies the ta- + bles as follows: + + 0 do not pass any special character tables + 1 the default ASCII tables, as distributed in + pcre2_chartables.c.dist + 2 a set of tables defining ISO 8859 characters + 3 a set of tables loaded by the #loadtables command + + In tables 2, some characters whose codes are greater than 128 are iden- + tified as letters, digits, spaces, etc. 
Tables 3 can be used only after + a #loadtables command has loaded them from a binary file. Setting al- + ternate character tables and a locale are mutually exclusive. + + Setting certain match controls + + The following modifiers are really subject modifiers, and are described + under "Subject Modifiers" below. However, they may be included in a + pattern's modifier list, in which case they are applied to every sub- + ject line that is processed with that pattern. These modifiers do not + affect the compilation process. + + aftertext show text after match + allaftertext show text after captures + allcaptures show all captures + allvector show the entire ovector + allusedtext show all consulted text + altglobal alternative global matching + /g global global matching + heapframes_size show match data heapframes size + jitstack=<n> set size of JIT stack + mark show mark values + replace=<string> specify a replacement string + startchar show starting character when relevant + substitute_callout use substitution callouts + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED + substitute_literal use PCRE2_SUBSTITUTE_LITERAL + substitute_matched use PCRE2_SUBSTITUTE_MATCHED + substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY + substitute_skip=<n> skip substitution <n> + substitute_stop=<n> skip substitution <n> and following + substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY + + These modifiers may not appear in a #pattern command. If you want them + as defaults, set them in a #subject command. + + Specifying literal subject lines + + If the subject_literal modifier is present on a pattern, all the sub- + ject lines that it matches are taken as literal strings, with no inter- + pretation of backslashes. It is not possible to set subject modifiers + on such lines, but any that are set as defaults by a #subject command + are recognized. 
+ + Saving a compiled pattern + + When a pattern with the push modifier is successfully compiled, it is + pushed onto a stack of compiled patterns, and pcre2test expects the + next line to contain a new pattern (or a command) instead of a subject + line. This facility is used when saving compiled patterns to a file, as + described in the section entitled "Saving and restoring compiled pat- + terns" below. If pushcopy is used instead of push, a copy of the com- + piled pattern is stacked, leaving the original as current, ready to + match the following input lines. This provides a way of testing the + pcre2_code_copy() function. The push and pushcopy modifiers are in- + compatible with compilation modifiers such as global that act at match + time. Any that are specified are ignored (for the stacked copy), with a + warning message, except for replace, which causes an error. Note that + jitverify, which is allowed, does not carry through to any subsequent + matching that uses a stacked pattern. + + Testing foreign pattern conversion + + The experimental foreign pattern conversion functions in PCRE2 can be + tested by setting the convert modifier. Its argument is a colon-sepa- + rated list of options, which set the equivalent option for the + pcre2_pattern_convert() function: + + glob PCRE2_CONVERT_GLOB + glob_no_starstar PCRE2_CONVERT_GLOB_NO_STARSTAR + glob_no_wild_separator PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR + posix_basic PCRE2_CONVERT_POSIX_BASIC + posix_extended PCRE2_CONVERT_POSIX_EXTENDED + unset Unset all options + + The "unset" value is useful for turning off a default that has been set + by a #pattern command. When one of these options is set, the input pat- + tern is passed to pcre2_pattern_convert(). If the conversion is suc- + cessful, the result is reflected in the output and then passed to + pcre2_compile(). 
The normal utf and no_utf_check options, if set, cause + the PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be + passed to pcre2_pattern_convert(). + + By default, the conversion function is allowed to allocate a buffer for + its output. However, if the convert_length modifier is set to a value + greater than zero, pcre2test passes a buffer of the given length. This + makes it possible to test the length check. + + The convert_glob_escape and convert_glob_separator modifiers can be + used to specify the escape and separator characters for glob process- + ing, overriding the defaults, which are operating-system dependent. + + +SUBJECT MODIFIERS + + The modifiers that can appear in subject lines and the #subject command + are of two types. + + Setting match options + + The following modifiers set options for pcre2_match() or + pcre2_dfa_match(). See pcre2api for a description of their effects. + + anchored set PCRE2_ANCHORED + endanchored set PCRE2_ENDANCHORED + dfa_restart set PCRE2_DFA_RESTART + dfa_shortest set PCRE2_DFA_SHORTEST + disable_recurseloop_check set PCRE2_DISABLE_RECURSELOOP_CHECK + no_jit set PCRE2_NO_JIT + no_utf_check set PCRE2_NO_UTF_CHECK + notbol set PCRE2_NOTBOL + notempty set PCRE2_NOTEMPTY + notempty_atstart set PCRE2_NOTEMPTY_ATSTART + noteol set PCRE2_NOTEOL + partial_hard (or ph) set PCRE2_PARTIAL_HARD + partial_soft (or ps) set PCRE2_PARTIAL_SOFT + + The partial matching modifiers are provided with abbreviations because + they appear frequently in tests. + + If the posix or posix_nosub modifier was present on the pattern, caus- + ing the POSIX wrapper API to be used, the only option-setting modifiers + that have any effect are notbol, notempty, and noteol, causing REG_NOT- + BOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to + regexec(). The other modifiers are ignored, with a warning message. + + There is one additional modifier that can be used with the POSIX wrap- + per. 
It is ignored (with a warning) if used for non-POSIX matching. + + posix_startend=<n>[:<m>] + + This causes the subject string to be passed to regexec() using the + REG_STARTEND option, which uses offsets to specify which part of the + string is searched. If only one number is given, the end offset is + passed as the end of the subject string. For more detail of REG_STAR- + TEND, see the pcre2posix documentation. If the subject string contains + binary zeros (coded as escapes such as \x{00} because pcre2test does + not support actual binary zeros in its input), you must use posix_star- + tend to specify its length. + + Setting match controls + + The following modifiers affect the matching process or request addi- + tional information. Some of them may also be specified on a pattern + line (see above), in which case they apply to every subject line that + is matched against that pattern, but can be overridden by modifiers on + the subject. + + aftertext show text after match + allaftertext show text after captures + allcaptures show all captures + allvector show the entire ovector + allusedtext show all consulted text (non-JIT only) + altglobal alternative global matching + callout_capture show captures at callout time + callout_data=<n> set a value to pass via callouts + callout_error=<n>[:<m>] control callout error + callout_extra show extra callout information + callout_fail=<n>[:<m>] control callout failure + callout_no_where do not show position of a callout + callout_none do not supply a callout function + copy=<number or name> copy captured substring + depth_limit=<n> set a depth limit + dfa use pcre2_dfa_match() + find_limits find heap, match and depth limits + find_limits_noheap find match and depth limits + get=<number or name> extract captured substring + getall extract all captured substrings + /g global global matching + heapframes_size show match data heapframes size + heap_limit=<n> set a limit on heap memory (Kbytes) + jitstack=<n> set size of JIT stack + mark show mark values + match_limit=<n> set a match limit + 
memory show heap memory usage + null_context match with a NULL context + null_replacement substitute with NULL replacement + null_subject match with NULL subject + offset=<n> set starting offset + offset_limit=<n> set offset limit + ovector=<n> set size of output vector + recursion_limit=<n> obsolete synonym for depth_limit + replace=<string> specify a replacement string + startchar show startchar when relevant + startoffset=<n> same as offset=<n> + substitute_callout use substitution callouts + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED + substitute_literal use PCRE2_SUBSTITUTE_LITERAL + substitute_matched use PCRE2_SUBSTITUTE_MATCHED + substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY + substitute_skip=<n> skip substitution number n + substitute_stop=<n> skip substitution number n and greater + substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY + zero_terminate pass the subject as zero-terminated + + The effects of these modifiers are described in the following sections. + When matching via the POSIX wrapper API, the aftertext, allaftertext, + and ovector subject modifiers work as described below. All other modi- + fiers are either ignored, with a warning message, or cause an error. + + Showing more text + + The aftertext modifier requests that as well as outputting the part of + the subject string that matched the entire pattern, pcre2test should in + addition output the remainder of the subject string. This is useful for + tests where the subject contains multiple copies of the same substring. + The allaftertext modifier requests the same action for captured sub- + strings as well as the main matched substring. In each case the remain- + der is output on the following line with a plus character following the + capture number. 
+ + The allusedtext modifier requests that all the text that was consulted + during a successful pattern match by the interpreter should be shown, + for both full and partial matches. This feature is not supported for + JIT matching, and if requested with JIT it is ignored (with a warning + message). Setting this modifier affects the output if there is a look- + behind at the start of a match, or, for a complete match, a lookahead + at the end, or if \K is used in the pattern. Characters that precede or + follow the start and end of the actual match are indicated in the out- + put by '<' or '>' characters underneath them. Here is an example: + + re> /(?<=pqr)abc(?=xyz)/ + data> 123pqrabcxyz456\=allusedtext + 0: pqrabcxyz + <<< >>> + data> 123pqrabcxy\=ph,allusedtext + Partial match: pqrabcxy + <<< + + The first, complete match shows that the matched string is "abc", with + the preceding and following strings "pqr" and "xyz" having been con- + sulted during the match (when processing the assertions). The partial + match can indicate only the preceding string. + + The startchar modifier requests that the starting character for the + match be indicated, if it is different to the start of the matched + string. The only time when this occurs is when \K has been processed as + part of the match. In this situation, the output for the matched string + is displayed from the starting character instead of from the match + point, with circumflex characters under the earlier characters. For ex- + ample: + + re> /abc\Kxyz/ + data> abcxyz\=startchar + 0: abcxyz + ^^^ + + Unlike allusedtext, the startchar modifier can be used with JIT. How- + ever, these two modifiers are mutually exclusive. + + Showing the value of all capture groups + + The allcaptures modifier requests that the values of all potential cap- + tured parentheses be output after a match. 
By default, only those up to + the highest one actually used in the match are output (corresponding to + the return code from pcre2_match()). Groups that did not take part in + the match are output as "<unset>". This modifier is not relevant for + DFA matching (which does no capturing) and does not apply when replace + is specified; it is ignored, with a warning message, if present. + + Showing the entire ovector, for all outcomes + + The allvector modifier requests that the entire ovector be shown, what- + ever the outcome of the match. Compare allcaptures, which shows only up + to the maximum number of capture groups for the pattern, and then only + for a successful complete non-DFA match. This modifier, which acts af- + ter any match result, and also for DFA matching, provides a means of + checking that there are no unexpected modifications to ovector fields. + Before each match attempt, the ovector is filled with a special value, + and if this is found in both elements of a capturing pair, "<unchanged>" is output. After a successful match, this applies to all + groups after the maximum capture group for the pattern. In other cases + it applies to the entire ovector. After a partial match, the first two + elements are the only ones that should be set. After a DFA match, the + amount of ovector that is used depends on the number of matches that + were found. + + Testing pattern callouts + + A callout function is supplied when pcre2test calls the library match- + ing functions, unless callout_none is specified. Its behaviour can be + controlled by various modifiers listed above whose names begin with + callout_. Details are given in the section entitled "Callouts" below. + Testing callouts from pcre2_substitute() is described separately in + "Testing the substitution function" below. + + Finding all matches in a string + + Searching for all possible matches within a subject can be requested by + the global or altglobal modifier. 
After finding a match, the matching + function is called again to search the remainder of the subject. The + difference between global and altglobal is that the former uses the + start_offset argument to pcre2_match() or pcre2_dfa_match() to start + searching at a new point within the entire string (which is what Perl + does), whereas the latter passes over a shortened subject. This makes a + difference to the matching process if the pattern begins with a lookbe- + hind assertion (including \b or \B). + + If an empty string is matched, the next match is done with the + PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search + for another, non-empty, match at the same point in the subject. If this + match fails, the start offset is advanced, and the normal match is re- + tried. This imitates the way Perl handles such cases when using the /g + modifier or the split() function. Normally, the start offset is ad- + vanced by one character, but if the newline convention recognizes CRLF + as a newline, and the current character is CR followed by LF, an ad- + vance of two characters occurs. + + Testing substring extraction functions + + The copy and get modifiers can be used to test the pcre2_sub- + string_copy_xxx() and pcre2_substring_get_xxx() functions. They can be + given more than once, and each can specify a capture group name or num- + ber, for example: + + abcd\=copy=1,copy=3,get=G1 + + If the #subject command is used to set default copy and/or get lists, + these can be unset by specifying a negative number to cancel all num- + bered groups and an empty name to cancel all named groups. + + The getall modifier tests pcre2_substring_list_get(), which extracts + all captured substrings. + + If the subject line is successfully matched, the substrings extracted + by the convenience functions are output with C, G, or L after the + string number instead of a colon. This is in addition to the normal + full list. 
The string length (that is, the return from the extraction + function) is given in parentheses after each substring, followed by the + name when the extraction was by name. + + Testing the substitution function + + If the replace modifier is set, the pcre2_substitute() function is + called instead of one of the matching functions (or after one call of + pcre2_match() in the case of PCRE2_SUBSTITUTE_MATCHED). Note that re- + placement strings cannot contain commas, because a comma signifies the + end of a modifier. This is not thought to be an issue in a test pro- + gram. + + Specifying a completely empty replacement string disables this modi- + fier. However, it is possible to specify an empty replacement by pro- + viding a buffer length, as described below, for an otherwise empty re- + placement. + + Unlike subject strings, pcre2test does not process replacement strings + for escape sequences. In UTF mode, a replacement string is checked to + see if it is a valid UTF-8 string. If so, it is correctly converted to + a UTF string of the appropriate code unit width. If it is not a valid + UTF-8 string, the individual code units are copied directly. This pro- + vides a means of passing an invalid UTF-8 string for testing purposes. + + The following modifiers set options (in addition to the normal match + options) for pcre2_substitute(): + + global PCRE2_SUBSTITUTE_GLOBAL + substitute_extended PCRE2_SUBSTITUTE_EXTENDED + substitute_literal PCRE2_SUBSTITUTE_LITERAL + substitute_matched PCRE2_SUBSTITUTE_MATCHED + substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY + substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY + + See the pcre2api documentation for details of these options. + + After a successful substitution, the modified string is output, pre- + ceded by the number of replacements. This may be zero if there were no + matches. 
Here is a simple example of a substitution test: + + /abc/replace=xxx + =abc=abc= + 1: =xxx=abc= + =abc=abc=\=global + 2: =xxx=xxx= + + Subject and replacement strings should be kept relatively short (fewer + than 256 characters) for substitution tests, as fixed-size buffers are + used. To make it easy to test for buffer overflow, if the replacement + string starts with a number in square brackets, that number is passed + to pcre2_substitute() as the size of the output buffer, with the re- + placement string starting at the next character. Here is an example + that tests the edge case: + + /abc/ + 123abc123\=replace=[10]XYZ + 1: 123XYZ123 + 123abc123\=replace=[9]XYZ + Failed: error -47: no more memory + + The default action of pcre2_substitute() is to return PCRE2_ER- + ROR_NOMEMORY when the output buffer is too small. However, if the + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the substi- + tute_overflow_length modifier), pcre2_substitute() continues to go + through the motions of matching and substituting (but not doing any + callouts), in order to compute the size of buffer that is required. + When this happens, pcre2test shows the required buffer length (which + includes space for the trailing zero) as part of the error message. For + example: + + /abc/substitute_overflow_length + 123abc123\=replace=[9]XYZ + Failed: error -47: no more memory: 10 code units are needed + + A replacement string is ignored with POSIX and DFA matching. Specifying + partial matching provokes an error return ("bad option value") from + pcre2_substitute(). + + Testing substitute callouts + + If the substitute_callout modifier is set, a substitution callout func- + tion is set up. The null_context modifier must not be set, because the + address of the callout function is passed in a match context. When the + callout function is called (after each substitution), details of the + input and output strings are output. 
For example: + + /abc/g,replace=<$0>,substitute_callout + abcdefabcpqr + 1(1) Old 0 3 "abc" New 0 5 "<abc>" + 2(1) Old 6 9 "abc" New 8 13 "<abc>" + 2: <abc>def<abc>pqr + + The first number on each callout line is the count of matches. The + parenthesized number is the number of pairs that are set in the ovector + (that is, one more than the number of capturing groups that were set). + Then are listed the offsets of the old substring, its contents, and the + same for the replacement. + + By default, the substitution callout function returns zero, which ac- + cepts the replacement and causes matching to continue if /g was used. + Two further modifiers can be used to test other return values. If sub- + stitute_skip is set to a value greater than zero the callout function + returns +1 for the match of that number, and similarly substitute_stop + returns -1. These cause the replacement to be rejected, and -1 causes + no further matching to take place. If either of them is set, substi- + tute_callout is assumed. For example: + + /abc/g,replace=<$0>,substitute_skip=1 + abcdefabcpqr + 1(1) Old 0 3 "abc" New 0 5 "<abc> SKIPPED" + 2(1) Old 6 9 "abc" New 6 11 "<abc>" + 2: abcdef<abc>pqr + abcdefabcpqr\=substitute_stop=1 + 1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED" + 1: abcdefabcpqr + + If both are set for the same number, stop takes precedence. Only a sin- + gle skip or stop is supported, which is sufficient for testing that the + feature works. + + Setting the JIT stack size + + The jitstack modifier provides a way of setting the maximum stack size + that is used by the just-in-time optimization code. It is ignored if + JIT optimization is not being used. The value is a number of kibibytes + (units of 1024 bytes). Setting zero reverts to the default of 32KiB. + Providing a stack that is larger than the default is necessary only for + very complicated patterns. If jitstack is set non-zero on a subject + line it overrides any value that was set on the pattern. 
+ + Setting heap, match, and depth limits + + The heap_limit, match_limit, and depth_limit modifiers set the appro- + priate limits in the match context. These values are ignored when the + find_limits or find_limits_noheap modifier is specified. + + Finding minimum limits + + If the find_limits modifier is present on a subject line, pcre2test + calls the relevant matching function several times, setting different + values in the match context via pcre2_set_heap_limit(), + pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the + smallest value for each parameter that allows the match to complete + without a "limit exceeded" error. The match itself may succeed or fail. + An alternative modifier, find_limits_noheap, omits the heap limit. This + is used in the standard tests, because the minimum heap limit varies + between systems. If JIT is being used, only the match limit is rele- + vant, and the other two are automatically omitted. + + When using this modifier, the pattern should not contain any limit set- + tings such as (*LIMIT_MATCH=...) within it. If such a setting is + present and is lower than the minimum matching value, the minimum value + cannot be found because pcre2_set_match_limit() etc. are only able to + reduce the value of an in-pattern limit; they cannot increase it. + + For non-DFA matching, the minimum depth_limit number is a measure of + how much nested backtracking happens (that is, how deeply the pattern's + tree is searched). In the case of DFA matching, depth_limit controls + the depth of recursive calls of the internal function that is used for + handling pattern recursion, lookaround assertions, and atomic groups. + + For non-DFA matching, the match_limit number is a measure of the amount + of backtracking that takes place, and learning the minimum value can be + instructive. 
For most simple matches, the number is quite small, but + for patterns with very large numbers of matching possibilities, it can + become large very quickly with increasing length of subject string. In + the case of DFA matching, match_limit controls the total number of + calls, both recursive and non-recursive, to the internal matching func- + tion, thus controlling the overall amount of computing resource that is + used. + + For both kinds of matching, the heap_limit number, which is in + kibibytes (units of 1024 bytes), limits the amount of heap memory used + for matching. + + Showing MARK names + + + The mark modifier causes the names from backtracking control verbs that + are returned from calls to pcre2_match() to be displayed. If a mark is + returned for a match, non-match, or partial match, pcre2test shows it. + For a match, it is on a line by itself, tagged with "MK:". Otherwise, + it is added to the non-match message. + + Showing memory usage + + The memory modifier causes pcre2test to log the sizes of all heap mem- + ory allocation and freeing calls that occur during a call to + pcre2_match() or pcre2_dfa_match(). In the latter case, heap memory is + used only when a match requires more internal workspace than the de- + fault allocation on the stack, so in many cases there will be no out- + put. No heap memory is allocated during matching with JIT. For this + modifier to work, the null_context modifier must not be set on both the + pattern and the subject, though it can be set on one or the other. + + Showing the heap frame overall vector size + + The heapframes_size modifier is relevant for matches using + pcre2_match() without JIT. After a match has run (whether successful or + not) the size, in bytes, of the allocated heap frames vector that is + left attached to the match data block is shown. If the matching action + involved several calls to pcre2_match() (for example, global matching + or for timing) only the final value is shown. 
+ + This modifier is ignored, with a warning, for POSIX or DFA matching. + JIT matching does not use the heap frames vector, so the size is always + zero, unless there was a previous non-JIT match. Note that specifying a + size of zero for the output vector (see below) causes pcre2test to free + its match data block (and associated heap frames vector) and allocate a + new one. + + Setting a starting offset + + The offset modifier sets an offset in the subject string at which + matching starts. Its value is a number of code units, not characters. + + Setting an offset limit + + The offset_limit modifier sets a limit for unanchored matches. If a + match cannot be found starting at or before this offset in the subject, + a "no match" return is given. The data value is a number of code units, + not characters. When this modifier is used, the use_offset_limit modi- + fier must have been set for the pattern; if not, an error is generated. + + Setting the size of the output vector + + The ovector modifier applies only to the subject line in which it ap- + pears, though of course it can also be used to set a default in a #sub- + ject command. It specifies the number of pairs of offsets that are + available for storing matching information. The default is 15. + + A value of zero is useful when testing the POSIX API because it causes + regexec() to be called with a NULL capture vector. When not testing the + POSIX API, a value of zero is used to cause pcre2_match_data_cre- + ate_from_pattern() to be called, in order to create a new match block + of exactly the right size for the pattern. (It is not possible to cre- + ate a match block with a zero-length ovector; there is always at least + one pair of offsets.) The old match data block is freed. + + Passing the subject as zero-terminated + + By default, the subject string is passed to a native API matching func- + tion with its correct length. 
In order to test the facility for passing + a zero-terminated string, the zero_terminate modifier is provided. It + causes the length to be passed as PCRE2_ZERO_TERMINATED. When matching + via the POSIX interface, this modifier is ignored, with a warning. + + When testing pcre2_substitute(), this modifier also has the effect of + passing the replacement string as zero-terminated. + + Passing a NULL context, subject, or replacement + + Normally, pcre2test passes a context block to pcre2_match(), + pcre2_dfa_match(), pcre2_jit_match() or pcre2_substitute(). If the + null_context modifier is set, however, NULL is passed. This is for + testing that the matching and substitution functions behave correctly + in this case (they use default values). This modifier cannot be used + with the find_limits, find_limits_noheap, or substitute_callout modi- + fiers. + + Similarly, for testing purposes, if the null_subject or null_replace- + ment modifier is set, the subject or replacement string pointers are + passed as NULL, respectively, to the relevant functions. + + +THE ALTERNATIVE MATCHING FUNCTION + + By default, pcre2test uses the standard PCRE2 matching function, + pcre2_match() to match each subject line. PCRE2 also supports an alter- + native matching function, pcre2_dfa_match(), which operates in a dif- + ferent way, and has some restrictions. The differences between the two + functions are described in the pcre2matching documentation. + + If the dfa modifier is set, the alternative matching function is used. + This function finds all possible matches at a given point in the sub- + ject. If, however, the dfa_shortest modifier is set, processing stops + after the first match is found. This is always the shortest possible + match. + + +DEFAULT OUTPUT FROM pcre2test + + This section describes the output when the normal matching function, + pcre2_match(), is being used. 
+ + When a match succeeds, pcre2test outputs the list of captured sub- + strings, starting with number 0 for the string that matched the whole + pattern. Otherwise, it outputs "No match" when the return is PCRE2_ER- + ROR_NOMATCH, or "Partial match:" followed by the partially matching + substring when the return is PCRE2_ERROR_PARTIAL. (Note that this is + the entire substring that was inspected during the partial match; it + may include characters before the actual match start if a lookbehind + assertion, \K, \b, or \B was involved.) + + For any other return, pcre2test outputs the PCRE2 negative error number + and a short descriptive phrase. If the error is a failed UTF string + check, the code unit offset of the start of the failing character is + also output. Here is an example of an interactive pcre2test run. + + $ pcre2test + PCRE2 version 10.22 2016-07-29 + + re> /^abc(\d+)/ + data> abc123 + 0: abc123 + 1: 123 + data> xyz + No match + + Unset capturing substrings that are not followed by one that is set are + not shown by pcre2test unless the allcaptures modifier is specified. In + the following example, there are two capturing substrings, but when the + first data line is matched, the second, unset substring is not shown. + An "internal" unset substring is shown as "<unset>", as for the second + data line. + + re> /(a)|(b)/ + data> a + 0: a + 1: a + data> b + 0: b + 1: <unset> + 2: b + + If the strings contain any non-printing characters, they are output as + \xhh escapes if the value is less than 256 and UTF mode is not set. + Otherwise they are output as \x{hh...} escapes. See below for the defi- + nition of non-printing characters.
If the aftertext modifier is set, + the output for substring 0 is followed by the rest of the subject + string, identified by "0+" like this: + + re> /cat/aftertext + data> cataract + 0: cat + 0+ aract + + If global matching is requested, the results of successive matching at- + tempts are output in sequence, like this: + + re> /\Bi(\w\w)/g + data> Mississippi + 0: iss + 1: ss + 0: iss + 1: ss + 0: ipp + 1: pp + + "No match" is output only if the first match attempt fails. Here is an + example of a failure message (the offset 4 that is specified by the + offset modifier is past the end of the subject string): + + re> /xyz/ + data> xyz\=offset=4 + Error -24 (bad offset value) + + Note that whereas patterns can be continued over several lines (a plain + ">" prompt is used for continuations), subject lines may not. However + newlines can be included in a subject by means of the \n escape (or \r, + \r\n, etc., depending on the newline sequence setting). + + +OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION + + When the alternative matching function, pcre2_dfa_match(), is used, the + output consists of a list of all the matches that start at the first + point in the subject where there is at least one match. For example: + + re> /(tang|tangerine|tan)/ + data> yellow tangerine\=dfa + 0: tangerine + 1: tang + 2: tan + + Using the normal matching function on this data finds only "tang". The + longest matching string is always given first (and numbered zero). Af- + ter a PCRE2_ERROR_PARTIAL return, the output is "Partial match:", fol- + lowed by the partially matching substring. Note that this is the entire + substring that was inspected during the partial match; it may include + characters before the actual match start if a lookbehind assertion, \b, + or \B was involved. (\K is not supported for DFA matching.) + + If global matching is requested, the search for further matches resumes + at the end of the longest match. 
For example: + + re> /(tang|tangerine|tan)/g + data> yellow tangerine and tangy sultana\=dfa + 0: tangerine + 1: tang + 2: tan + 0: tang + 1: tan + 0: tan + + The alternative matching function does not support substring capture, + so the modifiers that are concerned with captured substrings are not + relevant. + + +RESTARTING AFTER A PARTIAL MATCH + + When the alternative matching function has given the PCRE2_ERROR_PAR- + TIAL return, indicating that the subject partially matched the pattern, + you can restart the match with additional subject data by means of the + dfa_restart modifier. For example: + + re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/ + data> 23ja\=ps,dfa + Partial match: 23ja + data> n05\=dfa,dfa_restart + 0: n05 + + For further information about partial matching, see the pcre2partial + documentation. + + +CALLOUTS + + If the pattern contains any callout requests, pcre2test's callout func- + tion is called during matching unless callout_none is specified. This + works with both matching functions, and with JIT, though there are some + differences in behaviour. The output for callouts with numerical argu- + ments and those with string arguments is slightly different. + + Callouts with numerical arguments + + By default, the callout function displays the callout number, the start + and current positions in the subject text at the callout time, and the + next pattern item to be tested. For example: + + --->pqrabcdef + 0 ^ ^ \d + + This output indicates that callout number 0 occurred for a match at- + tempt starting at the fourth character of the subject string, when the + pointer was at the seventh character, and when the next pattern item + was \d. Just one circumflex is output if the start and current posi- + tions are the same, or if the current position precedes the start posi- + tion, which can happen if the callout is in a lookbehind assertion. 
+ + Callouts numbered 255 are assumed to be automatic callouts, inserted as + a result of the auto_callout pattern modifier. In this case, instead of + showing the callout number, the offset in the pattern, preceded by a + plus, is output. For example: + + re> /\d?[A-E]\*/auto_callout + data> E* + --->E* + +0 ^ \d? + +3 ^ [A-E] + +8 ^^ \* + +10 ^ ^ + 0: E* + + If a pattern contains (*MARK) items, an additional line is output when- + ever a change of latest mark is passed to the callout function. For ex- + ample: + + re> /a(*MARK:X)bc/auto_callout + data> abc + --->abc + +0 ^ a + +1 ^^ (*MARK:X) + +10 ^^ b + Latest Mark: X + +11 ^ ^ c + +12 ^ ^ + 0: abc + + The mark changes between matching "a" and "b", but stays the same for + the rest of the match, so nothing more is output. If, as a result of + backtracking, the mark reverts to being unset, the text "<unset>" is + output. + + Callouts with string arguments + + The output for a callout with a string argument is similar, except that + instead of outputting a callout number before the position indicators, + the callout string and its offset in the pattern string are output be- + fore the reflection of the subject string, and the subject string is + reflected for each callout. For example: + + re> /^ab(?C'first')cd(?C"second")ef/ + data> abcdefg + Callout (7): 'first' + --->abcdefg + ^ ^ c + Callout (20): "second" + --->abcdefg + ^ ^ e + 0: abcdef + + + Callout modifiers + + The callout function in pcre2test returns zero (carry on matching) by + default, but you can use a callout_fail modifier in a subject line to + change this and other parameters of the callout (see below). + + If the callout_capture modifier is set, the current captured groups are + output when a callout occurs. This is useful only for non-DFA matching, + as pcre2_dfa_match() does not support capturing, so no captures are + ever shown.
+ + The normal callout output, showing the callout number or pattern offset + (as described above) is suppressed if the callout_no_where modifier is + set. + + When using the interpretive matching function pcre2_match() without + JIT, setting the callout_extra modifier causes additional output from + pcre2test's callout function to be generated. For the first callout in + a match attempt at a new starting position in the subject, "New match + attempt" is output. If there has been a backtrack since the last call- + out (or start of matching if this is the first callout), "Backtrack" is + output, followed by "No other matching paths" if the backtrack ended + the previous match attempt. For example: + + re> /(a+)b/auto_callout,no_start_optimize,no_auto_possess + data> aac\=callout_extra + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + +3 ^ ^ ) + +4 ^ ^ b + Backtrack + --->aac + +3 ^^ ) + +4 ^^ b + Backtrack + No other matching paths + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + +3 ^^ ) + +4 ^^ b + Backtrack + No other matching paths + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + Backtrack + No other matching paths + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + No match + + Notice that various optimizations must be turned off if you want all + possible matching paths to be scanned. If no_start_optimize is not + used, there is an immediate "no match", without any callouts, because + the starting optimization fails to find "b" in the subject, which it + knows must be present for any match. If no_auto_possess is not used, + the "a+" item is turned into "a++", which reduces the number of back- + tracks. + + The callout_extra modifier has no effect if used with the DFA matching + function, or with JIT. + + Return values from callouts + + The default return from the callout function is zero, which allows + matching to continue. The callout_fail modifier can be given one or two + numbers. 
If there is only one number, 1 is returned instead of 0 (caus- + ing matching to backtrack) when a callout of that number is reached. If + two numbers (<n>:<m>) are given, 1 is returned when callout <n> is + reached and there have been at least <m> callouts. The callout_error + modifier is similar, except that PCRE2_ERROR_CALLOUT is returned, caus- + ing the entire matching process to be aborted. If both these modifiers + are set for the same callout number, callout_error takes precedence. + Note that callouts with string arguments are always given the number + zero. + + The callout_data modifier can be given an unsigned or a negative num- + ber. This is set as the "user data" that is passed to the matching + function, and passed back when the callout function is invoked. Any + value other than zero is used as a return from pcre2test's callout + function. + + Inserting callouts can be helpful when using pcre2test to check compli- + cated regular expressions. For further information about callouts, see + the pcre2callout documentation. + + +NON-PRINTING CHARACTERS + + When pcre2test is outputting text in the compiled version of a pattern, + bytes other than 32-126 are always treated as non-printing characters + and are therefore shown as hex escapes. + + When pcre2test is outputting text that is a matched part of a subject + string, it behaves in the same way, unless a different locale has been + set for the pattern (using the locale modifier). In this case, the is- + print() function is used to distinguish printing and non-printing char- + acters. + + +SAVING AND RESTORING COMPILED PATTERNS + + It is possible to save compiled patterns on disc or elsewhere, and re- + load them later, subject to a number of restrictions. JIT data cannot + be saved. The host on which the patterns are reloaded must be running + the same version of PCRE2, with the same code unit width, and must also + have the same endianness, pointer width and PCRE2_SIZE type.
Before + compiled patterns can be saved they must be serialized, that is, con- + verted to a stream of bytes. A single byte stream may contain any num- + ber of compiled patterns, but they must all use the same character ta- + bles. A single copy of the tables is included in the byte stream (its + size is 1088 bytes). + + The functions whose names begin with pcre2_serialize_ are used for se- + rializing and de-serializing. They are described in the pcre2serialize + documentation. In this section we describe the features of pcre2test + that can be used to test these functions. + + Note that "serialization" in PCRE2 does not convert compiled patterns + to an abstract format like Java or .NET. It just makes a reloadable + byte code stream. Hence the restrictions on reloading mentioned above. + + In pcre2test, when a pattern with push modifier is successfully com- + piled, it is pushed onto a stack of compiled patterns, and pcre2test + expects the next line to contain a new pattern (or command) instead of + a subject line. By contrast, the pushcopy modifier causes a copy of the + compiled pattern to be stacked, leaving the original available for im- + mediate matching. By using push and/or pushcopy, a number of patterns + can be compiled and retained. These modifiers are incompatible with + posix, and control modifiers that act at match time are ignored (with a + message) for the stacked patterns. The jitverify modifier applies only + at compile time. + + The command + + #save + + causes all the stacked patterns to be serialized and the result written + to the named file. Afterwards, all the stacked patterns are freed. The + command + + #load + + reads the data in the file, and then arranges for it to be de-serial- + ized, with the resulting compiled patterns added to the pattern stack. 
+ The pattern on the top of the stack can be retrieved by the #pop com- + mand, which must be followed by lines of subjects that are to be + matched with the pattern, terminated as usual by an empty line or end + of file. This command may be followed by a modifier list containing + only control modifiers that act after a pattern has been compiled. In + particular, hex, posix, posix_nosub, push, and pushcopy are not al- + lowed, nor are any option-setting modifiers. The JIT modifiers are, + however, permitted. Here is an example that saves and reloads two pat- + terns. + + /abc/push + /xyz/push + #save tempfile + #load tempfile + #pop info + xyz + + #pop jit,bincode + abc + + If jitverify is used with #pop, it does not automatically imply jit, + which is different behaviour from when it is used on a pattern. + + The #popcopy command is analogous to the pushcopy modifier in that it + makes current a copy of the topmost stack pattern, leaving the original + still on the stack. + + +SEE ALSO + + pcre2(3), pcre2api(3), pcre2callout(3), pcre2jit(3), pcre2matching(3), + pcre2partial(3), pcre2pattern(3), pcre2serialize(3). + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 24 April 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE 10.44 24 April 2024 PCRE2TEST(1) diff --git a/doc/pcre2unicode.3 b/doc/pcre2unicode.3 new file mode 100644 index 0000000..eb613f4 --- /dev/null +++ b/doc/pcre2unicode.3 @@ -0,0 +1,485 @@ +.TH PCRE2UNICODE 3 "04 February 2023" "PCRE2 10.43" +.SH NAME +PCRE - Perl-compatible regular expressions (revised API) +.SH "UNICODE AND UTF SUPPORT" +.rs +.sp +PCRE2 is normally built with Unicode support, though if you do not need it, you +can build it without, in which case the library will be smaller.
With Unicode +support, PCRE2 has knowledge of Unicode character properties and can process +strings of text in UTF-8, UTF-16, and UTF-32 format (depending on the code unit +width), but this is not the default. Unless specifically requested, PCRE2 +treats each code unit in a string as one character. +.P +There are two ways of telling PCRE2 to switch to UTF mode, where characters may +consist of more than one code unit and the range of values is constrained. The +program can call +.\" HREF +\fBpcre2_compile()\fP +.\" +with the PCRE2_UTF option, or the pattern may start with the sequence (*UTF). +However, the latter facility can be locked out by the PCRE2_NEVER_UTF option. +That is, the programmer can prevent the supplier of the pattern from switching +to UTF mode. +.P +Note that the PCRE2_MATCH_INVALID_UTF option (see +.\" HTML +.\" +below) +.\" +forces PCRE2_UTF to be set. +.P +In UTF mode, both the pattern and any subject strings that are matched against +it are treated as UTF strings instead of strings of individual one-code-unit +characters. There are also some other changes to the way characters are +handled, as documented below. +. +. +.SH "UNICODE PROPERTY SUPPORT" +.rs +.sp +When PCRE2 is built with Unicode support, the escape sequences \ep{..}, +\eP{..}, and \eX can be used. This is not dependent on the PCRE2_UTF setting. +The Unicode properties that can be tested are a subset of those that Perl +supports. Currently they are limited to the general category properties such as +Lu for an upper case letter or Nd for a decimal number, the derived properties +Any and LC (synonym L&), the Unicode script names such as Arabic or Han, +Bidi_Class, Bidi_Control, and a few binary properties. +.P +The full lists are given in the +.\" HREF +\fBpcre2pattern\fP +.\" +and +.\" HREF +\fBpcre2syntax\fP +.\" +documentation. In general, only the short names for properties are supported. +For example, \ep{L} matches a letter. Its longer synonym, \ep{Letter}, is not +supported. 
Furthermore, in Perl, many properties may optionally be prefixed by +"Is", for compatibility with Perl 5.6. PCRE2 does not support this. +. +. +.SH "WIDE CHARACTERS AND UTF MODES" +.rs +.sp +Code points less than 256 can be specified in patterns by either braced or +unbraced hexadecimal escape sequences (for example, \ex{b3} or \exb3). Larger +values have to use braced sequences. Unbraced octal code points up to \e777 are +also recognized; larger ones can be coded using \eo{...}. +.P +The escape sequence \eN{U+<hex digits>} is recognized as another way of +specifying a Unicode character by code point in a UTF mode. It is not allowed +in non-UTF mode. +.P +In UTF mode, repeat quantifiers apply to complete UTF characters, not to +individual code units. +.P +In UTF mode, the dot metacharacter matches one UTF character instead of a +single code unit. +.P +In UTF mode, capture group names are not restricted to ASCII, and may contain +any Unicode letters and decimal digits, as well as underscore. +.P +The escape sequence \eC can be used to match a single code unit in UTF mode, +but its use can lead to some strange effects because it breaks up multi-unit +characters (see the description of \eC in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation). For this reason, there is a build-time option that disables +support for \eC completely. There is also a less draconian compile-time option +for locking out the use of \eC when a pattern is compiled. +.P +The use of \eC is not supported by the alternative matching function +\fBpcre2_dfa_match()\fP when in UTF-8 or UTF-16 mode, that is, when a character +may consist of more than one code unit. The use of \eC in these modes provokes +a match-time error. Also, the JIT optimization does not support \eC in these +modes. If JIT optimization is requested for a UTF-8 or UTF-16 pattern that +contains \eC, it will not succeed, and so when \fBpcre2_match()\fP is called, +the matching will be carried out by the interpretive function.
+.P +The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly test +characters of any code value, but, by default, the characters that PCRE2 +recognizes as digits, spaces, or word characters remain the same set as in +non-UTF mode, all with code points less than 256. This remains true even when +PCRE2 is built to include Unicode support, because to do otherwise would slow +down matching in many common cases. Note that this also applies to \eb +and \eB, because they are defined in terms of \ew and \eW. If you want +to test for a wider sense of, say, "digit", you can use explicit Unicode +property tests such as \ep{Nd}. Alternatively, if you set the PCRE2_UCP option, +the way that the character escapes work is changed so that Unicode properties +are used to determine which characters match, though there are some options +that suppress this for individual escapes. For details see the section on +.\" HTML +.\" +generic character types +.\" +in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. +.P +Like the escapes, characters that match the POSIX named character classes are +all low-valued characters unless the PCRE2_UCP option is set, but there is an +option to override this. +.P +In contrast to the character escapes and character classes, the special +horizontal and vertical white space escapes (\eh, \eH, \ev, and \eV) do match +all the appropriate Unicode characters, whether or not PCRE2_UCP is set. +. +. +.SH "UNICODE CASE-EQUIVALENCE" +.rs +.sp +If either PCRE2_UTF or PCRE2_UCP is set, upper/lower case processing makes use +of Unicode properties except for characters whose code points are less than 128 +and that have at most two case-equivalent values. For these, a direct table +lookup is used for speed. A few Unicode characters such as Greek sigma have +more than two code points that are case-equivalent, and these are treated +specially. 
Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case +processing for non-UTF character encodings such as UCS-2. +.P +There are two ASCII characters (S and K) that, in addition to their ASCII lower +case equivalents, have a non-ASCII one as well (long S and Kelvin sign). +Recognition of these non-ASCII characters as case-equivalent to their ASCII +counterparts can be disabled by setting the PCRE2_EXTRA_CASELESS_RESTRICT +option. When this is set, all characters in a case equivalence must either be +ASCII or non-ASCII; there can be no mixing. +. +. +.\" HTML +.SH "SCRIPT RUNS" +.rs +.sp +The pattern constructs (*script_run:...) and (*atomic_script_run:...), with +synonyms (*sr:...) and (*asr:...), verify that the string matched within the +parentheses is a script run. In concept, a script run is a sequence of +characters that are all from the same Unicode script. However, because some +scripts are commonly used together, and because some diacritical and other +marks are used with multiple scripts, it is not that simple. +.P +Every Unicode character has a Script property, mostly with a value +corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There +are also three special values: +.P +"Unknown" is used for code points that have not been assigned, and also for the +surrogate code points. In the PCRE2 32-bit library, characters whose code +points are greater than the Unicode maximum (U+10FFFF), which are accessible +only in non-UTF mode, are assigned the Unknown script. +.P +"Common" is used for characters that are used with many scripts. These include +punctuation, emoji, mathematical, musical, and currency symbols, and the ASCII +digits 0 to 9. +.P +"Inherited" is used for characters such as diacritical marks that modify a +previous character. These are considered to take on the script of the character +that they modify. 
+.P +Some Inherited characters are used with many scripts, but many of them are only +normally used with a small number of scripts. For example, U+102E0 (Coptic +Epact thousands mark) is used only with Arabic and Coptic. In order to make it +possible to check this, a Unicode property called Script Extension exists. Its +value is a list of scripts that apply to the character. For the majority of +characters, the list contains just one script, the same one as the Script +property. However, for characters such as U+102E0 more than one Script is +listed. There are also some Common characters that have a single, non-Common +script in their Script Extension list. +.P +The next section describes the basic rules for deciding whether a given string +of characters is a script run. Note, however, that there are some special cases +involving the Chinese Han script, and an additional constraint for decimal +digits. These are covered in subsequent sections. +. +. +.SS "Basic script run rules" +.rs +.sp +A string that is less than two characters long is a script run. This is the +only case in which an Unknown character can be part of a script run. Longer +strings are checked using only the Script Extensions property, not the basic +Script property. +.P +If a character's Script Extension property is the single value "Inherited", it +is always accepted as part of a script run. This is also true for the property +"Common", subject to the checking of decimal digits described below. All the +remaining characters in a script run must have at least one script in common in +their Script Extension lists. In set-theoretic terminology, the intersection of +all the sets of scripts must not be empty. +.P +A simple example is an Internet name such as "google.com". The letters are all +in the Latin script, and the dot is Common, so this string is a script run. 
+However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a +string that looks the same, but with Cyrillic "o"s is not a script run. +.P +More interesting examples involve characters with more than one script in their +Script Extension. Consider the following characters: +.sp + U+060C Arabic comma + U+06D4 Arabic full stop +.sp +The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and +Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could +appear in script runs of either Arabic or Hanifi Rohingya. The first could also +appear in Syriac or Thaana script runs, but the second could not. +. +. +.SS "The Chinese Han script" +.rs +.sp +The Chinese Han script is commonly used in conjunction with other scripts for +writing certain languages. Japanese uses the Hiragana and Katakana scripts +together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo +and Han. These three combinations are treated as special cases when checking +script runs and are, in effect, "virtual scripts". Thus, a script run may +contain a mixture of Hiragana, Katakana, and Han, or a mixture of Hangul and +Han, or a mixture of Bopomofo and Han, but not, for example, a mixture of +Hangul and Bopomofo and Han. PCRE2 (like Perl) follows Unicode's Technical +Standard 39 ("Unicode Security Mechanisms", http://unicode.org/reports/tr39/) +in allowing such mixtures. +. +. +.SS "Decimal digits" +.rs +.sp +Unicode contains many sets of 10 decimal digits in different scripts, and some +scripts (including the Common script) contain more than one set. Some of these +decimal digits are visually indistinguishable from the common ASCII +digits. In addition to the script checking described above, if a script run +contains any decimal digits, they must all come from the same set of 10 +adjacent characters. +. +.
+.SH "VALIDITY OF UTF STRINGS" +.rs +.sp +When the PCRE2_UTF option is set, the strings passed as patterns and subjects +are (by default) checked for validity on entry to the relevant functions. If an +invalid UTF string is passed, a negative error code is returned. The code unit +offset to the offending character can be extracted from the match data block by +calling \fBpcre2_get_startchar()\fP, which is used for this purpose after a UTF +error. +.P +In some situations, you may already know that your strings are valid, and +therefore want to skip these checks in order to improve performance, for +example in the case of a long subject string that is being scanned repeatedly. +If you set the PCRE2_NO_UTF_CHECK option at compile time or at match time, +PCRE2 assumes that the pattern or subject it is given (respectively) contains +only valid UTF code unit sequences. +.P +If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result +is undefined and your program may crash or loop indefinitely or give incorrect +results. There is, however, one mode of matching that can handle invalid UTF +subject strings. This is enabled by passing PCRE2_MATCH_INVALID_UTF to +\fBpcre2_compile()\fP and is discussed below in the next section. The rest of +this section covers the case when PCRE2_MATCH_INVALID_UTF is not set. +.P +Passing PCRE2_NO_UTF_CHECK to \fBpcre2_compile()\fP just disables the UTF check +for the pattern; it does not also apply to subject strings. If you want to +disable the check for a subject string you must pass this same option to +\fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. +.P +UTF-16 and UTF-32 strings can indicate their endianness by a special code known +as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting +strings to be in host byte order. +.P +Unless PCRE2_NO_UTF_CHECK is set, a UTF string is checked before any other +processing takes place.
In the case of \fBpcre2_match()\fP and +\fBpcre2_dfa_match()\fP calls with a non-zero starting offset, the check is +applied only to that part of the subject that could be inspected during +matching, and there is a check that the starting offset points to the first +code unit of a character or to the end of the subject. If there are no +lookbehind assertions in the pattern, the check starts at the starting offset. +Otherwise, it starts at the length of the longest lookbehind before the +starting offset, or at the start of the subject if there are not that many +characters before the starting offset. Note that the sequences \eb and \eB are +one-character lookbehinds. +.P +In addition to checking the format of the string, there is a check to ensure +that all code points lie in the range U+0 to U+10FFFF, excluding the surrogate +area. The so-called "non-character" code points are not excluded because +Unicode corrigendum #9 makes it clear that they should not be. +.P +Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16, +where they are used in pairs to encode code points with values greater than +0xFFFF. The code points that are encoded by UTF-16 pairs are available +independently in the UTF-8 and UTF-32 encodings. (In other words, the whole +surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and +UTF-32.) +.P +Setting PCRE2_NO_UTF_CHECK at compile time does not disable the error that is +given if an escape sequence for an invalid Unicode code point is encountered in +the pattern. If you want to allow escape sequences such as \ex{d800} (a +surrogate code point) you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra +option. However, this is possible only in UTF-8 and UTF-32 modes, because these +values are not representable in UTF-16. +. +. 
+.\" HTML +.SS "Errors in UTF-8 strings" +.rs +.sp +The following negative error codes are given for invalid UTF-8 strings: +.sp + PCRE2_ERROR_UTF8_ERR1 + PCRE2_ERROR_UTF8_ERR2 + PCRE2_ERROR_UTF8_ERR3 + PCRE2_ERROR_UTF8_ERR4 + PCRE2_ERROR_UTF8_ERR5 +.sp +The string ends with a truncated UTF-8 character; the code specifies how many +bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be +no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279) +allows for up to 6 bytes, and this is checked first; hence the possibility of +4 or 5 missing bytes. +.sp + PCRE2_ERROR_UTF8_ERR6 + PCRE2_ERROR_UTF8_ERR7 + PCRE2_ERROR_UTF8_ERR8 + PCRE2_ERROR_UTF8_ERR9 + PCRE2_ERROR_UTF8_ERR10 +.sp +The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the +character do not have the binary value 0b10 (that is, either the most +significant bit is 0, or the next bit is 1). +.sp + PCRE2_ERROR_UTF8_ERR11 + PCRE2_ERROR_UTF8_ERR12 +.sp +A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long; +these code points are excluded by RFC 3629. +.sp + PCRE2_ERROR_UTF8_ERR13 +.sp +A 4-byte character has a value greater than 0x10ffff; these code points are +excluded by RFC 3629. +.sp + PCRE2_ERROR_UTF8_ERR14 +.sp +A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of +code points are reserved by RFC 3629 for use with UTF-16, and so are excluded +from UTF-8. +.sp + PCRE2_ERROR_UTF8_ERR15 + PCRE2_ERROR_UTF8_ERR16 + PCRE2_ERROR_UTF8_ERR17 + PCRE2_ERROR_UTF8_ERR18 + PCRE2_ERROR_UTF8_ERR19 +.sp +A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a +value that can be represented by fewer bytes, which is invalid. For example, +the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just +one byte. 
+.sp + PCRE2_ERROR_UTF8_ERR20 +.sp +The two most significant bits of the first byte of a character have the binary +value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a +byte can only validly occur as the second or subsequent byte of a multi-byte +character. +.sp + PCRE2_ERROR_UTF8_ERR21 +.sp +The first byte of a character has the value 0xfe or 0xff. These values can +never occur in a valid UTF-8 string. +. +. +.\" HTML +.SS "Errors in UTF-16 strings" +.rs +.sp +The following negative error codes are given for invalid UTF-16 strings: +.sp + PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at end of string + PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate follows high surrogate + PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate +.sp +. +. +.\" HTML +.SS "Errors in UTF-32 strings" +.rs +.sp +The following negative error codes are given for invalid UTF-32 strings: +.sp + PCRE2_ERROR_UTF32_ERR1 Surrogate character (0xd800 to 0xdfff) + PCRE2_ERROR_UTF32_ERR2 Code point is greater than 0x10ffff +.sp +. +. +.\" HTML +.SH "MATCHING IN INVALID UTF STRINGS" +.rs +.sp +You can run pattern matches on subject strings that may contain invalid UTF +sequences if you call \fBpcre2_compile()\fP with the PCRE2_MATCH_INVALID_UTF +option. This is supported by \fBpcre2_match()\fP, including JIT matching, but +not by \fBpcre2_dfa_match()\fP. When PCRE2_MATCH_INVALID_UTF is set, it forces +PCRE2_UTF to be set as well. Note, however, that the pattern itself must be a +valid UTF string. +.P +If you do not set PCRE2_MATCH_INVALID_UTF when calling \fBpcre2_compile\fP, and +you are not certain that your subject strings are valid UTF sequences, you +should not make use of the JIT "fast path" function \fBpcre2_jit_match()\fP +because it bypasses sanity checks, including the one for UTF validity. An +invalid string may cause undefined behaviour, including looping, crashing, or +giving the wrong answer. 
+.P +Setting PCRE2_MATCH_INVALID_UTF does not affect what \fBpcre2_compile()\fP +generates, but if \fBpcre2_jit_compile()\fP is subsequently called, it does +generate different code. If JIT is not used, the option affects the behaviour +of the interpretive code in \fBpcre2_match()\fP. When PCRE2_MATCH_INVALID_UTF +is set at compile time, PCRE2_NO_UTF_CHECK is ignored at match time. +.P +In this mode, an invalid code unit sequence in the subject never matches any +pattern item. It does not match dot, it does not match \ep{Any}, it does not +even match negative items such as [^X]. A lookbehind assertion fails if it +encounters an invalid sequence while moving the current point backwards. In +other words, an invalid UTF code unit sequence acts as a barrier which no match +can cross. +.P +You can also think of this as the subject being split up into fragments of +valid UTF, delimited internally by invalid code unit sequences. The pattern is +matched fragment by fragment. The result of a successful match, however, is +given as code unit offsets in the entire subject string in the usual way. There +are a few points to consider: +.P +The internal boundaries are not interpreted as the beginnings or ends of lines +and so do not match circumflex or dollar characters in the pattern. +.P +If \fBpcre2_match()\fP is called with an offset that points to an invalid +UTF-sequence, that sequence is skipped, and the match starts at the next valid +UTF character, or the end of the subject. +.P +At internal fragment boundaries, \eb and \eB behave in the same way as at the +beginning and end of the subject. For example, a sequence such as \ebWORD\eb +would match an instance of WORD that is surrounded by invalid UTF code units. +.P +Using PCRE2_MATCH_INVALID_UTF, an application can run matches on arbitrary +data, knowing that any matched strings that are returned are valid UTF. This +can be useful when searching for UTF text in executable or other binary files. 
+.P +Note, however, that the 16-bit and 32-bit PCRE2 libraries process strings as +sequences of uint16_t or uint32_t code units. They cannot find valid UTF +sequences within an arbitrary string of bytes unless such sequences are +suitably aligned. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 12 October 2023 +Copyright (c) 1997-2023 University of Cambridge. +.fi diff --git a/install-sh b/install-sh new file mode 100755 index 0000000..ec298b5 --- /dev/null +++ b/install-sh @@ -0,0 +1,541 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2020-11-14.01; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# 'make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +tab=' ' +nl=' +' +IFS=" $tab$nl" + +# Set DOITPROG to "echo" to test this script. + +doit=${DOITPROG-} +doit_exec=${doit:-exec} + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. + +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +# Create dirs (including intermediate dirs) using mode 755. +# This is like GNU 'install' as of coreutils 8.32 (2020). +mkdir_umask=22 + +backupsuffix= +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +is_target_a_directory=possibly + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. + + -c (ignored) + -C install only if different (preserve data modification time) + -d create directories instead of installing files. 
+ -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -p pass -p to $cpprog. + -s $stripprog installed files. + -S SUFFIX attempt to back up existing files, with suffix SUFFIX. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG + +By default, rm is invoked with -f; when overridden with RMPROG, +it's up to you to specify -f if you want it. + +If -S is not specified, no backups are attempted. + +Email bug reports to bug-automake@gnu.org. +Automake home page: https://www.gnu.org/software/automake/ +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -p) cpprog="$cpprog -p";; + + -s) stripcmd=$stripprog;; + + -S) backupsuffix="$2" + shift;; + + -t) + is_target_a_directory=always + dst_arg=$2 + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; + + -T) is_target_a_directory=never;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +# We allow the use of options -d and -T together, by making -d +# take the precedence; this is for compatibility with GNU install. + +if test -n "$dir_arg"; then + if test -n "$dst_arg"; then + echo "$0: target directory not allowed when installing a directory." 
>&2 + exit 1 + fi +fi + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call 'install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + if test $# -gt 1 || test "$is_target_a_directory" = always; then + if test ! -d "$dst_arg"; then + echo "$0: $dst_arg: Is not a directory." >&2 + exit 1 + fi + fi +fi + +if test -z "$dir_arg"; then + do_exit='(exit $ret); exit $ret' + trap "ret=129; $do_exit" 1 + trap "ret=130; $do_exit" 2 + trap "ret=141; $do_exit" 13 + trap "ret=143; $do_exit" 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. + *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names problematic for 'test' and other utilities. + case $src in + -* | [=\(\)!]) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + # Don't chown directories that already exist. 
+ if test $dstdir_status = 0; then + chowncmd="" + fi + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + dst=$dst_arg + + # If destination is a directory, append the input filename. + if test -d "$dst"; then + if test "$is_target_a_directory" = never; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dstbase=`basename "$src"` + case $dst in + */) dst=$dst$dstbase;; + *) dst=$dst/$dstbase;; + esac + dstdir_status=0 + else + dstdir=`dirname "$dst"` + test -d "$dstdir" + dstdir_status=$? + fi + fi + + case $dstdir in + */) dstdirslash=$dstdir;; + *) dstdirslash=$dstdir/;; + esac + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + # The $RANDOM variable is not portable (e.g., dash). Use it + # here however when possible just to lower collision chance. + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + + trap ' + ret=$? + rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null + exit $ret + ' 0 + + # Because "mkdir -p" follows existing symlinks and we likely work + # directly in world-writeable /tmp, make sure that the '$tmpdir' + # directory is successfully created first before we actually test + # 'mkdir -p'. + if (umask $mkdir_umask && + $mkdirprog $mkdir_mode "$tmpdir" && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. 
+ # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. + test_tmpdir="$tmpdir/a" + ls_ld_tmpdir=`ls -ld "$test_tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$test_tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. + rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null + fi + trap '' 0;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; + esac + + oIFS=$IFS + IFS=/ + set -f + set fnord $dstdir + shift + set +f + IFS=$oIFS + + prefixes= + + for d + do + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. 
+ (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=${dstdirslash}_inst.$$_ + rmtmp=${dstdirslash}_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. + (umask $cp_umask && + { test -z "$stripcmd" || { + # Create $dsttmp read-write so that cp doesn't create it read-only, + # which would cause strip to fail. + if test -z "$doit"; then + : >"$dsttmp" # No need to fork-exec 'touch'. + else + $doit touch "$dsttmp" + fi + } + } && + $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. 
+ if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + set +f && + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # If $backupsuffix is set, and the file being installed + # already exists, attempt a backup. Don't worry if it fails, + # e.g., if mv doesn't support -f. + if test -n "$backupsuffix" && test -f "$dst"; then + $doit $mvcmd -f "$dst" "$dst$backupsuffix" 2>/dev/null + fi + + # Rename the file to the real destination. + $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. 
+ $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/libpcre2-16.pc.in b/libpcre2-16.pc.in new file mode 100644 index 0000000..bacb466 --- /dev/null +++ b/libpcre2-16.pc.in @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libpcre2-16 +Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 16 bit character support +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lpcre2-16@LIB_POSTFIX@ +Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ +Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ diff --git a/libpcre2-32.pc.in b/libpcre2-32.pc.in new file mode 100644 index 0000000..06241f0 --- /dev/null +++ b/libpcre2-32.pc.in @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libpcre2-32 +Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 32 bit character support +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lpcre2-32@LIB_POSTFIX@ +Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ +Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ diff --git a/libpcre2-8.pc.in b/libpcre2-8.pc.in new file mode 100644 index 0000000..246bb9e --- /dev/null +++ b/libpcre2-8.pc.in @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libpcre2-8 +Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 8 bit character support +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lpcre2-8@LIB_POSTFIX@ +Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ +Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ 
diff --git a/libpcre2-posix.pc.in b/libpcre2-posix.pc.in new file mode 100644 index 0000000..2f1e8f1 --- /dev/null +++ b/libpcre2-posix.pc.in @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libpcre2-posix +Description: Posix compatible interface to libpcre2-8 +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lpcre2-posix@LIB_POSTFIX@ +Cflags: -I${includedir} @PCRE2POSIX_CFLAG@ +Requires.private: libpcre2-8 diff --git a/ltmain.sh b/ltmain.sh new file mode 100644 index 0000000..51e57e3 --- /dev/null +++ b/ltmain.sh @@ -0,0 +1,11338 @@ +#! /usr/bin/env sh +## DO NOT EDIT - This file generated from ./build-aux/ltmain.in +## by inline-source v2019-02-19.15 + +# libtool (GNU libtool) 2.5.0.1-38c1-dirty +# Provide generalized library-building support services. +# Written by Gordon Matzigkeit , 1996 + +# Copyright (C) 1996-2019, 2021-2024 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# As a special exception to the GNU General Public License, +# if you distribute this file as part of a program or library that +# is built using GNU Libtool, you may include this file under the +# same distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +PROGRAM=libtool +PACKAGE=libtool +VERSION=2.5.0.1-38c1-dirty +package_revision=2.5.0.1 + + +## ------ ## +## Usage. ## +## ------ ## + +# Run './libtool --help' for help with using this script from the +# command line. + + +## ------------------------------- ## +## User overridable command paths. ## +## ------------------------------- ## + +# After configure completes, it has a better idea of some of the +# shell tools we need than the defaults used by the functions shared +# with bootstrap, so set those here where they can still be over- +# ridden by the user, but otherwise take precedence. + +: ${AUTOCONF="autoconf"} +: ${AUTOMAKE="automake"} + + +## -------------------------- ## +## Source external libraries. ## +## -------------------------- ## + +# Much of our low-level functionality needs to be sourced from external +# libraries, which are installed to $pkgauxdir. + +# Set a version string for this script. +scriptversion=2019-02-19.15; # UTC + +# General shell script boiler plate, and helper functions. +# Written by Gary V. Vaughan, 2004 + +# This is free software. There is NO warranty; not even for +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Copyright (C) 2004-2019, 2021, 2023 Bootstrap Authors +# +# This file is dual licensed under the terms of the MIT license +# , and GPL version 2 or later +# . You must apply one of +# these licenses when using or redistributing this software or any of +# the files within it. See the URLs above, or the file `LICENSE` +# included in the Bootstrap distribution for the full license texts. + +# Please report bugs or propose patches to: +# + + +## ------ ## +## Usage. ## +## ------ ## + +# Evaluate this file near the top of your script to gain access to +# the functions and variables defined here: +# +# . 
`echo "$0" | ${SED-sed} 's|[^/]*$||'`/build-aux/funclib.sh +# +# If you need to override any of the default environment variable +# settings, do that before evaluating this file. + + +## -------------------- ## +## Shell normalisation. ## +## -------------------- ## + +# Some shells need a little help to be as Bourne compatible as possible. +# Before doing anything else, make sure all that help has been provided! + +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac +fi + +# NLS nuisances: We save the old values in case they are required later. +_G_user_locale= +_G_safe_locale= +for _G_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES +do + eval "if test set = \"\${$_G_var+set}\"; then + save_$_G_var=\$$_G_var + $_G_var=C + export $_G_var + _G_user_locale=\"$_G_var=\\\$save_\$_G_var; \$_G_user_locale\" + _G_safe_locale=\"$_G_var=C; \$_G_safe_locale\" + fi" +done +# These NLS vars are set unconditionally (bootstrap issue #24). Unset those +# in case the environment reset is needed later and the $save_* variant is not +# defined (see the code above). +LC_ALL=C +LANGUAGE=C +export LANGUAGE LC_ALL + +# Make sure IFS has a sensible default +sp=' ' +nl=' +' +IFS="$sp $nl" + +# There are apparently some systems that use ';' as a PATH separator! +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# func_unset VAR +# -------------- +# Portably unset VAR. 
+# In some shells, an 'unset VAR' statement leaves a non-zero return
+# status if VAR is already unset, which might be problematic if the
+# statement is used at the end of a function (thus poisoning its return
+# value) or when 'set -e' is active (causing even a spurious abort of
+# the script in this case).
+func_unset ()
+{
+    # Assign first so the variable certainly exists, probe 'unset' in a
+    # subshell, and only then unset for real; always return success.
+    { eval $1=; (eval unset $1) >/dev/null 2>&1 && eval unset $1 || : ; }
+}
+
+
+# Make sure CDPATH doesn't cause `cd` commands to output the target dir.
+func_unset CDPATH
+
+# Make sure ${,E,F}GREP behave sanely.
+func_unset GREP_OPTIONS
+
+
+## ------------------------- ##
+## Locate command utilities. ##
+## ------------------------- ##
+
+
+# func_executable_p FILE
+# ----------------------
+# Check that FILE is an executable regular file.
+func_executable_p ()
+{
+    test -f "$1" && test -x "$1"
+}
+
+
+# func_path_progs PROGS_LIST CHECK_FUNC [PATH]
+# --------------------------------------------
+# Search for either a program that responds to --version with output
+# containing "GNU", or else returned by CHECK_FUNC otherwise, by
+# trying all the directories in PATH with each of the elements of
+# PROGS_LIST.
+#
+# CHECK_FUNC should accept the path to a candidate program, and
+# set $func_check_prog_result if it truncates its output less than
+# $_G_path_prog_max characters.
+#
+# The chosen program is left in $func_path_progs_result.  If no
+# candidate is acceptable, a diagnostic naming PROGS_LIST is written to
+# stderr and the script exits with status 1.
+func_path_progs ()
+{
+    _G_progs_list=$1
+    _G_check_func=$2
+    _G_PATH=${3-"$PATH"}
+
+    # Start from a clean slate, so that a failed search can never report
+    # the program selected by an earlier call (e.g. sed while looking
+    # for grep).
+    func_path_progs_result=
+    func_check_prog_result=
+
+    _G_path_prog_max=0
+    _G_path_prog_found=false
+    _G_save_IFS=$IFS; IFS=${PATH_SEPARATOR-:}
+    for _G_dir in $_G_PATH; do
+      IFS=$_G_save_IFS
+      # An empty PATH element means the current directory.
+      test -z "$_G_dir" && _G_dir=.
+      for _G_prog_name in $_G_progs_list; do
+        for _exeext in '' .EXE; do
+          _G_path_prog=$_G_dir/$_G_prog_name$_exeext
+          func_executable_p "$_G_path_prog" || continue
+          case `"$_G_path_prog" --version 2>&1` in
+            *GNU*) func_path_progs_result=$_G_path_prog _G_path_prog_found=: ;;
+            *)     # Quote the candidate: directories may contain spaces.
+                   $_G_check_func "$_G_path_prog"
+                   func_path_progs_result=$func_check_prog_result
+                   ;;
+          esac
+          # A GNU implementation is always good enough: stop searching.
+          $_G_path_prog_found && break 3
+        done
+      done
+    done
+    IFS=$_G_save_IFS
+    # Use 'if' rather than 'test ... && {}' so that a successful search
+    # leaves a zero return status.
+    if test -z "$func_path_progs_result"; then
+      echo "no acceptable program ($_G_progs_list) could be found in \$PATH" >&2
+      exit 1
+    fi
+}
+
+
+# We want to be able to use the functions in this file before configure
+# has figured out where the best binaries are kept, which means we have
+# to search for them ourselves - except when the results are already set
+# where we skip the searches.
+
+# Unless the user overrides by setting SED, search the path for either GNU
+# sed, or the sed that truncates its output the least.
+test -z "$SED" && {
+  _G_sed_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/
+  for _G_i in 1 2 3 4 5 6 7; do
+    _G_sed_script=$_G_sed_script$nl$_G_sed_script
+  done
+  echo "$_G_sed_script" 2>/dev/null | sed 99q >conftest.sed
+  _G_sed_script=
+
+  # Feed the candidate sed ever longer single-line inputs and record the
+  # candidate in $func_check_prog_result if it copes with more doublings
+  # than the best program seen so far.
+  func_check_prog_sed ()
+  {
+    _G_path_prog=$1
+
+    _G_count=0
+    printf 0123456789 >conftest.in
+    while :
+    do
+      cat conftest.in conftest.in >conftest.tmp
+      mv conftest.tmp conftest.in
+      cp conftest.in conftest.nl
+      echo '' >> conftest.nl
+      # The script must pass the input through unchanged.
+      "$_G_path_prog" -f conftest.sed <conftest.nl >conftest.out 2>/dev/null || break
+      diff conftest.out conftest.nl >/dev/null 2>&1 || break
+      _G_count=`expr $_G_count + 1`
+      if test "$_G_count" -gt "$_G_path_prog_max"; then
+        # Best one so far, save it but keep looking for a better one
+        func_check_prog_result=$_G_path_prog
+        _G_path_prog_max=$_G_count
+      fi
+      # 10*(2^10) chars as input seems more than enough
+      test 10 -lt "$_G_count" && break
+    done
+    rm -f conftest.in conftest.tmp conftest.nl conftest.out
+  }
+
+  func_path_progs "sed gsed" func_check_prog_sed "$PATH:/usr/xpg4/bin"
+  rm -f conftest.sed
+  SED=$func_path_progs_result
+}
+
+
+# Unless the user overrides by setting GREP, search the path for either GNU
+# grep, or the grep that truncates its output the least.
+test -z "$GREP" && {
+  # Same probing scheme as func_check_prog_sed.  $_G_path_prog_max is
+  # owned by func_path_progs and deliberately not reset here, so that
+  # candidates are compared with the best one seen so far.
+  func_check_prog_grep ()
+  {
+    _G_path_prog=$1
+
+    _G_count=0
+    printf 0123456789 >conftest.in
+    while :
+    do
+      cat conftest.in conftest.in >conftest.tmp
+      mv conftest.tmp conftest.in
+      cp conftest.in conftest.nl
+      echo 'GREP' >> conftest.nl
+      # The single input line ends in GREP, so it must be echoed back.
+      "$_G_path_prog" -e 'GREP$' -e '-(cannot match)-' <conftest.nl >conftest.out 2>/dev/null || break
+      diff conftest.out conftest.nl >/dev/null 2>&1 || break
+      _G_count=`expr $_G_count + 1`
+      if test "$_G_count" -gt "$_G_path_prog_max"; then
+        # Best one so far, save it but keep looking for a better one
+        func_check_prog_result=$_G_path_prog
+        _G_path_prog_max=$_G_count
+      fi
+      # 10*(2^10) chars as input seems more than enough
+      test 10 -lt "$_G_count" && break
+    done
+    rm -f conftest.in conftest.tmp conftest.nl conftest.out
+  }
+
+  func_path_progs "grep ggrep" func_check_prog_grep "$PATH:/usr/xpg4/bin"
+  GREP=$func_path_progs_result
+}
+
+
+## ------------------------------- ##
+## User overridable command paths. ##
+## ------------------------------- ##
+
+# All uppercase variable names are used for environment variables.  These
+# variables can be overridden by the user before calling a script that
+# uses them if a suitable command of that name is not already available
+# in the command search PATH.
+
+: ${CP="cp -f"}
+: ${ECHO="printf %s\n"}
+: ${EGREP="$GREP -E"}
+: ${FGREP="$GREP -F"}
+: ${LN_S="ln -s"}
+: ${MAKE="make"}
+: ${MKDIR="mkdir"}
+: ${MV="mv -f"}
+: ${RM="rm -f"}
+: ${SHELL="${CONFIG_SHELL-/bin/sh}"}
+
+
+## -------------------- ##
+## Useful sed snippets. ##
+## -------------------- ##
+
+sed_dirname='s|/[^/]*$||'
+sed_basename='s|^.*/||'
+
+# Sed substitution that helps us do robust quoting.
It backslashifies +# metacharacters that are still active within double-quoted strings. +sed_quote_subst='s|\([`"$\\]\)|\\\1|g' + +# Same as above, but do not quote variable references. +sed_double_quote_subst='s/\(["`\\]\)/\\\1/g' + +# Sed substitution that turns a string into a regex matching for the +# string literally. +sed_make_literal_regex='s|[].[^$\\*\/]|\\&|g' + +# Sed substitution that converts a w32 file name or path +# that contains forward slashes, into one that contains +# (escaped) backslashes. A very naive implementation. +sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g' + +# Re-'\' parameter expansions in output of sed_double_quote_subst that +# were '\'-ed in input to the same. If an odd number of '\' preceded a +# '$' in input to sed_double_quote_subst, that '$' was protected from +# expansion. Since each input '\' is now two '\'s, look for any number +# of runs of four '\'s followed by two '\'s and then a '$'. '\' that '$'. +_G_bs='\\' +_G_bs2='\\\\' +_G_bs4='\\\\\\\\' +_G_dollar='\$' +sed_double_backslash="\ + s/$_G_bs4/&\\ +/g + s/^$_G_bs2$_G_dollar/$_G_bs&/ + s/\\([^$_G_bs]\\)$_G_bs2$_G_dollar/\\1$_G_bs2$_G_bs$_G_dollar/g + s/\n//g" + +# require_check_ifs_backslash +# --------------------------- +# Check if we can use backslash as IFS='\' separator, and set +# $check_ifs_backshlash_broken to ':' or 'false'. +require_check_ifs_backslash=func_require_check_ifs_backslash +func_require_check_ifs_backslash () +{ + _G_save_IFS=$IFS + IFS='\' + _G_check_ifs_backshlash='a\\b' + for _G_i in $_G_check_ifs_backshlash + do + case $_G_i in + a) + check_ifs_backshlash_broken=false + ;; + '') + break + ;; + *) + check_ifs_backshlash_broken=: + break + ;; + esac + done + IFS=$_G_save_IFS + require_check_ifs_backslash=: +} + + +## ----------------- ## +## Global variables. 
## +## ----------------- ## + +# Except for the global variables explicitly listed below, the following +# functions in the '^func_' namespace, and the '^require_' namespace +# variables initialised in the 'Resource management' section, sourcing +# this file will not pollute your global namespace with anything +# else. There's no portable way to scope variables in Bourne shell +# though, so actually running these functions will sometimes place +# results into a variable named after the function, and often use +# temporary variables in the '^_G_' namespace. If you are careful to +# avoid using those namespaces casually in your sourcing script, things +# should continue to work as you expect. And, of course, you can freely +# overwrite any of the functions or variables defined here before +# calling anything to customize them. + +EXIT_SUCCESS=0 +EXIT_FAILURE=1 +EXIT_MISMATCH=63 # $? = 63 is used to indicate version mismatch to missing. +EXIT_SKIP=77 # $? = 77 is used to indicate a skipped test to automake. + +# Allow overriding, eg assuming that you follow the convention of +# putting '$debug_cmd' at the start of all your functions, you can get +# bash to show function call trace with: +# +# debug_cmd='eval echo "${FUNCNAME[0]} $*" >&2' bash your-script-name +debug_cmd=${debug_cmd-":"} +exit_cmd=: + +# By convention, finish your script with: +# +# exit $exit_status +# +# so that you can set exit_status to non-zero if you want to indicate +# something went wrong during execution without actually bailing out at +# the point of failure. +exit_status=$EXIT_SUCCESS + +# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh +# is ksh but when the shell is invoked as "sh" and the current value of +# the _XPG environment variable is not equal to 1 (one), the special +# positional parameter $0, within a function call, is the name of the +# function. +progpath=$0 + +# The name of this program. 
+progname=`$ECHO "$progpath" |$SED "$sed_basename"` + +# Make sure we have an absolute progpath for reexecution: +case $progpath in + [\\/]*|[A-Za-z]:\\*) ;; + *[\\/]*) + progdir=`$ECHO "$progpath" |$SED "$sed_dirname"` + progdir=`cd "$progdir" && pwd` + progpath=$progdir/$progname + ;; + *) + _G_IFS=$IFS + IFS=${PATH_SEPARATOR-:} + for progdir in $PATH; do + IFS=$_G_IFS + test -x "$progdir/$progname" && break + done + IFS=$_G_IFS + test -n "$progdir" || progdir=`pwd` + progpath=$progdir/$progname + ;; +esac + + +## ----------------- ## +## Standard options. ## +## ----------------- ## + +# The following options affect the operation of the functions defined +# below, and should be set appropriately depending on run-time para- +# meters passed on the command line. + +opt_dry_run=false +opt_quiet=false +opt_verbose=false + +# Categories 'all' and 'none' are always available. Append any others +# you will pass as the first argument to func_warning from your own +# code. +warning_categories= + +# By default, display warnings according to 'opt_warning_types'. Set +# 'warning_func' to ':' to elide all warnings, or func_fatal_error to +# treat the next displayed warning as a fatal error. +warning_func=func_warn_and_continue + +# Set to 'all' to display all warnings, 'none' to suppress all +# warnings, or a space delimited list of some subset of +# 'warning_categories' to display only the listed warnings. +opt_warning_types=all + + +## -------------------- ## +## Resource management. ## +## -------------------- ## + +# This section contains definitions for functions that each ensure a +# particular resource (a file, or a non-empty configuration variable for +# example) is available, and if appropriate to extract default values +# from pertinent package files. Call them using their associated +# 'require_*' variable to ensure that they are executed, at most, once. 
+# +# It's entirely deliberate that calling these functions can set +# variables that don't obey the namespace limitations obeyed by the rest +# of this file, in order that that they be as useful as possible to +# callers. + + +# require_term_colors +# ------------------- +# Allow display of bold text on terminals that support it. +require_term_colors=func_require_term_colors +func_require_term_colors () +{ + $debug_cmd + + test -t 1 && { + # COLORTERM and USE_ANSI_COLORS environment variables take + # precedence, because most terminfo databases neglect to describe + # whether color sequences are supported. + test -n "${COLORTERM+set}" && : ${USE_ANSI_COLORS="1"} + + if test 1 = "$USE_ANSI_COLORS"; then + # Standard ANSI escape sequences + tc_reset='' + tc_bold=''; tc_standout='' + tc_red=''; tc_green='' + tc_blue=''; tc_cyan='' + else + # Otherwise trust the terminfo database after all. + test -n "`tput sgr0 2>/dev/null`" && { + tc_reset=`tput sgr0` + test -n "`tput bold 2>/dev/null`" && tc_bold=`tput bold` + tc_standout=$tc_bold + test -n "`tput smso 2>/dev/null`" && tc_standout=`tput smso` + test -n "`tput setaf 1 2>/dev/null`" && tc_red=`tput setaf 1` + test -n "`tput setaf 2 2>/dev/null`" && tc_green=`tput setaf 2` + test -n "`tput setaf 4 2>/dev/null`" && tc_blue=`tput setaf 4` + test -n "`tput setaf 5 2>/dev/null`" && tc_cyan=`tput setaf 5` + } + fi + } + + require_term_colors=: +} + + +## ----------------- ## +## Function library. ## +## ----------------- ## + +# This section contains a variety of useful functions to call in your +# scripts. Take note of the portable wrappers for features provided by +# some modern shells, which will fall back to slower equivalents on +# less featureful shells. + + +# func_append VAR VALUE +# --------------------- +# Append VALUE onto the existing contents of VAR. 
+ + # We should try to minimise forks, especially on Windows where they are + # unreasonably slow, so skip the feature probes when bash or zsh are + # being used: + if test set = "${BASH_VERSION+set}${ZSH_VERSION+set}"; then + : ${_G_HAVE_ARITH_OP="yes"} + : ${_G_HAVE_XSI_OPS="yes"} + # The += operator was introduced in bash 3.1 + case $BASH_VERSION in + [12].* | 3.0 | 3.0*) ;; + *) + : ${_G_HAVE_PLUSEQ_OP="yes"} + ;; + esac + fi + + # _G_HAVE_PLUSEQ_OP + # Can be empty, in which case the shell is probed, "yes" if += is + # useable or anything else if it does not work. + test -z "$_G_HAVE_PLUSEQ_OP" \ + && (eval 'x=a; x+=" b"; test "a b" = "$x"') 2>/dev/null \ + && _G_HAVE_PLUSEQ_OP=yes + +if test yes = "$_G_HAVE_PLUSEQ_OP" +then + # This is an XSI compatible shell, allowing a faster implementation... + eval 'func_append () + { + $debug_cmd + + eval "$1+=\$2" + }' +else + # ...otherwise fall back to using expr, which is often a shell builtin. + func_append () + { + $debug_cmd + + eval "$1=\$$1\$2" + } +fi + + +# func_append_quoted VAR VALUE +# ---------------------------- +# Quote VALUE and append to the end of shell variable VAR, separated +# by a space. +if test yes = "$_G_HAVE_PLUSEQ_OP"; then + eval 'func_append_quoted () + { + $debug_cmd + + func_quote_arg pretty "$2" + eval "$1+=\\ \$func_quote_arg_result" + }' +else + func_append_quoted () + { + $debug_cmd + + func_quote_arg pretty "$2" + eval "$1=\$$1\\ \$func_quote_arg_result" + } +fi + + +# func_append_uniq VAR VALUE +# -------------------------- +# Append unique VALUE onto the existing contents of VAR, assuming +# entries are delimited by the first character of VALUE. For example: +# +# func_append_uniq options " --another-option option-argument" +# +# will only append to $options if " --another-option option-argument " +# is not already present somewhere in $options already (note spaces at +# each end implied by leading space in second argument). 
+func_append_uniq ()
+{
+    $debug_cmd
+
+    # Read the current value of VAR.  The value is passed to $ECHO
+    # unquoted, so it is subject to word splitting before comparison.
+    eval _G_current_value='`$ECHO $'$1'`'
+    # The delimiter is the first character of VALUE.
+    _G_delim=`expr "$2" : '\(.\)'`
+
+    case $_G_delim$_G_current_value$_G_delim in
+      *"$2$_G_delim"*) ;;
+      *) func_append "$@" ;;
+    esac
+}
+
+
+# func_arith TERM...
+# ------------------
+# Set func_arith_result to the result of evaluating TERMs.
+# Shell arithmetic expansion is used when available (probed in a
+# subshell unless _G_HAVE_ARITH_OP is already set), expr otherwise.
+  test -z "$_G_HAVE_ARITH_OP" \
+    && (eval 'test 2 = $(( 1 + 1 ))') 2>/dev/null \
+    && _G_HAVE_ARITH_OP=yes
+
+if test yes = "$_G_HAVE_ARITH_OP"; then
+  eval 'func_arith ()
+  {
+    $debug_cmd
+
+    func_arith_result=$(( $* ))
+  }'
+else
+  func_arith ()
+  {
+    $debug_cmd
+
+    func_arith_result=`expr "$@"`
+  }
+fi
+
+
+# func_basename FILE
+# ------------------
+# Set func_basename_result to FILE with everything up to and including
+# the last / stripped.
+#
+# The bodies of func_basename and func_dirname are prepared here as the
+# strings $_b and $_d, and spliced into the function definitions (and
+# into func_dirname_and_basename) by eval.
+if test yes = "$_G_HAVE_XSI_OPS"; then
+  # If this shell supports suffix pattern removal, then use it to avoid
+  # forking. Hide the definitions in single quotes in case the shell
+  # chokes on unsupported syntax...
+  _b='func_basename_result=${1##*/}'
+  _d='case $1 in
+        */*) func_dirname_result=${1%/*}$2 ;;
+        * ) func_dirname_result=$3 ;;
+      esac'
+
+else
+  # ...otherwise fall back to using sed.
+  _b='func_basename_result=`$ECHO "$1" |$SED "$sed_basename"`'
+  _d='func_dirname_result=`$ECHO "$1" |$SED "$sed_dirname"`
+      if test "X$func_dirname_result" = "X$1"; then
+        func_dirname_result=$3
+      else
+        func_append func_dirname_result "$2"
+      fi'
+fi
+
+eval 'func_basename ()
+{
+    $debug_cmd
+
+    '"$_b"'
+}'
+
+
+# func_dirname FILE APPEND NONDIR_REPLACEMENT
+# -------------------------------------------
+# Compute the dirname of FILE.  If nonempty, add APPEND to the result,
+# otherwise set result to NONDIR_REPLACEMENT.
+# The value is returned in $func_dirname_result.
+eval 'func_dirname ()
+{
+    $debug_cmd
+
+    '"$_d"'
+}'
+
+
+# func_dirname_and_basename FILE APPEND NONDIR_REPLACEMENT
+# --------------------------------------------------------
+# Perform func_basename and func_dirname in a single function
+# call:
+#   dirname:  Compute the dirname of FILE.
If nonempty, +# add APPEND to the result, otherwise set result +# to NONDIR_REPLACEMENT. +# value returned in "$func_dirname_result" +# basename: Compute filename of FILE. +# value retuned in "$func_basename_result" +# For efficiency, we do not delegate to the functions above but instead +# duplicate the functionality here. +eval 'func_dirname_and_basename () +{ + $debug_cmd + + '"$_b"' + '"$_d"' +}' + + +# func_echo ARG... +# ---------------- +# Echo program name prefixed message. +func_echo () +{ + $debug_cmd + + _G_message=$* + + func_echo_IFS=$IFS + IFS=$nl + for _G_line in $_G_message; do + IFS=$func_echo_IFS + $ECHO "$progname: $_G_line" + done + IFS=$func_echo_IFS +} + + +# func_echo_all ARG... +# -------------------- +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "$*" +} + + +# func_echo_infix_1 INFIX ARG... +# ------------------------------ +# Echo program name, followed by INFIX on the first line, with any +# additional lines not showing INFIX. +func_echo_infix_1 () +{ + $debug_cmd + + $require_term_colors + + _G_infix=$1; shift + _G_indent=$_G_infix + _G_prefix="$progname: $_G_infix: " + _G_message=$* + + # Strip color escape sequences before counting printable length + for _G_tc in "$tc_reset" "$tc_bold" "$tc_standout" "$tc_red" "$tc_green" "$tc_blue" "$tc_cyan" + do + test -n "$_G_tc" && { + _G_esc_tc=`$ECHO "$_G_tc" | $SED "$sed_make_literal_regex"` + _G_indent=`$ECHO "$_G_indent" | $SED "s|$_G_esc_tc||g"` + } + done + _G_indent="$progname: "`echo "$_G_indent" | $SED 's|.| |g'`" " ## exclude from sc_prohibit_nested_quotes + + func_echo_infix_1_IFS=$IFS + IFS=$nl + for _G_line in $_G_message; do + IFS=$func_echo_infix_1_IFS + $ECHO "$_G_prefix$tc_bold$_G_line$tc_reset" >&2 + _G_prefix=$_G_indent + done + IFS=$func_echo_infix_1_IFS +} + + +# func_error ARG... +# ----------------- +# Echo program name prefixed message to standard error. 
+func_error ()
+{
+    $debug_cmd
+
+    $require_term_colors
+
+    func_echo_infix_1 " $tc_standout${tc_red}error$tc_reset" "$*" >&2
+}
+
+
+# func_fatal_error ARG...
+# -----------------------
+# Echo program name prefixed message to standard error, and exit.
+# The exit status is $EXIT_FAILURE.
+func_fatal_error ()
+{
+    $debug_cmd
+
+    func_error "$*"
+    exit $EXIT_FAILURE
+}
+
+
+# func_grep EXPRESSION FILENAME
+# -----------------------------
+# Check whether EXPRESSION matches any line of FILENAME, without output.
+# The return status is that of $GREP.
+func_grep ()
+{
+    $debug_cmd
+
+    $GREP "$1" "$2" >/dev/null 2>&1
+}
+
+
+# func_len STRING
+# ---------------
+# Set func_len_result to the length of STRING.  STRING may not
+# start with a hyphen.
+# Unless _G_HAVE_XSI_OPS is already set, the shell is probed for the
+# ${#x}, ${x%...} and ${x#...} parameter expansions in a subshell.
+  test -z "$_G_HAVE_XSI_OPS" \
+    && (eval 'x=a/b/c;
+      test 5aa/bb/cc = "${#x}${x%%/*}${x%/*}${x#*/}${x##*/}"') 2>/dev/null \
+    && _G_HAVE_XSI_OPS=yes
+
+if test yes = "$_G_HAVE_XSI_OPS"; then
+  eval 'func_len ()
+  {
+    $debug_cmd
+
+    func_len_result=${#1}
+  }'
+else
+  func_len ()
+  {
+    $debug_cmd
+
+    # If expr fails, report $max_cmd_len instead.
+    func_len_result=`expr "$1" : ".*" 2>/dev/null || echo $max_cmd_len`
+  }
+fi
+
+
+# func_mkdir_p DIRECTORY-PATH
+# ---------------------------
+# Make sure the entire path to DIRECTORY-PATH is available.
+# Nothing is done when DIRECTORY-PATH is empty or when $opt_dry_run is
+# ':'.  If DIRECTORY-PATH still does not exist after all missing
+# components have been attempted, func_fatal_error is called.
+func_mkdir_p ()
+{
+    $debug_cmd
+
+    _G_directory_path=$1
+    _G_dir_list=
+
+    if test -n "$_G_directory_path" && test : != "$opt_dry_run"; then
+
+      # Protect directory names starting with '-'
+      case $_G_directory_path in
+        -*) _G_directory_path=./$_G_directory_path ;;
+      esac
+
+      # Remember the directory that was asked for: the loop below walks
+      # $_G_directory_path up to the first ancestor that already exists.
+      _G_mkdir_p_target=$_G_directory_path
+
+      # While some portion of DIR does not yet exist...
+      while test ! -d "$_G_directory_path"; do
+        # ...make a list in topmost first order.  Use a colon delimited
+        # list in case some portion of path contains whitespace.
+        _G_dir_list=$_G_directory_path:$_G_dir_list
+
+        # If the last portion added has no slash in it, the list is done
+        case $_G_directory_path in */*) ;; *) break ;; esac
+
+        # ...otherwise throw away the child directory and loop
+        _G_directory_path=`$ECHO "$_G_directory_path" | $SED -e "$sed_dirname"`
+      done
+      _G_dir_list=`$ECHO "$_G_dir_list" | $SED 's|:*$||'`
+
+      func_mkdir_p_IFS=$IFS; IFS=:
+      for _G_dir in $_G_dir_list; do
+        IFS=$func_mkdir_p_IFS
+        # mkdir can fail with a 'File exist' error if two processes
+        # try to create one of the directories concurrently.  Don't
+        # stop in that case!
+        $MKDIR "$_G_dir" 2>/dev/null || :
+      done
+      IFS=$func_mkdir_p_IFS
+
+      # Bail out if we (or some other process) failed to create a directory.
+      # Check the requested directory itself: $_G_directory_path may by now
+      # name an ancestor that existed all along, which would hide failures.
+      test -d "$_G_mkdir_p_target" || \
+        func_fatal_error "Failed to create '$1'"
+    fi
+}
+
+
+# func_mktempdir [BASENAME]
+# -------------------------
+# Make a temporary directory that won't clash with other running
+# libtool processes, and avoids race conditions if possible.  If
+# given, BASENAME is the basename for that directory.
+# The directory name is written to standard output.
+func_mktempdir ()
+{
+    $debug_cmd
+
+    _G_template=${TMPDIR-/tmp}/${1-$progname}
+
+    if test : = "$opt_dry_run"; then
+      # Return a directory name, but don't create it in dry-run mode
+      _G_tmpdir=$_G_template-$$
+    else
+
+      # If mktemp works, use that first and foremost
+      _G_tmpdir=`mktemp -d "$_G_template-XXXXXXXX" 2>/dev/null`
+
+      if test ! -d "$_G_tmpdir"; then
+        # Failing that, at least try and use $RANDOM to avoid a race
+        _G_tmpdir=$_G_template-${RANDOM-0}$$
+
+        # Create the directory with a restrictive umask, then restore it.
+        func_mktempdir_umask=`umask`
+        umask 0077
+        $MKDIR "$_G_tmpdir"
+        umask $func_mktempdir_umask
+      fi
+
+      # If we're not in dry-run mode, bomb out on failure
+      test -d "$_G_tmpdir" || \
+        func_fatal_error "cannot create temporary directory '$_G_tmpdir'"
+    fi
+
+    $ECHO "$_G_tmpdir"
+}
+
+
+# func_normal_abspath PATH
+# ------------------------
+# Remove doubled-up and trailing slashes, "." path components,
+# and cancel out any ".."
path components in PATH after making
+# it an absolute path.
+# The value is returned in $func_normal_abspath_result.  A relative
+# PATH is interpreted relative to the current directory, and an empty
+# PATH yields the current directory itself.
+func_normal_abspath ()
+{
+    $debug_cmd
+
+    # These SED scripts presuppose an absolute path with a trailing slash.
+    # _G_pathcar extracts the first path component, _G_pathcdr removes it.
+    _G_pathcar='s|^/\([^/]*\).*$|\1|'
+    _G_pathcdr='s|^/[^/]*||'
+    _G_removedotparts=':dotsl
+		s|/\./|/|g
+		t dotsl
+		s|/\.$|/|'
+    _G_collapseslashes='s|/\{1,\}|/|g'
+    _G_finalslash='s|/*$|/|'
+
+    # Start from root dir and reassemble the path.
+    func_normal_abspath_result=
+    func_normal_abspath_tpath=$1
+    func_normal_abspath_altnamespace=
+    case $func_normal_abspath_tpath in
+      "")
+        # Empty path, that just means $cwd.
+        func_stripname '' '/' "`pwd`"
+        func_normal_abspath_result=$func_stripname_result
+        return
+        ;;
+      # The next three entries are used to spot a run of precisely
+      # two leading slashes without using negated character classes;
+      # we take advantage of case's first-match behaviour.
+      ///*)
+        # Unusual form of absolute path, do nothing.
+        ;;
+      //*)
+        # Not necessarily an ordinary path; POSIX reserves leading '//'
+        # and for example Cygwin uses it to access remote file shares
+        # over CIFS/SMB, so we conserve a leading double slash if found.
+        func_normal_abspath_altnamespace=/
+        ;;
+      /*)
+        # Absolute path, do nothing.
+        ;;
+      *)
+        # Relative path, prepend $cwd.
+        func_normal_abspath_tpath=`pwd`/$func_normal_abspath_tpath
+        ;;
+    esac
+
+    # Cancel out all the simple stuff to save iterations.  We also want
+    # the path to end with a slash for ease of parsing, so make sure
+    # there is one (and only one) here.
+    func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \
+          -e "$_G_removedotparts" -e "$_G_collapseslashes" -e "$_G_finalslash"`
+    # Consume the path one leading component at a time.
+    while :; do
+      # Processed it all yet?
+      if test / = "$func_normal_abspath_tpath"; then
+        # If we ascended to the root using ".." the result may be empty now.
+        if test -z "$func_normal_abspath_result"; then
+          func_normal_abspath_result=/
+        fi
+        break
+      fi
+      func_normal_abspath_tcomponent=`$ECHO "$func_normal_abspath_tpath" | $SED \
+          -e "$_G_pathcar"`
+      func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \
+          -e "$_G_pathcdr"`
+      # Figure out what to do with it
+      case $func_normal_abspath_tcomponent in
+        "")
+          # Trailing empty path component, ignore it.
+          ;;
+        ..)
+          # Parent dir; strip last assembled component from result.
+          func_dirname "$func_normal_abspath_result"
+          func_normal_abspath_result=$func_dirname_result
+          ;;
+        *)
+          # Actual path component, append it.
+          func_append func_normal_abspath_result "/$func_normal_abspath_tcomponent"
+          ;;
+      esac
+    done
+    # Restore leading double-slash if one was found on entry.
+    func_normal_abspath_result=$func_normal_abspath_altnamespace$func_normal_abspath_result
+}
+
+
+# func_notquiet ARG...
+# --------------------
+# Echo program name prefixed message only when not in quiet mode.
+# Quiet mode is in force when $opt_quiet is ':'.
+func_notquiet ()
+{
+    $debug_cmd
+
+    $opt_quiet || func_echo ${1+"$@"}
+
+    # A bug in bash halts the script if the last line of a function
+    # fails when set -e is in force, so we need another command to
+    # work around that:
+    :
+}
+
+
+# func_relative_path SRCDIR DSTDIR
+# --------------------------------
+# Set func_relative_path_result to the relative path from SRCDIR to DSTDIR.
+func_relative_path ()
+{
+    $debug_cmd
+
+    # Both directories are normalised to absolute paths first.  The
+    # '_tlibdir' variable holds SRCDIR (shortened as we ascend) and
+    # '_tbindir' holds DSTDIR.
+    func_relative_path_result=
+    func_normal_abspath "$1"
+    func_relative_path_tlibdir=$func_normal_abspath_result
+    func_normal_abspath "$2"
+    func_relative_path_tbindir=$func_normal_abspath_result
+
+    # Treat the root directory as an empty prefix, so that the
+    # "prefix followed by a slash" pattern below also matches when
+    # SRCDIR is '/'.
+    case $func_relative_path_tlibdir in
+      /) func_relative_path_tlibdir= ;;
+    esac
+
+    # Ascend the tree starting from libdir
+    while :; do
+      # check if we have found a prefix of bindir
+      # The patterns are quoted so that glob characters in a directory
+      # name are matched literally.
+      case $func_relative_path_tbindir in
+        "$func_relative_path_tlibdir")
+          # found an exact match
+          func_relative_path_tcancelled=
+          break
+          ;;
+        "$func_relative_path_tlibdir"/*)
+          # found a matching prefix that ends on a component boundary
+          # (so that '/usr/lib' is not taken as a prefix of '/usr/lib64')
+          func_stripname "$func_relative_path_tlibdir" '' "$func_relative_path_tbindir"
+          func_relative_path_tcancelled=$func_stripname_result
+          if test -z "$func_relative_path_result"; then
+            func_relative_path_result=.
+          fi
+          break
+          ;;
+        *)
+          # Quoted, so that a directory name containing whitespace is
+          # passed to func_dirname as a single argument.
+          func_dirname "$func_relative_path_tlibdir"
+          func_relative_path_tlibdir=$func_dirname_result
+          if test -z "$func_relative_path_tlibdir"; then
+            # Have to descend all the way to the root!
+            func_relative_path_result=../$func_relative_path_result
+            func_relative_path_tcancelled=$func_relative_path_tbindir
+            break
+          fi
+          func_relative_path_result=../$func_relative_path_result
+          ;;
+      esac
+    done
+
+    # Now calculate path; take care to avoid doubling-up slashes.
+    func_stripname '' '/' "$func_relative_path_result"
+    func_relative_path_result=$func_stripname_result
+    func_stripname '/' '/' "$func_relative_path_tcancelled"
+    if test -n "$func_stripname_result"; then
+      func_append func_relative_path_result "/$func_stripname_result"
+    fi
+
+    # Normalisation. If bindir is libdir, return '.' else relative path.
+    if test -n "$func_relative_path_result"; then
+      func_stripname './' '' "$func_relative_path_result"
+      func_relative_path_result=$func_stripname_result
+    fi
+
+    test -n "$func_relative_path_result" || func_relative_path_result=.
+
+    # Make sure the function itself returns success.
+    :
+}
+
+
+# func_quote_portable EVAL ARG
+# ----------------------------
+# Internal function to portably implement func_quote_arg.
Note that we still
+# keep attention to performance here so we as much as possible try to avoid
+# calling sed binary (so far O(N) complexity as long as func_append is O(1)).
+#
+# The first argument is run as a command: when it succeeds (':') ARG is
+# quoted with $sed_double_quote_subst and $sed_double_backslash, which
+# leaves variable references unquoted; otherwise ('false') ARG is quoted
+# for eval.  Two values are returned:
+#   func_quote_portable_unquoted_result  ARG with backslashes inserted
+#   func_quote_portable_result           the same, wrapped in double
+#                                        quotes when that is needed
+func_quote_portable ()
+{
+    $debug_cmd
+
+    $require_check_ifs_backslash
+
+    func_quote_portable_result=$2
+
+    # one-time-loop (easy break)
+    while true
+    do
+      if $1; then
+        func_quote_portable_result=`$ECHO "$2" | $SED \
+          -e "$sed_double_quote_subst" -e "$sed_double_backslash"`
+        break
+      fi
+
+      # Quote for eval.
+      case $func_quote_portable_result in
+        *[\\\`\"\$]*)
+          # Fallback to sed for $check_ifs_backshlash_broken=:, or when the
+          # string contains the shell wildcard characters.
+          case $check_ifs_backshlash_broken$func_quote_portable_result in
+            :*|*[\[\*\?]*)
+              func_quote_portable_result=`$ECHO "$func_quote_portable_result" \
+                  | $SED "$sed_quote_subst"`
+              break
+              ;;
+          esac
+
+          # Otherwise escape each special character in turn without
+          # forking: split the string on that character via IFS and
+          # rejoin the parts with a backslash before each separator.
+          # The 'dummy' words guard the first and last field.
+          func_quote_portable_old_IFS=$IFS
+          for _G_char in '\' '`' '"' '$'
+          do
+            # STATE($1) PREV($2) SEPARATOR($3)
+            set start "" ""
+            func_quote_portable_result=dummy"$_G_char$func_quote_portable_result$_G_char"dummy
+            IFS=$_G_char
+            for _G_part in $func_quote_portable_result
+            do
+              case $1 in
+              quote)
+                func_append func_quote_portable_result "$3$2"
+                set quote "$_G_part" "\\$_G_char"
+                ;;
+              start)
+                set first "" ""
+                func_quote_portable_result=
+                ;;
+              first)
+                set quote "$_G_part" ""
+                ;;
+              esac
+            done
+          done
+          IFS=$func_quote_portable_old_IFS
+          ;;
+        *) ;;
+      esac
+      break
+    done
+
+    func_quote_portable_unquoted_result=$func_quote_portable_result
+    case $func_quote_portable_result in
+      # double-quote args containing shell metacharacters to delay
+      # word splitting, command substitution and variable expansion
+      # for a subsequent eval.
+      # many bourne shells cannot handle close brackets correctly
+      # in scan sets, so we specify it separately.
+      # NOTE(review): the scan set below ends with two escaped blanks;
+      # presumably they are meant to be a space and a TAB -- confirm that
+      # the second one is a literal tab character.
+      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+        func_quote_portable_result=\"$func_quote_portable_result\"
+        ;;
+    esac
+}
+
+
+# func_quotefast_eval ARG
+# -----------------------
+# Quote one ARG (internal).  This is equivalent to 'func_quote_arg eval ARG',
+# but optimized for speed.  Result is stored in $func_quotefast_eval_result.
+#
+# Three implementations are possible, chosen once when this file is read:
+# plain 'printf -v ... %q' when the shell has it and it escapes '~',
+# 'printf -v ... %q' with a func_quote_portable fallback for arguments
+# starting with '~' when it does not, and func_quote_portable alone when
+# 'printf -v' is unavailable.
+if test xyes = `(x=; printf -v x %q yes; echo x"$x") 2>/dev/null`; then
+  printf -v _GL_test_printf_tilde %q '~'
+  if test '\~' = "$_GL_test_printf_tilde"; then
+    func_quotefast_eval ()
+    {
+      printf -v func_quotefast_eval_result %q "$1"
+    }
+  else
+    # Broken older Bash implementations.  Make those faster too if possible.
+    func_quotefast_eval ()
+    {
+      case $1 in
+        '~'*)
+          func_quote_portable false "$1"
+          func_quotefast_eval_result=$func_quote_portable_result
+          ;;
+        *)
+          printf -v func_quotefast_eval_result %q "$1"
+          ;;
+      esac
+    }
+  fi
+else
+  func_quotefast_eval ()
+  {
+    func_quote_portable false "$1"
+    func_quotefast_eval_result=$func_quote_portable_result
+  }
+fi
+
+
+# func_quote_arg MODEs ARG
+# ------------------------
+# Quote one ARG to be evaled later.  MODEs argument may contain zero or more
+# specifiers listed below separated by ',' character.  This function returns two
+# values:
+#   i) func_quote_arg_result
+#      double-quoted (when needed), suitable for a subsequent eval
+#  ii) func_quote_arg_unquoted_result
+#      has all characters that are still active within double
+#      quotes backslashified.  Available only if 'unquoted' is specified.
+#
+# Available modes:
+# ----------------
+# 'eval' (default)
+#       - escape shell special characters
+# 'expand'
+#       - the same as 'eval';  but do not quote variable references
+# 'pretty'
+#       - request aesthetic output, i.e. '"a b"' instead of 'a\ b'.  This might
+#         be used later in func_quote to get output like: 'echo "a b"' instead
+#         of 'echo a\ b'.  This is slower than default on some shells.
+# 'unquoted'
+#       - produce also $func_quote_arg_unquoted_result which does not contain
+#         wrapping double-quotes.
+#
+# Examples for 'func_quote_arg pretty,unquoted string':
+#
+#   string      | *_result              | *_unquoted_result
+#   ------------+-----------------------+-------------------
+#   "           | \"                    | \"
+#   a b         | "a b"                 | a b
+#   "a b"       | "\"a b\""             | \"a b\"
+#   *           | "*"                   | *
+#   z="${x-$y}" | "z=\"\${x-\$y}\""     | z=\"\${x-\$y}\"
+#
+# Examples for 'func_quote_arg pretty,unquoted,expand string':
+#
+#   string        |   *_result          |  *_unquoted_result
+#   --------------+---------------------+--------------------
+#   z="${x-$y}"   | "z=\"${x-$y}\""     | z=\"${x-$y}\"
+func_quote_arg ()
+{
+    # Select the quoting style handed to func_quote_portable: ':' keeps
+    # variable references unquoted, 'false' quotes them too.
+    _G_quote_expand=false
+    case ,$1, in
+      *,expand,*)
+        _G_quote_expand=:
+        ;;
+    esac
+
+    # Any of 'pretty', 'expand' or 'unquoted' needs the portable
+    # implementation; only that branch sets
+    # $func_quote_arg_unquoted_result.
+    case ,$1, in
+      *,pretty,*|*,expand,*|*,unquoted,*)
+        func_quote_portable $_G_quote_expand "$2"
+        func_quote_arg_result=$func_quote_portable_result
+        func_quote_arg_unquoted_result=$func_quote_portable_unquoted_result
+        ;;
+      *)
+        # Faster quote-for-eval for some shells.
+        func_quotefast_eval "$2"
+        func_quote_arg_result=$func_quotefast_eval_result
+        ;;
+    esac
+}
+
+
+# func_quote MODEs ARGs...
+# ------------------------
+# Quote all ARGs to be evaled later and join them into single command.  See
+# func_quote_arg's description for more info.
+# The quoted ARGs are joined with single spaces and returned in
+# $func_quote_result.
+func_quote ()
+{
+    $debug_cmd
+    _G_func_quote_mode=$1 ; shift
+    func_quote_result=
+    while test 0 -lt $#; do
+      func_quote_arg "$_G_func_quote_mode" "$1"
+      # Separate with a space, except before the very first word.
+      if test -n "$func_quote_result"; then
+        func_append func_quote_result " $func_quote_arg_result"
+      else
+        func_append func_quote_result "$func_quote_arg_result"
+      fi
+      shift
+    done
+}
+
+
+# func_stripname PREFIX SUFFIX NAME
+# ---------------------------------
+# strip PREFIX and SUFFIX from NAME, and store in func_stripname_result.
+# PREFIX and SUFFIX must not contain globbing or regex special
+# characters, hashes, percent signs, but SUFFIX may contain a leading
+# dot (in which case that matches only a dot).
+# Parameter expansion is used when the shell supports it, sed otherwise.
+if test yes = "$_G_HAVE_XSI_OPS"; then
+  eval 'func_stripname ()
+  {
+    $debug_cmd
+
+    # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are
+    # positional parameters, so assign one to ordinary variable first.
+    func_stripname_result=$3
+    func_stripname_result=${func_stripname_result#"$1"}
+    func_stripname_result=${func_stripname_result%"$2"}
+  }'
+else
+  func_stripname ()
+  {
+    $debug_cmd
+
+    # A SUFFIX starting with a dot gets a backslash prepended, so that
+    # sed matches the dot literally.
+    case $2 in
+      .*) func_stripname_result=`$ECHO "$3" | $SED -e "s%^$1%%" -e "s%\\\\$2\$%%"`;;
+      *)  func_stripname_result=`$ECHO "$3" | $SED -e "s%^$1%%" -e "s%$2\$%%"`;;
+    esac
+  }
+fi
+
+
+# func_show_eval CMD [FAIL_EXP]
+# -----------------------------
+# Unless opt_quiet is true, then output CMD.  Then, if opt_dry_run is
+# not true, evaluate CMD.  If the evaluation of CMD fails, and FAIL_EXP
+# is given, then evaluate it.
+func_show_eval ()
+{
+    $debug_cmd
+
+    _G_cmd=$1
+    _G_fail_exp=${2-':'}
+
+    func_quote_arg pretty,expand "$_G_cmd"
+    eval "func_notquiet $func_quote_arg_result"
+
+    $opt_dry_run || {
+      eval "$_G_cmd"
+      _G_status=$?
+      if test 0 -ne "$_G_status"; then
+        # Re-create the failure status of CMD for FAIL_EXP to inspect.
+        eval "(exit $_G_status); $_G_fail_exp"
+      fi
+    }
+}
+
+
+# func_show_eval_locale CMD [FAIL_EXP]
+# ------------------------------------
+# Unless opt_quiet is true, then output CMD.  Then, if opt_dry_run is
+# not true, evaluate CMD.  If the evaluation of CMD fails, and FAIL_EXP
+# is given, then evaluate it.  Use the saved locale for evaluation.
+func_show_eval_locale ()
+{
+    $debug_cmd
+
+    _G_cmd=$1
+    _G_fail_exp=${2-':'}
+
+    $opt_quiet || {
+      func_quote_arg expand,pretty "$_G_cmd"
+      eval "func_echo $func_quote_arg_result"
+    }
+
+    $opt_dry_run || {
+      # Evaluate $_G_user_locale before CMD, and $_G_safe_locale after it.
+      eval "$_G_user_locale
+	    $_G_cmd"
+      _G_status=$?
+      eval "$_G_safe_locale"
+      if test 0 -ne "$_G_status"; then
+        # Re-create the failure status of CMD for FAIL_EXP to inspect.
+        eval "(exit $_G_status); $_G_fail_exp"
+      fi
+    }
+}
+
+
+# func_tr_sh
+# ----------
+# Turn $1 into a string suitable for a shell variable name.
+# Result is stored in $func_tr_sh_result.
All characters
+# not in the set a-zA-Z0-9_ are replaced with '_'. Further,
+# if $1 begins with a digit, a '_' is prepended as well.
+func_tr_sh ()
+{
+    $debug_cmd
+
+    # Only fork sed when $1 actually needs to be changed.
+    case $1 in
+      [0-9]* | *[!a-zA-Z0-9_]*)
+        func_tr_sh_result=`$ECHO "$1" | $SED -e 's/^\([0-9]\)/_\1/' -e 's/[^a-zA-Z0-9_]/_/g'`
+        ;;
+      * )
+        func_tr_sh_result=$1
+        ;;
+    esac
+}
+
+
+# func_verbose ARG...
+# -------------------
+# Echo program name prefixed message in verbose mode only.
+# Verbose mode is in force when $opt_verbose is ':'.
+func_verbose ()
+{
+    $debug_cmd
+
+    $opt_verbose && func_echo "$*"
+
+    # Return success even when nothing was printed.
+    :
+}
+
+
+# func_warn_and_continue ARG...
+# -----------------------------
+# Echo program name prefixed warning message to standard error.
+func_warn_and_continue ()
+{
+    $debug_cmd
+
+    $require_term_colors
+
+    func_echo_infix_1 "${tc_red}warning$tc_reset" "$*" >&2
+}
+
+
+# func_warning CATEGORY ARG...
+# ----------------------------
+# Echo program name prefixed warning message to standard error. Warning
+# messages can be filtered according to CATEGORY, where this function
+# elides messages where CATEGORY is not listed in the global variable
+# 'opt_warning_types'.
+# The message itself is emitted through the function named by
+# $warning_func.
+func_warning ()
+{
+    $debug_cmd
+
+    # CATEGORY must be in the warning_categories list!
+    # NOTE(review): func_internal_error is not defined in the part of the
+    # file visible here -- confirm that it is defined elsewhere.
+    case " $warning_categories " in
+      *" $1 "*) ;;
+      *) func_internal_error "invalid warning category '$1'" ;;
+    esac
+
+    _G_category=$1
+    shift
+
+    case " $opt_warning_types " in
+      *" $_G_category "*) $warning_func ${1+"$@"} ;;
+    esac
+}
+
+
+# func_sort_ver VER1 VER2
+# -----------------------
+# 'sort -V' is not generally available.
+# Note this deviates from the version comparison in automake
+# in that it treats 1.5 < 1.5.0, and treats 1.4.4a < 1.4-p3a
+# but this should suffice as we won't be specifying old
+# version formats or redundant trailing .0 in bootstrap.conf.
+# If we did want full compatibility then we should probably
+# use m4_version_compare from autoconf.
+# The two versions are written to standard output, one per line, sorted
+# numerically on each of their first nine dot-separated fields.
+func_sort_ver ()
+{
+    $debug_cmd
+
+    printf '%s\n%s\n' "$1" "$2" \
+      | sort -t. -k 1,1n -k 2,2n -k 3,3n -k 4,4n -k 5,5n -k 6,6n -k 7,7n -k 8,8n -k 9,9n
+}
+
+# func_lt_ver PREV CURR
+# ---------------------
+# Return true if PREV and CURR are in the correct order according to
+# func_sort_ver, otherwise false.  Use it like this:
+#
+#  func_lt_ver "$prev_ver" "$proposed_ver" || func_fatal_error "..."
+func_lt_ver ()
+{
+    $debug_cmd
+
+    # PREV is in order when it is the first line that func_sort_ver prints.
+    test "x$1" = x`func_sort_ver "$1" "$2" | $SED 1q`
+}
+
+
+# Local variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-pattern: "10/scriptversion=%:y-%02m-%02d.%02H; # UTC"
+# time-stamp-time-zone: "UTC"
+# End:
+#! /bin/sh
+
+# A portable, pluggable option parser for Bourne shell.
+# Written by Gary V. Vaughan, 2010
+
+# This is free software.  There is NO warranty; not even for
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Copyright (C) 2010-2019, 2021, 2023 Bootstrap Authors
+#
+# This file is dual licensed under the terms of the MIT license
+# <https://opensource.org/licenses/MIT>, and GPL version 2 or later
+# <https://www.gnu.org/licenses/gpl-2.0.html>.  You must apply one of
+# these licenses when using or redistributing this software or any of
+# the files within it.  See the URLs above, or the file `LICENSE`
+# included in the Bootstrap distribution for the full license texts.
+
+# Please report bugs or propose patches to:
+# <https://github.com/gnulib-modules/bootstrap/issues>
+
+# Set a version string for this script.
+scriptversion=2019-02-19.15; # UTC
+
+
+## ------ ##
+## Usage. ##
+## ------ ##
+
+# This file is a library for parsing options in your shell scripts along
+# with assorted other useful supporting features that you can make use
+# of too.
+#
+# For the simplest scripts you might need only:
+#
+#   #!/bin/sh
+#   . relative/path/to/funclib.sh
+#   . relative/path/to/options-parser
+#   scriptversion=1.0
+#   func_options ${1+"$@"}
+#   eval set dummy "$func_options_result"; shift
+#   ...rest of your script...
+# +# In order for the '--version' option to work, you will need to have a +# suitably formatted comment like the one at the top of this file +# starting with '# Written by ' and ending with '# Copyright'. +# +# For '-h' and '--help' to work, you will also need a one line +# description of your script's purpose in a comment directly above the +# '# Written by ' line, like the one at the top of this file. +# +# The default options also support '--debug', which will turn on shell +# execution tracing (see the comment above debug_cmd below for another +# use), and '--verbose' and the func_verbose function to allow your script +# to display verbose messages only when your user has specified +# '--verbose'. +# +# After sourcing this file, you can plug in processing for additional +# options by amending the variables from the 'Configuration' section +# below, and following the instructions in the 'Option parsing' +# section further down. + +## -------------- ## +## Configuration. ## +## -------------- ## + +# You should override these variables in your script after sourcing this +# file so that they reflect the customisations you have added to the +# option parser. + +# The usage line for option parsing errors and the start of '-h' and +# '--help' output messages. You can embed shell variables for delayed +# expansion at the time the message is displayed, but you will need to +# quote other shell meta-characters carefully to prevent them being +# expanded when the contents are evaled. +usage='$progpath [OPTION]...' + +# Short help message in response to '-h' and '--help'. Add to this or +# override it after sourcing this library to reflect the full set of +# options your script accepts. 
+usage_message="\ + --debug enable verbose shell tracing + -W, --warnings=CATEGORY + report the warnings falling in CATEGORY [all] + -v, --verbose verbosely report processing + --version print version information and exit + -h, --help print short or long help message and exit +" + +# Additional text appended to 'usage_message' in response to '--help'. +long_help_message=" +Warning categories include: + 'all' show all warnings + 'none' turn off all the warnings + 'error' warnings are treated as fatal errors" + +# Help message printed before fatal option parsing errors. +fatal_help="Try '\$progname --help' for more information." + + + +## ------------------------- ## +## Hook function management. ## +## ------------------------- ## + +# This section contains functions for adding, removing, and running hooks +# in the main code. A hook is just a list of function names that can be +# run in order later on. + +# func_hookable FUNC_NAME +# ----------------------- +# Declare that FUNC_NAME will run hooks added with +# 'func_add_hook FUNC_NAME ...'. +func_hookable () +{ + $debug_cmd + + func_append hookable_fns " $1" +} + + +# func_add_hook FUNC_NAME HOOK_FUNC +# --------------------------------- +# Request that FUNC_NAME call HOOK_FUNC before it returns. FUNC_NAME must +# first have been declared "hookable" by a call to 'func_hookable'. +func_add_hook () +{ + $debug_cmd + + case " $hookable_fns " in + *" $1 "*) ;; + *) func_fatal_error "'$1' does not accept hook functions." ;; + esac + + eval func_append ${1}_hooks '" $2"' +} + + +# func_remove_hook FUNC_NAME HOOK_FUNC +# ------------------------------------ +# Remove HOOK_FUNC from the list of hook functions to be called by +# FUNC_NAME. 
+# Only the first list entry that is exactly HOOK_FUNC is removed; other
+# entries that merely contain HOOK_FUNC as a substring are left intact.
+func_remove_hook ()
+{
+    $debug_cmd
+
+    eval _G_remove_hook_old=\$${1}_hooks
+    _G_remove_hook_new=
+    _G_remove_hook_done=false
+
+    # Rebuild the list word by word instead of editing it with a sed
+    # substitution: a textual 's| HOOK_FUNC||' would also bite the front
+    # off any longer hook name that starts with HOOK_FUNC.
+    for _G_remove_hook_fn in $_G_remove_hook_old; do
+      if $_G_remove_hook_done || test "x$_G_remove_hook_fn" != "x$2"; then
+        _G_remove_hook_new="$_G_remove_hook_new $_G_remove_hook_fn"
+      else
+        # Drop this entry, and only this one.
+        _G_remove_hook_done=:
+      fi
+    done
+
+    eval ${1}_hooks=\$_G_remove_hook_new
+}
+
+
+# func_propagate_result FUNC_NAME_A FUNC_NAME_B
+# ---------------------------------------------
+# If the *_result variable of FUNC_NAME_A _is set_, assign its value to
+# *_result variable of FUNC_NAME_B.
+# 'func_propagate_result_result' is left as ':' when the value was
+# copied and as 'false' when FUNC_NAME_A's variable was not set, so
+# callers can write 'if $func_propagate_result_result; then ...'.
+func_propagate_result ()
+{
+    $debug_cmd
+
+    func_propagate_result_result=:
+    # '${var+set}' distinguishes "set but empty" from "unset".
+    if eval "test \"\${${1}_result+set}\" = set"
+    then
+      eval "${2}_result=\$${1}_result"
+    else
+      func_propagate_result_result=false
+    fi
+}
+
+
+# func_run_hooks FUNC_NAME [ARG]...
+# ---------------------------------
+# Run all hook functions registered to FUNC_NAME.
+# It's assumed that the list of hook functions contains nothing more
+# than a whitespace-delimited list of legal shell function names, and
+# no effort is wasted trying to catch shell meta-characters or preserve
+# whitespace.
+# Each hook receives the current positional parameters.  When a hook
+# sets its own '*_result' variable, that value is copied to
+# 'func_run_hooks_result' and becomes the positional parameters seen by
+# the next hook.
+func_run_hooks ()
+{
+    $debug_cmd
+
+    case " $hookable_fns " in
+      *" $1 "*) ;;
+      *) func_fatal_error "'$1' does not support hook functions." ;;
+    esac
+
+    # Fetch the hook list, then drop FUNC_NAME so "$@" holds only ARGs.
+    eval _G_hook_fns=\$$1_hooks; shift
+
+    for _G_hook in $_G_hook_fns; do
+      func_unset "${_G_hook}_result"
+      eval $_G_hook '${1+"$@"}'
+      func_propagate_result $_G_hook func_run_hooks
+      if $func_propagate_result_result; then
+        # The hook changed the argument list: adopt it.
+        eval set dummy "$func_run_hooks_result"; shift
+      fi
+    done
+}
+
+
+
+## --------------- ##
+## Option parsing. ##
+## --------------- ##
+
+# In order to add your own option parsing hooks, you must accept the
+# full positional parameter list from your hook function.  You may remove
+# or edit any options that you action, and then pass back the remaining
+# unprocessed options in 'HOOK_FUNC_result' (the name of your hook
+# function followed by '_result'), escaped suitably for 'eval'.
+#
+# The 'HOOK_FUNC_result' variable is automatically unset
+# before your hook gets called; for best performance, only set the
+# *_result variable when necessary (i.e. don't call the 'func_quote'
+# function unnecessarily because it can be an expensive operation on some
+# machines).
+#
+# Like this:
+#
+#    my_options_prep ()
+#    {
+#        $debug_cmd
+#
+#        # Extend the existing usage message.
+#        usage_message=$usage_message'
+#      -s, --silent       don'\''t print informational messages
+#    '
+#        # No change in '$@' (ignored completely by this hook).  Leave
+#        # my_options_prep_result variable intact.
+#    }
+#    func_add_hook func_options_prep my_options_prep
+#
+#
+#    my_silent_option ()
+#    {
+#        $debug_cmd
+#
+#        args_changed=false
+#
+#        # Note that, for efficiency, we parse as many options as we can
+#        # recognise in a loop before passing the remainder back to the
+#        # caller on the first unrecognised argument we encounter.
+#        while test $# -gt 0; do
+#          opt=$1; shift
+#          case $opt in
+#            --silent|-s) opt_silent=:
+#                         args_changed=:
+#                         ;;
+#            # Separate non-argument short options:
+#            -s*)         func_split_short_opt "$opt"
+#                         set dummy "$func_split_short_opt_name" \
+#                             "-$func_split_short_opt_arg" ${1+"$@"}
+#                         shift
+#                         args_changed=:
+#                         ;;
+#            *)           # Make sure the first unrecognised option "$opt"
+#                         # is added back to "$@" in case we need it later,
+#                         # if $args_changed was set to 'true'.
+#                         set dummy "$opt" ${1+"$@"}; shift; break ;;
+#          esac
+#        done
+#
+#        # Only call 'func_quote' here if we processed at least one argument.
+#        if $args_changed; then
+#          func_quote eval ${1+"$@"}
+#          my_silent_option_result=$func_quote_result
+#        fi
+#    }
+#    func_add_hook func_parse_options my_silent_option
+#
+#
+#    my_option_validation ()
+#    {
+#        $debug_cmd
+#
+#        $opt_silent && $opt_verbose && func_fatal_help "\
+#    '--silent' and '--verbose' options are mutually exclusive."
+#    }
+#    func_add_hook func_validate_options my_option_validation
+#
+# You'll also need to manually amend $usage_message to reflect the extra
+# options you parse.  It's preferable to append if you can, so that
+# multiple option parsing hooks can be added safely.
+
+
+# func_options_finish [ARG]...
+# ----------------------------
+# Finishing the option parse loop (call 'func_options' hooks ATM).
+func_options_finish ()
+{
+    $debug_cmd
+
+    # The hook list consulted here is the one registered on 'func_options'
+    # itself.  Whatever the hooks leave in func_run_hooks_result (if
+    # anything) becomes func_options_finish_result.
+    func_run_hooks func_options ${1+"$@"}
+    func_propagate_result func_run_hooks func_options_finish
+}
+
+
+# func_options [ARG]...
+# ---------------------
+# All the functions called inside func_options are hookable. See the
+# individual implementations for details.
+# On return 'func_options_result' always holds the remaining arguments,
+# quoted for 'eval' (callers do: eval set dummy "$func_options_result").
+func_hookable func_options
+func_options ()
+{
+    $debug_cmd
+
+    # Becomes ':' as soon as any stage hands back an already-quoted list.
+    _G_options_quoted=false
+
+    # Run the four stages in order; each stage sees the positional
+    # parameters left behind by the stage before it.
+    for my_func in options_prep parse_options validate_options options_finish
+    do
+      # Start every stage without a result left over from the previous
+      # one: func_propagate_result below tests whether the variable is set.
+      func_unset func_${my_func}_result
+      func_unset func_run_hooks_result
+      eval func_$my_func '${1+"$@"}'
+      func_propagate_result func_$my_func func_options
+      if $func_propagate_result_result; then
+        eval set dummy "$func_options_result"; shift
+        _G_options_quoted=:
+      fi
+    done
+
+    $_G_options_quoted || {
+      # As we (func_options) are top-level options-parser function and
+      # nobody quoted "$@" for us yet, we need to do it explicitly for
+      # caller.
+      func_quote eval ${1+"$@"}
+      func_options_result=$func_quote_result
+    }
+}
+
+
+# func_options_prep [ARG]...
+# --------------------------
+# All initialisations required before starting the option parse loop.
+# Note that when calling hook functions, we pass through the list of
+# positional parameters.  If a hook function modifies that list, and
+# needs to propagate that back to rest of this script, then it must
+# store the complete modified list in its own '*_result' variable before
+# returning; func_run_hooks copies that into 'func_run_hooks_result',
+# which is in turn copied to 'func_options_prep_result' below.
+func_hookable func_options_prep
+func_options_prep ()
+{
+    $debug_cmd
+
+    # Option defaults:
+    opt_verbose=false
+    opt_warning_types=
+
+    func_run_hooks func_options_prep ${1+"$@"}
+    func_propagate_result func_run_hooks func_options_prep
+}
+
+
+# func_parse_options [ARG]...
+# ---------------------------
+# The main option parsing loop.
+# Hooks registered on func_parse_options are run first on every pass of
+# the loop.  'func_parse_options_result' is set only when the argument
+# list handed back to the caller differs from the one received.
+func_hookable func_parse_options
+func_parse_options ()
+{
+    $debug_cmd
+
+    # ':' when "$@" was modified here and must be quoted again for the
+    # caller before returning.
+    _G_parse_options_requote=false
+    # this just eases exit handling
+    while test $# -gt 0; do
+      # Defer to hook functions for initial option parsing, so they
+      # get priority in the event of reusing an option name.
+      func_run_hooks func_parse_options ${1+"$@"}
+      func_propagate_result func_run_hooks func_parse_options
+      if $func_propagate_result_result; then
+        eval set dummy "$func_parse_options_result"; shift
+        # Even though we may have changed "$@", we passed the "$@" array
+        # down into the hook and it quoted it for us (because we are in
+        # this if-branch).  No need to quote it again.
+        _G_parse_options_requote=false
+      fi
+
+      # Break out of the loop if we already parsed every option.
+      test $# -gt 0 || break
+
+      # We expect that one of the options parsed in this function matches
+      # and thus we remove _G_opt from "$@" and need to re-quote.
+      _G_match_parse_options=:
+      _G_opt=$1
+      shift
+      case $_G_opt in
+        --debug|-x)   debug_cmd='set -x'
+                      func_echo "enabling shell trace mode" >&2
+                      $debug_cmd
+                      ;;
+
+        # Rewritten as '--warnings none'; the next pass of the loop
+        # handles it.
+        --no-warnings|--no-warning|--no-warn)
+                      set dummy --warnings none ${1+"$@"}
+                      shift
+                      ;;
+
+        --warnings|--warning|-W)
+                      if test $# = 0 && func_missing_arg $_G_opt; then
+                        _G_parse_options_requote=:
+                        break
+                      fi
+                      case " $warning_categories $1" in
+                        *" $1 "*)
+                          # trailing space prevents matching last $1 above
+                          func_append_uniq opt_warning_types " $1"
+                          ;;
+                        *all)
+                          opt_warning_types=$warning_categories
+                          ;;
+                        *none)
+                          opt_warning_types=none
+                          warning_func=:
+                          ;;
+                        *error)
+                          opt_warning_types=$warning_categories
+                          warning_func=func_fatal_error
+                          ;;
+                        *)
+                          func_fatal_error \
+                             "unsupported warning category: '$1'"
+                          ;;
+                      esac
+                      shift
+                      ;;
+
+        --verbose|-v) opt_verbose=: ;;
+        --version)    func_version ;;
+        -\?|-h)       func_usage ;;
+        --help)       func_help ;;
+
+        # Separate optargs to long options (plugins may need this):
+        # '--opt=value' is pushed back as the two words '--opt' 'value'.
+        --*=*)        func_split_equals "$_G_opt"
+                      set dummy "$func_split_equals_lhs" \
+                          "$func_split_equals_rhs" ${1+"$@"}
+                      shift
+                      ;;
+
+        # Separate optargs to short options:
+        # '-Wvalue' is pushed back as the two words '-W' 'value'.
+        -W*)
+                      func_split_short_opt "$_G_opt"
+                      set dummy "$func_split_short_opt_name" \
+                          "$func_split_short_opt_arg" ${1+"$@"}
+                      shift
+                      ;;
+
+        # Separate non-argument short options:
+        # a cluster such as '-vh' is pushed back as '-v' '-h'.
+        -\?*|-h*|-v*|-x*)
+                      func_split_short_opt "$_G_opt"
+                      set dummy "$func_split_short_opt_name" \
+                          "-$func_split_short_opt_arg" ${1+"$@"}
+                      shift
+                      ;;
+
+        # '--' is consumed; everything after it is left for the caller.
+        --)           _G_parse_options_requote=: ; break ;;
+        -*)           func_fatal_help "unrecognised option: '$_G_opt'" ;;
+        # First non-option word: put it back and stop parsing.
+        *)            set dummy "$_G_opt" ${1+"$@"}; shift
+                      _G_match_parse_options=false
+                      break
+                      ;;
+      esac
+
+      if $_G_match_parse_options; then
+        _G_parse_options_requote=:
+      fi
+    done
+
+    if $_G_parse_options_requote; then
+      # save modified positional parameters for caller
+      func_quote eval ${1+"$@"}
+      func_parse_options_result=$func_quote_result
+    fi
+}
+
+
+# func_validate_options [ARG]...
+# ------------------------------
+# Perform any sanity checks on option settings and/or unconsumed
+# arguments.
+func_hookable func_validate_options
+func_validate_options ()
+{
+    $debug_cmd
+
+    # Display all warnings if -W was not given.
+    test -n "$opt_warning_types" || opt_warning_types=" $warning_categories"
+
+    func_run_hooks func_validate_options ${1+"$@"}
+    func_propagate_result func_run_hooks func_validate_options
+
+    # Bail if the options were screwed!
+    # exit_cmd is set to 'exit' by func_missing_arg (and by hooks that
+    # detect a bad argument).
+    # NOTE(review): presumably it is a no-op such as ':' otherwise; its
+    # initial value is set outside this section -- confirm.
+    $exit_cmd $EXIT_FAILURE
+}
+
+
+
+## ----------------- ##
+## Helper functions. ##
+## ----------------- ##
+
+# This section contains the helper functions used by the rest of the
+# hookable option parser framework in ascii-betical order.
+
+
+# func_fatal_help ARG...
+# ----------------------
+# Echo program name prefixed message to standard error, followed by
+# a help hint, and exit.
+func_fatal_help () +{ + $debug_cmd + + eval \$ECHO \""Usage: $usage"\" + eval \$ECHO \""$fatal_help"\" + func_error ${1+"$@"} + exit $EXIT_FAILURE +} + + +# func_help +# --------- +# Echo long help message to standard output and exit. +func_help () +{ + $debug_cmd + + func_usage_message + $ECHO "$long_help_message" + exit 0 +} + + +# func_missing_arg ARGNAME +# ------------------------ +# Echo program name prefixed message to standard error and set global +# exit_cmd. +func_missing_arg () +{ + $debug_cmd + + func_error "Missing argument for '$1'." + exit_cmd=exit +} + + +# func_split_equals STRING +# ------------------------ +# Set func_split_equals_lhs and func_split_equals_rhs shell variables +# after splitting STRING at the '=' sign. +test -z "$_G_HAVE_XSI_OPS" \ + && (eval 'x=a/b/c; + test 5aa/bb/cc = "${#x}${x%%/*}${x%/*}${x#*/}${x##*/}"') 2>/dev/null \ + && _G_HAVE_XSI_OPS=yes + +if test yes = "$_G_HAVE_XSI_OPS" +then + # This is an XSI compatible shell, allowing a faster implementation... + eval 'func_split_equals () + { + $debug_cmd + + func_split_equals_lhs=${1%%=*} + func_split_equals_rhs=${1#*=} + if test "x$func_split_equals_lhs" = "x$1"; then + func_split_equals_rhs= + fi + }' +else + # ...otherwise fall back to using expr, which is often a shell builtin. + func_split_equals () + { + $debug_cmd + + func_split_equals_lhs=`expr "x$1" : 'x\([^=]*\)'` + func_split_equals_rhs= + test "x$func_split_equals_lhs=" = "x$1" \ + || func_split_equals_rhs=`expr "x$1" : 'x[^=]*=\(.*\)$'` + } +fi #func_split_equals + + +# func_split_short_opt SHORTOPT +# ----------------------------- +# Set func_split_short_opt_name and func_split_short_opt_arg shell +# variables after splitting SHORTOPT after the 2nd character. +if test yes = "$_G_HAVE_XSI_OPS" +then + # This is an XSI compatible shell, allowing a faster implementation... 
+ eval 'func_split_short_opt () + { + $debug_cmd + + func_split_short_opt_arg=${1#??} + func_split_short_opt_name=${1%"$func_split_short_opt_arg"} + }' +else + # ...otherwise fall back to using expr, which is often a shell builtin. + func_split_short_opt () + { + $debug_cmd + + func_split_short_opt_name=`expr "x$1" : 'x\(-.\)'` + func_split_short_opt_arg=`expr "x$1" : 'x-.\(.*\)$'` + } +fi #func_split_short_opt + + +# func_usage +# ---------- +# Echo short help message to standard output and exit. +func_usage () +{ + $debug_cmd + + func_usage_message + $ECHO "Run '$progname --help |${PAGER-more}' for full usage" + exit 0 +} + + +# func_usage_message +# ------------------ +# Echo short help message to standard output. +func_usage_message () +{ + $debug_cmd + + eval \$ECHO \""Usage: $usage"\" + echo + $SED -n 's|^# || + /^Written by/{ + x;p;x + } + h + /^Written by/q' < "$progpath" + echo + eval \$ECHO \""$usage_message"\" +} + + +# func_version +# ------------ +# Echo version message to standard output and exit. +# The version message is extracted from the calling file's header +# comments, with leading '# ' stripped: +# 1. First display the progname and version +# 2. Followed by the header comment line matching /^# Written by / +# 3. Then a blank line followed by the first following line matching +# /^# Copyright / +# 4. Immediately followed by any lines between the previous matches, +# except lines preceding the intervening completely blank line. +# For example, see the header comments of this file. +func_version () +{ + $debug_cmd + + printf '%s\n' "$progname $scriptversion" + $SED -n ' + /^# Written by /!b + s|^# ||; p; n + + :fwd2blnk + /./ { + n + b fwd2blnk + } + p; n + + :holdwrnt + s|^# || + s|^# *$|| + /^Copyright /!{ + /./H + n + b holdwrnt + } + + s|\((C)\)[ 0-9,-]*[ ,-]\([1-9][0-9]* \)|\1 \2| + G + s|\(\n\)\n*|\1|g + p; q' < "$progpath" + + exit $? 
+} + + +# Local variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-pattern: "30/scriptversion=%:y-%02m-%02d.%02H; # UTC" +# time-stamp-time-zone: "UTC" +# End: + +# Set a version string. +scriptversion='(GNU libtool) 2.5.0.1-38c1-dirty' + + +# func_echo ARG... +# ---------------- +# Libtool also displays the current mode in messages, so override +# funclib.sh func_echo with this custom definition. +func_echo () +{ + $debug_cmd + + _G_message=$* + + func_echo_IFS=$IFS + IFS=$nl + for _G_line in $_G_message; do + IFS=$func_echo_IFS + $ECHO "$progname${opt_mode+: $opt_mode}: $_G_line" + done + IFS=$func_echo_IFS +} + + +# func_warning ARG... +# ------------------- +# Libtool warnings are not categorized, so override funclib.sh +# func_warning with this simpler definition. +func_warning () +{ + $debug_cmd + + $warning_func ${1+"$@"} +} + + +## ---------------- ## +## Options parsing. ## +## ---------------- ## + +# Hook in the functions to make sure our own options are parsed during +# the option parsing loop. + +usage='$progpath [OPTION]... [MODE-ARG]...' + +# Short help message in response to '-h'. +usage_message="Options: + --config show all configuration variables + --debug enable verbose shell tracing + -n, --dry-run display commands without modifying any files + --features display basic configuration information and exit + --mode=MODE use operation mode MODE + --no-warnings equivalent to '-Wnone' + --preserve-dup-deps don't remove duplicate dependency libraries + --quiet, --silent don't print informational messages + --tag=TAG use configuration variables from tag TAG + -v, --verbose print more informational messages than default + --version print version information + -W, --warnings=CATEGORY report the warnings falling in CATEGORY [all] + -h, --help, --help-all print short, long, or detailed help message +" + +# Additional text appended to 'usage_message' in response to '--help'. 
+func_help () +{ + $debug_cmd + + func_usage_message + $ECHO "$long_help_message + +MODE must be one of the following: + + clean remove files from the build directory + compile compile a source file into a libtool object + execute automatically set library path, then run a program + finish complete the installation of libtool libraries + install install libraries or executables + link create a library or an executable + uninstall remove libraries from an installed directory + +MODE-ARGS vary depending on the MODE. When passed as first option, +'--mode=MODE' may be abbreviated as 'MODE' or a unique abbreviation of that. +Try '$progname --help --mode=MODE' for a more detailed description of MODE. + +When reporting a bug, please describe a test case to reproduce it and +include the following information: + + host-triplet: $host + shell: $SHELL + compiler: $LTCC + compiler flags: $LTCFLAGS + linker: $LD (gnu? $with_gnu_ld) + version: $progname (GNU libtool) 2.5.0.1-38c1-dirty + automake: `($AUTOMAKE --version) 2>/dev/null |$SED 1q` + autoconf: `($AUTOCONF --version) 2>/dev/null |$SED 1q` + +Report bugs to . +GNU libtool home page: . +General help using GNU software: ." + exit 0 +} + + +# func_lo2o OBJECT-NAME +# --------------------- +# Transform OBJECT-NAME from a '.lo' suffix to the platform specific +# object suffix. + +lo2o=s/\\.lo\$/.$objext/ +o2lo=s/\\.$objext\$/.lo/ + +if test yes = "$_G_HAVE_XSI_OPS"; then + eval 'func_lo2o () + { + case $1 in + *.lo) func_lo2o_result=${1%.lo}.$objext ;; + * ) func_lo2o_result=$1 ;; + esac + }' + + # func_xform LIBOBJ-OR-SOURCE + # --------------------------- + # Transform LIBOBJ-OR-SOURCE from a '.o' or '.c' (or otherwise) + # suffix to a '.lo' libtool-object suffix. + eval 'func_xform () + { + func_xform_result=${1%.*}.lo + }' +else + # ...otherwise fall back to using sed. 
+ func_lo2o () + { + func_lo2o_result=`$ECHO "$1" | $SED "$lo2o"` + } + + func_xform () + { + func_xform_result=`$ECHO "$1" | $SED 's|\.[^.]*$|.lo|'` + } +fi + + +# func_fatal_configuration ARG... +# ------------------------------- +# Echo program name prefixed message to standard error, followed by +# a configuration failure hint, and exit. +func_fatal_configuration () +{ + func_fatal_error ${1+"$@"} \ + "See the $PACKAGE documentation for more information." \ + "Fatal configuration error." +} + + +# func_config +# ----------- +# Display the configuration for all the tags in this script. +func_config () +{ + re_begincf='^# ### BEGIN LIBTOOL' + re_endcf='^# ### END LIBTOOL' + + # Default configuration. + $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath" + + # Now print the configurations for the tags. + for tagname in $taglist; do + $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath" + done + + exit $? +} + + +# func_features +# ------------- +# Display the features supported by this script. +func_features () +{ + echo "host: $host" + if test yes = "$build_libtool_libs"; then + echo "enable shared libraries" + else + echo "disable shared libraries" + fi + if test yes = "$build_old_libs"; then + echo "enable static libraries" + else + echo "disable static libraries" + fi + + exit $? +} + + +# func_enable_tag TAGNAME +# ----------------------- +# Verify that TAGNAME is valid, and either flag an error and exit, or +# enable the TAGNAME tag. We also add TAGNAME to the global $taglist +# variable here. +func_enable_tag () +{ + # Global variable: + tagname=$1 + + re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$" + re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$" + sed_extractcf=/$re_begincf/,/$re_endcf/p + + # Validate tagname. 
+ case $tagname in + *[!-_A-Za-z0-9,/]*) + func_fatal_error "invalid tag name: $tagname" + ;; + esac + + # Don't test for the "default" C tag, as we know it's + # there but not specially marked. + case $tagname in + CC) ;; + *) + if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then + taglist="$taglist $tagname" + + # Evaluate the configuration. Be careful to quote the path + # and the sed script, to avoid splitting on whitespace, but + # also don't use non-portable quotes within backquotes within + # quotes we have to do it in 2 steps: + extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"` + eval "$extractedcf" + else + func_error "ignoring unknown tag $tagname" + fi + ;; + esac +} + + +# func_check_version_match +# ------------------------ +# Ensure that we are using m4 macros, and libtool script from the same +# release of libtool. +func_check_version_match () +{ + if test "$package_revision" != "$macro_revision"; then + if test "$VERSION" != "$macro_version"; then + if test -z "$macro_version"; then + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, but the +$progname: definition of this LT_INIT comes from an older release. +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION +$progname: and run autoconf again. +_LT_EOF + else + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, but the +$progname: definition of this LT_INIT comes from $PACKAGE $macro_version. +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION +$progname: and run autoconf again. +_LT_EOF + fi + else + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, revision $package_revision, +$progname: but the definition of this LT_INIT comes from revision $macro_revision. +$progname: You should recreate aclocal.m4 with macros from revision $package_revision +$progname: of $PACKAGE $VERSION and run autoconf again. 
+_LT_EOF
+      fi
+
+      exit $EXIT_MISMATCH
+    fi
+}
+
+
+# libtool_options_prep [ARG]...
+# -----------------------------
+# Preparation for options parsed by libtool.
+# Resets the libtool specific option variables to their defaults and
+# expands a leading mode shorthand (e.g. 'compile', 'comp', 'c') into
+# '--mode MODE'.  'libtool_options_prep_result' is set only when such a
+# shorthand was expanded.
+libtool_options_prep ()
+{
+    # Honour --debug tracing like every other function in this file.
+    $debug_cmd
+
+    # Option defaults:
+    opt_config=false
+    opt_dlopen=
+    opt_dry_run=false
+    opt_help=false
+    opt_mode=
+    opt_preserve_dup_deps=false
+    opt_quiet=false
+
+    nonopt=
+    preserve_args=
+
+    # Stays ':' only when the first argument was a mode shorthand.
+    _G_rc_lt_options_prep=:
+
+    # Shorthand for --mode=foo, only valid as the first argument
+    case $1 in
+    clean|clea|cle|cl)
+      shift; set dummy --mode clean ${1+"$@"}; shift
+      ;;
+    compile|compil|compi|comp|com|co|c)
+      shift; set dummy --mode compile ${1+"$@"}; shift
+      ;;
+    execute|execut|execu|exec|exe|ex|e)
+      shift; set dummy --mode execute ${1+"$@"}; shift
+      ;;
+    finish|finis|fini|fin|fi|f)
+      shift; set dummy --mode finish ${1+"$@"}; shift
+      ;;
+    install|instal|insta|inst|ins|in|i)
+      shift; set dummy --mode install ${1+"$@"}; shift
+      ;;
+    link|lin|li|l)
+      shift; set dummy --mode link ${1+"$@"}; shift
+      ;;
+    uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u)
+      shift; set dummy --mode uninstall ${1+"$@"}; shift
+      ;;
+    *)
+      _G_rc_lt_options_prep=false
+      ;;
+    esac
+
+    if $_G_rc_lt_options_prep; then
+      # Pass back the list of options.
+      func_quote eval ${1+"$@"}
+      libtool_options_prep_result=$func_quote_result
+    fi
+}
+func_add_hook func_options_prep libtool_options_prep
+
+
+# libtool_parse_options [ARG]...
+# ---------------------------------
+# Provide handling for libtool specific options.
+# Consumes leading options it recognises and stops at the first word it
+# does not.  'libtool_parse_options_result' is set only when at least one
+# option was consumed.
+libtool_parse_options ()
+{
+    $debug_cmd
+
+    _G_rc_lt_parse_options=false
+
+    # Perform our own loop to consume as many options as possible in
+    # each iteration.
+    while test $# -gt 0; do
+      _G_match_lt_parse_options=:
+      _G_opt=$1
+      shift
+      case $_G_opt in
+        --dry-run|--dryrun|-n)
+                        opt_dry_run=:
+                        ;;
+
+        --config)       func_config ;;
+
+        # Like --mode and --tag, refuse to run off the end of the
+        # argument list: shifting with no arguments left is an error.
+        --dlopen|-dlopen)
+                        test $# = 0 && func_missing_arg $_G_opt && break
+                        opt_dlopen="${opt_dlopen+$opt_dlopen
+}$1"
+                        shift
+                        ;;
+
+        --preserve-dup-deps)
+                        opt_preserve_dup_deps=: ;;
+
+        --features)     func_features ;;
+
+        --finish)       set dummy --mode finish ${1+"$@"}; shift ;;
+
+        --help)         opt_help=: ;;
+
+        --help-all)     opt_help=': help-all' ;;
+
+        --mode)         test $# = 0 && func_missing_arg $_G_opt && break
+                        opt_mode=$1
+                        case $1 in
+                          # Valid mode arguments:
+                          clean|compile|execute|finish|install|link|relink|uninstall) ;;
+
+                          # Catch anything else as an error
+                          *) func_error "invalid argument for $_G_opt"
+                             exit_cmd=exit
+                             break
+                             ;;
+                        esac
+                        shift
+                        ;;
+
+        --no-silent|--no-quiet)
+                        opt_quiet=false
+                        func_append preserve_args " $_G_opt"
+                        ;;
+
+        --no-warnings|--no-warning|--no-warn)
+                        opt_warning=false
+                        func_append preserve_args " $_G_opt"
+                        ;;
+
+        --no-verbose)
+                        opt_verbose=false
+                        func_append preserve_args " $_G_opt"
+                        ;;
+
+        --silent|--quiet)
+                        opt_quiet=:
+                        opt_verbose=false
+                        func_append preserve_args " $_G_opt"
+                        ;;
+
+        --tag)          test $# = 0 && func_missing_arg $_G_opt && break
+                        opt_tag=$1
+                        func_append preserve_args " $_G_opt $1"
+                        func_enable_tag "$1"
+                        shift
+                        ;;
+
+        --verbose|-v)   opt_quiet=false
+                        opt_verbose=:
+                        func_append preserve_args " $_G_opt"
+                        ;;
+
+        # An option not handled by this hook function:
+        *)              set dummy "$_G_opt" ${1+"$@"} ; shift
+                        _G_match_lt_parse_options=false
+                        break
+                        ;;
+      esac
+      $_G_match_lt_parse_options && _G_rc_lt_parse_options=:
+    done
+
+    if $_G_rc_lt_parse_options; then
+      # save modified positional parameters for caller
+      func_quote eval ${1+"$@"}
+      libtool_parse_options_result=$func_quote_result
+    fi
+}
+func_add_hook func_parse_options libtool_parse_options
+
+
+
+# libtool_validate_options [ARG]...
+# ---------------------------------
+# Perform any sanity checks on option settings and/or unconsumed
+# arguments.
+# The first remaining argument, if any, is moved into 'nonopt'; the rest
+# is always passed back in 'libtool_validate_options_result'.
+libtool_validate_options ()
+{
+    # save first non-option argument
+    if test 0 -lt $#; then
+      nonopt=$1
+      shift
+    fi
+
+    # preserve --debug
+    # (debug_cmd is something other than ':' once --debug was given)
+    test : = "$debug_cmd" || func_append preserve_args " --debug"
+
+    case $host_os in
+      # Solaris2 added to fix http://debbugs.gnu.org/cgi/bugreport.cgi?bug=16452
+      # see also: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59788
+      cygwin* | mingw* | windows* | pw32* | cegcc* | solaris2* | os2*)
+        # don't eliminate duplications in $postdeps and $predeps
+        opt_duplicate_compiler_generated_deps=:
+        ;;
+      *)
+        opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps
+        ;;
+    esac
+
+    # Everything in the braces is skipped when --help or --help-all was
+    # given, so that help output works even on a misconfigured libtool.
+    $opt_help || {
+      # Sanity checks first:
+      func_check_version_match
+
+      test yes != "$build_libtool_libs" \
+        && test yes != "$build_old_libs" \
+        && func_fatal_configuration "not configured to build any kind of library"
+
+      # Darwin sucks
+      # Expand $shrext_cmds once, now, and keep the result in std_shrext.
+      eval std_shrext=\"$shrext_cmds\"
+
+      # Only execute mode is allowed to have -dlopen flags.
+      if test -n "$opt_dlopen" && test execute != "$opt_mode"; then
+        func_error "unrecognized option '-dlopen'"
+        $ECHO "$help" 1>&2
+        exit $EXIT_FAILURE
+      fi
+
+      # Change the help message to a mode-specific one.
+      generic_help=$help
+      help="Try '$progname --help --mode=$opt_mode' for more information."
+    }
+
+    # Pass back the unparsed argument list
+    func_quote eval ${1+"$@"}
+    libtool_validate_options_result=$func_quote_result
+}
+func_add_hook func_validate_options libtool_validate_options
+
+
+# Process options as early as possible so that --help and --version
+# can return quickly.
+# Afterwards "$@" holds whatever the option parser left unprocessed.
+func_options ${1+"$@"}
+eval set dummy "$func_options_result"; shift
+
+
+
+## ----------- ##
+##    Main.    ##
+## ----------- ##
+
+# Marker strings; $magic_exe is searched for by
+# func_ltwrapper_executable_p to recognise wrapper executables.
+magic='%%%MAGIC variable%%%'
+magic_exe='%%%MAGIC EXE variable%%%'
+
+# Global variables.
+extracted_archives=
+extracted_serial=0
+
+# If this variable is set in any of the actions, the command in it
+# will be execed at the end.  This prevents here-documents from being
+# left over by shells.
+exec_cmd=
+
+
+# A function that is used when there is no print builtin or printf.
+# Writes its first argument, followed by a newline, to standard output
+# by feeding it to 'cat' through a here-document.
+func_fallback_echo ()
+{
+  eval 'cat <<_LTECHO_EOF
+$1
+_LTECHO_EOF'
+}
+
+# func_generated_by_libtool_p
+# True iff stdin has been generated by Libtool. This function is only
+# a basic sanity check; it will hardly flush out determined imposters.
+func_generated_by_libtool_p ()
+{
+  $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1
+}
+
+# func_lalib_p file
+# True iff FILE is a libtool '.la' library or '.lo' object file.
+# This function is only a basic sanity check; it will hardly flush out
+# determined imposters.
+func_lalib_p ()
+{
+    # Only the first four lines of FILE are inspected.
+    test -f "$1" &&
+      $SED -e 4q "$1" 2>/dev/null | func_generated_by_libtool_p
+}
+
+# func_lalib_unsafe_p file
+# True iff FILE is a libtool '.la' library or '.lo' object file.
+# This function implements the same check as func_lalib_p without
+# resorting to external programs.  To this end, it saves stdin on file
+# descriptor 5, redirects stdin from FILE, and restores stdin and closes
+# descriptor 5 afterwards; whatever was open on descriptor 5 before is
+# lost.  As a safety measure, use it only where a negative result would
+# be fatal anyway.  Works if 'file' does not exist.
+func_lalib_unsafe_p ()
+{
+    lalib_p=no
+    if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then
+        # Look at no more than the first four lines, as func_lalib_p does.
+        for lalib_p_l in 1 2 3 4
+        do
+            read lalib_p_line
+            case $lalib_p_line in
+                \#\ Generated\ by\ *$PACKAGE* ) lalib_p=yes; break;;
+            esac
+        done
+        # Put the original stdin back and release descriptor 5.
+        exec 0<&5 5<&-
+    fi
+    test yes = "$lalib_p"
+}
+
+# func_ltwrapper_script_p file
+# True iff FILE is a libtool wrapper script
+# This function is only a basic sanity check; it will hardly flush out
+# determined imposters.
+func_ltwrapper_script_p ()
+{
+    test -f "$1" &&
+      $lt_truncate_bin < "$1" 2>/dev/null | func_generated_by_libtool_p
+}
+
+# func_ltwrapper_executable_p file
+# True iff FILE is a libtool wrapper executable
+# This function is only a basic sanity check; it will hardly flush out
+# determined imposters.
+func_ltwrapper_executable_p ()
+{
+    # Look at FILE.exe unless FILE already carries the '.exe' suffix.
+    func_ltwrapper_exec_suffix=
+    case $1 in
+    *.exe) ;;
+    *) func_ltwrapper_exec_suffix=.exe ;;
+    esac
+    # A wrapper executable is recognised by the $magic_exe marker string.
+    $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1
+}
+
+# func_ltwrapper_scriptname file
+# Assumes file is an ltwrapper_executable
+# uses $file to determine the appropriate filename for a
+# temporary ltwrapper_script.
+# The name is returned in func_ltwrapper_scriptname_result.
+func_ltwrapper_scriptname ()
+{
+    func_dirname_and_basename "$1" "" "."
+    # NOTE(review): func_stripname is defined outside this section;
+    # presumably this drops a trailing '.exe' from the base name -- confirm.
+    func_stripname '' '.exe' "$func_basename_result"
+    func_ltwrapper_scriptname_result=$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper
+}
+
+# func_ltwrapper_p file
+# True iff FILE is a libtool wrapper script or wrapper executable
+# This function is only a basic sanity check; it will hardly flush out
+# determined imposters.
+func_ltwrapper_p ()
+{
+    func_ltwrapper_script_p "$1" || func_ltwrapper_executable_p "$1"
+}
+
+
+# func_execute_cmds commands fail_cmd
+# Execute tilde-delimited COMMANDS.
+# If FAIL_CMD is given, eval that upon failure.
+# FAIL_CMD may read-access the current command in variable CMD!
+func_execute_cmds ()
+{
+    $debug_cmd
+
+    # Split COMMANDS on '~' only; IFS is switched for the expansion of
+    # each command and put back before the command is run.
+    save_ifs=$IFS; IFS='~'
+    for cmd in $1; do
+      IFS=$sp$nl
+      # Expand variable references embedded in the command text.
+      eval cmd=\"$cmd\"
+      IFS=$save_ifs
+      # FAIL_CMD defaults to ':' when the second argument is not given.
+      func_show_eval "$cmd" "${2-:}"
+    done
+    IFS=$save_ifs
+}
+
+
+# func_source file
+# Source FILE, adding directory component if necessary.
+# Note that it is not necessary on cygwin/mingw to append a dot to
+# FILE even if both FILE and FILE.exe exist: automatic-append-.exe
+# behavior happens only for exec(3), not for open(2)!  Also, sourcing
+# 'FILE.' does not work on cygwin managed mounts.
+func_source ()
+{
+    $debug_cmd
+
+    # A name without any directory separator gets './' prepended, so
+    # that '.' is not left to search $PATH for it.
+    case $1 in
+    */* | *\\*) . "$1" ;;
+    *)          . "./$1" ;;
+    esac
+}
+
+
+# func_resolve_sysroot PATH
+# Replace a leading = in PATH with a sysroot.
Store the result into +# func_resolve_sysroot_result +func_resolve_sysroot () +{ + func_resolve_sysroot_result=$1 + case $func_resolve_sysroot_result in + =*) + func_stripname '=' '' "$func_resolve_sysroot_result" + func_resolve_sysroot_result=$lt_sysroot$func_stripname_result + ;; + esac +} + +# func_replace_sysroot PATH +# If PATH begins with the sysroot, replace it with = and +# store the result into func_replace_sysroot_result. +func_replace_sysroot () +{ + case $lt_sysroot:$1 in + ?*:"$lt_sysroot"*) + func_stripname "$lt_sysroot" '' "$1" + func_replace_sysroot_result='='$func_stripname_result + ;; + *) + # Including no sysroot. + func_replace_sysroot_result=$1 + ;; + esac +} + +# func_infer_tag arg +# Infer tagged configuration to use if any are available and +# if one wasn't chosen via the "--tag" command line option. +# Only attempt this if the compiler in the base compile +# command doesn't match the default compiler. +# arg is usually of the form 'gcc ...' +func_infer_tag () +{ + $debug_cmd + + if test -n "$available_tags" && test -z "$tagname"; then + CC_quoted= + for arg in $CC; do + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case $@ in + # Blanks in the command may have been stripped by the calling shell, + # but not from the CC environment variable when configure was run. + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) ;; + # Blanks at the start of $base_compile will cause this to fail + # if we don't check for them as well. + *) + for z in $available_tags; do + if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then + # Evaluate the configuration. 
+ eval "`$SED -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`" + CC_quoted= + for arg in $CC; do + # Double-quote args containing other shell metacharacters. + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case "$@ " in + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) + # The compiler in the base compile command matches + # the one in the tagged configuration. + # Assume this is the tagged configuration we want. + tagname=$z + break + ;; + esac + fi + done + # If $tagname still isn't set, then no tagged configuration + # was found and let the user know that the "--tag" command + # line option must be used. + if test -z "$tagname"; then + func_echo "unable to infer tagged configuration" + func_fatal_error "specify a tag with '--tag'" +# else +# func_verbose "using $tagname tagged configuration" + fi + ;; + esac + fi +} + + + +# func_write_libtool_object output_name pic_name nonpic_name +# Create a libtool object file (analogous to a ".la" file), +# but don't create it if we're doing a dry run. +func_write_libtool_object () +{ + write_libobj=$1 + if test yes = "$build_libtool_libs"; then + write_lobj=\'$2\' + else + write_lobj=none + fi + + if test yes = "$build_old_libs"; then + write_oldobj=\'$3\' + else + write_oldobj=none + fi + + $opt_dry_run || { + cat >${write_libobj}T </dev/null` + if test "$?" 
-eq 0 && test -n "$func_convert_core_file_wine_to_w32_tmp"; then + func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" | + $SED -e "$sed_naive_backslashify"` + else + func_convert_core_file_wine_to_w32_result= + fi + fi +} +# end: func_convert_core_file_wine_to_w32 + + +# func_convert_core_path_wine_to_w32 ARG +# Helper function used by path conversion functions when $build is *nix, and +# $host is mingw, windows, cygwin, or some other w32 environment. Relies on a +# correctly configured wine environment available, with the winepath program +# in $build's $PATH. Assumes ARG has no leading or trailing path separator +# characters. +# +# ARG is path to be converted from $build format to win32. +# Result is available in $func_convert_core_path_wine_to_w32_result. +# Unconvertible file (directory) names in ARG are skipped; if no directory names +# are convertible, then the result may be empty. +func_convert_core_path_wine_to_w32 () +{ + $debug_cmd + + # unfortunately, winepath doesn't convert paths, only file names + func_convert_core_path_wine_to_w32_result= + if test -n "$1"; then + oldIFS=$IFS + IFS=: + for func_convert_core_path_wine_to_w32_f in $1; do + IFS=$oldIFS + func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f" + if test -n "$func_convert_core_file_wine_to_w32_result"; then + if test -z "$func_convert_core_path_wine_to_w32_result"; then + func_convert_core_path_wine_to_w32_result=$func_convert_core_file_wine_to_w32_result + else + func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result" + fi + fi + done + IFS=$oldIFS + fi +} +# end: func_convert_core_path_wine_to_w32 + + +# func_cygpath ARGS... +# Wrapper around calling the cygpath program via LT_CYGPATH. This is used when +# (1) $build is *nix and Cygwin is hosted via a wine environment; or (2) +# $build is MSYS and $host is Cygwin, or (3) $build is Cygwin.
In case (1) or +# (2), returns the Cygwin file name or path in func_cygpath_result (input +# file name or path is assumed to be in w32 format, as previously converted +# from $build's *nix or MSYS format). In case (3), returns the w32 file name +# or path in func_cygpath_result (input file name or path is assumed to be in +# Cygwin format). Returns an empty string on error. +# +# ARGS are passed to cygpath, with the last one being the file name or path to +# be converted. +# +# Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH +# environment variable; do not put it in $PATH. +func_cygpath () +{ + $debug_cmd + + if test -n "$LT_CYGPATH" && test -f "$LT_CYGPATH"; then + func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null` + if test "$?" -ne 0; then + # on failure, ensure result is empty + func_cygpath_result= + fi + else + func_cygpath_result= + func_error "LT_CYGPATH is empty or specifies non-existent file: '$LT_CYGPATH'" + fi +} +#end: func_cygpath + + +# func_convert_core_msys_to_w32 ARG +# Convert file name or path ARG from MSYS format to w32 format. Return +# result in func_convert_core_msys_to_w32_result. +func_convert_core_msys_to_w32 () +{ + $debug_cmd + + # awkward: cmd appends spaces to result + func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null | + $SED -e 's/[ ]*$//' -e "$sed_naive_backslashify"` +} +#end: func_convert_core_msys_to_w32 + + +# func_convert_file_check ARG1 ARG2 +# Verify that ARG1 (a file name in $build format) was converted to $host +# format in ARG2. Otherwise, emit an error message, but continue (resetting +# func_to_host_file_result to ARG1). +func_convert_file_check () +{ + $debug_cmd + + if test -z "$2" && test -n "$1"; then + func_error "Could not determine host file name corresponding to" + func_error " '$1'" + func_error "Continuing, but uninstalled executables may not work." 
+ # Fallback: + func_to_host_file_result=$1 + fi +} +# end func_convert_file_check + + +# func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH +# Verify that FROM_PATH (a path in $build format) was converted to $host +# format in TO_PATH. Otherwise, emit an error message, but continue, resetting +# func_to_host_path_result to a simplistic fallback value (see below). +func_convert_path_check () +{ + $debug_cmd + + if test -z "$4" && test -n "$3"; then + func_error "Could not determine the host path corresponding to" + func_error " '$3'" + func_error "Continuing, but uninstalled executables may not work." + # Fallback. This is a deliberately simplistic "conversion" and + # should not be "improved". See libtool.info. + if test "x$1" != "x$2"; then + lt_replace_pathsep_chars="s|$1|$2|g" + func_to_host_path_result=`echo "$3" | + $SED -e "$lt_replace_pathsep_chars"` + else + func_to_host_path_result=$3 + fi + fi +} +# end func_convert_path_check + + +# func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG +# Modifies func_to_host_path_result by prepending REPL if ORIG matches FRONTPAT +# and appending REPL if ORIG matches BACKPAT. +func_convert_path_front_back_pathsep () +{ + $debug_cmd + + case $4 in + $1 ) func_to_host_path_result=$3$func_to_host_path_result + ;; + esac + case $4 in + $2 ) func_append func_to_host_path_result "$3" + ;; + esac +} +# end func_convert_path_front_back_pathsep + + +################################################## +# $build to $host FILE NAME CONVERSION FUNCTIONS # +################################################## +# invoked via '$to_host_file_cmd ARG' +# +# In each case, ARG is the path to be converted from $build to $host format. +# Result will be available in $func_to_host_file_result. + + +# func_to_host_file ARG +# Converts the file name ARG from $build format to $host format. Return result +# in func_to_host_file_result.
+func_to_host_file () +{ + $debug_cmd + + $to_host_file_cmd "$1" +} +# end func_to_host_file + + +# func_to_tool_file ARG LAZY +# converts the file name ARG from $build format to toolchain format. Return +# result in func_to_tool_file_result. If the conversion in use is listed +# in (the comma separated) LAZY, no conversion takes place. +func_to_tool_file () +{ + $debug_cmd + + case ,$2, in + *,"$to_tool_file_cmd",*) + func_to_tool_file_result=$1 + ;; + *) + $to_tool_file_cmd "$1" + func_to_tool_file_result=$func_to_host_file_result + ;; + esac +} +# end func_to_tool_file + + +# func_convert_file_noop ARG +# Copy ARG to func_to_host_file_result. +func_convert_file_noop () +{ + func_to_host_file_result=$1 +} +# end func_convert_file_noop + + +# func_convert_file_msys_to_w32 ARG +# Convert file name ARG from (mingw) MSYS to (mingw) w32 format; automatic +# conversion to w32 is not available inside the cwrapper. Returns result in +# func_to_host_file_result. +func_convert_file_msys_to_w32 () +{ + $debug_cmd + + func_to_host_file_result=$1 + if test -n "$1"; then + func_convert_core_msys_to_w32 "$1" + func_to_host_file_result=$func_convert_core_msys_to_w32_result + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_msys_to_w32 + + +# func_convert_file_cygwin_to_w32 ARG +# Convert file name ARG from Cygwin to w32 format. Returns result in +# func_to_host_file_result. +func_convert_file_cygwin_to_w32 () +{ + $debug_cmd + + func_to_host_file_result=$1 + if test -n "$1"; then + # because $build is cygwin, we call "the" cygpath in $PATH; no need to use + # LT_CYGPATH in this case. + func_to_host_file_result=`cygpath -m "$1"` + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_cygwin_to_w32 + + +# func_convert_file_nix_to_w32 ARG +# Convert file name ARG from *nix to w32 format. Requires a wine environment +# and a working winepath. Returns result in func_to_host_file_result. 
+func_convert_file_nix_to_w32 () +{ + $debug_cmd + + func_to_host_file_result=$1 + if test -n "$1"; then + func_convert_core_file_wine_to_w32 "$1" + func_to_host_file_result=$func_convert_core_file_wine_to_w32_result + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_nix_to_w32 + + +# func_convert_file_msys_to_cygwin ARG +# Convert file name ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. +# Returns result in func_to_host_file_result. +func_convert_file_msys_to_cygwin () +{ + $debug_cmd + + func_to_host_file_result=$1 + if test -n "$1"; then + func_convert_core_msys_to_w32 "$1" + func_cygpath -u "$func_convert_core_msys_to_w32_result" + func_to_host_file_result=$func_cygpath_result + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_msys_to_cygwin + + +# func_convert_file_nix_to_cygwin ARG +# Convert file name ARG from *nix to Cygwin format. Requires Cygwin installed +# in a wine environment, working winepath, and LT_CYGPATH set. Returns result +# in func_to_host_file_result. +func_convert_file_nix_to_cygwin () +{ + $debug_cmd + + func_to_host_file_result=$1 + if test -n "$1"; then + # convert from *nix to w32, then use cygpath to convert from w32 to cygwin. + func_convert_core_file_wine_to_w32 "$1" + func_cygpath -u "$func_convert_core_file_wine_to_w32_result" + func_to_host_file_result=$func_cygpath_result + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_nix_to_cygwin + + +############################################# +# $build to $host PATH CONVERSION FUNCTIONS # +############################################# +# invoked via '$to_host_path_cmd ARG' +# +# In each case, ARG is the path to be converted from $build to $host format. +# The result will be available in $func_to_host_path_result. +# +# Path separators are also converted from $build format to $host format. 
If +# ARG begins or ends with a path separator character, it is preserved (but +# converted to $host format) on output. +# +# All path conversion functions are named using the following convention: +# file name conversion function : func_convert_file_X_to_Y () +# path conversion function : func_convert_path_X_to_Y () +# where, for any given $build/$host combination the 'X_to_Y' value is the +# same. If conversion functions are added for new $build/$host combinations, +# the two new functions must follow this pattern, or func_init_to_host_path_cmd +# will break. + + +# func_init_to_host_path_cmd +# Ensures that function "pointer" variable $to_host_path_cmd is set to the +# appropriate value, based on the value of $to_host_file_cmd. +to_host_path_cmd= +func_init_to_host_path_cmd () +{ + $debug_cmd + + if test -z "$to_host_path_cmd"; then + func_stripname 'func_convert_file_' '' "$to_host_file_cmd" + to_host_path_cmd=func_convert_path_$func_stripname_result + fi +} + + +# func_to_host_path ARG +# Converts the path ARG from $build format to $host format. Return result +# in func_to_host_path_result. +func_to_host_path () +{ + $debug_cmd + + func_init_to_host_path_cmd + $to_host_path_cmd "$1" +} +# end func_to_host_path + + +# func_convert_path_noop ARG +# Copy ARG to func_to_host_path_result. +func_convert_path_noop () +{ + func_to_host_path_result=$1 +} +# end func_convert_path_noop + + +# func_convert_path_msys_to_w32 ARG +# Convert path ARG from (mingw) MSYS to (mingw) w32 format; automatic +# conversion to w32 is not available inside the cwrapper. Returns result in +# func_to_host_path_result. +func_convert_path_msys_to_w32 () +{ + $debug_cmd + + func_to_host_path_result=$1 + if test -n "$1"; then + # Remove leading and trailing path separator characters from ARG. MSYS + # behavior is inconsistent here; cygpath turns them into '.;' and ';.'; + # and winepath ignores them completely. 
+ func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" + func_to_host_path_result=$func_convert_core_msys_to_w32_result + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_msys_to_w32 + + +# func_convert_path_cygwin_to_w32 ARG +# Convert path ARG from Cygwin to w32 format. Returns result in +# func_to_host_path_result. +func_convert_path_cygwin_to_w32 () +{ + $debug_cmd + + func_to_host_path_result=$1 + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"` + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_cygwin_to_w32 + + +# func_convert_path_nix_to_w32 ARG +# Convert path ARG from *nix to w32 format. Requires a wine environment and +# a working winepath. Returns result in func_to_host_path_result. +func_convert_path_nix_to_w32 () +{ + $debug_cmd + + func_to_host_path_result=$1 + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" + func_to_host_path_result=$func_convert_core_path_wine_to_w32_result + func_convert_path_check : ";" \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" + fi +} +# end func_convert_path_nix_to_w32 + + +# func_convert_path_msys_to_cygwin ARG +# Convert path ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. +# Returns result in func_to_host_path_result.
+func_convert_path_msys_to_cygwin () +{ + $debug_cmd + + func_to_host_path_result=$1 + if test -n "$1"; then + # See func_convert_path_msys_to_w32: + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" + func_cygpath -u -p "$func_convert_core_msys_to_w32_result" + func_to_host_path_result=$func_cygpath_result + func_convert_path_check : : \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" : "$1" + fi +} +# end func_convert_path_msys_to_cygwin + + +# func_convert_path_nix_to_cygwin ARG +# Convert path ARG from *nix to Cygwin format. Requires Cygwin installed in a +# wine environment, working winepath, and LT_CYGPATH set. Returns result in +# func_to_host_path_result. +func_convert_path_nix_to_cygwin () +{ + $debug_cmd + + func_to_host_path_result=$1 + if test -n "$1"; then + # Remove leading and trailing path separator characters from + # ARG. msys behavior is inconsistent here, cygpath turns them + # into '.;' and ';.', and winepath ignores them completely. + func_stripname : : "$1" + func_to_host_path_tmp1=$func_stripname_result + func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" + func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result" + func_to_host_path_result=$func_cygpath_result + func_convert_path_check : : \ + "$func_to_host_path_tmp1" "$func_to_host_path_result" + func_convert_path_front_back_pathsep ":*" "*:" : "$1" + fi +} +# end func_convert_path_nix_to_cygwin + + +# func_dll_def_p FILE +# True iff FILE is a Windows DLL '.def' file. +# Keep in sync with _LT_DLL_DEF_P in libtool.m4 +func_dll_def_p () +{ + $debug_cmd + + func_dll_def_p_tmp=`$SED -n \ + -e 's/^[ ]*//' \ + -e '/^\(;.*\)*$/d' \ + -e 's/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p' \ + -e q \ + "$1"` + test DEF = "$func_dll_def_p_tmp" +} + + +# func_mode_compile arg...
+func_mode_compile () +{ + $debug_cmd + + # Get the compilation command and the source file. + base_compile= + srcfile=$nonopt # always keep a non-empty value in "srcfile" + suppress_opt=yes + suppress_output= + arg_mode=normal + libobj= + later= + pie_flag= + + for arg + do + case $arg_mode in + arg ) + # do not "continue". Instead, add this to base_compile + lastarg=$arg + arg_mode=normal + ;; + + target ) + libobj=$arg + arg_mode=normal + continue + ;; + + normal ) + # Accept any command-line options. + case $arg in + -o) + test -n "$libobj" && \ + func_fatal_error "you cannot specify '-o' more than once" + arg_mode=target + continue + ;; + + -pie | -fpie | -fPIE) + func_append pie_flag " $arg" + continue + ;; + + -shared | -static | -prefer-pic | -prefer-non-pic) + func_append later " $arg" + continue + ;; + + -no-suppress) + suppress_opt=no + continue + ;; + + -Xcompiler) + arg_mode=arg # the next one goes into the "base_compile" arg list + continue # The current "srcfile" will either be retained or + ;; # replaced later. I would guess that would be a bug. + + -Wc,*) + func_stripname '-Wc,' '' "$arg" + args=$func_stripname_result + lastarg= + save_ifs=$IFS; IFS=, + for arg in $args; do + IFS=$save_ifs + func_append_quoted lastarg "$arg" + done + IFS=$save_ifs + func_stripname ' ' '' "$lastarg" + lastarg=$func_stripname_result + + # Add the arguments to base_compile. + func_append base_compile " $lastarg" + continue + ;; + + *) + # Accept the current argument as the source file. + # The previous "srcfile" becomes the current argument. + # + lastarg=$srcfile + srcfile=$arg + ;; + esac # case $arg + ;; + esac # case $arg_mode + + # Aesthetically quote the previous argument. + func_append_quoted base_compile "$lastarg" + done # for arg + + case $arg_mode in + arg) + func_fatal_error "you must specify an argument for -Xcompiler" + ;; + target) + func_fatal_error "you must specify a target with '-o'" + ;; + *) + # Get the name of the library object.
+ test -z "$libobj" && { + func_basename "$srcfile" + libobj=$func_basename_result + } + ;; + esac + + # Recognize several different file suffixes. + # If the user specifies -o file.o, it is replaced with file.lo + case $libobj in + *.[cCFSifmso] | \ + *.ada | *.adb | *.ads | *.asm | \ + *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \ + *.[fF][09]? | *.for | *.java | *.go | *.obj | *.sx | *.cu | *.cup) + func_xform "$libobj" + libobj=$func_xform_result + ;; + esac + + case $libobj in + *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;; + *) + func_fatal_error "cannot determine name of library object from '$libobj'" + ;; + esac + + func_infer_tag $base_compile + + for arg in $later; do + case $arg in + -shared) + test yes = "$build_libtool_libs" \ + || func_fatal_configuration "cannot build a shared library" + build_old_libs=no + continue + ;; + + -static) + build_libtool_libs=no + build_old_libs=yes + continue + ;; + + -prefer-pic) + pic_mode=yes + continue + ;; + + -prefer-non-pic) + pic_mode=no + continue + ;; + esac + done + + func_quote_arg pretty "$libobj" + test "X$libobj" != "X$func_quote_arg_result" \ + && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"' &()|`$[]' \ + && func_warning "libobj name '$libobj' may not contain shell special characters." + func_dirname_and_basename "$obj" "/" "" + objname=$func_basename_result + xdir=$func_dirname_result + lobj=$xdir$objdir/$objname + + test -z "$base_compile" && \ + func_fatal_help "you must specify a compilation command" + + # Delete any leftover library objects. 
+ if test yes = "$build_old_libs"; then + removelist="$obj $lobj $libobj ${libobj}T" + else + removelist="$lobj $libobj ${libobj}T" + fi + + # On Cygwin there's no "real" PIC flag so we must build both object types + case $host_os in + cygwin* | mingw* | windows* | pw32* | os2* | cegcc*) + pic_mode=default + ;; + esac + if test no = "$pic_mode" && test pass_all != "$deplibs_check_method"; then + # non-PIC code in shared libraries is not supported + pic_mode=default + fi + + # Calculate the filename of the output object if compiler does + # not support -o with -c + if test no = "$compiler_c_o"; then + output_obj=`$ECHO "$srcfile" | $SED 's%^.*/%%; s%\.[^.]*$%%'`.$objext + lockfile=$output_obj.lock + else + output_obj= + need_locks=no + lockfile= + fi + + # Lock this critical section if it is needed + # We use this script file to make the link, it avoids creating a new file + if test yes = "$need_locks"; then + until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do + func_echo "Waiting for $lockfile to be removed" + sleep 2 + done + elif test warn = "$need_locks"; then + if test -f "$lockfile"; then + $ECHO "\ +*** ERROR, $lockfile exists and contains: +`cat $lockfile 2>/dev/null` + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support '-c' and '-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." 
+ + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + func_append removelist " $output_obj" + $ECHO "$srcfile" > "$lockfile" + fi + + $opt_dry_run || $RM $removelist + func_append removelist " $lockfile" + trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15 + + func_to_tool_file "$srcfile" func_convert_file_msys_to_w32 + srcfile=$func_to_tool_file_result + func_quote_arg pretty "$srcfile" + qsrcfile=$func_quote_arg_result + + # Only build a PIC object if we are building libtool libraries. + if test yes = "$build_libtool_libs"; then + # Without this assignment, base_compile gets emptied. + fbsd_hideous_sh_bug=$base_compile + + if test no != "$pic_mode"; then + command="$base_compile $qsrcfile $pic_flag" + else + # Don't build PIC code + command="$base_compile $qsrcfile" + fi + + func_mkdir_p "$xdir$objdir" + + if test -z "$output_obj"; then + # Place PIC objects in $objdir + func_append command " -o $lobj" + fi + + func_show_eval_locale "$command" \ + 'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE' + + if test warn = "$need_locks" && + test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then + $ECHO "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support '-c' and '-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed, then go on to compile the next one + if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then + func_show_eval '$MV "$output_obj" "$lobj"' \ + 'error=$?; $opt_dry_run || $RM $removelist; exit $error' + fi + + # Allow error messages only from the first compilation. 
+ if test yes = "$suppress_opt"; then + suppress_output=' >/dev/null 2>&1' + fi + fi + + # Only build a position-dependent object if we build old libraries. + if test yes = "$build_old_libs"; then + if test yes != "$pic_mode"; then + # Don't build PIC code + command="$base_compile $qsrcfile$pie_flag" + else + command="$base_compile $qsrcfile $pic_flag" + fi + if test yes = "$compiler_c_o"; then + func_append command " -o $obj" + fi + + # Suppress compiler output if we already did a PIC compilation. + func_append command "$suppress_output" + func_show_eval_locale "$command" \ + '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' + + if test warn = "$need_locks" && + test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then + $ECHO "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support '-c' and '-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed + if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then + func_show_eval '$MV "$output_obj" "$obj"' \ + 'error=$?; $opt_dry_run || $RM $removelist; exit $error' + fi + fi + + $opt_dry_run || { + func_write_libtool_object "$libobj" "$objdir/$objname" "$objname" + + # Unlock the critical section if it was locked + if test no != "$need_locks"; then + removelist=$lockfile + $RM "$lockfile" + fi + } + + exit $EXIT_SUCCESS +} + +$opt_help || { + test compile = "$opt_mode" && func_mode_compile ${1+"$@"} +} + +func_mode_help () +{ + # We need to display help for each of the modes. + case $opt_mode in + "") + # Generic help is extracted from the usage comments + # at the start of this file. 
+ func_help + ;; + + clean) + $ECHO \ +"Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE... + +Remove files from the build directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically '/bin/rm'). RM-OPTIONS are options (such as '-f') to be passed +to RM. + +If FILE is a libtool library, object or program, all the files associated +with it are deleted. Otherwise, only FILE itself is deleted using RM." + ;; + + compile) + $ECHO \ +"Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE + +Compile a source file into a libtool library object. + +This mode accepts the following additional options: + + -o OUTPUT-FILE set the output file name to OUTPUT-FILE + -no-suppress do not suppress compiler output for multiple passes + -prefer-pic try to build PIC objects only + -prefer-non-pic try to build non-PIC objects only + -shared do not build a '.o' file suitable for static linking + -static only build a '.o' file suitable for static linking + -Wc,FLAG + -Xcompiler FLAG pass FLAG directly to the compiler + +COMPILE-COMMAND is a command to be used in creating a 'standard' object file +from the given SOURCEFILE. + +The output file name is determined by removing the directory component from +SOURCEFILE, then substituting the C source code suffix '.c' with the +library object suffix, '.lo'." + ;; + + execute) + $ECHO \ +"Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]... + +Automatically set library path, then run a program. + +This mode accepts the following additional options: + + -dlopen FILE add the directory containing FILE to the library path + +This mode sets the library path environment variable according to '-dlopen' +flags. + +If any of the ARGS are libtool executable wrappers, then they are translated +into their corresponding uninstalled binary, and any of their required library +directories are added to the library path. + +Then, COMMAND is executed, with ARGS as arguments." 
+ ;; + + finish) + $ECHO \ +"Usage: $progname [OPTION]... --mode=finish [LIBDIR]... + +Complete the installation of libtool libraries. + +Each LIBDIR is a directory that contains libtool libraries. + +The commands that this mode executes may require superuser privileges. Use +the '--dry-run' option if you just want to see what would be executed." + ;; + + install) + $ECHO \ +"Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND... + +Install executables or libraries. + +INSTALL-COMMAND is the installation command. The first component should be +either the 'install' or 'cp' program. + +The following components of INSTALL-COMMAND are treated specially: + + -inst-prefix-dir PREFIX-DIR Use PREFIX-DIR as a staging area for installation + +The rest of the components are interpreted as arguments to that command (only +BSD-compatible install options are recognized)." + ;; + + link) + $ECHO \ +"Usage: $progname [OPTION]... --mode=link LINK-COMMAND... + +Link object files or libraries together to form another library, or to +create an executable program. + +LINK-COMMAND is a command using the C compiler that you would use to create +a program from several object files. 
+ +The following components of LINK-COMMAND are treated specially: + + -all-static do not do any dynamic linking at all + -avoid-version do not add a version suffix if possible + -bindir BINDIR specify path to binaries directory (for systems where + libraries must be found in the PATH setting at runtime) + -dlopen FILE '-dlpreopen' FILE if it cannot be dlopened at runtime + -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols + -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) + -export-symbols SYMFILE + try to export only the symbols listed in SYMFILE + -export-symbols-regex REGEX + try to export only the symbols matching REGEX + -LLIBDIR search LIBDIR for required installed libraries + -lNAME OUTPUT-FILE requires the installed library libNAME + -module build a library that can dlopened + -no-fast-install disable the fast-install mode + -no-install link a not-installable executable + -no-undefined declare that a library does not refer to external symbols + -o OUTPUT-FILE create OUTPUT-FILE from the specified objects + -objectlist FILE use a list of object files found in FILE to specify objects + -os2dllname NAME force a short DLL name on OS/2 (no effect on other OSes) + -precious-files-regex REGEX + don't remove output files matching REGEX + -release RELEASE specify package release information + -rpath LIBDIR the created library will eventually be installed in LIBDIR + -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries + -shared only do dynamic linking of libtool libraries + -shrext SUFFIX override the standard shared library file extension + -static do not do any dynamic linking of uninstalled libtool libraries + -static-libtool-libs + do not do any dynamic linking of libtool libraries + -version-info CURRENT[:REVISION[:AGE]] + specify library version info [each variable defaults to 0] + -weak LIBNAME declare that the target provides the LIBNAME interface + -Wc,FLAG + -Xcompiler FLAG pass 
linker-specific FLAG directly to the compiler + -Wa,FLAG + -Xassembler FLAG pass linker-specific FLAG directly to the assembler + -Wl,FLAG + -Xlinker FLAG pass linker-specific FLAG directly to the linker + -XCClinker FLAG pass link-specific FLAG to the compiler driver (CC) + +All other options (arguments beginning with '-') are ignored. + +Every other argument is treated as a filename. Files ending in '.la' are +treated as uninstalled libtool libraries, other files are standard or library +object files. + +If the OUTPUT-FILE ends in '.la', then a libtool library is created, +only library objects ('.lo' files) may be specified, and '-rpath' is +required, except when creating a convenience library. + +If OUTPUT-FILE ends in '.a' or '.lib', then a standard library is created +using 'ar' and 'ranlib', or on Windows using 'lib'. + +If OUTPUT-FILE ends in '.lo' or '.$objext', then a reloadable object file +is created, otherwise an executable program is created." + ;; + + uninstall) + $ECHO \ +"Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... + +Remove libraries from an installation directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically '/bin/rm'). RM-OPTIONS are options (such as '-f') to be passed +to RM. + +If FILE is a libtool library, all the files associated with it are deleted. +Otherwise, only FILE itself is deleted using RM." + ;; + + *) + func_fatal_help "invalid operation mode '$opt_mode'" + ;; + esac + + echo + $ECHO "Try '$progname --help' for more information about other modes." 
+} + +# Now that we've collected a possible --mode arg, show help if necessary +if $opt_help; then + if test : = "$opt_help"; then + func_mode_help + else + { + func_help noexit + for opt_mode in compile link execute install finish uninstall clean; do + func_mode_help + done + } | $SED -n '1p; 2,$s/^Usage:/ or: /p' + { + func_help noexit + for opt_mode in compile link execute install finish uninstall clean; do + echo + func_mode_help + done + } | + $SED '1d + /^When reporting/,/^Report/{ + H + d + } + $x + /information about other modes/d + /more detailed .*MODE/d + s/^Usage:.*--mode=\([^ ]*\) .*/Description of \1 mode:/' + fi + exit $? +fi + + +# func_mode_execute arg... +func_mode_execute () +{ + $debug_cmd + + # The first argument is the command name. + cmd=$nonopt + test -z "$cmd" && \ + func_fatal_help "you must specify a COMMAND" + + # Handle -dlopen flags immediately. + for file in $opt_dlopen; do + test -f "$file" \ + || func_fatal_help "'$file' is not a file" + + dir= + case $file in + *.la) + func_resolve_sysroot "$file" + file=$func_resolve_sysroot_result + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$file" \ + || func_fatal_help "'$file' is not a valid libtool archive" + + # Read the libtool library. + dlname= + library_names= + func_source "$file" + + # Skip this library if it cannot be dlopened. + if test -z "$dlname"; then + # Warn if it was a shared library. + test -n "$library_names" && \ + func_warning "'$file' was not linked with '-export-dynamic'" + continue + fi + + func_dirname "$file" "" "." + dir=$func_dirname_result + + if test -f "$dir/$objdir/$dlname"; then + func_append dir "/$objdir" + else + if test ! -f "$dir/$dlname"; then + func_fatal_error "cannot find '$dlname' in '$dir' or '$dir/$objdir'" + fi + fi + ;; + + *.lo) + # Just add the directory containing the .lo file. + func_dirname "$file" "" "."
+ dir=$func_dirname_result + ;; + + *) + func_warning "'-dlopen' is ignored for non-libtool libraries and objects" + continue + ;; + esac + + # Get the absolute pathname. + absdir=`cd "$dir" && pwd` + test -n "$absdir" && dir=$absdir + + # Now add the directory to shlibpath_var. + if eval "test -z \"\$$shlibpath_var\""; then + eval "$shlibpath_var=\"\$dir\"" + else + eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" + fi + done + + # This variable tells wrapper scripts just to set shlibpath_var + # rather than running their programs. + libtool_execute_magic=$magic + + # Check if any of the arguments is a wrapper script. + args= + for file + do + case $file in + -* | *.la | *.lo ) ;; + *) + # Do a test to see if this is really a libtool program. + if func_ltwrapper_script_p "$file"; then + func_source "$file" + # Transform arg to wrapped name. + file=$progdir/$program + elif func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + func_source "$func_ltwrapper_scriptname_result" + # Transform arg to wrapped name. + file=$progdir/$program + fi + ;; + esac + # Quote arguments (to preserve shell metacharacters). + func_append_quoted args "$file" + done + + if $opt_dry_run; then + # Display what would be done. + if test -n "$shlibpath_var"; then + eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\"" + echo "export $shlibpath_var" + fi + $ECHO "$cmd$args" + exit $EXIT_SUCCESS + else + if test -n "$shlibpath_var"; then + # Export the shlibpath_var. + eval "export $shlibpath_var" + fi + + # Restore saved environment variables + for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES + do + eval "if test \"\${save_$lt_var+set}\" = set; then + $lt_var=\$save_$lt_var; export $lt_var + else + $lt_unset $lt_var + fi" + done + + # Now prepare to actually exec the command. + exec_cmd=\$cmd$args + fi +} + +test execute = "$opt_mode" && func_mode_execute ${1+"$@"} + + +# func_mode_finish arg... 
+func_mode_finish () +{ + $debug_cmd + + libs= + libdirs= + admincmds= + + for opt in "$nonopt" ${1+"$@"} + do + if test -d "$opt"; then + func_append libdirs " $opt" + + elif test -f "$opt"; then + if func_lalib_unsafe_p "$opt"; then + func_append libs " $opt" + else + func_warning "'$opt' is not a valid libtool archive" + fi + + else + func_fatal_error "invalid argument '$opt'" + fi + done + + if test -n "$libs"; then + if test -n "$lt_sysroot"; then + sysroot_regex=`$ECHO "$lt_sysroot" | $SED "$sed_make_literal_regex"` + sysroot_cmd="s/\([ ']\)$sysroot_regex/\1/g;" + else + sysroot_cmd= + fi + + # Remove sysroot references + if $opt_dry_run; then + for lib in $libs; do + echo "removing references to $lt_sysroot and '=' prefixes from $lib" + done + else + tmpdir=`func_mktempdir` + for lib in $libs; do + $SED -e "$sysroot_cmd s/\([ ']-[LR]\)=/\1/g; s/\([ ']\)=/\1/g" $lib \ + > $tmpdir/tmp-la + mv -f $tmpdir/tmp-la $lib + done + ${RM}r "$tmpdir" + fi + fi + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + for libdir in $libdirs; do + if test -n "$finish_cmds"; then + # Do each command in the finish commands. + func_execute_cmds "$finish_cmds" 'admincmds="$admincmds +'"$cmd"'"' + fi + if test -n "$finish_eval"; then + # Do the single finish_eval. + eval cmds=\"$finish_eval\" + $opt_dry_run || eval "$cmds" || func_append admincmds " + $cmds" + fi + done + fi + + # Exit here if they wanted silent mode. 
+ $opt_quiet && exit $EXIT_SUCCESS + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + echo "----------------------------------------------------------------------" + echo "Libraries have been installed in:" + for libdir in $libdirs; do + $ECHO " $libdir" + done + echo + echo "If you ever happen to want to link against installed libraries" + echo "in a given directory, LIBDIR, you must either use libtool, and" + echo "specify the full pathname of the library, or use the '-LLIBDIR'" + echo "flag during linking and do at least one of the following:" + if test -n "$shlibpath_var"; then + echo " - add LIBDIR to the '$shlibpath_var' environment variable" + echo " during execution" + fi + if test -n "$runpath_var"; then + echo " - add LIBDIR to the '$runpath_var' environment variable" + echo " during linking" + fi + if test -n "$hardcode_libdir_flag_spec"; then + libdir=LIBDIR + eval flag=\"$hardcode_libdir_flag_spec\" + + $ECHO " - use the '$flag' linker flag" + fi + if test -n "$admincmds"; then + $ECHO " - have your system administrator run these commands:$admincmds" + fi + if test -f /etc/ld.so.conf; then + echo " - have your system administrator add LIBDIR to '/etc/ld.so.conf'" + fi + echo + + echo "See any operating system documentation about shared libraries for" + case $host in + solaris2.[6789]|solaris2.1[0-9]) + echo "more information, such as the ld(1), crle(1) and ld.so(8) manual" + echo "pages." + ;; + *) + echo "more information, such as the ld(1) and ld.so(8) manual pages." + ;; + esac + echo "----------------------------------------------------------------------" + fi + exit $EXIT_SUCCESS +} + +test finish = "$opt_mode" && func_mode_finish ${1+"$@"} + + +# func_mode_install arg... +func_mode_install () +{ + $debug_cmd + + # There may be an optional sh(1) argument at the beginning of + # install_prog (especially on Windows NT). 
+ if test "$SHELL" = "$nonopt" || test /bin/sh = "$nonopt" || + # Allow the use of GNU shtool's install command. + case $nonopt in *shtool*) :;; *) false;; esac + then + # Aesthetically quote it. + func_quote_arg pretty "$nonopt" + install_prog="$func_quote_arg_result " + arg=$1 + shift + else + install_prog= + arg=$nonopt + fi + + # The real first argument should be the name of the installation program. + # Aesthetically quote it. + func_quote_arg pretty "$arg" + func_append install_prog "$func_quote_arg_result" + install_shared_prog=$install_prog + case " $install_prog " in + *[\\\ /]cp\ *) install_cp=: ;; + *) install_cp=false ;; + esac + + # We need to accept at least all the BSD install flags. + dest= + files= + opts= + prev= + install_type= + isdir=false + stripme= + no_mode=: + for arg + do + arg2= + if test -n "$dest"; then + func_append files " $dest" + dest=$arg + continue + fi + + case $arg in + -d) isdir=: ;; + -f) + if $install_cp; then :; else + prev=$arg + fi + ;; + -g | -m | -o) + prev=$arg + ;; + -s) + stripme=" -s" + continue + ;; + -*) + ;; + *) + # If the previous option needed an argument, then skip it. + if test -n "$prev"; then + if test X-m = "X$prev" && test -n "$install_override_mode"; then + arg2=$install_override_mode + no_mode=false + fi + prev= + else + dest=$arg + continue + fi + ;; + esac + + # Aesthetically quote the argument. 
+ func_quote_arg pretty "$arg" + func_append install_prog " $func_quote_arg_result" + if test -n "$arg2"; then + func_quote_arg pretty "$arg2" + fi + func_append install_shared_prog " $func_quote_arg_result" + done + + test -z "$install_prog" && \ + func_fatal_help "you must specify an install program" + + test -n "$prev" && \ + func_fatal_help "the '$prev' option requires an argument" + + if test -n "$install_override_mode" && $no_mode; then + if $install_cp; then :; else + func_quote_arg pretty "$install_override_mode" + func_append install_shared_prog " -m $func_quote_arg_result" + fi + fi + + if test -z "$files"; then + if test -z "$dest"; then + func_fatal_help "no file or destination specified" + else + func_fatal_help "you must specify a destination" + fi + fi + + # Strip any trailing slash from the destination. + func_stripname '' '/' "$dest" + dest=$func_stripname_result + + # Check to see that the destination is a directory. + test -d "$dest" && isdir=: + if $isdir; then + destdir=$dest + destname= + else + func_dirname_and_basename "$dest" "" "." + destdir=$func_dirname_result + destname=$func_basename_result + + # Not a directory, so check to see that there is only one file specified. + set dummy $files; shift + test "$#" -gt 1 && \ + func_fatal_help "'$dest' is not a directory" + fi + case $destdir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + for file in $files; do + case $file in + *.lo) ;; + *) + func_fatal_help "'$destdir' must be an absolute directory name" + ;; + esac + done + ;; + esac + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic=$magic + + staticlibs= + future_libdirs= + current_libdirs= + for file in $files; do + + # Do each installation. + case $file in + *.$libext) + # Do the static libraries later. 
+ func_append staticlibs " $file" + ;; + + *.la) + func_resolve_sysroot "$file" + file=$func_resolve_sysroot_result + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$file" \ + || func_fatal_help "'$file' is not a valid libtool archive" + + library_names= + old_library= + relink_command= + func_source "$file" + + # Add the libdir to current_libdirs if it is the destination. + if test "X$destdir" = "X$libdir"; then + case "$current_libdirs " in + *" $libdir "*) ;; + *) func_append current_libdirs " $libdir" ;; + esac + else + # Note the libdir as a future libdir. + case "$future_libdirs " in + *" $libdir "*) ;; + *) func_append future_libdirs " $libdir" ;; + esac + fi + + func_dirname "$file" "/" "" + dir=$func_dirname_result + func_append dir "$objdir" + + if test -n "$relink_command"; then + # Determine the prefix the user has applied to our future dir. + inst_prefix_dir=`$ECHO "$destdir" | $SED -e "s%$libdir\$%%"` + + # Don't allow the user to place us outside of our expected + # location b/c this prevents finding dependent libraries that + # are installed to the same prefix. + # At present, this check doesn't affect windows .dll's that + # are installed into $libdir/../bin (currently, that works fine) + # but it's something to keep an eye on. + test "$inst_prefix_dir" = "$destdir" && \ + func_fatal_error "error: cannot install '$file' to a directory not ending in $libdir" + + if test -n "$inst_prefix_dir"; then + # Stick the inst_prefix_dir data into the link command. + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"` + else + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%%"` + fi + + func_warning "relinking '$file'" + func_show_eval "$relink_command" \ + 'func_fatal_error "error: relink '\''$file'\'' with the above command before installing it"' + fi + + # See the names of the shared library. 
+ set dummy $library_names; shift + if test -n "$1"; then + realname=$1 + shift + + srcname=$realname + test -n "$relink_command" && srcname=${realname}T + + # Install the shared library and build the symlinks. + func_show_eval "$install_shared_prog $dir/$srcname $destdir/$realname" \ + 'exit $?' + tstripme=$stripme + case $host_os in + cygwin* | mingw* | windows* | pw32* | cegcc*) + case $realname in + *.dll.a) + tstripme= + ;; + esac + ;; + os2*) + case $realname in + *_dll.a) + tstripme= + ;; + esac + ;; + esac + if test -n "$tstripme" && test -n "$striplib"; then + func_show_eval "$striplib $destdir/$realname" 'exit $?' + fi + + if test "$#" -gt 0; then + # Delete the old symlinks, and create new ones. + # Try 'ln -sf' first, because the 'ln' binary might depend on + # the symlink we replace! Solaris /bin/ln does not understand -f, + # so we also need to try rm && ln -s. + for linkname + do + test "$linkname" != "$realname" \ + && func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })" + done + fi + + # Do each command in the postinstall commands. + lib=$destdir/$realname + func_execute_cmds "$postinstall_cmds" 'exit $?' + fi + + # Install the pseudo-library for information purposes. + func_basename "$file" + name=$func_basename_result + instname=$dir/${name}i + func_show_eval "$install_prog $instname $destdir/$name" 'exit $?' + + # Maybe install the static library, too. + test -n "$old_library" && func_append staticlibs " $dir/$old_library" + ;; + + *.lo) + # Install (i.e. copy) a libtool object. + + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile=$destdir/$destname + else + func_basename "$file" + destfile=$func_basename_result + destfile=$destdir/$destfile + fi + + # Deduce the name of the destination old-style object file. 
+ case $destfile in + *.lo) + func_lo2o "$destfile" + staticdest=$func_lo2o_result + ;; + *.$objext) + staticdest=$destfile + destfile= + ;; + *) + func_fatal_help "cannot copy a libtool object to '$destfile'" + ;; + esac + + # Install the libtool object if requested. + test -n "$destfile" && \ + func_show_eval "$install_prog $file $destfile" 'exit $?' + + # Install the old object if enabled. + if test yes = "$build_old_libs"; then + # Deduce the name of the old-style object file. + func_lo2o "$file" + staticobj=$func_lo2o_result + func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?' + fi + exit $EXIT_SUCCESS + ;; + + *) + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile=$destdir/$destname + else + func_basename "$file" + destfile=$func_basename_result + destfile=$destdir/$destfile + fi + + # If the file is missing, and there is a .exe on the end, strip it + # because it is most likely a libtool script we actually want to + # install + stripped_ext= + case $file in + *.exe) + if test ! -f "$file"; then + func_stripname '' '.exe' "$file" + file=$func_stripname_result + stripped_ext=.exe + fi + ;; + esac + + # Do a test to see if this is really a libtool program. + case $host in + *cygwin* | *mingw* | *windows*) + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + wrapper=$func_ltwrapper_scriptname_result + else + func_stripname '' '.exe' "$file" + wrapper=$func_stripname_result + fi + ;; + *) + wrapper=$file + ;; + esac + if func_ltwrapper_script_p "$wrapper"; then + notinst_deplibs= + relink_command= + + func_source "$wrapper" + + # Check the variables that should have been set. + test -z "$generated_by_libtool_version" && \ + func_fatal_error "invalid libtool wrapper script '$wrapper'" + + finalize=: + for lib in $notinst_deplibs; do + # Check to see that each library is installed. 
+ libdir= + if test -f "$lib"; then + func_source "$lib" + fi + libfile=$libdir/`$ECHO "$lib" | $SED 's%^.*/%%g'` + if test -n "$libdir" && test ! -f "$libfile"; then + func_warning "'$lib' has not been installed in '$libdir'" + finalize=false + fi + done + + relink_command= + func_source "$wrapper" + + outputname= + if test no = "$fast_install" && test -n "$relink_command"; then + $opt_dry_run || { + if $finalize; then + tmpdir=`func_mktempdir` + func_basename "$file$stripped_ext" + file=$func_basename_result + outputname=$tmpdir/$file + # Replace the output file specification. + relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'` + + $opt_quiet || { + func_quote_arg expand,pretty "$relink_command" + eval "func_echo $func_quote_arg_result" + } + if eval "$relink_command"; then : + else + func_error "error: relink '$file' with the above command before installing it" + $opt_dry_run || ${RM}r "$tmpdir" + continue + fi + file=$outputname + else + func_warning "cannot relink '$file'" + fi + } + else + # Install the binary that we compiled earlier. + file=`$ECHO "$file$stripped_ext" | $SED "s%\([^/]*\)$%$objdir/\1%"` + fi + fi + + # remove .exe since cygwin /usr/bin/install will append another + # one anyway + case $install_prog,$host in + */usr/bin/install*,*cygwin*) + case $file:$destfile in + *.exe:*.exe) + # this is ok + ;; + *.exe:*) + destfile=$destfile.exe + ;; + *:*.exe) + func_stripname '' '.exe' "$destfile" + destfile=$func_stripname_result + ;; + esac + ;; + esac + func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?' + $opt_dry_run || if test -n "$outputname"; then + ${RM}r "$tmpdir" + fi + ;; + esac + done + + for file in $staticlibs; do + func_basename "$file" + name=$func_basename_result + + # Set up the ranlib parameters. + oldlib=$destdir/$name + func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 + tool_oldlib=$func_to_tool_file_result + + func_show_eval "$install_prog \$file \$oldlib" 'exit $?' 
+ + if test -n "$stripme" && test -n "$old_striplib"; then + func_show_eval "$old_striplib $tool_oldlib" 'exit $?' + fi + + # Do each command in the postinstall commands. + func_execute_cmds "$old_postinstall_cmds" 'exit $?' + done + + test -n "$future_libdirs" && \ + func_warning "remember to run '$progname --finish$future_libdirs'" + + if test -n "$current_libdirs"; then + # Maybe just do a dry run. + $opt_dry_run && current_libdirs=" -n$current_libdirs" + exec_cmd='$SHELL "$progpath" $preserve_args --finish$current_libdirs' + else + exit $EXIT_SUCCESS + fi +} + +test install = "$opt_mode" && func_mode_install ${1+"$@"} + + +# func_generate_dlsyms outputname originator pic_p +# Extract symbols from dlprefiles and create ${outputname}S.o with +# a dlpreopen symbol table. +func_generate_dlsyms () +{ + $debug_cmd + + my_outputname=$1 + my_originator=$2 + my_pic_p=${3-false} + my_prefix=`$ECHO "$my_originator" | $SED 's%[^a-zA-Z0-9]%_%g'` + my_dlsyms= + + if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then + if test -n "$NM" && test -n "$global_symbol_pipe"; then + my_dlsyms=${my_outputname}S.c + else + func_error "not configured to extract global symbols from dlpreopened files" + fi + fi + + if test -n "$my_dlsyms"; then + case $my_dlsyms in + "") ;; + *.c) + # Discover the nlist of each of the dlfiles. + nlist=$output_objdir/$my_outputname.nm + + func_show_eval "$RM $nlist ${nlist}S ${nlist}T" + + # Parse the name list into a source file. + func_verbose "creating $output_objdir/$my_dlsyms" + + $opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\ +/* $my_dlsyms - symbol resolution table for '$my_outputname' dlsym emulation. 
*/ +/* Generated by $PROGRAM (GNU $PACKAGE) $VERSION */ + +#ifdef __cplusplus +extern \"C\" { +#endif + +#if defined __GNUC__ && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ > 4)) +#pragma GCC diagnostic ignored \"-Wstrict-prototypes\" +#endif + +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE +/* DATA imports from DLLs on WIN32 can't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT_DLSYM_CONST +#elif defined __osf__ +/* This system does not cope well with relocations in const data. */ +# define LT_DLSYM_CONST +#else +# define LT_DLSYM_CONST const +#endif + +#define STREQ(s1, s2) (strcmp ((s1), (s2)) == 0) + +/* External symbol declarations for the compiler. */\ +" + + if test yes = "$dlself"; then + func_verbose "generating symbol list for '$output'" + + $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist" + + # Add our own program objects to the symbol list. 
+ progfiles=`$ECHO "$objs$old_deplibs" | $SP2NL | $SED "$lo2o" | $NL2SP` + for progfile in $progfiles; do + func_to_tool_file "$progfile" func_convert_file_msys_to_w32 + func_verbose "extracting global C symbols from '$func_to_tool_file_result'" + $opt_dry_run || eval "$NM $func_to_tool_file_result | $global_symbol_pipe >> '$nlist'" + done + + if test -n "$exclude_expsyms"; then + $opt_dry_run || { + eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + } + fi + + if test -n "$export_symbols_regex"; then + $opt_dry_run || { + eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + } + fi + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + export_symbols=$output_objdir/$outputname.exp + $opt_dry_run || { + $RM $export_symbols + eval "$SED -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' + case $host in + *cygwin* | *mingw* | *windows* | *cegcc* ) + eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' + eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"' + ;; + esac + } + else + $opt_dry_run || { + eval "$SED -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"' + eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + case $host in + *cygwin* | *mingw* | *windows* | *cegcc* ) + eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' + eval 'cat "$nlist" >> "$output_objdir/$outputname.def"' + ;; + esac + } + fi + fi + + for dlprefile in $dlprefiles; do + func_verbose "extracting global C symbols from '$dlprefile'" + func_basename "$dlprefile" + name=$func_basename_result + case $host in + *cygwin* | *mingw* | *windows* | *cegcc* ) + # if an import library, we need to obtain dlname + if func_win32_import_lib_p "$dlprefile"; then + func_tr_sh "$dlprefile" + eval 
"curr_lafile=\$libfile_$func_tr_sh_result" + dlprefile_dlbasename= + if test -n "$curr_lafile" && func_lalib_p "$curr_lafile"; then + # Use subshell, to avoid clobbering current variable values + dlprefile_dlname=`source "$curr_lafile" && echo "$dlname"` + if test -n "$dlprefile_dlname"; then + func_basename "$dlprefile_dlname" + dlprefile_dlbasename=$func_basename_result + else + # no lafile. user explicitly requested -dlpreopen . + $sharedlib_from_linklib_cmd "$dlprefile" + dlprefile_dlbasename=$sharedlib_from_linklib_result + fi + fi + $opt_dry_run || { + if test -n "$dlprefile_dlbasename"; then + eval '$ECHO ": $dlprefile_dlbasename" >> "$nlist"' + else + func_warning "Could not compute DLL name from $name" + eval '$ECHO ": $name " >> "$nlist"' + fi + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe | + $SED -e '/I __imp/d' -e 's/I __nm_/D /;s/_nm__//' >> '$nlist'" + } + else # not an import lib + $opt_dry_run || { + eval '$ECHO ": $name " >> "$nlist"' + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" + } + fi + ;; + *) + $opt_dry_run || { + eval '$ECHO ": $name " >> "$nlist"' + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" + } + ;; + esac + done + + $opt_dry_run || { + # Make sure we have at least an empty file. + test -f "$nlist" || : > "$nlist" + + if test -n "$exclude_expsyms"; then + $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T + $MV "$nlist"T "$nlist" + fi + + # Try sorting and uniquifying the output. 
+ if $GREP -v "^: " < "$nlist" | + if sort -k 3 /dev/null 2>&1; then + sort -k 3 + else + sort +2 + fi | + uniq > "$nlist"S; then + : + else + $GREP -v "^: " < "$nlist" > "$nlist"S + fi + + if test -f "$nlist"S; then + eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"' + else + echo '/* NONE */' >> "$output_objdir/$my_dlsyms" + fi + + func_show_eval '$RM "${nlist}I"' + if test -n "$global_symbol_to_import"; then + eval "$global_symbol_to_import"' < "$nlist"S > "$nlist"I' + fi + + echo >> "$output_objdir/$my_dlsyms" "\ + +/* The mapping between symbol names and symbols. */ +typedef struct { + const char *name; + void *address; +} lt_dlsymlist; +extern LT_DLSYM_CONST lt_dlsymlist +lt_${my_prefix}_LTX_preloaded_symbols[];\ +" + + if test -s "$nlist"I; then + echo >> "$output_objdir/$my_dlsyms" "\ +static void lt_syminit(void) +{ + LT_DLSYM_CONST lt_dlsymlist *symbol = lt_${my_prefix}_LTX_preloaded_symbols; + for (; symbol->name; ++symbol) + {" + $SED 's/.*/ if (STREQ (symbol->name, \"&\")) symbol->address = (void *) \&&;/' < "$nlist"I >> "$output_objdir/$my_dlsyms" + echo >> "$output_objdir/$my_dlsyms" "\ + } +}" + fi + echo >> "$output_objdir/$my_dlsyms" "\ +LT_DLSYM_CONST lt_dlsymlist +lt_${my_prefix}_LTX_preloaded_symbols[] = +{ {\"$my_originator\", (void *) 0}," + + if test -s "$nlist"I; then + echo >> "$output_objdir/$my_dlsyms" "\ + {\"@INIT@\", (void *) <_syminit}," + fi + + case $need_lib_prefix in + no) + eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms" + ;; + *) + eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms" + ;; + esac + echo >> "$output_objdir/$my_dlsyms" "\ + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt_${my_prefix}_LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif\ +" + } # !$opt_dry_run + + pic_flag_for_symtable= + case 
"$compile_command " in + *" -static "*) ;; + *) + case $host in + # compiling the symbol table file with pic_flag works around + # a FreeBSD bug that causes programs to crash when -lm is + # linked before any other PIC object. But we must not use + # pic_flag when linking with -static. The problem exists in + # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. + *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) + pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;; + *-*-hpux*) + pic_flag_for_symtable=" $pic_flag" ;; + *) + $my_pic_p && pic_flag_for_symtable=" $pic_flag" + ;; + esac + ;; + esac + symtab_cflags= + for arg in $LTCFLAGS; do + case $arg in + -pie | -fpie | -fPIE) ;; + *) func_append symtab_cflags " $arg" ;; + esac + done + + # Now compile the dynamic symbol file. + func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?' + + # Clean up the generated files. + func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T" "${nlist}I"' + + # Transform the symbol file into the correct name. 
+ symfileobj=$output_objdir/${my_outputname}S.$objext + case $host in + *cygwin* | *mingw* | *windows* | *cegcc* ) + if test -f "$output_objdir/$my_outputname.def"; then + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` + else + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` + fi + ;; + *) + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` + ;; + esac + ;; + *) + func_fatal_error "unknown suffix for '$my_dlsyms'" + ;; + esac + else + # We keep going just in case the user didn't refer to + # lt_preloaded_symbols. The linker will fail if global_symbol_pipe + # really was required. + + # Nullify the symbol file. + compile_command=`$ECHO "$compile_command" | $SED "s% @SYMFILE@%%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s% @SYMFILE@%%"` + fi +} + +# func_cygming_gnu_implib_p ARG +# This predicate returns with zero status (TRUE) if +# ARG is a GNU/binutils-style import library. Returns +# with nonzero status (FALSE) otherwise. +func_cygming_gnu_implib_p () +{ + $debug_cmd + + func_to_tool_file "$1" func_convert_file_msys_to_w32 + func_cygming_gnu_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $EGREP ' (_head_[A-Za-z0-9_]+_[ad]l*|[A-Za-z0-9_]+_[ad]l*_iname)$'` + test -n "$func_cygming_gnu_implib_tmp" +} + +# func_cygming_ms_implib_p ARG +# This predicate returns with zero status (TRUE) if +# ARG is an MS-style import library. Returns +# with nonzero status (FALSE) otherwise. 
+func_cygming_ms_implib_p () +{ + $debug_cmd + + func_to_tool_file "$1" func_convert_file_msys_to_w32 + func_cygming_ms_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $GREP '_NULL_IMPORT_DESCRIPTOR'` + test -n "$func_cygming_ms_implib_tmp" +} + +# func_win32_libid arg +# return the library type of file 'arg' +# +# Need a lot of goo to handle *both* DLLs and import libs +# Has to be a shell function in order to 'eat' the argument +# that is supplied when $file_magic_command is called. +# Despite the name, also deal with 64 bit binaries. +func_win32_libid () +{ + $debug_cmd + + win32_libid_type=unknown + win32_fileres=`file -L $1 2>/dev/null` + case $win32_fileres in + *ar\ archive\ import\ library*) # definitely import + win32_libid_type="x86 archive import" + ;; + *ar\ archive*) # could be an import, or static + # Keep the egrep pattern in sync with the one in _LT_CHECK_MAGIC_METHOD. + if eval $OBJDUMP -f $1 | $SED -e '10q' 2>/dev/null | + $EGREP 'file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' >/dev/null; then + case $nm_interface in + "MS dumpbin") + if func_cygming_ms_implib_p "$1" || + func_cygming_gnu_implib_p "$1" + then + win32_nmres=import + else + win32_nmres= + fi + ;; + *) + func_to_tool_file "$1" func_convert_file_msys_to_w32 + win32_nmres=`eval $NM -f posix -A \"$func_to_tool_file_result\" | + $SED -n -e ' + 1,100{ + / I /{ + s|.*|import| + p + q + } + }'` + ;; + esac + case $win32_nmres in + import*) win32_libid_type="x86 archive import";; + *) win32_libid_type="x86 archive static";; + esac + fi + ;; + *DLL*) + win32_libid_type="x86 DLL" + ;; + *executable*) # but shell scripts are "executable" too... + case $win32_fileres in + *MS\ Windows\ PE\ Intel*) + win32_libid_type="x86 DLL" + ;; + esac + ;; + esac + $ECHO "$win32_libid_type" +} + +# func_cygming_dll_for_implib ARG +# +# Platform-specific function to extract the +# name of the DLL associated with the specified +# import library ARG. 
+# Invoked by eval'ing the libtool variable +# $sharedlib_from_linklib_cmd +# Result is available in the variable +# $sharedlib_from_linklib_result +func_cygming_dll_for_implib () +{ + $debug_cmd + + sharedlib_from_linklib_result=`$DLLTOOL --identify-strict --identify "$1"` +} + +# func_cygming_dll_for_implib_fallback_core SECTION_NAME LIBNAMEs +# +# The is the core of a fallback implementation of a +# platform-specific function to extract the name of the +# DLL associated with the specified import library LIBNAME. +# +# SECTION_NAME is either .idata$6 or .idata$7, depending +# on the platform and compiler that created the implib. +# +# Echos the name of the DLL associated with the +# specified import library. +func_cygming_dll_for_implib_fallback_core () +{ + $debug_cmd + + match_literal=`$ECHO "$1" | $SED "$sed_make_literal_regex"` + $OBJDUMP -s --section "$1" "$2" 2>/dev/null | + $SED '/^Contents of section '"$match_literal"':/{ + # Place marker at beginning of archive member dllname section + s/.*/====MARK====/ + p + d + } + # These lines can sometimes be longer than 43 characters, but + # are always uninteresting + /:[ ]*file format pe[i]\{,1\}-/d + /^In archive [^:]*:/d + # Ensure marker is printed + /^====MARK====/p + # Remove all lines with less than 43 characters + /^.\{43\}/!d + # From remaining lines, remove first 43 characters + s/^.\{43\}//' | + $SED -n ' + # Join marker and all lines until next marker into a single line + /^====MARK====/ b para + H + $ b para + b + :para + x + s/\n//g + # Remove the marker + s/^====MARK====// + # Remove trailing dots and whitespace + s/[\. \t]*$// + # Print + /./p' | + # we now have a list, one entry per line, of the stringified + # contents of the appropriate section of all members of the + # archive that possess that section. Heuristic: eliminate + # all those that have a first or second character that is + # a '.' (that is, objdump's representation of an unprintable + # character.) 
This should work for all archives with less than + # 0x302f exports -- but will fail for DLLs whose name actually + # begins with a literal '.' or a single character followed by + # a '.'. + # + # Of those that remain, print the first one. + $SED -e '/^\./d;/^.\./d;q' +} + +# func_cygming_dll_for_implib_fallback ARG +# Platform-specific function to extract the +# name of the DLL associated with the specified +# import library ARG. +# +# This fallback implementation is for use when $DLLTOOL +# does not support the --identify-strict option. +# Invoked by eval'ing the libtool variable +# $sharedlib_from_linklib_cmd +# Result is available in the variable +# $sharedlib_from_linklib_result +func_cygming_dll_for_implib_fallback () +{ + $debug_cmd + + if func_cygming_gnu_implib_p "$1"; then + # binutils import library + sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$7' "$1"` + elif func_cygming_ms_implib_p "$1"; then + # ms-generated import library + sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$6' "$1"` + else + # unknown + sharedlib_from_linklib_result= + fi +} + + +# func_extract_an_archive dir oldlib +func_extract_an_archive () +{ + $debug_cmd + + f_ex_an_ar_dir=$1; shift + f_ex_an_ar_oldlib=$1 + if test yes = "$lock_old_archive_extraction"; then + lockfile=$f_ex_an_ar_oldlib.lock + until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do + func_echo "Waiting for $lockfile to be removed" + sleep 2 + done + fi + func_show_eval "(cd \$f_ex_an_ar_dir && $AR x \"\$f_ex_an_ar_oldlib\")" \ + 'stat=$?; rm -f "$lockfile"; exit $stat' + if test yes = "$lock_old_archive_extraction"; then + $opt_dry_run || rm -f "$lockfile" + fi + if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then + : + else + func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib" + fi +} + + +# func_extract_archives gentop oldlib ... 
+func_extract_archives () +{ + $debug_cmd + + my_gentop=$1; shift + my_oldlibs=${1+"$@"} + my_oldobjs= + my_xlib= + my_xabs= + my_xdir= + + for my_xlib in $my_oldlibs; do + # Extract the objects. + case $my_xlib in + [\\/]* | [A-Za-z]:[\\/]*) my_xabs=$my_xlib ;; + *) my_xabs=`pwd`"/$my_xlib" ;; + esac + func_basename "$my_xlib" + my_xlib=$func_basename_result + my_xlib_u=$my_xlib + while :; do + case " $extracted_archives " in + *" $my_xlib_u "*) + func_arith $extracted_serial + 1 + extracted_serial=$func_arith_result + my_xlib_u=lt$extracted_serial-$my_xlib ;; + *) break ;; + esac + done + extracted_archives="$extracted_archives $my_xlib_u" + my_xdir=$my_gentop/$my_xlib_u + + func_mkdir_p "$my_xdir" + + case $host in + *-darwin*) + func_verbose "Extracting $my_xabs" + # Do not bother doing anything if just a dry run + $opt_dry_run || { + darwin_orig_dir=`pwd` + cd $my_xdir || exit $? + darwin_archive=$my_xabs + darwin_curdir=`pwd` + func_basename "$darwin_archive" + darwin_base_archive=$func_basename_result + darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true` + if test -n "$darwin_arches"; then + darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'` + darwin_arch= + func_verbose "$darwin_base_archive has multiple architectures $darwin_arches" + for darwin_arch in $darwin_arches; do + func_mkdir_p "unfat-$$/$darwin_base_archive-$darwin_arch" + $LIPO -thin $darwin_arch -output "unfat-$$/$darwin_base_archive-$darwin_arch/$darwin_base_archive" "$darwin_archive" + cd "unfat-$$/$darwin_base_archive-$darwin_arch" + func_extract_an_archive "`pwd`" "$darwin_base_archive" + cd "$darwin_curdir" + $RM "unfat-$$/$darwin_base_archive-$darwin_arch/$darwin_base_archive" + done # $darwin_arches + ## Okay now we've a bunch of thin objects, gotta fatten them up :) + darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$sed_basename" | sort -u` + darwin_file= + darwin_files= + for 
darwin_file in $darwin_filelist; do + darwin_files=`find unfat-$$ -name $darwin_file -print | sort | $NL2SP` + $LIPO -create -output "$darwin_file" $darwin_files + done # $darwin_filelist + $RM -rf unfat-$$ + cd "$darwin_orig_dir" + else + cd $darwin_orig_dir + func_extract_an_archive "$my_xdir" "$my_xabs" + fi # $darwin_arches + } # !$opt_dry_run + ;; + *) + func_extract_an_archive "$my_xdir" "$my_xabs" + ;; + esac + my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | sort | $NL2SP` + done + + func_extract_archives_result=$my_oldobjs +} + + +# func_emit_wrapper [arg=no] +# +# Emit a libtool wrapper script on stdout. +# Don't directly open a file because we may want to +# incorporate the script contents within a cygwin/mingw/windows +# wrapper executable. Must ONLY be called from within +# func_mode_link because it depends on a number of variables +# set therein. +# +# ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR +# variable will take. If 'yes', then the emitted script +# will assume that the directory where it is stored is +# the $objdir directory. This is a cygwin/mingw/windows-specific +# behavior. +func_emit_wrapper () +{ + func_emit_wrapper_arg1=${1-no} + + $ECHO "\ +#! $SHELL + +# $output - temporary wrapper script for $objdir/$outputname +# Generated by $PROGRAM (GNU $PACKAGE) $VERSION +# +# The $output program cannot be directly executed until all the libtool +# libraries that it depends on are installed. +# +# This wrapper script should never be moved out of the build directory. +# If it is, it will not operate correctly. + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. 
+sed_quote_subst='$sed_quote_subst' + +# Be Bourne compatible +if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +relink_command=\"$relink_command\" + +# This environment variable determines our operation mode. +if test \"\$libtool_install_magic\" = \"$magic\"; then + # install mode needs the following variables: + generated_by_libtool_version='$macro_version' + notinst_deplibs='$notinst_deplibs' +else + # When we are sourced in execute mode, \$file and \$ECHO are already set. + if test \"\$libtool_execute_magic\" != \"$magic\"; then + file=\"\$0\"" + + func_quote_arg pretty "$ECHO" + qECHO=$func_quote_arg_result + $ECHO "\ + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + ECHO=$qECHO + fi + +# Very basic option parsing. These options are (a) specific to +# the libtool wrapper, (b) are identical between the wrapper +# /script/ and the wrapper /executable/ that is used only on +# windows platforms, and (c) all begin with the string "--lt-" +# (application programs are unlikely to have options that match +# this pattern). +# +# There are only two supported options: --lt-debug and +# --lt-dump-script. There is, deliberately, no --lt-help. +# +# The first argument to this parsing function should be the +# script's $0 value, followed by "$@". 
+lt_option_debug= +func_parse_lt_options () +{ + lt_script_arg0=\$0 + shift + for lt_opt + do + case \"\$lt_opt\" in + --lt-debug) lt_option_debug=1 ;; + --lt-dump-script) + lt_dump_D=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%/[^/]*$%%'\` + test \"X\$lt_dump_D\" = \"X\$lt_script_arg0\" && lt_dump_D=. + lt_dump_F=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%^.*/%%'\` + cat \"\$lt_dump_D/\$lt_dump_F\" + exit 0 + ;; + --lt-*) + \$ECHO \"Unrecognized --lt- option: '\$lt_opt'\" 1>&2 + exit 1 + ;; + esac + done + + # Print the debug banner immediately: + if test -n \"\$lt_option_debug\"; then + echo \"$outputname:$output:\$LINENO: libtool wrapper (GNU $PACKAGE) $VERSION\" 1>&2 + fi +} + +# Used when --lt-debug. Prints its arguments to stdout +# (redirection is the responsibility of the caller) +func_lt_dump_args () +{ + lt_dump_args_N=1; + for lt_arg + do + \$ECHO \"$outputname:$output:\$LINENO: newargv[\$lt_dump_args_N]: \$lt_arg\" + lt_dump_args_N=\`expr \$lt_dump_args_N + 1\` + done +} + +# Core function for launching the target application +func_exec_program_core () +{ +" + case $host in + # Backslashes separate directories on plain windows + *-*-mingw* | *-*-windows* | *-*-os2* | *-cegcc*) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"$outputname:$output:\$LINENO: newargv[0]: \$progdir\\\\\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir\\\\\$program\" \${1+\"\$@\"} +" + ;; + + *) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"$outputname:$output:\$LINENO: newargv[0]: \$progdir/\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir/\$program\" \${1+\"\$@\"} +" + ;; + esac + $ECHO "\ + \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2 + exit 1 +} + +# A function to encapsulate launching the target application +# Strips options in the --lt-* namespace from \$@ and +# launches target application with the remaining arguments. 
+func_exec_program () +{ + case \" \$* \" in + *\\ --lt-*) + for lt_wr_arg + do + case \$lt_wr_arg in + --lt-*) ;; + *) set x \"\$@\" \"\$lt_wr_arg\"; shift;; + esac + shift + done ;; + esac + func_exec_program_core \${1+\"\$@\"} +} + + # Parse options + func_parse_lt_options \"\$0\" \${1+\"\$@\"} + + # Find the directory that this script lives in. + thisdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*$%%'\` + test \"x\$thisdir\" = \"x\$file\" && thisdir=. + + # Follow symbolic links until we get to the real thisdir. + file=\`ls -ld \"\$file\" | $SED -n 's/.*-> //p'\` + while test -n \"\$file\"; do + destdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*\$%%'\` + + # If there was a directory component, then change thisdir. + if test \"x\$destdir\" != \"x\$file\"; then + case \"\$destdir\" in + [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; + *) thisdir=\"\$thisdir/\$destdir\" ;; + esac + fi + + file=\`\$ECHO \"\$file\" | $SED 's%^.*/%%'\` + file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\` + done + + # Usually 'no', except on cygwin/mingw/windows when embedded into + # the cwrapper. + WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1 + if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then + # special case for '.' + if test \"\$thisdir\" = \".\"; then + thisdir=\`pwd\` + fi + # remove .libs from thisdir + case \"\$thisdir\" in + *[\\\\/]$objdir ) thisdir=\`\$ECHO \"\$thisdir\" | $SED 's%[\\\\/][^\\\\/]*$%%'\` ;; + $objdir ) thisdir=. ;; + esac + fi + + # Try to get the absolute directory name. + absdir=\`cd \"\$thisdir\" && pwd\` + test -n \"\$absdir\" && thisdir=\"\$absdir\" +" + + if test yes = "$fast_install"; then + $ECHO "\ + program=lt-'$outputname'$exeext + progdir=\"\$thisdir/$objdir\" + + if test ! -f \"\$progdir/\$program\" || + { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | $SED 1q\`; \\ + test \"X\$file\" != \"X\$progdir/\$program\"; }; then + + file=\"\$\$-\$program\" + + if test ! 
-d \"\$progdir\"; then + $MKDIR \"\$progdir\" + else + $RM \"\$progdir/\$file\" + fi" + + $ECHO "\ + + # relink executable if necessary + if test -n \"\$relink_command\"; then + if relink_command_output=\`eval \$relink_command 2>&1\`; then : + else + \$ECHO \"\$relink_command_output\" >&2 + $RM \"\$progdir/\$file\" + exit 1 + fi + fi + + $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || + { $RM \"\$progdir/\$program\"; + $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; } + $RM \"\$progdir/\$file\" + fi" + else + $ECHO "\ + program='$outputname' + progdir=\"\$thisdir/$objdir\" +" + fi + + $ECHO "\ + + if test -f \"\$progdir/\$program\"; then" + + # fixup the dll searchpath if we need to. + # + # Fix the DLL searchpath if we need to. Do this before prepending + # to shlibpath, because on Windows, both are PATH and uninstalled + # libraries must come first. + if test -n "$dllsearchpath"; then + $ECHO "\ + # Add the dll search path components to the executable PATH + PATH=$dllsearchpath:\$PATH +" + fi + + # Export our shlibpath_var if we have one. + if test yes = "$shlibpath_overrides_runpath" && test -n "$shlibpath_var" && test -n "$temp_rpath"; then + $ECHO "\ + # Add our own library path to $shlibpath_var + $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" + + # Some systems cannot cope with colon-terminated $shlibpath_var + # The second colon is a workaround for a bug in BeOS R4 sed + $shlibpath_var=\`\$ECHO \"\$$shlibpath_var\" | $SED 's/::*\$//'\` + + export $shlibpath_var +" + fi + + $ECHO "\ + if test \"\$libtool_execute_magic\" != \"$magic\"; then + # Run the actual program with our arguments. + func_exec_program \${1+\"\$@\"} + fi + else + # The program doesn't exist. 
+ \$ECHO \"\$0: error: '\$progdir/\$program' does not exist\" 1>&2 + \$ECHO \"This script is just a wrapper for \$program.\" 1>&2 + \$ECHO \"See the $PACKAGE documentation for more information.\" 1>&2 + exit 1 + fi +fi\ +" +} + + +# func_emit_cwrapperexe_src +# emit the source code for a wrapper executable on stdout +# Must ONLY be called from within func_mode_link because +# it depends on a number of variable set therein. +func_emit_cwrapperexe_src () +{ + cat < +#include +#if defined _WIN32 && !defined __GNUC__ +# include +# include +# include +#else +# include +# include +# ifdef __CYGWIN__ +# include +# endif +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#define STREQ(s1, s2) (strcmp ((s1), (s2)) == 0) + +/* declarations of non-ANSI functions */ +#if defined __MINGW32__ +# ifdef __STRICT_ANSI__ +_CRTIMP int __cdecl _putenv (const char *); +# endif +#elif defined __CYGWIN__ +# ifdef __STRICT_ANSI__ +char *realpath (const char *, char *); +int putenv (char *); +int setenv (const char *, const char *, int); +# endif +/* #elif defined other_platform || defined ... */ +#endif + +/* portability defines, excluding path handling macros */ +#if defined _MSC_VER +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +# define S_IXUSR _S_IEXEC +#elif defined __MINGW32__ +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +#elif defined __CYGWIN__ +# define HAVE_SETENV +# define FOPEN_WB "wb" +/* #elif defined other platforms ... 
*/ +#endif + +#if defined PATH_MAX +# define LT_PATHMAX PATH_MAX +#elif defined MAXPATHLEN +# define LT_PATHMAX MAXPATHLEN +#else +# define LT_PATHMAX 1024 +#endif + +#ifndef S_IXOTH +# define S_IXOTH 0 +#endif +#ifndef S_IXGRP +# define S_IXGRP 0 +#endif + +/* path handling portability macros */ +#ifndef DIR_SEPARATOR +# define DIR_SEPARATOR '/' +# define PATH_SEPARATOR ':' +#endif + +#if defined _WIN32 || defined __MSDOS__ || defined __DJGPP__ || \ + defined __OS2__ +# define HAVE_DOS_BASED_FILE_SYSTEM +# define FOPEN_WB "wb" +# ifndef DIR_SEPARATOR_2 +# define DIR_SEPARATOR_2 '\\' +# endif +# ifndef PATH_SEPARATOR_2 +# define PATH_SEPARATOR_2 ';' +# endif +#endif + +#ifndef DIR_SEPARATOR_2 +# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR) +#else /* DIR_SEPARATOR_2 */ +# define IS_DIR_SEPARATOR(ch) \ + (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2)) +#endif /* DIR_SEPARATOR_2 */ + +#ifndef PATH_SEPARATOR_2 +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR) +#else /* PATH_SEPARATOR_2 */ +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2) +#endif /* PATH_SEPARATOR_2 */ + +#ifndef FOPEN_WB +# define FOPEN_WB "w" +#endif +#ifndef _O_BINARY +# define _O_BINARY 0 +#endif + +#define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type))) +#define XFREE(stale) do { \ + if (stale) { free (stale); stale = 0; } \ +} while (0) + +#if defined LT_DEBUGWRAPPER +static int lt_debug = 1; +#else +static int lt_debug = 0; +#endif + +const char *program_name = "libtool-wrapper"; /* in case xstrdup fails */ + +void *xmalloc (size_t num); +char *xstrdup (const char *string); +const char *base_name (const char *name); +char *find_executable (const char *wrapper); +char *chase_symlinks (const char *pathspec); +int make_executable (const char *path); +int check_executable (const char *path); +char *strendzap (char *str, const char *pat); +void lt_debugprintf (const char *file, int line, const char *fmt, ...); +void lt_fatal (const char *file, int line, const 
char *message, ...); +static const char *nonnull (const char *s); +static const char *nonempty (const char *s); +void lt_setenv (const char *name, const char *value); +char *lt_extend_str (const char *orig_value, const char *add, int to_end); +void lt_update_exe_path (const char *name, const char *value); +void lt_update_lib_path (const char *name, const char *value); +char **prepare_spawn (char **argv); +void lt_dump_script (FILE *f); +EOF + + cat <= 0) + && (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) + return 1; + else + return 0; +} + +int +make_executable (const char *path) +{ + int rval = 0; + struct stat st; + + lt_debugprintf (__FILE__, __LINE__, "(make_executable): %s\n", + nonempty (path)); + if ((!path) || (!*path)) + return 0; + + if (stat (path, &st) >= 0) + { + rval = chmod (path, st.st_mode | S_IXOTH | S_IXGRP | S_IXUSR); + } + return rval; +} + +/* Searches for the full path of the wrapper. Returns + newly allocated full path name if found, NULL otherwise + Does not chase symlinks, even on platforms that support them. +*/ +char * +find_executable (const char *wrapper) +{ + int has_slash = 0; + const char *p; + const char *p_next; + /* static buffer for getcwd */ + char tmp[LT_PATHMAX + 1]; + size_t tmp_len; + char *concat_name; + + lt_debugprintf (__FILE__, __LINE__, "(find_executable): %s\n", + nonempty (wrapper)); + + if ((wrapper == NULL) || (*wrapper == '\0')) + return NULL; + + /* Absolute path? 
*/ +#if defined HAVE_DOS_BASED_FILE_SYSTEM + if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':') + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + else + { +#endif + if (IS_DIR_SEPARATOR (wrapper[0])) + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } +#if defined HAVE_DOS_BASED_FILE_SYSTEM + } +#endif + + for (p = wrapper; *p; p++) + if (*p == '/') + { + has_slash = 1; + break; + } + if (!has_slash) + { + /* no slashes; search PATH */ + const char *path = getenv ("PATH"); + if (path != NULL) + { + for (p = path; *p; p = p_next) + { + const char *q; + size_t p_len; + for (q = p; *q; q++) + if (IS_PATH_SEPARATOR (*q)) + break; + p_len = (size_t) (q - p); + p_next = (*q == '\0' ? q : q + 1); + if (p_len == 0) + { + /* empty path: current directory */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = + XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + } + else + { + concat_name = + XMALLOC (char, p_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, p, p_len); + concat_name[p_len] = '/'; + strcpy (concat_name + p_len + 1, wrapper); + } + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + } + /* not found in PATH; assume curdir */ + } + /* Relative path | not found in path: prepend cwd */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + + if 
(check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + return NULL; +} + +char * +chase_symlinks (const char *pathspec) +{ +#ifndef S_ISLNK + return xstrdup (pathspec); +#else + char buf[LT_PATHMAX]; + struct stat s; + char *tmp_pathspec = xstrdup (pathspec); + char *p; + int has_symlinks = 0; + while (strlen (tmp_pathspec) && !has_symlinks) + { + lt_debugprintf (__FILE__, __LINE__, + "checking path component for symlinks: %s\n", + tmp_pathspec); + if (lstat (tmp_pathspec, &s) == 0) + { + if (S_ISLNK (s.st_mode) != 0) + { + has_symlinks = 1; + break; + } + + /* search backwards for last DIR_SEPARATOR */ + p = tmp_pathspec + strlen (tmp_pathspec) - 1; + while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + p--; + if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + { + /* no more DIR_SEPARATORS left */ + break; + } + *p = '\0'; + } + else + { + lt_fatal (__FILE__, __LINE__, + "error accessing file \"%s\": %s", + tmp_pathspec, nonnull (strerror (errno))); + } + } + XFREE (tmp_pathspec); + + if (!has_symlinks) + { + return xstrdup (pathspec); + } + + tmp_pathspec = realpath (pathspec, buf); + if (tmp_pathspec == 0) + { + lt_fatal (__FILE__, __LINE__, + "could not follow symlinks for %s", pathspec); + } + return xstrdup (tmp_pathspec); +#endif +} + +char * +strendzap (char *str, const char *pat) +{ + size_t len, patlen; + + assert (str != NULL); + assert (pat != NULL); + + len = strlen (str); + patlen = strlen (pat); + + if (patlen <= len) + { + str += len - patlen; + if (STREQ (str, pat)) + *str = '\0'; + } + return str; +} + +void +lt_debugprintf (const char *file, int line, const char *fmt, ...) 
+{ + va_list args; + if (lt_debug) + { + (void) fprintf (stderr, "%s:%s:%d: ", program_name, file, line); + va_start (args, fmt); + (void) vfprintf (stderr, fmt, args); + va_end (args); + } +} + +static void +lt_error_core (int exit_status, const char *file, + int line, const char *mode, + const char *message, va_list ap) +{ + fprintf (stderr, "%s:%s:%d: %s: ", program_name, file, line, mode); + vfprintf (stderr, message, ap); + fprintf (stderr, ".\n"); + + if (exit_status >= 0) + exit (exit_status); +} + +void +lt_fatal (const char *file, int line, const char *message, ...) +{ + va_list ap; + va_start (ap, message); + lt_error_core (EXIT_FAILURE, file, line, "FATAL", message, ap); + va_end (ap); +} + +static const char * +nonnull (const char *s) +{ + return s ? s : "(null)"; +} + +static const char * +nonempty (const char *s) +{ + return (s && !*s) ? "(empty)" : nonnull (s); +} + +void +lt_setenv (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_setenv) setting '%s' to '%s'\n", + nonnull (name), nonnull (value)); + { +#ifdef HAVE_SETENV + /* always make a copy, for consistency with !HAVE_SETENV */ + char *str = xstrdup (value); + setenv (name, str, 1); +#else + size_t len = strlen (name) + 1 + strlen (value) + 1; + char *str = XMALLOC (char, len); + sprintf (str, "%s=%s", name, value); + if (putenv (str) != EXIT_SUCCESS) + { + XFREE (str); + } +#endif + } +} + +char * +lt_extend_str (const char *orig_value, const char *add, int to_end) +{ + char *new_value; + if (orig_value && *orig_value) + { + size_t orig_value_len = strlen (orig_value); + size_t add_len = strlen (add); + new_value = XMALLOC (char, add_len + orig_value_len + 1); + if (to_end) + { + strcpy (new_value, orig_value); + strcpy (new_value + orig_value_len, add); + } + else + { + strcpy (new_value, add); + strcpy (new_value + add_len, orig_value); + } + } + else + { + new_value = xstrdup (add); + } + return new_value; +} + +void +lt_update_exe_path (const char *name, 
const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_exe_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); + + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); + /* some systems can't cope with a ':'-terminated path #' */ + size_t len = strlen (new_value); + while ((len > 0) && IS_PATH_SEPARATOR (new_value[len-1])) + { + new_value[--len] = '\0'; + } + lt_setenv (name, new_value); + XFREE (new_value); + } +} + +void +lt_update_lib_path (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_lib_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); + + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); + lt_setenv (name, new_value); + XFREE (new_value); + } +} + +EOF + case $host_os in + mingw* | windows*) + cat <<"EOF" + +/* Prepares an argument vector before calling spawn(). + Note that spawn() does not by itself call the command interpreter + (getenv ("COMSPEC") != NULL ? getenv ("COMSPEC") : + ({ OSVERSIONINFO v; v.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + GetVersionEx(&v); + v.dwPlatformId == VER_PLATFORM_WIN32_NT; + }) ? "cmd.exe" : "command.com"). + Instead it simply concatenates the arguments, separated by ' ', and calls + CreateProcess(). We must quote the arguments since Win32 CreateProcess() + interprets characters like ' ', '\t', '\\', '"' (but not '<' and '>') in a + special way: + - Space and tab are interpreted as delimiters. They are not treated as + delimiters if they are surrounded by double quotes: "...". + - Unescaped double quotes are removed from the input. Their only effect is + that within double quotes, space and tab are treated like normal + characters. + - Backslashes not followed by double quotes are not special. 
+   - But 2*n+1 backslashes followed by a double quote become
+     n backslashes followed by a double quote (n >= 0):
+       \" -> "
+       \\\" -> \"
+       \\\\\" -> \\"
+ */
+#define SHELL_SPECIAL_CHARS "\"\\ \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
+#define SHELL_SPACE_CHARS " \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
+/* Build a quoted copy of the NULL-terminated vector ARGV.
+   The returned vector comes from XMALLOC and is itself NULL-terminated.
+   An empty argument becomes the two-character string "\"\""; an argument
+   containing any SHELL_SPECIAL_CHARS is rewritten into a fresh buffer;
+   every other argument is stored as the ORIGINAL pointer from ARGV, so the
+   result mixes new and borrowed strings.  */
+char **
+prepare_spawn (char **argv)
+{
+  size_t argc;
+  char **new_argv;
+  size_t i;
+
+  /* Count number of arguments. */
+  for (argc = 0; argv[argc] != NULL; argc++)
+    ;
+
+  /* Allocate new argument vector. */
+  new_argv = XMALLOC (char *, argc + 1);
+
+  /* Put quoted arguments into the new argument vector. */
+  for (i = 0; i < argc; i++)
+    {
+      const char *string = argv[i];
+
+      if (string[0] == '\0')
+        new_argv[i] = xstrdup ("\"\"");
+      else if (strpbrk (string, SHELL_SPECIAL_CHARS) != NULL)
+        {
+          /* Surrounding quotes are only needed when the argument contains
+             a space or control character; a lone '"' or '\\' is handled
+             by backslash escaping alone.  */
+          int quote_around = (strpbrk (string, SHELL_SPACE_CHARS) != NULL);
+          size_t length;
+          unsigned int backslashes;
+          const char *s;
+          char *quoted_string;
+          char *p;
+
+          /* First pass: compute the length of the quoted form.
+             BACKSLASHES counts the run of '\\' immediately before the
+             current character.  */
+          length = 0;
+          backslashes = 0;
+          if (quote_around)
+            length++;
+          for (s = string; *s != '\0'; s++)
+            {
+              char c = *s;
+              /* A '"' needs the preceding backslash run doubled plus one
+                 extra backslash to escape the quote itself.  */
+              if (c == '"')
+                length += backslashes + 1;
+              length++;
+              if (c == '\\')
+                backslashes++;
+              else
+                backslashes = 0;
+            }
+          /* Trailing backslashes are doubled so they do not escape the
+             closing quote, which accounts for the "+ 1".  */
+          if (quote_around)
+            length += backslashes + 1;
+
+          quoted_string = XMALLOC (char, length + 1);
+
+          /* Second pass: emit the quoted form, mirroring the length
+             computation above step for step.  */
+          p = quoted_string;
+          backslashes = 0;
+          if (quote_around)
+            *p++ = '"';
+          for (s = string; *s != '\0'; s++)
+            {
+              char c = *s;
+              if (c == '"')
+                {
+                  unsigned int j;
+                  for (j = backslashes + 1; j > 0; j--)
+                    *p++ = '\\';
+                }
+              *p++ = c;
+              if (c == '\\')
+                backslashes++;
+              else
+                backslashes = 0;
+            }
+          if (quote_around)
+            {
+              unsigned int j;
+              for (j = backslashes; j > 0; j--)
+                *p++ = '\\';
+              *p++ = '"';
+            }
+          *p = '\0';
+
+          new_argv[i] = quoted_string;
+        }
+      else
+        /* Nothing to quote: reuse the caller's string unchanged.  */
+        new_argv[i] = (char *) string;
+    }
+  new_argv[argc] = NULL;
+
+  return new_argv;
+}
+EOF
+                ;;
+            esac
+
+            cat <<"EOF"
+void lt_dump_script (FILE* f)
+{
+EOF
+            # Generate the body of lt_dump_script: each line printed by
+            # 'func_emit_wrapper yes' becomes a fputs() call with backslashes
+            # and double quotes escaped and a literal \n appended.  Lines
+            # longer than 79 characters are emitted in 79-character pieces.
+            func_emit_wrapper yes |
+              $SED -n -e '
+s/^\(.\{79\}\)\(..*\)/\1\
+\2/
+h
+s/\([\\"]\)/\\\1/g
+s/$/\\n/
+s/\([^\n]*\).*/ fputs ("\1", f);/p
+g
+D'
+            cat <<"EOF"
+}
+EOF
+}
+# end: func_emit_cwrapperexe_src
+
+# func_win32_import_lib_p ARG
+# True if ARG is an import lib, as indicated by $file_magic_cmd
+# Only the first 10 lines of the command's output are inspected, and its
+# stderr is discarded; the match is on the word "import" anywhere in them.
+func_win32_import_lib_p ()
+{
+    $debug_cmd
+
+    case `eval $file_magic_cmd \"\$1\" 2>/dev/null | $SED -e 10q` in
+    *import*) : ;;
+    *) false ;;
+    esac
+}
+
+# func_suncc_cstd_abi
+# !!ONLY CALL THIS FOR SUN CC AFTER $compile_command IS FULLY EXPANDED!!
+# Several compiler flags select an ABI that is incompatible with the
+# Cstd library. Avoid specifying it if any are in CXXFLAGS.
+# Sets suncc_use_cstd_abi to 'no' when $compile_command contains -compat=g,
+# -std=c++NN, -library=stdcxx4 or -library=stlport4, and to 'yes' otherwise.
+func_suncc_cstd_abi ()
+{
+    $debug_cmd
+
+    case " $compile_command " in
+    *" -compat=g "*|*\ -std=c++[0-9][0-9]\ *|*" -library=stdcxx4 "*|*" -library=stlport4 "*)
+      suncc_use_cstd_abi=no
+      ;;
+    *)
+      suncc_use_cstd_abi=yes
+      ;;
+    esac
+}
+
+# func_mode_link arg...
+func_mode_link ()
+{
+    $debug_cmd
+
+    case $host in
+    *-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-os2* | *-cegcc*)
+      # It is impossible to link a dll without this setting, and
+      # we shouldn't force the makefile maintainer to figure out
+      # what system we are compiling for in order to pass an extra
+      # flag for every libtool invocation.
+      # allow_undefined=no
+
+      # FIXME: Unfortunately, there are problems with the above when trying
+      # to make a dll that has undefined symbols, in which case not
+      # even a static library is built. For now, we need to specify
+      # -no-undefined on the libtool link line when we can be certain
+      # that all symbols are satisfied, otherwise we get a static library.
+ allow_undefined=yes + ;; + *) + allow_undefined=yes + ;; + esac + libtool_args=$nonopt + base_compile="$nonopt $@" + compile_command=$nonopt + finalize_command=$nonopt + + compile_rpath= + finalize_rpath= + compile_shlibpath= + finalize_shlibpath= + convenience= + old_convenience= + deplibs= + old_deplibs= + compiler_flags= + linker_flags= + dllsearchpath= + lib_search_path=`pwd` + inst_prefix_dir= + new_inherited_linker_flags= + + avoid_version=no + bindir= + dlfiles= + dlprefiles= + dlself=no + export_dynamic=no + export_symbols= + export_symbols_regex= + generated= + libobjs= + ltlibs= + module=no + no_install=no + objs= + os2dllname= + non_pic_objects= + precious_files_regex= + prefer_static_libs=no + preload=false + prev= + prevarg= + release= + rpath= + xrpath= + perm_rpath= + temp_rpath= + thread_safe=no + vinfo= + vinfo_number=no + weak_libs= + single_module=$wl-single_module + func_infer_tag $base_compile + + # We need to know -static, to get the right output filenames. + for arg + do + case $arg in + -shared) + test yes != "$build_libtool_libs" \ + && func_fatal_configuration "cannot build a shared library" + build_old_libs=no + break + ;; + -all-static | -static | -static-libtool-libs) + case $arg in + -all-static) + if test yes = "$build_libtool_libs" && test -z "$link_static_flag"; then + func_warning "complete static linking is impossible in this configuration" + fi + if test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + -static) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=built + ;; + -static-libtool-libs) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + esac + build_libtool_libs=no + build_old_libs=yes + break + ;; + esac + done + + # See if our shared archives depend on static archives. 
+ test -n "$old_archive_from_new_cmds" && build_old_libs=yes + + # Go through the arguments, transforming them on the way. + while test "$#" -gt 0; do + arg=$1 + shift + func_quote_arg pretty,unquoted "$arg" + qarg=$func_quote_arg_unquoted_result + func_append libtool_args " $func_quote_arg_result" + + # If the previous option needs an argument, assign it. + if test -n "$prev"; then + case $prev in + output) + func_append compile_command " @OUTPUT@" + func_append finalize_command " @OUTPUT@" + ;; + esac + + case $prev in + bindir) + bindir=$arg + prev= + continue + ;; + dlfiles|dlprefiles) + $preload || { + # Add the symbol object into the linking commands. + func_append compile_command " @SYMFILE@" + func_append finalize_command " @SYMFILE@" + preload=: + } + case $arg in + *.la | *.lo) ;; # We handle these cases below. + force) + if test no = "$dlself"; then + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + self) + if test dlprefiles = "$prev"; then + dlself=yes + elif test dlfiles = "$prev" && test yes != "$dlopen_self"; then + dlself=yes + else + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + *) + if test dlfiles = "$prev"; then + func_append dlfiles " $arg" + else + func_append dlprefiles " $arg" + fi + prev= + continue + ;; + esac + ;; + expsyms) + export_symbols=$arg + test -f "$arg" \ + || func_fatal_error "symbol file '$arg' does not exist" + prev= + continue + ;; + expsyms_regex) + export_symbols_regex=$arg + prev= + continue + ;; + framework) + case $host in + *-*-darwin*) + case "$deplibs " in + *" $qarg.ltframework "*) ;; + *) func_append deplibs " $qarg.ltframework" # this is fixed later + ;; + esac + ;; + esac + prev= + continue + ;; + inst_prefix) + inst_prefix_dir=$arg + prev= + continue + ;; + mllvm) + # Clang does not use LLVM to link, so we can simply discard any + # '-mllvm $arg' options when doing the link step. 
+ prev= + continue + ;; + objectlist) + if test -f "$arg"; then + save_arg=$arg + moreargs= + for fil in `cat "$save_arg"` + do +# func_append moreargs " $fil" + arg=$fil + # A libtool-controlled object. + + # Check to see that this really is a libtool object. + if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test none = "$pic_object" && + test none = "$non_pic_object"; then + func_fatal_error "cannot find name of object for '$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + if test none != "$pic_object"; then + # Prepend the subdirectory the object is found in. + pic_object=$xdir$pic_object + + if test dlfiles = "$prev"; then + if test yes = "$build_libtool_libs" && test yes = "$dlopen_support"; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test dlprefiles = "$prev"; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg=$pic_object + fi + + # Non-PIC object. + if test none != "$non_pic_object"; then + # Prepend the subdirectory the object is found in. + non_pic_object=$xdir$non_pic_object + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test none = "$pic_object"; then + arg=$non_pic_object + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object=$pic_object + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. 
+ func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "'$arg' is not a valid libtool object" + fi + fi + done + else + func_fatal_error "link input file '$arg' does not exist" + fi + arg=$save_arg + prev= + continue + ;; + os2dllname) + os2dllname=$arg + prev= + continue + ;; + precious_regex) + precious_files_regex=$arg + prev= + continue + ;; + release) + release=-$arg + prev= + continue + ;; + rpath | xrpath) + # We need an absolute path. + case $arg in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + if test rpath = "$prev"; then + case "$rpath " in + *" $arg "*) ;; + *) func_append rpath " $arg" ;; + esac + else + case "$xrpath " in + *" $arg "*) ;; + *) func_append xrpath " $arg" ;; + esac + fi + prev= + continue + ;; + shrext) + shrext_cmds=$arg + prev= + continue + ;; + weak) + func_append weak_libs " $arg" + prev= + continue + ;; + xassembler) + func_append compiler_flags " -Xassembler $qarg" + prev= + func_append compile_command " -Xassembler $qarg" + func_append finalize_command " -Xassembler $qarg" + continue + ;; + xcclinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xcompiler) + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xlinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $wl$qarg" + prev= + func_append compile_command " $wl$qarg" + func_append finalize_command " $wl$qarg" + continue + ;; + *) + eval "$prev=\"\$arg\"" + prev= + continue + ;; + esac + fi # test -n "$prev" + + prevarg=$arg + + case $arg in + -all-static) + if 
test -n "$link_static_flag"; then + # See comment for -static flag below, for more details. + func_append compile_command " $link_static_flag" + func_append finalize_command " $link_static_flag" + fi + continue + ;; + + -allow-undefined) + # FIXME: remove this flag sometime in the future. + func_fatal_error "'-allow-undefined' must not be used because it is the default" + ;; + + -avoid-version) + avoid_version=yes + continue + ;; + + -bindir) + prev=bindir + continue + ;; + + -dlopen) + prev=dlfiles + continue + ;; + + -dlpreopen) + prev=dlprefiles + continue + ;; + + -export-dynamic) + export_dynamic=yes + continue + ;; + + -export-symbols | -export-symbols-regex) + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + func_fatal_error "more than one -exported-symbols argument is not allowed" + fi + if test X-export-symbols = "X$arg"; then + prev=expsyms + else + prev=expsyms_regex + fi + continue + ;; + + -framework) + prev=framework + continue + ;; + + -inst-prefix-dir) + prev=inst_prefix + continue + ;; + + # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* + # so, if we see these flags be careful not to treat them like -L + -L[A-Z][A-Z]*:*) + case $with_gcc/$host in + no/*-*-irix* | /*-*-irix*) + func_append compile_command " $arg" + func_append finalize_command " $arg" + ;; + esac + continue + ;; + + -L*) + func_stripname "-L" '' "$arg" + if test -z "$func_stripname_result"; then + if test "$#" -gt 0; then + func_fatal_error "require no space between '-L' and '$1'" + else + func_fatal_error "need path for '-L' option" + fi + fi + func_resolve_sysroot "$func_stripname_result" + dir=$func_resolve_sysroot_result + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + absdir=`cd "$dir" && pwd` + test -z "$absdir" && \ + func_fatal_error "cannot determine absolute directory name of '$dir'" + dir=$absdir + ;; + esac + case "$deplibs " in + *" -L$dir "* | *" $arg "*) + # Will only happen for absolute or sysroot arguments + ;; + *) + # Preserve sysroot, but never include relative directories + case $dir in + [\\/]* | [A-Za-z]:[\\/]* | =*) func_append deplibs " $arg" ;; + *) func_append deplibs " -L$dir" ;; + esac + func_append lib_search_path " $dir" + ;; + esac + case $host in + *-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$dir:"*) ;; + ::) dllsearchpath=$dir;; + *) func_append dllsearchpath ":$dir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + continue + ;; + + -l*) + if test X-lc = "X$arg" || test X-lm = "X$arg"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*) + # These systems don't actually have a C or math library (as such) + continue + ;; + *-*-os2*) + # These systems don't actually have a C library (as such) + test X-lc = "X$arg" && continue + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-midnightbsd*) + # Do not include libc due to us having libc/libc_r. 
+ test X-lc = "X$arg" && continue + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C and math libraries are in the System framework + func_append deplibs " System.ltframework" + continue + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + test X-lc = "X$arg" && continue + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + test X-lc = "X$arg" && continue + ;; + esac + elif test X-lc_r = "X$arg"; then + case $host in + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-midnightbsd*) + # Do not include libc_r directly, use -pthread flag. + continue + ;; + esac + fi + func_append deplibs " $arg" + continue + ;; + + -mllvm) + prev=mllvm + continue + ;; + + -module) + module=yes + continue + ;; + + # Tru64 UNIX uses -model [arg] to determine the layout of C++ + # classes, name mangling, and exception handling. + # Darwin uses the -arch flag to determine output architecture. + -model|-arch|-isysroot|--sysroot) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + prev=xcompiler + continue + ;; + # Solaris ld rejects as of 11.4. Refer to Oracle bug 22985199. 
+ -pthread) + case $host in + *solaris2*) ;; + *) + case "$new_inherited_linker_flags " in + *" $arg "*) ;; + * ) func_append new_inherited_linker_flags " $arg" ;; + esac + ;; + esac + continue + ;; + -mt|-mthreads|-kthread|-Kthread|-pthreads|--thread-safe \ + |-threads|-fopenmp|-fopenmp=*|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + case "$new_inherited_linker_flags " in + *" $arg "*) ;; + * ) func_append new_inherited_linker_flags " $arg" ;; + esac + continue + ;; + + -multi_module) + single_module=$wl-multi_module + continue + ;; + + -no-fast-install) + fast_install=no + continue + ;; + + -no-install) + case $host in + *-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*) + # The PATH hackery in wrapper scripts is required on Windows + # and Darwin in order for the loader to find any dlls it needs. + func_warning "'-no-install' is ignored for $host" + func_warning "assuming '-no-fast-install' instead" + fast_install=no + ;; + *) no_install=yes ;; + esac + continue + ;; + + -no-undefined) + allow_undefined=no + continue + ;; + + -objectlist) + prev=objectlist + continue + ;; + + -os2dllname) + prev=os2dllname + continue + ;; + + -o) prev=output ;; + + -precious-files-regex) + prev=precious_regex + continue + ;; + + -release) + prev=release + continue + ;; + + -rpath) + prev=rpath + continue + ;; + + -R) + prev=xrpath + continue + ;; + + -R*) + func_stripname '-R' '' "$arg" + dir=$func_stripname_result + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + =*) + func_stripname '=' '' "$dir" + dir=$lt_sysroot$func_stripname_result + ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + continue + ;; + + -shared) + # The effects of -shared are defined in a previous loop. 
+ continue + ;; + + -shrext) + prev=shrext + continue + ;; + + -static | -static-libtool-libs) + # The effects of -static are defined in a previous loop. + # We used to do the same as -all-static on platforms that + # didn't have a PIC flag, but the assumption that the effects + # would be equivalent was wrong. It would break on at least + # Digital Unix and AIX. + continue + ;; + + -thread-safe) + thread_safe=yes + continue + ;; + + -version-info) + prev=vinfo + continue + ;; + + -version-number) + prev=vinfo + vinfo_number=yes + continue + ;; + + -weak) + prev=weak + continue + ;; + + -Wc,*) + func_stripname '-Wc,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs=$IFS; IFS=, + for flag in $args; do + IFS=$save_ifs + func_quote_arg pretty "$flag" + func_append arg " $func_quote_arg_result" + func_append compiler_flags " $func_quote_arg_result" + done + IFS=$save_ifs + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Wl,*) + func_stripname '-Wl,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs=$IFS; IFS=, + for flag in $args; do + IFS=$save_ifs + func_quote_arg pretty "$flag" + func_append arg " $wl$func_quote_arg_result" + func_append compiler_flags " $wl$func_quote_arg_result" + func_append linker_flags " $func_quote_arg_result" + done + IFS=$save_ifs + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Xassembler) + prev=xassembler + continue + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Xlinker) + prev=xlinker + continue + ;; + + -XCClinker) + prev=xcclinker + continue + ;; + + # -msg_* for osf cc + -msg_*) + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + ;; + + # Flags to be passed through unchanged, with rationale: + # -64, -mips[0-9] enable 64-bit mode for the SGI compiler + # -r[0-9][0-9]* specify processor for the SGI compiler + # -xarch=*, -xtarget=* enable 64-bit mode for the Sun compiler + # +DA*, +DD* enable 64-bit mode for the HP compiler + # -q* compiler args for the IBM 
compiler + # -m*, -t[45]*, -txscale* architecture-specific flags for GCC + # -F/path path to uninstalled frameworks, gcc on darwin + # -p, -pg, --coverage, -fprofile-* profiling flags for GCC + # -fstack-protector* stack protector flags for GCC + # @file GCC response files + # -tp=* Portland pgcc target processor selection + # --sysroot=* for sysroot support + # -O*, -g*, -flto*, -fwhopr*, -fuse-linker-plugin GCC link-time optimization + # -specs=* GCC specs files + # -stdlib=* select c++ std lib with clang + # -fdiagnostics-color* simply affects output + # -frecord-gcc-switches used to verify flags were respected + # -fsanitize=* Clang/GCC memory and address sanitizer + # -fno-sanitize* Clang/GCC memory and address sanitizer + # -shared-libsan Link with shared sanitizer runtimes (Clang) + # -static-libsan Link with static sanitizer runtimes (Clang) + # -no-canonical-prefixes Do not expand any symbolic links + # -fuse-ld=* Linker select flags for GCC + # -rtlib=* select c runtime lib with clang + # --unwindlib=* select unwinder library with clang + # -f{file|debug|macro|profile}-prefix-map=* needed for lto linking + # -Wa,* Pass flags directly to the assembler + # -Werror, -Werror=* Report (specified) warnings as errors + -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \ + -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \ + -O*|-g*|-flto*|-fwhopr*|-fuse-linker-plugin|-fstack-protector*|-no-canonical-prefixes| \ + -stdlib=*|-rtlib=*|--unwindlib=*| \ + -specs=*|-fsanitize=*|-fno-sanitize*|-shared-libsan|-static-libsan| \ + -ffile-prefix-map=*|-fdebug-prefix-map=*|-fmacro-prefix-map=*|-fprofile-prefix-map=*| \ + -fdiagnostics-color*|-frecord-gcc-switches| \ + -fuse-ld=*|-Wa,*|-Werror|-Werror=*) + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + func_append compile_command " $arg" + func_append finalize_command " $arg" + func_append compiler_flags " $arg" + continue + ;; + + -Z*) + if test os2 = "`expr $host : 
'.*\(os2\)'`"; then + # OS/2 uses -Zxxx to specify OS/2-specific options + compiler_flags="$compiler_flags $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + case $arg in + -Zlinker | -Zstack) + prev=xcompiler + ;; + esac + continue + else + # Otherwise treat like 'Some other compiler flag' below + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + fi + ;; + + # Some other compiler flag. + -* | +*) + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + ;; + + *.$objext) + # A standard object. + func_append objs " $arg" + ;; + + *.lo) + # A libtool-controlled object. + + # Check to see that this really is a libtool object. + if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test none = "$pic_object" && + test none = "$non_pic_object"; then + func_fatal_error "cannot find name of object for '$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + test none = "$pic_object" || { + # Prepend the subdirectory the object is found in. + pic_object=$xdir$pic_object + + if test dlfiles = "$prev"; then + if test yes = "$build_libtool_libs" && test yes = "$dlopen_support"; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test dlprefiles = "$prev"; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg=$pic_object + } + + # Non-PIC object. + if test none != "$non_pic_object"; then + # Prepend the subdirectory the object is found in. 
+ non_pic_object=$xdir$non_pic_object + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test none = "$pic_object"; then + arg=$non_pic_object + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object=$pic_object + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "'$arg' is not a valid libtool object" + fi + fi + ;; + + *.$libext) + # An archive. + func_append deplibs " $arg" + func_append old_deplibs " $arg" + continue + ;; + + *.la) + # A libtool-controlled library. + + func_resolve_sysroot "$arg" + if test dlfiles = "$prev"; then + # This library was specified with -dlopen. + func_append dlfiles " $func_resolve_sysroot_result" + prev= + elif test dlprefiles = "$prev"; then + # The library was specified with -dlpreopen. + func_append dlprefiles " $func_resolve_sysroot_result" + prev= + else + func_append deplibs " $func_resolve_sysroot_result" + fi + continue + ;; + + # Some other compiler argument. + *) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + ;; + esac # arg + + # Now actually substitute the argument into the commands. 
+ if test -n "$arg"; then + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + done # argument parsing loop + + test -n "$prev" && \ + func_fatal_help "the '$prevarg' option requires an argument" + + if test yes = "$export_dynamic" && test -n "$export_dynamic_flag_spec"; then + eval arg=\"$export_dynamic_flag_spec\" + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + + oldlibs= + # calculate the name of the file, without its directory + func_basename "$output" + outputname=$func_basename_result + libobjs_save=$libobjs + + if test -n "$shlibpath_var"; then + # get the directories listed in $shlibpath_var + eval shlib_search_path=\`\$ECHO \"\$$shlibpath_var\" \| \$SED \'s/:/ /g\'\` + else + shlib_search_path= + fi + eval sys_lib_search_path=\"$sys_lib_search_path_spec\" + eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" + + # Definition is injected by LT_CONFIG during libtool generation. + func_munge_path_list sys_lib_dlsearch_path "$LT_SYS_LIBRARY_PATH" + + func_dirname "$output" "/" "" + output_objdir=$func_dirname_result$objdir + func_to_tool_file "$output_objdir/" + tool_output_objdir=$func_to_tool_file_result + # Create the object directory. + func_mkdir_p "$output_objdir" + + # Determine the type of output + case $output in + "") + func_fatal_help "you must specify an output file" + ;; + *.$libext) linkmode=oldlib ;; + *.lo | *.$objext) linkmode=obj ;; + *.la) linkmode=lib ;; + *) linkmode=prog ;; # Anything else should be a program. + esac + + specialdeplibs= + + libs= + # Find all interdependent deplibs by searching for libraries + # that are linked more than once (e.g. 
-la -lb -la) + for deplib in $deplibs; do + if $opt_preserve_dup_deps; then + case "$libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append libs " $deplib" + done + + if test lib = "$linkmode"; then + libs="$predeps $libs $compiler_lib_search_path $postdeps" + + # Compute libraries that are listed more than once in $predeps + # $postdeps and mark them as special (i.e., whose duplicates are + # not to be eliminated). + pre_post_deps= + if $opt_duplicate_compiler_generated_deps; then + for pre_post_dep in $predeps $postdeps; do + case "$pre_post_deps " in + *" $pre_post_dep "*) func_append specialdeplibs " $pre_post_deps" ;; + esac + func_append pre_post_deps " $pre_post_dep" + done + fi + pre_post_deps= + fi + + deplibs= + newdependency_libs= + newlib_search_path= + need_relink=no # whether we're linking any uninstalled libtool libraries + notinst_deplibs= # not-installed libtool libraries + notinst_path= # paths that contain not-installed libtool libraries + + case $linkmode in + lib) + passes="conv dlpreopen link" + for file in $dlfiles $dlprefiles; do + case $file in + *.la) ;; + *) + func_fatal_help "libraries can '-dlopen' only libtool libraries: $file" + ;; + esac + done + ;; + prog) + compile_deplibs= + finalize_deplibs= + alldeplibs=false + newdlfiles= + newdlprefiles= + passes="conv scan dlopen dlpreopen link" + ;; + *) passes="conv" + ;; + esac + + for pass in $passes; do + # The preopen pass in lib mode reverses $deplibs; put it back here + # so that -L comes before libs that need it for instance... 
+ if test lib,link = "$linkmode,$pass"; then + ## FIXME: Find the place where the list is rebuilt in the wrong + ## order, and fix it there properly + tmp_deplibs= + for deplib in $deplibs; do + tmp_deplibs="$deplib $tmp_deplibs" + done + deplibs=$tmp_deplibs + fi + + if test lib,link = "$linkmode,$pass" || + test prog,scan = "$linkmode,$pass"; then + libs=$deplibs + deplibs= + fi + if test prog = "$linkmode"; then + case $pass in + dlopen) libs=$dlfiles ;; + dlpreopen) libs=$dlprefiles ;; + link) libs="$deplibs %DEPLIBS% $dependency_libs" ;; + esac + fi + if test lib,dlpreopen = "$linkmode,$pass"; then + # Collect and forward deplibs of preopened libtool libs + for lib in $dlprefiles; do + # Ignore non-libtool-libs + dependency_libs= + func_resolve_sysroot "$lib" + case $lib in + *.la) func_source "$func_resolve_sysroot_result" ;; + esac + + # Collect preopened libtool deplibs, except any this library + # has declared as weak libs + for deplib in $dependency_libs; do + func_basename "$deplib" + deplib_base=$func_basename_result + case " $weak_libs " in + *" $deplib_base "*) ;; + *) func_append deplibs " $deplib" ;; + esac + done + done + libs=$dlprefiles + fi + if test dlopen = "$pass"; then + # Collect dlpreopened libraries + save_deplibs=$deplibs + deplibs= + fi + + for deplib in $libs; do + lib= + found=false + case $deplib in + -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ + |-threads|-fopenmp|-fopenmp=*|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append compiler_flags " $deplib" + if test lib = "$linkmode"; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -l*) + if test lib != "$linkmode" && test prog != "$linkmode"; then + func_warning "'-l' is ignored for archives/objects" + continue + 
fi + func_stripname '-l' '' "$deplib" + name=$func_stripname_result + if test lib = "$linkmode"; then + searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path" + else + searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path" + fi + for searchdir in $searchdirs; do + for search_ext in .la $std_shrext .so .a; do + # Search the libtool library + lib=$searchdir/lib$name$search_ext + if test -f "$lib"; then + if test .la = "$search_ext"; then + found=: + else + found=false + fi + break 2 + fi + done + done + if $found; then + # deplib is a libtool library + # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib, + # We need to do some special things here, and not later. + if test yes = "$allow_libtool_libs_with_static_runtimes"; then + case " $predeps $postdeps " in + *" $deplib "*) + if func_lalib_p "$lib"; then + library_names= + old_library= + func_source "$lib" + for l in $old_library $library_names; do + ll=$l + done + if test "X$ll" = "X$old_library"; then # only static version available + found=false + func_dirname "$lib" "" "." 
+ ladir=$func_dirname_result + lib=$ladir/$old_library + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test lib = "$linkmode" && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + fi + ;; + *) ;; + esac + fi + else + # deplib doesn't seem to be a libtool library + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test lib = "$linkmode" && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + ;; # -l + *.ltframework) + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + if test lib = "$linkmode"; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -L*) + case $linkmode in + lib) + deplibs="$deplib $deplibs" + test conv = "$pass" && continue + newdependency_libs="$deplib $newdependency_libs" + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + prog) + if test conv = "$pass"; then + deplibs="$deplib $deplibs" + continue + fi + if test scan = "$pass"; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + *) + func_warning "'-L' is ignored for archives/objects" + ;; + esac # linkmode + continue + ;; # -L + -R*) + if test link = "$pass"; then + func_stripname '-R' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + 
dir=$func_resolve_sysroot_result + # Make sure the xrpath contains only unique directories. + case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + fi + deplibs="$deplib $deplibs" + continue + ;; + *.la) + func_resolve_sysroot "$deplib" + lib=$func_resolve_sysroot_result + ;; + *.$libext) + if test conv = "$pass"; then + deplibs="$deplib $deplibs" + continue + fi + case $linkmode in + lib) + # Linking convenience modules into shared libraries is allowed, + # but linking other static libraries is non-portable. + case " $dlpreconveniencelibs " in + *" $deplib "*) ;; + *) + valid_a_lib=false + case $deplibs_check_method in + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + if eval "\$ECHO \"$deplib\"" 2>/dev/null | $SED 10q \ + | $EGREP "$match_pattern_regex" > /dev/null; then + valid_a_lib=: + fi + ;; + pass_all) + valid_a_lib=: + ;; + esac + if $valid_a_lib; then + func_warning "Linking the shared library $output against the static library $deplib is not portable!" + deplibs="$deplib $deplibs" + else + func_warning "Trying to link with static lib archive $deplib." + func_warning "I have the capability to make that library automatically link in when" + func_warning "you link to this library. But I can only do this if you have a" + func_warning "shared version of the library, which you do not appear to have" + func_warning "because the file extensions .$libext of this argument makes me believe" + func_warning "that it is just a static archive that I should not use here." 
+ fi + ;; + esac + continue + ;; + prog) + if test link != "$pass"; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + continue + ;; + esac # linkmode + ;; # *.$libext + *.lo | *.$objext) + if test conv = "$pass"; then + deplibs="$deplib $deplibs" + elif test prog = "$linkmode"; then + if test dlpreopen = "$pass" || test yes != "$dlopen_support" || test no = "$build_libtool_libs"; then + # If there is no dlopen support or we're linking statically, + # we need to preload. + func_append newdlprefiles " $deplib" + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append newdlfiles " $deplib" + fi + fi + continue + ;; + %DEPLIBS%) + alldeplibs=: + continue + ;; + esac # case $deplib + + $found || test -f "$lib" \ + || func_fatal_error "cannot find the library '$lib' or unhandled argument '$deplib'" + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$lib" \ + || func_fatal_error "'$lib' is not a valid libtool archive" + + func_dirname "$lib" "" "." 
+ ladir=$func_dirname_result + + dlname= + dlopen= + dlpreopen= + libdir= + library_names= + old_library= + inherited_linker_flags= + # If the library was installed with an old release of libtool, + # it will not redefine variables installed, or shouldnotlink + installed=yes + shouldnotlink=no + avoidtemprpath= + + + # Read the .la file + func_source "$lib" + + # Convert "-framework foo" to "foo.ltframework" + if test -n "$inherited_linker_flags"; then + tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'` + for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do + case " $new_inherited_linker_flags " in + *" $tmp_inherited_linker_flag "*) ;; + *) func_append new_inherited_linker_flags " $tmp_inherited_linker_flag";; + esac + done + fi + dependency_libs=`$ECHO " $dependency_libs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + if test lib,link = "$linkmode,$pass" || + test prog,scan = "$linkmode,$pass" || + { test prog != "$linkmode" && test lib != "$linkmode"; }; then + test -n "$dlopen" && func_append dlfiles " $dlopen" + test -n "$dlpreopen" && func_append dlprefiles " $dlpreopen" + fi + + if test conv = "$pass"; then + # Only check for convenience libraries + deplibs="$lib $deplibs" + if test -z "$libdir"; then + if test -z "$old_library"; then + func_fatal_error "cannot find name of link library for '$lib'" + fi + # It is a libtool convenience library, so add in its objects. 
+ func_append convenience " $ladir/$objdir/$old_library" + func_append old_convenience " $ladir/$objdir/$old_library" + elif test prog != "$linkmode" && test lib != "$linkmode"; then + func_fatal_error "'$lib' is not a convenience library" + fi + tmp_libs= + for deplib in $dependency_libs; do + deplibs="$deplib $deplibs" + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done + continue + fi # $pass = conv + + + # Get the name of the library we link against. + linklib= + if test -n "$old_library" && + { test yes = "$prefer_static_libs" || + test built,no = "$prefer_static_libs,$installed"; }; then + linklib=$old_library + else + for l in $old_library $library_names; do + linklib=$l + done + fi + if test -z "$linklib"; then + func_fatal_error "cannot find name of link library for '$lib'" + fi + + # This library was specified with -dlopen. + if test dlopen = "$pass"; then + test -z "$libdir" \ + && func_fatal_error "cannot -dlopen a convenience library: '$lib'" + if test -z "$dlname" || + test yes != "$dlopen_support" || + test no = "$build_libtool_libs" + then + # If there is no dlname, no dlopen support or we're linking + # statically, we need to preload. We also need to preload any + # dependent libraries so libltdl's deplib preloader doesn't + # bomb out in the load deplibs phase. + func_append dlprefiles " $lib $dependency_libs" + else + func_append newdlfiles " $lib" + fi + continue + fi # $pass = dlopen + + # We need an absolute path. 
+ case $ladir in + [\\/]* | [A-Za-z]:[\\/]*) abs_ladir=$ladir ;; + *) + abs_ladir=`cd "$ladir" && pwd` + if test -z "$abs_ladir"; then + func_warning "cannot determine absolute directory name of '$ladir'" + func_warning "passing it literally to the linker, although it might fail" + abs_ladir=$ladir + fi + ;; + esac + func_basename "$lib" + laname=$func_basename_result + + # Find the relevant object directory and library name. + if test yes = "$installed"; then + if test ! -f "$lt_sysroot$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then + func_warning "library '$lib' was moved." + dir=$ladir + absdir=$abs_ladir + libdir=$abs_ladir + else + dir=$lt_sysroot$libdir + absdir=$lt_sysroot$libdir + fi + test yes = "$hardcode_automatic" && avoidtemprpath=yes + else + if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then + dir=$ladir + absdir=$abs_ladir + # Remove this search path later + func_append notinst_path " $abs_ladir" + else + dir=$ladir/$objdir + absdir=$abs_ladir/$objdir + # Remove this search path later + func_append notinst_path " $abs_ladir" + fi + fi # $installed = yes + func_stripname 'lib' '.la' "$laname" + name=$func_stripname_result + + # This library was specified with -dlpreopen. + if test dlpreopen = "$pass"; then + if test -z "$libdir" && test prog = "$linkmode"; then + func_fatal_error "only libraries may -dlpreopen a convenience library: '$lib'" + fi + case $host in + # special handling for platforms with PE-DLLs. + *cygwin* | *mingw* | *windows* | *cegcc* ) + # Linker will automatically link against shared library if both + # static and shared are present. Therefore, ensure we extract + # symbols from the import library if a shared library is present + # (otherwise, the dlopen module name will be incorrect). We do + # this by putting the import library name into $newdlprefiles. 
+ # We recover the dlopen module name by 'saving' the la file + # name in a special purpose variable, and (later) extracting the + # dlname from the la file. + if test -n "$dlname"; then + func_tr_sh "$dir/$linklib" + eval "libfile_$func_tr_sh_result=\$abs_ladir/\$laname" + func_append newdlprefiles " $dir/$linklib" + else + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. + test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + fi + ;; + * ) + # Prefer using a static library (so that no silly _DYNAMIC symbols + # are required to link). + if test -n "$old_library"; then + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. + test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + # Otherwise, use the dlname, so that lt_dlopen finds it. 
+ elif test -n "$dlname"; then + func_append newdlprefiles " $dir/$dlname" + else + func_append newdlprefiles " $dir/$linklib" + fi + ;; + esac + fi # $pass = dlpreopen + + if test -z "$libdir"; then + # Link the convenience library + if test lib = "$linkmode"; then + deplibs="$dir/$old_library $deplibs" + elif test prog,link = "$linkmode,$pass"; then + compile_deplibs="$dir/$old_library $compile_deplibs" + finalize_deplibs="$dir/$old_library $finalize_deplibs" + else + deplibs="$lib $deplibs" # used for prog,scan pass + fi + continue + fi + + + if test prog = "$linkmode" && test link != "$pass"; then + func_append newlib_search_path " $ladir" + deplibs="$lib $deplibs" + + linkalldeplibs=false + if test no != "$link_all_deplibs" || test -z "$library_names" || + test no = "$build_libtool_libs"; then + linkalldeplibs=: + fi + + tmp_libs= + for deplib in $dependency_libs; do + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + esac + # Need to link against all dependency_libs? + if $linkalldeplibs; then + deplibs="$deplib $deplibs" + else + # Need to hardcode shared library paths + # or/and link against static libraries + newdependency_libs="$deplib $newdependency_libs" + fi + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done # for deplib + continue + fi # $linkmode = prog... + + if test prog,link = "$linkmode,$pass"; then + if test -n "$library_names" && + { { test no = "$prefer_static_libs" || + test built,yes = "$prefer_static_libs,$installed"; } || + test -z "$old_library"; }; then + # We need to hardcode the library path + if test -n "$shlibpath_var" && test -z "$avoidtemprpath"; then + # Make sure the rpath contains only unique directories. 
+ case $temp_rpath: in + *"$absdir:"*) ;; + *) func_append temp_rpath "$absdir:" ;; + esac + fi + + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi # $linkmode,$pass = prog,link... + + if $alldeplibs && + { test pass_all = "$deplibs_check_method" || + { test yes = "$build_libtool_libs" && + test -n "$library_names"; }; }; then + # We only need to search for static libraries + continue + fi + fi + + link_static=no # Whether the deplib will be linked statically + use_static_libs=$prefer_static_libs + if test built = "$use_static_libs" && test yes = "$installed"; then + use_static_libs=no + fi + if test -n "$library_names" && + { test no = "$use_static_libs" || test -z "$old_library"; }; then + case $host_os in + cygwin* | mingw* | windows* | cegcc* | os2*) + # No point in relinking DLLs because paths are not encoded + func_append notinst_deplibs " $lib" + need_relink=no + ;; + *) + if test no = "$installed"; then + func_append notinst_deplibs " $lib" + need_relink=yes + fi + ;; + esac + # This is a shared library + + # Warn about portability, can't link against -module's on some + # systems (darwin). Don't bleat about dlopened modules though! 
+ dlopenmodule= + for dlpremoduletest in $dlprefiles; do + if test "X$dlpremoduletest" = "X$lib"; then + dlopenmodule=$dlpremoduletest + break + fi + done + if test -z "$dlopenmodule" && test yes = "$shouldnotlink" && test link = "$pass"; then + echo + if test prog = "$linkmode"; then + func_warning "Linking the executable $output against the loadable module" + else + func_warning "Linking the shared library $output against the loadable module" + fi + func_warning "$linklib is not portable!" + fi + if test lib = "$linkmode" && + test yes = "$hardcode_into_libs"; then + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi + + if test -n "$old_archive_from_expsyms_cmds"; then + # figure out the soname + set dummy $library_names + shift + realname=$1 + shift + libname=`eval "\\$ECHO \"$libname_spec\""` + # use dlname if we got it. it's perfectly good, no? 
+ if test -n "$dlname"; then + soname=$dlname + elif test -n "$soname_spec"; then + # bleh windows + case $host_os in + cygwin* | mingw* | windows* | cegcc* | os2*) + func_arith $current - $age + major=$func_arith_result + versuffix=-$major + ;; + esac + eval soname=\"$soname_spec\" + else + soname=$realname + fi + + # Make a new name for the extract_expsyms_cmds to use + soroot=$soname + func_basename "$soroot" + soname=$func_basename_result + func_stripname 'lib' '.dll' "$soname" + newlib=libimp-$func_stripname_result.a + + # If the library has no export list, then create one now + if test -f "$output_objdir/$soname-def"; then : + else + func_verbose "extracting exported symbol list from '$soname'" + func_execute_cmds "$extract_expsyms_cmds" 'exit $?' + fi + + # Create $newlib + if test -f "$output_objdir/$newlib"; then :; else + func_verbose "generating import library for '$soname'" + func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?' + fi + # make sure the library variables are pointing to the new library + dir=$output_objdir + linklib=$newlib + fi # test -n "$old_archive_from_expsyms_cmds" + + if test prog = "$linkmode" || test relink != "$opt_mode"; then + add_shlibpath= + add_dir= + add= + lib_linked=yes + case $hardcode_action in + immediate | unsupported) + if test no = "$hardcode_direct"; then + add=$dir/$linklib + case $host in + *-*-sco3.2v5.0.[024]*) add_dir=-L$dir ;; + *-*-sysv4*uw2*) add_dir=-L$dir ;; + *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \ + *-*-unixware7*) add_dir=-L$dir ;; + *-*-darwin* ) + # if the lib is a (non-dlopened) module then we cannot + # link against it, someone is ignoring the earlier warnings + if /usr/bin/file -L $add 2> /dev/null | + $GREP ": [^:]* bundle" >/dev/null; then + if test "X$dlopenmodule" != "X$lib"; then + func_warning "lib $linklib is a module, not a shared library" + if test -z "$old_library"; then + func_warning "And there doesn't seem to be a static archive available" + func_warning "The 
link will probably fail, sorry" + else + add=$dir/$old_library + fi + elif test -n "$old_library"; then + add=$dir/$old_library + fi + fi + esac + elif test no = "$hardcode_minus_L"; then + case $host in + *-*-sunos*) add_shlibpath=$dir ;; + esac + add_dir=-L$dir + add=-l$name + elif test no = "$hardcode_shlibpath_var"; then + add_shlibpath=$dir + add=-l$name + else + lib_linked=no + fi + ;; + relink) + if test yes = "$hardcode_direct" && + test no = "$hardcode_direct_absolute"; then + add=$dir/$linklib + elif test yes = "$hardcode_minus_L"; then + add_dir=-L$absdir + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add=-l$name + elif test yes = "$hardcode_shlibpath_var"; then + add_shlibpath=$dir + add=-l$name + else + lib_linked=no + fi + ;; + *) lib_linked=no ;; + esac + + if test yes != "$lib_linked"; then + func_fatal_configuration "unsupported hardcode properties" + fi + + if test -n "$add_shlibpath"; then + case :$compile_shlibpath: in + *":$add_shlibpath:"*) ;; + *) func_append compile_shlibpath "$add_shlibpath:" ;; + esac + fi + if test prog = "$linkmode"; then + test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" + test -n "$add" && compile_deplibs="$add $compile_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + if test yes != "$hardcode_direct" && + test yes != "$hardcode_minus_L" && + test yes = "$hardcode_shlibpath_var"; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + fi + fi + fi + + if test prog = "$linkmode" || test relink = "$opt_mode"; then + add_shlibpath= + add_dir= + add= + # Finalize command for both is simple: just hardcode it. 
+ if test yes = "$hardcode_direct" && + test no = "$hardcode_direct_absolute"; then + add=$libdir/$linklib + elif test yes = "$hardcode_minus_L"; then + add_dir=-L$libdir + add=-l$name + elif test yes = "$hardcode_shlibpath_var"; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + add=-l$name + elif test yes = "$hardcode_automatic"; then + if test -n "$inst_prefix_dir" && + test -f "$inst_prefix_dir$libdir/$linklib"; then + add=$inst_prefix_dir$libdir/$linklib + else + add=$libdir/$linklib + fi + else + # We cannot seem to hardcode it, guess we'll fake it. + add_dir=-L$libdir + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add=-l$name + fi + + if test prog = "$linkmode"; then + test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" + test -n "$add" && finalize_deplibs="$add $finalize_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + fi + fi + elif test prog = "$linkmode"; then + # Here we assume that one of hardcode_direct or hardcode_minus_L + # is not unsupported. This is valid on all known static and + # shared platforms. + if test unsupported != "$hardcode_direct"; then + test -n "$old_library" && linklib=$old_library + compile_deplibs="$dir/$linklib $compile_deplibs" + finalize_deplibs="$dir/$linklib $finalize_deplibs" + else + compile_deplibs="-l$name -L$dir $compile_deplibs" + finalize_deplibs="-l$name -L$dir $finalize_deplibs" + fi + elif test yes = "$build_libtool_libs"; then + # Not a shared library + if test pass_all != "$deplibs_check_method"; then + # We're trying link a shared library against a static one + # but the system doesn't support it. 
+ + # Just print a warning and add the library to dependency_libs so + # that the program can be linked against the static library. + func_warning "This system cannot link to static lib archive $lib." + func_warning "I have the capability to make that library automatically link in when" + func_warning "you link to this library. But I can only do this if you have a" + func_warning "shared version of the library, which you do not appear to have." + if test yes = "$module"; then + func_warning "But as you try to build a module library, libtool will still create " + func_warning "a static module, that should work as long as the dlopening application" + func_warning "is linked with the -dlopen flag to resolve symbols at runtime." + if test -z "$global_symbol_pipe"; then + func_warning "However, this would only work if libtool was able to extract symbol" + func_warning "lists from a program, using 'nm' or equivalent, but libtool could" + func_warning "not find such a program. So, this module is probably useless." + func_warning "'nm' from GNU binutils and a full rebuild may help." + fi + if test no = "$build_old_libs"; then + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + else + deplibs="$dir/$old_library $deplibs" + link_static=yes + fi + fi # link shared/static library? 
+ + if test lib = "$linkmode"; then + if test -n "$dependency_libs" && + { test yes != "$hardcode_into_libs" || + test yes = "$build_old_libs" || + test yes = "$link_static"; }; then + # Extract -R from dependency_libs + temp_deplibs= + for libdir in $dependency_libs; do + case $libdir in + -R*) func_stripname '-R' '' "$libdir" + temp_xrpath=$func_stripname_result + case " $xrpath " in + *" $temp_xrpath "*) ;; + *) func_append xrpath " $temp_xrpath";; + esac;; + *) func_append temp_deplibs " $libdir";; + esac + done + dependency_libs=$temp_deplibs + fi + + func_append newlib_search_path " $absdir" + # Link against this library + test no = "$link_static" && newdependency_libs="$abs_ladir/$laname $newdependency_libs" + # ... and its dependency_libs + tmp_libs= + for deplib in $dependency_libs; do + newdependency_libs="$deplib $newdependency_libs" + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result";; + *) func_resolve_sysroot "$deplib" ;; + esac + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $func_resolve_sysroot_result "*) + func_append specialdeplibs " $func_resolve_sysroot_result" ;; + esac + fi + func_append tmp_libs " $func_resolve_sysroot_result" + done + + if test no != "$link_all_deplibs"; then + # Add the search paths of all dependency libraries + for deplib in $dependency_libs; do + path= + case $deplib in + -L*) path=$deplib ;; + *.la) + func_resolve_sysroot "$deplib" + deplib=$func_resolve_sysroot_result + func_dirname "$deplib" "" "." + dir=$func_dirname_result + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) absdir=$dir ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + func_warning "cannot determine absolute directory name of '$dir'" + absdir=$dir + fi + ;; + esac + if $GREP "^installed=no" $deplib > /dev/null; then + case $host in + *-*-darwin*) + depdepl= + eval deplibrary_names=`$SED -n -e 's/^library_names=\(.*\)$/\1/p' $deplib` + if test -n "$deplibrary_names"; then + for tmp in $deplibrary_names; do + depdepl=$tmp + done + if test -f "$absdir/$objdir/$depdepl"; then + depdepl=$absdir/$objdir/$depdepl + darwin_install_name=`$OTOOL -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + if test -z "$darwin_install_name"; then + darwin_install_name=`$OTOOL64 -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + fi + func_append compiler_flags " $wl-dylib_file $wl$darwin_install_name:$depdepl" + func_append linker_flags " -dylib_file $darwin_install_name:$depdepl" + path= + fi + fi + ;; + *) + path=-L$absdir/$objdir + ;; + esac + else + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + test -z "$libdir" && \ + func_fatal_error "'$deplib' is not a valid libtool archive" + test "$absdir" != "$libdir" && \ + func_warning "'$deplib' seems to be moved" + + path=-L$absdir + fi + ;; + esac + case " $deplibs " in + *" $path "*) ;; + *) deplibs="$path $deplibs" ;; + esac + done + fi # link_all_deplibs != no + fi # linkmode = lib + done # for deplib in $libs + if test link = "$pass"; then + if test prog = "$linkmode"; then + compile_deplibs="$new_inherited_linker_flags $compile_deplibs" + finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs" + else + compiler_flags="$compiler_flags "`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + fi + fi + dependency_libs=$newdependency_libs + if test dlpreopen = "$pass"; then + # Link the dlpreopened libraries before other libraries + for deplib in $save_deplibs; do + deplibs="$deplib $deplibs" + done + fi + if test 
dlopen != "$pass"; then + test conv = "$pass" || { + # Make sure lib_search_path contains only unique directories. + lib_search_path= + for dir in $newlib_search_path; do + case "$lib_search_path " in + *" $dir "*) ;; + *) func_append lib_search_path " $dir" ;; + esac + done + newlib_search_path= + } + + if test prog,link = "$linkmode,$pass"; then + vars="compile_deplibs finalize_deplibs" + else + vars=deplibs + fi + for var in $vars dependency_libs; do + # Add libraries to $var in reverse order + eval tmp_libs=\"\$$var\" + new_libs= + for deplib in $tmp_libs; do + # FIXME: Pedantically, this is the right thing to do, so + # that some nasty dependency loop isn't accidentally + # broken: + #new_libs="$deplib $new_libs" + # Pragmatically, this seems to cause very few problems in + # practice: + case $deplib in + -L*) new_libs="$deplib $new_libs" ;; + -R*) ;; + *) + # And here is the reason: when a library appears more + # than once as an explicit dependence of a library, or + # is implicitly linked in more than once by the + # compiler, it is considered special, and multiple + # occurrences thereof are not removed. Compare this + # with having the same library being listed as a + # dependency of multiple other libraries: in this case, + # we know (pedantically, we assume) the library does not + # need to be listed more than once, so we keep only the + # last copy. This is not always right, but it is rare + # enough that we require users that really mean to play + # such unportable linking tricks to link the library + # using -Wl,-lname, so that libtool does not consider it + # for duplicate removal. 
+ case " $specialdeplibs " in + *" $deplib "*) new_libs="$deplib $new_libs" ;; + *) + case " $new_libs " in + *" $deplib "*) ;; + *) new_libs="$deplib $new_libs" ;; + esac + ;; + esac + ;; + esac + done + tmp_libs= + for deplib in $new_libs; do + case $deplib in + -L*) + case " $tmp_libs " in + *" $deplib "*) ;; + *) func_append tmp_libs " $deplib" ;; + esac + ;; + *) func_append tmp_libs " $deplib" ;; + esac + done + eval $var=\"$tmp_libs\" + done # for var + fi + + # Add Sun CC postdeps if required: + test CXX = "$tagname" && { + case $host_os in + linux*) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) # Sun C++ 5.9 + func_suncc_cstd_abi + + if test no != "$suncc_use_cstd_abi"; then + func_append postdeps ' -library=Cstd -library=Crun' + fi + ;; + esac + ;; + + solaris*) + func_cc_basename "$CC" + case $func_cc_basename_result in + CC* | sunCC*) + func_suncc_cstd_abi + + if test no != "$suncc_use_cstd_abi"; then + func_append postdeps ' -library=Cstd -library=Crun' + fi + ;; + esac + ;; + esac + } + + # Last step: remove runtime libs from dependency_libs + # (they stay in deplibs) + tmp_libs= + for i in $dependency_libs; do + case " $predeps $postdeps $compiler_lib_search_path " in + *" $i "*) + i= + ;; + esac + if test -n "$i"; then + func_append tmp_libs " $i" + fi + done + dependency_libs=$tmp_libs + done # for pass + if test prog = "$linkmode"; then + dlfiles=$newdlfiles + fi + if test prog = "$linkmode" || test lib = "$linkmode"; then + dlprefiles=$newdlprefiles + fi + + case $linkmode in + oldlib) + if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then + func_warning "'-dlopen' is ignored for archives" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "'-l' and '-L' are ignored for archives" ;; + esac + + test -n "$rpath" && \ + func_warning "'-rpath' is ignored for archives" + + test -n "$xrpath" && \ + func_warning "'-R' is ignored for archives" + + test -n "$vinfo" && \ + func_warning "'-version-info/-version-number' is ignored for 
archives" + + test -n "$release" && \ + func_warning "'-release' is ignored for archives" + + test -n "$export_symbols$export_symbols_regex" && \ + func_warning "'-export-symbols' is ignored for archives" + + # Now set the variables for building old libraries. + build_libtool_libs=no + oldlibs=$output + func_append objs "$old_deplibs" + ;; + + lib) + # Make sure we only generate libraries of the form 'libNAME.la'. + case $outputname in + lib*) + func_stripname 'lib' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + ;; + *) + test no = "$module" \ + && func_fatal_help "libtool library '$output' must begin with 'lib'" + + if test no != "$need_lib_prefix"; then + # Add the "lib" prefix for modules if required + func_stripname '' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + else + func_stripname '' '.la' "$outputname" + libname=$func_stripname_result + fi + ;; + esac + + if test -n "$objs"; then + if test pass_all != "$deplibs_check_method"; then + func_fatal_error "cannot build libtool library '$output' from non-libtool objects on this host:$objs" + else + func_warning "Linking the shared library $output against the non-libtool objects $objs is not portable!" + func_append libobjs " $objs" + fi + fi + + test no = "$dlself" \ + || func_warning "'-dlopen self' is ignored for libtool libraries" + + set dummy $rpath + shift + test 1 -lt "$#" \ + && func_warning "ignoring multiple '-rpath's for a libtool library" + + install_libdir=$1 + + oldlibs= + if test -z "$rpath"; then + if test yes = "$build_libtool_libs"; then + # Building a libtool convenience library. + # Some compilers have problems with a '.al' extension so + # convenience libraries should have the same extension an + # archive normally would. 
+ oldlibs="$output_objdir/$libname.$libext $oldlibs" + build_libtool_libs=convenience + build_old_libs=yes + fi + + test -n "$vinfo" && \ + func_warning "'-version-info/-version-number' is ignored for convenience libraries" + + test -n "$release" && \ + func_warning "'-release' is ignored for convenience libraries" + else + + # Parse the version information argument. + save_ifs=$IFS; IFS=: + set dummy $vinfo 0 0 0 + shift + IFS=$save_ifs + + test -n "$7" && \ + func_fatal_help "too many parameters to '-version-info'" + + # convert absolute version numbers to libtool ages + # this retains compatibility with .la files and attempts + # to make the code below a bit more comprehensible + + case $vinfo_number in + yes) + number_major=$1 + number_minor=$2 + number_revision=$3 + # + # There are really only two kinds -- those that + # use the current revision as the major version + # and those that subtract age and use age as + # a minor version. But, then there is irix + # that has an extra 1 added just for fun + # + case $version_type in + # correct linux to gnu/linux during the next big refactor + darwin|freebsd-elf|linux|midnightbsd-elf|osf|qnx|windows|none) + func_arith $number_major + $number_minor + current=$func_arith_result + age=$number_minor + revision=$number_revision + ;; + freebsd-aout|sco|sunos) + current=$number_major + revision=$number_minor + age=0 + ;; + irix|nonstopux) + func_arith $number_major + $number_minor + current=$func_arith_result + age=$number_minor + revision=$number_minor + lt_irix_increment=no + ;; + esac + ;; + no) + current=$1 + revision=$2 + age=$3 + ;; + esac + + # Check that each of the things are valid numbers. 
+ case $current in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "CURRENT '$current' must be a nonnegative integer" + func_fatal_error "'$vinfo' is not valid version information" + ;; + esac + + case $revision in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "REVISION '$revision' must be a nonnegative integer" + func_fatal_error "'$vinfo' is not valid version information" + ;; + esac + + case $age in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "AGE '$age' must be a nonnegative integer" + func_fatal_error "'$vinfo' is not valid version information" + ;; + esac + + if test "$age" -gt "$current"; then + func_error "AGE '$age' is greater than the current interface number '$current'" + func_fatal_error "'$vinfo' is not valid version information" + fi + + # Calculate the version variables. + major= + versuffix= + verstring= + case $version_type in + none) ;; + + darwin) + # Like Linux, but with the current version available in + # verstring for coding it into the library header + func_arith $current - $age + major=.$func_arith_result + versuffix=$major.$age.$revision + # Darwin ld doesn't like 0 for these options... 
+ func_arith $current + 1 + minor_current=$func_arith_result + xlcverstring="$wl-compatibility_version $wl$minor_current $wl-current_version $wl$minor_current.$revision" + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + # On Darwin other compilers + case $CC in + nagfor*) + verstring="$wl-compatibility_version $wl$minor_current $wl-current_version $wl$minor_current.$revision" + ;; + *) + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + ;; + esac + ;; + + freebsd-aout) + major=.$current + versuffix=.$current.$revision + ;; + + freebsd-elf | midnightbsd-elf) + func_arith $current - $age + major=.$func_arith_result + versuffix=$major.$age.$revision + ;; + + irix | nonstopux) + if test no = "$lt_irix_increment"; then + func_arith $current - $age + else + func_arith $current - $age + 1 + fi + major=$func_arith_result + + case $version_type in + nonstopux) verstring_prefix=nonstopux ;; + *) verstring_prefix=sgi ;; + esac + verstring=$verstring_prefix$major.$revision + + # Add in all the interfaces that we are compatible with. + loop=$revision + while test 0 -ne "$loop"; do + func_arith $revision - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring=$verstring_prefix$major.$iface:$verstring + done + + # Before this point, $major must not contain '.'. + major=.$major + versuffix=$major.$revision + ;; + + linux) # correct to gnu/linux during the next big refactor + func_arith $current - $age + major=.$func_arith_result + versuffix=$major.$age.$revision + ;; + + osf) + func_arith $current - $age + major=.$func_arith_result + versuffix=.$current.$age.$revision + verstring=$current.$age.$revision + + # Add in all the interfaces that we are compatible with. 
+ loop=$age + while test 0 -ne "$loop"; do + func_arith $current - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring=$verstring:$iface.0 + done + + # Make executables depend on our current version. + func_append verstring ":$current.0" + ;; + + qnx) + func_arith $current - $age + major=.$func_arith_result + versuffix=$major.$age.$revision + ;; + + sco) + major=.$current + versuffix=.$current + ;; + + sunos) + major=.$current + versuffix=.$current.$revision + ;; + + windows) + # Use '-' rather than '.', since we only want one + # extension on DOS 8.3 file systems. + func_arith $current - $age + major=$func_arith_result + versuffix=-$major + ;; + + *) + func_fatal_configuration "unknown library version type '$version_type'" + ;; + esac + + # Clear the version info if we defaulted, and they specified a release. + if test -z "$vinfo" && test -n "$release"; then + major= + case $version_type in + darwin) + # we can't check for "0.0" in archive_cmds due to quoting + # problems, so we reset it completely + verstring= + ;; + *) + verstring=0.0 + ;; + esac + if test no = "$need_version"; then + versuffix= + else + versuffix=.0.0 + fi + fi + + # Remove version info from name if versioning should be avoided + if test yes,no = "$avoid_version,$need_version"; then + major= + versuffix= + verstring= + fi + + # Check to see if the archive will have undefined symbols. + if test yes = "$allow_undefined"; then + if test unsupported = "$allow_undefined_flag"; then + if test yes = "$build_old_libs"; then + func_warning "undefined symbols not allowed in $host shared libraries; building static only" + build_libtool_libs=no + else + func_fatal_error "can't build $host shared library unless -no-undefined is specified" + fi + fi + else + # Don't allow undefined symbols. 
+ allow_undefined_flag=$no_undefined_flag + fi + + fi + + func_generate_dlsyms "$libname" "$libname" : + func_append libobjs " $symfileobj" + test " " = "$libobjs" && libobjs= + + if test relink != "$opt_mode"; then + # Remove our outputs, but don't remove object files since they + # may have been created when compiling PIC objects. + removelist= + tempremovelist=`$ECHO "$output_objdir/*"` + for p in $tempremovelist; do + case $p in + *.$objext | *.gcno) + ;; + $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/$libname$release.*) + if test -n "$precious_files_regex"; then + if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1 + then + continue + fi + fi + func_append removelist " $p" + ;; + *) ;; + esac + done + test -n "$removelist" && \ + func_show_eval "${RM}r \$removelist" + fi + + # Now set the variables for building old libraries. + if test yes = "$build_old_libs" && test convenience != "$build_libtool_libs"; then + func_append oldlibs " $output_objdir/$libname.$libext" + + # Transform .lo files to .o files. + oldobjs="$objs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.$libext$/d; $lo2o" | $NL2SP` + fi + + # Eliminate all temporary directories. + #for path in $notinst_path; do + # lib_search_path=`$ECHO "$lib_search_path " | $SED "s% $path % %g"` + # deplibs=`$ECHO "$deplibs " | $SED "s% -L$path % %g"` + # dependency_libs=`$ECHO "$dependency_libs " | $SED "s% -L$path % %g"` + #done + + if test -n "$xrpath"; then + # If the user specified any rpath flags, then add them. 
+ temp_xrpath= + for libdir in $xrpath; do + func_replace_sysroot "$libdir" + func_append temp_xrpath " -R$func_replace_sysroot_result" + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + if test yes != "$hardcode_into_libs" || test yes = "$build_old_libs"; then + dependency_libs="$temp_xrpath $dependency_libs" + fi + fi + + # Make sure dlfiles contains only unique files that won't be dlpreopened + old_dlfiles=$dlfiles + dlfiles= + for lib in $old_dlfiles; do + case " $dlprefiles $dlfiles " in + *" $lib "*) ;; + *) func_append dlfiles " $lib" ;; + esac + done + + # Make sure dlprefiles contains only unique files + old_dlprefiles=$dlprefiles + dlprefiles= + for lib in $old_dlprefiles; do + case "$dlprefiles " in + *" $lib "*) ;; + *) func_append dlprefiles " $lib" ;; + esac + done + + if test yes = "$build_libtool_libs"; then + if test -n "$rpath"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*) + # these systems don't actually have a c library (as such)! + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C library is in the System framework + func_append deplibs " System.ltframework" + ;; + *-*-netbsd*) + # Don't link with libc until the a.out ld.so is fixed. + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-midnightbsd*) + # Do not include libc due to us having libc/libc_r. + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + ;; + *) + # Add libc to deplibs on all other systems if necessary. + if test yes = "$build_libtool_need_lc"; then + func_append deplibs " -lc" + fi + ;; + esac + fi + + # Transform deplibs into only deplibs that can be linked in shared. 
+ name_save=$name + libname_save=$libname + release_save=$release + versuffix_save=$versuffix + major_save=$major + # I'm not sure if I'm treating the release correctly. I think + # release should show up in the -l (ie -lgmp5) so we don't want to + # add it in twice. Is that correct? + release= + versuffix= + major= + newdeplibs= + droppeddeps=no + case $deplibs_check_method in + pass_all) + # Don't check for shared/static. Everything works. + # This might be a little naive. We might want to check + # whether the library exists or not. But this is on + # osf3 & osf4 and I'm not really sure... Just + # implementing what was already the behavior. + newdeplibs=$deplibs + ;; + file_magic*) + set dummy $deplibs_check_method; shift + file_magic_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + for a_deplib in $deplibs; do + case $a_deplib in + -l*) + func_stripname -l '' "$a_deplib" + name=$func_stripname_result + if test yes = "$allow_libtool_libs_with_static_runtimes"; then + case " $predeps $postdeps " in + *" $a_deplib "*) + func_append newdeplibs " $a_deplib" + a_deplib= + ;; + esac + fi + if test -n "$a_deplib"; then + libname=`eval "\\$ECHO \"$libname_spec\""` + if test -n "$file_magic_glob"; then + libnameglob=`func_echo_all "$libname" | $SED -e $file_magic_glob` + else + libnameglob=$libname + fi + test yes = "$want_nocaseglob" && nocaseglob=`shopt -p nocaseglob` + for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do + if test yes = "$want_nocaseglob"; then + shopt -s nocaseglob + potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null` + $nocaseglob + else + potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null` + fi + for potent_lib in $potential_libs; do + # Follow soft links. + if ls -lLd "$potent_lib" 2>/dev/null | + $GREP " -> " >/dev/null; then + continue + fi + # The statement above tries to avoid entering an + # endless loop below, in case of cyclic links. 
+ # We might still enter an endless loop, since a link + # loop can be closed while we follow links, + # but so what? + potlib=$potent_lib + while test -h "$potlib" 2>/dev/null; do + potliblink=`ls -ld $potlib | $SED 's/.* -> //'` + case $potliblink in + [\\/]* | [A-Za-z]:[\\/]*) potlib=$potliblink;; + *) potlib=`$ECHO "$potlib" | $SED 's|[^/]*$||'`"$potliblink";; + esac + done + if eval $file_magic_cmd \"\$potlib\" 2>/dev/null | + $SED -e 10q | + $EGREP "$file_magic_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib= + break 2 + fi + done + done + fi + if test -n "$a_deplib"; then + droppeddeps=yes + func_warning "Linker path does not have real file for library $a_deplib." + func_warning "I have the capability to make that library automatically link in when" + func_warning "you link to this library. But I can only do this if you have a" + func_warning "shared version of the library, which you do not appear to have" + func_warning "because I did check the linker path looking for a file starting" + if test -z "$potlib"; then + func_warning "with $libname but no candidates were found. (...for file magic test)" + else + func_warning "with $libname and none of the candidates passed a file format test" + func_warning "using a file magic. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. 
+ ;; + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + for a_deplib in $deplibs; do + case $a_deplib in + -l*) + func_stripname -l '' "$a_deplib" + name=$func_stripname_result + if test yes = "$allow_libtool_libs_with_static_runtimes"; then + case " $predeps $postdeps " in + *" $a_deplib "*) + func_append newdeplibs " $a_deplib" + a_deplib= + ;; + esac + fi + if test -n "$a_deplib"; then + libname=`eval "\\$ECHO \"$libname_spec\""` + for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do + potential_libs=`ls $i/$libname[.-]* 2>/dev/null` + for potent_lib in $potential_libs; do + potlib=$potent_lib # see symlink-check above in file_magic test + if eval "\$ECHO \"$potent_lib\"" 2>/dev/null | $SED 10q | \ + $EGREP "$match_pattern_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib= + break 2 + fi + done + done + fi + if test -n "$a_deplib"; then + droppeddeps=yes + func_warning "Linker path does not have real file for library $a_deplib." + func_warning "I have the capability to make that library automatically link in when" + func_warning "you link to this library. But I can only do this if you have a" + func_warning "shared version of the library, which you do not appear to have" + func_warning "because I did check the linker path looking for a file starting" + if test -z "$potlib"; then + func_warning "with $libname but no candidates were found. (...for regex pattern test)" + else + func_warning "with $libname and none of the candidates passed a file format test" + func_warning "using a regex pattern. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. 
+ ;; + none | unknown | *) + newdeplibs= + tmp_deplibs=`$ECHO " $deplibs" | $SED 's/ -lc$//; s/ -[LR][^ ]*//g'` + if test yes = "$allow_libtool_libs_with_static_runtimes"; then + for i in $predeps $postdeps; do + # can't use Xsed below, because $i might contain '/' + tmp_deplibs=`$ECHO " $tmp_deplibs" | $SED "s|$i||"` + done + fi + case $tmp_deplibs in + *[!\ \ ]*) + echo + if test none = "$deplibs_check_method"; then + func_warning "Inter-library dependencies are not supported in this platform." + else + func_warning "Inter-library dependencies are not known to be supported." + fi + func_warning "All declared inter-library dependencies are being dropped." + droppeddeps=yes + ;; + esac + ;; + esac + versuffix=$versuffix_save + major=$major_save + release=$release_save + libname=$libname_save + name=$name_save + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library with the System framework + newdeplibs=`$ECHO " $newdeplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + if test yes = "$droppeddeps"; then + if test yes = "$module"; then + func_warning "libtool could not satisfy all declared inter-library" + func_warning "dependencies of module $libname. Therefore, libtool will create" + func_warning "a static module, that should work as long as the dlopening" + func_warning "application is linked with the -dlopen flag." + if test -z "$global_symbol_pipe"; then + func_warning "However, this would only work if libtool was able to extract symbol" + func_warning "lists from a program, using 'nm' or equivalent, but libtool could" + func_warning "not find such a program. So, this module is probably useless." + func_warning "'nm' from GNU binutils and a full rebuild may help." 
+ fi + if test no = "$build_old_libs"; then + oldlibs=$output_objdir/$libname.$libext + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + else + echo "*** The inter-library dependencies that have been dropped here will be" + echo "*** automatically added whenever a program is linked with this library" + echo "*** or is declared to -dlopen it." + + if test no = "$allow_undefined"; then + echo + echo "*** Since this library must not contain undefined symbols," + echo "*** because either the platform does not support them or" + echo "*** it was explicitly requested with -no-undefined," + echo "*** libtool will only create a static version of it." + if test no = "$build_old_libs"; then + oldlibs=$output_objdir/$libname.$libext + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + fi + fi + # Done checking deplibs! + deplibs=$newdeplibs + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + case $host in + *-*-darwin*) + newdeplibs=`$ECHO " $newdeplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + deplibs=`$ECHO " $deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + deplibs=$new_libs + + # All the library-specific variables (install_libdir is set above). 
+ library_names= + old_library= + dlname= + + # Test again, we may have decided not to build it any more + if test yes = "$build_libtool_libs"; then + # Remove $wl instances when linking with ld. + # FIXME: should test the right _cmds variable. + case $archive_cmds in + *\$LD\ *) wl= ;; + esac + if test yes = "$hardcode_into_libs"; then + # Hardcode the library paths + hardcode_libdirs= + dep_rpath= + rpath=$finalize_rpath + test relink = "$opt_mode" || rpath=$compile_rpath$rpath + for libdir in $rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + func_replace_sysroot "$libdir" + libdir=$func_replace_sysroot_result + if test -z "$hardcode_libdirs"; then + hardcode_libdirs=$libdir + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append dep_rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_append perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir=$hardcode_libdirs + eval "dep_rpath=\"$hardcode_libdir_flag_spec\"" + fi + if test -n "$runpath_var" && test -n "$perm_rpath"; then + # We should set the runpath_var. 
+ rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" + fi + test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" + fi + + shlibpath=$finalize_shlibpath + test relink = "$opt_mode" || shlibpath=$compile_shlibpath$shlibpath + if test -n "$shlibpath"; then + eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" + fi + + # Get the real and link names of the library. + eval shared_ext=\"$shrext_cmds\" + eval library_names=\"$library_names_spec\" + set dummy $library_names + shift + realname=$1 + shift + + if test -n "$soname_spec"; then + eval soname=\"$soname_spec\" + else + soname=$realname + fi + if test -z "$dlname"; then + dlname=$soname + fi + + lib=$output_objdir/$realname + linknames= + for link + do + func_append linknames " $link" + done + + # Use standard objects if they are pic + test -z "$pic_flag" && libobjs=`$ECHO "$libobjs" | $SP2NL | $SED "$lo2o" | $NL2SP` + test "X$libobjs" = "X " && libobjs= + + delfiles= + if test -n "$export_symbols" && test -n "$include_expsyms"; then + $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp" + export_symbols=$output_objdir/$libname.uexp + func_append delfiles " $export_symbols" + fi + + orig_export_symbols= + case $host_os in + cygwin* | mingw* | windows* | cegcc*) + if test -n "$export_symbols" && test -z "$export_symbols_regex"; then + # exporting using user supplied symfile + func_dll_def_p "$export_symbols" || { + # and it's NOT already a .def file. Must figure out + # which of the given symbols are data symbols and tag + # them as such. So, trigger use of export_symbols_cmds. + # export_symbols gets reassigned inside the "prepare + # the list of exported symbols" if statement, so the + # include_expsyms logic still works. 
+ orig_export_symbols=$export_symbols + export_symbols= + always_export_symbols=yes + } + fi + ;; + esac + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + if test yes = "$always_export_symbols" || test -n "$export_symbols_regex"; then + func_verbose "generating symbol list for '$libname.la'" + export_symbols=$output_objdir/$libname.exp + $opt_dry_run || $RM $export_symbols + cmds=$export_symbols_cmds + save_ifs=$IFS; IFS='~' + for cmd1 in $cmds; do + IFS=$save_ifs + # Take the normal branch if the nm_file_list_spec branch + # doesn't work or if tool conversion is not needed. + case $nm_file_list_spec~$to_tool_file_cmd in + *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*) + try_normal_branch=yes + eval cmd=\"$cmd1\" + func_len " $cmd" + len=$func_len_result + ;; + *) + try_normal_branch=no + ;; + esac + if test yes = "$try_normal_branch" \ + && { test "$len" -lt "$max_cmd_len" \ + || test "$max_cmd_len" -le -1; } + then + func_show_eval "$cmd" 'exit $?' + skipped_export=false + elif test -n "$nm_file_list_spec"; then + func_basename "$output" + output_la=$func_basename_result + save_libobjs=$libobjs + save_output=$output + output=$output_objdir/$output_la.nm + func_to_tool_file "$output" + libobjs=$nm_file_list_spec$func_to_tool_file_result + func_append delfiles " $output" + func_verbose "creating $NM input file list: $output" + for obj in $save_libobjs; do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > "$output" + eval cmd=\"$cmd1\" + func_show_eval "$cmd" 'exit $?' + output=$save_output + libobjs=$save_libobjs + skipped_export=false + else + # The command line is too long to execute in one step. + func_verbose "using reloadable object file for export list..." + skipped_export=: + # Break out early, otherwise skipped_export may be + # set to false by a later but shorter cmd. 
+ break + fi + done + IFS=$save_ifs + if test -n "$export_symbols_regex" && test : != "$skipped_export"; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + fi + + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols=$export_symbols + test -n "$orig_export_symbols" && tmp_export_symbols=$orig_export_symbols + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test : != "$skipped_export" && test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for '$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands, which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + + tmp_deplibs= + for test_deplib in $deplibs; do + case " $convenience " in + *" $test_deplib "*) ;; + *) + func_append tmp_deplibs " $test_deplib" + ;; + esac + done + deplibs=$tmp_deplibs + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec" && + test yes = "$compiler_needs_object" && + test -z "$libobjs"; then + # extract the archives, so we have objects to list. + # TODO: could optimize this to just extract one archive. 
+ whole_archive_flag_spec= + fi + if test -n "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + else + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + fi + + if test yes = "$thread_safe" && test -n "$thread_safe_flag_spec"; then + eval flag=\"$thread_safe_flag_spec\" + func_append linker_flags " $flag" + fi + + # Make a backup of the uninstalled library when relinking + if test relink = "$opt_mode"; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $? + fi + + # Do each of the archive commands. + if test yes = "$module" && test -n "$module_cmds"; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + eval test_cmds=\"$module_expsym_cmds\" + cmds=$module_expsym_cmds + else + eval test_cmds=\"$module_cmds\" + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + eval test_cmds=\"$archive_expsym_cmds\" + cmds=$archive_expsym_cmds + else + eval test_cmds=\"$archive_cmds\" + cmds=$archive_cmds + fi + fi + + if test : != "$skipped_export" && + func_len " $test_cmds" && + len=$func_len_result && + test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + : + else + # The command line is too long to link in one step, link piecewise + # or, if using GNU ld and skipped_export is not :, use a linker + # script. + + # Save the value of $output and $libobjs because we want to + # use them later. If we have whole_archive_flag_spec, we + # want to use save_libobjs as it was before + # whole_archive_flag_spec was expanded, because we can't + # assume the linker understands whole_archive_flag_spec. 
+ # This may have to be revisited, in case too many + # convenience libraries get linked in and end up exceeding + # the spec. + if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + fi + save_output=$output + func_basename "$output" + output_la=$func_basename_result + + # Clear the reloadable object creation command queue and + # initialize k to one. + test_cmds= + concat_cmds= + objlist= + last_robj= + k=1 + + if test -n "$save_libobjs" && test : != "$skipped_export" && test yes = "$with_gnu_ld"; then + output=$output_objdir/$output_la.lnkscript + func_verbose "creating GNU ld script: $output" + echo 'INPUT (' > $output + for obj in $save_libobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + echo ')' >> $output + func_append delfiles " $output" + func_to_tool_file "$output" + output=$func_to_tool_file_result + elif test -n "$save_libobjs" && test : != "$skipped_export" && test -n "$file_list_spec"; then + output=$output_objdir/$output_la.lnk + func_verbose "creating linker input file list: $output" + : > $output + set x $save_libobjs + shift + firstobj= + if test yes = "$compiler_needs_object"; then + firstobj="$1 " + shift + fi + for obj + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + func_append delfiles " $output" + func_to_tool_file "$output" + output=$firstobj\"$file_list_spec$func_to_tool_file_result\" + else + if test -n "$save_libobjs"; then + func_verbose "creating reloadable object files..." + output=$output_objdir/$output_la-$k.$objext + eval test_cmds=\"$reload_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + + # Loop over the list of objects to be linked. 
+ for obj in $save_libobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + if test -z "$objlist" || + test "$len" -lt "$max_cmd_len"; then + func_append objlist " $obj" + else + # The command $test_cmds is almost too long, add a + # command to the queue. + if test 1 -eq "$k"; then + # The first file doesn't have a previous command to add. + reload_objs=$objlist + eval concat_cmds=\"$reload_cmds\" + else + # All subsequent reloadable object files will link in + # the last one created. + reload_objs="$objlist $last_robj" + eval concat_cmds=\"\$concat_cmds~$reload_cmds~\$RM $last_robj\" + fi + last_robj=$output_objdir/$output_la-$k.$objext + func_arith $k + 1 + k=$func_arith_result + output=$output_objdir/$output_la-$k.$objext + objlist=" $obj" + func_len " $last_robj" + func_arith $len0 + $func_len_result + len=$func_arith_result + fi + done + # Handle the remaining objects by creating one last + # reloadable object file. All subsequent reloadable object + # files will link in the last one created. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + reload_objs="$objlist $last_robj" + eval concat_cmds=\"\$concat_cmds$reload_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" + fi + func_append delfiles " $output" + + else + output= + fi + + ${skipped_export-false} && { + func_verbose "generating symbol list for '$libname.la'" + export_symbols=$output_objdir/$libname.exp + $opt_dry_run || $RM $export_symbols + libobjs=$output + # Append the command to create the export file. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" + fi + } + + test -n "$save_libobjs" && + func_verbose "creating a temporary reloadable object file: $output" + + # Loop through the commands generated above and execute them. 
+ save_ifs=$IFS; IFS='~' + for cmd in $concat_cmds; do + IFS=$save_ifs + $opt_quiet || { + func_quote_arg expand,pretty "$cmd" + eval "func_echo $func_quote_arg_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test relink = "$opt_mode"; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS=$save_ifs + + if test -n "$export_symbols_regex" && ${skipped_export-false}; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + + ${skipped_export-false} && { + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols=$export_symbols + test -n "$orig_export_symbols" && tmp_export_symbols=$orig_export_symbols + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for '$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands, which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + } + + libobjs=$output + # Restore the value of output. 
+ output=$save_output + + if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + fi + # Expand the library linking commands again to reset the + # value of $libobjs for piecewise linking. + + # Do each of the archive commands. + if test yes = "$module" && test -n "$module_cmds"; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + cmds=$module_expsym_cmds + else + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + cmds=$archive_expsym_cmds + else + cmds=$archive_cmds + fi + fi + fi + + if test -n "$delfiles"; then + # Append the command to remove temporary files to $cmds. + eval cmds=\"\$cmds~\$RM $delfiles\" + fi + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + + save_ifs=$IFS; IFS='~' + for cmd in $cmds; do + IFS=$sp$nl + eval cmd=\"$cmd\" + IFS=$save_ifs + $opt_quiet || { + func_quote_arg expand,pretty "$cmd" + eval "func_echo $func_quote_arg_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test relink = "$opt_mode"; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS=$save_ifs + + # Restore the uninstalled library and exit + if test relink = "$opt_mode"; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $? 
+ + if test -n "$convenience"; then + if test -z "$whole_archive_flag_spec"; then + func_show_eval '${RM}r "$gentop"' + fi + fi + + exit $EXIT_SUCCESS + fi + + # Create links to the real library. + for linkname in $linknames; do + if test "$realname" != "$linkname"; then + func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?' + fi + done + + # If -module or -export-dynamic was specified, set the dlname. + if test yes = "$module" || test yes = "$export_dynamic"; then + # On all known operating systems, these are identical. + dlname=$soname + fi + fi + ;; + + obj) + if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then + func_warning "'-dlopen' is ignored for objects" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "'-l' and '-L' are ignored for objects" ;; + esac + + test -n "$rpath" && \ + func_warning "'-rpath' is ignored for objects" + + test -n "$xrpath" && \ + func_warning "'-R' is ignored for objects" + + test -n "$vinfo" && \ + func_warning "'-version-info' is ignored for objects" + + test -n "$release" && \ + func_warning "'-release' is ignored for objects" + + case $output in + *.lo) + test -n "$objs$old_deplibs" && \ + func_fatal_error "cannot build library object '$output' from non-libtool objects" + + libobj=$output + func_lo2o "$libobj" + obj=$func_lo2o_result + ;; + *) + libobj= + obj=$output + ;; + esac + + # Delete the old objects. + $opt_dry_run || $RM $obj $libobj + + # Objects from convenience libraries. This assumes + # single-version convenience libraries. Whenever we create + # different ones for PIC/non-PIC, this we'll have to duplicate + # the extraction. + reload_conv_objs= + gentop= + # if reload_cmds runs $LD directly, get rid of -Wl from + # whole_archive_flag_spec and hope we can get by with turning comma + # into space. 
+ case $reload_cmds in + *\$LD[\ \$]*) wl= ;; + esac + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\" + test -n "$wl" || tmp_whole_archive_flags=`$ECHO "$tmp_whole_archive_flags" | $SED 's|,| |g'` + reload_conv_objs=$reload_objs\ $tmp_whole_archive_flags + else + gentop=$output_objdir/${obj}x + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + reload_conv_objs="$reload_objs $func_extract_archives_result" + fi + fi + + # If we're not building shared, we need to use non_pic_objs + test yes = "$build_libtool_libs" || libobjs=$non_pic_objects + + # Create the old-style object. + reload_objs=$objs$old_deplibs' '`$ECHO "$libobjs" | $SP2NL | $SED "/\.$libext$/d; /\.lib$/d; $lo2o" | $NL2SP`' '$reload_conv_objs + + output=$obj + func_execute_cmds "$reload_cmds" 'exit $?' + + # Exit if we aren't doing a library object file. + if test -z "$libobj"; then + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + fi + + test yes = "$build_libtool_libs" || { + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + # Create an invalid libtool object if no PIC, so that we don't + # accidentally link it into a program. + # $show "echo timestamp > $libobj" + # $opt_dry_run || eval "echo timestamp > $libobj" || exit $? + exit $EXIT_SUCCESS + } + + if test -n "$pic_flag" || test default != "$pic_mode"; then + # Only do commands if we really have different PIC objects. + reload_objs="$libobjs $reload_conv_objs" + output=$libobj + func_execute_cmds "$reload_cmds" 'exit $?' 
+ fi + + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + ;; + + prog) + case $host in + *cygwin*) func_stripname '' '.exe' "$output" + output=$func_stripname_result.exe;; + esac + test -n "$vinfo" && \ + func_warning "'-version-info' is ignored for programs" + + test -n "$release" && \ + func_warning "'-release' is ignored for programs" + + $preload \ + && test unknown,unknown,unknown = "$dlopen_support,$dlopen_self,$dlopen_self_static" \ + && func_warning "'LT_INIT([dlopen])' not used. Assuming no dlopen support." + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library is the System framework + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's/ -lc / System.ltframework /'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + case $host in + *-*-darwin*) + # Don't allow lazy linking, it breaks C++ global constructors + # But is supposedly fixed on 10.4 or later (yay!). 
+ if test CXX = "$tagname"; then + case ${MACOSX_DEPLOYMENT_TARGET-10.0} in + 10.[0123]) + func_append compile_command " $wl-bind_at_load" + func_append finalize_command " $wl-bind_at_load" + ;; + esac + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $compile_deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $compile_deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + compile_deplibs=$new_libs + + + func_append compile_command " $compile_deplibs" + func_append finalize_command " $finalize_deplibs" + + if test -n "$rpath$xrpath"; then + # If the user specified any rpath flags, then add them. + for libdir in $rpath $xrpath; do + # This is the magic to use -rpath. + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + fi + + # Now hardcode the library paths + rpath= + hardcode_libdirs= + for libdir in $compile_rpath $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs=$libdir + else + # Just accumulate the unique libdirs. 
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_append perm_rpath " $libdir" ;; + esac + fi + case $host in + *-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`$ECHO "$libdir" | $SED -e 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$libdir:"*) ;; + ::) dllsearchpath=$libdir;; + *) func_append dllsearchpath ":$libdir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir=$hardcode_libdirs + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + compile_rpath=$rpath + + rpath= + hardcode_libdirs= + for libdir in $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs=$libdir + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$finalize_perm_rpath " in + *" $libdir "*) ;; + *) func_append finalize_perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. 
+ if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir=$hardcode_libdirs + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + finalize_rpath=$rpath + + if test -n "$libobjs" && test yes = "$build_old_libs"; then + # Transform all the library objects into standard objects. + compile_command=`$ECHO "$compile_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + finalize_command=`$ECHO "$finalize_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + fi + + func_generate_dlsyms "$outputname" "@PROGRAM@" false + + # template prelinking step + if test -n "$prelink_cmds"; then + func_execute_cmds "$prelink_cmds" 'exit $?' + fi + + wrappers_required=: + case $host in + *cegcc* | *mingw32ce*) + # Disable wrappers for cegcc and mingw32ce hosts, we are cross compiling anyway. + wrappers_required=false + ;; + *cygwin* | *mingw* | *windows* ) + test yes = "$build_libtool_libs" || wrappers_required=false + ;; + *) + if test no = "$need_relink" || test yes != "$build_libtool_libs"; then + wrappers_required=false + fi + ;; + esac + $wrappers_required || { + # Replace the output file specification. + compile_command=`$ECHO "$compile_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + link_command=$compile_command$compile_rpath + + # We have no uninstalled library dependencies, so finalize right now. + exit_status=0 + func_show_eval "$link_command" 'exit_status=$?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Delete the generated files. 
+ if test -f "$output_objdir/${outputname}S.$objext"; then + func_show_eval '$RM "$output_objdir/${outputname}S.$objext"' + fi + + exit $exit_status + } + + if test -n "$compile_shlibpath$finalize_shlibpath"; then + compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" + fi + if test -n "$finalize_shlibpath"; then + finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" + fi + + compile_var= + finalize_var= + if test -n "$runpath_var"; then + if test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + compile_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + if test -n "$finalize_perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $finalize_perm_rpath; do + func_append rpath "$dir:" + done + finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + fi + + if test yes = "$no_install"; then + # We don't need to create a wrapper script. + link_command=$compile_var$compile_command$compile_rpath + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + # Delete the old output file. + $opt_dry_run || $RM $output + # Link the executable and exit + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' 
+ fi + + exit $EXIT_SUCCESS + fi + + case $hardcode_action,$fast_install in + relink,*) + # Fast installation is not supported + link_command=$compile_var$compile_command$compile_rpath + relink_command=$finalize_var$finalize_command$finalize_rpath + + func_warning "this platform does not like uninstalled shared libraries" + func_warning "'$output' will be relinked during installation" + ;; + *,yes) + link_command=$finalize_var$compile_command$finalize_rpath + relink_command=`$ECHO "$compile_var$compile_command$compile_rpath" | $SED 's%@OUTPUT@%\$progdir/\$file%g'` + ;; + *,no) + link_command=$compile_var$compile_command$compile_rpath + relink_command=$finalize_var$finalize_command$finalize_rpath + ;; + *,needless) + link_command=$finalize_var$compile_command$finalize_rpath + relink_command= + ;; + esac + + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` + + # Delete the old output files. + $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname + + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output_objdir/$outputname" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Now create the wrapper script. + func_verbose "creating $output" + + # Quote the relink command for shipping. 
+ if test -n "$relink_command"; then + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_arg pretty "$var_value" + relink_command="$var=$func_quote_arg_result; export $var; $relink_command" + fi + done + func_quote eval cd "`pwd`" + func_quote_arg pretty,unquoted "($func_quote_result; $relink_command)" + relink_command=$func_quote_arg_unquoted_result + fi + + # Only actually do things if not in dry run mode. + $opt_dry_run || { + # win32 will think the script is a binary if it has + # a .exe suffix, so we strip it off here. + case $output in + *.exe) func_stripname '' '.exe' "$output" + output=$func_stripname_result ;; + esac + # test for cygwin because mv fails w/o .exe extensions + case $host in + *cygwin*) + exeext=.exe + func_stripname '' '.exe' "$outputname" + outputname=$func_stripname_result ;; + *) exeext= ;; + esac + case $host in + *cygwin* | *mingw* | windows* ) + func_dirname_and_basename "$output" "" "." + output_name=$func_basename_result + output_path=$func_dirname_result + cwrappersource=$output_path/$objdir/lt-$output_name.c + cwrapper=$output_path/$output_name.exe + $RM $cwrappersource $cwrapper + trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15 + + func_emit_cwrapperexe_src > $cwrappersource + + # The wrapper executable is built using the $host compiler, + # because it contains $host paths and files. If cross- + # compiling, it, like the target executable, must be + # executed on the $host or under an emulation environment. 
+ $opt_dry_run || { + $LTCC $LTCFLAGS -o $cwrapper $cwrappersource + $STRIP $cwrapper + } + + # Now, create the wrapper script for func_source use: + func_ltwrapper_scriptname $cwrapper + $RM $func_ltwrapper_scriptname_result + trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15 + $opt_dry_run || { + # note: this script will not be executed, so do not chmod. + if test "x$build" = "x$host"; then + $cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result + else + func_emit_wrapper no > $func_ltwrapper_scriptname_result + fi + } + ;; + * ) + $RM $output + trap "$RM $output; exit $EXIT_FAILURE" 1 2 15 + + func_emit_wrapper no > $output + chmod +x $output + ;; + esac + } + exit $EXIT_SUCCESS + ;; + esac + + # See if we need to build an old-fashioned archive. + for oldlib in $oldlibs; do + + case $build_libtool_libs in + convenience) + oldobjs="$libobjs_save $symfileobj" + addlibs=$convenience + build_libtool_libs=no + ;; + module) + oldobjs=$libobjs_save + addlibs=$old_convenience + build_libtool_libs=no + ;; + *) + oldobjs="$old_deplibs $non_pic_objects" + $preload && test -f "$symfileobj" \ + && func_append oldobjs " $symfileobj" + addlibs=$old_convenience + ;; + esac + + if test -n "$addlibs"; then + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $addlibs + func_append oldobjs " $func_extract_archives_result" + fi + + # Do each command in the archive commands. + if test -n "$old_archive_from_new_cmds" && test yes = "$build_libtool_libs"; then + cmds=$old_archive_from_new_cmds + else + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append oldobjs " $func_extract_archives_result" + fi + + # POSIX demands no paths to be encoded in archives. 
We have + # to avoid creating archives with duplicate basenames if we + # might have to extract them afterwards, e.g., when creating a + # static archive out of a convenience library, or when linking + # the entirety of a libtool archive into another (currently + # not supported by libtool). + if (for obj in $oldobjs + do + func_basename "$obj" + $ECHO "$func_basename_result" + done | sort | sort -uc >/dev/null 2>&1); then + : + else + echo "copying selected object files to avoid basename conflicts..." + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + func_mkdir_p "$gentop" + save_oldobjs=$oldobjs + oldobjs= + counter=1 + for obj in $save_oldobjs + do + func_basename "$obj" + objbase=$func_basename_result + case " $oldobjs " in + " ") oldobjs=$obj ;; + *[\ /]"$objbase "*) + while :; do + # Make sure we don't pick an alternate name that also + # overlaps. + newobj=lt$counter-$objbase + func_arith $counter + 1 + counter=$func_arith_result + case " $oldobjs " in + *[\ /]"$newobj "*) ;; + *) if test ! -f "$gentop/$newobj"; then break; fi ;; + esac + done + func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj" + func_append oldobjs " $gentop/$newobj" + ;; + *) func_append oldobjs " $obj" ;; + esac + done + fi + func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 + tool_oldlib=$func_to_tool_file_result + eval cmds=\"$old_archive_cmds\" + + func_len " $cmds" + len=$func_len_result + if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + cmds=$old_archive_cmds + elif test -n "$archiver_list_spec"; then + func_verbose "using command file archive linking..." 
+ for obj in $oldobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > $output_objdir/$libname.libcmd + func_to_tool_file "$output_objdir/$libname.libcmd" + oldobjs=" $archiver_list_spec$func_to_tool_file_result" + cmds=$old_archive_cmds + else + # the command line is too long to link in one step, link in parts + func_verbose "using piecewise archive linking..." + save_RANLIB=$RANLIB + RANLIB=: + objlist= + concat_cmds= + save_oldobjs=$oldobjs + oldobjs= + # Is there a better way of finding the last object in the list? + for obj in $save_oldobjs + do + last_oldobj=$obj + done + eval test_cmds=\"$old_archive_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + for obj in $save_oldobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + func_append objlist " $obj" + if test "$len" -lt "$max_cmd_len"; then + : + else + # the above command should be used before it gets too long + oldobjs=$objlist + if test "$obj" = "$last_oldobj"; then + RANLIB=$save_RANLIB + fi + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\$concat_cmds$old_archive_cmds\" + objlist= + len=$len0 + fi + done + RANLIB=$save_RANLIB + oldobjs=$objlist + if test -z "$oldobjs"; then + eval cmds=\"\$concat_cmds\" + else + eval cmds=\"\$concat_cmds~\$old_archive_cmds\" + fi + fi + fi + func_execute_cmds "$cmds" 'exit $?' + done + + test -n "$generated" && \ + func_show_eval "${RM}r$generated" + + # Now create the libtool archive. 
+ case $output in + *.la) + old_library= + test yes = "$build_old_libs" && old_library=$libname.$libext + func_verbose "creating $output" + + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_arg pretty,unquoted "$var_value" + relink_command="$var=$func_quote_arg_unquoted_result; export $var; $relink_command" + fi + done + # Quote the link command for shipping. + func_quote eval cd "`pwd`" + relink_command="($func_quote_result; $SHELL \"$progpath\" $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" + func_quote_arg pretty,unquoted "$relink_command" + relink_command=$func_quote_arg_unquoted_result + if test yes = "$hardcode_automatic"; then + relink_command= + fi + + # Only create the output if not a dry run. 
+ $opt_dry_run || { + for installed in no yes; do + if test yes = "$installed"; then + if test -z "$install_libdir"; then + break + fi + output=$output_objdir/${outputname}i + # Replace all uninstalled libtool libraries with the installed ones + newdependency_libs= + for deplib in $dependency_libs; do + case $deplib in + *.la) + func_basename "$deplib" + name=$func_basename_result + func_resolve_sysroot "$deplib" + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $func_resolve_sysroot_result` + test -z "$libdir" && \ + func_fatal_error "'$deplib' is not a valid libtool archive" + func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name" + ;; + -L*) + func_stripname -L '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -L$func_replace_sysroot_result" + ;; + -R*) + func_stripname -R '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -R$func_replace_sysroot_result" + ;; + *) func_append newdependency_libs " $deplib" ;; + esac + done + dependency_libs=$newdependency_libs + newdlfiles= + + for lib in $dlfiles; do + case $lib in + *.la) + func_basename "$lib" + name=$func_basename_result + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "'$lib' is not a valid libtool archive" + func_append newdlfiles " ${lt_sysroot:+=}$libdir/$name" + ;; + *) func_append newdlfiles " $lib" ;; + esac + done + dlfiles=$newdlfiles + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + *.la) + # Only pass preopened files to the pseudo-archive (for + # eventual linking with the app. 
that links it) if we + # didn't already link the preopened objects directly into + # the library: + func_basename "$lib" + name=$func_basename_result + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "'$lib' is not a valid libtool archive" + func_append newdlprefiles " ${lt_sysroot:+=}$libdir/$name" + ;; + esac + done + dlprefiles=$newdlprefiles + else + newdlfiles= + for lib in $dlfiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs=$lib ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlfiles " $abs" + done + dlfiles=$newdlfiles + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs=$lib ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlprefiles " $abs" + done + dlprefiles=$newdlprefiles + fi + $RM $output + # place dlname in correct position for cygwin + # In fact, it would be nice if we could use this code for all target + # systems that can't hard-code library paths into their executables + # and that have no shared library path variable independent of PATH, + # but it turns out we can't easily determine that from inspecting + # libtool variables, so we have to hard-code the OSs to which it + # applies here; at the moment, that means platforms that use the PE + # object format with DLL files. See the long comment at the top of + # tests/bindir.at for full details. + tdlname=$dlname + case $host,$output,$installed,$module,$dlname in + *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *windows*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll) + # If a -bindir argument was supplied, place the dll there. + if test -n "$bindir"; then + func_relative_path "$install_libdir" "$bindir" + tdlname=$func_relative_path_result/$dlname + else + # Otherwise fall back on heuristic. + tdlname=../bin/$dlname + fi + ;; + esac + $ECHO > $output "\ +# $outputname - a libtool library file +# Generated by $PROGRAM (GNU $PACKAGE) $VERSION +# +# Please DO NOT delete this file! 
+# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='$tdlname' + +# Names of this library. +library_names='$library_names' + +# The name of the static archive. +old_library='$old_library' + +# Linker flags that cannot go in dependency_libs. +inherited_linker_flags='$new_inherited_linker_flags' + +# Libraries that this one depends upon. +dependency_libs='$dependency_libs' + +# Names of additional weak libraries provided by this library +weak_library_names='$weak_libs' + +# Version information for $libname. +current=$current +age=$age +revision=$revision + +# Is this an already installed library? +installed=$installed + +# Should we warn about portability when linking against -modules? +shouldnotlink=$module + +# Files to dlopen/dlpreopen +dlopen='$dlfiles' +dlpreopen='$dlprefiles' + +# Directory that this library needs to be installed in: +libdir='$install_libdir'" + if test no,yes = "$installed,$need_relink"; then + $ECHO >> $output "\ +relink_command=\"$relink_command\"" + fi + done + } + + # Do a symbolic link so that the libtool archive can be found in + # LD_LIBRARY_PATH before the program is installed. + func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?' + ;; + esac + exit $EXIT_SUCCESS +} + +if test link = "$opt_mode" || test relink = "$opt_mode"; then + func_mode_link ${1+"$@"} +fi + + +# func_mode_uninstall arg... -- implement the 'uninstall' and 'clean' modes: RM starts as $nonopt, each '-'-prefixed arg is appended to it ('-f' also sets rmforce), every other arg is collected as a file to process; ends by calling exit $exit_status. +func_mode_uninstall () +{ + $debug_cmd + + RM=$nonopt + files= + rmforce=false + exit_status=0 + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic=$magic + + for arg + do + case $arg in + -f) func_append RM " $arg"; rmforce=: ;; + -*) func_append RM " $arg" ;; + *) func_append files " $arg" ;; + esac + done + + test -z "$RM" && \ + func_fatal_help "you must specify an RM program" + + rmdirs= + + for file in $files; do + func_dirname "$file" "" "." 
+ dir=$func_dirname_result + if test . = "$dir"; then + odir=$objdir + else + odir=$dir/$objdir + fi + func_basename "$file" + name=$func_basename_result + test uninstall = "$opt_mode" && odir=$dir + + # Remember odir for removal later, being careful to avoid duplicates (clean mode only; the recorded directories are passed to rmdir after the file loop) + if test clean = "$opt_mode"; then + case " $rmdirs " in + *" $odir "*) ;; + *) func_append rmdirs " $odir" ;; + esac + fi + + # Don't error if the file doesn't exist and rm -f was used. Symlinks and regular files fall through to removal; a directory sets exit_status=1 and is skipped; anything else (e.g. a missing file) is skipped when -f was given, otherwise it still falls through to the $RM call below. + if { test -L "$file"; } >/dev/null 2>&1 || + { test -h "$file"; } >/dev/null 2>&1 || + test -f "$file"; then + : + elif test -d "$file"; then + exit_status=1 + continue + elif $rmforce; then + continue + fi + + rmfiles=$file + + case $name in + *.la) + # Possibly a libtool archive, so verify it. If so, source it; library_names, old_library, dlname and libdir read below are presumably set by the sourced file (they are not assigned in this function). + if func_lalib_p "$file"; then + func_source $dir/$name + + # Delete the libtool libraries and symlinks. + for n in $library_names; do + func_append rmfiles " $odir/$n" + done + test -n "$old_library" && func_append rmfiles " $odir/$old_library" + + case $opt_mode in + clean) + case " $library_names " in + *" $dlname "*) ;; + *) test -n "$dlname" && func_append rmfiles " $odir/$dlname" ;; + esac + test -n "$libdir" && func_append rmfiles " $odir/$name $odir/${name}i" + ;; + uninstall) + if test -n "$library_names"; then + # Do each command in the postuninstall commands. + func_execute_cmds "$postuninstall_cmds" '$rmforce || exit_status=1' + fi + + if test -n "$old_library"; then + # Do each command in the old_postuninstall commands. + func_execute_cmds "$old_postuninstall_cmds" '$rmforce || exit_status=1' + fi + # FIXME: should reinstall the best remaining shared library. + ;; + esac + fi + ;; + + *.lo) + # Possibly a libtool object, so verify it. NOTE(review): this reuses func_lalib_p, the same check as the *.la branch; confirm it also recognizes .lo files. + if func_lalib_p "$file"; then + + # Read the .lo file + func_source $dir/$name + + # Add PIC object to the list of files to remove. 
+ if test -n "$pic_object" && test none != "$pic_object"; then + func_append rmfiles " $dir/$pic_object" + fi + + # Add non-PIC object to the list of files to remove. + if test -n "$non_pic_object" && test none != "$non_pic_object"; then + func_append rmfiles " $dir/$non_pic_object" + fi + fi + ;; + + *) + if test clean = "$opt_mode"; then + noexename=$name + case $file in + *.exe) + func_stripname '' '.exe' "$file" + file=$func_stripname_result + func_stripname '' '.exe' "$name" + noexename=$func_stripname_result + # $file with .exe has already been added to rmfiles, + # add $file without .exe + func_append rmfiles " $file" + ;; + esac + # Do a test to see if this is a libtool program. relink_command is cleared before sourcing, presumably so that a stale value is not seen by the fast_install test below. + if func_ltwrapper_p "$file"; then + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + relink_command= + func_source $func_ltwrapper_scriptname_result + func_append rmfiles " $func_ltwrapper_scriptname_result" + else + relink_command= + func_source $dir/$noexename + fi + + # note $name still contains .exe if it was in $file originally + # as does the version of $file that was added into $rmfiles + func_append rmfiles " $odir/$name $odir/${name}S.$objext" + if test yes = "$fast_install" && test -n "$relink_command"; then + func_append rmfiles " $odir/lt-$name" + fi + if test "X$noexename" != "X$name"; then + func_append rmfiles " $odir/lt-$noexename.c" + fi + fi + fi + ;; + esac + func_show_eval "$RM $rmfiles" 'exit_status=1' + done + + # Try to remove the $objdir's in the directories where we deleted files (only those recorded in rmdirs, i.e. in clean mode; rmdir output is discarded) + for dir in $rmdirs; do + if test -d "$dir"; then + func_show_eval "rmdir $dir >/dev/null 2>&1" + fi + done + + exit $exit_status +} + +if test uninstall = "$opt_mode" || test clean = "$opt_mode"; then + func_mode_uninstall ${1+"$@"} +fi + +test -z "$opt_mode" && { + help=$generic_help + func_fatal_help "you must specify a MODE" +} + +test -z "$exec_cmd" && \ + func_fatal_help "invalid operation mode '$opt_mode'" + +if test -n "$exec_cmd"; then + 
eval exec "$exec_cmd" + exit $EXIT_FAILURE +fi + +exit $exit_status + + +# The TAGs below are defined such that we never get into a situation +# where we disable both kinds of libraries. Given conflicting +# choices, we go for a static library, that is the most portable, +# since we can't tell whether shared libraries were disabled because +# the user asked for that or because the platform doesn't support +# them. This is particularly important on AIX, because we don't +# support having both static and shared libraries enabled at the same +# time on that platform, so we default to a shared-only configuration. +# If a disable-shared tag is given, we'll fallback to a static-only +# configuration. But we'll never go from static-only to shared-only. + +# ### BEGIN LIBTOOL TAG CONFIG: disable-shared +build_libtool_libs=no +build_old_libs=yes +# ### END LIBTOOL TAG CONFIG: disable-shared + +# ### BEGIN LIBTOOL TAG CONFIG: disable-static +build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac` +# ### END LIBTOOL TAG CONFIG: disable-static + +# Local Variables: +# mode:shell-script +# sh-indentation:2 +# End: diff --git a/m4/ax_pthread.m4 b/m4/ax_pthread.m4 new file mode 100644 index 0000000..d90de34 --- /dev/null +++ b/m4/ax_pthread.m4 @@ -0,0 +1,309 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_pthread.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +# +# DESCRIPTION +# +# This macro figures out how to build C programs using POSIX threads. It +# sets the PTHREAD_LIBS output variable to the threads library and linker +# flags, and the PTHREAD_CFLAGS output variable to any special C compiler +# flags that are needed. (The user can also force certain compiler +# flags/libs to be tested by setting these environment variables.) 
+# +# Also sets PTHREAD_CC to any special C compiler that is needed for +# multi-threaded programs (defaults to the value of CC otherwise). (This +# is necessary on AIX to use the special cc_r compiler alias.) +# +# NOTE: You are assumed to not only compile your program with these flags, +# but also link it with them as well. e.g. you should link with +# $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS +# +# If you are only building threads programs, you may wish to use these +# variables in your default LIBS, CFLAGS, and CC: +# +# LIBS="$PTHREAD_LIBS $LIBS" +# CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +# CC="$PTHREAD_CC" +# +# In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant +# has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to that name +# (e.g. PTHREAD_CREATE_UNDETACHED on AIX). +# +# Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the +# PTHREAD_PRIO_INHERIT symbol is defined when compiling with +# PTHREAD_CFLAGS. +# +# ACTION-IF-FOUND is a list of shell commands to run if a threads library +# is found, and ACTION-IF-NOT-FOUND is a list of commands to run if it +# is not found. If ACTION-IF-FOUND is not specified, the default action +# will define HAVE_PTHREAD. +# +# Please let the authors know if this macro fails on any platform, or if +# you have any other suggestions or comments. This macro was based on work +# by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help +# from M. Frigo), as well as ac_pthread and hb_pthread macros posted by +# Alejandro Forero Cuervo to the autoconf macro repository. We are also +# grateful for the helpful feedback of numerous users. +# +# Updated for Autoconf 2.68 by Daniel Richard G. +# +# LICENSE +# +# Copyright (c) 2008 Steven G. Johnson +# Copyright (c) 2011 Daniel Richard G. 
+# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 18 + +AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD]) +AC_DEFUN([AX_PTHREAD], [ +AC_REQUIRE([AC_CANONICAL_HOST]) +AC_LANG_PUSH([C]) +ax_pthread_ok=no + +# We used to check for pthread.h first, but this fails if pthread.h +# requires special compiler flags (e.g. on Tru64 or Sequent). +# It gets checked for in the link test anyway. 
+ +# First of all, check if the user has set any of the PTHREAD_LIBS, +# etcetera environment variables, and if threads linking works using +# them: +if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS]) + AC_TRY_LINK_FUNC(pthread_join, ax_pthread_ok=yes) + AC_MSG_RESULT($ax_pthread_ok) + if test x"$ax_pthread_ok" = xno; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + fi + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" +fi + +# We must check for the threads library under a number of different +# names; the ordering is very important because some systems +# (e.g. DEC) have both -lpthread and -lpthreads, where one of the +# libraries is broken (non-POSIX). + +# Create a list of thread flags to try. Items starting with a "-" are +# C compiler flags, and other items are library names, except for "none" +# which indicates that we try without any flags at all, and "pthread-config" +# which is a program returning the flags for the Pth emulation library. + +ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" + +# The ordering *is* (sometimes) important. 
Some notes on the +# individual items follow: + +# pthreads: AIX (must check this before -lpthread) +# none: in case threads are in libc; should be tried before -Kthread and +# other compiler flags to prevent continual compiler warnings +# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) +# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) +# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) +# -pthreads: Solaris/gcc +# -mthreads: Mingw32/gcc, Lynx/gcc +# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it +# doesn't hurt to check since this sometimes defines pthreads too; +# also defines -D_REENTRANT) +# ... -mt is also the pthreads flag for HP/aCC +# pthread: Linux, etcetera +# --thread-safe: KAI C++ +# pthread-config: use pthread-config program (for GNU Pth library) + +case ${host_os} in + solaris*) + + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based + # tests will erroneously succeed. (We need to link with -pthreads/-mt/ + # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather + # a function called by this macro, so we could check for that, but + # who knows whether they'll stub that too in a future libc.) 
So, + # we'll just look for -pthreads and -lpthread first: + + ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags" + ;; + + darwin*) + ax_pthread_flags="-pthread $ax_pthread_flags" + ;; +esac + +if test x"$ax_pthread_ok" = xno; then +for flag in $ax_pthread_flags; do + + case $flag in + none) + AC_MSG_CHECKING([whether pthreads work without any flags]) + ;; + + -*) + AC_MSG_CHECKING([whether pthreads work with $flag]) + PTHREAD_CFLAGS="$flag" + ;; + + pthread-config) + AC_CHECK_PROG(ax_pthread_config, pthread-config, yes, no) + if test x"$ax_pthread_config" = xno; then continue; fi + PTHREAD_CFLAGS="`pthread-config --cflags`" + PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" + ;; + + *) + AC_MSG_CHECKING([for the pthreads library -l$flag]) + PTHREAD_LIBS="-l$flag" + ;; + esac + + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Check for various functions. We must include pthread.h, + # since some functions may be macros. (On the Sequent, we + # need a special flag -Kthread to make this header compile.) + # We check for pthread_join because it is in -lpthread on IRIX + # while pthread_create is in libc. We check for pthread_attr_init + # due to DEC craziness with -lpthreads. We check for + # pthread_cleanup_push because it is one of the few pthread + # functions on Solaris that doesn't have a non-functional libc stub. + # We try pthread_create on general principles. 
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h> + static void routine(void *a) { a = 0; } + static void *start_routine(void *a) { return a; }], + [pthread_t th; pthread_attr_t attr; + pthread_create(&th, 0, start_routine, 0); + pthread_join(th, 0); + pthread_attr_init(&attr); + pthread_cleanup_push(routine, 0); + pthread_cleanup_pop(0) /* ; */])], + [ax_pthread_ok=yes], + []) + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + AC_MSG_RESULT($ax_pthread_ok) + if test "x$ax_pthread_ok" = xyes; then + break; + fi + + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +done +fi + +# Various other checks: +if test "x$ax_pthread_ok" = xyes; then + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. + AC_MSG_CHECKING([for joinable pthread attribute]) + attr_name=unknown + for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>], + [int attr = $attr; return attr /* ; */])], + [attr_name=$attr; break], + []) + done + AC_MSG_RESULT($attr_name) + if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then + AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name, + [Define to necessary symbol if this constant + uses a non-standard name on your system.]) + fi + + AC_MSG_CHECKING([if more special flags are required for pthreads]) + flag=no + case ${host_os} in + aix* | freebsd* | darwin*) flag="-D_THREAD_SAFE";; + osf* | hpux*) flag="-D_REENTRANT";; + solaris*) + if test "$GCC" = "yes"; then + flag="-D_REENTRANT" + else + flag="-mt -D_REENTRANT" + fi + ;; + esac + AC_MSG_RESULT(${flag}) + if test "x$flag" != xno; then + PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" + fi + + AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT], + ax_cv_PTHREAD_PRIO_INHERIT, [ + AC_LINK_IFELSE([ + AC_LANG_PROGRAM([[#include <pthread.h>]], [[int i = PTHREAD_PRIO_INHERIT;]])], + [ax_cv_PTHREAD_PRIO_INHERIT=yes], + [ax_cv_PTHREAD_PRIO_INHERIT=no]) + ]) + AS_IF([test
"x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes"], + AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], 1, [Have PTHREAD_PRIO_INHERIT.])) + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + # More AIX lossage: must compile with xlc_r or cc_r + if test x"$GCC" != xyes; then + AC_CHECK_PROGS(PTHREAD_CC, xlc_r cc_r, ${CC}) + else + PTHREAD_CC=$CC + fi +else + PTHREAD_CC="$CC" +fi + +AC_SUBST(PTHREAD_LIBS) +AC_SUBST(PTHREAD_CFLAGS) +AC_SUBST(PTHREAD_CC) + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$ax_pthread_ok" = xyes; then + ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1]) + : +else + ax_pthread_ok=no + $2 +fi +AC_LANG_POP +])dnl AX_PTHREAD diff --git a/m4/libtool.m4 b/m4/libtool.m4 new file mode 100644 index 0000000..c5be643 --- /dev/null +++ b/m4/libtool.m4 @@ -0,0 +1,8333 @@ +# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- +# +# Copyright (C) 1996-2001, 2003-2019, 2021-2024 Free Software +# Foundation, Inc. +# Written by Gordon Matzigkeit, 1996 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +m4_define([_LT_COPYING], [dnl +# Copyright (C) 2024 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program or library that is built +# using GNU Libtool, you may include this file under the same +# distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +]) + +# serial 61 LT_INIT + + +# LT_PREREQ(VERSION) +# ------------------ +# Complain and exit if this libtool version is less than VERSION. +m4_defun([LT_PREREQ], +[m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1, + [m4_default([$3], + [m4_fatal([Libtool version $1 or higher is required], + 63)])], + [$2])]) + + +# _LT_CHECK_BUILDDIR +# ------------------ +# Complain if the absolute build directory name contains unusual characters +m4_defun([_LT_CHECK_BUILDDIR], +[case `pwd` in + *\ * | *\ *) + AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;; +esac +]) + + +# LT_INIT([OPTIONS]) +# ------------------ +AC_DEFUN([LT_INIT], +[AC_PREREQ([2.62])dnl We use AC_PATH_PROGS_FEATURE_CHECK +AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +AC_BEFORE([$0], [LT_LANG])dnl +AC_BEFORE([$0], [LT_OUTPUT])dnl +AC_BEFORE([$0], [LTDL_INIT])dnl +m4_require([_LT_CHECK_BUILDDIR])dnl + +dnl Autoconf doesn't catch unexpanded LT_ macros by default: +m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl +m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl +dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4 +dnl unless we require an AC_DEFUNed macro: +AC_REQUIRE([LTOPTIONS_VERSION])dnl +AC_REQUIRE([LTSUGAR_VERSION])dnl +AC_REQUIRE([LTVERSION_VERSION])dnl +AC_REQUIRE([LTOBSOLETE_VERSION])dnl 
+m4_require([_LT_PROG_LTMAIN])dnl + +_LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}]) + +dnl Parse OPTIONS +_LT_SET_OPTIONS([$0], [$1]) + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS=$ltmain + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' +AC_SUBST(LIBTOOL)dnl + +_LT_SETUP + +# Only expand once: +m4_define([LT_INIT]) +])# LT_INIT + +# Old names: +AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT]) +AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_PROG_LIBTOOL], []) +dnl AC_DEFUN([AM_PROG_LIBTOOL], []) + + +# _LT_PREPARE_CC_BASENAME +# ----------------------- +m4_defun([_LT_PREPARE_CC_BASENAME], [ +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. +func_cc_basename () +{ + for cc_temp in @S|@*""; do + case $cc_temp in + compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; + distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; + \-*) ;; + *) break;; + esac + done + func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +} +])# _LT_PREPARE_CC_BASENAME + + +# _LT_CC_BASENAME(CC) +# ------------------- +# It would be clearer to call AC_REQUIREs from _LT_PREPARE_CC_BASENAME, +# but that macro is also expanded into generated libtool script, which +# arranges for $SED and $ECHO to be set by different means. +m4_defun([_LT_CC_BASENAME], +[m4_require([_LT_PREPARE_CC_BASENAME])dnl +AC_REQUIRE([_LT_DECL_SED])dnl +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl +func_cc_basename $1 +cc_basename=$func_cc_basename_result +]) + + +# _LT_FILEUTILS_DEFAULTS +# ---------------------- +# It is okay to use these file commands and assume they have been set +# sensibly after 'm4_require([_LT_FILEUTILS_DEFAULTS])'. 
+m4_defun([_LT_FILEUTILS_DEFAULTS], +[: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} +])# _LT_FILEUTILS_DEFAULTS + + +# _LT_SETUP +# --------- +m4_defun([_LT_SETUP], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl + +_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl +dnl +_LT_DECL([], [host_alias], [0], [The host system])dnl +_LT_DECL([], [host], [0])dnl +_LT_DECL([], [host_os], [0])dnl +dnl +_LT_DECL([], [build_alias], [0], [The build system])dnl +_LT_DECL([], [build], [0])dnl +_LT_DECL([], [build_os], [0])dnl +dnl +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +dnl +AC_REQUIRE([AC_PROG_LN_S])dnl +test -z "$LN_S" && LN_S="ln -s" +_LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl +dnl +AC_REQUIRE([LT_CMD_MAX_LEN])dnl +_LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl +_LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl +dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_CHECK_SHELL_FEATURES])dnl +m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl +m4_require([_LT_CMD_RELOAD])dnl +m4_require([_LT_DECL_FILECMD])dnl +m4_require([_LT_CHECK_MAGIC_METHOD])dnl +m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl +m4_require([_LT_CMD_OLD_ARCHIVE])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_WITH_SYSROOT])dnl +m4_require([_LT_CMD_TRUNCATE])dnl + +_LT_CONFIG_LIBTOOL_INIT([ +# See if we are running on zsh, and set the options that allow our +# commands through without removal of \ escapes INIT. +if test -n "\${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi +]) +if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi + +_LT_CHECK_OBJDIR + +m4_require([_LT_TAG_COMPILER])dnl + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. 
For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a '.a' archive for static linking (except MSVC and +# ICC, which need '.lib'). +libext=a + +with_gnu_ld=$lt_cv_prog_gnu_ld + +old_CC=$CC +old_CFLAGS=$CFLAGS + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +_LT_CC_BASENAME([$compiler]) + +# Only perform the check for file, if the check method requires it +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + _LT_PATH_MAGIC + fi + ;; +esac + +# Use C for the default configuration in the libtool script +LT_SUPPORTED_TAG([CC]) +_LT_LANG_C_CONFIG +_LT_LANG_DEFAULT_CONFIG +_LT_CONFIG_COMMANDS +])# _LT_SETUP + + +# _LT_PREPARE_SED_QUOTE_VARS +# -------------------------- +# Define a few sed substitution that help us do robust quoting. +m4_defun([_LT_PREPARE_SED_QUOTE_VARS], +[# Backslashify metacharacters that are still active within +# double-quoted strings. +sed_quote_subst='s/\([["`$\\]]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\([["`\\]]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. 
+delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' +]) + +# _LT_PROG_LTMAIN +# --------------- +# Note that this code is called both from 'configure', and 'config.status' +# now that we use AC_CONFIG_COMMANDS to generate libtool. Notably, +# 'config.status' has no value for ac_aux_dir unless we are using Automake, +# so we pass a copy along to make sure it has a sensible value anyway. +m4_defun([_LT_PROG_LTMAIN], +[m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl +_LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir']) +ltmain=$ac_aux_dir/ltmain.sh +])# _LT_PROG_LTMAIN + + +## ------------------------------------- ## +## Accumulate code for creating libtool. ## +## ------------------------------------- ## + +# So that we can recreate a full libtool script including additional +# tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS +# in macros and then make a single call at the end using the 'libtool' +# label. + + +# _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS]) +# ---------------------------------------- +# Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL_INIT], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_INIT], + [$1 +])])]) + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_INIT]) + + +# _LT_CONFIG_LIBTOOL([COMMANDS]) +# ------------------------------ +# Register COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS], + [$1 +])])]) + +# Initialize. 
+m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS]) + + +# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS]) +# ----------------------------------------------------- +m4_defun([_LT_CONFIG_SAVE_COMMANDS], +[_LT_CONFIG_LIBTOOL([$1]) +_LT_CONFIG_LIBTOOL_INIT([$2]) +]) + + +# _LT_FORMAT_COMMENT([COMMENT]) +# ----------------------------- +# Add leading comment marks to the start of each line, and a trailing +# full-stop to the whole comment if one is not present already. +m4_define([_LT_FORMAT_COMMENT], +[m4_ifval([$1], [ +m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])], + [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.]) +)]) + + + +## ------------------------ ## +## FIXME: Eliminate VARNAME ## +## ------------------------ ## + + +# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?]) +# ------------------------------------------------------------------- +# CONFIGNAME is the name given to the value in the libtool script. +# VARNAME is the (base) name used in the configure script. +# VALUE may be 0, 1 or 2 for a computed quote escaped value based on +# VARNAME. Any other value will be used directly. 
+m4_define([_LT_DECL], +[lt_if_append_uniq([lt_decl_varnames], [$2], [, ], + [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name], + [m4_ifval([$1], [$1], [$2])]) + lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3]) + m4_ifval([$4], + [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])]) + lt_dict_add_subkey([lt_decl_dict], [$2], + [tagged?], [m4_ifval([$5], [yes], [no])])]) +]) + + +# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION]) +# -------------------------------------------------------- +m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])]) + + +# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +m4_define([lt_decl_tag_varnames], +[_lt_decl_filter([tagged?], [yes], $@)]) + + +# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..]) +# --------------------------------------------------------- +m4_define([_lt_decl_filter], +[m4_case([$#], + [0], [m4_fatal([$0: too few arguments: $#])], + [1], [m4_fatal([$0: too few arguments: $#: $1])], + [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)], + [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)], + [lt_dict_filter([lt_decl_dict], $@)])[]dnl +]) + + +# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...]) +# -------------------------------------------------- +m4_define([lt_decl_quote_varnames], +[_lt_decl_filter([value], [1], $@)]) + + +# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_dquote_varnames], +[_lt_decl_filter([value], [2], $@)]) + + +# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_varnames_tagged], +[m4_assert([$# <= 2])dnl +_$0(m4_quote(m4_default([$1], [[, ]])), + m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]), + m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))]) 
+m4_define([_lt_decl_varnames_tagged], +[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])]) + + +# lt_decl_all_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +m4_define([lt_decl_all_varnames], +[_$0(m4_quote(m4_default([$1], [[, ]])), + m4_if([$2], [], + m4_quote(lt_decl_varnames), + m4_quote(m4_shift($@))))[]dnl +]) +m4_define([_lt_decl_all_varnames], +[lt_join($@, lt_decl_varnames_tagged([$1], + lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl +]) + + +# _LT_CONFIG_STATUS_DECLARE([VARNAME]) +# ------------------------------------ +# Quote a variable value, and forward it to 'config.status' so that its +# declaration there will have the same value as in 'configure'. VARNAME +# must have a single quote delimited value for this to work. +m4_define([_LT_CONFIG_STATUS_DECLARE], +[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`']) + + +# _LT_CONFIG_STATUS_DECLARATIONS +# ------------------------------ +# We delimit libtool config variables with single quotes, so when +# we write them to config.status, we have to be sure to quote all +# embedded single quotes properly. In configure, this macro expands +# each variable declared with _LT_DECL (and _LT_TAGDECL) into: +# +# <var>='`$ECHO "$<var>" | $SED "$delay_single_quote_subst"`' +m4_defun([_LT_CONFIG_STATUS_DECLARATIONS], +[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames), + [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAGS +# ---------------- +# Output comment and list of tags supported by the script +m4_defun([_LT_LIBTOOL_TAGS], +[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl +available_tags='_LT_TAGS'dnl +]) + + +# _LT_LIBTOOL_DECLARE(VARNAME, [TAG]) +# ----------------------------------- +# Extract the dictionary values for VARNAME (optionally with TAG) and +# expand to a commented shell variable setting: +# +# # Some comment about what VAR is for. 
+# visible_name=$lt_internal_name +m4_define([_LT_LIBTOOL_DECLARE], +[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], + [description])))[]dnl +m4_pushdef([_libtool_name], + m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl +m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])), + [0], [_libtool_name=[$]$1], + [1], [_libtool_name=$lt_[]$1], + [2], [_libtool_name=$lt_[]$1], + [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl +m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl +]) + + +# _LT_LIBTOOL_CONFIG_VARS +# ----------------------- +# Produce commented declarations of non-tagged libtool config variables +# suitable for insertion in the LIBTOOL CONFIG section of the 'libtool' +# script. Tagged libtool config variables (even for the LIBTOOL CONFIG +# section) are produced by _LT_LIBTOOL_TAG_VARS. +m4_defun([_LT_LIBTOOL_CONFIG_VARS], +[m4_foreach([_lt_var], + m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAG_VARS(TAG) +# ------------------------- +m4_define([_LT_LIBTOOL_TAG_VARS], +[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])]) + + +# _LT_TAGVAR(VARNAME, [TAGNAME]) +# ------------------------------ +m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])]) + + +# _LT_CONFIG_COMMANDS +# ------------------- +# Send accumulated output to $CONFIG_STATUS. Thanks to the lists of +# variables for single and double quote escaping we saved from calls +# to _LT_DECL, we can put quote escaped variables declarations +# into 'config.status', and then the shell code to quote escape them in +# for loops in 'config.status'. Finally, any additional code accumulated +# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded. 
+m4_defun([_LT_CONFIG_COMMANDS], +[AC_PROVIDE_IFELSE([LT_OUTPUT], + dnl If the libtool generation code has been placed in $CONFIG_LT, + dnl instead of duplicating it all over again into config.status, + dnl then we will have config.status run $CONFIG_LT later, so it + dnl needs to know what name is stored there: + [AC_CONFIG_COMMANDS([libtool], + [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])], + dnl If the libtool generation code is destined for config.status, + dnl expand the accumulated commands and init code now: + [AC_CONFIG_COMMANDS([libtool], + [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])]) +])#_LT_CONFIG_COMMANDS + + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT], +[ + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +_LT_CONFIG_STATUS_DECLARATIONS +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' +compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$[]1 +_LTECHO_EOF' +} + +# Quote evaled strings. +for var in lt_decl_all_varnames([[ \ +]], lt_decl_quote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. 
+for var in lt_decl_all_varnames([[ \ +]], lt_decl_dquote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +_LT_OUTPUT_LIBTOOL_INIT +]) + +# _LT_GENERATED_FILE_INIT(FILE, [COMMENT]) +# ------------------------------------ +# Generate a child script FILE with all initialization necessary to +# reuse the environment learned by the parent script, and make the +# file executable. If COMMENT is supplied, it is inserted after the +# '#!' sequence but before initialization text begins. After this +# macro, additional text can be appended to FILE to form the body of +# the child script. The macro ends with non-zero status if the +# file could not be fully written (such as if the disk is full). +m4_ifdef([AS_INIT_GENERATED], +[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])], +[m4_defun([_LT_GENERATED_FILE_INIT], +[m4_require([AS_PREPARE])]dnl +[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl +[lt_write_fail=0 +cat >$1 <<_ASEOF || lt_write_fail=1 +#! $SHELL +# Generated by $as_me. +$2 +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$1 <<\_ASEOF || lt_write_fail=1 +AS_SHELL_SANITIZE +_AS_PREPARE +exec AS_MESSAGE_FD>&1 +_ASEOF +test 0 = "$lt_write_fail" && chmod +x $1[]dnl +m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT + +# LT_OUTPUT +# --------- +# This macro allows early generation of the libtool script (before +# AC_OUTPUT is called), in case it is used in configure for compilation +# tests. 
+AC_DEFUN([LT_OUTPUT], +[: ${CONFIG_LT=./config.lt} +AC_MSG_NOTICE([creating $CONFIG_LT]) +_LT_GENERATED_FILE_INIT(["$CONFIG_LT"], +[# Run this file to recreate a libtool stub with the current configuration.]) + +# Append the config.lt preamble. The quoted here-document delimiter stops +# the shell from expanding this text while configure writes it out. +cat >>"$CONFIG_LT" <<\_LTEOF +lt_cl_silent=false +exec AS_MESSAGE_LOG_FD>>config.log +{ + echo + AS_BOX([Running $as_me.]) +} >&AS_MESSAGE_LOG_FD + +lt_cl_help="\ +'$as_me' creates a local libtool stub from the current configuration, +for use in further configure time tests before the real libtool is +generated. + +Usage: $[0] [[OPTIONS]] + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + +Report bugs to <bug-libtool@gnu.org>." + +lt_cl_version="\ +m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl +m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION]) +configured by $[0], generated by m4_PACKAGE_STRING. + +Copyright (C) 2024 Free Software Foundation, Inc. +This config.lt script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." 
+ +while test 0 != $[#] +do + case $[1] in + --version | --v* | -V ) + echo "$lt_cl_version"; exit 0 ;; + --help | --h* | -h ) + echo "$lt_cl_help"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --quiet | --q* | --silent | --s* | -q ) + lt_cl_silent=: ;; + + -*) AC_MSG_ERROR([unrecognized option: $[1] +Try '$[0] --help' for more information.]) ;; + + *) AC_MSG_ERROR([unrecognized argument: $[1] +Try '$[0] --help' for more information.]) ;; + esac + shift +done + +if $lt_cl_silent; then + exec AS_MESSAGE_FD>/dev/null +fi +_LTEOF + +cat >>"$CONFIG_LT" <<_LTEOF +_LT_OUTPUT_LIBTOOL_COMMANDS_INIT +_LTEOF + +cat >>"$CONFIG_LT" <<\_LTEOF +AC_MSG_NOTICE([creating $ofile]) +_LT_OUTPUT_LIBTOOL_COMMANDS +AS_EXIT(0) +_LTEOF +chmod +x "$CONFIG_LT" + +# configure is writing to config.log, but config.lt does its own redirection, +# appending to config.log, which fails on DOS, as config.log is still kept +# open by configure. Here we exec the FD to /dev/null, effectively closing +# config.log, so it can be properly (re)opened and appended to by config.lt. +lt_cl_success=: +test yes = "$silent" && + lt_config_lt_args="$lt_config_lt_args --quiet" +exec AS_MESSAGE_LOG_FD>/dev/null +$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false +exec AS_MESSAGE_LOG_FD>>config.log +$lt_cl_success || AS_EXIT(1) +])# LT_OUTPUT + + +# _LT_CONFIG(TAG) +# --------------- +# If TAG is the built-in tag, create an initial libtool script with a +# default configuration from the untagged config vars. Otherwise add code +# to config.status for appending the configuration named by TAG from the +# matching tagged config vars. +m4_defun([_LT_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +_LT_CONFIG_SAVE_COMMANDS([ + m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl + m4_if(_LT_TAG, [C], [ + # See if we are running on zsh, and set the options that allow our + # commands through without removal of \ escapes. 
+ if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST + fi + + cfgfile=${ofile}T + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL +# Generated automatically by $as_me ($PACKAGE) $VERSION +# NOTE: Changes made to this file will be lost: look at ltmain.sh. + +# Provide generalized library-building support services. +# Written by Gordon Matzigkeit, 1996 + +_LT_COPYING +_LT_LIBTOOL_TAGS + +# Configured defaults for sys_lib_dlsearch_path munging. +: \${LT_SYS_LIBRARY_PATH="$configure_time_lt_sys_library_path"} + +# ### BEGIN LIBTOOL CONFIG +_LT_LIBTOOL_CONFIG_VARS +_LT_LIBTOOL_TAG_VARS +# ### END LIBTOOL CONFIG + +_LT_EOF + + cat <<'_LT_EOF' >> "$cfgfile" + +# ### BEGIN FUNCTIONS SHARED WITH CONFIGURE + +_LT_PREPARE_MUNGE_PATH_LIST +_LT_PREPARE_CC_BASENAME + +# ### END FUNCTIONS SHARED WITH CONFIGURE + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + _LT_PROG_LTMAIN + + # We use sed instead of cat because bash on DJGPP gets confused if + # it finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + $SED '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" +], +[cat <<_LT_EOF >> "$ofile" + +dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded +dnl in a comment (i.e. after a #). 
+# ### BEGIN LIBTOOL TAG CONFIG: $1 +_LT_LIBTOOL_TAG_VARS(_LT_TAG) +# ### END LIBTOOL TAG CONFIG: $1 +_LT_EOF +])dnl /m4_if +], +[m4_if([$1], [], [ + PACKAGE='$PACKAGE' + VERSION='$VERSION' + RM='$RM' + ofile='$ofile'], []) +])dnl /_LT_CONFIG_SAVE_COMMANDS +])# _LT_CONFIG + + +# LT_SUPPORTED_TAG(TAG) +# --------------------- +# Trace this macro to discover what tags are supported by the libtool +# --tag option, using: +# autoconf --trace 'LT_SUPPORTED_TAG:$1' +AC_DEFUN([LT_SUPPORTED_TAG], []) + + +# C support is built-in for now +m4_define([_LT_LANG_C_enabled], []) +m4_define([_LT_TAGS], []) + + +# LT_LANG(LANG) +# ------------- +# Enable libtool support for the given language if not already enabled. +AC_DEFUN([LT_LANG], +[AC_BEFORE([$0], [LT_OUTPUT])dnl +m4_case([$1], + [C], [_LT_LANG(C)], + [C++], [_LT_LANG(CXX)], + [Go], [_LT_LANG(GO)], + [Java], [_LT_LANG(GCJ)], + [Fortran 77], [_LT_LANG(F77)], + [Fortran], [_LT_LANG(FC)], + [Windows Resource], [_LT_LANG(RC)], + [m4_ifdef([_LT_LANG_]$1[_CONFIG], + [_LT_LANG($1)], + [m4_fatal([$0: unsupported language: "$1"])])])dnl +])# LT_LANG + + +# _LT_LANG(LANGNAME) +# ------------------ +m4_defun([_LT_LANG], +[m4_ifdef([_LT_LANG_]$1[_enabled], [], + [LT_SUPPORTED_TAG([$1])dnl + m4_append([_LT_TAGS], [$1 ])dnl + m4_define([_LT_LANG_]$1[_enabled], [])dnl + _LT_LANG_$1_CONFIG($1)])dnl +])# _LT_LANG + + +m4_ifndef([AC_PROG_GO], [ +############################################################ +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_GO. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. 
# +############################################################ +m4_defun([AC_PROG_GO], +[AC_LANG_PUSH(Go)dnl +AC_ARG_VAR([GOC], [Go compiler command])dnl +AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl +_AC_ARG_VAR_LDFLAGS()dnl +AC_CHECK_TOOL(GOC, gccgo) +if test -z "$GOC"; then + if test -n "$ac_tool_prefix"; then + AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo]) + fi +fi +if test -z "$GOC"; then + AC_CHECK_PROG(GOC, gccgo, gccgo, false) +fi +])#m4_defun +])#m4_ifndef + + +# _LT_LANG_DEFAULT_CONFIG +# ----------------------- +m4_defun([_LT_LANG_DEFAULT_CONFIG], +[AC_PROVIDE_IFELSE([AC_PROG_CXX], + [LT_LANG(CXX)], + [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])]) + +AC_PROVIDE_IFELSE([AC_PROG_F77], + [LT_LANG(F77)], + [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])]) + +AC_PROVIDE_IFELSE([AC_PROG_FC], + [LT_LANG(FC)], + [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])]) + +dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal +dnl pulling things in needlessly. 
+AC_PROVIDE_IFELSE([AC_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([A][M_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([LT_PROG_GCJ], + [LT_LANG(GCJ)], + [m4_ifdef([AC_PROG_GCJ], + [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([A][M_PROG_GCJ], + [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([LT_PROG_GCJ], + [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])]) + +AC_PROVIDE_IFELSE([AC_PROG_GO], + [LT_LANG(GO)], + [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])]) + +AC_PROVIDE_IFELSE([LT_PROG_RC], + [LT_LANG(RC)], + [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])]) +])# _LT_LANG_DEFAULT_CONFIG + +# Obsolete macros: +AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)]) +AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)]) +AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)]) +AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)]) +AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_CXX], []) +dnl AC_DEFUN([AC_LIBTOOL_F77], []) +dnl AC_DEFUN([AC_LIBTOOL_FC], []) +dnl AC_DEFUN([AC_LIBTOOL_GCJ], []) +dnl AC_DEFUN([AC_LIBTOOL_RC], []) + + +# _LT_TAG_COMPILER +# ---------------- +m4_defun([_LT_TAG_COMPILER], +[AC_REQUIRE([AC_PROG_CC])dnl + +_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl +_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl +_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl +_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC +])# _LT_TAG_COMPILER + + +# _LT_COMPILER_BOILERPLATE +# ------------------------ +# Check for compiler boilerplate output or warnings with +# the simple compiler test code. 
+m4_defun([_LT_COMPILER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* +])# _LT_COMPILER_BOILERPLATE + + +# _LT_LINKER_BOILERPLATE +# ---------------------- +# Check for linker boilerplate output or warnings with +# the simple link test code. +m4_defun([_LT_LINKER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* +])# _LT_LINKER_BOILERPLATE + +# _LT_REQUIRED_DARWIN_CHECKS +# ------------------------- +m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[ + case $host_os in + rhapsody* | darwin*) + AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:]) + AC_CHECK_TOOL([NMEDIT], [nmedit], [:]) + AC_CHECK_TOOL([LIPO], [lipo], [:]) + AC_CHECK_TOOL([OTOOL], [otool], [:]) + AC_CHECK_TOOL([OTOOL64], [otool64], [:]) + _LT_DECL([], [DSYMUTIL], [1], + [Tool to manipulate archived DWARF debug symbol files on Mac OS X]) + _LT_DECL([], [NMEDIT], [1], + [Tool to change global to local symbols on Mac OS X]) + _LT_DECL([], [LIPO], [1], + [Tool to manipulate fat objects and archives on Mac OS X]) + _LT_DECL([], [OTOOL], [1], + [ldd/readelf like tool for Mach-O binaries on Mac OS X]) + _LT_DECL([], [OTOOL64], [1], + [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4]) + + AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod], + [lt_cv_apple_cc_single_mod=no + if test -z "$LT_MULTI_MODULE"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. 
+ rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? + # If there is a non-empty error log, and "single_module" + # appears in it, assume the flag caused a linker warning + if test -s conftest.err && $GREP single_module conftest.err; then + cat conftest.err >&AS_MESSAGE_LOG_FD + # Otherwise, if the output was created with a 0 exit code from + # the compiler, it worked. + elif test -f libconftest.dylib && test 0 = "$_lt_result"; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi]) + + AC_CACHE_CHECK([for -exported_symbols_list linker flag], + [lt_cv_ld_exported_symbols_list], + [lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [lt_cv_ld_exported_symbols_list=yes], + [lt_cv_ld_exported_symbols_list=no]) + LDFLAGS=$save_LDFLAGS + ]) + + AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load], + [lt_cv_ld_force_load=no + cat > conftest.c << _LT_EOF +int forced_loaded() { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD + echo "$AR $AR_FLAGS libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD + $AR $AR_FLAGS libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD + echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD + $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD + cat > conftest.c << _LT_EOF +int main() { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o conftest 
conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? + if test -s conftest.err && $GREP force_load conftest.err; then + cat conftest.err >&AS_MESSAGE_LOG_FD + elif test -f conftest && test 0 = "$_lt_result" && $GREP forced_load conftest >/dev/null 2>&1; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + ]) + case $host_os in + rhapsody* | darwin1.[[012]]) + _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + darwin*) + case $MACOSX_DEPLOYMENT_TARGET,$host in + 10.[[012]],*|,*powerpc*-darwin[[5-8]]*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + *) + _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test yes = "$lt_cv_apple_cc_single_mod"; then + _lt_dar_single_mod='$single_module' + fi + if test yes = "$lt_cv_ld_exported_symbols_list"; then + _lt_dar_export_syms=' $wl-exported_symbols_list,$output_objdir/$libname-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/$libname-symbols.expsym $lib' + fi + if test : != "$DSYMUTIL" && test no = "$lt_cv_ld_force_load"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac +]) + + +# _LT_DARWIN_LINKER_FEATURES([TAG]) +# --------------------------------- +# Checks for linker and compiler features on darwin +m4_defun([_LT_DARWIN_LINKER_FEATURES], +[ + m4_require([_LT_REQUIRED_DARWIN_CHECKS]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_automatic, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + if test yes = "$lt_cv_ld_force_load"; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all 
\"$new_convenience\"`' + m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes], + [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes]) + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='' + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=$_lt_dar_allow_undefined + case $cc_basename in + ifort*|nagfor*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test yes = "$_lt_dar_can_shared"; then + output_verbose_link_cmd=func_echo_all + _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" + _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" + _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + _LT_TAGVAR(module_expsym_cmds, $1)="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + m4_if([$1], [CXX], +[ if test yes != "$lt_cv_apple_cc_single_mod"; then + _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dsymutil" + _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname 
\$verstring$_lt_dar_export_syms$_lt_dsymutil" + fi +],[]) + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi +]) + +# _LT_SYS_MODULE_PATH_AIX([TAGNAME]) +# ---------------------------------- +# Links a minimal program and checks the executable +# for the system default hardcoded library path. In most cases, +# this is /usr/lib:/lib, but when the MPI compilers are used +# the location of the communication and MPI libs are included too. +# If we don't find anything, use the default library path according +# to the aix ld manual. +# Store the results from the different compilers for each TAGNAME. +# Allow to override them for all tags through lt_cv_aix_libpath. +m4_defun([_LT_SYS_MODULE_PATH_AIX], +[m4_require([_LT_DECL_SED])dnl +if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])], + [AC_LINK_IFELSE([AC_LANG_PROGRAM],[ + lt_aix_libpath_sed='[ + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }]' + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi],[]) + if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=/usr/lib:/lib + fi + ]) + aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1]) +fi +])# _LT_SYS_MODULE_PATH_AIX + + +# _LT_SHELL_INIT(ARG) +# ------------------- +m4_define([_LT_SHELL_INIT], +[m4_divert_text([M4SH-INIT], [$1 +])])# _LT_SHELL_INIT + + + +# _LT_PROG_ECHO_BACKSLASH +# ----------------------- +# Find how we can fake an echo command that does not interpret backslash. 
+# In particular, with Autoconf 2.60 or later we add some code to the start +# of the generated configure script that will find a shell with a builtin +# printf (that we can use as an echo command). +m4_defun([_LT_PROG_ECHO_BACKSLASH], +[ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +AC_MSG_CHECKING([how to print strings]) +# Test print first, because it will be a builtin if present. +if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$[]1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "$*" +} + +case $ECHO in + printf*) AC_MSG_RESULT([printf]) ;; + print*) AC_MSG_RESULT([print -r]) ;; + *) AC_MSG_RESULT([cat]) ;; +esac + +m4_ifdef([_AS_DETECT_SUGGESTED], +[_AS_DETECT_SUGGESTED([ + test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test "X`printf %s $ECHO`" = "X$ECHO" \ + || test "X`print -r -- $ECHO`" = "X$ECHO" )])]) + +_LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts]) +_LT_DECL([], [ECHO], [1], [An echo program that protects backslashes]) +])# _LT_PROG_ECHO_BACKSLASH + + +# _LT_WITH_SYSROOT +# ---------------- +AC_DEFUN([_LT_WITH_SYSROOT], +[m4_require([_LT_DECL_SED])dnl +AC_MSG_CHECKING([for sysroot]) +AC_ARG_WITH([sysroot], +[AS_HELP_STRING([--with-sysroot@<:@=DIR@:>@], + [Search for dependent 
libraries within DIR (or the compiler's sysroot + if not specified).])], +[], [with_sysroot=no]) + +dnl lt_sysroot will always be passed unquoted. We quote it here +dnl in case the user passed a directory name. +lt_sysroot= +case $with_sysroot in #( + yes) + if test yes = "$GCC"; then + # Trim trailing / since we'll always append absolute paths and we want + # to avoid //, if only for less confusing output for the user. + # The pattern uses /* because POSIX basic regular expressions have no + # \+ operator, so a non-GNU sed may otherwise leave the slashes in place. + lt_sysroot=`$CC --print-sysroot 2>/dev/null | $SED 's:/*$::'` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | $SED -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + AC_MSG_RESULT([$with_sysroot]) + AC_MSG_ERROR([The sysroot must be an absolute path.]) + ;; +esac + + AC_MSG_RESULT([${lt_sysroot:-no}]) +_LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl +[dependent libraries, and where our libraries should be installed.])]) + +# _LT_ENABLE_LOCK +# --------------- +m4_defun([_LT_ENABLE_LOCK], +[AC_ARG_ENABLE([libtool-lock], + [AS_HELP_STRING([--disable-libtool-lock], + [avoid locking (might break parallel builds)])]) +test no = "$enable_libtool_lock" || enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out what ABI is being produced by ac_compile, and set mode + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `$FILECMD conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE=32 + ;; + *ELF-64*) + HPUX_IA64_MODE=64 + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. 
+ echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + if test yes = "$lt_cv_prog_gnu_ld"; then + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +mips64*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + emul=elf + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + emul="${emul}32" + ;; + *64-bit*) + emul="${emul}64" + ;; + esac + case `$FILECMD conftest.$ac_objext` in + *MSB*) + emul="${emul}btsmip" + ;; + *LSB*) + emul="${emul}ltsmip" + ;; + esac + case `$FILECMD conftest.$ac_objext` in + *N32*) + emul="${emul}n32" + ;; + esac + LD="${LD-ld} -m $emul" + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*|x86_64-gnu*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. Note that the listed cases only cover the + # situations where additional linker options are needed (such as when + # doing 32-bit compilation for a host where ld defaults to 64-bit, or + # vice versa); the common cases where no linker options are needed do + # not appear in the list. 
+ echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `$FILECMD conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*|x86_64-gnu*) + case `$FILECMD conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac + ;; + powerpc64le-*linux*) + LD="${LD-ld} -m elf32lppclinux" + ;; + powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*|x86_64-gnu*) + LD="${LD-ld} -m elf_x86_64" + ;; + powerpcle-*linux*) + LD="${LD-ld} -m elf64lppc" + ;; + powerpc-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS -belf" + AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, + [AC_LANG_PUSH(C) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) + AC_LANG_POP]) + if test yes != "$lt_cv_cc_needs_belf"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS=$SAVE_CFLAGS + fi + ;; +*-*solaris*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `$FILECMD conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) + case $host in + i?86-*-solaris*|x86_64-*-solaris*) + LD="${LD-ld} -m elf_x86_64" + ;; + sparc*-*-solaris*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + # GNU ld 2.21 introduced _sol2 emulations. Use them if available. 
+ if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then + LD=${LD-ld}_sol2 + fi + ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; +esac + +need_locks=$enable_libtool_lock +])# _LT_ENABLE_LOCK + + +# _LT_PROG_AR +# ----------- +m4_defun([_LT_PROG_AR], +[AC_CHECK_TOOLS(AR, [ar], false) +: ${AR=ar} +_LT_DECL([], [AR], [1], [The archiver]) + +# Use ARFLAGS variable as AR's operation code to sync the variable naming with +# Automake. If both AR_FLAGS and ARFLAGS are specified, AR_FLAGS should have +# higher priority because that's what people were doing historically (setting +# ARFLAGS for automake and AR_FLAGS for libtool). FIXME: Make the AR_FLAGS +# variable obsoleted/removed. + +test ${AR_FLAGS+y} || AR_FLAGS=${ARFLAGS-cr} +lt_ar_flags=$AR_FLAGS +_LT_DECL([], [lt_ar_flags], [0], [Flags to create an archive (by configure)]) + +# Make AR_FLAGS overridable by 'make ARFLAGS='. Don't try to run-time override +# by AR_FLAGS because that was never working and AR_FLAGS is about to die. +_LT_DECL([], [AR_FLAGS], [\@S|@{ARFLAGS-"\@S|@lt_ar_flags"}], + [Flags to create an archive]) + +AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file], + [lt_cv_ar_at_file=no + AC_COMPILE_IFELSE([AC_LANG_PROGRAM], + [echo conftest.$ac_objext > conftest.lst + lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([lt_ar_try]) + if test 0 -eq "$ac_status"; then + # Ensure the archiver fails upon bogus file names. 
+ rm -f conftest.$ac_objext libconftest.a + AC_TRY_EVAL([lt_ar_try]) + if test 0 -ne "$ac_status"; then + lt_cv_ar_at_file=@ + fi + fi + rm -f conftest.* libconftest.a + ]) + ]) + +if test no = "$lt_cv_ar_at_file"; then + archiver_list_spec= +else + archiver_list_spec=$lt_cv_ar_at_file +fi +_LT_DECL([], [archiver_list_spec], [1], + [How to feed a file listing to the archiver]) +])# _LT_PROG_AR + + +# _LT_CMD_OLD_ARCHIVE +# ------------------- +m4_defun([_LT_CMD_OLD_ARCHIVE], +[_LT_PROG_AR + +AC_CHECK_TOOL(STRIP, strip, :) +test -z "$STRIP" && STRIP=: +_LT_DECL([], [STRIP], [1], [A symbol stripping program]) + +AC_CHECK_TOOL(RANLIB, ranlib, :) +test -z "$RANLIB" && RANLIB=: +_LT_DECL([], [RANLIB], [1], + [Commands used to install an old-style archive]) + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" +fi + +case $host_os in + darwin*) + lock_old_archive_extraction=yes ;; + *) + lock_old_archive_extraction=no ;; +esac +_LT_DECL([], [old_postinstall_cmds], [2]) +_LT_DECL([], [old_postuninstall_cmds], [2]) +_LT_TAGDECL([], [old_archive_cmds], [2], + [Commands used to build an old-style archive]) +_LT_DECL([], [lock_old_archive_extraction], [0], + [Whether to use a lock for old archive extraction]) +])# _LT_CMD_OLD_ARCHIVE + + +# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE]) +# ---------------------------------------------------------------- +# Check whether the given compiler option works +AC_DEFUN([_LT_COMPILER_OPTION], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_SED])dnl +AC_CACHE_CHECK([$1], [$2], + [$2=no + m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4]) + echo 
"$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$3" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + fi + $RM conftest* +]) + +if test yes = "[$]$2"; then + m4_if([$5], , :, [$5]) +else + m4_if([$6], , :, [$6]) +fi +])# _LT_COMPILER_OPTION + +# Old name: +AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], []) + + +# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [ACTION-SUCCESS], [ACTION-FAILURE]) +# ---------------------------------------------------- +# Check whether the given linker option works +AC_DEFUN([_LT_LINKER_OPTION], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_SED])dnl +AC_CACHE_CHECK([$1], [$2], + [$2=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS $3" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&AS_MESSAGE_LOG_FD + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + else + $2=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS +]) + +if test yes = "[$]$2"; then + m4_if([$4], , :, [$4]) +else + m4_if([$5], , :, [$5]) +fi +])# _LT_LINKER_OPTION + +# Old name: +AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], []) + + +# LT_CMD_MAX_LEN +#--------------- +AC_DEFUN([LT_CMD_MAX_LEN], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +# find the maximum length of command line arguments +AC_MSG_CHECKING([the maximum length of command line arguments]) +AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl + i=0 + teststring=ABCD + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. + lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. + # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw* | windows* | cegcc*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. + # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). 
+ lt_cv_sys_max_cmd_len=8192; + ;; + + mint*) + # On MiNT this can take a long time and run out of memory. + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) + # This has been around since 386BSD, at least. Likely further. + if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + os2*) + # The test takes a long time on OS/2. + lt_cv_sys_max_cmd_len=8192 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. 
+ lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | $SED 's/.*[[ ]]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len" && \ + test undefined != "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + # Make teststring a little bigger before we do anything with it. + # a 1K string should be a reasonable start. + for i in 1 2 3 4 5 6 7 8; do + teststring=$teststring$teststring + done + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + while { test X`env echo "$teststring$teststring" 2>/dev/null` \ + = "X$teststring$teststring"; } >/dev/null 2>&1 && + test 17 != "$i" # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + # Only check the string length outside the loop. + lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` + teststring= + # Add a significant safety factor because C++ compilers can tack on + # massive amounts of additional arguments before passing them to the + # linker. It appears as though 1/2 is a usable value. 
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi + ;; + esac +]) +if test -n "$lt_cv_sys_max_cmd_len"; then + AC_MSG_RESULT($lt_cv_sys_max_cmd_len) +else + AC_MSG_RESULT(none) +fi +max_cmd_len=$lt_cv_sys_max_cmd_len +_LT_DECL([], [max_cmd_len], [0], + [What is the maximum length of a command?]) +])# LT_CMD_MAX_LEN + +# Old name: +AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], []) + + +# _LT_HEADER_DLFCN +# ---------------- +m4_defun([_LT_HEADER_DLFCN], +[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl +])# _LT_HEADER_DLFCN + + +# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, +# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) +# ---------------------------------------------------------------- +m4_defun([_LT_TRY_DLOPEN_SELF], +[m4_require([_LT_HEADER_DLFCN])dnl +if test yes = "$cross_compiling"; then : + [$4] +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +[#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif + +#include <stdio.h> + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisibility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. 
*/ +#if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +}] +_LT_EOF + if AC_TRY_EVAL(ac_link) && test -s "conftest$ac_exeext" 2>/dev/null; then + (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null + lt_status=$? + case x$lt_status in + x$lt_dlno_uscore) $1 ;; + x$lt_dlneed_uscore) $2 ;; + x$lt_dlunknown|x*) $3 ;; + esac + else : + # compilation failed + $3 + fi +fi +rm -fr conftest* +])# _LT_TRY_DLOPEN_SELF + + +# LT_SYS_DLOPEN_SELF +# ------------------ +AC_DEFUN([LT_SYS_DLOPEN_SELF], +[m4_require([_LT_HEADER_DLFCN])dnl +if test yes != "$enable_dlopen"; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen=load_add_on + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | windows* | pw32* | cegcc*) + lt_cv_dlopen=LoadLibrary + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl],[ + lt_cv_dlopen=dyld + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ]) + ;; + + tpf*) + # Don't try to run any link tests for TPF. We know it's impossible + # because TPF is a cross-compiler, and we know how we open DSOs. 
+ lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + lt_cv_dlopen_self=no + ;; + + *) + AC_CHECK_FUNC([shl_load], + [lt_cv_dlopen=shl_load], + [AC_CHECK_LIB([dld], [shl_load], + [lt_cv_dlopen=shl_load lt_cv_dlopen_libs=-ldld], + [AC_CHECK_FUNC([dlopen], + [lt_cv_dlopen=dlopen], + [AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl], + [AC_CHECK_LIB([svld], [dlopen], + [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-lsvld], + [AC_CHECK_LIB([dld], [dld_link], + [lt_cv_dlopen=dld_link lt_cv_dlopen_libs=-ldld]) + ]) + ]) + ]) + ]) + ]) + ;; + esac + + if test no = "$lt_cv_dlopen"; then + enable_dlopen=no + else + enable_dlopen=yes + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS=$CPPFLAGS + test yes = "$ac_cv_header_dlfcn_h" && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS=$LDFLAGS + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS=$LIBS + LIBS="$lt_cv_dlopen_libs $LIBS" + + AC_CACHE_CHECK([whether a program can dlopen itself], + lt_cv_dlopen_self, [dnl + _LT_TRY_DLOPEN_SELF( + lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, + lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) + ]) + + if test yes = "$lt_cv_dlopen_self"; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + AC_CACHE_CHECK([whether a statically linked program can dlopen itself], + lt_cv_dlopen_self_static, [dnl + _LT_TRY_DLOPEN_SELF( + lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, + lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) + ]) + fi + + CPPFLAGS=$save_CPPFLAGS + LDFLAGS=$save_LDFLAGS + LIBS=$save_LIBS + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi +_LT_DECL([dlopen_support], [enable_dlopen], [0], + [Whether dlopen is supported]) 
+_LT_DECL([dlopen_self], [enable_dlopen_self], [0], + [Whether dlopen of programs is supported]) +_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0], + [Whether dlopen of statically linked programs is supported]) +])# LT_SYS_DLOPEN_SELF + +# Old name: +AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], []) + + +# _LT_COMPILER_C_O([TAGNAME]) +# --------------------------- +# Check to see if options -c and -o are simultaneously supported by compiler. +# This macro does not hard code the compiler like AC_PROG_CC_C_O. +m4_defun([_LT_COMPILER_C_O], +[m4_require([_LT_DECL_SED])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_TAG_COMPILER])dnl +AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext], + [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)], + [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:$LINENO: \$? 
= $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + fi + fi + chmod u+w . 2>&AS_MESSAGE_LOG_FD + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* +]) +_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1], + [Does compiler simultaneously support -c and -o options?]) +])# _LT_COMPILER_C_O + + +# _LT_COMPILER_FILE_LOCKS([TAGNAME]) +# ---------------------------------- +# Check to see if we can do hard links to lock some files if needed +m4_defun([_LT_COMPILER_FILE_LOCKS], +[m4_require([_LT_ENABLE_LOCK])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +_LT_COMPILER_C_O([$1]) + +hard_links=nottested +if test no = "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" && test no != "$need_locks"; then + # do not overwrite the value of need_locks provided by the user + AC_MSG_CHECKING([if we can lock with hard links]) + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + AC_MSG_RESULT([$hard_links]) + if test no = "$hard_links"; then + AC_MSG_WARN(['$CC' does not support '-c -o', so 'make -j' may be unsafe]) + need_locks=warn + fi +else + need_locks=no +fi +_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?]) +])# _LT_COMPILER_FILE_LOCKS + + +# _LT_CHECK_OBJDIR +# ---------------- 
+m4_defun([_LT_CHECK_OBJDIR], +[AC_CACHE_CHECK([for objdir], [lt_cv_objdir], +[rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. + lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null]) +objdir=$lt_cv_objdir +_LT_DECL([], [objdir], [0], + [The name of the directory that contains temporary libtool files])dnl +m4_pattern_allow([LT_OBJDIR])dnl +AC_DEFINE_UNQUOTED([LT_OBJDIR], "$lt_cv_objdir/", + [Define to the sub-directory where libtool stores uninstalled libraries.]) +])# _LT_CHECK_OBJDIR + + +# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME]) +# -------------------------------------- +# Check hardcoding attributes. +m4_defun([_LT_LINKER_HARDCODE_LIBPATH], +[AC_MSG_CHECKING([how to hardcode library paths into programs]) +_LT_TAGVAR(hardcode_action, $1)= +if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" || + test -n "$_LT_TAGVAR(runpath_var, $1)" || + test yes = "$_LT_TAGVAR(hardcode_automatic, $1)"; then + + # We can hardcode non-existent directories. + if test no != "$_LT_TAGVAR(hardcode_direct, $1)" && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" && + test no != "$_LT_TAGVAR(hardcode_minus_L, $1)"; then + # Linking always hardcodes the temporary library directory. + _LT_TAGVAR(hardcode_action, $1)=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + _LT_TAGVAR(hardcode_action, $1)=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. 
+ _LT_TAGVAR(hardcode_action, $1)=unsupported +fi +AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)]) + +if test relink = "$_LT_TAGVAR(hardcode_action, $1)" || + test yes = "$_LT_TAGVAR(inherit_rpath, $1)"; then + # Fast installation is not supported + enable_fast_install=no +elif test yes = "$shlibpath_overrides_runpath" || + test no = "$enable_shared"; then + # Fast installation is not necessary + enable_fast_install=needless +fi +_LT_TAGDECL([], [hardcode_action], [0], + [How to hardcode a shared library path into an executable]) +])# _LT_LINKER_HARDCODE_LIBPATH + + +# _LT_CMD_STRIPLIB +# ---------------- +m4_defun([_LT_CMD_STRIPLIB], +[m4_require([_LT_DECL_EGREP]) +striplib= +old_striplib= +AC_MSG_CHECKING([whether stripping libraries is possible]) +if test -z "$STRIP"; then + AC_MSG_RESULT([no]) +else + if $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + AC_MSG_RESULT([yes]) + else + case $host_os in + darwin*) + # FIXME - insert some real tests, host_os isn't really good enough + striplib="$STRIP -x" + old_striplib="$STRIP -S" + AC_MSG_RESULT([yes]) + ;; + freebsd*) + if $STRIP -V 2>&1 | $GREP "elftoolchain" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + ;; + *) + AC_MSG_RESULT([no]) + ;; + esac + fi +fi +_LT_DECL([], [old_striplib], [1], [Commands to strip libraries]) +_LT_DECL([], [striplib], [1]) +])# _LT_CMD_STRIPLIB + + +# _LT_PREPARE_MUNGE_PATH_LIST +# --------------------------- +# Make sure func_munge_path_list() is defined correctly. 
+m4_defun([_LT_PREPARE_MUNGE_PATH_LIST], +[[# func_munge_path_list VARIABLE PATH +# ----------------------------------- +# VARIABLE is name of variable containing _space_ separated list of +# directories to be munged by the contents of PATH, which is string +# having a format: +# "DIR[:DIR]:" +# string "DIR[ DIR]" will be prepended to VARIABLE +# ":DIR[:DIR]" +# string "DIR[ DIR]" will be appended to VARIABLE +# "DIRP[:DIRP]::[DIRA:]DIRA" +# string "DIRP[ DIRP]" will be prepended to VARIABLE and string +# "DIRA[ DIRA]" will be appended to VARIABLE +# "DIR[:DIR]" +# VARIABLE will be replaced by "DIR[ DIR]" +func_munge_path_list () +{ + case x@S|@2 in + x) + ;; + *:) + eval @S|@1=\"`$ECHO @S|@2 | $SED 's/:/ /g'` \@S|@@S|@1\" + ;; + x:*) + eval @S|@1=\"\@S|@@S|@1 `$ECHO @S|@2 | $SED 's/:/ /g'`\" + ;; + *::*) + eval @S|@1=\"\@S|@@S|@1\ `$ECHO @S|@2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" + eval @S|@1=\"`$ECHO @S|@2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \@S|@@S|@1\" + ;; + *) + eval @S|@1=\"`$ECHO @S|@2 | $SED 's/:/ /g'`\" + ;; + esac +} +]])# _LT_PREPARE_MUNGE_PATH_LIST + + +# _LT_SYS_DYNAMIC_LINKER([TAG]) +# ----------------------------- +# PORTME Fill in your ld.so characteristics +m4_defun([_LT_SYS_DYNAMIC_LINKER], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_OBJDUMP])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_CHECK_SHELL_FEATURES])dnl +m4_require([_LT_PREPARE_MUNGE_PATH_LIST])dnl +AC_MSG_CHECKING([dynamic linker characteristics]) +m4_if([$1], + [], [ +if test yes = "$GCC"; then + case $host_os in + darwin*) lt_awk_arg='/^libraries:/,/LR/' ;; + *) lt_awk_arg='/^libraries:/' ;; + esac + case $host_os in + mingw* | windows* | cegcc*) lt_sed_strip_eq='s|=\([[A-Za-z]]:\)|\1|g' ;; + *) lt_sed_strip_eq='s|=/|/|g' ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` + case $lt_search_path_spec in + *\;*) + # if the path 
contains ";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary... + lt_tmp_lt_search_path_spec= + lt_multi_os_dir=/`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + # ...but if some path component already ends with the multilib dir we assume + # that all is fine and trust -print-search-dirs as is (GCC 4.2? or newer). + case "$lt_multi_os_dir; $lt_search_path_spec " in + "/; "* | "/.; "* | "/./; "* | *"$lt_multi_os_dir "* | *"$lt_multi_os_dir/ "*) + lt_multi_os_dir= + ;; + esac + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path$lt_multi_os_dir" + elif test -n "$lt_multi_os_dir"; then + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS = " "; FS = "/|\n";} { + lt_foo = ""; + lt_count = 0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo = "/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[[lt_foo]]++; } + if (lt_freq[[lt_foo]] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. 
+ case $host_os in + mingw* | windows* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's|/\([[A-Za-z]]:\)|\1|g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi]) +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=.so +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + +AC_ARG_VAR([LT_SYS_LIBRARY_PATH], +[User-defined run-time library search path.]) + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='$libname$release$shared_ext$major' + ;; + +aix[[4-9]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test ia64 = "$host_cpu"; then + # AIX 5 supports IA64 + library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line '#! .'. This would cause the generated library to + # depend on '.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. 
+ case $host_os in + aix4 | aix4.[[01]] | aix4.[[01]].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # Using Import Files as archive members, it is possible to support + # filename-based versioning of shared library archives on AIX. While + # this would work for both with and without runtime linking, it will + # prevent static linking of such archives. So we do filename-based + # shared library versioning with .so extension only, which is used + # when both runtime linking and shared linking is enabled. + # Unfortunately, runtime linking may impact performance, so we do + # not want this to be the default eventually. Also, we use the + # versioned .so libs for executables only if there is the -brtl + # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. + # To allow for filename-based versioning support, we need to create + # libNAME.so.V as an archive file, containing: + # *) an Import File, referring to the versioned filename of the + # archive as well as the shared archive member, telling the + # bitwidth (32 or 64) of that shared object, and providing the + # list of exported symbols of that shared object, eventually + # decorated with the 'weak' keyword + # *) the shared object with the F_LOADONLY flag set, to really avoid + # it being seen by the linker. + # At run time we better use the real file rather than another symlink, + # but for link time we create the symlink libNAME.so -> libNAME.so.V + + case $with_aix_soname,$aix_use_runtimelinking in + # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. 
+ aix,yes) # traditional libtool + dynamic_linker='AIX unversionable lib.so' + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + aix,no) # traditional AIX only + dynamic_linker='AIX lib.a[(]lib.so.V[)]' + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + ;; + svr4,*) # full svr4 only + dynamic_linker="AIX lib.so.V[(]$shared_archive_member_spec.o[)]" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # We do not specify a path in Import Files, so LIBPATH fires. + shlibpath_overrides_runpath=yes + ;; + *,yes) # both, prefer svr4 + dynamic_linker="AIX lib.so.V[(]$shared_archive_member_spec.o[)], lib.a[(]lib.so.V[)]" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # unpreferred sharedlib libNAME.a needs extra handling + postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' + postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' + # We do not specify a path in Import Files, so LIBPATH fires. 
+ shlibpath_overrides_runpath=yes + ;; + *,no) # both, prefer aix + dynamic_linker="AIX lib.a[(]lib.so.V[)], lib.so.V[(]$shared_archive_member_spec.o[)]" + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling + postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' + postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' + ;; + esac + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='$libname$shared_ext' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[[45]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | windows* | pw32* | cegcc*) + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) + ;; + mingw* | windows* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl* | *,icl*) + # Native MSVC or ICC + libname_spec='$name' + soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + library_names_spec='$libname.dll.lib' + + case $build_os in + mingw* | windows*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec=$LIB + if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC and ICC wrapper + library_names_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' + soname_spec='$libname$release$major$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly* | midnightbsd*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[[23]].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[[01]]* | freebsdelf3.[[01]]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \ + freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=no + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. 
+ version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + if test 32 = "$HPUX_IA64_MODE"; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + sys_lib_dlsearch_path_spec=/usr/lib/hpux32 + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + sys_lib_dlsearch_path_spec=/usr/lib/hpux64 + fi + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[[3-9]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test yes = "$lt_cv_prog_gnu_ld"; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" + sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +linux*android*) + version_type=none # Android doesn't support versioned libraries. 
+ need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext' + finish_cmds= + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + dynamic_linker='Android linker' + # -rpath works at least for libraries that are not overridden by + # libraries installed in system locations. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + ;; + +# This must be glibc/ELF. +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath], + [lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \ + LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\"" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null], + [lt_cv_shlibpath_overrides_runpath=yes])]) + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + ]) + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. 
+ hardcode_into_libs=yes + + # Ideally, we could use ldconfig to report *all* directories which are + # searched for libraries, however this is still not possible. Aside from not + # being certain /sbin/ldconfig is available, command + # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, + # even though it is searched at run-time. Try to do the best guess by + # appending ld.so.conf contents (and includes) to the search path. + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. 
+ dynamic_linker='GNU/Linux ld.so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd*) + version_type=sunos + sys_lib_dlsearch_path_spec=/usr/lib + need_lib_prefix=no + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + need_version=no + else + need_version=yes + fi + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +os2*) + libname_spec='$name' + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + # OS/2 can only load a DLL with a base name of 8 characters or less. 
+ soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; + v=$($ECHO $release$versuffix | tr -d .-); + n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); + $ECHO $n$v`$shared_ext' + library_names_spec='${libname}_dll.$libext' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=BEGINLIBPATH + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; $ECHO \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='$libname$release$shared_ext$versuffix 
$libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test yes = "$with_gnu_ld"; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec; then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' + soname_spec='$libname$shared_ext.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=sco + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test yes = "$with_gnu_ld"; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +AC_MSG_RESULT([$dynamic_linker]) +test no = "$dynamic_linker" && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test yes = "$GCC"; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then + sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec +fi + +if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then + sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec +fi + +# remember unaugmented sys_lib_dlsearch_path content for libtool script decls... +configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec + +# ... 
but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code +func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" + +# to be used as default LT_SYS_LIBRARY_PATH value in generated libtool +configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH + +_LT_DECL([], [variables_saved_for_relink], [1], + [Variables whose values should be saved in libtool wrapper scripts and + restored at link time]) +_LT_DECL([], [need_lib_prefix], [0], + [Do we need the "lib" prefix for modules?]) +_LT_DECL([], [need_version], [0], [Do we need a version for libraries?]) +_LT_DECL([], [version_type], [0], [Library versioning type]) +_LT_DECL([], [runpath_var], [0], [Shared library runtime path variable]) +_LT_DECL([], [shlibpath_var], [0],[Shared library path variable]) +_LT_DECL([], [shlibpath_overrides_runpath], [0], + [Is shlibpath searched before the hard-coded library search path?]) +_LT_DECL([], [libname_spec], [1], [Format of library name prefix]) +_LT_DECL([], [library_names_spec], [1], + [[List of archive names. First name is the real one, the rest are links. 
+    The last name is the one that the linker finds with -lNAME]])
+_LT_DECL([], [soname_spec], [1],
+    [[The coded name of the library, if different from the real name]])
+_LT_DECL([], [install_override_mode], [1],
+    [Permission mode override for installation of shared libraries])
+_LT_DECL([], [postinstall_cmds], [2],
+    [Command to use after installation of a shared archive])
+_LT_DECL([], [postuninstall_cmds], [2],
+    [Command to use after uninstallation of a shared archive])
+_LT_DECL([], [finish_cmds], [2],
+    [Commands used to finish a libtool library installation in a directory])
+_LT_DECL([], [finish_eval], [1],
+    [[As "finish_cmds", except a single script fragment to be evaled but
+    not shown]])
+_LT_DECL([], [hardcode_into_libs], [0],
+    [Whether we should hardcode library paths into libraries])
+_LT_DECL([], [sys_lib_search_path_spec], [2],
+    [Compile-time system search path for libraries])
+_LT_DECL([sys_lib_dlsearch_path_spec], [configure_time_dlsearch_path], [2],
+    [Detected run-time system search path for libraries])
+_LT_DECL([], [configure_time_lt_sys_library_path], [2],
+    [Explicit LT_SYS_LIBRARY_PATH set during ./configure time])
+])# _LT_SYS_DYNAMIC_LINKER
+
+
+# _LT_PATH_TOOL_PREFIX(TOOL, [PATH])
+# ----------------------------------
+# find a file program that can recognize shared library
+#
+# TOOL is looked up in every directory of PATH, a PATH_SEPARATOR-delimited
+# list; $PATH is searched when the second argument is empty.  The first
+# regular file found is cached in lt_cv_path_MAGIC_CMD and ends the search.
+# A MAGIC_CMD preset to an absolute path (POSIX or DOS drive-letter form)
+# is taken as is and no search is made.
+# When file_magic_test_file is set and deplibs_check_method is a file_magic
+# method, the candidate is run on that file and a warning is printed if its
+# output does not match the expected regex; the candidate is kept either way.
+AC_DEFUN([_LT_PATH_TOOL_PREFIX],
+[m4_require([_LT_DECL_EGREP])dnl
+AC_MSG_CHECKING([for $1])
+AC_CACHE_VAL(lt_cv_path_MAGIC_CMD,
+[case $MAGIC_CMD in
+# Accept absolute paths.  The star has to follow the bracket expression:
+# with the star inside it the pattern only matches a one-character value,
+# so a real path such as /usr/bin/file was never honoured.
+[[\\/]]* | ?:[[\\/]]*)
+  lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path.
+  ;;
+*)
+  lt_save_MAGIC_CMD=$MAGIC_CMD
+  lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word. This closes a longstanding sh security hole.
+  ac_dummy="m4_if([$2], , $PATH, [$2])"
+  for ac_dir in $ac_dummy; do
+    IFS=$lt_save_ifs
+    test -z "$ac_dir" && ac_dir=.
+    if test -f "$ac_dir/$1"; then
+      lt_cv_path_MAGIC_CMD=$ac_dir/"$1"
+      if test -n "$file_magic_test_file"; then
+        case $deplibs_check_method in
+        "file_magic "*)
+          file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
+          # file_magic_cmd refers to MAGIC_CMD, so point it at the candidate
+          # for the duration of the probe; it is restored after the loop.
+          MAGIC_CMD=$lt_cv_path_MAGIC_CMD
+          if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+            $EGREP "$file_magic_regex" > /dev/null; then
+            :
+          else
+            cat <<_LT_EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such. This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem. Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+_LT_EOF
+          fi ;;
+        esac
+      fi
+      break
+    fi
+  done
+  IFS=$lt_save_ifs
+  MAGIC_CMD=$lt_save_MAGIC_CMD
+  ;;
+esac])
+MAGIC_CMD=$lt_cv_path_MAGIC_CMD
+if test -n "$MAGIC_CMD"; then
+  AC_MSG_RESULT($MAGIC_CMD)
+else
+  AC_MSG_RESULT(no)
+fi
+_LT_DECL([], [MAGIC_CMD], [0],
+    [Used to examine libraries when file_magic_cmd begins with "file"])dnl
+])# _LT_PATH_TOOL_PREFIX
+
+# Old name:
+AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], [])
+
+
+# _LT_PATH_MAGIC
+# --------------
+# find a file program that can recognize a shared library
+#
+# The tool-prefixed name is tried first.  If that finds nothing and a tool
+# prefix is in use, the plain name is tried; with no tool prefix MAGIC_CMD
+# falls back to the shell no-op ":".
+m4_defun([_LT_PATH_MAGIC],
+[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH)
+if test -z "$lt_cv_path_MAGIC_CMD"; then
+  if test -n "$ac_tool_prefix"; then
+    _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH)
+  else
+    MAGIC_CMD=:
+  fi
+fi
+])# _LT_PATH_MAGIC
+
+
+# LT_PATH_LD
+# ----------
+# find the pathname to the GNU or non-GNU linker
+AC_DEFUN([LT_PATH_LD],
+[AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+m4_require([_LT_DECL_SED])dnl
+m4_require([_LT_DECL_EGREP])dnl
+m4_require([_LT_PROG_ECHO_BACKSLASH])dnl
+
+AC_ARG_WITH([gnu-ld],
+    [AS_HELP_STRING([--with-gnu-ld],
+        [assume the C compiler uses GNU ld @<:@default=no@:>@])],
+    [test no = "$withval" || with_gnu_ld=yes],
+    [with_gnu_ld=no])dnl
+
+ac_prog=ld
+if test yes = "$GCC"; then
+  # Check if gcc -print-prog-name=ld gives a path.
+  AC_MSG_CHECKING([for ld used by $CC])
+  case $host in
+  *-*-mingw* | *-*-windows*)
+    # gcc leaves a trailing carriage return, which upsets mingw
+    ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+  *)
+    ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+  esac
+  case $ac_prog in
+    # Accept absolute paths.
+    [[\\/]]* | ?:[[\\/]]*)
+      re_direlt='/[[^/]][[^/]]*/\.\./'
+      # Canonicalize the pathname of ld
+      ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'`
+      while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do
+        ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"`
+      done
+      test -z "$LD" && LD=$ac_prog
+      ;;
+  "")
+    # If it fails, then pretend we aren't using GCC.
+    ac_prog=ld
+    ;;
+  *)
+    # If it is relative, then search for the first ld in PATH.
+    with_gnu_ld=unknown
+    ;;
+  esac
+elif test yes = "$with_gnu_ld"; then
+  AC_MSG_CHECKING([for GNU ld])
+else
+  AC_MSG_CHECKING([for non-GNU ld])
+fi
+AC_CACHE_VAL(lt_cv_path_LD,
+[if test -z "$LD"; then
+  lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR
+  for ac_dir in $PATH; do
+    IFS=$lt_save_ifs
+    test -z "$ac_dir" && ac_dir=.
+    if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+      lt_cv_path_LD=$ac_dir/$ac_prog
+      # Check to see if the program is GNU ld. I'd rather use --version,
+      # but apparently some variants of GNU ld only accept -v.
+      # Break only if it was the GNU/non-GNU ld that we prefer.
+      case `"$lt_cv_path_LD" -v 2>&1 < /dev/null` in
+      *GNU* | *'with BFD'*)
+        test no != "$with_gnu_ld" && break
+        ;;
+      *)
+        test yes != "$with_gnu_ld" && break
+        ;;
+      esac
+    fi
+  done
+  IFS=$lt_save_ifs
+else
+  lt_cv_path_LD=$LD # Let the user override the test with a path.
+fi])
+LD=$lt_cv_path_LD
+if test -n "$LD"; then
+  AC_MSG_RESULT($LD)
+else
+  AC_MSG_RESULT(no)
+fi
+test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH])
+_LT_PATH_LD_GNU
+AC_SUBST([LD])
+
+_LT_TAGDECL([], [LD], [1], [The linker used to build libraries])
+])# LT_PATH_LD
+
+# Old names:
+AU_ALIAS([AM_PROG_LD], [LT_PATH_LD])
+AU_ALIAS([AC_PROG_LD], [LT_PATH_LD])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AM_PROG_LD], [])
+dnl AC_DEFUN([AC_PROG_LD], [])
+
+
+# _LT_PATH_LD_GNU
+#- --------------
+m4_defun([_LT_PATH_LD_GNU],
+[AC_CACHE_CHECK([if the linker ($LD) is GNU ld], lt_cv_prog_gnu_ld,
+[# I'd rather use --version here, but apparently some GNU lds only accept -v.
+case `$LD -v 2>&1 < /dev/null` in
+*GNU* | *'with BFD'*)
+  lt_cv_prog_gnu_ld=yes
+  ;;
+*)
+  lt_cv_prog_gnu_ld=no
+  ;;
+esac])
+with_gnu_ld=$lt_cv_prog_gnu_ld
+])# _LT_PATH_LD_GNU
+
+
+# _LT_CMD_RELOAD
+# --------------
+# find reload flag for linker
+#   -- PORTME Some linkers may need a different reload flag.
+m4_defun([_LT_CMD_RELOAD],
+[AC_CACHE_CHECK([for $LD option to reload object files],
+  lt_cv_ld_reload_flag,
+  [lt_cv_ld_reload_flag='-r'])
+reload_flag=$lt_cv_ld_reload_flag
+case $reload_flag in
+"" | " "*) ;;
+*) reload_flag=" $reload_flag" ;;
+esac
+reload_cmds='$LD$reload_flag -o $output$reload_objs'
+case $host_os in
+  cygwin* | mingw* | windows* | pw32* | cegcc*)
+    if test yes != "$GCC"; then
+      reload_cmds=false
+    fi
+    ;;
+  darwin*)
+    if test yes = "$GCC"; then
+      reload_cmds='$LTCC $LTCFLAGS -nostdlib $wl-r -o $output$reload_objs'
+    else
+      reload_cmds='$LD$reload_flag -o $output$reload_objs'
+    fi
+    ;;
+esac
+_LT_TAGDECL([], [reload_flag], [1], [How to create reloadable object files])dnl
+_LT_TAGDECL([], [reload_cmds], [2])dnl
+])# _LT_CMD_RELOAD
+
+
+# _LT_PATH_DD
+# -----------
+# find a working dd
+#
+# A candidate is accepted when, fed the 64-byte conftest2.i on stdin with
+# bs=32 count=1, it writes exactly the 32 bytes of conftest.i.
+m4_defun([_LT_PATH_DD],
+[AC_CACHE_CHECK([for a working dd], [ac_cv_path_lt_DD],
+[printf 0123456789abcdef0123456789abcdef >conftest.i
+cat conftest.i conftest.i >conftest2.i
+: ${lt_DD:=$DD}
+AC_PATH_PROGS_FEATURE_CHECK([lt_DD], [dd],
+[if "$ac_path_lt_DD" bs=32 count=1 < conftest2.i > conftest.out 2>/dev/null; then
+  cmp -s conftest.i conftest.out \
+  && ac_cv_path_lt_DD="$ac_path_lt_DD" ac_path_lt_DD_found=:
+fi])
+rm -f conftest.i conftest2.i conftest.out])
+])# _LT_PATH_DD
+
+
+# _LT_CMD_TRUNCATE
+# ----------------
+# find command to truncate a binary pipe
+#
+# Uses the dd found by _LT_PATH_DD with bs=4096 count=1 when that dd passes
+# the same 32-byte probe; otherwise falls back to "$SED -e 4q".
+m4_defun([_LT_CMD_TRUNCATE],
+[m4_require([_LT_PATH_DD])
+AC_CACHE_CHECK([how to truncate binary pipes], [lt_cv_truncate_bin],
+[printf 0123456789abcdef0123456789abcdef >conftest.i
+cat conftest.i conftest.i >conftest2.i
+lt_cv_truncate_bin=
+if "$ac_cv_path_lt_DD" bs=32 count=1 < conftest2.i > conftest.out 2>/dev/null; then
+  cmp -s conftest.i conftest.out \
+  && lt_cv_truncate_bin="$ac_cv_path_lt_DD bs=4096 count=1"
+fi
+rm -f conftest.i conftest2.i conftest.out
+test -z "$lt_cv_truncate_bin" && lt_cv_truncate_bin="$SED -e 4q"])
+_LT_DECL([lt_truncate_bin], [lt_cv_truncate_bin], [1],
+  [Command to truncate a binary pipe])
+])# _LT_CMD_TRUNCATE
+
+
+# _LT_CHECK_MAGIC_METHOD
+# ----------------------
+# how to check for library dependencies
+#  -- PORTME fill in with the dynamic library characteristics
+m4_defun([_LT_CHECK_MAGIC_METHOD],
+[m4_require([_LT_DECL_EGREP])
+m4_require([_LT_DECL_OBJDUMP])
+AC_CACHE_CHECK([how to recognize dependent libraries],
+lt_cv_deplibs_check_method,
+[lt_cv_file_magic_cmd='$MAGIC_CMD'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# 'none' -- dependencies not supported.
+# 'unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'file_magic [[regex]]' -- check by looking for files in library path
+# that responds to the $file_magic_cmd with a given extended regex.
+# If you have 'file' or equivalent on your system and you're not sure +# whether 'pass_all' will *always* work, you probably want this one. + +case $host_os in +aix[[4-9]]*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi[[45]]*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='$FILECMD -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin*) + # func_win32_libid is a shell function defined in ltmain.sh + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + ;; + +mingw* | windows* | pw32*) + # Base MSYS/MinGW do not provide the 'file' command needed by + # func_win32_libid shell function, so use a weaker test based on 'objdump', + # unless we find 'file', for example because we are cross-compiling. + if ( file / ) >/dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly* | midnightbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. 
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=$FILECMD + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=$FILECMD + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'] + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[[3-9]]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=$FILECMD + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all + ;; +os2*) + 
lt_cv_deplibs_check_method=pass_all + ;; +esac +]) + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | windows* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + +_LT_DECL([], [deplibs_check_method], [1], + [Method to check whether dependent libraries are shared objects]) +_LT_DECL([], [file_magic_cmd], [1], + [Command to use when deplibs_check_method = "file_magic"]) +_LT_DECL([], [file_magic_glob], [1], + [How to find potential files when deplibs_check_method = "file_magic"]) +_LT_DECL([], [want_nocaseglob], [1], + [Find potential files using nocaseglob when deplibs_check_method = "file_magic"]) +])# _LT_CHECK_MAGIC_METHOD + + +# LT_PATH_NM +# ---------- +# Find the pathname to a BSD- or MS-compatible name lister. +# A preset $NM is used as-is. Otherwise ${ac_tool_prefix}nm (plus plain nm +# when a tool prefix is set and build equals host) is looked up in $PATH, +# /usr/ccs/bin/elf, /usr/ccs/bin, /usr/ucb and /bin. A candidate that appears +# to accept -B wins, then one that appears to accept -p; failing both, the +# first nm found is kept. When no nm is found at all, dumpbin (or +# "link -dump") is probed instead. The detected interface, "BSD nm" or +# "MS dumpbin", is cached in lt_cv_nm_interface. +AC_DEFUN([LT_PATH_NM], +[AC_REQUIRE([AC_PROG_CC])dnl +AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM, +[if test -n "$NM"; then + # Let the user override the test. + lt_cv_path_NM=$NM +else + lt_nm_to_check=${ac_tool_prefix}nm + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + tmp_nm=$ac_dir/$lt_tmp_nm + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext"; then + # Check to see if the nm accepts a BSD-compat flag.
+ # Adding the 'sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + # MSYS converts /dev/null to NUL, MinGW nm treats NUL as empty + case $build_os in + mingw* | windows*) lt_bad_file=conftest.nm/nofile ;; + *) lt_bad_file=/dev/null ;; + esac + case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in + *$lt_bad_file* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break 2 + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | $SED '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break 2 + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS=$lt_save_ifs + done + : ${lt_cv_path_NM=no} +fi]) +if test no != "$lt_cv_path_NM"; then + NM=$lt_cv_path_NM +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. 
+ else + AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :) + case `$DUMPBIN -symbols -headers /dev/null 2>&1 | $SED '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols -headers" + ;; + *) + DUMPBIN=: + ;; + esac + fi + AC_SUBST([DUMPBIN]) + if test : != "$DUMPBIN"; then + NM=$DUMPBIN + fi +fi +test -z "$NM" && NM=nm +AC_SUBST([NM]) +_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl + +AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface], + [lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD) + cat conftest.out >&AS_MESSAGE_LOG_FD + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest*]) +])# LT_PATH_NM + +# Old names: +AU_ALIAS([AM_PROG_NM], [LT_PATH_NM]) +AU_ALIAS([AC_PROG_NM], [LT_PATH_NM]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_PROG_NM], []) +dnl AC_DEFUN([AC_PROG_NM], []) + +# _LT_CHECK_SHAREDLIB_FROM_LINKLIB +# -------------------------------- +# how to determine the name of the shared library +# associated with a specific link library. 
+# -- PORTME fill in with the dynamic library characteristics +# On cygwin, mingw, windows, pw32 and cegcc hosts the result names one of two +# ltmain.sh shell functions, chosen by whether the $DLLTOOL --help output +# mentions --identify-strict; on every other host the result is $ECHO. +m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB], +[m4_require([_LT_DECL_EGREP]) +m4_require([_LT_DECL_OBJDUMP]) +m4_require([_LT_DECL_DLLTOOL]) +AC_CACHE_CHECK([how to associate runtime and link libraries], +lt_cv_sharedlib_from_linklib_cmd, +[lt_cv_sharedlib_from_linklib_cmd='unknown' + +case $host_os in +cygwin* | mingw* | windows* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh; + # decide which one to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib + lt_cv_sharedlib_from_linklib_cmd=$ECHO + ;; +esac +]) +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + +_LT_DECL([], [sharedlib_from_linklib_cmd], [1], + [Command to associate shared and link libraries]) +])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB + + +# _LT_PATH_MANIFEST_TOOL +# ---------------------- +# Locate the manifest tool (mt by default). +# The candidate is accepted only when the output of running it with -? +# contains the text Manifest Tool; otherwise MANIFEST_TOOL is set to a colon, +# the shell no-op. +m4_defun([_LT_PATH_MANIFEST_TOOL], +[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :) +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_manifest_tool], + [lt_cv_path_manifest_tool=no + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD + $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out + cat conftest.err >&AS_MESSAGE_LOG_FD + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_manifest_tool=yes + fi + rm -f conftest*]) +if test yes != "$lt_cv_path_manifest_tool"; then + MANIFEST_TOOL=: +fi +_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl +])# _LT_PATH_MANIFEST_TOOL + + +# _LT_DLL_DEF_P([FILE]) +# --------------------- +# True iff FILE is a Windows DLL '.def' file.
+# Keep in sync with func_dll_def_p in the libtool script +AC_DEFUN([_LT_DLL_DEF_P], +[dnl + test DEF = "`$SED -n dnl + -e '\''s/^[[ ]]*//'\'' dnl Strip leading whitespace + -e '\''/^\(;.*\)*$/d'\'' dnl Delete empty lines and comments + -e '\''s/^\(EXPORTS\|LIBRARY\)\([[ ]].*\)*$/DEF/p'\'' dnl + -e q dnl Only consider the first "real" line + $1`" dnl +])# _LT_DLL_DEF_P + + +# LT_LIB_M +# -------- +# Check for the math library. +# Sets and substitutes LIBM. It stays empty on the hosts listed in the first +# case arm; elsewhere it becomes -lm when cos is found in libm. On +# *-ncr-sysv4.3* hosts -lmw is put in front of it when libmw is found. +AC_DEFUN([LT_LIB_M], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +LIBM= +case $host in +*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-mingw* | *-*-pw32* | *-*-darwin*) + # These systems don't have libm, or don't need it + ;; +*-ncr-sysv4.3*) + AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM=-lmw) + AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm") + ;; +*) + AC_CHECK_LIB(m, cos, LIBM=-lm) + ;; +esac +AC_SUBST([LIBM]) +])# LT_LIB_M + +# Old name: +AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_CHECK_LIBM], []) + + +# _LT_COMPILER_NO_RTTI([TAGNAME]) +# ------------------------------- +# Compute the no_builtin_flag for TAGNAME. The flag stays empty unless $GCC +# is yes. In that case it starts as -fno-builtin (passed via -Xcompiler when +# the compiler basename matches nvcc*), and -fno-rtti -fno-exceptions is +# appended by the last argument of _LT_COMPILER_OPTION, presumably when that +# check succeeds (the macro is defined elsewhere; confirm there). +m4_defun([_LT_COMPILER_NO_RTTI], +[m4_require([_LT_TAG_COMPILER])dnl + +_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + +if test yes = "$GCC"; then + case $cc_basename in + nvcc*) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;; + *) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;; + esac + + _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions], + lt_cv_prog_compiler_rtti_exceptions, + [-fno-rtti -fno-exceptions], [], + [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"]) +fi +_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1], + [Compiler flag to turn off builtin functions]) +])# _LT_COMPILER_NO_RTTI + + +# _LT_CMD_GLOBAL_SYMBOLS +# ---------------------- +m4_defun([_LT_CMD_GLOBAL_SYMBOLS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([LT_PATH_NM])dnl +AC_REQUIRE([LT_PATH_LD])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_TAG_COMPILER])dnl + +# Check for command to grab the raw symbol name followed by C symbol from nm. +AC_MSG_CHECKING([command to parse $NM output from $compiler object]) +AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], +[ +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[[BCDEGRST]]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[[BCDT]]' + ;; +cygwin* | mingw* | windows* | pw32* | cegcc*) + symcode='[[ABCDGISTW]]' + ;; +hpux*) + if test ia64 = "$host_cpu"; then + symcode='[[ABCDEGRST]]' + fi + ;; +irix* | nonstopux*) + symcode='[[BCDEGRST]]' + ;; +osf*) + symcode='[[BCDEGQRST]]' + ;; +solaris*) + symcode='[[BCDRT]]' + ;; +sco3.2v5*) + symcode='[[DT]]' + ;; +sysv4.2uw2*) + symcode='[[DT]]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[[ABDT]]' + ;; +sysv4) + symcode='[[DFNSTU]]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[[ABCDGIRSTW]]' ;; +esac + +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Gets list of data symbols to import. + lt_cv_sys_global_symbol_to_import="$SED -n -e 's/^I .* \(.*\)$/\1/p'" + # Adjust the below global symbol transforms to fixup imported variables. + lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'" + lt_c_name_hook=" -e 's/^I .* \(.*\)$/ {\"\1\", (void *) 0},/p'" + lt_c_name_lib_hook="\ + -e 's/^I .* \(lib.*\)$/ {\"\1\", (void *) 0},/p'\ + -e 's/^I .* \(.*\)$/ {\"lib\1\", (void *) 0},/p'" +else + # Disable hooks by default. 
+ lt_cv_sys_global_symbol_to_import= + lt_cdecl_hook= + lt_c_name_hook= + lt_c_name_lib_hook= +fi + +# Transform an extracted symbol line into a proper C declaration. +# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. +lt_cv_sys_global_symbol_to_cdecl="$SED -n"\ +$lt_cdecl_hook\ +" -e 's/^T .* \(.*\)$/extern int \1();/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="$SED -n"\ +$lt_c_name_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/p'" + +# Transform an extracted symbol line into symbol name with lib prefix and +# symbol address. +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="$SED -n"\ +$lt_c_name_lib_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(lib.*\)$/ {\"\1\", (void *) \&\1},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"lib\1\", (void *) \&\1},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw* | windows*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function, + # D for any global variable and I for any imported variable. + # Also find C++ and __fastcall symbols from MSVC++ or ICC, + # which start with @ or ?. 
+ lt_cv_sys_global_symbol_pipe="$AWK ['"\ +" {last_section=section; section=\$ 3};"\ +" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" /^ *Symbol name *: /{split(\$ 0,sn,\":\"); si=substr(sn[2],2)};"\ +" /^ *Type *: code/{print \"T\",si,substr(si,length(prfx))};"\ +" /^ *Type *: data/{print \"I\",si,substr(si,length(prfx))};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=\"D\"}; \$ 0~/\(\).*\|/{f=\"T\"};"\ +" {split(\$ 0,a,/\||\r/); split(a[2],s)};"\ +" s[1]~/^[@?]/{print f,s[1],s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx]" + else + lt_cv_sys_global_symbol_pipe="$SED -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | $SED '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if AC_TRY_EVAL(ac_compile); then + # Now try to grab the symbols. + nlist=conftest.nm + if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then + # Try sorting and uniquifying the output. + if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. + if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. 
*/ +#if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE +/* DATA imports from DLLs on WIN32 can't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT@&t@_DLSYM_CONST +#elif defined __osf__ +/* This system does not cope well with relocations in const data. */ +# define LT@&t@_DLSYM_CONST +#else +# define LT@&t@_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. + eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT@&t@_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[[]] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. 
+ mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS=conftstm.$ac_objext + CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)" + if AC_TRY_EVAL(ac_link) && test -s conftest$ac_exeext; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD + fi + else + # The test program did not compile; record it in the configure log. + # Use as_me, which configure defines, and the symbolic log descriptor + # like every other message in this macro. + echo "$as_me: failed program was:" >&AS_MESSAGE_LOG_FD + cat conftest.$ac_ext >&AS_MESSAGE_LOG_FD + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test yes = "$pipe_works"; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done +]) +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + AC_MSG_RESULT(failed) +else + AC_MSG_RESULT(ok) +fi + +# Response file support.
+if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + +_LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1], + [Take the output of nm and produce a listing of raw symbols and C names]) +_LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1], + [Transform the output of nm in a proper C declaration]) +_LT_DECL([global_symbol_to_import], [lt_cv_sys_global_symbol_to_import], [1], + [Transform the output of nm into a list of symbols to manually relocate]) +_LT_DECL([global_symbol_to_c_name_address], + [lt_cv_sys_global_symbol_to_c_name_address], [1], + [Transform the output of nm in a C name address pair]) +_LT_DECL([global_symbol_to_c_name_address_lib_prefix], + [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1], + [Transform the output of nm in a C name address pair when lib prefix is needed]) +_LT_DECL([nm_interface], [lt_cv_nm_interface], [1], + [The name lister interface]) +_LT_DECL([], [nm_file_list_spec], [1], + [Specify filename containing input files for $NM]) +]) # _LT_CMD_GLOBAL_SYMBOLS + + +# _LT_COMPILER_PIC([TAGNAME]) +# --------------------------- +m4_defun([_LT_COMPILER_PIC], +[m4_require([_LT_TAG_COMPILER])dnl +_LT_TAGVAR(lt_prog_compiler_wl, $1)= +_LT_TAGVAR(lt_prog_compiler_pic, $1)= +_LT_TAGVAR(lt_prog_compiler_static, $1)= + +m4_if([$1], [CXX], [ + # C++ specific cases for pic, static, wl, etc. + if test yes = "$GXX"; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. 
+ if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + mingw* | windows* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + case $host_os in + os2*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' + ;; + esac + ;; + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + *djgpp*) + # DJGPP does not support shared libraries at all + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. 
On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + else + case $host_os in + aix[[4-9]]*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + chorus*) + case $cc_basename in + cxch68*) + # Green Hills C++ Compiler + # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" + ;; + esac + ;; + mingw* | windows* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). 
+ m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + dgux*) + case $cc_basename in + ec++*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + ghcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + freebsd* | dragonfly* | midnightbsd*) + # FreeBSD uses GNU C++ + ;; + hpux9* | hpux10* | hpux11*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' + if test ia64 != "$host_cpu"; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + fi + ;; + aCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + ;; + *) + ;; + esac + ;; + interix*) + # This is c89, which is MS Visual C++ (no shared libs) + # Anyone wants to do a port? + ;; + irix5* | irix6* | nonstopux*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + # CC pic flag -KPIC is the default. + ;; + *) + ;; + esac + ;; + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # KAI C++ Compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + ecpc* ) + # old Intel C++ for x86_64, which still supported -KPIC. + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + icpc* ) + # Intel C++, used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. 
+ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + cxx*) + # Compaq C++ + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*) + # IBM XL 8.0, 9.0 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + esac + ;; + esac + ;; + lynxos*) + ;; + m88k*) + ;; + mvs*) + case $cc_basename in + cxx*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall' + ;; + *) + ;; + esac + ;; + netbsd*) + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + ;; + RCC*) + # Rational C++ 2.4.1 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + cxx*) + # Digital/Compaq C++ + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + *) + ;; + esac + ;; + psos*) + ;; + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + ;; + *) + ;; + esac + ;; + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + lcc*) + # Lucid + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + *) + ;; + esac + ;; + vxworks*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +], +[ + if test yes = "$GCC"; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | windows* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + case $host_os in + os2*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' + ;; + esac + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker ' + if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. + case $host_os in + aix*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + case $cc_basename in + nagfor*) + # NAG Fortran compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + + mingw* | windows* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + case $host_os in + os2*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' + ;; + esac + ;; + + hpux9* | hpux10* | hpux11*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. 
+ case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC (with -KPIC) is the default. + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + # old Intel for x86_64, which still supported -KPIC. + ecc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + *flang) + # Flang compiler. + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + icc* | ifort*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # Lahey Fortran 8.1. 
+ lf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared' + _LT_TAGVAR(lt_prog_compiler_static, $1)='--static' + ;; + nagfor*) + # NAG Fortran compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + ccc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All Alpha code is PIC. + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='' + ;; + *Sun\ F* | *Sun*Fortran*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + ;; + *Intel*\ [[CF]]*Compiler*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + *Portland\ Group*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All OSF/1 code is PIC. + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + rdos*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + solaris*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';; + *) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';; + esac + ;; + + sunos4*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + unicos*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + + uts4*) + 
_LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +]) +case $host_os in + # For platforms that do not support PIC, -DPIC is meaningless: + *djgpp*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])" + ;; +esac + +AC_CACHE_CHECK([for $compiler option to produce PIC], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)]) +_LT_TAGVAR(lt_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1) + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then + _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works], + [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)], + [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [], + [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in + "" | " "*) ;; + *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;; + esac], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no]) +fi +_LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1], + [Additional compiler flags for building library objects]) + +_LT_TAGDECL([wl], [lt_prog_compiler_wl], [1], + [How to pass a linker flag through the compiler]) +# +# Check to make sure the static flag actually works. 
+# +wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\" +_LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], + _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1), + $lt_tmp_static_flag, + [], + [_LT_TAGVAR(lt_prog_compiler_static, $1)=]) +_LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1], + [Compiler flag to prevent dynamic linking]) +])# _LT_COMPILER_PIC + + +# _LT_LINKER_SHLIBS([TAGNAME]) +# ---------------------------- +# See if the linker supports building shared libraries. +m4_defun([_LT_LINKER_SHLIBS], +[AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_TAG_COMPILER])dnl +AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) +m4_if([$1], [CXX], [ + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + case $host_os in + aix[[4-9]]*) + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. 
+ if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + ;; + pw32*) + _LT_TAGVAR(export_symbols_cmds, $1)=$ltdll_cmds + ;; + cygwin* | mingw* | windows* | cegcc*) + case $cc_basename in + cl* | icl*) + _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + ;; + *) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + ;; + esac + ;; + *) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + ;; + esac +], [ + runpath_var= + _LT_TAGVAR(allow_undefined_flag, $1)= + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(archive_cmds, $1)= + _LT_TAGVAR(archive_expsym_cmds, $1)= + _LT_TAGVAR(compiler_needs_object, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | 
$SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(hardcode_automatic, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(hardcode_libdir_separator, $1)= + _LT_TAGVAR(hardcode_minus_L, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + _LT_TAGVAR(inherit_rpath, $1)=no + _LT_TAGVAR(link_all_deplibs, $1)=unknown + _LT_TAGVAR(module_cmds, $1)= + _LT_TAGVAR(module_expsym_cmds, $1)= + _LT_TAGVAR(old_archive_from_new_cmds, $1)= + _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)= + _LT_TAGVAR(thread_safe_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + _LT_TAGVAR(include_expsyms, $1)= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ' (' and ')$', so one must not match beginning or + # end of line. Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', + # as well as any symbol that contains 'd'. + _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. +dnl Note also adjust exclude_expsyms for C++ above. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | windows* | pw32* | cegcc*) + # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. 
+ if test yes != "$GCC"; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) + with_gnu_ld=yes + ;; + openbsd*) + with_gnu_ld=no + ;; + esac + + _LT_TAGVAR(ld_shlibs, $1)=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test yes = "$with_gnu_ld"; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;; + *\ \(GNU\ Binutils\)\ [[3-9]]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test yes = "$lt_use_gnu_ld_interface"; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='$wl' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + supports_anon_versioning=no + case `$LD -v | $SED -e 's/([[^)]]\+)\s\+//' 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... 
+ *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[[3-9]]*) + # On AIX/PPC, the GNU linker is very broken + if test ia64 != "$host_cpu"; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. + +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. 
FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + cygwin* | mingw* | windows* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... 
+ _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + shrext_cmds=.dll + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags 
$output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test linux-dietlibc = "$host_os"; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test no = "$tmp_diet" + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" 
&& new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + _LT_TAGVAR(whole_archive_flag_spec, $1)= + tmp_sharedflag='--shared' ;; + nagfor*) # NAGFOR 5.3 + tmp_sharedflag='-Wl,-shared' ;; + xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below) + tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + ;; + esac + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) # Sun C 5.9 + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + + if test yes = "$supports_anon_versioning"; then + 
_LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + tcc*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='-rdynamic' + ;; + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test yes = "$supports_anon_versioning"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. 
Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. 
+ if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + sunos4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + + if test no = "$_LT_TAGVAR(ld_shlibs, $1)"; then + runpath_var= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + _LT_TAGVAR(hardcode_direct, $1)=unsupported + fi + ;; + + aix[[4-9]]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. 
+ # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then + aix_use_runtimelinking=yes + break + fi + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. + aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # traditional, no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. 
+ _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + ;; + esac + + if test yes = "$GCC"; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + ;; + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag="$shared_flag "'$wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. 
+ _LT_TAGVAR(always_export_symbols, $1)=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(allow_undefined_flag, $1)='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' $wl-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. 
+ _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([[, ]]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared libraries. + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! 
$soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + bsdi[[45]]*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic + ;; + + cygwin* | mingw* | windows* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. 
+ case $cc_basename in + cl* | icl*) + # Native MSVC or ICC + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + _LT_TAGVAR(archive_cmds, $1)='$CC -Fe $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -Fe $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. 
+ # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC and ICC wrapper + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + _LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + # FIXME: Should let the user specify the lib program. 
+ _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + ;; + esac + ;; + + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + dgux*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
+ freebsd* | dragonfly* | midnightbsd*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + hpux9*) + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + ;; + + hpux10*) + if test yes,no = "$GCC,$with_gnu_ld"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + fi + ;; + + hpux11*) + if test yes,no = "$GCC,$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + m4_if($1, [], [ + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + _LT_LINKER_OPTION([if $CC understands -b], + _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags'], + [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags']) + ;; + esac + fi + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol], + [lt_cv_irix_exported_symbol], + [save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" + AC_LINK_IFELSE( + [AC_LANG_SOURCE( + [AC_LANG_CASE([C], [[int foo (void) { return 0; }]], + [C++], [[int foo (void) { return 0; }]], + [Fortran 77], [[ + subroutine foo + end]], + [Fortran], [[ + subroutine foo + end]])])], + [lt_cv_irix_exported_symbol=yes], + [lt_cv_irix_exported_symbol=no]) + LDFLAGS=$save_LDFLAGS]) + if test yes = "$lt_cv_irix_exported_symbol"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' + fi + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' + fi + _LT_TAGVAR(archive_cmds_need_lc, 
$1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + linux*) + case $cc_basename in + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + _LT_TAGVAR(ld_shlibs, $1)=yes + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + newsos6) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *nto* | *qnx*) + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + fi + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + 
;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + shrext_cmds=.dll + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + ;; + + osf3*) + if test yes = "$GCC"; then + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations 
-o $lib' + else + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test yes = "$GCC"; then + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + else + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + solaris*) + _LT_TAGVAR(no_undefined_flag, $1)=' -z defs' + if test yes = "$GCC"; 
then + wlarc='$wl' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + _LT_TAGVAR(archive_cmds, $1)='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='$wl' + _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. GCC discards it without '$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) 
+ if test yes = "$GCC"; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + fi + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + sunos4*) + if test sequent = "$host_vendor"; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4) + case $host_vendor in + sni) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. 
+ _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' + _LT_TAGVAR(hardcode_direct, $1)=no + ;; + motorola) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4.3*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + _LT_TAGVAR(ld_shlibs, $1)=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. 
If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='$wl-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + if test sni = "$host_vendor"; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Blargedynsym' + ;; + esac + fi + fi +]) +AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) +test no = "$_LT_TAGVAR(ld_shlibs, $1)" && can_build_shared=no + +_LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld + +_LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl +_LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl +_LT_DECL([], [extract_expsyms_cmds], [2], + [The commands to extract the exported symbol list from a shared archive]) + +# +# Do we need to explicitly link libc? 
+# +case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in +x|xyes) + # Assume -lc should be added + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + + if test yes,yes = "$GCC,$enable_shared"; then + case $_LT_TAGVAR(archive_cmds, $1) in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + AC_CACHE_CHECK([whether -lc should be explicitly linked in], + [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1), + [$RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if AC_TRY_EVAL(ac_compile) 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) + pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1) + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1) + _LT_TAGVAR(allow_undefined_flag, $1)= + if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) + then + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no + else + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes + fi + _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + ]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1) + ;; + esac + fi + ;; +esac + +_LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0], + [Whether or not to add -lc for building shared libraries]) +_LT_TAGDECL([allow_libtool_libs_with_static_runtimes], + [enable_shared_with_static_runtimes], [0], + [Whether or not to disallow shared libs when runtime libs are static]) +_LT_TAGDECL([], [export_dynamic_flag_spec], [1], + [Compiler flag to allow reflexive dlopens]) +_LT_TAGDECL([], [whole_archive_flag_spec], [1], + 
[Compiler flag to generate shared objects directly from archives]) +_LT_TAGDECL([], [compiler_needs_object], [1], + [Whether the compiler copes with passing no objects directly]) +_LT_TAGDECL([], [old_archive_from_new_cmds], [2], + [Create an old-style archive from a shared archive]) +_LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2], + [Create a temporary old-style archive to link instead of a shared archive]) +_LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive]) +_LT_TAGDECL([], [archive_expsym_cmds], [2]) +_LT_TAGDECL([], [module_cmds], [2], + [Commands used to build a loadable module if different from building + a shared archive.]) +_LT_TAGDECL([], [module_expsym_cmds], [2]) +_LT_TAGDECL([], [with_gnu_ld], [1], + [Whether we are building with GNU ld or not]) +_LT_TAGDECL([], [allow_undefined_flag], [1], + [Flag that allows shared libraries with undefined symbols to be built]) +_LT_TAGDECL([], [no_undefined_flag], [1], + [Flag that enforces no undefined symbols]) +_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1], + [Flag to hardcode $libdir into a binary during linking. + This must work even if $libdir does not exist]) +_LT_TAGDECL([], [hardcode_libdir_separator], [1], + [Whether we need a single "-rpath" flag with a separated argument]) +_LT_TAGDECL([], [hardcode_direct], [0], + [Set to "yes" if using DIR/libNAME$shared_ext during linking hardcodes + DIR into the resulting binary]) +_LT_TAGDECL([], [hardcode_direct_absolute], [0], + [Set to "yes" if using DIR/libNAME$shared_ext during linking hardcodes + DIR into the resulting binary and the resulting library dependency is + "absolute", i.e. 
impossible to change by setting $shlibpath_var if the + library is relocated]) +_LT_TAGDECL([], [hardcode_minus_L], [0], + [Set to "yes" if using the -LDIR flag during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_shlibpath_var], [0], + [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_automatic], [0], + [Set to "yes" if building a shared library automatically hardcodes DIR + into the library and all subsequent libraries and executables linked + against it]) +_LT_TAGDECL([], [inherit_rpath], [0], + [Set to yes if linker adds runtime paths of dependent libraries + to runtime path list]) +_LT_TAGDECL([], [link_all_deplibs], [0], + [Whether libtool must link a program against all its dependency libraries]) +_LT_TAGDECL([], [always_export_symbols], [0], + [Set to "yes" if exported symbols are required]) +_LT_TAGDECL([], [export_symbols_cmds], [2], + [The commands to list exported symbols]) +_LT_TAGDECL([], [exclude_expsyms], [1], + [Symbols that should not be listed in the preloaded symbols]) +_LT_TAGDECL([], [include_expsyms], [1], + [Symbols that must always be exported]) +_LT_TAGDECL([], [prelink_cmds], [2], + [Commands necessary for linking programs (against libraries) with templates]) +_LT_TAGDECL([], [postlink_cmds], [2], + [Commands necessary for finishing linking programs]) +_LT_TAGDECL([], [file_list_spec], [1], + [Specify filename containing input files]) +dnl FIXME: Not yet implemented +dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1], +dnl [Compiler flag to generate thread safe objects]) +])# _LT_LINKER_SHLIBS + + +# _LT_LANG_C_CONFIG([TAG]) +# ------------------------ +# Ensure that the configuration variables for a C compiler are suitably +# defined. These variables are subsequently used by _LT_CONFIG to write +# the compiler configuration to 'libtool'. 
+m4_defun([_LT_LANG_C_CONFIG],
+[m4_require([_LT_DECL_EGREP])dnl
+# Remember the incoming CC; it is restored on the last line of this macro,
+# after the language pushed below has been popped again.
+lt_save_CC=$CC
+AC_LANG_PUSH(C)
+
+# Source file extension for C test sources.
+ac_ext=c
+
+# Object file extension for compiled C test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="int some_variable = 0;"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='int main(){return(0);}'
+
+_LT_TAG_COMPILER
+# Save the default compiler, since it gets overwritten when the other
+# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP.
+compiler_DEFAULT=$CC
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+# All probing and the final configuration write-out are skipped when
+# compiler is empty.
+# NOTE(review): compiler is presumably set by the tag-compiler step above;
+# confirm against that macro before relying on it.
+if test -n "$compiler"; then
+  _LT_COMPILER_NO_RTTI($1)
+  _LT_COMPILER_PIC($1)
+  _LT_COMPILER_C_O($1)
+  _LT_COMPILER_FILE_LOCKS($1)
+  _LT_LINKER_SHLIBS($1)
+  _LT_SYS_DYNAMIC_LINKER($1)
+  _LT_LINKER_HARDCODE_LIBPATH($1)
+  LT_SYS_DLOPEN_SELF
+  _LT_CMD_STRIPLIB
+
+  # Report what library types will actually be built
+  AC_MSG_CHECKING([if libtool supports shared libraries])
+  AC_MSG_RESULT([$can_build_shared])
+
+  AC_MSG_CHECKING([whether to build shared libraries])
+  # A request for shared libraries is dropped when they cannot be built.
+  test no = "$can_build_shared" && enable_shared=no
+
+  # On AIX, shared libraries and static libraries use the same namespace, and
+  # are all built from PIC.
+  case $host_os in
+  aix3*)
+    # Building shared turns static off on this host.
+    test yes = "$enable_shared" && enable_static=no
+    if test -n "$RANLIB"; then
+      # RANLIB and lib are backslash-escaped so they are stored unexpanded;
+      # the ranlib step is appended as one more ~-separated command.
+      archive_cmds="$archive_cmds~\$RANLIB \$lib"
+      postinstall_cmds='$RANLIB $lib'
+    fi
+    ;;
+
+  aix[[4-9]]*)
+    if test ia64 != "$host_cpu"; then
+      # Only the last arm changes anything: static is switched off for every
+      # shared-enabled combination not matched by the first two arms.
+      case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in
+      yes,aix,yes) ;; # shared object as lib.so file only
+      yes,svr4,*) ;; # shared object as lib.so archive member only
+      yes,*) enable_static=no ;; # shared object in lib.a archive as well
+      esac
+    fi
+    ;;
+  esac
+  AC_MSG_RESULT([$enable_shared])
+
+  AC_MSG_CHECKING([whether to build static libraries])
+  # Make sure either enable_shared or enable_static is yes.
+  test yes = "$enable_shared" || enable_static=yes
+  AC_MSG_RESULT([$enable_static])
+
+  _LT_CONFIG($1)
+fi
+AC_LANG_POP
+CC=$lt_save_CC
+])# _LT_LANG_C_CONFIG
+
+
+# _LT_LANG_CXX_CONFIG([TAG])
+# --------------------------
+# Ensure that the configuration variables for a C++ compiler are suitably
+# defined. These variables are subsequently used by _LT_CONFIG to write
+# the compiler configuration to 'libtool'.
+m4_defun([_LT_LANG_CXX_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +if test -n "$CXX" && ( test no != "$CXX" && + ( (test g++ = "$CXX" && `g++ -v >/dev/null 2>&1` ) || + (test g++ != "$CXX"))); then + AC_PROG_CXXCPP +else + _lt_caught_CXX_error=yes +fi + +AC_LANG_PUSH(C++) +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(compiler_needs_object, $1)=no +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for C++ test sources. +ac_ext=cpp + +# Object file extension for compiled C++ test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the CXX compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. 
+if test yes != "$_lt_caught_CXX_error"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="int some_variable = 0;" + + # Code to be used in simple link tests + lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. + lt_save_CC=$CC + lt_save_CFLAGS=$CFLAGS + lt_save_LD=$LD + lt_save_GCC=$GCC + GCC=$GXX + lt_save_with_gnu_ld=$with_gnu_ld + lt_save_path_LD=$lt_cv_path_LD + if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then + lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx + else + $as_unset lt_cv_prog_gnu_ld + fi + if test -n "${lt_cv_path_LDCXX+set}"; then + lt_cv_path_LD=$lt_cv_path_LDCXX + else + $as_unset lt_cv_path_LD + fi + test -z "${LDCXX+set}" || LD=$LDCXX + CC=${CXX-"c++"} + CFLAGS=$CXXFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + # We don't want -fno-exception when compiling C++ code, so set the + # no_builtin_flag separately + if test yes = "$GXX"; then + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' + else + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + fi + + if test yes = "$GXX"; then + # Set up default GNU C++ configuration + + LT_PATH_LD + + # Check if GNU C++ uses GNU ld as the underlying linker, since the + # archiving commands below assume that GNU ld is being used. 
+ if test yes = "$with_gnu_ld"; then + _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + + # If archive_cmds runs LD, not CC, wlarc should be empty + # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to + # investigate it a little bit more. (MM) + wlarc='$wl' + + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + with_gnu_ld=no + wlarc= + + # A generic and very simple default shared library creation + # command for GNU C++ for the case where it uses the native + # linker, instead of GNU ld. If possible, this setting should + # overridden to take advantage of the native linker features on + # the platform it is being used on. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + fi + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "[[-]]L"' + + else + GXX=no + with_gnu_ld=no + wlarc= + fi + + # PORTME: fill in a description of your system's C++ link characteristics + AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) + _LT_TAGVAR(ld_shlibs, $1)=yes + case $host_os in + aix3*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aix[[4-9]]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. + # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + case $ld_flag in + *-brtl*) + aix_use_runtimelinking=yes + break + ;; + esac + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. 
+ aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + ;; + esac + + if test yes = "$GXX"; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. 
Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag=$shared_flag' $wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to + # export. + _LT_TAGVAR(always_export_symbols, $1)=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + # The "-G" linker flag allows undefined symbols. + _LT_TAGVAR(no_undefined_flag, $1)='-bernotok' + # Determine the default libpath from the value encoded in an empty + # executable. 
+ _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' $wl-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. 
+ _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([[, ]]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared + # libraries. Need -bnortl late, we may have -brtl in LDFLAGS. + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! 
$soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + chorus*) + case $cc_basename in + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + cygwin* | mingw* | windows* | pw32* | cegcc*) + case $GXX,$cc_basename in + ,cl* | no,cl* | ,icl* | no,icl*) + # Native MSVC or ICC + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + func_to_tool_file "$lt_outputfile"~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # g++ + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. 
+ _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... + _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + shrext_cmds=.dll + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY 
${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + ;; + + dgux*) + case $cc_basename in + ec++*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + ghcx*) + # Green Hills C++ Compiler + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + freebsd2.*) + # C++ shared libraries reported to be fairly broken before + # switch to ELF + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + freebsd-elf*) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + ;; + + freebsd* | dragonfly* | midnightbsd*) + # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF + # conventions + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + hpux9*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the 
search PATH, + # but as the default + # location of the library. + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "[[-]]L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes = "$GXX"; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + hpux10*|hpux11*) + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + ;; + *) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + ;; + esac + fi + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, 
$1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, + # but as the default + # location of the library. + ;; + esac + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "[[-]]L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes = "$GXX"; then + if test no = "$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + fi + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + irix5* | irix6*) + case $cc_basename in + CC*) + # SGI C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + + # Archives containing C++ object files must be created using + # "CC -ar", where "CC" is the IRIX C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' + ;; + *) + if test yes = "$GXX"; then + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` -o $lib' + fi + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + esac + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib $wl-retain-symbols-file,$export_symbols; mv \$templib $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + + # Archives containing C++ object files must be created using + # "CC -Bstatic", where "CC" is the KAI C++ compiler. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' + ;; + icpc* | ecpc* ) + # Intel C++ + with_gnu_ld=yes + # version 8.0 and above of icpc choke on multiply defined symbols + # if we add $predep_objects and $postdep_objects, however 7.1 and + # earlier do not add the objects themselves. 
+ case `$CC -V 2>&1` in + *"Version 7."*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 8.0 or newer + tmp_idyn= + case $host_cpu in + ia64*) tmp_idyn=' -i_dynamic';; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + case `$CC -V` in + *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*) + _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ + compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' + _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ + $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ + $RANLIB $oldlib' + _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname 
$wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 6 and above use weak symbols + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl--rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + ;; + cxx*) + # Compaq C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib $wl-retain-symbols-file $wl$export_symbols' + + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' + ;; + xl* | mpixl* | bgxl*) + # IBM XL 8.0 on PPC, with GNU ld + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + if test yes = "$supports_anon_versioning"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file $wl$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all 
\"$new_convenience\"` $wl--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + + # Not sure whether something based on + # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 + # would be better. + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + esac + ;; + esac + ;; + + lynxos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + m88k*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + mvs*) + case $cc_basename in + cxx*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + fi + # Workaround some broken pre-1.5 toolchains + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' + ;; + + *nto* | *qnx*) + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + openbsd*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`"; then + 
_LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file,$export_symbols -o $lib' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + fi + output_verbose_link_cmd=func_echo_all + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Archives containing C++ object files must be created using + # the KAI C++ compiler. 
+ case $host in + osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; + *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;; + esac + ;; + RCC*) + # Rational C++ 2.4.1 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + cxx*) + case $host in + osf3*) + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $soname `test -n "$verstring" && func_echo_all "$wl-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + ;; + *) + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ + echo "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname $wl-input $wl$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~ + $RM $lib.exp' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes,no = "$GXX,$with_gnu_ld"; then + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + case $host in + osf3*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "[[-]]L"' + + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + psos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + lcc*) + # Lucid + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(archive_cmds_need_lc,$1)=yes + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag $wl-M $wl$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. + # Supported since Solaris 2.6 (maybe 2.5.1?) + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + + # The C++ compiler must be used to create the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' + ;; + *) + # GNU C++ compiler with Solaris linker + if test yes,no = "$GXX,$with_gnu_ld"; then + _LT_TAGVAR(no_undefined_flag, $1)=' $wl-z ${wl}defs' + if $CC --version | $GREP -v '^2\.7' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "[[-]]L"' + else + # g++ 2.7 appears to require '-G' NOT '-shared' on this + # platform. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "[[-]]L"' + fi + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $wl$libdir' + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + ;; + esac + fi + ;; + esac + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just 
about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='$wl-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~ + '"$_LT_TAGVAR(old_archive_cmds, $1)" + _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~ + '"$_LT_TAGVAR(reload_cmds, $1)" + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + vxworks*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) + test no = "$_LT_TAGVAR(ld_shlibs, $1)" && can_build_shared=no + + _LT_TAGVAR(GCC, $1)=$GXX + _LT_TAGVAR(LD, $1)=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the 
following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS + LDCXX=$LD + LD=$lt_save_LD + GCC=$lt_save_GCC + with_gnu_ld=$lt_save_with_gnu_ld + lt_cv_path_LDCXX=$lt_cv_path_LD + lt_cv_path_LD=$lt_save_path_LD + lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld + lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld +fi # test yes != "$_lt_caught_CXX_error" + +AC_LANG_POP +])# _LT_LANG_CXX_CONFIG + + +# _LT_FUNC_STRIPNAME_CNF +# ---------------------- +# func_stripname_cnf prefix suffix name +# Strip PREFIX and SUFFIX off of NAME; the result is left in func_stripname_result. +# PREFIX and SUFFIX must not contain globbing or regex special +# characters, hashes, or percent signs, but SUFFIX may contain a leading +# dot (in which case that matches only a dot). +# +# This function is identical to the (non-XSI) version of func_stripname, +# except this one can be used by m4 code that may be executed by configure, +# rather than the libtool script. +m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl +AC_REQUIRE([_LT_DECL_SED]) +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH]) +func_stripname_cnf () +{ + case @S|@2 in + .*) func_stripname_result=`$ECHO "@S|@3" | $SED "s%^@S|@1%%; s%\\\\@S|@2\$%%"`;; + *) func_stripname_result=`$ECHO "@S|@3" | $SED "s%^@S|@1%%; s%@S|@2\$%%"`;; + esac +} # func_stripname_cnf +])# _LT_FUNC_STRIPNAME_CNF + + +# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME]) +# --------------------------------- +# Figure out "hidden" library dependencies from verbose +# compiler output when linking a shared library. +# Parse the compiler output and extract the necessary +# objects, libraries and library flags.
+m4_defun([_LT_SYS_HIDDEN_LIBDEPS], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl +# Dependencies to place before and after the object being linked: +_LT_TAGVAR(predep_objects, $1)= +_LT_TAGVAR(postdep_objects, $1)= +_LT_TAGVAR(predeps, $1)= +_LT_TAGVAR(postdeps, $1)= +_LT_TAGVAR(compiler_lib_search_path, $1)= + +dnl we can't use the lt_simple_compile_test_code here, +dnl because it contains code intended for an executable, +dnl not a library. It's possible we should let each +dnl tag define a new lt_????_link_test_code variable, +dnl but it's only used here... +m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF +int a; +void foo (void) { a = 0; } +_LT_EOF +], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF +class Foo +{ +public: + Foo (void) { a = 0; } +private: + int a; +}; +_LT_EOF +], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer*4 a + a=0 + return + end +_LT_EOF +], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer a + a=0 + return + end +_LT_EOF +], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF +public class foo { + private int a; + public void bar () { + a = 0; + } +}; +_LT_EOF +], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF +package foo +func foo() { +} +_LT_EOF +]) + +_lt_libdeps_save_CFLAGS=$CFLAGS +case "$CC $CFLAGS " in #( +*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; +*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; +*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; +esac + +dnl Parse the compiler output and extract the necessary +dnl objects, libraries and library flags. +if AC_TRY_EVAL(ac_compile); then + # Parse the compiler output and extract the necessary + # objects, libraries and library flags. + + # Sentinel used to keep track of whether or not we are before + # the conftest object file.
+ pre_test_object_deps_done=no + + for p in `eval "$output_verbose_link_cmd"`; do + case $prev$p in + + -L* | -R* | -l*) + # Some compilers place space between "-{L,R,l}" and the path. + # Remove the space. + if test x-L = x"$p" || + test x-R = x"$p" || + test x-l = x"$p"; then + prev=$p + continue + fi + + # Expand the sysroot to ease extracting the directories later. + if test -z "$prev"; then + case $p in + -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; + -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; + -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; + esac + fi + case $p in + =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; + esac + if test no = "$pre_test_object_deps_done"; then + case $prev in + -L | -R) + # Internal compiler library paths should come after those + # provided by the user. The postdeps already come after the + # user supplied libs so there is no need to process them. + if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then + _LT_TAGVAR(compiler_lib_search_path, $1)=$prev$p + else + _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} $prev$p" + fi + ;; + # The "-l" case would never come before the object being + # linked, so don't bother handling this case. + esac + else + if test -z "$_LT_TAGVAR(postdeps, $1)"; then + _LT_TAGVAR(postdeps, $1)=$prev$p + else + _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} $prev$p" + fi + fi + prev= + ;; + + *.lto.$objext) ;; # Ignore GCC LTO objects + *.$objext) + # This assumes that the test object file only shows up + # once in the compiler output.
+ if test "$p" = "conftest.$objext"; then + pre_test_object_deps_done=yes + continue + fi + + if test no = "$pre_test_object_deps_done"; then + if test -z "$_LT_TAGVAR(predep_objects, $1)"; then + _LT_TAGVAR(predep_objects, $1)=$p + else + _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p" + fi + else + if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then + _LT_TAGVAR(postdep_objects, $1)=$p + else + _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p" + fi + fi + ;; + + *) ;; # Ignore the rest. + + esac + done + + # Clean up. + rm -f a.out a.exe +else + echo "libtool.m4: error: problem compiling $1 test program" +fi + +$RM -f conftest.$objext +CFLAGS=$_lt_libdeps_save_CFLAGS + +# PORTME: override above test on systems where it is broken +m4_if([$1], [CXX], +[case $host_os in +interix[[3-9]]*) + # Interix 3.5 installs completely hosed .la files for C++, so rather than + # hack all around it, let's just trust "g++" to DTRT. + _LT_TAGVAR(predep_objects,$1)= + _LT_TAGVAR(postdep_objects,$1)= + _LT_TAGVAR(postdeps,$1)= + ;; +esac +]) + +case " $_LT_TAGVAR(postdeps, $1) " in +*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; +esac + _LT_TAGVAR(compiler_lib_search_dirs, $1)= +if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then + _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | $SED -e 's! -L!
!g' -e 's!^ !!'` +fi +_LT_TAGDECL([], [compiler_lib_search_dirs], [1], + [The directories searched by this compiler when creating a shared library]) +_LT_TAGDECL([], [predep_objects], [1], + [Dependencies to place before and after the objects being linked to + create a shared library]) +_LT_TAGDECL([], [postdep_objects], [1]) +_LT_TAGDECL([], [predeps], [1]) +_LT_TAGDECL([], [postdeps], [1]) +_LT_TAGDECL([], [compiler_lib_search_path], [1], + [The library search path used internally by the compiler when linking + a shared library]) +])# _LT_SYS_HIDDEN_LIBDEPS + + +# _LT_LANG_F77_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for a Fortran 77 compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_F77_CONFIG], +[AC_LANG_PUSH(Fortran 77) +if test -z "$F77" || test no = "$F77"; then + _lt_disable_F77=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for f77 test sources. +ac_ext=f + +# Object file extension for compiled f77 test sources. 
+objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the F77 compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test yes != "$_lt_disable_F77"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. + lt_save_CC=$CC + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${F77-"f77"} + CFLAGS=$FFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + GCC=$G77 + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. 
+ case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test yes = "$enable_shared" || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)=$G77 + _LT_TAGVAR(LD, $1)=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS +fi # test yes != "$_lt_disable_F77" + +AC_LANG_POP +])# _LT_LANG_F77_CONFIG + + +# _LT_LANG_FC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for a Fortran compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. 
+m4_defun([_LT_LANG_FC_CONFIG], +[AC_LANG_PUSH(Fortran) + +if test -z "$FC" || test no = "$FC"; then + _lt_disable_FC=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for fc test sources. +ac_ext=${ac_fc_srcext-f} + +# Object file extension for compiled fc test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the FC compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test yes != "$_lt_disable_FC"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments.
+ lt_save_CC=$CC + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${FC-"f95"} + CFLAGS=$FCFLAGS + compiler=$CC + GCC=$ac_cv_fc_compiler_gnu + + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test yes = "$enable_shared" || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)=$ac_cv_fc_compiler_gnu + _LT_TAGVAR(LD, $1)=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing...
+ _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS +fi # test yes != "$_lt_disable_FC" + +AC_LANG_POP +])# _LT_LANG_FC_CONFIG + + +# _LT_LANG_GCJ_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for the GNU Java Compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_GCJ_CONFIG], +[AC_REQUIRE([LT_PROG_GCJ])dnl +AC_LANG_SAVE + +# Source file extension for Java test sources. +ac_ext=java + +# Object file extension for compiled Java test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="class foo {}" + +# Code to be used in simple link tests +lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }' + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC=yes +CC=${GCJ-"gcj"} +CFLAGS=$GCJFLAGS +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_TAGVAR(LD, $1)=$LD +_LT_CC_BASENAME([$compiler]) + +# GCJ did not exist at the time GCC didn't implicitly link libc in.
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no + +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) +fi + +AC_LANG_RESTORE + +GCC=$lt_save_GCC +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_GCJ_CONFIG + + +# _LT_LANG_GO_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for the GNU Go compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_GO_CONFIG], +[AC_REQUIRE([LT_PROG_GO])dnl +AC_LANG_SAVE + +# Source file extension for Go test sources. +ac_ext=go + +# Object file extension for compiled Go test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="package main; func main() { }" + +# Code to be used in simple link tests +lt_simple_link_test_code='package main; func main() { }' + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC=yes +CC=${GOC-"gccgo"} +CFLAGS=$GOFLAGS +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_TAGVAR(LD, $1)=$LD +_LT_CC_BASENAME([$compiler]) + +# Go did not exist at the time GCC didn't implicitly link libc in.
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no + +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) +fi + +AC_LANG_RESTORE + +GCC=$lt_save_GCC +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_GO_CONFIG + + +# _LT_LANG_RC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for the Windows resource compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_RC_CONFIG], +[AC_REQUIRE([LT_PROG_RC])dnl +AC_LANG_SAVE + +# Source file extension for RC test sources. +ac_ext=rc + +# Object file extension for compiled RC test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' + +# Code to be used in simple link tests +lt_simple_link_test_code=$lt_simple_compile_test_code + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments.
+lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC= +CC=${RC-"windres"} +CFLAGS= +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_CC_BASENAME([$compiler]) +_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + +if test -n "$compiler"; then + : + _LT_CONFIG($1) +fi + +GCC=$lt_save_GCC +AC_LANG_RESTORE +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_RC_CONFIG + + +# LT_PROG_GCJ +# ----------- +AC_DEFUN([LT_PROG_GCJ], +[m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ], + [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ], + [AC_CHECK_TOOL(GCJ, gcj,) + test set = "${GCJFLAGS+set}" || GCJFLAGS="-g -O2" + AC_SUBST(GCJFLAGS)])])[]dnl +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_GCJ], []) + + +# LT_PROG_GO +# ---------- +AC_DEFUN([LT_PROG_GO], +[AC_CHECK_TOOL(GOC, gccgo,) +]) + + +# LT_PROG_RC +# ---------- +AC_DEFUN([LT_PROG_RC], +[AC_CHECK_TOOL(RC, windres,) +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_RC], []) + + +# _LT_DECL_EGREP +# -------------- +# If we don't have a new enough Autoconf to choose the best grep +# available, choose the one first in the user's PATH. +m4_defun([_LT_DECL_EGREP], +[AC_REQUIRE([AC_PROG_EGREP])dnl +AC_REQUIRE([AC_PROG_FGREP])dnl +test -z "$GREP" && GREP=grep +_LT_DECL([], [GREP], [1], [A grep program that handles long lines]) +_LT_DECL([], [EGREP], [1], [An ERE matcher]) +_LT_DECL([], [FGREP], [1], [A literal string matcher]) +dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too +AC_SUBST([GREP]) +]) + + +# _LT_DECL_OBJDUMP +# ---------------- +# If we don't have a new enough Autoconf to choose the best objdump +# available, choose the one first in the user's PATH.
+m4_defun([_LT_DECL_OBJDUMP], +[AC_CHECK_TOOL(OBJDUMP, objdump, false) +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [An object symbol dumper]) +AC_SUBST([OBJDUMP]) +]) + +# _LT_DECL_DLLTOOL +# ---------------- +# Ensure DLLTOOL variable is set. +m4_defun([_LT_DECL_DLLTOOL], +[AC_CHECK_TOOL(DLLTOOL, dlltool, false) +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program]) +AC_SUBST([DLLTOOL]) +]) + +# _LT_DECL_FILECMD +# ---------------- +# Check for a file(cmd) program that can be used to detect file type and magic +m4_defun([_LT_DECL_FILECMD], +[AC_CHECK_PROG([FILECMD], [file], [:]) +_LT_DECL([], [FILECMD], [1], [A file(cmd) program that detects file types]) +])# _LT_DECL_FILECMD + +# _LT_DECL_SED +# ------------ +# Check for a fully-functional sed program, that truncates +# as few characters as possible. Prefer GNU sed if found. +m4_defun([_LT_DECL_SED], +[AC_PROG_SED +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" +_LT_DECL([], [SED], [1], [A sed program that does not truncate output]) +_LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"], + [Sed that helps us avoid accidentally triggering echo(1) options like -n]) +])# _LT_DECL_SED +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_SED], []) + + +# _LT_CHECK_SHELL_FEATURES +# ------------------------ +# Find out whether the shell is Bourne or XSI compatible, +# or has some other useful features.
+m4_defun([_LT_CHECK_SHELL_FEATURES], +[if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi +_LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac +_LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl +_LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl +])# _LT_CHECK_SHELL_FEATURES + + +# _LT_PATH_CONVERSION_FUNCTIONS +# ----------------------------- +# Determine what file name conversion functions should be used by +# func_to_host_file (and, implicitly, by func_to_host_path). These are needed +# for certain cross-compile configurations and native mingw. +m4_defun([_LT_PATH_CONVERSION_FUNCTIONS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_MSG_CHECKING([how to convert $build file names to $host format]) +AC_CACHE_VAL(lt_cv_to_host_file_cmd, +[case $host in + *-*-mingw* | *-*-windows* ) + case $build in + *-*-mingw* | *-*-windows* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* | *-*-windows* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac +]) +to_host_file_cmd=$lt_cv_to_host_file_cmd
+AC_MSG_RESULT([$lt_cv_to_host_file_cmd]) +_LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd], + [0], [convert $build file names to $host format])dnl + +AC_MSG_CHECKING([how to convert $build file names to toolchain format]) +AC_CACHE_VAL(lt_cv_to_tool_file_cmd, +[#assume ordinary cross tools, or native build. +lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* | *-*-windows* ) + case $build in + *-*-mingw* | *-*-windows* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac +]) +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +AC_MSG_RESULT([$lt_cv_to_tool_file_cmd]) +_LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd], + [0], [convert $build files to toolchain format])dnl +])# _LT_PATH_CONVERSION_FUNCTIONS diff --git a/m4/ltoptions.m4 b/m4/ltoptions.m4 new file mode 100644 index 0000000..6dfe99f --- /dev/null +++ b/m4/ltoptions.m4 @@ -0,0 +1,437 @@ +# Helper functions for option handling. -*- Autoconf -*- +# +# Copyright (C) 2004-2005, 2007-2009, 2011-2019, 2021-2024 Free +# Software Foundation, Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 9 ltoptions.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) + + +# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) +# ------------------------------------------ +m4_define([_LT_MANGLE_OPTION], +[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) + + +# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) +# --------------------------------------- +# Set option OPTION-NAME for macro MACRO-NAME, and if there is a +# matching handler defined, dispatch to it. Other OPTION-NAMEs are +# saved as a flag.
+m4_define([_LT_SET_OPTION], +[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl +m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), + _LT_MANGLE_DEFUN([$1], [$2]), + [m4_warning([Unknown $1 option '$2'])])[]dnl +]) + + +# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) +# ------------------------------------------------------------ +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +m4_define([_LT_IF_OPTION], +[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) + + +# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) +# ------------------------------------------------------- +# Execute IF-NOT-SET unless any option in OPTION-LIST for MACRO-NAME +# is set. +m4_define([_LT_UNLESS_OPTIONS], +[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), + [m4_define([$0_found])])])[]dnl +m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 +])[]dnl +]) + + +# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) +# ---------------------------------------- +# OPTION-LIST is a space-separated list of Libtool options associated +# with MACRO-NAME. If any OPTION has a matching handler declared with +# LT_OPTION_DEFINE, dispatch to that macro; otherwise warn about +# the unknown option. +m4_defun([_LT_SET_OPTIONS], +[# Set options +m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [_LT_SET_OPTION([$1], _LT_Option)]) + +m4_if([$1],[LT_INIT],[ + dnl + dnl Simply set some default values (i.e. off) if boolean options were not + dnl specified: + _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no + ]) + _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no + ]) + dnl + dnl If no reference was made to various pairs of opposing options, then + dnl we run the default mode handler for the pair.
For example, if neither + dnl 'shared' nor 'disable-shared' was passed, we enable building of shared + dnl archives by default: + _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) + _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], + [_LT_ENABLE_FAST_INSTALL]) + _LT_UNLESS_OPTIONS([LT_INIT], [aix-soname=aix aix-soname=both aix-soname=svr4], + [_LT_WITH_AIX_SONAME([aix])]) + ]) +])# _LT_SET_OPTIONS + + +## --------------------------------- ## +## Macros to handle LT_INIT options. ## +## --------------------------------- ## + +# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) +# ----------------------------------------- +m4_define([_LT_MANGLE_DEFUN], +[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) + + +# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) +# ----------------------------------------------- +m4_define([LT_OPTION_DEFINE], +[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl +])# LT_OPTION_DEFINE + + +# dlopen +# ------ +LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes +]) + +AU_DEFUN([AC_LIBTOOL_DLOPEN], +[_LT_SET_OPTION([LT_INIT], [dlopen]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the 'dlopen' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) + + +# win32-dll +# --------- +# Declare package support for building win32 dll's.
+LT_OPTION_DEFINE([LT_INIT], [win32-dll], +[enable_win32_dll=yes + +case $host in +*-*-cygwin* | *-*-mingw* | *-*-windows* | *-*-pw32* | *-*-cegcc*) + AC_CHECK_TOOL(AS, as, false) + AC_CHECK_TOOL(DLLTOOL, dlltool, false) + AC_CHECK_TOOL(OBJDUMP, objdump, false) + ;; +esac + +test -z "$AS" && AS=as +_LT_DECL([], [AS], [1], [Assembler program])dnl + +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl + +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl +])# win32-dll + +AU_DEFUN([AC_LIBTOOL_WIN32_DLL], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +_LT_SET_OPTION([LT_INIT], [win32-dll]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the 'win32-dll' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) + + +# _LT_ENABLE_SHARED([DEFAULT]) +# ---------------------------- +# implement the --enable-shared flag, and support the 'shared' and +# 'disable-shared' LT_INIT options. +# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. +m4_define([_LT_ENABLE_SHARED], +[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([shared], + [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], + [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) + + _LT_DECL([build_libtool_libs], [enable_shared], [0], + [Whether or not to build shared libraries]) +])# _LT_ENABLE_SHARED + +LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) +]) + +AC_DEFUN([AC_DISABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], [disable-shared]) +]) + +AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) +AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_SHARED], []) +dnl AC_DEFUN([AM_DISABLE_SHARED], []) + + + +# _LT_ENABLE_STATIC([DEFAULT]) +# ---------------------------- +# implement the --enable-static flag, and support the 'static' and +# 'disable-static' LT_INIT options. +# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. +m4_define([_LT_ENABLE_STATIC], +[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([static], + [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], + [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [enable_static=]_LT_ENABLE_STATIC_DEFAULT) + + _LT_DECL([build_old_libs], [enable_static], [0], + [Whether or not to build static libraries]) +])# _LT_ENABLE_STATIC + +LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) +]) + +AC_DEFUN([AC_DISABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], [disable-static]) +]) + +AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) +AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_STATIC], []) +dnl AC_DEFUN([AM_DISABLE_STATIC], []) + + + +# _LT_ENABLE_FAST_INSTALL([DEFAULT]) +# ---------------------------------- +# implement the --enable-fast-install flag, and support the 'fast-install' +# and 'disable-fast-install' LT_INIT options. +# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. +m4_define([_LT_ENABLE_FAST_INSTALL], +[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([fast-install], + [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], + [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) + +_LT_DECL([fast_install], [enable_fast_install], [0], + [Whether or not to optimize for fast installation])dnl +])# _LT_ENABLE_FAST_INSTALL + +LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) + +# Old names: +AU_DEFUN([AC_ENABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the 'fast-install' option into LT_INIT's first parameter.]) +]) + +AU_DEFUN([AC_DISABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], [disable-fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the 'disable-fast-install' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) +dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) + + +# _LT_WITH_AIX_SONAME([DEFAULT]) +# ------------------------------ +# implement the --with-aix-soname flag, and support the 'aix-soname=aix' +# and 'aix-soname=both' and 'aix-soname=svr4' LT_INIT options. DEFAULT +# is either 'aix', 'both' or 'svr4'. If omitted, it defaults to 'aix'.
+m4_define([_LT_WITH_AIX_SONAME], +[m4_define([_LT_WITH_AIX_SONAME_DEFAULT], [m4_if($1, svr4, svr4, m4_if($1, both, both, aix))])dnl +shared_archive_member_spec= +case $host,$enable_shared in +power*-*-aix[[5-9]]*,yes) + AC_MSG_CHECKING([which variant of shared library versioning to provide]) + AC_ARG_WITH([aix-soname], + [AS_HELP_STRING([--with-aix-soname=aix|svr4|both], + [shared library versioning (aka "SONAME") variant to provide on AIX, @<:@default=]_LT_WITH_AIX_SONAME_DEFAULT[@:>@.])], + [case $withval in + aix|svr4|both) + ;; + *) + AC_MSG_ERROR([Unknown argument to --with-aix-soname]) + ;; + esac + lt_cv_with_aix_soname=$with_aix_soname], + [AC_CACHE_VAL([lt_cv_with_aix_soname], + [lt_cv_with_aix_soname=]_LT_WITH_AIX_SONAME_DEFAULT) + with_aix_soname=$lt_cv_with_aix_soname]) + AC_MSG_RESULT([$with_aix_soname]) + if test aix != "$with_aix_soname"; then + # For the AIX way of multilib, we name the shared archive member + # based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o', + # and 'shr.imp' or 'shr_64.imp', respectively, for the Import File. + # Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag, + # the AIX toolchain works better with OBJECT_MODE set (default 32). + if test 64 = "${OBJECT_MODE-32}"; then + shared_archive_member_spec=shr_64 + else + shared_archive_member_spec=shr + fi + fi + ;; +*) + with_aix_soname=aix + ;; +esac + +_LT_DECL([], [shared_archive_member_spec], [0], + [Shared archive member basename, for filename based shared library versioning on AIX])dnl +])# _LT_WITH_AIX_SONAME + +LT_OPTION_DEFINE([LT_INIT], [aix-soname=aix], [_LT_WITH_AIX_SONAME([aix])]) +LT_OPTION_DEFINE([LT_INIT], [aix-soname=both], [_LT_WITH_AIX_SONAME([both])]) +LT_OPTION_DEFINE([LT_INIT], [aix-soname=svr4], [_LT_WITH_AIX_SONAME([svr4])]) + + +# _LT_WITH_PIC([MODE]) +# -------------------- +# implement the --with-pic flag, and support the 'pic-only' and 'no-pic' +# LT_INIT options. +# MODE is either 'yes' or 'no'.
If omitted, pic_mode is 'default' (use both). +m4_define([_LT_WITH_PIC], +[AC_ARG_WITH([pic], + [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@], + [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], + [lt_p=${PACKAGE-default} + case $withval in + yes|no) pic_mode=$withval ;; + *) + pic_mode=default + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for lt_pkg in $withval; do + IFS=$lt_save_ifs + if test "X$lt_pkg" = "X$lt_p"; then + pic_mode=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [pic_mode=m4_default([$1], [default])]) + +_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl +])# _LT_WITH_PIC + +LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) + +# Old name: +AU_DEFUN([AC_LIBTOOL_PICMODE], +[_LT_SET_OPTION([LT_INIT], [pic-only]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the 'pic-only' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) + +## ----------------- ## +## LTDL_INIT Options ## +## ----------------- ## + +m4_define([_LTDL_MODE], []) +LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], + [m4_define([_LTDL_MODE], [nonrecursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [recursive], + [m4_define([_LTDL_MODE], [recursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [subproject], + [m4_define([_LTDL_MODE], [subproject])]) + +m4_define([_LTDL_TYPE], []) +LT_OPTION_DEFINE([LTDL_INIT], [installable], + [m4_define([_LTDL_TYPE], [installable])]) +LT_OPTION_DEFINE([LTDL_INIT], [convenience], + [m4_define([_LTDL_TYPE], [convenience])]) diff --git a/m4/ltsugar.m4 b/m4/ltsugar.m4 new file mode 100644 index 0000000..5b5c80a --- /dev/null +++ b/m4/ltsugar.m4 @@ -0,0 +1,124 @@ +# ltsugar.m4 -- libtool m4 base layer.
-*-Autoconf-*- +# +# Copyright (C) 2004-2005, 2007-2008, 2011-2019, 2021-2024 Free Software +# Foundation, Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 6 ltsugar.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) + + +# lt_join(SEP, ARG1, [ARG2...]) +# ----------------------------- +# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their +# associated separator. +# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier +# versions in m4sugar had bugs. +m4_define([lt_join], +[m4_if([$#], [1], [], + [$#], [2], [[$2]], + [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) +m4_define([_lt_join], +[m4_if([$#$2], [2], [], + [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) + + +# lt_car(LIST) +# lt_cdr(LIST) +# ------------ +# Manipulate m4 lists. +# These macros are necessary as long as we still need to support +# Autoconf-2.59, which quotes differently. +m4_define([lt_car], [[$1]]) +m4_define([lt_cdr], +[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], + [$#], 1, [], + [m4_dquote(m4_shift($@))])]) +m4_define([lt_unquote], $1) + + +# lt_append(MACRO-NAME, STRING, [SEPARATOR]) +# ------------------------------------------ +# Redefine MACRO-NAME to hold its former content plus 'SEPARATOR''STRING'. +# Note that neither SEPARATOR nor STRING are expanded; they are appended +# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). +# No SEPARATOR is output if MACRO-NAME was previously undefined (different +# than defined and empty). +# +# This macro is needed until we can rely on Autoconf 2.62, since earlier +# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
+m4_define([lt_append], +[m4_define([$1], + m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) + + + +# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) +# ---------------------------------------------------------- +# Produce a SEP delimited list of all paired combinations of elements of +# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list +# has the form PREFIXmINFIXSUFFIXn. +# Needed until we can rely on m4_combine added in Autoconf 2.62. +m4_define([lt_combine], +[m4_if(m4_eval([$# > 3]), [1], + [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl +[[m4_foreach([_Lt_prefix], [$2], + [m4_foreach([_Lt_suffix], + ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, + [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) + + +# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) +# ----------------------------------------------------------------------- +# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited +# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
+m4_define([lt_if_append_uniq], +[m4_ifdef([$1], + [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], + [lt_append([$1], [$2], [$3])$4], + [$5])], + [lt_append([$1], [$2], [$3])$4])]) + + +# lt_dict_add(DICT, KEY, VALUE) +# ----------------------------- +m4_define([lt_dict_add], +[m4_define([$1($2)], [$3])]) + + +# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) +# -------------------------------------------- +m4_define([lt_dict_add_subkey], +[m4_define([$1($2:$3)], [$4])]) + + +# lt_dict_fetch(DICT, KEY, [SUBKEY]) +# ---------------------------------- +m4_define([lt_dict_fetch], +[m4_ifval([$3], + m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), + m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) + + +# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) +# ----------------------------------------------------------------- +m4_define([lt_if_dict_fetch], +[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], + [$5], + [$6])]) + + +# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) +# -------------------------------------------------------------- +m4_define([lt_dict_filter], +[m4_if([$5], [], [], + [lt_join(m4_quote(m4_default([$4], [[, ]])), + lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), + [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl +]) diff --git a/m4/ltversion.m4 b/m4/ltversion.m4 new file mode 100644 index 0000000..82887f7 --- /dev/null +++ b/m4/ltversion.m4 @@ -0,0 +1,24 @@ +# ltversion.m4 -- version numbers -*- Autoconf -*- +# +# Copyright (C) 2004, 2011-2019, 2021-2024 Free Software Foundation, +# Inc. +# Written by Scott James Remnant, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. 
+ +# @configure_input@ + +# serial 4337 ltversion.m4 +# This file is part of GNU Libtool + +m4_define([LT_PACKAGE_VERSION], [2.5.0.1-38c1-dirty]) +m4_define([LT_PACKAGE_REVISION], [2.5.0.1]) + +AC_DEFUN([LTVERSION_VERSION], +[macro_version='2.5.0.1-38c1-dirty' +macro_revision='2.5.0.1' +_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) +_LT_DECL(, macro_revision, 0) +]) diff --git a/m4/lt~obsolete.m4 b/m4/lt~obsolete.m4 new file mode 100644 index 0000000..22b5346 --- /dev/null +++ b/m4/lt~obsolete.m4 @@ -0,0 +1,99 @@ +# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- +# +# Copyright (C) 2004-2005, 2007, 2009, 2011-2019, 2021-2024 Free +# Software Foundation, Inc. +# Written by Scott James Remnant, 2004. +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 5 lt~obsolete.m4 + +# These exist entirely to fool aclocal when bootstrapping libtool. +# +# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN), +# which have later been changed to m4_define as they aren't part of the +# exported API, or moved to Autoconf or Automake where they belong. +# +# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN +# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us +# using a macro with the same name in our local m4/libtool.m4 it'll +# pull the old libtool.m4 in (it doesn't see our shiny new m4_define +# and doesn't know about Autoconf macros at all.) +# +# So we provide this file, which has a silly filename so it's always +# included after everything else. This provides aclocal with the +# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything +# because those macros already exist, or will be overwritten later. +# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. 
+# +# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. +# Yes, that means every name once taken will need to remain here until +# we give up compatibility with versions before 1.7, at which point +# we need to keep only those names which we still refer to. + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) + +m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) +m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) +m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) +m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) +m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) +m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) +m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) +m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) +m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) +m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) +m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) +m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) +m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) +m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) +m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) +m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) +m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) +m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) +m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) +m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) +m4_ifndef([AC_DEPLIBS_CHECK_METHOD], 
[AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) +m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) +m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) +m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) +m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) +m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) +m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) +m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) +m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) +m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) +m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) +m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) +m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) +m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) +m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) +m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) +m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) +m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) +m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) +m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) +m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) 
+m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) +m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) +m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) +m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) +m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) +m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) +m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) +m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) +m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) +m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) diff --git a/m4/pcre2_visibility.m4 b/m4/pcre2_visibility.m4 new file mode 100644 index 0000000..c025d5f --- /dev/null +++ b/m4/pcre2_visibility.m4 @@ -0,0 +1,88 @@ +# visibility.m4 serial 4 (gettext-0.18.2) +dnl Copyright (C) 2005, 2008, 2010-2011 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Bruno Haible. + +dnl Tests whether the compiler supports the command-line option +dnl -fvisibility=hidden and the function and variable attributes +dnl __attribute__((__visibility__("hidden"))) and +dnl __attribute__((__visibility__("default"))). +dnl Does *not* test for __visibility__("protected") - which has tricky +dnl semantics (see the 'vismain' test in glibc) and does not exist e.g. on +dnl MacOS X. +dnl Does *not* test for __visibility__("internal") - which has processor +dnl dependent semantics. +dnl Does *not* test for #pragma GCC visibility push(hidden) - which is +dnl "really only recommended for legacy code". +dnl Set the variable CFLAG_VISIBILITY. 
+dnl Defines and sets the variable HAVE_VISIBILITY. + +dnl Modified to fit with PCRE build environment by Cristian Rodríguez. +dnl Adjusted for PCRE2 by PH + +AC_DEFUN([PCRE2_VISIBILITY], +[ + AC_REQUIRE([AC_PROG_CC]) + VISIBILITY_CFLAGS= + VISIBILITY_CXXFLAGS= + HAVE_VISIBILITY=0 + if test -n "$GCC"; then + dnl First, check whether -Werror can be added to the command line, or + dnl whether it leads to an error because of some other option that the + dnl user has put into $CC $CFLAGS $CPPFLAGS. + AC_MSG_CHECKING([whether the -Werror option is usable]) + AC_CACHE_VAL([pcre2_cv_cc_vis_werror], [ + pcre2_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -Werror" + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[]], [[]])], + [pcre2_cv_cc_vis_werror=yes], + [pcre2_cv_cc_vis_werror=no]) + CFLAGS="$pcre2_save_CFLAGS"]) + AC_MSG_RESULT([$pcre2_cv_cc_vis_werror]) + dnl Now check whether visibility declarations are supported. + AC_MSG_CHECKING([for simple visibility declarations]) + AC_CACHE_VAL([pcre2_cv_cc_visibility], [ + pcre2_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -fvisibility=hidden" + dnl We use the option -Werror and a function dummyfunc, because on some + dnl platforms (Cygwin 1.7) the use of -fvisibility triggers a warning + dnl "visibility attribute not supported in this configuration; ignored" + dnl at the first function definition in every compilation unit, and we + dnl don't want to use the option in this case. 
+ if test $pcre2_cv_cc_vis_werror = yes; then + CFLAGS="$CFLAGS -Werror" + fi + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [[extern __attribute__((__visibility__("hidden"))) int hiddenvar; + extern __attribute__((__visibility__("default"))) int exportedvar; + extern __attribute__((__visibility__("hidden"))) int hiddenfunc (void); + extern __attribute__((__visibility__("default"))) int exportedfunc (void); + void dummyfunc (void) {} + ]], + [[]])], + [pcre2_cv_cc_visibility=yes], + [pcre2_cv_cc_visibility=no]) + CFLAGS="$pcre2_save_CFLAGS"]) + AC_MSG_RESULT([$pcre2_cv_cc_visibility]) + if test $pcre2_cv_cc_visibility = yes; then + VISIBILITY_CFLAGS="-fvisibility=hidden" + VISIBILITY_CXXFLAGS="-fvisibility=hidden -fvisibility-inlines-hidden" + HAVE_VISIBILITY=1 + AC_DEFINE(PCRE2_EXPORT, [__attribute__ ((visibility ("default")))], [to make a symbol visible]) + else + AC_DEFINE(PCRE2_EXPORT, [], [to make a symbol visible]) + fi + else + AC_DEFINE(PCRE2_EXPORT, [], [to make a symbol visible]) + fi + AC_SUBST([VISIBILITY_CFLAGS]) + AC_SUBST([VISIBILITY_CXXFLAGS]) + AC_SUBST([HAVE_VISIBILITY]) + AC_DEFINE_UNQUOTED([HAVE_VISIBILITY], [$HAVE_VISIBILITY], + [Define to 1 if the compiler supports simple visibility declarations.]) +]) diff --git a/missing b/missing new file mode 100755 index 0000000..1fe1611 --- /dev/null +++ b/missing @@ -0,0 +1,215 @@ +#! /bin/sh +# Common wrapper for a few potentially missing GNU programs. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1996-2021 Free Software Foundation, Inc. +# Originally written by Fran,cois Pinard , 1996. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +if test $# -eq 0; then + echo 1>&2 "Try '$0 --help' for more information" + exit 1 +fi + +case $1 in + + --is-lightweight) + # Used by our autoconf macros to check whether the available missing + # script is modern enough. + exit 0 + ;; + + --run) + # Back-compat with the calling convention used by older automake. + shift + ;; + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... + +Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due +to PROGRAM being missing or too old. + +Options: + -h, --help display this help and exit + -v, --version output version information and exit + +Supported PROGRAM values: + aclocal autoconf autoheader autom4te automake makeinfo + bison yacc flex lex help2man + +Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and +'g' are ignored when checking the name. + +Send bug reports to <bug-automake@gnu.org>." + exit $? + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing $scriptversion (GNU Automake)" + exit $? + ;; + + -*) + echo 1>&2 "$0: unknown '$1' option" + echo 1>&2 "Try '$0 --help' for more information" + exit 1 + ;; + +esac + +# Run the given program, remember its exit status. +"$@"; st=$? + +# If it succeeded, we are done.
+test $st -eq 0 && exit 0 + +# Also exit now if it failed (or wasn't found), and '--version' was +# passed; such an option is passed most likely to detect whether the +# program is present and works. +case $2 in --version|--help) exit $st;; esac + +# Exit code 63 means version mismatch. This often happens when the user +# tries to use an ancient version of a tool on a file that requires a +# minimum version. +if test $st -eq 63; then + msg="probably too old" +elif test $st -eq 127; then + # Program was missing. + msg="missing on your system" +else + # Program was found and executed, but failed. Give up. + exit $st +fi + +perl_URL=https://www.perl.org/ +flex_URL=https://github.com/westes/flex +gnu_software_URL=https://www.gnu.org/software + +program_details () +{ + case $1 in + aclocal|automake) + echo "The '$1' program is part of the GNU Automake package:" + echo "<$gnu_software_URL/automake>" + echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/autoconf>" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + autoconf|autom4te|autoheader) + echo "The '$1' program is part of the GNU Autoconf package:" + echo "<$gnu_software_URL/autoconf/>" + echo "It also requires GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + esac +} + +give_advice () +{ + # Normalize program name to check for. + normalized_program=`echo "$1" | sed ' + s/^gnu-//; t + s/^gnu//; t + s/^g//; t'` + + printf '%s\n' "'$1' is $msg." + + configure_deps="'configure.ac' or m4 files included by 'configure.ac'" + case $normalized_program in + autoconf*) + echo "You should only need it if you modified 'configure.ac'," + echo "or m4 files included by it." + program_details 'autoconf' + ;; + autoheader*) + echo "You should only need it if you modified 'acconfig.h' or" + echo "$configure_deps."
+ program_details 'autoheader' + ;; + automake*) + echo "You should only need it if you modified 'Makefile.am' or" + echo "$configure_deps." + program_details 'automake' + ;; + aclocal*) + echo "You should only need it if you modified 'acinclude.m4' or" + echo "$configure_deps." + program_details 'aclocal' + ;; + autom4te*) + echo "You might have modified some maintainer files that require" + echo "the 'autom4te' program to be rebuilt." + program_details 'autom4te' + ;; + bison*|yacc*) + echo "You should only need it if you modified a '.y' file." + echo "You may want to install the GNU Bison package:" + echo "<$gnu_software_URL/bison/>" + ;; + lex*|flex*) + echo "You should only need it if you modified a '.l' file." + echo "You may want to install the Fast Lexical Analyzer package:" + echo "<$flex_URL>" + ;; + help2man*) + echo "You should only need it if you modified a dependency" \ + "of a man page." + echo "You may want to install the GNU Help2man package:" + echo "<$gnu_software_URL/help2man/>" + ;; + makeinfo*) + echo "You should only need it if you modified a '.texi' file, or" + echo "any other file indirectly affecting the aspect of the manual." + echo "You might want to install the Texinfo package:" + echo "<$gnu_software_URL/texinfo/>" + echo "The spurious makeinfo call might also be the consequence of" + echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might" + echo "want to install GNU make:" + echo "<$gnu_software_URL/make/>" + ;; + *) + echo "You might have modified some files without having the proper" + echo "tools for further handling them. Check the 'README' file, it" + echo "often tells you about the needed prerequisites for installing" + echo "this package. You may also peek at any GNU archive site, in" + echo "case some other package contains this missing '$1' program." 
+ ;; + esac +} + +give_advice "$1" | sed -e '1s/^/WARNING: /' \ + -e '2,$s/^/ /' >&2 + +# Propagate the correct exit status (expected to be 127 for a program +# not found, 63 for a program that failed due to version mismatch). +exit $st + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/pcre2-config.in b/pcre2-config.in new file mode 100644 index 0000000..9639a90 --- /dev/null +++ b/pcre2-config.in @@ -0,0 +1,121 @@ +#!/bin/sh + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +exec_prefix_set=no + +cflags="[--cflags]" +libs= + +if test @enable_pcre2_16@ = yes ; then + libs="[--libs16] $libs" +fi + +if test @enable_pcre2_32@ = yes ; then + libs="[--libs32] $libs" +fi + +if test @enable_pcre2_8@ = yes ; then + libs="[--libs8] [--libs-posix] $libs" + cflags="$cflags [--cflags-posix]" +fi + +usage="Usage: pcre2-config [--prefix] [--exec-prefix] [--version] $libs $cflags" + +if test $# -eq 0; then + echo "${usage}" 1>&2 + exit 1 +fi + +libR= +case `uname -s` in + *SunOS*) + libR=" -R@libdir@" + ;; + *BSD*) + libR=" -Wl,-R@libdir@" + ;; +esac + +libS= +if test @libdir@ != /usr/lib ; then + libS=-L@libdir@ +fi + +while test $# -gt 0; do + case "$1" in + -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; + *) optarg= ;; + esac + + case $1 in + --prefix=*) + prefix=$optarg + if test $exec_prefix_set = no ; then + exec_prefix=$optarg + fi + ;; + --prefix) + echo $prefix + ;; + --exec-prefix=*) + exec_prefix=$optarg + exec_prefix_set=yes + ;; + --exec-prefix) + echo $exec_prefix + ;; + --version) + echo @PACKAGE_VERSION@ + ;; + --cflags) + if test @includedir@ != /usr/include ; then + includes=-I@includedir@ + fi + echo $includes @PCRE2_STATIC_CFLAG@ + ;; + --cflags-posix) + if test @enable_pcre2_8@ = yes ; then + if test @includedir@ != /usr/include ; then + includes=-I@includedir@ + fi + echo 
$includes @PCRE2POSIX_CFLAG@ + else + echo "${usage}" 1>&2 + fi + ;; + --libs-posix) + if test @enable_pcre2_8@ = yes ; then + echo $libS$libR -lpcre2-posix@LIB_POSTFIX@ -lpcre2-8@LIB_POSTFIX@ + else + echo "${usage}" 1>&2 + fi + ;; + --libs8) + if test @enable_pcre2_8@ = yes ; then + echo $libS$libR -lpcre2-8@LIB_POSTFIX@ + else + echo "${usage}" 1>&2 + fi + ;; + --libs16) + if test @enable_pcre2_16@ = yes ; then + echo $libS$libR -lpcre2-16@LIB_POSTFIX@ + else + echo "${usage}" 1>&2 + fi + ;; + --libs32) + if test @enable_pcre2_32@ = yes ; then + echo $libS$libR -lpcre2-32@LIB_POSTFIX@ + else + echo "${usage}" 1>&2 + fi + ;; + *) + echo "${usage}" 1>&2 + exit 1 + ;; + esac + shift +done diff --git a/perltest.sh b/perltest.sh new file mode 100755 index 0000000..4cd0f8a --- /dev/null +++ b/perltest.sh @@ -0,0 +1,402 @@ +#! /bin/sh + +# Script for testing regular expressions with perl to check that PCRE2 handles +# them the same. For testing with different versions of Perl, if the first +# argument is -perl then the second is taken as the Perl command to use, and +# both are then removed. If the next argument is "-w", Perl is called with +# "-w", which turns on its warning mode. +# +# The Perl code has to have "use utf8" and "require Encode" at the start when +# running UTF-8 tests, but *not* for non-utf8 tests. (The "require" would +# actually be OK for non-utf8-tests, but is not always installed, so this way +# the script will always run for these tests.) +# +# The desired effect is achieved by making this a shell script that passes the +# Perl script to Perl through a pipe. If the next argument is "-utf8", a +# suitable prefix is set up. +# +# The remaining arguments, if any, are passed to Perl. They are an input file +# and an output file. If there is one argument, the output is written to +# STDOUT. If Perl receives no arguments, it opens /dev/tty as input, and writes +# output to STDOUT. 
(I haven't found a way of getting it to use STDIN, because +# of the contorted piping input.) + +perl=perl +perlarg='' +prefix='' + +if [ $# -gt 1 -a "$1" = "-perl" ] ; then + shift + perl=$1 + shift +fi + +if [ $# -gt 0 -a "$1" = "-w" ] ; then + perlarg="-w" + shift +fi + +if [ $# -gt 0 -a "$1" = "-utf8" ] ; then + prefix="use utf8; require Encode;" + shift +fi + + +# The Perl script that follows has a similar specification to pcre2test, and so +# can be given identical input, except that input patterns can be followed only +# by Perl's lower case modifiers and certain other pcre2test modifiers that are +# either handled or ignored: +# +# aftertext interpreted as "print $' afterwards" +# afteralltext ignored +# dupnames ignored (Perl always allows) +# jitstack ignored +# mark show mark information +# no_auto_possess ignored +# no_start_optimize insert (??{""}) at pattern start (disables optimizing) +# -no_start_optimize ignored +# subject_literal does not process subjects for escapes +# ucp sets Perl's /u modifier +# utf invoke UTF-8 functionality +# +# Comment lines are ignored. The #pattern command can be used to set modifiers +# that will be added to each subsequent pattern, after any modifiers it may +# already have. NOTE: this is different to pcre2test where #pattern sets +# defaults which can be overridden on individual patterns. The #subject command +# may be used to set or unset a default "mark" modifier for data lines. This is +# the only use of #subject that is supported. The #perltest, #forbid_utf, and +# #newline_default commands, which are needed in the relevant pcre2test files, +# are ignored. Any other #-command is ignored, with a warning message. +# +# The pattern lines should use only / as the delimiter. The other characters +# that pcre2test supports cause problems with this script. +# +# The data lines must not have any pcre2test modifiers. 
Unless +# "subject_literal" is on the pattern, data lines are processed as +# Perl double-quoted strings, so if they contain " $ or @ characters, these +# have to be escaped. For this reason, all such characters in the +# Perl-compatible testinput1 and testinput4 files are escaped so that they can +# be used for perltest as well as for pcre2test. The output from this script +# should be same as from pcre2test, apart from the initial identifying banner. +# +# The other testinput files are not suitable for feeding to perltest.sh, +# because they make use of the special modifiers that pcre2test uses for +# testing features of PCRE2. Some of these files also contain malformed regular +# expressions, in order to check that PCRE2 diagnoses them correctly. + +(echo "$prefix" ; cat <<'PERLEND' + +# The alpha assertions currently give warnings even when -w is not specified. + +no warnings "experimental::alpha_assertions"; +no warnings "experimental::script_run"; + +# Function for turning a string into a string of printing chars. + +sub pchars { +my($t) = ""; +if ($utf8) + { + @p = unpack('U*', $_[0]); + foreach $c (@p) + { + if ($c >= 32 && $c < 127) { $t .= chr $c; } + else { $t .= sprintf("\\x{%02x}", $c); + } + } + } +else + { + foreach $c (split(//, $_[0])) + { + if (ord $c >= 32 && ord $c < 127) { $t .= $c; } + else { $t .= sprintf("\\x%02x", ord $c); } + } + } +$t; +} + + +# Read lines from a named file or stdin and write to a named file or stdout; +# lines consist of a regular expression, in delimiters and optionally followed +# by options, followed by a set of test data, terminated by an empty line. 
+ +# Sort out the input and output files. With no arguments the script is +# interactive: it reads from /dev/tty and prompts on STDOUT. + +if (@ARGV > 0) + { + open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n"; + $infile = "INFILE"; + $interact = 0; + } +else + { + open(INFILE, "</dev/tty") || die "Failed to open /dev/tty\n"; + $infile = "INFILE"; + $interact = 1; + } + +if (@ARGV > 1) + { + open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n"; + $outfile = "OUTFILE"; + } +else { $outfile = "STDOUT"; } + +printf($outfile "Perl $^V\n\n"); + +$extra_modifiers = ""; +$default_show_mark = 0; + +# Main loop + +NEXT_RE: +for (;;) + { + printf " re> " if $interact; + last if ! ($_ = <$infile>); + printf $outfile "$_" if ! $interact; + next if ($_ =~ /^\s*$/ || $_ =~ /^#[\s!]/); + + # A few of pcre2test's #-commands are supported, or just ignored. Any others + # are ignored with a warning message. + + if ($_ =~ /^#pattern(.*)/) + { + $extra_modifiers = $1; + chomp($extra_modifiers); + $extra_modifiers =~ s/\s+$//; + next; + } + elsif ($_ =~ /^#subject(.*)/) + { + $mod = $1; + chomp($mod); + $mod =~ s/\s+$//; + if ($mod =~ s/(-?)mark,?//) + { + $minus = $1; + $default_show_mark = ($minus =~ /^$/); + } + if ($mod !~ /^\s*$/) + { + printf $outfile "** Warning: \"$mod\" in #subject ignored\n"; + } + next; + } + elsif ($_ =~ /^#/) + { + if ($_ !~ /^#newline_default|^#perltest|^#forbid_utf/) + { + printf $outfile "** Warning: #-command ignored: %s", $_; + } + next; + } + + $pattern = $_; + + while ($pattern !~ /^\s*(.).*\1/s) + { + printf " > " if $interact; + last if ! ($_ = <$infile>); + printf $outfile "$_" if ! $interact; + $pattern .= $_; + } + + chomp($pattern); + $pattern =~ s/\s+$//; + + # Split the pattern from the modifiers and adjust them as necessary. + + $pattern =~ /^\s*((.).*\2)(.*)$/s; + $pat = $1; + $del = $2; + $mod = "$3,$extra_modifiers"; + $mod =~ s/^,\s*//; + + # The private "aftertext" modifier means "print $' afterwards". The + # lookbehind stops it from matching the tail of "allaftertext" (which would + # leave a stray "all" behind to be taken as Perl modifiers). + + $showrest = ($mod =~ s/(?<!all)aftertext,?//); + + # The "subject_literal" modifier disables escapes in subjects.
+ + $subject_literal = ($mod =~ s/subject_literal,?//); + + # "allaftertext" is used by pcre2test to print remainders after captures + + $mod =~ s/allaftertext,?//; + + # Detect utf + + $utf8 = $mod =~ s/utf,?//; + + # Remove "dupnames". + + $mod =~ s/dupnames,?//; + + # Remove "jitstack". + + $mod =~ s/jitstack=\d+,?//; + + # The "mark" modifier requests checking of MARK data + + $show_mark = $default_show_mark | ($mod =~ s/mark,?//); + + # "ucp" asks pcre2test to set PCRE2_UCP; change this to /u for Perl + + $mod =~ s/ucp,?/u/; + + # Remove "no_auto_possess". + + $mod =~ s/no_auto_possess,?//; + + # Use no_start_optimize (disable PCRE2 start-up optimization) to disable Perl + # optimization by inserting (??{""}) at the start of the pattern. We may + # also encounter -no_start_optimize from a #pattern setting. + + $mod =~ s/-no_start_optimize,?//; + + if ($mod =~ s/no_start_optimize,?//) { $pat =~ s/$del/$del(??{""})/; } + + # Add back retained modifiers and check that the pattern is valid. + + $mod =~ s/,//g; + $pattern = "$pat$mod"; + + eval "\$_ =~ ${pattern}"; + if ($@) + { + printf $outfile "Error: $@"; + if (! $interact) + { + for (;;) + { + last if ! ($_ = <$infile>); + last if $_ =~ /^\s*$/; + } + } + next NEXT_RE; + } + + # If the /g modifier is present, we want to put a loop round the matching; + # otherwise just a single "if". + + $cmd = ($pattern =~ /g[a-z]*\s*$/)? "while" : "if"; + + # If the pattern is actually the null string, Perl uses the most recently + # executed (and successfully compiled) regex instead. This is a + # nasty trap for the unwary! The PCRE2 test suite does contain null strings + # in places - if they are allowed through here all sorts of weird and + # unexpected effects happen. To avoid this, we replace such patterns with + # a non-null pattern that has the same effect.
+ + $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/); + + # Read data lines and test them + + for (;;) + { + printf "data> " if $interact; + last NEXT_RE if ! ($_ = <$infile>); + chomp; + printf $outfile "%s", "$_\n" if ! $interact; + + s/\s+$//; # Remove trailing space + s/^\s+//; # Remove leading space + + last if ($_ eq ""); + next if $_ =~ /^\\=(?:\s|$)/; # Comment line + + if ($subject_literal) + { + $x = $_; + } + else + { + $x = eval "\"$_\""; # To get escapes processed + } + + # Empty array for holding results, ensure $REGERROR and $REGMARK are + # unset, then do the matching. + + @subs = (); + + $pushes = "push \@subs,\$&;" . + "push \@subs,\$1;" . + "push \@subs,\$2;" . + "push \@subs,\$3;" . + "push \@subs,\$4;" . + "push \@subs,\$5;" . + "push \@subs,\$6;" . + "push \@subs,\$7;" . + "push \@subs,\$8;" . + "push \@subs,\$9;" . + "push \@subs,\$10;" . + "push \@subs,\$11;" . + "push \@subs,\$12;" . + "push \@subs,\$13;" . + "push \@subs,\$14;" . + "push \@subs,\$15;" . + "push \@subs,\$16;" . + "push \@subs,\$'; }"; + + undef $REGERROR; + undef $REGMARK; + + eval "${cmd} (\$x =~ ${pattern}) {" . $pushes; + + if ($@) + { + printf $outfile "Error: $@\n"; + next NEXT_RE; + } + elsif (scalar(@subs) == 0) + { + printf $outfile "No match"; + if ($show_mark && defined $REGERROR && $REGERROR != 1) + { printf $outfile (", mark = %s", &pchars($REGERROR)); } + printf $outfile "\n"; + } + else + { + while (scalar(@subs) != 0) + { + printf $outfile (" 0: %s\n", &pchars($subs[0])); + printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest; + $last_printed = 0; + for ($i = 1; $i <= 16; $i++) + { + if (defined $subs[$i]) + { + while ($last_printed++ < $i-1) + { printf $outfile ("%2d: \n", $last_printed); } + printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i])); + $last_printed = $i; + } + } + splice(@subs, 0, 18); + } + + # It seems that $REGMARK is not marked as UTF-8 even when use utf8 is + # set and the input pattern was a UTF-8 string. 
We can, however, force + # it to be so marked. + + if ($show_mark && defined $REGMARK && $REGMARK != 1) + { + $xx = $REGMARK; + $xx = Encode::decode_utf8($xx) if $utf8; + printf $outfile ("MK: %s\n", &pchars($xx)); + } + } + } + } + +# By closing OUTFILE explicitly, we avoid a Perl warning in -w mode +# about "main::OUTFILE" being used only once. + +close(OUTFILE) if $outfile eq "OUTFILE"; + +PERLEND +) | $perl $perlarg - "$@" + +# End diff --git a/src/config.h.generic b/src/config.h.generic new file mode 100644 index 0000000..0092948 --- /dev/null +++ b/src/config.h.generic @@ -0,0 +1,483 @@ +/* src/config.h. Generated from config.h.in by configure. */ +/* src/config.h.in. Generated from configure.ac by autoheader. */ + +/* PCRE2 is written in Standard C, but there are a few non-standard things it +can cope with, allowing it to run on SunOS4 and other "close to standard" +systems. + +In environments that support the GNU autotools, config.h.in is converted into +config.h by the "configure" script. In environments that use CMake, +config-cmake.h.in is converted into config.h. If you are going to build PCRE2 "by +hand" without using "configure" or CMake, you should copy the distributed +config.h.generic to config.h, and edit the macro definitions to be the way you +need them. You must then add -DHAVE_CONFIG_H to all of your compile commands, +so that config.h is included at the start of every source. + +Alternatively, you can avoid editing by using -D on the compiler command line +to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H, +but if you do, default values will be taken from config.h for non-boolean +macros that are not defined on the command line. + +Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should be +defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All +such macros are listed as a commented #undef in config.h.generic.
Macros such +as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are +surrounded by #ifndef/#endif lines so that the value can be overridden by -D. + +PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if +HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make +sure both macros are undefined; an emulation function will then be used. */ + +/* By default, the \R escape sequence matches any Unicode line ending + character or sequence of characters. If BSR_ANYCRLF is defined (to any + value), this is changed so that backslash-R matches only CR, LF, or CRLF. + The build-time default can be overridden by the user of PCRE2 at runtime. + */ +/* #undef BSR_ANYCRLF */ + +/* Define to any value to disable the use of the z and t modifiers in + formatting settings such as %zu or %td (this is rarely needed). */ +/* #undef DISABLE_PERCENT_ZT */ + +/* If you are compiling for a system that uses EBCDIC instead of ASCII + character codes, define this macro to any value. When EBCDIC is set, PCRE2 + assumes that all input strings are in EBCDIC. If you do not define this + macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It + is not possible to build a version of PCRE2 that supports both EBCDIC and + UTF-8/16/32. */ +/* #undef EBCDIC */ + +/* In an EBCDIC environment, define this macro to any value to arrange for the + NL character to be 0x25 instead of the default 0x15. NL plays the role that + LF does in an ASCII/Unicode environment. */ +/* #undef EBCDIC_NL25 */ + +/* Define this if your compiler supports __attribute__((uninitialized)) */ +/* #undef HAVE_ATTRIBUTE_UNINITIALIZED */ + +/* Define to 1 if you have the 'bcopy' function. */ +/* #undef HAVE_BCOPY */ + +/* Define this if your compiler provides __builtin_mul_overflow() */ +/* #undef HAVE_BUILTIN_MUL_OVERFLOW */ + +/* Define to 1 if you have the <bzlib.h> header file. */ +/* #undef HAVE_BZLIB_H */ + +/* Define to 1 if you have the <dirent.h> header file.
*/ +/* #undef HAVE_DIRENT_H */ + +/* Define to 1 if you have the <dlfcn.h> header file. */ +/* #undef HAVE_DLFCN_H */ + +/* Define to 1 if you have the <editline/readline.h> header file. */ +/* #undef HAVE_EDITLINE_READLINE_H */ + +/* Define to 1 if you have the <edit/readline/readline.h> header file. */ +/* #undef HAVE_EDIT_READLINE_READLINE_H */ + +/* Define to 1 if you have the <inttypes.h> header file. */ +/* #undef HAVE_INTTYPES_H */ + +/* Define to 1 if you have the <limits.h> header file. */ +/* #undef HAVE_LIMITS_H */ + +/* Define to 1 if you have the 'memfd_create' function. */ +/* #undef HAVE_MEMFD_CREATE */ + +/* Define to 1 if you have the 'memmove' function. */ +/* #undef HAVE_MEMMOVE */ + +/* Define to 1 if you have the <minix/config.h> header file. */ +/* #undef HAVE_MINIX_CONFIG_H */ + +/* Define to 1 if you have the 'mkostemp' function. */ +/* #undef HAVE_MKOSTEMP */ + +/* Define if you have POSIX threads libraries and header files. */ +/* #undef HAVE_PTHREAD */ + +/* Have PTHREAD_PRIO_INHERIT. */ +/* #undef HAVE_PTHREAD_PRIO_INHERIT */ + +/* Define to 1 if you have the <readline.h> header file. */ +/* #undef HAVE_READLINE_H */ + +/* Define to 1 if you have the <readline/history.h> header file. */ +/* #undef HAVE_READLINE_HISTORY_H */ + +/* Define to 1 if you have the <readline/readline.h> header file. */ +/* #undef HAVE_READLINE_READLINE_H */ + +/* Define to 1 if you have the `realpath' function. */ +/* #undef HAVE_REALPATH */ + +/* Define to 1 if you have the 'secure_getenv' function. */ +/* #undef HAVE_SECURE_GETENV */ + +/* Define to 1 if you have the <stdint.h> header file. */ +/* #undef HAVE_STDINT_H */ + +/* Define to 1 if you have the <stdio.h> header file. */ +/* #undef HAVE_STDIO_H */ + +/* Define to 1 if you have the <stdlib.h> header file. */ +/* #undef HAVE_STDLIB_H */ + +/* Define to 1 if you have the 'strerror' function. */ +/* #undef HAVE_STRERROR */ + +/* Define to 1 if you have the <strings.h> header file. */ +/* #undef HAVE_STRINGS_H */ + +/* Define to 1 if you have the <string.h> header file. */ +/* #undef HAVE_STRING_H */ + +/* Define to 1 if you have the <sys/stat.h> header file.
*/ +/* #undef HAVE_SYS_STAT_H */ + +/* Define to 1 if you have the <sys/types.h> header file. */ +/* #undef HAVE_SYS_TYPES_H */ + +/* Define to 1 if you have the <sys/wait.h> header file. */ +/* #undef HAVE_SYS_WAIT_H */ + +/* Define to 1 if you have the <unistd.h> header file. */ +/* #undef HAVE_UNISTD_H */ + +/* Define to 1 if the compiler supports simple visibility declarations. */ +/* #undef HAVE_VISIBILITY */ + +/* Define to 1 if you have the <wchar.h> header file. */ +/* #undef HAVE_WCHAR_H */ + +/* Define to 1 if you have the <windows.h> header file. */ +/* #undef HAVE_WINDOWS_H */ + +/* Define to 1 if you have the <zlib.h> header file. */ +/* #undef HAVE_ZLIB_H */ + +/* This limits the amount of memory that may be used while matching a pattern. + It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply + to JIT matching. The value is in kibibytes (units of 1024 bytes). */ +#ifndef HEAP_LIMIT +#define HEAP_LIMIT 20000000 +#endif + +/* The value of LINK_SIZE determines the number of bytes used to store links + as offsets within the compiled regex. The default is 2, which allows for + compiled patterns up to 65535 code units long. This covers the vast + majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes + instead. This allows for longer patterns in extreme cases. */ +#ifndef LINK_SIZE +#define LINK_SIZE 2 +#endif + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +/* This is ignored unless you are using libtool. */ +#ifndef LT_OBJDIR +#define LT_OBJDIR ".libs/" +#endif + +/* The value of MATCH_LIMIT determines the default number of times the + pcre2_match() function can record a backtrack position during a single + matching attempt. The value is also used to limit a loop counter in + pcre2_dfa_match(). There is a runtime interface for setting a different + limit. The limit exists in order to catch runaway regular expressions that + take forever to determine that they do not match.
The default is set very + large so that it does not accidentally catch legitimate cases. */ +#ifndef MATCH_LIMIT +#define MATCH_LIMIT 10000000 +#endif + +/* The above limit applies to all backtracks, whether or not they are nested. + In some environments it is desirable to limit the nesting of backtracking + (that is, the depth of tree that is searched) more strictly, in order to + restrict the maximum amount of heap memory that is used. The value of + MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it + must be less than the value of MATCH_LIMIT. The default is to use the same + value as MATCH_LIMIT. There is a runtime method for setting a different + limit. In the case of pcre2_dfa_match(), this limit controls the depth of + the internal nested function calls that are used for pattern recursions, + lookarounds, and atomic groups. */ +#ifndef MATCH_LIMIT_DEPTH +#define MATCH_LIMIT_DEPTH MATCH_LIMIT +#endif + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. */ +#ifndef MAX_NAME_COUNT +#define MAX_NAME_COUNT 10000 +#endif + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. */ +#ifndef MAX_NAME_SIZE +#define MAX_NAME_SIZE 128 +#endif + +/* The value of MAX_VARLOOKBEHIND specifies the default maximum length, in + characters, for a variable-length lookbehind assertion. */ +#ifndef MAX_VARLOOKBEHIND +#define MAX_VARLOOKBEHIND 255 +#endif + +/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */ +/* #undef NEVER_BACKSLASH_C */ + +/* The value of NEWLINE_DEFAULT determines the default newline character + sequence. PCRE2 client programs can override this by selecting other values + at run time. 
The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5 + (ANYCRLF), and 6 (NUL). */ +#ifndef NEWLINE_DEFAULT +#define NEWLINE_DEFAULT 2 +#endif + +/* Name of package */ +#define PACKAGE "pcre2" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "PCRE2" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "PCRE2 10.44" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "pcre2" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "10.44" + +/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested + parentheses (of any kind) in a pattern. This limits the amount of system + stack that is used while compiling a pattern. */ +#ifndef PARENS_NEST_LIMIT +#define PARENS_NEST_LIMIT 250 +#endif + +/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by + pcre2grep to hold parts of the file it is searching. The buffer will be + expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing + very long lines. The actual amount of memory used by pcre2grep is three + times this number, because it allows for the buffering of "before" and + "after" lines. */ +#ifndef PCRE2GREP_BUFSIZE +#define PCRE2GREP_BUFSIZE 20480 +#endif + +/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer + used by pcre2grep to hold parts of the file it is searching. The actual + amount of memory used by pcre2grep is three times this number, because it + allows for the buffering of "before" and "after" lines. */ +#ifndef PCRE2GREP_MAX_BUFSIZE +#define PCRE2GREP_MAX_BUFSIZE 1048576 +#endif + +/* Define to any value to include debugging code. 
*/ +/* #undef PCRE2_DEBUG */ + +/* to make a symbol visible */ +#define PCRE2_EXPORT + +/* If you are compiling for a system other than a Unix-like system or + Win32, and it needs some magic to be inserted before the definition + of a function that is exported by the library, define this macro to + contain the relevant magic. If you do not define this macro, a suitable + __declspec value is used for Windows systems; in other environments + a compiler relevant "extern" is used with any "visibility" related + attributes from PCRE2_EXPORT included. + This macro appears at the start of every exported function that is part + of the external API. It does not appear on functions that are "external" + in the C sense, but which are internal to the library. */ +/* #undef PCRE2_EXP_DEFN */ + +/* Define to any value if linking statically (TODO: make nice with Libtool) */ +/* #undef PCRE2_STATIC */ + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +/* #undef PTHREAD_CREATE_JOINABLE */ + +/* Define to any non-zero number to enable support for SELinux compatible + executable memory allocator in JIT. Note that this will have no effect + unless SUPPORT_JIT is also defined. */ +/* #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR */ + +/* Define to 1 if all of the C89 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +/* #undef STDC_HEADERS */ + +/* Define to any value to enable differential fuzzing support. */ +/* #undef SUPPORT_DIFF_FUZZ */ + +/* Define to any value to enable support for Just-In-Time compiling. */ +/* #undef SUPPORT_JIT */ + +/* Define to any value to allow pcre2grep to be linked with libbz2, so that it + is able to handle .bz2 files. */ +/* #undef SUPPORT_LIBBZ2 */ + +/* Define to any value to allow pcre2test to be linked with libedit.
*/ +/* #undef SUPPORT_LIBEDIT */ + +/* Define to any value to allow pcre2test to be linked with libreadline. */ +/* #undef SUPPORT_LIBREADLINE */ + +/* Define to any value to allow pcre2grep to be linked with libz, so that it + is able to handle .gz files. */ +/* #undef SUPPORT_LIBZ */ + +/* Define to any value to enable callout script support in pcre2grep. */ +/* #undef SUPPORT_PCRE2GREP_CALLOUT */ + +/* Define to any value to enable fork support in pcre2grep callout scripts. + This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined. + */ +/* #undef SUPPORT_PCRE2GREP_CALLOUT_FORK */ + +/* Define to any value to enable JIT support in pcre2grep. Note that this will + have no effect unless SUPPORT_JIT is also defined. */ +/* #undef SUPPORT_PCRE2GREP_JIT */ + +/* Define to any value to enable the 16 bit PCRE2 library. */ +/* #undef SUPPORT_PCRE2_16 */ + +/* Define to any value to enable the 32 bit PCRE2 library. */ +/* #undef SUPPORT_PCRE2_32 */ + +/* Define to any value to enable the 8 bit PCRE2 library. */ +/* #undef SUPPORT_PCRE2_8 */ + +/* Define to any value to enable support for Unicode and UTF encoding. This + will work even in an EBCDIC environment, but it is incompatible with the + EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or* + ASCII/Unicode, but not both at once. */ +/* #undef SUPPORT_UNICODE */ + +/* Define to any value for valgrind support to find invalid memory reads. */ +/* #undef SUPPORT_VALGRIND */ + +/* Enable extensions on AIX, Interix, z/OS. */ +#ifndef _ALL_SOURCE +# define _ALL_SOURCE 1 +#endif +/* Enable general extensions on macOS. */ +#ifndef _DARWIN_C_SOURCE +# define _DARWIN_C_SOURCE 1 +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +/* Enable X/Open compliant socket functions that do not require linking + with -lxnet on HP-UX 11.11. 
*/ +#ifndef _HPUX_ALT_XOPEN_SOCKET_API +# define _HPUX_ALT_XOPEN_SOCKET_API 1 +#endif +/* Identify the host operating system as Minix. + This macro does not affect the system headers' behavior. + A future release of Autoconf may stop defining this macro. */ +#ifndef _MINIX +/* # undef _MINIX */ +#endif +/* Enable general extensions on NetBSD. + Enable NetBSD compatibility extensions on Minix. */ +#ifndef _NETBSD_SOURCE +# define _NETBSD_SOURCE 1 +#endif +/* Enable OpenBSD compatibility extensions on NetBSD. + Oddly enough, this does nothing on OpenBSD. */ +#ifndef _OPENBSD_SOURCE +# define _OPENBSD_SOURCE 1 +#endif +/* Define to 1 if needed for POSIX-compatible behavior. */ +#ifndef _POSIX_SOURCE +/* # undef _POSIX_SOURCE */ +#endif +/* Define to 2 if needed for POSIX-compatible behavior. */ +#ifndef _POSIX_1_SOURCE +/* # undef _POSIX_1_SOURCE */ +#endif +/* Enable POSIX-compatible threading on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif +/* Enable extensions specified by ISO/IEC TS 18661-5:2014. */ +#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__ +# define __STDC_WANT_IEC_60559_ATTRIBS_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TS 18661-1:2014. */ +#ifndef __STDC_WANT_IEC_60559_BFP_EXT__ +# define __STDC_WANT_IEC_60559_BFP_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TS 18661-2:2015. */ +#ifndef __STDC_WANT_IEC_60559_DFP_EXT__ +# define __STDC_WANT_IEC_60559_DFP_EXT__ 1 +#endif +/* Enable extensions specified by C23 Annex F. */ +#ifndef __STDC_WANT_IEC_60559_EXT__ +# define __STDC_WANT_IEC_60559_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TS 18661-4:2015. */ +#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__ +# define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1 +#endif +/* Enable extensions specified by C23 Annex H and ISO/IEC TS 18661-3:2015. 
*/ +#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__ +# define __STDC_WANT_IEC_60559_TYPES_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TR 24731-2:2010. */ +#ifndef __STDC_WANT_LIB_EXT2__ +# define __STDC_WANT_LIB_EXT2__ 1 +#endif +/* Enable extensions specified by ISO/IEC 24747:2009. */ +#ifndef __STDC_WANT_MATH_SPEC_FUNCS__ +# define __STDC_WANT_MATH_SPEC_FUNCS__ 1 +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# define _TANDEM_SOURCE 1 +#endif +/* Enable X/Open extensions. Define to 500 only if necessary + to make mbstate_t available. */ +#ifndef _XOPEN_SOURCE +/* # undef _XOPEN_SOURCE */ +#endif + +/* Version number of package */ +#define VERSION "10.44" + +/* Number of bits in a file offset, on hosts where this is settable. */ +/* #undef _FILE_OFFSET_BITS */ + +/* Define to 1 on platforms where this makes off_t a 64-bit type. */ +/* #undef _LARGE_FILES */ + +/* Number of bits in time_t, on hosts where this is settable. */ +/* #undef _TIME_BITS */ + +/* Define to 1 on platforms where this makes time_t a 64-bit type. */ +/* #undef __MINGW_USE_VC2005_COMPAT */ + +/* Define to empty if 'const' does not conform to ANSI C. */ +/* #undef const */ + +/* Define to the type of a signed integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it. */ +/* #undef int64_t */ + +/* Define as 'unsigned int' if <stddef.h> doesn't define. */ +/* #undef size_t */ diff --git a/src/config.h.in b/src/config.h.in new file mode 100644 index 0000000..8249182 --- /dev/null +++ b/src/config.h.in @@ -0,0 +1,460 @@ +/* src/config.h.in. Generated from configure.ac by autoheader. */ + + +/* PCRE2 is written in Standard C, but there are a few non-standard things it +can cope with, allowing it to run on SunOS4 and other "close to standard" +systems. + +In environments that support the GNU autotools, config.h.in is converted into +config.h by the "configure" script.
In environments that use CMake, +config-cmake.h.in is converted into config.h. If you are going to build PCRE2 "by +hand" without using "configure" or CMake, you should copy the distributed +config.h.generic to config.h, and edit the macro definitions to be the way you +need them. You must then add -DHAVE_CONFIG_H to all of your compile commands, +so that config.h is included at the start of every source. + +Alternatively, you can avoid editing by using -D on the compiler command line +to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H, +but if you do, default values will be taken from config.h for non-boolean +macros that are not defined on the command line. + +Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be +defined (conventionally to 1) for TRUE, or not defined at all for FALSE. All +such macros are listed as a commented #undef in config.h.generic. Macros such +as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are +surrounded by #ifndef/#endif lines so that the value can be overridden by -D. + +PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if +HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make +sure both macros are undefined; an emulation function will then be used. */ + +/* By default, the \R escape sequence matches any Unicode line ending + character or sequence of characters. If BSR_ANYCRLF is defined (to any + value), this is changed so that backslash-R matches only CR, LF, or CRLF. + The build-time default can be overridden by the user of PCRE2 at runtime. + */ +#undef BSR_ANYCRLF + +/* Define to any value to disable the use of the z and t modifiers in + formatting settings such as %zu or %td (this is rarely needed). */ +#undef DISABLE_PERCENT_ZT + +/* If you are compiling for a system that uses EBCDIC instead of ASCII + character codes, define this macro to any value.
When EBCDIC is set, PCRE2 + assumes that all input strings are in EBCDIC. If you do not define this + macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It + is not possible to build a version of PCRE2 that supports both EBCDIC and + UTF-8/16/32. */ +#undef EBCDIC + +/* In an EBCDIC environment, define this macro to any value to arrange for the + NL character to be 0x25 instead of the default 0x15. NL plays the role that + LF does in an ASCII/Unicode environment. */ +#undef EBCDIC_NL25 + +/* Define this if your compiler supports __attribute__((uninitialized)) */ +#undef HAVE_ATTRIBUTE_UNINITIALIZED + +/* Define to 1 if you have the 'bcopy' function. */ +#undef HAVE_BCOPY + +/* Define this if your compiler provides __builtin_mul_overflow() */ +#undef HAVE_BUILTIN_MUL_OVERFLOW + +/* Define to 1 if you have the <bzlib.h> header file. */ +#undef HAVE_BZLIB_H + +/* Define to 1 if you have the <dirent.h> header file. */ +#undef HAVE_DIRENT_H + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the <editline/readline.h> header file. */ +#undef HAVE_EDITLINE_READLINE_H + +/* Define to 1 if you have the <edit/readline/readline.h> header file. */ +#undef HAVE_EDIT_READLINE_READLINE_H + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <limits.h> header file. */ +#undef HAVE_LIMITS_H + +/* Define to 1 if you have the 'memfd_create' function. */ +#undef HAVE_MEMFD_CREATE + +/* Define to 1 if you have the 'memmove' function. */ +#undef HAVE_MEMMOVE + +/* Define to 1 if you have the <minix/config.h> header file. */ +#undef HAVE_MINIX_CONFIG_H + +/* Define to 1 if you have the 'mkostemp' function. */ +#undef HAVE_MKOSTEMP + +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD + +/* Have PTHREAD_PRIO_INHERIT. */ +#undef HAVE_PTHREAD_PRIO_INHERIT + +/* Define to 1 if you have the <readline.h> header file. */ +#undef HAVE_READLINE_H + +/* Define to 1 if you have the <readline/history.h> header file.
*/ +#undef HAVE_READLINE_HISTORY_H + +/* Define to 1 if you have the <readline/readline.h> header file. */ +#undef HAVE_READLINE_READLINE_H + +/* Define to 1 if you have the `realpath' function. */ +#undef HAVE_REALPATH + +/* Define to 1 if you have the 'secure_getenv' function. */ +#undef HAVE_SECURE_GETENV + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdio.h> header file. */ +#undef HAVE_STDIO_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the 'strerror' function. */ +#undef HAVE_STRERROR + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <sys/wait.h> header file. */ +#undef HAVE_SYS_WAIT_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if the compiler supports simple visibility declarations. */ +#undef HAVE_VISIBILITY + +/* Define to 1 if you have the <wchar.h> header file. */ +#undef HAVE_WCHAR_H + +/* Define to 1 if you have the <windows.h> header file. */ +#undef HAVE_WINDOWS_H + +/* Define to 1 if you have the <zlib.h> header file. */ +#undef HAVE_ZLIB_H + +/* This limits the amount of memory that may be used while matching a pattern. + It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply + to JIT matching. The value is in kibibytes (units of 1024 bytes). */ +#undef HEAP_LIMIT + +/* The value of LINK_SIZE determines the number of bytes used to store links + as offsets within the compiled regex. The default is 2, which allows for + compiled patterns up to 65535 code units long. This covers the vast + majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes + instead. This allows for longer patterns in extreme cases.
*/ +#undef LINK_SIZE + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#undef LT_OBJDIR + +/* The value of MATCH_LIMIT determines the default number of times the + pcre2_match() function can record a backtrack position during a single + matching attempt. The value is also used to limit a loop counter in + pcre2_dfa_match(). There is a runtime interface for setting a different + limit. The limit exists in order to catch runaway regular expressions that + take forever to determine that they do not match. The default is set very + large so that it does not accidentally catch legitimate cases. */ +#undef MATCH_LIMIT + +/* The above limit applies to all backtracks, whether or not they are nested. + In some environments it is desirable to limit the nesting of backtracking + (that is, the depth of tree that is searched) more strictly, in order to + restrict the maximum amount of heap memory that is used. The value of + MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it + must be less than the value of MATCH_LIMIT. The default is to use the same + value as MATCH_LIMIT. There is a runtime method for setting a different + limit. In the case of pcre2_dfa_match(), this limit controls the depth of + the internal nested function calls that are used for pattern recursions, + lookarounds, and atomic groups. */ +#undef MATCH_LIMIT_DEPTH + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. */ +#undef MAX_NAME_COUNT + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. */ +#undef MAX_NAME_SIZE + +/* The value of MAX_VARLOOKBEHIND specifies the default maximum length, in + characters, for a variable-length lookbehind assertion. 
*/ +#undef MAX_VARLOOKBEHIND + +/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */ +#undef NEVER_BACKSLASH_C + +/* The value of NEWLINE_DEFAULT determines the default newline character + sequence. PCRE2 client programs can override this by selecting other values + at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5 + (ANYCRLF), and 6 (NUL). */ +#undef NEWLINE_DEFAULT + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested + parentheses (of any kind) in a pattern. This limits the amount of system + stack that is used while compiling a pattern. */ +#undef PARENS_NEST_LIMIT + +/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by + pcre2grep to hold parts of the file it is searching. The buffer will be + expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing + very long lines. The actual amount of memory used by pcre2grep is three + times this number, because it allows for the buffering of "before" and + "after" lines. */ +#undef PCRE2GREP_BUFSIZE + +/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer + used by pcre2grep to hold parts of the file it is searching. The actual + amount of memory used by pcre2grep is three times this number, because it + allows for the buffering of "before" and "after" lines. */ +#undef PCRE2GREP_MAX_BUFSIZE + +/* Define to any value to include debugging code. 
*/ +#undef PCRE2_DEBUG + +/* to make a symbol visible */ +#undef PCRE2_EXPORT + + +/* If you are compiling for a system other than a Unix-like system or + Win32, and it needs some magic to be inserted before the definition + of a function that is exported by the library, define this macro to + contain the relevant magic. If you do not define this macro, a suitable + __declspec value is used for Windows systems; in other environments + a compiler relevant "extern" is used with any "visibility" related + attributes from PCRE2_EXPORT included. + This macro appears at the start of every exported function that is part + of the external API. It does not appear on functions that are "external" + in the C sense, but which are internal to the library. */ +#undef PCRE2_EXP_DEFN + +/* Define to any value if linking statically (TODO: make nice with Libtool) */ +#undef PCRE2_STATIC + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to any non-zero number to enable support for SELinux compatible + executable memory allocator in JIT. Note that this will have no effect + unless SUPPORT_JIT is also defined. */ +#undef SLJIT_PROT_EXECUTABLE_ALLOCATOR + +/* Define to 1 if all of the C89 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +#undef STDC_HEADERS + +/* Define to any value to enable differential fuzzing support. */ +#undef SUPPORT_DIFF_FUZZ + +/* Define to any value to enable support for Just-In-Time compiling. */ +#undef SUPPORT_JIT + +/* Define to any value to allow pcre2grep to be linked with libbz2, so that it + is able to handle .bz2 files. */ +#undef SUPPORT_LIBBZ2 + +/* Define to any value to allow pcre2test to be linked with libedit. */ +#undef SUPPORT_LIBEDIT + +/* Define to any value to allow pcre2test to be linked with libreadline.
*/ +#undef SUPPORT_LIBREADLINE + +/* Define to any value to allow pcre2grep to be linked with libz, so that it + is able to handle .gz files. */ +#undef SUPPORT_LIBZ + +/* Define to any value to enable callout script support in pcre2grep. */ +#undef SUPPORT_PCRE2GREP_CALLOUT + +/* Define to any value to enable fork support in pcre2grep callout scripts. + This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined. + */ +#undef SUPPORT_PCRE2GREP_CALLOUT_FORK + +/* Define to any value to enable JIT support in pcre2grep. Note that this will + have no effect unless SUPPORT_JIT is also defined. */ +#undef SUPPORT_PCRE2GREP_JIT + +/* Define to any value to enable the 16 bit PCRE2 library. */ +#undef SUPPORT_PCRE2_16 + +/* Define to any value to enable the 32 bit PCRE2 library. */ +#undef SUPPORT_PCRE2_32 + +/* Define to any value to enable the 8 bit PCRE2 library. */ +#undef SUPPORT_PCRE2_8 + +/* Define to any value to enable support for Unicode and UTF encoding. This + will work even in an EBCDIC environment, but it is incompatible with the + EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or* + ASCII/Unicode, but not both at once. */ +#undef SUPPORT_UNICODE + +/* Define to any value for valgrind support to find invalid memory reads. */ +#undef SUPPORT_VALGRIND + +/* Enable extensions on AIX, Interix, z/OS. */ +#ifndef _ALL_SOURCE +# undef _ALL_SOURCE +#endif +/* Enable general extensions on macOS. */ +#ifndef _DARWIN_C_SOURCE +# undef _DARWIN_C_SOURCE +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# undef __EXTENSIONS__ +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# undef _GNU_SOURCE +#endif +/* Enable X/Open compliant socket functions that do not require linking + with -lxnet on HP-UX 11.11. */ +#ifndef _HPUX_ALT_XOPEN_SOCKET_API +# undef _HPUX_ALT_XOPEN_SOCKET_API +#endif +/* Identify the host operating system as Minix. 
+ This macro does not affect the system headers' behavior. + A future release of Autoconf may stop defining this macro. */ +#ifndef _MINIX +# undef _MINIX +#endif +/* Enable general extensions on NetBSD. + Enable NetBSD compatibility extensions on Minix. */ +#ifndef _NETBSD_SOURCE +# undef _NETBSD_SOURCE +#endif +/* Enable OpenBSD compatibility extensions on NetBSD. + Oddly enough, this does nothing on OpenBSD. */ +#ifndef _OPENBSD_SOURCE +# undef _OPENBSD_SOURCE +#endif +/* Define to 1 if needed for POSIX-compatible behavior. */ +#ifndef _POSIX_SOURCE +# undef _POSIX_SOURCE +#endif +/* Define to 2 if needed for POSIX-compatible behavior. */ +#ifndef _POSIX_1_SOURCE +# undef _POSIX_1_SOURCE +#endif +/* Enable POSIX-compatible threading on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# undef _POSIX_PTHREAD_SEMANTICS +#endif +/* Enable extensions specified by ISO/IEC TS 18661-5:2014. */ +#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__ +# undef __STDC_WANT_IEC_60559_ATTRIBS_EXT__ +#endif +/* Enable extensions specified by ISO/IEC TS 18661-1:2014. */ +#ifndef __STDC_WANT_IEC_60559_BFP_EXT__ +# undef __STDC_WANT_IEC_60559_BFP_EXT__ +#endif +/* Enable extensions specified by ISO/IEC TS 18661-2:2015. */ +#ifndef __STDC_WANT_IEC_60559_DFP_EXT__ +# undef __STDC_WANT_IEC_60559_DFP_EXT__ +#endif +/* Enable extensions specified by C23 Annex F. */ +#ifndef __STDC_WANT_IEC_60559_EXT__ +# undef __STDC_WANT_IEC_60559_EXT__ +#endif +/* Enable extensions specified by ISO/IEC TS 18661-4:2015. */ +#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__ +# undef __STDC_WANT_IEC_60559_FUNCS_EXT__ +#endif +/* Enable extensions specified by C23 Annex H and ISO/IEC TS 18661-3:2015. */ +#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__ +# undef __STDC_WANT_IEC_60559_TYPES_EXT__ +#endif +/* Enable extensions specified by ISO/IEC TR 24731-2:2010. */ +#ifndef __STDC_WANT_LIB_EXT2__ +# undef __STDC_WANT_LIB_EXT2__ +#endif +/* Enable extensions specified by ISO/IEC 24747:2009. 
*/ +#ifndef __STDC_WANT_MATH_SPEC_FUNCS__ +# undef __STDC_WANT_MATH_SPEC_FUNCS__ +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# undef _TANDEM_SOURCE +#endif +/* Enable X/Open extensions. Define to 500 only if necessary + to make mbstate_t available. */ +#ifndef _XOPEN_SOURCE +# undef _XOPEN_SOURCE +#endif + + +/* Version number of package */ +#undef VERSION + +/* Number of bits in a file offset, on hosts where this is settable. */ +#undef _FILE_OFFSET_BITS + +/* Define to 1 on platforms where this makes off_t a 64-bit type. */ +#undef _LARGE_FILES + +/* Number of bits in time_t, on hosts where this is settable. */ +#undef _TIME_BITS + +/* Define to 1 on platforms where this makes time_t a 64-bit type. */ +#undef __MINGW_USE_VC2005_COMPAT + +/* Define to empty if 'const' does not conform to ANSI C. */ +#undef const + +/* Define to the type of a signed integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it. */ +#undef int64_t + +/* Define as 'unsigned int' if <stddef.h> doesn't define. */ +#undef size_t diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic new file mode 100644 index 0000000..a322d9f --- /dev/null +++ b/src/pcre2.h.generic @@ -0,0 +1,1010 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* This is the public header file for the PCRE library, second API, to be +#included by applications that call PCRE2 functions. + + Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer.
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifndef PCRE2_H_IDEMPOTENT_GUARD +#define PCRE2_H_IDEMPOTENT_GUARD + +/* The current PCRE version information. */ + +#define PCRE2_MAJOR 10 +#define PCRE2_MINOR 44 +#define PCRE2_PRERELEASE +#define PCRE2_DATE 2024-06-07 + +/* When an application links to a PCRE DLL in Windows, the symbols that are +imported have to be identified as such. When building PCRE2, the appropriate +export setting is defined in pcre2_internal.h, which includes this file. So we +don't change existing definitions of PCRE2_EXP_DECL. */ + +#if defined(_WIN32) && !defined(PCRE2_STATIC) +# ifndef PCRE2_EXP_DECL +# define PCRE2_EXP_DECL extern __declspec(dllimport) +# endif +#endif + +/* By default, we use the standard "extern" declarations. 
*/ +#ifndef PCRE2_EXP_DECL +# ifdef __cplusplus +# define PCRE2_EXP_DECL extern "C" +# else +# define PCRE2_EXP_DECL extern +# endif +#endif + +/* When compiling with the MSVC compiler, it is sometimes necessary to include +a "calling convention" before exported function names. (This is secondhand +information; I know nothing about MSVC myself). For example, something like + + void __cdecl function(....) + +might be needed. In order to make this easy, all the exported functions have +PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not +set, we ensure here that it has no effect. */ + +#ifndef PCRE2_CALL_CONVENTION +#define PCRE2_CALL_CONVENTION +#endif + +/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and +uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do +not have stdint.h, which is why we use inttypes.h, which according to the C +standard is a superset of stdint.h. If inttypes.h is not available the build +will break and the relevant values must be provided by some other means. */ + +#include <limits.h> +#include <stdlib.h> +#include <inttypes.h> + +/* Allow for C++ users compiling this directly. */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* The following option bits can be passed to pcre2_compile(), pcre2_match(), +or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it +is passed. Put these bits at the most significant end of the options word so +others can be added next to them */ + +#define PCRE2_ANCHORED 0x80000000u +#define PCRE2_NO_UTF_CHECK 0x40000000u +#define PCRE2_ENDANCHORED 0x20000000u + +/* The following option bits can be passed only to pcre2_compile(). However, +they may affect compilation, JIT compilation, and/or interpretive execution. 
+The following tags indicate which: + +C alters what is compiled by pcre2_compile() +J alters what is compiled by pcre2_jit_compile() +M is inspected during pcre2_match() execution +D is inspected during pcre2_dfa_match() execution +*/ + +#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */ +#define PCRE2_ALT_BSUX 0x00000002u /* C */ +#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */ +#define PCRE2_CASELESS 0x00000008u /* C */ +#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */ +#define PCRE2_DOTALL 0x00000020u /* C */ +#define PCRE2_DUPNAMES 0x00000040u /* C */ +#define PCRE2_EXTENDED 0x00000080u /* C */ +#define PCRE2_FIRSTLINE 0x00000100u /* J M D */ +#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */ +#define PCRE2_MULTILINE 0x00000400u /* C */ +#define PCRE2_NEVER_UCP 0x00000800u /* C */ +#define PCRE2_NEVER_UTF 0x00001000u /* C */ +#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */ +#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */ +#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */ +#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */ +#define PCRE2_UCP 0x00020000u /* C J M D */ +#define PCRE2_UNGREEDY 0x00040000u /* C */ +#define PCRE2_UTF 0x00080000u /* C J M D */ +#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */ +#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */ +#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */ +#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */ +#define PCRE2_EXTENDED_MORE 0x01000000u /* C */ +#define PCRE2_LITERAL 0x02000000u /* C */ +#define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */ + +/* An additional compile options word is available in the compile context. 
*/ + +#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */ +#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */ +#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */ +#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */ +#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */ +#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */ +#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */ +#define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */ +#define PCRE2_EXTRA_ASCII_BSD 0x00000100u /* C */ +#define PCRE2_EXTRA_ASCII_BSS 0x00000200u /* C */ +#define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */ +#define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */ +#define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */ + +/* These are for pcre2_jit_compile(). */ + +#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */ +#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u +#define PCRE2_JIT_PARTIAL_HARD 0x00000004u +#define PCRE2_JIT_INVALID_UTF 0x00000100u + +/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and +pcre2_substitute(). Some are allowed only for one of the functions, and in +these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and +PCRE2_NO_UTF_CHECK can also be passed to these functions (though +pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */ + +#define PCRE2_NOTBOL 0x00000001u +#define PCRE2_NOTEOL 0x00000002u +#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */ +#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. 
*/ +#define PCRE2_PARTIAL_SOFT 0x00000010u +#define PCRE2_PARTIAL_HARD 0x00000020u +#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */ +#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */ +#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */ +#define PCRE2_NO_JIT 0x00002000u /* not for pcre2_dfa_match() */ +#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u +#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */ +#define PCRE2_DISABLE_RECURSELOOP_CHECK 0x00040000u /* not for pcre2_dfa_match() or pcre2_jit_match() */ + +/* Options for pcre2_pattern_convert(). */ + +#define PCRE2_CONVERT_UTF 0x00000001u +#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u +#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u +#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u +#define PCRE2_CONVERT_GLOB 0x00000010u +#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u +#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u + +/* Newline and \R settings, for use in compile contexts. The newline values +must be kept in step with values set in config.h and both sets must all be +greater than zero. */ + +#define PCRE2_NEWLINE_CR 1 +#define PCRE2_NEWLINE_LF 2 +#define PCRE2_NEWLINE_CRLF 3 +#define PCRE2_NEWLINE_ANY 4 +#define PCRE2_NEWLINE_ANYCRLF 5 +#define PCRE2_NEWLINE_NUL 6 + +#define PCRE2_BSR_UNICODE 1 +#define PCRE2_BSR_ANYCRLF 2 + +/* Error codes for pcre2_compile(). Some of these are also used by +pcre2_pattern_convert(). 
*/ + +#define PCRE2_ERROR_END_BACKSLASH 101 +#define PCRE2_ERROR_END_BACKSLASH_C 102 +#define PCRE2_ERROR_UNKNOWN_ESCAPE 103 +#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104 +#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105 +#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106 +#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107 +#define PCRE2_ERROR_CLASS_RANGE_ORDER 108 +#define PCRE2_ERROR_QUANTIFIER_INVALID 109 +#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110 +#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111 +#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112 +#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113 +#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114 +#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115 +#define PCRE2_ERROR_NULL_PATTERN 116 +#define PCRE2_ERROR_BAD_OPTIONS 117 +#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118 +#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119 +#define PCRE2_ERROR_PATTERN_TOO_LARGE 120 +#define PCRE2_ERROR_HEAP_FAILED 121 +#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122 +#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123 +#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124 +#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125 +#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126 +#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127 +#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128 +#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129 +#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130 +#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131 +#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132 +#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133 +#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134 +#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135 +#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136 +#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137 +#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138 +#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139 +#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140 +#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141 +#define 
PCRE2_ERROR_MISSING_NAME_TERMINATOR 142 +#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143 +#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144 +#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145 +#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146 +#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147 +#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148 +#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149 +#define PCRE2_ERROR_CLASS_INVALID_RANGE 150 +#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151 +#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152 +#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153 +#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154 +#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155 +#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156 +#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157 +#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158 +/* Error 159 is obsolete and should now never occur */ +#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159 +#define PCRE2_ERROR_VERB_UNKNOWN 160 +#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161 +#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162 +#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163 +#define PCRE2_ERROR_INVALID_OCTAL 164 +#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165 +#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166 +#define PCRE2_ERROR_INVALID_HEXADECIMAL 167 +#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168 +#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169 +#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170 +#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171 +#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172 +#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173 +#define PCRE2_ERROR_UTF_IS_DISABLED 174 +#define PCRE2_ERROR_UCP_IS_DISABLED 175 +#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176 +#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177 +#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178 +#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179 +#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180 +#define 
PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181 +#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182 +#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183 +#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184 +#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185 +#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186 +#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187 +#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188 +#define PCRE2_ERROR_INTERNAL_BAD_CODE 189 +#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190 +#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191 +#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192 +#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193 +#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194 +#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195 +#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196 +#define PCRE2_ERROR_TOO_MANY_CAPTURES 197 +#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198 +#define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199 + + +/* "Expected" matching error codes: no match and partial match. 
*/ + +#define PCRE2_ERROR_NOMATCH (-1) +#define PCRE2_ERROR_PARTIAL (-2) + +/* Error codes for UTF-8 validity checks */ + +#define PCRE2_ERROR_UTF8_ERR1 (-3) +#define PCRE2_ERROR_UTF8_ERR2 (-4) +#define PCRE2_ERROR_UTF8_ERR3 (-5) +#define PCRE2_ERROR_UTF8_ERR4 (-6) +#define PCRE2_ERROR_UTF8_ERR5 (-7) +#define PCRE2_ERROR_UTF8_ERR6 (-8) +#define PCRE2_ERROR_UTF8_ERR7 (-9) +#define PCRE2_ERROR_UTF8_ERR8 (-10) +#define PCRE2_ERROR_UTF8_ERR9 (-11) +#define PCRE2_ERROR_UTF8_ERR10 (-12) +#define PCRE2_ERROR_UTF8_ERR11 (-13) +#define PCRE2_ERROR_UTF8_ERR12 (-14) +#define PCRE2_ERROR_UTF8_ERR13 (-15) +#define PCRE2_ERROR_UTF8_ERR14 (-16) +#define PCRE2_ERROR_UTF8_ERR15 (-17) +#define PCRE2_ERROR_UTF8_ERR16 (-18) +#define PCRE2_ERROR_UTF8_ERR17 (-19) +#define PCRE2_ERROR_UTF8_ERR18 (-20) +#define PCRE2_ERROR_UTF8_ERR19 (-21) +#define PCRE2_ERROR_UTF8_ERR20 (-22) +#define PCRE2_ERROR_UTF8_ERR21 (-23) + +/* Error codes for UTF-16 validity checks */ + +#define PCRE2_ERROR_UTF16_ERR1 (-24) +#define PCRE2_ERROR_UTF16_ERR2 (-25) +#define PCRE2_ERROR_UTF16_ERR3 (-26) + +/* Error codes for UTF-32 validity checks */ + +#define PCRE2_ERROR_UTF32_ERR1 (-27) +#define PCRE2_ERROR_UTF32_ERR2 (-28) + +/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction +functions, context functions, and serializing functions. They are in numerical +order. Originally they were in alphabetical order too, but now that PCRE2 is +released, the numbers must not be changed. 
*/ + +#define PCRE2_ERROR_BADDATA (-29) +#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */ +#define PCRE2_ERROR_BADMAGIC (-31) +#define PCRE2_ERROR_BADMODE (-32) +#define PCRE2_ERROR_BADOFFSET (-33) +#define PCRE2_ERROR_BADOPTION (-34) +#define PCRE2_ERROR_BADREPLACEMENT (-35) +#define PCRE2_ERROR_BADUTFOFFSET (-36) +#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */ +#define PCRE2_ERROR_DFA_BADRESTART (-38) +#define PCRE2_ERROR_DFA_RECURSE (-39) +#define PCRE2_ERROR_DFA_UCOND (-40) +#define PCRE2_ERROR_DFA_UFUNC (-41) +#define PCRE2_ERROR_DFA_UITEM (-42) +#define PCRE2_ERROR_DFA_WSSIZE (-43) +#define PCRE2_ERROR_INTERNAL (-44) +#define PCRE2_ERROR_JIT_BADOPTION (-45) +#define PCRE2_ERROR_JIT_STACKLIMIT (-46) +#define PCRE2_ERROR_MATCHLIMIT (-47) +#define PCRE2_ERROR_NOMEMORY (-48) +#define PCRE2_ERROR_NOSUBSTRING (-49) +#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50) +#define PCRE2_ERROR_NULL (-51) +#define PCRE2_ERROR_RECURSELOOP (-52) +#define PCRE2_ERROR_DEPTHLIMIT (-53) +#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */ +#define PCRE2_ERROR_UNAVAILABLE (-54) +#define PCRE2_ERROR_UNSET (-55) +#define PCRE2_ERROR_BADOFFSETLIMIT (-56) +#define PCRE2_ERROR_BADREPESCAPE (-57) +#define PCRE2_ERROR_REPMISSINGBRACE (-58) +#define PCRE2_ERROR_BADSUBSTITUTION (-59) +#define PCRE2_ERROR_BADSUBSPATTERN (-60) +#define PCRE2_ERROR_TOOMANYREPLACE (-61) +#define PCRE2_ERROR_BADSERIALIZEDDATA (-62) +#define PCRE2_ERROR_HEAPLIMIT (-63) +#define PCRE2_ERROR_CONVERT_SYNTAX (-64) +#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65) +#define PCRE2_ERROR_DFA_UINVALID_UTF (-66) +#define PCRE2_ERROR_INVALIDOFFSET (-67) + + +/* Request types for pcre2_pattern_info() */ + +#define PCRE2_INFO_ALLOPTIONS 0 +#define PCRE2_INFO_ARGOPTIONS 1 +#define PCRE2_INFO_BACKREFMAX 2 +#define PCRE2_INFO_BSR 3 +#define PCRE2_INFO_CAPTURECOUNT 4 +#define PCRE2_INFO_FIRSTCODEUNIT 5 +#define PCRE2_INFO_FIRSTCODETYPE 6 +#define PCRE2_INFO_FIRSTBITMAP 7 +#define 
PCRE2_INFO_HASCRORLF 8 +#define PCRE2_INFO_JCHANGED 9 +#define PCRE2_INFO_JITSIZE 10 +#define PCRE2_INFO_LASTCODEUNIT 11 +#define PCRE2_INFO_LASTCODETYPE 12 +#define PCRE2_INFO_MATCHEMPTY 13 +#define PCRE2_INFO_MATCHLIMIT 14 +#define PCRE2_INFO_MAXLOOKBEHIND 15 +#define PCRE2_INFO_MINLENGTH 16 +#define PCRE2_INFO_NAMECOUNT 17 +#define PCRE2_INFO_NAMEENTRYSIZE 18 +#define PCRE2_INFO_NAMETABLE 19 +#define PCRE2_INFO_NEWLINE 20 +#define PCRE2_INFO_DEPTHLIMIT 21 +#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */ +#define PCRE2_INFO_SIZE 22 +#define PCRE2_INFO_HASBACKSLASHC 23 +#define PCRE2_INFO_FRAMESIZE 24 +#define PCRE2_INFO_HEAPLIMIT 25 +#define PCRE2_INFO_EXTRAOPTIONS 26 + +/* Request types for pcre2_config(). */ + +#define PCRE2_CONFIG_BSR 0 +#define PCRE2_CONFIG_JIT 1 +#define PCRE2_CONFIG_JITTARGET 2 +#define PCRE2_CONFIG_LINKSIZE 3 +#define PCRE2_CONFIG_MATCHLIMIT 4 +#define PCRE2_CONFIG_NEWLINE 5 +#define PCRE2_CONFIG_PARENSLIMIT 6 +#define PCRE2_CONFIG_DEPTHLIMIT 7 +#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */ +#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */ +#define PCRE2_CONFIG_UNICODE 9 +#define PCRE2_CONFIG_UNICODE_VERSION 10 +#define PCRE2_CONFIG_VERSION 11 +#define PCRE2_CONFIG_HEAPLIMIT 12 +#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13 +#define PCRE2_CONFIG_COMPILED_WIDTHS 14 +#define PCRE2_CONFIG_TABLES_LENGTH 15 + + +/* Types for code units in patterns and subject strings. */ + +typedef uint8_t PCRE2_UCHAR8; +typedef uint16_t PCRE2_UCHAR16; +typedef uint32_t PCRE2_UCHAR32; + +typedef const PCRE2_UCHAR8 *PCRE2_SPTR8; +typedef const PCRE2_UCHAR16 *PCRE2_SPTR16; +typedef const PCRE2_UCHAR32 *PCRE2_SPTR32; + +/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2, +including pattern offsets for errors and subject offsets after a match. We +define special values to indicate zero-terminated strings and unset offsets in +the offset vector (ovector). 
*/ + +#define PCRE2_SIZE size_t +#define PCRE2_SIZE_MAX SIZE_MAX +#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0) +#define PCRE2_UNSET (~(PCRE2_SIZE)0) + +/* Generic types for opaque structures and JIT callback functions. These +declarations are defined in a macro that is expanded for each width later. */ + +#define PCRE2_TYPES_LIST \ +struct pcre2_real_general_context; \ +typedef struct pcre2_real_general_context pcre2_general_context; \ +\ +struct pcre2_real_compile_context; \ +typedef struct pcre2_real_compile_context pcre2_compile_context; \ +\ +struct pcre2_real_match_context; \ +typedef struct pcre2_real_match_context pcre2_match_context; \ +\ +struct pcre2_real_convert_context; \ +typedef struct pcre2_real_convert_context pcre2_convert_context; \ +\ +struct pcre2_real_code; \ +typedef struct pcre2_real_code pcre2_code; \ +\ +struct pcre2_real_match_data; \ +typedef struct pcre2_real_match_data pcre2_match_data; \ +\ +struct pcre2_real_jit_stack; \ +typedef struct pcre2_real_jit_stack pcre2_jit_stack; \ +\ +typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *); + + +/* The structures for passing out data via callout functions. We use structures +so that new fields can be added on the end in future versions, without changing +the API of the function, thereby allowing old clients to work without +modification. Define the generic versions in a macro; the width-specific +versions are generated from this macro below. */ + +/* Flags for the callout_flags field. These are cleared after a callout. 
*/ + +#define PCRE2_CALLOUT_STARTMATCH 0x00000001u /* Set for each bumpalong */ +#define PCRE2_CALLOUT_BACKTRACK 0x00000002u /* Set after a backtrack */ + +#define PCRE2_STRUCTURE_LIST \ +typedef struct pcre2_callout_block { \ + uint32_t version; /* Identifies version of block */ \ + /* ------------------------ Version 0 ------------------------------- */ \ + uint32_t callout_number; /* Number compiled into pattern */ \ + uint32_t capture_top; /* Max current capture */ \ + uint32_t capture_last; /* Most recently closed capture */ \ + PCRE2_SIZE *offset_vector; /* The offset vector */ \ + PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \ + PCRE2_SPTR subject; /* The subject being matched */ \ + PCRE2_SIZE subject_length; /* The length of the subject */ \ + PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \ + PCRE2_SIZE current_position; /* Where we currently are in the subject */ \ + PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \ + PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \ + /* ------------------- Added for Version 1 -------------------------- */ \ + PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \ + PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \ + PCRE2_SPTR callout_string; /* String compiled into pattern */ \ + /* ------------------- Added for Version 2 -------------------------- */ \ + uint32_t callout_flags; /* See above for list */ \ + /* ------------------------------------------------------------------ */ \ +} pcre2_callout_block; \ +\ +typedef struct pcre2_callout_enumerate_block { \ + uint32_t version; /* Identifies version of block */ \ + /* ------------------------ Version 0 ------------------------------- */ \ + PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \ + PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \ + uint32_t callout_number; /* Number compiled into pattern */ 
\ + PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \ + PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \ + PCRE2_SPTR callout_string; /* String compiled into pattern */ \ + /* ------------------------------------------------------------------ */ \ +} pcre2_callout_enumerate_block; \ +\ +typedef struct pcre2_substitute_callout_block { \ + uint32_t version; /* Identifies version of block */ \ + /* ------------------------ Version 0 ------------------------------- */ \ + PCRE2_SPTR input; /* Pointer to input subject string */ \ + PCRE2_SPTR output; /* Pointer to output buffer */ \ + PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \ + PCRE2_SIZE *ovector; /* Pointer to current ovector */ \ + uint32_t oveccount; /* Count of pairs set in ovector */ \ + uint32_t subscount; /* Substitution number */ \ + /* ------------------------------------------------------------------ */ \ +} pcre2_substitute_callout_block; + + +/* List the generic forms of all other functions in macros, which will be +expanded for each width below. Start with functions that give general +information. */ + +#define PCRE2_GENERAL_INFO_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *); + + +/* Functions for manipulating contexts. 
*/ + +#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \ + pcre2_general_context_copy(pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \ + pcre2_general_context_create(void *(*)(size_t, void *), \ + void (*)(void *, void *), void *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_general_context_free(pcre2_general_context *); + +#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \ + pcre2_compile_context_copy(pcre2_compile_context *); \ +PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \ + pcre2_compile_context_create(pcre2_general_context *);\ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_compile_context_free(pcre2_compile_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_bsr(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_newline(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_compile_recursion_guard(pcre2_compile_context *, \ + int (*)(uint32_t, void *), void *); + +#define PCRE2_MATCH_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \ + 
pcre2_match_context_copy(pcre2_match_context *); \ +PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \ + pcre2_match_context_create(pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_match_context_free(pcre2_match_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_callout(pcre2_match_context *, \ + int (*)(pcre2_callout_block *, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_substitute_callout(pcre2_match_context *, \ + int (*)(pcre2_substitute_callout_block *, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_match_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_recursion_memory_management(pcre2_match_context *, \ + void *(*)(size_t, void *), void (*)(void *, void *), void *); + +#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \ + pcre2_convert_context_copy(pcre2_convert_context *); \ +PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \ + pcre2_convert_context_create(pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_convert_context_free(pcre2_convert_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_glob_separator(pcre2_convert_context *, uint32_t); + + +/* Functions concerned with compiling a pattern to PCRE internal code. 
*/ + +#define PCRE2_COMPILE_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \ + pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \ + pcre2_compile_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_code_free(pcre2_code *); \ +PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \ + pcre2_code_copy(const pcre2_code *); \ +PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \ + pcre2_code_copy_with_tables(const pcre2_code *); + + +/* Functions that give information about a compiled pattern. */ + +#define PCRE2_PATTERN_INFO_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_callout_enumerate(const pcre2_code *, \ + int (*)(pcre2_callout_enumerate_block *, void *), void *); + + +/* Functions for running a match and inspecting the result. */ + +#define PCRE2_MATCH_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \ + pcre2_match_data_create(uint32_t, pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \ + pcre2_match_data_create_from_pattern(const pcre2_code *, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_match_data_free(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \ + pcre2_get_mark(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ + pcre2_get_match_data_size(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ + pcre2_get_match_data_heapframes_size(pcre2_match_data *); \ +PCRE2_EXP_DECL 
uint32_t PCRE2_CALL_CONVENTION \ + pcre2_get_ovector_count(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE *PCRE2_CALL_CONVENTION \ + pcre2_get_ovector_pointer(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ + pcre2_get_startchar(pcre2_match_data *); + + +/* Convenience functions for handling matched substrings. */ + +#define PCRE2_SUBSTRING_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_substring_free(PCRE2_UCHAR *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \ + PCRE2_SPTR *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_substring_list_free(PCRE2_UCHAR **); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **); + +/* Functions for serializing / deserializing compiled patterns. 
*/ + +#define PCRE2_SERIALIZE_FUNCTIONS \ +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \ + PCRE2_SIZE *, pcre2_general_context *); \ +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_get_number_of_codes(const uint8_t *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_serialize_free(uint8_t *); + + +/* Convenience function for match + substitute. */ + +#define PCRE2_SUBSTITUTE_FUNCTION \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \ + PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *); + + +/* Functions for converting pattern source strings. */ + +#define PCRE2_CONVERT_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \ + PCRE2_SIZE *, pcre2_convert_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_converted_pattern_free(PCRE2_UCHAR *); + + +/* Functions for JIT processing */ + +#define PCRE2_JIT_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_jit_compile(pcre2_code *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_free_unused_memory(pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_jit_stack *PCRE2_CALL_CONVENTION \ + pcre2_jit_stack_create(size_t, size_t, pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_stack_free(pcre2_jit_stack 
*); + + +/* Other miscellaneous functions. */ + +#define PCRE2_OTHER_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \ +PCRE2_EXP_DECL const uint8_t *PCRE2_CALL_CONVENTION \ + pcre2_maketables(pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_maketables_free(pcre2_general_context *, const uint8_t *); + +/* Define macros that generate width-specific names from generic versions. The +three-level macro scheme is necessary to get the macros expanded when we want +them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for +generating three versions of everything below. After that, PCRE2_SUFFIX will be +re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as +pcre2_compile are called by application code. */ + +#define PCRE2_JOIN(a,b) a ## b +#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b) +#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH) + + +/* Data types */ + +#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR) +#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR) + +#define pcre2_code PCRE2_SUFFIX(pcre2_code_) +#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_) +#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_) + +#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_) +#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_) +#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_) +#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_) +#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_) +#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_) +#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_) + + +/* Data blocks */ + +#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_) +#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_) +#define pcre2_substitute_callout_block 
PCRE2_SUFFIX(pcre2_substitute_callout_block_) +#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_) +#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_) +#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_) +#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_) +#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_) + + +/* Functions: the complete list in alphabetical order */ + +#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_) +#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_) +#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_) +#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_) +#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_) +#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_) +#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_) +#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_) +#define pcre2_config PCRE2_SUFFIX(pcre2_config_) +#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_) +#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_) +#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_) +#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_) +#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_) +#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_) +#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_) +#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_) +#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_) +#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_) +#define pcre2_get_match_data_heapframes_size PCRE2_SUFFIX(pcre2_get_match_data_heapframes_size_) +#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_) +#define pcre2_get_ovector_pointer 
PCRE2_SUFFIX(pcre2_get_ovector_pointer_) +#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_) +#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_) +#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_) +#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_) +#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_) +#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_) +#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_) +#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_) +#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_) +#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_) +#define pcre2_match PCRE2_SUFFIX(pcre2_match_) +#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_) +#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_) +#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_) +#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_) +#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_) +#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_) +#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_) +#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_) +#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_) +#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_) +#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_) +#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_) +#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_) +#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_) +#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) +#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_) +#define pcre2_set_compile_recursion_guard 
PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) +#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_) +#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_) +#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_) +#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_) +#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) +#define pcre2_set_max_varlookbehind PCRE2_SUFFIX(pcre2_set_max_varlookbehind_) +#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_) +#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_) +#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) +#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) +#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_) +#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_) +#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_) +#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_) +#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_) +#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_) +#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_) +#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_) +#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_) +#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_) +#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_) +#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_) +#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_) +#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_) + +/* Keep this old function name for backwards compatibility */ +#define pcre2_set_recursion_limit 
PCRE2_SUFFIX(pcre2_set_recursion_limit_) + +/* Keep this obsolete function for backwards compatibility: it is now a noop. */ +#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) + +/* Now generate all three sets of width-specific structures and function +prototypes. */ + +#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \ +PCRE2_TYPES_LIST \ +PCRE2_STRUCTURE_LIST \ +PCRE2_GENERAL_INFO_FUNCTIONS \ +PCRE2_GENERAL_CONTEXT_FUNCTIONS \ +PCRE2_COMPILE_CONTEXT_FUNCTIONS \ +PCRE2_CONVERT_CONTEXT_FUNCTIONS \ +PCRE2_CONVERT_FUNCTIONS \ +PCRE2_MATCH_CONTEXT_FUNCTIONS \ +PCRE2_COMPILE_FUNCTIONS \ +PCRE2_PATTERN_INFO_FUNCTIONS \ +PCRE2_MATCH_FUNCTIONS \ +PCRE2_SUBSTRING_FUNCTIONS \ +PCRE2_SERIALIZE_FUNCTIONS \ +PCRE2_SUBSTITUTE_FUNCTION \ +PCRE2_JIT_FUNCTIONS \ +PCRE2_OTHER_FUNCTIONS + +#define PCRE2_LOCAL_WIDTH 8 +PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS +#undef PCRE2_LOCAL_WIDTH + +#define PCRE2_LOCAL_WIDTH 16 +PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS +#undef PCRE2_LOCAL_WIDTH + +#define PCRE2_LOCAL_WIDTH 32 +PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS +#undef PCRE2_LOCAL_WIDTH + +/* Undefine the list macros; they are no longer needed. */ + +#undef PCRE2_TYPES_LIST +#undef PCRE2_STRUCTURE_LIST +#undef PCRE2_GENERAL_INFO_FUNCTIONS +#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS +#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS +#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS +#undef PCRE2_MATCH_CONTEXT_FUNCTIONS +#undef PCRE2_COMPILE_FUNCTIONS +#undef PCRE2_PATTERN_INFO_FUNCTIONS +#undef PCRE2_MATCH_FUNCTIONS +#undef PCRE2_SUBSTRING_FUNCTIONS +#undef PCRE2_SERIALIZE_FUNCTIONS +#undef PCRE2_SUBSTITUTE_FUNCTION +#undef PCRE2_JIT_FUNCTIONS +#undef PCRE2_OTHER_FUNCTIONS +#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS + +/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine +PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make +PCRE2_SUFFIX a no-op. Otherwise, generate an error. 
*/ + +#undef PCRE2_SUFFIX +#ifndef PCRE2_CODE_UNIT_WIDTH +#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h. +#error Use 8, 16, or 32; or 0 for a multi-width application. +#else /* PCRE2_CODE_UNIT_WIDTH is defined */ +#if PCRE2_CODE_UNIT_WIDTH == 8 || \ + PCRE2_CODE_UNIT_WIDTH == 16 || \ + PCRE2_CODE_UNIT_WIDTH == 32 +#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH) +#elif PCRE2_CODE_UNIT_WIDTH == 0 +#undef PCRE2_JOIN +#undef PCRE2_GLUE +#define PCRE2_SUFFIX(a) a +#else +#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32. +#endif +#endif /* PCRE2_CODE_UNIT_WIDTH is defined */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* PCRE2_H_IDEMPOTENT_GUARD */ + +/* End of pcre2.h */ diff --git a/src/pcre2.h.in b/src/pcre2.h.in new file mode 100644 index 0000000..b43534b --- /dev/null +++ b/src/pcre2.h.in @@ -0,0 +1,1010 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* This is the public header file for the PCRE library, second API, to be +#included by applications that call PCRE2 functions. + + Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifndef PCRE2_H_IDEMPOTENT_GUARD +#define PCRE2_H_IDEMPOTENT_GUARD + +/* The current PCRE version information. */ + +#define PCRE2_MAJOR @PCRE2_MAJOR@ +#define PCRE2_MINOR @PCRE2_MINOR@ +#define PCRE2_PRERELEASE @PCRE2_PRERELEASE@ +#define PCRE2_DATE @PCRE2_DATE@ + +/* When an application links to a PCRE DLL in Windows, the symbols that are +imported have to be identified as such. When building PCRE2, the appropriate +export setting is defined in pcre2_internal.h, which includes this file. So we +don't change existing definitions of PCRE2_EXP_DECL. */ + +#if defined(_WIN32) && !defined(PCRE2_STATIC) +# ifndef PCRE2_EXP_DECL +# define PCRE2_EXP_DECL extern __declspec(dllimport) +# endif +#endif + +/* By default, we use the standard "extern" declarations. */ + +#ifndef PCRE2_EXP_DECL +# ifdef __cplusplus +# define PCRE2_EXP_DECL extern "C" +# else +# define PCRE2_EXP_DECL extern +# endif +#endif + +/* When compiling with the MSVC compiler, it is sometimes necessary to include +a "calling convention" before exported function names. (This is secondhand +information; I know nothing about MSVC myself). 
For example, something like + + void __cdecl function(....) + +might be needed. In order to make this easy, all the exported functions have +PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not +set, we ensure here that it has no effect. */ + +#ifndef PCRE2_CALL_CONVENTION +#define PCRE2_CALL_CONVENTION +#endif + +/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and +uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do +not have stdint.h, which is why we use inttypes.h, which according to the C +standard is a superset of stdint.h. If inttypes.h is not available the build +will break and the relevant values must be provided by some other means. */ + +#include <limits.h> +#include <stdlib.h> +#include <inttypes.h> + +/* Allow for C++ users compiling this directly. */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* The following option bits can be passed to pcre2_compile(), pcre2_match(), +or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it +is passed. Put these bits at the most significant end of the options word so +others can be added next to them. */ + +#define PCRE2_ANCHORED 0x80000000u +#define PCRE2_NO_UTF_CHECK 0x40000000u +#define PCRE2_ENDANCHORED 0x20000000u + +/* The following option bits can be passed only to pcre2_compile(). However, +they may affect compilation, JIT compilation, and/or interpretive execution. 
+The following tags indicate which: + +C alters what is compiled by pcre2_compile() +J alters what is compiled by pcre2_jit_compile() +M is inspected during pcre2_match() execution +D is inspected during pcre2_dfa_match() execution +*/ + +#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */ +#define PCRE2_ALT_BSUX 0x00000002u /* C */ +#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */ +#define PCRE2_CASELESS 0x00000008u /* C */ +#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */ +#define PCRE2_DOTALL 0x00000020u /* C */ +#define PCRE2_DUPNAMES 0x00000040u /* C */ +#define PCRE2_EXTENDED 0x00000080u /* C */ +#define PCRE2_FIRSTLINE 0x00000100u /* J M D */ +#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */ +#define PCRE2_MULTILINE 0x00000400u /* C */ +#define PCRE2_NEVER_UCP 0x00000800u /* C */ +#define PCRE2_NEVER_UTF 0x00001000u /* C */ +#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */ +#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */ +#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */ +#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */ +#define PCRE2_UCP 0x00020000u /* C J M D */ +#define PCRE2_UNGREEDY 0x00040000u /* C */ +#define PCRE2_UTF 0x00080000u /* C J M D */ +#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */ +#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */ +#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */ +#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */ +#define PCRE2_EXTENDED_MORE 0x01000000u /* C */ +#define PCRE2_LITERAL 0x02000000u /* C */ +#define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */ + +/* An additional compile options word is available in the compile context. 
*/ + +#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */ +#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */ +#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */ +#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */ +#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */ +#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */ +#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */ +#define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */ +#define PCRE2_EXTRA_ASCII_BSD 0x00000100u /* C */ +#define PCRE2_EXTRA_ASCII_BSS 0x00000200u /* C */ +#define PCRE2_EXTRA_ASCII_BSW 0x00000400u /* C */ +#define PCRE2_EXTRA_ASCII_POSIX 0x00000800u /* C */ +#define PCRE2_EXTRA_ASCII_DIGIT 0x00001000u /* C */ + +/* These are for pcre2_jit_compile(). */ + +#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */ +#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u +#define PCRE2_JIT_PARTIAL_HARD 0x00000004u +#define PCRE2_JIT_INVALID_UTF 0x00000100u + +/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and +pcre2_substitute(). Some are allowed only for one of the functions, and in +these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and +PCRE2_NO_UTF_CHECK can also be passed to these functions (though +pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */ + +#define PCRE2_NOTBOL 0x00000001u +#define PCRE2_NOTEOL 0x00000002u +#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */ +#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. 
*/ +#define PCRE2_PARTIAL_SOFT 0x00000010u +#define PCRE2_PARTIAL_HARD 0x00000020u +#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */ +#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */ +#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */ +#define PCRE2_NO_JIT 0x00002000u /* not for pcre2_dfa_match() */ +#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u +#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */ +#define PCRE2_DISABLE_RECURSELOOP_CHECK 0x00040000u /* not for pcre2_dfa_match() or pcre2_jit_match() */ + +/* Options for pcre2_pattern_convert(). */ + +#define PCRE2_CONVERT_UTF 0x00000001u +#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u +#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u +#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u +#define PCRE2_CONVERT_GLOB 0x00000010u +#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u +#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u + +/* Newline and \R settings, for use in compile contexts. The newline values +must be kept in step with values set in config.h and both sets must all be +greater than zero. */ + +#define PCRE2_NEWLINE_CR 1 +#define PCRE2_NEWLINE_LF 2 +#define PCRE2_NEWLINE_CRLF 3 +#define PCRE2_NEWLINE_ANY 4 +#define PCRE2_NEWLINE_ANYCRLF 5 +#define PCRE2_NEWLINE_NUL 6 + +#define PCRE2_BSR_UNICODE 1 +#define PCRE2_BSR_ANYCRLF 2 + +/* Error codes for pcre2_compile(). Some of these are also used by +pcre2_pattern_convert(). 
*/ + +#define PCRE2_ERROR_END_BACKSLASH 101 +#define PCRE2_ERROR_END_BACKSLASH_C 102 +#define PCRE2_ERROR_UNKNOWN_ESCAPE 103 +#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104 +#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105 +#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106 +#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107 +#define PCRE2_ERROR_CLASS_RANGE_ORDER 108 +#define PCRE2_ERROR_QUANTIFIER_INVALID 109 +#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110 +#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111 +#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112 +#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113 +#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114 +#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115 +#define PCRE2_ERROR_NULL_PATTERN 116 +#define PCRE2_ERROR_BAD_OPTIONS 117 +#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118 +#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119 +#define PCRE2_ERROR_PATTERN_TOO_LARGE 120 +#define PCRE2_ERROR_HEAP_FAILED 121 +#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122 +#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123 +#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124 +#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125 +#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126 +#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127 +#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128 +#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129 +#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130 +#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131 +#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132 +#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133 +#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134 +#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135 +#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136 +#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137 +#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138 +#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139 +#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140 +#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141 +#define 
PCRE2_ERROR_MISSING_NAME_TERMINATOR 142 +#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143 +#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144 +#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145 +#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146 +#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147 +#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148 +#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149 +#define PCRE2_ERROR_CLASS_INVALID_RANGE 150 +#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151 +#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152 +#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153 +#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154 +#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155 +#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156 +#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157 +#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158 +/* Error 159 is obsolete and should now never occur */ +#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159 +#define PCRE2_ERROR_VERB_UNKNOWN 160 +#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161 +#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162 +#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163 +#define PCRE2_ERROR_INVALID_OCTAL 164 +#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165 +#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166 +#define PCRE2_ERROR_INVALID_HEXADECIMAL 167 +#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168 +#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169 +#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170 +#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171 +#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172 +#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173 +#define PCRE2_ERROR_UTF_IS_DISABLED 174 +#define PCRE2_ERROR_UCP_IS_DISABLED 175 +#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176 +#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177 +#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178 +#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179 +#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180 +#define 
PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181 +#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182 +#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183 +#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184 +#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185 +#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186 +#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187 +#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188 +#define PCRE2_ERROR_INTERNAL_BAD_CODE 189 +#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190 +#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191 +#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192 +#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193 +#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194 +#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195 +#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196 +#define PCRE2_ERROR_TOO_MANY_CAPTURES 197 +#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198 +#define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199 + + +/* "Expected" matching error codes: no match and partial match. 
*/ + +#define PCRE2_ERROR_NOMATCH (-1) +#define PCRE2_ERROR_PARTIAL (-2) + +/* Error codes for UTF-8 validity checks */ + +#define PCRE2_ERROR_UTF8_ERR1 (-3) +#define PCRE2_ERROR_UTF8_ERR2 (-4) +#define PCRE2_ERROR_UTF8_ERR3 (-5) +#define PCRE2_ERROR_UTF8_ERR4 (-6) +#define PCRE2_ERROR_UTF8_ERR5 (-7) +#define PCRE2_ERROR_UTF8_ERR6 (-8) +#define PCRE2_ERROR_UTF8_ERR7 (-9) +#define PCRE2_ERROR_UTF8_ERR8 (-10) +#define PCRE2_ERROR_UTF8_ERR9 (-11) +#define PCRE2_ERROR_UTF8_ERR10 (-12) +#define PCRE2_ERROR_UTF8_ERR11 (-13) +#define PCRE2_ERROR_UTF8_ERR12 (-14) +#define PCRE2_ERROR_UTF8_ERR13 (-15) +#define PCRE2_ERROR_UTF8_ERR14 (-16) +#define PCRE2_ERROR_UTF8_ERR15 (-17) +#define PCRE2_ERROR_UTF8_ERR16 (-18) +#define PCRE2_ERROR_UTF8_ERR17 (-19) +#define PCRE2_ERROR_UTF8_ERR18 (-20) +#define PCRE2_ERROR_UTF8_ERR19 (-21) +#define PCRE2_ERROR_UTF8_ERR20 (-22) +#define PCRE2_ERROR_UTF8_ERR21 (-23) + +/* Error codes for UTF-16 validity checks */ + +#define PCRE2_ERROR_UTF16_ERR1 (-24) +#define PCRE2_ERROR_UTF16_ERR2 (-25) +#define PCRE2_ERROR_UTF16_ERR3 (-26) + +/* Error codes for UTF-32 validity checks */ + +#define PCRE2_ERROR_UTF32_ERR1 (-27) +#define PCRE2_ERROR_UTF32_ERR2 (-28) + +/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction +functions, context functions, and serializing functions. They are in numerical +order. Originally they were in alphabetical order too, but now that PCRE2 is +released, the numbers must not be changed. 
*/ + +#define PCRE2_ERROR_BADDATA (-29) +#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */ +#define PCRE2_ERROR_BADMAGIC (-31) +#define PCRE2_ERROR_BADMODE (-32) +#define PCRE2_ERROR_BADOFFSET (-33) +#define PCRE2_ERROR_BADOPTION (-34) +#define PCRE2_ERROR_BADREPLACEMENT (-35) +#define PCRE2_ERROR_BADUTFOFFSET (-36) +#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */ +#define PCRE2_ERROR_DFA_BADRESTART (-38) +#define PCRE2_ERROR_DFA_RECURSE (-39) +#define PCRE2_ERROR_DFA_UCOND (-40) +#define PCRE2_ERROR_DFA_UFUNC (-41) +#define PCRE2_ERROR_DFA_UITEM (-42) +#define PCRE2_ERROR_DFA_WSSIZE (-43) +#define PCRE2_ERROR_INTERNAL (-44) +#define PCRE2_ERROR_JIT_BADOPTION (-45) +#define PCRE2_ERROR_JIT_STACKLIMIT (-46) +#define PCRE2_ERROR_MATCHLIMIT (-47) +#define PCRE2_ERROR_NOMEMORY (-48) +#define PCRE2_ERROR_NOSUBSTRING (-49) +#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50) +#define PCRE2_ERROR_NULL (-51) +#define PCRE2_ERROR_RECURSELOOP (-52) +#define PCRE2_ERROR_DEPTHLIMIT (-53) +#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */ +#define PCRE2_ERROR_UNAVAILABLE (-54) +#define PCRE2_ERROR_UNSET (-55) +#define PCRE2_ERROR_BADOFFSETLIMIT (-56) +#define PCRE2_ERROR_BADREPESCAPE (-57) +#define PCRE2_ERROR_REPMISSINGBRACE (-58) +#define PCRE2_ERROR_BADSUBSTITUTION (-59) +#define PCRE2_ERROR_BADSUBSPATTERN (-60) +#define PCRE2_ERROR_TOOMANYREPLACE (-61) +#define PCRE2_ERROR_BADSERIALIZEDDATA (-62) +#define PCRE2_ERROR_HEAPLIMIT (-63) +#define PCRE2_ERROR_CONVERT_SYNTAX (-64) +#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65) +#define PCRE2_ERROR_DFA_UINVALID_UTF (-66) +#define PCRE2_ERROR_INVALIDOFFSET (-67) + + +/* Request types for pcre2_pattern_info() */ + +#define PCRE2_INFO_ALLOPTIONS 0 +#define PCRE2_INFO_ARGOPTIONS 1 +#define PCRE2_INFO_BACKREFMAX 2 +#define PCRE2_INFO_BSR 3 +#define PCRE2_INFO_CAPTURECOUNT 4 +#define PCRE2_INFO_FIRSTCODEUNIT 5 +#define PCRE2_INFO_FIRSTCODETYPE 6 +#define PCRE2_INFO_FIRSTBITMAP 7 +#define 
PCRE2_INFO_HASCRORLF 8 +#define PCRE2_INFO_JCHANGED 9 +#define PCRE2_INFO_JITSIZE 10 +#define PCRE2_INFO_LASTCODEUNIT 11 +#define PCRE2_INFO_LASTCODETYPE 12 +#define PCRE2_INFO_MATCHEMPTY 13 +#define PCRE2_INFO_MATCHLIMIT 14 +#define PCRE2_INFO_MAXLOOKBEHIND 15 +#define PCRE2_INFO_MINLENGTH 16 +#define PCRE2_INFO_NAMECOUNT 17 +#define PCRE2_INFO_NAMEENTRYSIZE 18 +#define PCRE2_INFO_NAMETABLE 19 +#define PCRE2_INFO_NEWLINE 20 +#define PCRE2_INFO_DEPTHLIMIT 21 +#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */ +#define PCRE2_INFO_SIZE 22 +#define PCRE2_INFO_HASBACKSLASHC 23 +#define PCRE2_INFO_FRAMESIZE 24 +#define PCRE2_INFO_HEAPLIMIT 25 +#define PCRE2_INFO_EXTRAOPTIONS 26 + +/* Request types for pcre2_config(). */ + +#define PCRE2_CONFIG_BSR 0 +#define PCRE2_CONFIG_JIT 1 +#define PCRE2_CONFIG_JITTARGET 2 +#define PCRE2_CONFIG_LINKSIZE 3 +#define PCRE2_CONFIG_MATCHLIMIT 4 +#define PCRE2_CONFIG_NEWLINE 5 +#define PCRE2_CONFIG_PARENSLIMIT 6 +#define PCRE2_CONFIG_DEPTHLIMIT 7 +#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */ +#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */ +#define PCRE2_CONFIG_UNICODE 9 +#define PCRE2_CONFIG_UNICODE_VERSION 10 +#define PCRE2_CONFIG_VERSION 11 +#define PCRE2_CONFIG_HEAPLIMIT 12 +#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13 +#define PCRE2_CONFIG_COMPILED_WIDTHS 14 +#define PCRE2_CONFIG_TABLES_LENGTH 15 + + +/* Types for code units in patterns and subject strings. */ + +typedef uint8_t PCRE2_UCHAR8; +typedef uint16_t PCRE2_UCHAR16; +typedef uint32_t PCRE2_UCHAR32; + +typedef const PCRE2_UCHAR8 *PCRE2_SPTR8; +typedef const PCRE2_UCHAR16 *PCRE2_SPTR16; +typedef const PCRE2_UCHAR32 *PCRE2_SPTR32; + +/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2, +including pattern offsets for errors and subject offsets after a match. We +define special values to indicate zero-terminated strings and unset offsets in +the offset vector (ovector). 
*/ + +#define PCRE2_SIZE size_t +#define PCRE2_SIZE_MAX SIZE_MAX +#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0) +#define PCRE2_UNSET (~(PCRE2_SIZE)0) + +/* Generic types for opaque structures and JIT callback functions. These +declarations are defined in a macro that is expanded for each width later. */ + +#define PCRE2_TYPES_LIST \ +struct pcre2_real_general_context; \ +typedef struct pcre2_real_general_context pcre2_general_context; \ +\ +struct pcre2_real_compile_context; \ +typedef struct pcre2_real_compile_context pcre2_compile_context; \ +\ +struct pcre2_real_match_context; \ +typedef struct pcre2_real_match_context pcre2_match_context; \ +\ +struct pcre2_real_convert_context; \ +typedef struct pcre2_real_convert_context pcre2_convert_context; \ +\ +struct pcre2_real_code; \ +typedef struct pcre2_real_code pcre2_code; \ +\ +struct pcre2_real_match_data; \ +typedef struct pcre2_real_match_data pcre2_match_data; \ +\ +struct pcre2_real_jit_stack; \ +typedef struct pcre2_real_jit_stack pcre2_jit_stack; \ +\ +typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *); + + +/* The structures for passing out data via callout functions. We use structures +so that new fields can be added on the end in future versions, without changing +the API of the function, thereby allowing old clients to work without +modification. Define the generic versions in a macro; the width-specific +versions are generated from this macro below. */ + +/* Flags for the callout_flags field. These are cleared after a callout. 
*/ + +#define PCRE2_CALLOUT_STARTMATCH 0x00000001u /* Set for each bumpalong */ +#define PCRE2_CALLOUT_BACKTRACK 0x00000002u /* Set after a backtrack */ + +#define PCRE2_STRUCTURE_LIST \ +typedef struct pcre2_callout_block { \ + uint32_t version; /* Identifies version of block */ \ + /* ------------------------ Version 0 ------------------------------- */ \ + uint32_t callout_number; /* Number compiled into pattern */ \ + uint32_t capture_top; /* Max current capture */ \ + uint32_t capture_last; /* Most recently closed capture */ \ + PCRE2_SIZE *offset_vector; /* The offset vector */ \ + PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \ + PCRE2_SPTR subject; /* The subject being matched */ \ + PCRE2_SIZE subject_length; /* The length of the subject */ \ + PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \ + PCRE2_SIZE current_position; /* Where we currently are in the subject */ \ + PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \ + PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \ + /* ------------------- Added for Version 1 -------------------------- */ \ + PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \ + PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \ + PCRE2_SPTR callout_string; /* String compiled into pattern */ \ + /* ------------------- Added for Version 2 -------------------------- */ \ + uint32_t callout_flags; /* See above for list */ \ + /* ------------------------------------------------------------------ */ \ +} pcre2_callout_block; \ +\ +typedef struct pcre2_callout_enumerate_block { \ + uint32_t version; /* Identifies version of block */ \ + /* ------------------------ Version 0 ------------------------------- */ \ + PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \ + PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \ + uint32_t callout_number; /* Number compiled into pattern */ 
\ + PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \ + PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \ + PCRE2_SPTR callout_string; /* String compiled into pattern */ \ + /* ------------------------------------------------------------------ */ \ +} pcre2_callout_enumerate_block; \ +\ +typedef struct pcre2_substitute_callout_block { \ + uint32_t version; /* Identifies version of block */ \ + /* ------------------------ Version 0 ------------------------------- */ \ + PCRE2_SPTR input; /* Pointer to input subject string */ \ + PCRE2_SPTR output; /* Pointer to output buffer */ \ + PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \ + PCRE2_SIZE *ovector; /* Pointer to current ovector */ \ + uint32_t oveccount; /* Count of pairs set in ovector */ \ + uint32_t subscount; /* Substitution number */ \ + /* ------------------------------------------------------------------ */ \ +} pcre2_substitute_callout_block; + + +/* List the generic forms of all other functions in macros, which will be +expanded for each width below. Start with functions that give general +information. */ + +#define PCRE2_GENERAL_INFO_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *); + + +/* Functions for manipulating contexts. 
*/ + +#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \ + pcre2_general_context_copy(pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \ + pcre2_general_context_create(void *(*)(size_t, void *), \ + void (*)(void *, void *), void *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_general_context_free(pcre2_general_context *); + +#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \ + pcre2_compile_context_copy(pcre2_compile_context *); \ +PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \ + pcre2_compile_context_create(pcre2_general_context *);\ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_compile_context_free(pcre2_compile_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_bsr(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_newline(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_compile_recursion_guard(pcre2_compile_context *, \ + int (*)(uint32_t, void *), void *); + +#define PCRE2_MATCH_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \ + 
pcre2_match_context_copy(pcre2_match_context *); \ +PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \ + pcre2_match_context_create(pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_match_context_free(pcre2_match_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_callout(pcre2_match_context *, \ + int (*)(pcre2_callout_block *, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_substitute_callout(pcre2_match_context *, \ + int (*)(pcre2_substitute_callout_block *, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_match_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_recursion_memory_management(pcre2_match_context *, \ + void *(*)(size_t, void *), void (*)(void *, void *), void *); + +#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \ + pcre2_convert_context_copy(pcre2_convert_context *); \ +PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \ + pcre2_convert_context_create(pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_convert_context_free(pcre2_convert_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_glob_separator(pcre2_convert_context *, uint32_t); + + +/* Functions concerned with compiling a pattern to PCRE internal code. 
*/ + +#define PCRE2_COMPILE_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \ + pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \ + pcre2_compile_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_code_free(pcre2_code *); \ +PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \ + pcre2_code_copy(const pcre2_code *); \ +PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \ + pcre2_code_copy_with_tables(const pcre2_code *); + + +/* Functions that give information about a compiled pattern. */ + +#define PCRE2_PATTERN_INFO_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_callout_enumerate(const pcre2_code *, \ + int (*)(pcre2_callout_enumerate_block *, void *), void *); + + +/* Functions for running a match and inspecting the result. */ + +#define PCRE2_MATCH_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \ + pcre2_match_data_create(uint32_t, pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \ + pcre2_match_data_create_from_pattern(const pcre2_code *, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_match_data_free(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \ + pcre2_get_mark(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ + pcre2_get_match_data_size(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ + pcre2_get_match_data_heapframes_size(pcre2_match_data *); \ +PCRE2_EXP_DECL 
uint32_t PCRE2_CALL_CONVENTION \ + pcre2_get_ovector_count(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE *PCRE2_CALL_CONVENTION \ + pcre2_get_ovector_pointer(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ + pcre2_get_startchar(pcre2_match_data *); + + +/* Convenience functions for handling matched substrings. */ + +#define PCRE2_SUBSTRING_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_substring_free(PCRE2_UCHAR *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \ + PCRE2_SPTR *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_substring_list_free(PCRE2_UCHAR **); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **); + +/* Functions for serializing / deserializing compiled patterns. 
*/ + +#define PCRE2_SERIALIZE_FUNCTIONS \ +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \ + PCRE2_SIZE *, pcre2_general_context *); \ +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_get_number_of_codes(const uint8_t *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_serialize_free(uint8_t *); + + +/* Convenience function for match + substitute. */ + +#define PCRE2_SUBSTITUTE_FUNCTION \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \ + PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *); + + +/* Functions for converting pattern source strings. */ + +#define PCRE2_CONVERT_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \ + PCRE2_SIZE *, pcre2_convert_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_converted_pattern_free(PCRE2_UCHAR *); + + +/* Functions for JIT processing */ + +#define PCRE2_JIT_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_jit_compile(pcre2_code *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_free_unused_memory(pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_jit_stack *PCRE2_CALL_CONVENTION \ + pcre2_jit_stack_create(size_t, size_t, pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_stack_free(pcre2_jit_stack 
*); + + +/* Other miscellaneous functions. */ + +#define PCRE2_OTHER_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \ +PCRE2_EXP_DECL const uint8_t *PCRE2_CALL_CONVENTION \ + pcre2_maketables(pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_maketables_free(pcre2_general_context *, const uint8_t *); + +/* Define macros that generate width-specific names from generic versions. The +three-level macro scheme is necessary to get the macros expanded when we want +them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for +generating three versions of everything below. After that, PCRE2_SUFFIX will be +re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as +pcre2_compile are called by application code. */ + +#define PCRE2_JOIN(a,b) a ## b +#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b) +#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH) + + +/* Data types */ + +#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR) +#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR) + +#define pcre2_code PCRE2_SUFFIX(pcre2_code_) +#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_) +#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_) + +#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_) +#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_) +#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_) +#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_) +#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_) +#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_) +#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_) + + +/* Data blocks */ + +#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_) +#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_) +#define pcre2_substitute_callout_block 
PCRE2_SUFFIX(pcre2_substitute_callout_block_) +#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_) +#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_) +#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_) +#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_) +#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_) + + +/* Functions: the complete list in alphabetical order */ + +#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_) +#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_) +#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_) +#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_) +#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_) +#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_) +#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_) +#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_) +#define pcre2_config PCRE2_SUFFIX(pcre2_config_) +#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_) +#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_) +#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_) +#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_) +#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_) +#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_) +#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_) +#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_) +#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_) +#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_) +#define pcre2_get_match_data_heapframes_size PCRE2_SUFFIX(pcre2_get_match_data_heapframes_size_) +#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_) +#define pcre2_get_ovector_pointer 
PCRE2_SUFFIX(pcre2_get_ovector_pointer_) +#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_) +#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_) +#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_) +#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_) +#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_) +#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_) +#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_) +#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_) +#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_) +#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_) +#define pcre2_match PCRE2_SUFFIX(pcre2_match_) +#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_) +#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_) +#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_) +#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_) +#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_) +#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_) +#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_) +#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_) +#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_) +#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_) +#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_) +#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_) +#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_) +#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_) +#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) +#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_) +#define pcre2_set_compile_recursion_guard 
PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) +#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_) +#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_) +#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_) +#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_) +#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) +#define pcre2_set_max_varlookbehind PCRE2_SUFFIX(pcre2_set_max_varlookbehind_) +#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_) +#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_) +#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) +#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) +#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_) +#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_) +#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_) +#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_) +#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_) +#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_) +#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_) +#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_) +#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_) +#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_) +#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_) +#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_) +#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_) +#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_) + +/* Keep this old function name for backwards compatibility */ +#define pcre2_set_recursion_limit 
PCRE2_SUFFIX(pcre2_set_recursion_limit_) + +/* Keep this obsolete function for backwards compatibility: it is now a noop. */ +#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) + +/* Now generate all three sets of width-specific structures and function +prototypes. */ + +#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \ +PCRE2_TYPES_LIST \ +PCRE2_STRUCTURE_LIST \ +PCRE2_GENERAL_INFO_FUNCTIONS \ +PCRE2_GENERAL_CONTEXT_FUNCTIONS \ +PCRE2_COMPILE_CONTEXT_FUNCTIONS \ +PCRE2_CONVERT_CONTEXT_FUNCTIONS \ +PCRE2_CONVERT_FUNCTIONS \ +PCRE2_MATCH_CONTEXT_FUNCTIONS \ +PCRE2_COMPILE_FUNCTIONS \ +PCRE2_PATTERN_INFO_FUNCTIONS \ +PCRE2_MATCH_FUNCTIONS \ +PCRE2_SUBSTRING_FUNCTIONS \ +PCRE2_SERIALIZE_FUNCTIONS \ +PCRE2_SUBSTITUTE_FUNCTION \ +PCRE2_JIT_FUNCTIONS \ +PCRE2_OTHER_FUNCTIONS + +#define PCRE2_LOCAL_WIDTH 8 +PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS +#undef PCRE2_LOCAL_WIDTH + +#define PCRE2_LOCAL_WIDTH 16 +PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS +#undef PCRE2_LOCAL_WIDTH + +#define PCRE2_LOCAL_WIDTH 32 +PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS +#undef PCRE2_LOCAL_WIDTH + +/* Undefine the list macros; they are no longer needed. */ + +#undef PCRE2_TYPES_LIST +#undef PCRE2_STRUCTURE_LIST +#undef PCRE2_GENERAL_INFO_FUNCTIONS +#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS +#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS +#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS +#undef PCRE2_MATCH_CONTEXT_FUNCTIONS +#undef PCRE2_COMPILE_FUNCTIONS +#undef PCRE2_PATTERN_INFO_FUNCTIONS +#undef PCRE2_MATCH_FUNCTIONS +#undef PCRE2_SUBSTRING_FUNCTIONS +#undef PCRE2_SERIALIZE_FUNCTIONS +#undef PCRE2_SUBSTITUTE_FUNCTION +#undef PCRE2_JIT_FUNCTIONS +#undef PCRE2_OTHER_FUNCTIONS +#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS + +/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine +PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make +PCRE2_SUFFIX a no-op. Otherwise, generate an error. 
*/ + +#undef PCRE2_SUFFIX +#ifndef PCRE2_CODE_UNIT_WIDTH +#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h. +#error Use 8, 16, or 32; or 0 for a multi-width application. +#else /* PCRE2_CODE_UNIT_WIDTH is defined */ +#if PCRE2_CODE_UNIT_WIDTH == 8 || \ + PCRE2_CODE_UNIT_WIDTH == 16 || \ + PCRE2_CODE_UNIT_WIDTH == 32 +#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH) +#elif PCRE2_CODE_UNIT_WIDTH == 0 +#undef PCRE2_JOIN +#undef PCRE2_GLUE +#define PCRE2_SUFFIX(a) a +#else +#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32. +#endif +#endif /* PCRE2_CODE_UNIT_WIDTH is defined */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* PCRE2_H_IDEMPOTENT_GUARD */ + +/* End of pcre2.h */ diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c new file mode 100644 index 0000000..210d13d --- /dev/null +++ b/src/pcre2_auto_possess.c @@ -0,0 +1,1371 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* This module contains functions that scan a compiled pattern and change +repeats into possessive repeats where possible. */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#include "pcre2_internal.h" + + +/************************************************* +* Tables for auto-possessification * +*************************************************/ + +/* This table is used to check whether auto-possessification is possible +between adjacent character-type opcodes. The left-hand (repeated) opcode is +used to select the row, and the right-hand opcode is used to select the column. +A value of 1 means that auto-possessification is OK. For example, the second +value in the first row means that \D+\d can be turned into \D++\d.
+ +The Unicode property types (\P and \p) have to be present to fill out the table +because of what their opcode values are, but the table values should always be +zero because property types are handled separately in the code. The last four +columns apply to items that cannot be repeated, so there is no need to have +rows for them. Note that OP_DIGIT etc. are generated only when PCRE2_UCP is +*not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ + +#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1) +#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1) + +static const uint8_t autoposstab[APTROWS][APTCOLS] = { +/* \D \d \S \s \W \w . .+ \C \P \p \R \H \h \V \v \X \Z \z $ $M */ + { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \D */ + { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \d */ + { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \S */ + { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \s */ + { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \W */ + { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \w */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* . 
*/ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* .+ */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \C */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \P */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \p */ + { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \R */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \H */ + { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \h */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \V */ + { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 }, /* \v */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } /* \X */ +}; + +#ifdef SUPPORT_UNICODE +/* This table is used to check whether auto-possessification is possible +between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The +left-hand (repeated) opcode is used to select the row, and the right-hand +opcode is used to select the column. 
The values are as follows: + + 0 Always return FALSE (never auto-possessify) + 1 Character groups are distinct (possessify if both are OP_PROP) + 2 Check character categories in the same group (general or particular) + 3 TRUE if the two opcodes are not the same (PROP vs NOTPROP) + + 4 Check left general category vs right particular category + 5 Check right general category vs left particular category + + 6 Left alphanum vs right general category + 7 Left space vs right general category + 8 Left word vs right general category + + 9 Right alphanum vs left general category + 10 Right space vs left general category + 11 Right word vs left general category + + 12 Left alphanum vs right particular category + 13 Left space vs right particular category + 14 Left word vs right particular category + + 15 Right alphanum vs left particular category + 16 Right space vs left particular category + 17 Right word vs left particular category +*/ + +static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = { +/* ANY LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */ + { 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */ + { 0, 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */ + { 0, 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */ + { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ + { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */ + { 0, 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */ + { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */ + { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */ + { 0, 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */ +}; + +/* This table is used to check 
whether auto-possessification is possible +between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one +specifies a general category and the other specifies a particular category. The +row is selected by the general category and the column by the particular +category. The value is 1 if the particular category is not part of the general +category. */ + +static const uint8_t catposstab[7][30] = { +/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */ + { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* C */ + { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* L */ + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* M */ + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */ + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 }, /* P */ + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 }, /* S */ + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 } /* Z */ +}; + +/* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against +a general or particular category. The properties in each row are those +that apply to the character set in question. Duplication means that a little +unnecessary work is done when checking, but this keeps things much simpler +because they can all use the same code. For more details see the comment where +this table is used. + +Note: SPACE and PXSPACE used to be different because Perl excluded VT from +"space", but from Perl 5.18 it's included, so both categories are treated the +same here. 
*/ + +static const uint8_t posspropstab[3][4] = { + { ucp_L, ucp_N, ucp_N, ucp_Nl }, /* ALNUM, 3rd and 4th values redundant */ + { ucp_Z, ucp_Z, ucp_C, ucp_Cc }, /* SPACE and PXSPACE, 2nd value redundant */ + { ucp_L, ucp_N, ucp_P, ucp_Po } /* WORD */ +}; +#endif /* SUPPORT_UNICODE */ + + + +#ifdef SUPPORT_UNICODE +/************************************************* +* Check a character and a property * +*************************************************/ + +/* This function is called by compare_opcodes() when a property item is +adjacent to a fixed character. + +Arguments: + c the character + ptype the property type + pdata the data for the type + negated TRUE if it's a negated property (\P or \p{^) + +Returns: TRUE if auto-possessifying is OK +*/ + +static BOOL +check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata, + BOOL negated) +{ +BOOL ok; +const uint32_t *p; +const ucd_record *prop = GET_UCD(c); + +switch(ptype) + { + case PT_LAMP: + return (prop->chartype == ucp_Lu || + prop->chartype == ucp_Ll || + prop->chartype == ucp_Lt) == negated; + + case PT_GC: + return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated; + + case PT_PC: + return (pdata == prop->chartype) == negated; + + case PT_SC: + return (pdata == prop->script) == negated; + + case PT_SCX: + ok = (pdata == prop->script + || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0); + return ok == negated; + + /* These are specials */ + + case PT_ALNUM: + return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated; + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, which + means that Perl space and POSIX space are now identical. PCRE was changed + at release 8.34. 
*/ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + switch(c) + { + HSPACE_CASES: + VSPACE_CASES: + return negated; + + default: + return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated; + } + break; /* Control never reaches here */ + + case PT_WORD: + return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N || + c == CHAR_UNDERSCORE) == negated; + + case PT_CLIST: + p = PRIV(ucd_caseless_sets) + prop->caseset; + for (;;) + { + if (c < *p) return !negated; + if (c == *p++) return negated; + } + break; /* Control never reaches here */ + + /* Haven't yet thought these through. */ + + case PT_BIDICL: + return FALSE; + + case PT_BOOL: + return FALSE; + } + +return FALSE; +} +#endif /* SUPPORT_UNICODE */ + + + +/************************************************* +* Base opcode of repeated opcodes * +*************************************************/ + +/* Returns the base opcode for repeated single character type opcodes. If the +opcode is not a repeated character type, it returns with the original value. + +Arguments: c opcode +Returns: base opcode for the type +*/ + +static PCRE2_UCHAR +get_repeat_base(PCRE2_UCHAR c) +{ +return (c > OP_TYPEPOSUPTO)? c : + (c >= OP_TYPESTAR)? OP_TYPESTAR : + (c >= OP_NOTSTARI)? OP_NOTSTARI : + (c >= OP_NOTSTAR)? OP_NOTSTAR : + (c >= OP_STARI)? OP_STARI : + OP_STAR; +} + + +/************************************************* +* Fill the character property list * +*************************************************/ + +/* Checks whether the code points to an opcode that can take part in auto- +possessification, and if so, fills a list with its properties. 
+ +Arguments: + code points to start of expression + utf TRUE if in UTF mode + ucp TRUE if in UCP mode + fcc points to the case-flipping table + list points to output list + list[0] will be filled with the opcode + list[1] will be non-zero if this opcode + can match an empty character string + list[2..7] depends on the opcode + +Returns: points to the start of the next opcode if *code is accepted + NULL if *code is not accepted +*/ + +static PCRE2_SPTR +get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc, + uint32_t *list) +{ +PCRE2_UCHAR c = *code; +PCRE2_UCHAR base; +PCRE2_SPTR end; +uint32_t chr; + +#ifdef SUPPORT_UNICODE +uint32_t *clist_dest; +const uint32_t *clist_src; +#else +(void)utf; /* Suppress "unused parameter" compiler warnings */ +(void)ucp; +#endif + +list[0] = c; +list[1] = FALSE; +code++; + +if (c >= OP_STAR && c <= OP_TYPEPOSUPTO) + { + base = get_repeat_base(c); + c -= (base - OP_STAR); + + if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO) + code += IMM2_SIZE; + + list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT && + c != OP_POSPLUS); + + switch(base) + { + case OP_STAR: + list[0] = OP_CHAR; + break; + + case OP_STARI: + list[0] = OP_CHARI; + break; + + case OP_NOTSTAR: + list[0] = OP_NOT; + break; + + case OP_NOTSTARI: + list[0] = OP_NOTI; + break; + + case OP_TYPESTAR: + list[0] = *code; + code++; + break; + } + c = list[0]; + } + +switch(c) + { + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ALLANY: + case OP_ANYNL: + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: + case OP_EXTUNI: + case OP_EODN: + case OP_EOD: + case OP_DOLL: + case OP_DOLLM: + return code; + + case OP_CHAR: + case OP_NOT: + GETCHARINCTEST(chr, code); + list[2] = chr; + list[3] = NOTACHAR; + return code; + + case OP_CHARI: + case OP_NOTI: + list[0] = (c == OP_CHARI) ? 
OP_CHAR : OP_NOT; + GETCHARINCTEST(chr, code); + list[2] = chr; + +#ifdef SUPPORT_UNICODE + if (chr < 128 || (chr < 256 && !utf && !ucp)) + list[3] = fcc[chr]; + else + list[3] = UCD_OTHERCASE(chr); +#elif defined SUPPORT_WIDE_CHARS + list[3] = (chr < 256) ? fcc[chr] : chr; +#else + list[3] = fcc[chr]; +#endif + + /* The othercase might be the same value. */ + + if (chr == list[3]) + list[3] = NOTACHAR; + else + list[4] = NOTACHAR; + return code; + +#ifdef SUPPORT_UNICODE + case OP_PROP: + case OP_NOTPROP: + if (code[0] != PT_CLIST) + { + list[2] = code[0]; + list[3] = code[1]; + return code + 2; + } + + /* Convert only if we have enough space. */ + + clist_src = PRIV(ucd_caseless_sets) + code[1]; + clist_dest = list + 2; + code += 2; + + do { + if (clist_dest >= list + 8) + { + /* Early return if there is not enough space. This should never + happen, since all clists are shorter than 5 characters now. */ + list[2] = code[0]; + list[3] = code[1]; + return code; + } + *clist_dest++ = *clist_src; + } + while(*clist_src++ != NOTACHAR); + + /* All characters are stored. The terminating NOTACHAR is copied from the + clist itself. */ + + list[0] = (c == OP_PROP) ? 
OP_CHAR : OP_NOT; + return code; +#endif + + case OP_NCLASS: + case OP_CLASS: +#ifdef SUPPORT_WIDE_CHARS + case OP_XCLASS: + if (c == OP_XCLASS) + end = code + GET(code, 0) - 1; + else +#endif + end = code + 32 / sizeof(PCRE2_UCHAR); + + switch(*end) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSQUERY: + list[1] = TRUE; + end++; + break; + + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRPOSPLUS: + end++; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + list[1] = (GET2(end, 1) == 0); + end += 1 + 2 * IMM2_SIZE; + break; + } + list[2] = (uint32_t)(end - code); + return end; + } + +return NULL; /* Opcode not accepted */ +} + + + +/************************************************* +* Scan further character sets for match * +*************************************************/ + +/* Checks whether the base and the current opcode have a common character, in +which case the base cannot be possessified. 
+ +Arguments: + code points to the byte code + utf TRUE in UTF mode + ucp TRUE in UCP mode + cb compile data block + base_list the data list of the base opcode + base_end the end of the base opcode + rec_limit points to recursion depth counter + +Returns: TRUE if the auto-possessification is possible +*/ + +static BOOL +compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb, + const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit) +{ +PCRE2_UCHAR c; +uint32_t list[8]; +const uint32_t *chr_ptr; +const uint32_t *ochr_ptr; +const uint32_t *list_ptr; +PCRE2_SPTR next_code; +#ifdef SUPPORT_WIDE_CHARS +PCRE2_SPTR xclass_flags; +#endif +const uint8_t *class_bitset; +const uint8_t *set1, *set2, *set_end; +uint32_t chr; +BOOL accepted, invert_bits; +BOOL entered_a_group = FALSE; + +if (--(*rec_limit) <= 0) return FALSE; /* Recursion has gone too deep */ + +/* Note: the base_list[1] contains whether the current opcode has a greedy +(represented by a non-zero value) quantifier. This is different from +other character type lists, which store here that the character iterator +matches to an empty string (also represented by a non-zero value). */ + +for(;;) + { + PCRE2_SPTR bracode; + + /* All operations move the code pointer forward. + Therefore infinite recursions are not possible. */ + + c = *code; + + /* Skip over callouts */ + + if (c == OP_CALLOUT) + { + code += PRIV(OP_lengths)[c]; + continue; + } + + if (c == OP_CALLOUT_STR) + { + code += GET(code, 1 + 2*LINK_SIZE); + continue; + } + + /* At the end of a branch, skip to the end of the group. */ + + if (c == OP_ALT) + { + do code += GET(code, 1); while (*code == OP_ALT); + c = *code; + } + + /* Inspect the next opcode. */ + + switch(c) + { + /* We can always possessify a greedy iterator at the end of the pattern, + which is reached after skipping over the final OP_KET. A non-greedy + iterator must never be possessified. 
*/ + + case OP_END: + return base_list[1] != 0; + + /* When an iterator is at the end of certain kinds of group we can inspect + what follows the group by skipping over the closing ket. Note that this + does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given + iteration is variable (could be another iteration or could be the next + item). As these two opcodes are not listed in the next switch, they will + end up as the next code to inspect, and return FALSE by virtue of being + unsupported. */ + + case OP_KET: + case OP_KETRPOS: + /* The non-greedy case cannot be converted to a possessive form. */ + + if (base_list[1] == 0) return FALSE; + + /* If the bracket is capturing it might be referenced by an OP_RECURSE + so its last iterator can never be possessified if the pattern contains + recursions. (This could be improved by keeping a list of group numbers that + are called by recursion.) */ + + bracode = code - GET(code, 1); + switch(*bracode) + { + case OP_CBRA: + case OP_SCBRA: + case OP_CBRAPOS: + case OP_SCBRAPOS: + if (cb->had_recurse) return FALSE; + break; + + /* A script run might have to backtrack if the iterated item can match + characters from more than one script. So give up unless repeating an + explicit character. */ + + case OP_SCRIPT_RUN: + if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI) + return FALSE; + break; + + /* Atomic sub-patterns and assertions can always auto-possessify their + last iterator except for variable length lookbehinds. However, if the + group was entered as a result of checking a previous iterator, this is + not possible. */ + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ONCE: + return !entered_a_group; + + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + return (bracode[1+LINK_SIZE] == OP_VREVERSE)? FALSE : !entered_a_group; + + /* Non-atomic assertions - don't possessify last iterator. This needs + more thought. 
*/ + + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + return FALSE; + } + + /* Skip over the bracket and inspect what comes next. */ + + code += PRIV(OP_lengths)[c]; + continue; + + /* Handle cases where the next item is a group. */ + + case OP_ONCE: + case OP_BRA: + case OP_CBRA: + next_code = code + GET(code, 1); + code += PRIV(OP_lengths)[c]; + + /* Check each branch. We have to recurse a level for all but the last + branch. */ + + while (*next_code == OP_ALT) + { + if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit)) + return FALSE; + code = next_code + 1 + LINK_SIZE; + next_code += GET(next_code, 1); + } + + entered_a_group = TRUE; + continue; + + case OP_BRAZERO: + case OP_BRAMINZERO: + + next_code = code + 1; + if (*next_code != OP_BRA && *next_code != OP_CBRA && + *next_code != OP_ONCE) return FALSE; + + do next_code += GET(next_code, 1); while (*next_code == OP_ALT); + + /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */ + + next_code += 1 + LINK_SIZE; + if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end, + rec_limit)) + return FALSE; + + code += PRIV(OP_lengths)[c]; + continue; + + /* The next opcode does not need special handling; fall through and use it + to see if the base can be possessified. */ + + default: + break; + } + + /* We now have the next appropriate opcode to compare with the base. Check + for a supported opcode, and load its properties. */ + + code = get_chr_property_list(code, utf, ucp, cb->fcc, list); + if (code == NULL) return FALSE; /* Unsupported */ + + /* If either opcode is a small character list, set pointers for comparing + characters from that list with another list, or with a property. */ + + if (base_list[0] == OP_CHAR) + { + chr_ptr = base_list + 2; + list_ptr = list; + } + else if (list[0] == OP_CHAR) + { + chr_ptr = list + 2; + list_ptr = base_list; + } + + /* Character bitsets can also be compared to certain opcodes. 
*/ + + else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS +#if PCRE2_CODE_UNIT_WIDTH == 8 + /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */ + || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS)) +#endif + ) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS)) +#else + if (base_list[0] == OP_CLASS) +#endif + { + set1 = (uint8_t *)(base_end - base_list[2]); + list_ptr = list; + } + else + { + set1 = (uint8_t *)(code - list[2]); + list_ptr = base_list; + } + + invert_bits = FALSE; + switch(list_ptr[0]) + { + case OP_CLASS: + case OP_NCLASS: + set2 = (uint8_t *) + ((list_ptr == list ? code : base_end) - list_ptr[2]); + break; + +#ifdef SUPPORT_WIDE_CHARS + case OP_XCLASS: + xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE; + if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE; + if ((*xclass_flags & XCL_MAP) == 0) + { + /* No bits are set for characters < 256. */ + if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0; + /* Might be an empty repeat. */ + continue; + } + set2 = (uint8_t *)(xclass_flags + 1); + break; +#endif + + case OP_NOT_DIGIT: + invert_bits = TRUE; + /* Fall through */ + case OP_DIGIT: + set2 = (uint8_t *)(cb->cbits + cbit_digit); + break; + + case OP_NOT_WHITESPACE: + invert_bits = TRUE; + /* Fall through */ + case OP_WHITESPACE: + set2 = (uint8_t *)(cb->cbits + cbit_space); + break; + + case OP_NOT_WORDCHAR: + invert_bits = TRUE; + /* Fall through */ + case OP_WORDCHAR: + set2 = (uint8_t *)(cb->cbits + cbit_word); + break; + + default: + return FALSE; + } + + /* Because the bit sets are unaligned bytes, we need to perform byte + comparison here. 
*/ + + set_end = set1 + 32; + if (invert_bits) + { + do + { + if ((*set1++ & ~(*set2++)) != 0) return FALSE; + } + while (set1 < set_end); + } + else + { + do + { + if ((*set1++ & *set2++) != 0) return FALSE; + } + while (set1 < set_end); + } + + if (list[1] == 0) return TRUE; + /* Might be an empty repeat. */ + continue; + } + + /* Some property combinations also acceptable. Unicode property opcodes are + processed specially; the rest can be handled with a lookup table. */ + + else + { + uint32_t leftop, rightop; + + leftop = base_list[0]; + rightop = list[0]; + +#ifdef SUPPORT_UNICODE + accepted = FALSE; /* Always set in non-unicode case. */ + if (leftop == OP_PROP || leftop == OP_NOTPROP) + { + if (rightop == OP_EOD) + accepted = TRUE; + else if (rightop == OP_PROP || rightop == OP_NOTPROP) + { + int n; + const uint8_t *p; + BOOL same = leftop == rightop; + BOOL lisprop = leftop == OP_PROP; + BOOL risprop = rightop == OP_PROP; + BOOL bothprop = lisprop && risprop; + + /* There's a table that specifies how each combination is to be + processed: + 0 Always return FALSE (never auto-possessify) + 1 Character groups are distinct (possessify if both are OP_PROP) + 2 Check character categories in the same group (general or particular) + 3 Return TRUE if the two opcodes are not the same + ... see comments below + */ + + n = propposstab[base_list[2]][list[2]]; + switch(n) + { + case 0: break; + case 1: accepted = bothprop; break; + case 2: accepted = (base_list[3] == list[3]) != same; break; + case 3: accepted = !same; break; + + case 4: /* Left general category, right particular category */ + accepted = risprop && catposstab[base_list[3]][list[3]] == same; + break; + + case 5: /* Right general category, left particular category */ + accepted = lisprop && catposstab[list[3]][base_list[3]] == same; + break; + + /* This code is logically tricky. Think hard before fiddling with it. + The posspropstab table has four entries per row. 
Each row relates to + one of PCRE's special properties such as ALNUM or SPACE or WORD. + Only WORD actually needs all four entries, but using repeats for the + others means they can all use the same code below. + + The first two entries in each row are Unicode general categories, and + apply always, because all the characters they include are part of the + PCRE character set. The third and fourth entries are a general and a + particular category, respectively, that include one or more relevant + characters. One or the other is used, depending on whether the check + is for a general or a particular category. However, in both cases the + category contains more characters than the specials that are defined + for the property being tested against. Therefore, it cannot be used + in a NOTPROP case. + + Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po. + Underscore is covered by ucp_P or ucp_Po. */ + + case 6: /* Left alphanum vs right general category */ + case 7: /* Left space vs right general category */ + case 8: /* Left word vs right general category */ + p = posspropstab[n-6]; + accepted = risprop && lisprop == + (list[3] != p[0] && + list[3] != p[1] && + (list[3] != p[2] || !lisprop)); + break; + + case 9: /* Right alphanum vs left general category */ + case 10: /* Right space vs left general category */ + case 11: /* Right word vs left general category */ + p = posspropstab[n-9]; + accepted = lisprop && risprop == + (base_list[3] != p[0] && + base_list[3] != p[1] && + (base_list[3] != p[2] || !risprop)); + break; + + case 12: /* Left alphanum vs right particular category */ + case 13: /* Left space vs right particular category */ + case 14: /* Left word vs right particular category */ + p = posspropstab[n-12]; + accepted = risprop && lisprop == + (catposstab[p[0]][list[3]] && + catposstab[p[1]][list[3]] && + (list[3] != p[3] || !lisprop)); + break; + + case 15: /* Right alphanum vs left particular category */ + case 16: /* Right space vs left 
particular category */ + case 17: /* Right word vs left particular category */ + p = posspropstab[n-15]; + accepted = lisprop && risprop == + (catposstab[p[0]][base_list[3]] && + catposstab[p[1]][base_list[3]] && + (base_list[3] != p[3] || !risprop)); + break; + } + } + } + + else +#endif /* SUPPORT_UNICODE */ + + accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP && + rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP && + autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP]; + + if (!accepted) return FALSE; + + if (list[1] == 0) return TRUE; + /* Might be an empty repeat. */ + continue; + } + + /* Control reaches here only if one of the items is a small character list. + All characters are checked against the other side. */ + + do + { + chr = *chr_ptr; + + switch(list_ptr[0]) + { + case OP_CHAR: + ochr_ptr = list_ptr + 2; + do + { + if (chr == *ochr_ptr) return FALSE; + ochr_ptr++; + } + while(*ochr_ptr != NOTACHAR); + break; + + case OP_NOT: + ochr_ptr = list_ptr + 2; + do + { + if (chr == *ochr_ptr) + break; + ochr_ptr++; + } + while(*ochr_ptr != NOTACHAR); + if (*ochr_ptr == NOTACHAR) return FALSE; /* Not found */ + break; + + /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not* + set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. 
*/ + + case OP_DIGIT: + if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE; + break; + + case OP_NOT_DIGIT: + if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE; + break; + + case OP_WHITESPACE: + if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE; + break; + + case OP_NOT_WHITESPACE: + if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE; + break; + + case OP_WORDCHAR: + if (chr < 256 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE; + break; + + case OP_NOT_WORDCHAR: + if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE; + break; + + case OP_HSPACE: + switch(chr) + { + HSPACE_CASES: return FALSE; + default: break; + } + break; + + case OP_NOT_HSPACE: + switch(chr) + { + HSPACE_CASES: break; + default: return FALSE; + } + break; + + case OP_ANYNL: + case OP_VSPACE: + switch(chr) + { + VSPACE_CASES: return FALSE; + default: break; + } + break; + + case OP_NOT_VSPACE: + switch(chr) + { + VSPACE_CASES: break; + default: return FALSE; + } + break; + + case OP_DOLL: + case OP_EODN: + switch (chr) + { + case CHAR_CR: + case CHAR_LF: + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#ifndef EBCDIC + case 0x2028: + case 0x2029: +#endif /* Not EBCDIC */ + return FALSE; + } + break; + + case OP_EOD: /* Can always possessify before \z */ + break; + +#ifdef SUPPORT_UNICODE + case OP_PROP: + case OP_NOTPROP: + if (!check_char_prop(chr, list_ptr[2], list_ptr[3], + list_ptr[0] == OP_NOTPROP)) + return FALSE; + break; +#endif + + case OP_NCLASS: + if (chr > 255) return FALSE; + /* Fall through */ + + case OP_CLASS: + if (chr > 255) break; + class_bitset = (uint8_t *) + ((list_ptr == list ? code : base_end) - list_ptr[2]); + if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE; + break; + +#ifdef SUPPORT_WIDE_CHARS + case OP_XCLASS: + if (PRIV(xclass)(chr, (list_ptr == list ? 
code : base_end) - + list_ptr[2] + LINK_SIZE, utf)) return FALSE; + break; +#endif + + default: + return FALSE; + } + + chr_ptr++; + } + while(*chr_ptr != NOTACHAR); + + /* At least one character must be matched from this opcode. */ + + if (list[1] == 0) return TRUE; + } + +/* Control never reaches here. There used to be a fail-safe return FALSE; here, +but some compilers complain about an unreachable statement. */ +} + + + +/************************************************* +* Scan compiled regex for auto-possession * +*************************************************/ + +/* Replaces single character iterations with their possessive alternatives +if appropriate. This function modifies the compiled opcode! Hitting a +non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a +bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches +overly complicated or large patterns. In these cases, the check just stops, +leaving the remainder of the pattern unpossessified. + +Arguments: + code points to start of the byte code + cb compile data block + +Returns: 0 for success + -1 if a non-existent opcode is encountered +*/ + +int +PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb) +{ +PCRE2_UCHAR c; +PCRE2_SPTR end; +PCRE2_UCHAR *repeat_opcode; +uint32_t list[8]; +int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */ +BOOL utf = (cb->external_options & PCRE2_UTF) != 0; +BOOL ucp = (cb->external_options & PCRE2_UCP) != 0; + +for (;;) + { + c = *code; + + if (c >= OP_TABLE_LENGTH) return -1; /* Something gone wrong */ + + if (c >= OP_STAR && c <= OP_TYPEPOSUPTO) + { + c -= get_repeat_base(c) - OP_STAR; + end = (c <= OP_MINUPTO) ? 
+ get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL; + list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO; + + if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end, + &rec_limit)) + { + switch(c) + { + case OP_STAR: + *code += OP_POSSTAR - OP_STAR; + break; + + case OP_MINSTAR: + *code += OP_POSSTAR - OP_MINSTAR; + break; + + case OP_PLUS: + *code += OP_POSPLUS - OP_PLUS; + break; + + case OP_MINPLUS: + *code += OP_POSPLUS - OP_MINPLUS; + break; + + case OP_QUERY: + *code += OP_POSQUERY - OP_QUERY; + break; + + case OP_MINQUERY: + *code += OP_POSQUERY - OP_MINQUERY; + break; + + case OP_UPTO: + *code += OP_POSUPTO - OP_UPTO; + break; + + case OP_MINUPTO: + *code += OP_POSUPTO - OP_MINUPTO; + break; + } + } + c = *code; + } + else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS) + { +#ifdef SUPPORT_WIDE_CHARS + if (c == OP_XCLASS) + repeat_opcode = code + GET(code, 1); + else +#endif + repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR)); + + c = *repeat_opcode; + if (c >= OP_CRSTAR && c <= OP_CRMINRANGE) + { + /* The return from get_chr_property_list() will never be NULL when + *code (aka c) is one of the three class opcodes. However, gcc with + -fanalyzer notes that a NULL return is possible, and grumbles. Hence we + put in a check. 
*/ + + end = get_chr_property_list(code, utf, ucp, cb->fcc, list); + list[1] = (c & 1) == 0; + + if (end != NULL && + compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit)) + { + switch (c) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + *repeat_opcode = OP_CRPOSSTAR; + break; + + case OP_CRPLUS: + case OP_CRMINPLUS: + *repeat_opcode = OP_CRPOSPLUS; + break; + + case OP_CRQUERY: + case OP_CRMINQUERY: + *repeat_opcode = OP_CRPOSQUERY; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + *repeat_opcode = OP_CRPOSRANGE; + break; + } + } + } + c = *code; + } + + switch(c) + { + case OP_END: + return 0; + + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; + break; + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEEXACT: + case OP_TYPEPOSUPTO: + if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) + code += 2; + break; + + case OP_CALLOUT_STR: + code += GET(code, 1 + 2*LINK_SIZE); + break; + +#ifdef SUPPORT_WIDE_CHARS + case OP_XCLASS: + code += GET(code, 1); + break; +#endif + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + code += code[1]; + break; + } + + /* Add in the fixed length from the table */ + + code += PRIV(OP_lengths)[c]; + + /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be + followed by a multi-byte character. The length in the table is a minimum, so + we have to arrange to skip the extra code units. 
*/ + +#ifdef MAYBE_UTF_MULTI + if (utf) switch(c) + { + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_QUERY: + case OP_MINQUERY: + case OP_UPTO: + case OP_MINUPTO: + case OP_EXACT: + case OP_POSSTAR: + case OP_POSPLUS: + case OP_POSQUERY: + case OP_POSUPTO: + case OP_STARI: + case OP_MINSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_EXACTI: + case OP_POSSTARI: + case OP_POSPLUSI: + case OP_POSQUERYI: + case OP_POSUPTOI: + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTEXACT: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + case OP_NOTPOSQUERY: + case OP_NOTPOSUPTO: + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTEXACTI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERYI: + case OP_NOTPOSUPTOI: + if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); + break; + } +#else + (void)(utf); /* Keep compiler happy by referencing the variable */ +#endif /* MAYBE_UTF_MULTI */ + } +} + +/* End of pcre2_auto_possess.c */ diff --git a/src/pcre2_chartables.c.dist b/src/pcre2_chartables.c.dist new file mode 100644 index 0000000..7362c3f --- /dev/null +++ b/src/pcre2_chartables.c.dist @@ -0,0 +1,196 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* This file was automatically written by the pcre2_dftables auxiliary +program. It contains character tables that are used when no external +tables are passed to PCRE2 by the application that calls it. 
The tables +are used only for characters whose code values are less than 256, and +only relevant if not in UCP mode. */ + +/* This set of tables was written in the C locale. */ + +/* The pcre2_dftables program (which is distributed with PCRE2) can be used +to build alternative versions of this file. This is necessary if you are +running in an EBCDIC environment, or if you want to default to a different +encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates +these tables in the "C" locale by default. This happens automatically if +PCRE2 is configured with --enable-rebuild-chartables. However, you can run +pcre2_dftables manually with the -L option to build tables using the LC_ALL +locale. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + +const uint8_t PRIV(default_tables)[] = { + +/* This table is a lower casing table. */ + + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99,100,101,102,103, + 104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119, + 120,121,122, 91, 92, 93, 94, 95, + 96, 97, 98, 99,100,101,102,103, + 104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119, + 120,121,122,123,124,125,126,127, + 128,129,130,131,132,133,134,135, + 136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151, + 152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167, + 168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183, + 184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199, + 200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215, + 216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231, + 232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247, + 248,249,250,251,252,253,254,255, + +/* 
This table is a case flipping table. */ + + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99,100,101,102,103, + 104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119, + 120,121,122, 91, 92, 93, 94, 95, + 96, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90,123,124,125,126,127, + 128,129,130,131,132,133,134,135, + 136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151, + 152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167, + 168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183, + 184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199, + 200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215, + 216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231, + 232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247, + 248,249,250,251,252,253,254,255, + +/* This table contains bit maps for various character classes. Each map is 32 +bytes long and the bits run from the least significant end of each byte. The +classes that have their own maps are: space, xdigit, digit, upper, lower, word, +graph, print, punct, and cntrl. Other classes are built from combinations. 
*/ + + 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */ + 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */ + 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */ + 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */ + 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */ + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */ + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */ + 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + +/* This table identifies various classes of character by individual bits: + 0x01 white space character + 0x02 letter + 0x04 lower 
case letter + 0x08 decimal digit + 0x10 word (alphanumeric or '_') +*/ + + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ + 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ + 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */ + 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, /* 0 - 7 */ + 0x18,0x18,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */ + 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* @ - G */ + 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ + 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ + 0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */ + 0x00,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* ` - g */ + 0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* h - o */ + 0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* p - w */ + 0x16,0x16,0x16,0x00,0x00,0x00,0x00,0x00, /* x -127 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ + +/* End of pcre2_chartables.c */ diff --git a/src/pcre2_chkdint.c b/src/pcre2_chkdint.c new file mode 100644 index 0000000..d04f6f8 
--- /dev/null +++ b/src/pcre2_chkdint.c @@ -0,0 +1,96 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 2023 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+-----------------------------------------------------------------------------
+*/
+
+/* This file contains functions to implement checked integer operations. */
+
+#ifndef PCRE2_PCRE2TEST
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+#endif  /* PCRE2_PCRE2TEST */
+
+/*************************************************
+*      Checked Integer Multiplication            *
+*************************************************/
+
+/* Multiply two ints, storing the product as a PCRE2_SIZE unless it overflows.
+Arguments:
+  r         points to the PCRE2_SIZE that receives the product on success
+  a, b      the factors; only PCRE2_DEBUG fallback builds check they are >= 0
+
+Returns:    TRUE if overflow was detected (*r is not written), else FALSE
+
+The interface is modeled after C23's checked integer arithmetic (ckd_mul).
+Without HAVE_BUILTIN_MUL_OVERFLOW the product is formed in INT64_OR_DOUBLE,
+which is a 64-bit integer type when available, otherwise double. */
+
+BOOL
+PRIV(ckd_smul)(PCRE2_SIZE *r, int a, int b)
+{
+#ifdef HAVE_BUILTIN_MUL_OVERFLOW
+PCRE2_SIZE m;
+
+if (__builtin_mul_overflow(a, b, &m)) return TRUE;  /* product does not fit m */
+
+*r = m;
+#else  /* No builtin: multiply in INT64_OR_DOUBLE and range-check by hand */
+INT64_OR_DOUBLE m;
+
+#ifdef PCRE2_DEBUG
+if (a < 0 || b < 0) abort();  /* debug builds reject negative factors */
+#endif
+
+m = (INT64_OR_DOUBLE)a * (INT64_OR_DOUBLE)b;
+
+#if defined INT64_MAX || defined int64_t
+if (sizeof(m) > sizeof(*r) && m > (INT64_OR_DOUBLE)PCRE2_SIZE_MAX) return TRUE;
+*r = (PCRE2_SIZE)m;
+#else  /* presumably the case where INT64_OR_DOUBLE is double */
+if (m > PCRE2_SIZE_MAX) return TRUE;
+*r = m;
+#endif
+
+#endif  /* HAVE_BUILTIN_MUL_OVERFLOW */
+
+return FALSE;
+}
+
+/* End of pcre2_chkdint.c */
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
new file mode 100644
index 0000000..8e6787a
--- /dev/null
+++ b/src/pcre2_compile.c
@@ -0,0 +1,11001 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+----------------------------------------------------------------------------- +*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#define NLBLOCK cb /* Block containing newline information */ +#define PSSTART start_pattern /* Field containing processed string start */ +#define PSEND end_pattern /* Field containing processed string end */ + +#include "pcre2_internal.h" + +/* In rare error cases debugging might require calling pcre2_printint(). */ + +#if 0 +#ifdef EBCDIC +#define PRINTABLE(c) ((c) >= 64 && (c) < 255) +#else +#define PRINTABLE(c) ((c) >= 32 && (c) < 127) +#endif +#include "pcre2_printint.c" +#define DEBUG_CALL_PRINTINT +#endif + +/* Other debugging code can be enabled by these defines. */ + +/* #define DEBUG_SHOW_CAPTURES */ +/* #define DEBUG_SHOW_PARSED */ + +/* There are a few things that vary with different code unit sizes. Handle them +by defining macros in order to minimize #if usage. */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define STRING_UTFn_RIGHTPAR STRING_UTF8_RIGHTPAR, 5 +#define XDIGIT(c) xdigitab[c] + +#else /* Either 16-bit or 32-bit */ +#define XDIGIT(c) (MAX_255(c)? xdigitab[c] : 0xff) + +#if PCRE2_CODE_UNIT_WIDTH == 16 +#define STRING_UTFn_RIGHTPAR STRING_UTF16_RIGHTPAR, 6 + +#else /* 32-bit */ +#define STRING_UTFn_RIGHTPAR STRING_UTF32_RIGHTPAR, 6 +#endif +#endif + +/* Macros to store and retrieve a PCRE2_SIZE value in the parsed pattern, which +consists of uint32_t elements. Assume that if uint32_t can't hold it, two of +them will be able to (i.e. assume a 64-bit world). 
*/ + +#if PCRE2_SIZE_MAX <= UINT32_MAX +#define PUTOFFSET(s,p) *p++ = s +#define GETOFFSET(s,p) s = *p++ +#define GETPLUSOFFSET(s,p) s = *(++p) +#define READPLUSOFFSET(s,p) s = p[1] +#define SKIPOFFSET(p) p++ +#define SIZEOFFSET 1 +#else +#define PUTOFFSET(s,p) \ + { *p++ = (uint32_t)(s >> 32); *p++ = (uint32_t)(s & 0xffffffff); } +#define GETOFFSET(s,p) \ + { s = ((PCRE2_SIZE)p[0] << 32) | (PCRE2_SIZE)p[1]; p += 2; } +#define GETPLUSOFFSET(s,p) \ + { s = ((PCRE2_SIZE)p[1] << 32) | (PCRE2_SIZE)p[2]; p += 2; } +#define READPLUSOFFSET(s,p) \ + { s = ((PCRE2_SIZE)p[1] << 32) | (PCRE2_SIZE)p[2]; } +#define SKIPOFFSET(p) p += 2 +#define SIZEOFFSET 2 +#endif + +/* Macros for manipulating elements of the parsed pattern vector. */ + +#define META_CODE(x) (x & 0xffff0000u) +#define META_DATA(x) (x & 0x0000ffffu) +#define META_DIFF(x,y) ((x-y)>>16) + +/* Function definitions to allow mutual recursion */ + +#ifdef SUPPORT_UNICODE +static unsigned int + add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t, uint32_t, + compile_block *, const uint32_t *, unsigned int); +#endif + +static int + compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *, + uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *, + open_capitem *, compile_block *, PCRE2_SIZE *); + +static int + get_branchlength(uint32_t **, int *, int *, int *, parsed_recurse_check *, + compile_block *); + +static BOOL + set_lookbehind_lengths(uint32_t **, int *, int *, parsed_recurse_check *, + compile_block *); + +static int + check_lookbehinds(uint32_t *, uint32_t **, parsed_recurse_check *, + compile_block *, int *); + + +/************************************************* +* Code parameters and static tables * +*************************************************/ + +#define MAX_GROUP_NUMBER 65535u +#define MAX_REPEAT_COUNT 65535u +#define REPEAT_UNLIMITED (MAX_REPEAT_COUNT+1) + +/* COMPILE_WORK_SIZE specifies the size of stack workspace, which is used in +different ways in the 
different pattern scans. The parsing and group- +identifying pre-scan uses it to handle nesting, and needs it to be 16-bit +aligned for this. Having defined the size in code units, we set up +C16_WORK_SIZE as the number of elements in the 16-bit vector. + +During the first compiling phase, when determining how much memory is required, +the regex is partly compiled into this space, but the compiled parts are +discarded as soon as they can be, so that hopefully there will never be an +overrun. The code does, however, check for an overrun, which can occur for +pathological patterns. The size of the workspace depends on LINK_SIZE because +the length of compiled items varies with this. + +In the real compile phase, this workspace is not currently used. */ + +#define COMPILE_WORK_SIZE (3000*LINK_SIZE) /* Size in code units */ + +#define C16_WORK_SIZE \ + ((COMPILE_WORK_SIZE * sizeof(PCRE2_UCHAR))/sizeof(uint16_t)) + +/* A uint32_t vector is used for caching information about the size of +capturing groups, to improve performance. A default is created on the stack of +this size. */ + +#define GROUPINFO_DEFAULT_SIZE 256 + +/* The overrun tests check for a slightly smaller size so that they detect the +overrun before it actually does run off the end of the data block. */ + +#define WORK_SIZE_SAFETY_MARGIN (100) + +/* This value determines the size of the initial vector that is used for +remembering named groups during the pre-compile. It is allocated on the stack, +but if it is too small, it is expanded, in a similar way to the workspace. The +value is the number of slots in the list. */ + +#define NAMED_GROUP_LIST_SIZE 20 + +/* The pre-compiling pass over the pattern creates a parsed pattern in a vector +of uint32_t. For short patterns this lives on the stack, with this size. Heap +memory is used for longer patterns. 
*/ + +#define PARSED_PATTERN_DEFAULT_SIZE 1024 + +/* Maximum length value to check against when making sure that the variable +that holds the compiled pattern length does not overflow. We make it a bit less +than INT_MAX to allow for adding in group terminating code units, so that we +don't have to check them every time. */ + +#define OFLOW_MAX (INT_MAX - 20) + +/* Code values for parsed patterns, which are stored in a vector of 32-bit +unsigned ints. Values less than META_END are literal data values. The coding +for identifying the item is in the top 16-bits, leaving 16 bits for the +additional data that some of them need. The META_CODE, META_DATA, and META_DIFF +macros are used to manipulate parsed pattern elements. + +NOTE: When these definitions are changed, the table of extra lengths for each +code (meta_extra_lengths, just below) must be updated to remain in step. */ + +#define META_END 0x80000000u /* End of pattern */ + +#define META_ALT 0x80010000u /* alternation */ +#define META_ATOMIC 0x80020000u /* atomic group */ +#define META_BACKREF 0x80030000u /* Back ref */ +#define META_BACKREF_BYNAME 0x80040000u /* \k'name' */ +#define META_BIGVALUE 0x80050000u /* Next is a literal > META_END */ +#define META_CALLOUT_NUMBER 0x80060000u /* (?C with numerical argument */ +#define META_CALLOUT_STRING 0x80070000u /* (?C with string argument */ +#define META_CAPTURE 0x80080000u /* Capturing parenthesis */ +#define META_CIRCUMFLEX 0x80090000u /* ^ metacharacter */ +#define META_CLASS 0x800a0000u /* start non-empty class */ +#define META_CLASS_EMPTY 0x800b0000u /* empty class */ +#define META_CLASS_EMPTY_NOT 0x800c0000u /* negative empty class */ +#define META_CLASS_END 0x800d0000u /* end of non-empty class */ +#define META_CLASS_NOT 0x800e0000u /* start non-empty negative class */ +#define META_COND_ASSERT 0x800f0000u /* (?(?assertion)... */ +#define META_COND_DEFINE 0x80100000u /* (?(DEFINE)... */ +#define META_COND_NAME 0x80110000u /* (?()... 
*/ +#define META_COND_NUMBER 0x80120000u /* (?(digits)... */ +#define META_COND_RNAME 0x80130000u /* (?(R&name)... */ +#define META_COND_RNUMBER 0x80140000u /* (?(Rdigits)... */ +#define META_COND_VERSION 0x80150000u /* (?(VERSIONx.y)... */ +#define META_DOLLAR 0x80160000u /* $ metacharacter */ +#define META_DOT 0x80170000u /* . metacharacter */ +#define META_ESCAPE 0x80180000u /* \d and friends */ +#define META_KET 0x80190000u /* closing parenthesis */ +#define META_NOCAPTURE 0x801a0000u /* no capture parens */ +#define META_OPTIONS 0x801b0000u /* (?i) and friends */ +#define META_POSIX 0x801c0000u /* POSIX class item */ +#define META_POSIX_NEG 0x801d0000u /* negative POSIX class item */ +#define META_RANGE_ESCAPED 0x801e0000u /* range with at least one escape */ +#define META_RANGE_LITERAL 0x801f0000u /* range defined literally */ +#define META_RECURSE 0x80200000u /* Recursion */ +#define META_RECURSE_BYNAME 0x80210000u /* (?&name) */ +#define META_SCRIPT_RUN 0x80220000u /* (*script_run:...) */ + +/* These must be kept together to make it easy to check that an assertion +is present where expected in a conditional group. */ + +#define META_LOOKAHEAD 0x80230000u /* (?= */ +#define META_LOOKAHEADNOT 0x80240000u /* (?! 
*/ +#define META_LOOKBEHIND 0x80250000u /* (?<= */ +#define META_LOOKBEHINDNOT 0x80260000u /* (?= 10 */ + 1+SIZEOFFSET, /* META_BACKREF_BYNAME */ + 1, /* META_BIGVALUE */ + 3, /* META_CALLOUT_NUMBER */ + 3+SIZEOFFSET, /* META_CALLOUT_STRING */ + 0, /* META_CAPTURE */ + 0, /* META_CIRCUMFLEX */ + 0, /* META_CLASS */ + 0, /* META_CLASS_EMPTY */ + 0, /* META_CLASS_EMPTY_NOT */ + 0, /* META_CLASS_END */ + 0, /* META_CLASS_NOT */ + 0, /* META_COND_ASSERT */ + SIZEOFFSET, /* META_COND_DEFINE */ + 1+SIZEOFFSET, /* META_COND_NAME */ + 1+SIZEOFFSET, /* META_COND_NUMBER */ + 1+SIZEOFFSET, /* META_COND_RNAME */ + 1+SIZEOFFSET, /* META_COND_RNUMBER */ + 3, /* META_COND_VERSION */ + 0, /* META_DOLLAR */ + 0, /* META_DOT */ + 0, /* META_ESCAPE - more for ESC_P, ESC_p, ESC_g, ESC_k */ + 0, /* META_KET */ + 0, /* META_NOCAPTURE */ + 1, /* META_OPTIONS */ + 1, /* META_POSIX */ + 1, /* META_POSIX_NEG */ + 0, /* META_RANGE_ESCAPED */ + 0, /* META_RANGE_LITERAL */ + SIZEOFFSET, /* META_RECURSE */ + 1+SIZEOFFSET, /* META_RECURSE_BYNAME */ + 0, /* META_SCRIPT_RUN */ + 0, /* META_LOOKAHEAD */ + 0, /* META_LOOKAHEADNOT */ + SIZEOFFSET, /* META_LOOKBEHIND */ + SIZEOFFSET, /* META_LOOKBEHINDNOT */ + 0, /* META_LOOKAHEAD_NA */ + SIZEOFFSET, /* META_LOOKBEHIND_NA */ + 1, /* META_MARK - plus the string length */ + 0, /* META_ACCEPT */ + 0, /* META_FAIL */ + 0, /* META_COMMIT */ + 1, /* META_COMMIT_ARG - plus the string length */ + 0, /* META_PRUNE */ + 1, /* META_PRUNE_ARG - plus the string length */ + 0, /* META_SKIP */ + 1, /* META_SKIP_ARG - plus the string length */ + 0, /* META_THEN */ + 1, /* META_THEN_ARG - plus the string length */ + 0, /* META_ASTERISK */ + 0, /* META_ASTERISK_PLUS */ + 0, /* META_ASTERISK_QUERY */ + 0, /* META_PLUS */ + 0, /* META_PLUS_PLUS */ + 0, /* META_PLUS_QUERY */ + 0, /* META_QUERY */ + 0, /* META_QUERY_PLUS */ + 0, /* META_QUERY_QUERY */ + 2, /* META_MINMAX */ + 2, /* META_MINMAX_PLUS */ + 2 /* META_MINMAX_QUERY */ +}; + +/* Types for skipping parts of a 
parsed pattern. */
+
+enum { PSKIP_ALT, PSKIP_CLASS, PSKIP_KET };
+
+/* Macro for setting individual bits in class bitmaps. It took some
+experimenting to figure out how to stop gcc 5.3.0 from warning with
+-Wconversion. This version gets a warning:
+
+  #define SETBIT(a,b) a[(b)/8] |= (uint8_t)(1u << ((b)&7))
+
+The version below avoids the warning and should cost no more if the compiler
+merges the identical subexpressions. Arguments are evaluated more than once. */
+
+#define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1u << ((b)&7)))
+
+/* Values and flags for the unsigned xxcuflags variables that accompany xxcu
+variables, which are concerned with first and required code units. A value
+greater than or equal to REQ_NONE means "no code unit set"; otherwise the
+matching xxcu variable is set, and the low-valued bits are relevant. */
+
+#define REQ_UNSET     0xffffffffu  /* Not yet found anything */
+#define REQ_NONE      0xfffffffeu  /* Found a character that is not fixed */
+#define REQ_CASELESS  0x00000001u  /* Code unit in xxcu is caseless */
+#define REQ_VARY      0x00000002u  /* Code unit is followed by non-literal */
+
+/* These flags are used in the groupinfo vector. */
+
+#define GI_SET_FIXED_LENGTH    0x80000000u
+#define GI_NOT_FIXED_LENGTH    0x40000000u
+#define GI_FIXED_LENGTH_MASK   0x0000ffffu
+
+/* This simple test for a decimal digit works for both ASCII/Unicode and EBCDIC
+and is fast (a good compiler can turn it into a subtraction and unsigned
+comparison). */
+
+#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
+
+/* Table to identify hex digits. The tables in chartables are dependent on the
+locale, and may mark arbitrary characters as digits. We want to recognize only
+0-9, a-f, and A-F as hex digits, which is why we have a private table here. It
+costs 256 bytes, but it is a lot faster than doing character value tests (at
+least in some simple cases I timed), and in some applications one wants PCRE2
+to compile efficiently as well as match efficiently.
The value in the table is
+the binary hex digit value, or 0xff for non-hex digits. */
+
+/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
+UTF-8 mode. */
+
+#ifndef EBCDIC
+static const uint8_t xdigitab[] =
+  {
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*   0-  7 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*   8- 15 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  16- 23 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  24- 31 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*    - '  */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  ( - /  */
+  0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, /*  0 - 7  */
+  0x08,0x09,0xff,0xff,0xff,0xff,0xff,0xff, /*  8 - ?  */
+  0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff, /*  @ - G  */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  H - O  */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  P - W  */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  X - _  */
+  0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff, /*  ` - g  */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  h - o  */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  p - w  */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  x -127 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 128-135 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 136-143 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 144-151 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 152-159 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 160-167 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 168-175 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 176-183 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 184-191 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 192-199 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 200-207 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 208-215 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 216-223 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 224-231 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 232-239 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 240-247 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff};/* 248-255 */
+
+#else
+
+/* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
+
+static const uint8_t xdigitab[] =
+  {
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*   0-  7  0 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*   8- 15    */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  16- 23 10 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  24- 31    */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  32- 39 20 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  40- 47    */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  48- 55 30 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  56- 63    */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*    - 71 40 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  72- |     */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  & - 87 50 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  88- 95    */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  - -103 60 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 104- ?     */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 112-119 70 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 120- "     */
+  0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff, /* 128- g  80 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  h -143    */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 144- p  90 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  q -159    */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 160- x  A0 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  y -175    */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  ^ -183 B0 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 184-191    */
+  0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff, /*  { - G  C0 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  H -207    */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  } - P  D0 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  Q -223    */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  \ - X  E0 */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  Y -239    */
+  0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, /*  0 - 7  F0 */
+  0x08,0x09,0xff,0xff,0xff,0xff,0xff,0xff};/*  8 -255    */
+#endif  /* EBCDIC */
+
+
+/* Table for handling alphanumeric escaped characters. Positive returns are
+simple data values; negative values are for special things like \d and so on.
+Zero means further processing is needed (for things like \x), or the escape is
+invalid. */
+
+/* This is the "normal" table for ASCII systems or for EBCDIC systems running
+in UTF-8 mode. It runs from '0' to 'z'. */
+
+#ifndef EBCDIC
+#define ESCAPES_FIRST       CHAR_0
+#define ESCAPES_LAST        CHAR_z
+#define UPPER_CASE(c)       (c-32)
+
+static const short int escapes[] = {
+     0,                       0,                          /* 0 1 */
+     0,                       0,                          /* 2 3 */
+     0,                       0,                          /* 4 5 */
+     0,                       0,                          /* 6 7 */
+     0,                       0,                          /* 8 9 */
+     CHAR_COLON,              CHAR_SEMICOLON,             /* : ; */
+     CHAR_LESS_THAN_SIGN,     CHAR_EQUALS_SIGN,           /* < = */
+     CHAR_GREATER_THAN_SIGN,  CHAR_QUESTION_MARK,         /* > ? */
+     CHAR_COMMERCIAL_AT,      -ESC_A,                     /* @ A */
+     -ESC_B,                  -ESC_C,                     /* B C */
+     -ESC_D,                  -ESC_E,                     /* D E */
+     0,                       -ESC_G,                     /* F G */
+     -ESC_H,                  0,                          /* H I */
+     0,                       -ESC_K,                     /* J K */
+     0,                       0,                          /* L M */
+     -ESC_N,                  0,                          /* N O */
+     -ESC_P,                  -ESC_Q,                     /* P Q */
+     -ESC_R,                  -ESC_S,                     /* R S */
+     0,                       0,                          /* T U */
+     -ESC_V,                  -ESC_W,                     /* V W */
+     -ESC_X,                  0,                          /* X Y */
+     -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,   /* Z [ */
+     CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,  /* \ ] */
+     CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,            /* ^ _ */
+     CHAR_GRAVE_ACCENT,       CHAR_BEL,                   /* ` a */
+     -ESC_b,                  0,                          /* b c */
+     -ESC_d,                  CHAR_ESC,                   /* d e */
+     CHAR_FF,                 0,                          /* f g */
+     -ESC_h,                  0,                          /* h i */
+     0,                       -ESC_k,                     /* j k */
+     0,                       0,                          /* l m */
+     CHAR_LF,                 0,                          /* n o */
+     -ESC_p,                  0,                          /* p q */
+     CHAR_CR,                 -ESC_s,                     /* r s */
+     CHAR_HT,                 0,                          /* t u */
+     -ESC_v,                  -ESC_w,                     /* v w */
+     0,                       0,                          /* x y */
+     -ESC_z                                               /* z */
+};
+
+#else
+
+/* This is the "abnormal" table for EBCDIC systems without UTF-8 support.
+It runs from 'a' to '9'. For some minimal testing of EBCDIC features, the code
+is sometimes compiled on an ASCII system. In this case, we must not use CHAR_a
+because it is defined as 'a', which of course picks up the ASCII value.
*/ + +#if 'a' == 0x81 /* Check for a real EBCDIC environment */ +#define ESCAPES_FIRST CHAR_a +#define ESCAPES_LAST CHAR_9 +#define UPPER_CASE(c) (c+64) +#else /* Testing in an ASCII environment */ +#define ESCAPES_FIRST ((unsigned char)'\x81') /* EBCDIC 'a' */ +#define ESCAPES_LAST ((unsigned char)'\xf9') /* EBCDIC '9' */ +#define UPPER_CASE(c) (c-32) +#endif + +static const short int escapes[] = { +/* 80 */ CHAR_BEL, -ESC_b, 0, -ESC_d, CHAR_ESC, CHAR_FF, 0, +/* 88 */ -ESC_h, 0, 0, '{', 0, 0, 0, 0, +/* 90 */ 0, 0, -ESC_k, 0, 0, CHAR_LF, 0, -ESC_p, +/* 98 */ 0, CHAR_CR, 0, '}', 0, 0, 0, 0, +/* A0 */ 0, '~', -ESC_s, CHAR_HT, 0, -ESC_v, -ESC_w, 0, +/* A8 */ 0, -ESC_z, 0, 0, 0, '[', 0, 0, +/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, +/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-', +/* C0 */ '{', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G, +/* C8 */ -ESC_H, 0, 0, 0, 0, 0, 0, 0, +/* D0 */ '}', 0, -ESC_K, 0, 0, -ESC_N, 0, -ESC_P, +/* D8 */ -ESC_Q, -ESC_R, 0, 0, 0, 0, 0, 0, +/* E0 */ '\\', 0, -ESC_S, 0, 0, -ESC_V, -ESC_W, -ESC_X, +/* E8 */ 0, -ESC_Z, 0, 0, 0, 0, 0, 0, +/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, +/* F8 */ 0, 0 +}; + +/* We also need a table of characters that may follow \c in an EBCDIC +environment for characters 0-31. */ + +static unsigned char ebcdic_escape_c[] = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; + +#endif /* EBCDIC */ + + +/* Table of special "verbs" like (*PRUNE). This is a short table, so it is +searched linearly. Put all the names into a single string, in order to reduce +the number of relocations when a shared library is dynamically linked. The +string is built from string macros so that it works in UTF-8 mode on EBCDIC +platforms. 
*/
+
+typedef struct verbitem {
+  unsigned int len;          /* Length of verb name */
+  uint32_t meta;             /* Base META_ code */
+  int has_arg;               /* Argument requirement: see the verbs[] table */
+} verbitem;
+
+static const char verbnames[] =
+  "\0"                       /* Empty name is a shorthand for MARK */
+  STRING_MARK0
+  STRING_ACCEPT0
+  STRING_F0
+  STRING_FAIL0
+  STRING_COMMIT0
+  STRING_PRUNE0
+  STRING_SKIP0
+  STRING_THEN;
+
+static const verbitem verbs[] = {
+  { 0, META_MARK,   +1 },  /* (empty) > 0 => must have an argument */
+  { 4, META_MARK,   +1 },  /* MARK */
+  { 6, META_ACCEPT, -1 },  /* ACCEPT  < 0 => Optional argument, convert to pre-MARK */
+  { 1, META_FAIL,   -1 },  /* F */
+  { 4, META_FAIL,   -1 },  /* FAIL */
+  { 6, META_COMMIT,  0 },  /* COMMIT  0 => Optional argument; bump META code if found */
+  { 5, META_PRUNE,   0 },  /* PRUNE */
+  { 4, META_SKIP,    0 },  /* SKIP */
+  { 4, META_THEN,    0 }   /* THEN */
+};
+
+static const int verbcount = sizeof(verbs)/sizeof(verbitem);
+
+/* Verb opcodes, indexed by their META code offset from META_MARK. */
+
+static const uint32_t verbops[] = {
+  OP_MARK, OP_ACCEPT, OP_FAIL, OP_COMMIT, OP_COMMIT_ARG, OP_PRUNE,
+  OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
+
+/* Table of "alpha assertions" like (*pla:...), similar to the (*VERB) table.
*/ + +typedef struct alasitem { + unsigned int len; /* Length of name */ + uint32_t meta; /* Base META_ code */ +} alasitem; + +static const char alasnames[] = + STRING_pla0 + STRING_plb0 + STRING_napla0 + STRING_naplb0 + STRING_nla0 + STRING_nlb0 + STRING_positive_lookahead0 + STRING_positive_lookbehind0 + STRING_non_atomic_positive_lookahead0 + STRING_non_atomic_positive_lookbehind0 + STRING_negative_lookahead0 + STRING_negative_lookbehind0 + STRING_atomic0 + STRING_sr0 + STRING_asr0 + STRING_script_run0 + STRING_atomic_script_run; + +static const alasitem alasmeta[] = { + { 3, META_LOOKAHEAD }, + { 3, META_LOOKBEHIND }, + { 5, META_LOOKAHEAD_NA }, + { 5, META_LOOKBEHIND_NA }, + { 3, META_LOOKAHEADNOT }, + { 3, META_LOOKBEHINDNOT }, + { 18, META_LOOKAHEAD }, + { 19, META_LOOKBEHIND }, + { 29, META_LOOKAHEAD_NA }, + { 30, META_LOOKBEHIND_NA }, + { 18, META_LOOKAHEADNOT }, + { 19, META_LOOKBEHINDNOT }, + { 6, META_ATOMIC }, + { 2, META_SCRIPT_RUN }, /* sr = script run */ + { 3, META_ATOMIC_SCRIPT_RUN }, /* asr = atomic script run */ + { 10, META_SCRIPT_RUN }, /* script run */ + { 17, META_ATOMIC_SCRIPT_RUN } /* atomic script run */ +}; + +static const int alascount = sizeof(alasmeta)/sizeof(alasitem); + +/* Offsets from OP_STAR for case-independent and negative repeat opcodes. */ + +static uint32_t chartypeoffset[] = { + OP_STAR - OP_STAR, OP_STARI - OP_STAR, + OP_NOTSTAR - OP_STAR, OP_NOTSTARI - OP_STAR }; + +/* Tables of names of POSIX character classes and their lengths. The names are +now all in a single string, to reduce the number of relocations when a shared +library is dynamically loaded. The list of lengths is terminated by a zero +length entry. The first three must be alpha, lower, upper, as this is assumed +for handling case independence. The indices for several classes are needed, so +identify them. 
*/ + +static const char posix_names[] = + STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0 + STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0 + STRING_graph0 STRING_print0 STRING_punct0 STRING_space0 + STRING_word0 STRING_xdigit; + +static const uint8_t posix_name_lengths[] = { + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; + +#define PC_DIGIT 7 +#define PC_GRAPH 8 +#define PC_PRINT 9 +#define PC_PUNCT 10 +#define PC_XDIGIT 13 + +/* Table of class bit maps for each POSIX class. Each class is formed from a +base map, with an optional addition or removal of another map. Then, for some +classes, there is some additional tweaking: for [:blank:] the vertical space +characters are removed, and for [:alpha:] and [:alnum:] the underscore +character is removed. The triples in the table consist of the base map offset, +second map offset or -1 if no second map, and a non-negative value for map +addition or a negative value for map subtraction (if there are two maps). The +absolute value of the third field has these meanings: 0 => no tweaking, 1 => +remove vertical space characters, 2 => remove underscore. */ + +static const int posix_class_maps[] = { + cbit_word, cbit_digit, -2, /* alpha */ + cbit_lower, -1, 0, /* lower */ + cbit_upper, -1, 0, /* upper */ + cbit_word, -1, 2, /* alnum - word without underscore */ + cbit_print, cbit_cntrl, 0, /* ascii */ + cbit_space, -1, 1, /* blank - a GNU extension */ + cbit_cntrl, -1, 0, /* cntrl */ + cbit_digit, -1, 0, /* digit */ + cbit_graph, -1, 0, /* graph */ + cbit_print, -1, 0, /* print */ + cbit_punct, -1, 0, /* punct */ + cbit_space, -1, 0, /* space */ + cbit_word, -1, 0, /* word - a Perl extension */ + cbit_xdigit, -1, 0 /* xdigit */ +}; + +#ifdef SUPPORT_UNICODE + +/* The POSIX class Unicode property substitutes that are used in UCP mode must +be in the order of the POSIX class names, defined above. 
*/
+
+static int posix_substitutes[] = {   /* Two ints per class: type, then value */
+  PT_GC, ucp_L,     /* alpha */
+  PT_PC, ucp_Ll,    /* lower */
+  PT_PC, ucp_Lu,    /* upper */
+  PT_ALNUM, 0,      /* alnum */
+  -1, 0,            /* ascii, treat as non-UCP */
+  -1, 1,            /* blank, treat as \h */
+  PT_PC, ucp_Cc,    /* cntrl */
+  PT_PC, ucp_Nd,    /* digit */
+  PT_PXGRAPH, 0,    /* graph */
+  PT_PXPRINT, 0,    /* print */
+  PT_PXPUNCT, 0,    /* punct */
+  PT_PXSPACE, 0,    /* space */   /* Xps is POSIX space, but from 8.34 */
+  PT_WORD, 0,       /* word  */   /* Perl and POSIX space are the same */
+  PT_PXXDIGIT, 0    /* xdigit */  /* Perl has additional hex digits */
+};
+/* Number of (type, value) pairs in posix_substitutes. The divisor is taken
+from the array's own element so that the count stays right whatever the
+element type is; the previous form divided by sizeof(uint32_t) although the
+elements are int. */
+#define POSIX_SUBSIZE \
+  (sizeof(posix_substitutes) / (2*sizeof(posix_substitutes[0])))
+#endif  /* SUPPORT_UNICODE */
+
+/* Masks for checking option settings. When PCRE2_LITERAL is set, only a subset
+are allowed. Each full mask is built by OR-ing further bits onto the
+corresponding LITERAL mask, so the LITERAL sets are always subsets. */
+
+#define PUBLIC_LITERAL_COMPILE_OPTIONS \
+  (PCRE2_ANCHORED|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_ENDANCHORED| \
+   PCRE2_FIRSTLINE|PCRE2_LITERAL|PCRE2_MATCH_INVALID_UTF| \
+   PCRE2_NO_START_OPTIMIZE|PCRE2_NO_UTF_CHECK|PCRE2_USE_OFFSET_LIMIT|PCRE2_UTF)
+
+#define PUBLIC_COMPILE_OPTIONS \
+  (PUBLIC_LITERAL_COMPILE_OPTIONS| \
+   PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
+   PCRE2_ALT_VERBNAMES|PCRE2_DOLLAR_ENDONLY|PCRE2_DOTALL|PCRE2_DUPNAMES| \
+   PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MATCH_UNSET_BACKREF| \
+   PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C|PCRE2_NEVER_UCP| \
+   PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE|PCRE2_NO_AUTO_POSSESS| \
+   PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_UCP|PCRE2_UNGREEDY)
+
+#define PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS \
+   (PCRE2_EXTRA_MATCH_LINE|PCRE2_EXTRA_MATCH_WORD|PCRE2_EXTRA_CASELESS_RESTRICT)
+
+#define PUBLIC_COMPILE_EXTRA_OPTIONS \
+   (PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \
+    PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL| \
+    PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \
+    PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK|PCRE2_EXTRA_ASCII_BSD| \
+    PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX| \
+    PCRE2_EXTRA_ASCII_DIGIT)
+
+/* Compile time error code numbers. They are given names so that they can more
+easily be tracked. When a new number is added, the tables called eint1 and
+eint2 in pcre2posix.c may need to be updated, and a new error text must be
+added to compile_error_texts in pcre2_error.c. Also, the error codes in
+pcre2.h.in must be updated - their values are exactly 100 greater than these
+values.
+
+Only ERR0 has an explicit value; the rest follow consecutively, so ERRn is
+COMPILE_ERROR_BASE + n. New codes must therefore be appended at the end. */
+
+enum { ERR0 = COMPILE_ERROR_BASE,
+       ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,  ERR10,
+       ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
+       ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
+       ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
+       ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
+       ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
+       ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
+       ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
+       ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
+       ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100,
+       ERR101 };
+
+/* This is a table of start-of-pattern options such as (*UTF) and settings such
+as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
+compatibility, (*UTFn) is supported in the relevant libraries, but (*UTF) is
+generic and always supported.
*/
+
+enum { PSO_OPT,     /* Value is an option bit */
+       PSO_FLG,     /* Value is a flag bit */
+       PSO_NL,      /* Value is a newline type */
+       PSO_BSR,     /* Value is a \R type */
+       PSO_LIMH,    /* Read integer value for heap limit */
+       PSO_LIMM,    /* Read integer value for match limit */
+       PSO_LIMD     /* Read integer value for depth limit */
+     };
+
+typedef struct pso {
+  const uint8_t *name;   /* Option text; per the macro names it ends in ')' or '=' */
+  uint16_t length;       /* Length of name; the counts below include that ')' or '=' */
+  uint16_t type;         /* One of the PSO_ values above */
+  uint32_t value;        /* Bit or type to apply; 0 in the PSO_LIM* rows */
+} pso;
+
+/* NB: STRING_UTFn_RIGHTPAR contains the length as well */
+
+static const pso pso_list[] = {
+  { (uint8_t *)STRING_UTFn_RIGHTPAR,                  PSO_OPT, PCRE2_UTF },
+  { (uint8_t *)STRING_UTF_RIGHTPAR,                4, PSO_OPT, PCRE2_UTF },
+  { (uint8_t *)STRING_UCP_RIGHTPAR,                4, PSO_OPT, PCRE2_UCP },
+  { (uint8_t *)STRING_NOTEMPTY_RIGHTPAR,           9, PSO_FLG, PCRE2_NOTEMPTY_SET },
+  { (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,  17, PSO_FLG, PCRE2_NE_ATST_SET },
+  { (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR,   16, PSO_OPT, PCRE2_NO_AUTO_POSSESS },
+  { (uint8_t *)STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR },
+  { (uint8_t *)STRING_NO_JIT_RIGHTPAR,             7, PSO_FLG, PCRE2_NOJIT },
+  { (uint8_t *)STRING_NO_START_OPT_RIGHTPAR,      13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
+  { (uint8_t *)STRING_LIMIT_HEAP_EQ,              11, PSO_LIMH, 0 },
+  { (uint8_t *)STRING_LIMIT_MATCH_EQ,             12, PSO_LIMM, 0 },
+  { (uint8_t *)STRING_LIMIT_DEPTH_EQ,             12, PSO_LIMD, 0 },
+  { (uint8_t *)STRING_LIMIT_RECURSION_EQ,         16, PSO_LIMD, 0 },
+  { (uint8_t *)STRING_CR_RIGHTPAR,                 3, PSO_NL,  PCRE2_NEWLINE_CR },
+  { (uint8_t *)STRING_LF_RIGHTPAR,                 3, PSO_NL,  PCRE2_NEWLINE_LF },
+  { (uint8_t *)STRING_CRLF_RIGHTPAR,               5, PSO_NL,  PCRE2_NEWLINE_CRLF },
+  { (uint8_t *)STRING_ANY_RIGHTPAR,                4, PSO_NL,  PCRE2_NEWLINE_ANY },
+  { (uint8_t *)STRING_NUL_RIGHTPAR,                4, PSO_NL,  PCRE2_NEWLINE_NUL },
+  { (uint8_t *)STRING_ANYCRLF_RIGHTPAR,            8, PSO_NL,  PCRE2_NEWLINE_ANYCRLF },
+  { (uint8_t *)STRING_BSR_ANYCRLF_RIGHTPAR,       12, PSO_BSR, PCRE2_BSR_ANYCRLF },
+  { (uint8_t *)STRING_BSR_UNICODE_RIGHTPAR,       12, PSO_BSR, PCRE2_BSR_UNICODE }
+};
+
+/* This table is used when converting repeating opcodes into possessified
+versions as a result of an explicit possessive quantifier such as ++. A zero
+value means there is no possessified version - in those cases the item in
+question must be wrapped in ONCE brackets. The table is truncated at OP_CALLOUT
+because all relevant opcodes are less than that.
+
+The table is indexed by opcode number, so its layout must track the order of
+the opcodes named in the comments on the right. In every group only the greedy
+form (the first of each pair) has a non-zero entry. */
+
+static const uint8_t opcode_possessify[] = {
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0 - 15  */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 16 - 31 */
+
+  0,                       /* NOTI */
+  OP_POSSTAR, 0,           /* STAR, MINSTAR */
+  OP_POSPLUS, 0,           /* PLUS, MINPLUS */
+  OP_POSQUERY, 0,          /* QUERY, MINQUERY */
+  OP_POSUPTO, 0,           /* UPTO, MINUPTO */
+  0,                       /* EXACT */
+  0, 0, 0, 0,              /* POS{STAR,PLUS,QUERY,UPTO} */
+
+  OP_POSSTARI, 0,          /* STARI, MINSTARI */
+  OP_POSPLUSI, 0,          /* PLUSI, MINPLUSI */
+  OP_POSQUERYI, 0,         /* QUERYI, MINQUERYI */
+  OP_POSUPTOI, 0,          /* UPTOI, MINUPTOI */
+  0,                       /* EXACTI */
+  0, 0, 0, 0,              /* POS{STARI,PLUSI,QUERYI,UPTOI} */
+
+  OP_NOTPOSSTAR, 0,        /* NOTSTAR, NOTMINSTAR */
+  OP_NOTPOSPLUS, 0,        /* NOTPLUS, NOTMINPLUS */
+  OP_NOTPOSQUERY, 0,       /* NOTQUERY, NOTMINQUERY */
+  OP_NOTPOSUPTO, 0,        /* NOTUPTO, NOTMINUPTO */
+  0,                       /* NOTEXACT */
+  0, 0, 0, 0,              /* NOTPOS{STAR,PLUS,QUERY,UPTO} */
+
+  OP_NOTPOSSTARI, 0,       /* NOTSTARI, NOTMINSTARI */
+  OP_NOTPOSPLUSI, 0,       /* NOTPLUSI, NOTMINPLUSI */
+  OP_NOTPOSQUERYI, 0,      /* NOTQUERYI, NOTMINQUERYI */
+  OP_NOTPOSUPTOI, 0,       /* NOTUPTOI, NOTMINUPTOI */
+  0,                       /* NOTEXACTI */
+  0, 0, 0, 0,              /* NOTPOS{STARI,PLUSI,QUERYI,UPTOI} */
+
+  OP_TYPEPOSSTAR, 0,       /* TYPESTAR, TYPEMINSTAR */
+  OP_TYPEPOSPLUS, 0,       /* TYPEPLUS, TYPEMINPLUS */
+  OP_TYPEPOSQUERY, 0,      /* TYPEQUERY, TYPEMINQUERY */
+  OP_TYPEPOSUPTO, 0,       /* TYPEUPTO, TYPEMINUPTO */
+  0,                       /* TYPEEXACT */
+  0, 0, 0, 0,              /* TYPEPOS{STAR,PLUS,QUERY,UPTO} */
+
+  OP_CRPOSSTAR, 0,         /* CRSTAR, CRMINSTAR */
+  OP_CRPOSPLUS, 0,         /* CRPLUS, CRMINPLUS */
+  OP_CRPOSQUERY, 0,        /* CRQUERY, CRMINQUERY */
+  OP_CRPOSRANGE, 0,        /* CRRANGE, CRMINRANGE */
+  0, 0, 0, 0,              /* CRPOS{STAR,PLUS,QUERY,RANGE} */
+
+  0, 0, 0,                 /* CLASS, NCLASS, XCLASS */
+  0, 0,                    /* REF, REFI */
+  0, 0,                    /* DNREF, DNREFI */
+  0, 0                     /* RECURSE, CALLOUT */
+};
+
+
+#ifdef DEBUG_SHOW_PARSED
+/*************************************************
+*     Show the parsed pattern for debugging      *
+*************************************************/
+
+/* For debugging the pre-scan, this code, which outputs the parsed data vector,
+can be enabled.
+
+Starting at cb->parsed_pattern, each item is written to stderr on a line of
+its own, prefixed by "+++", the item's index in the vector and its value in
+hex. Values below META_END are shown as a character when they lie strictly
+between 32 and 128. Items that carry extra data words consume those words as
+well. The function returns when it reaches META_END or a META code that it
+does not recognise.
+
+Argument:   cb    the compile block whose parsed_pattern is to be shown
+Returns:    nothing
+*/
+
+static void show_parsed(compile_block *cb)
+{
+uint32_t *pptr = cb->parsed_pattern;
+
+for (;;)
+  {
+  int max, min;
+  PCRE2_SIZE offset;
+  uint32_t i;
+  uint32_t length;
+  uint32_t meta_arg = META_DATA(*pptr);
+
+  fprintf(stderr, "+++ %02d %.8x ", (int)(pptr - cb->parsed_pattern), *pptr);
+
+  if (*pptr < META_END)        /* Not a META item: show printable values only */
+    {
+    if (*pptr > 32 && *pptr < 128) fprintf(stderr, "%c", *pptr);
+    pptr++;
+    }
+
+  else switch (META_CODE(*pptr++))
+    {
+    default:
+    fprintf(stderr, "**** OOPS - unknown META value - giving up ****\n");
+    return;
+
+    case META_END:
+    fprintf(stderr, "META_END\n");
+    return;
+
+    case META_CAPTURE:
+    fprintf(stderr, "META_CAPTURE %d", meta_arg);
+    break;
+
+    case META_RECURSE:
+    GETOFFSET(offset, pptr);
+    fprintf(stderr, "META_RECURSE %d %zd", meta_arg, offset);
+    break;
+
+    case META_BACKREF:
+    if (meta_arg < 10)         /* Offsets for groups 0-9 come from the compile block */
+      offset = cb->small_ref_offset[meta_arg];
+    else
+      GETOFFSET(offset, pptr);
+    fprintf(stderr, "META_BACKREF %d %zd", meta_arg, offset);
+    break;
+
+    case META_ESCAPE:
+    if (meta_arg == ESC_P || meta_arg == ESC_p)
+      {
+      uint32_t ptype = *pptr >> 16;         /* Top 16 bits of the data word */
+      uint32_t pvalue = *pptr++ & 0xffff;   /* Bottom 16 bits */
+      fprintf(stderr, "META \\%c %d %d", (meta_arg == ESC_P)? 'P':'p',
+        ptype, pvalue);
+      }
+    else
+      {
+      uint32_t cc;
+      /* There's just one escape we might have here that isn't negated in the
+      escapes table. Any other code is looked up by searching the table for
+      its negation; '?' is shown when nothing matches. */
+      if (meta_arg == ESC_g) cc = CHAR_g;
+      else for (cc = ESCAPES_FIRST; cc <= ESCAPES_LAST; cc++)
+        {
+        if (meta_arg == (uint32_t)(-escapes[cc - ESCAPES_FIRST])) break;
+        }
+      if (cc > ESCAPES_LAST) cc = CHAR_QUESTION_MARK;
+      fprintf(stderr, "META \\%c", cc);
+      }
+    break;
+
+    case META_MINMAX:          /* Two data words follow: min, then max */
+    min = *pptr++;
+    max = *pptr++;
+    if (max != REPEAT_UNLIMITED)
+      fprintf(stderr, "META {%d,%d}", min, max);
+    else
+      fprintf(stderr, "META {%d,}", min);
+    break;
+
+    case META_MINMAX_QUERY:
+    min = *pptr++;
+    max = *pptr++;
+    if (max != REPEAT_UNLIMITED)
+      fprintf(stderr, "META {%d,%d}?", min, max);
+    else
+      fprintf(stderr, "META {%d,}?", min);
+    break;
+
+    case META_MINMAX_PLUS:
+    min = *pptr++;
+    max = *pptr++;
+    if (max != REPEAT_UNLIMITED)
+      fprintf(stderr, "META {%d,%d}+", min, max);
+    else
+      fprintf(stderr, "META {%d,}+", min);
+    break;
+
+    case META_BIGVALUE: fprintf(stderr, "META_BIGVALUE %.8x", *pptr++); break;
+    case META_CIRCUMFLEX: fprintf(stderr, "META_CIRCUMFLEX"); break;
+    case META_COND_ASSERT: fprintf(stderr, "META_COND_ASSERT"); break;
+    case META_DOLLAR: fprintf(stderr, "META_DOLLAR"); break;
+    case META_DOT: fprintf(stderr, "META_DOT"); break;
+    case META_ASTERISK: fprintf(stderr, "META *"); break;
+    case META_ASTERISK_QUERY: fprintf(stderr, "META *?"); break;
+    case META_ASTERISK_PLUS: fprintf(stderr, "META *+"); break;
+    case META_PLUS: fprintf(stderr, "META +"); break;
+    case META_PLUS_QUERY: fprintf(stderr, "META +?"); break;
+    case META_PLUS_PLUS: fprintf(stderr, "META ++"); break;
+    case META_QUERY: fprintf(stderr, "META ?"); break;
+    case META_QUERY_QUERY: fprintf(stderr, "META ??"); break;
+    case META_QUERY_PLUS: fprintf(stderr, "META ?+"); break;
+
+    case META_ATOMIC: fprintf(stderr, "META (?>"); break;
+    case META_NOCAPTURE: fprintf(stderr, "META (?:"); break;
+    case META_LOOKAHEAD: fprintf(stderr, "META (?="); break;
+    case META_LOOKAHEADNOT: fprintf(stderr, "META (?!"); break;
+    case META_LOOKAHEAD_NA: fprintf(stderr, "META (*napla:"); break;
+    case META_SCRIPT_RUN: fprintf(stderr, "META (*sr:"); break;
+    case META_KET: fprintf(stderr, "META )"); break;
+    case META_ALT: fprintf(stderr, "META | %d", meta_arg); break;
+
+    case META_CLASS: fprintf(stderr, "META ["); break;
+    case META_CLASS_NOT: fprintf(stderr, "META [^"); break;
+    case META_CLASS_END: fprintf(stderr, "META ]"); break;
+    case META_CLASS_EMPTY: fprintf(stderr, "META []"); break;
+    case META_CLASS_EMPTY_NOT: fprintf(stderr, "META [^]"); break;
+
+    case META_RANGE_LITERAL: fprintf(stderr, "META - (literal)"); break;
+    case META_RANGE_ESCAPED: fprintf(stderr, "META - (escaped)"); break;
+
+    case META_POSIX: fprintf(stderr, "META_POSIX %d", *pptr++); break;
+    case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %d", *pptr++); break;
+
+    case META_ACCEPT: fprintf(stderr, "META (*ACCEPT)"); break;
+    case META_FAIL: fprintf(stderr, "META (*FAIL)"); break;
+    case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
+    case META_PRUNE: fprintf(stderr, "META (*PRUNE)"); break;
+    case META_SKIP: fprintf(stderr, "META (*SKIP)"); break;
+    case META_THEN: fprintf(stderr, "META (*THEN)"); break;
+
+    case META_OPTIONS:         /* Two data words follow */
+    fprintf(stderr, "META_OPTIONS 0x%08x 0x%08x", pptr[0], pptr[1]);
+    pptr += 2;
+    break;
+
+    case META_LOOKBEHIND:      /* Shows the first of two data words, skips both */
+    fprintf(stderr, "META (?<= %d %d", meta_arg, *pptr);
+    pptr += 2;
+    break;
+
+    case META_LOOKBEHIND_NA:
+    fprintf(stderr, "META (*naplb: %d %d", meta_arg, *pptr);
+    pptr += 2;
+    break;
+
+    case META_LOOKBEHINDNOT:   /* NOTE(review): unlike the two lookbehind cases above, this prints "(?=" and then a "%d.%d)" pair; text may have been lost from this copy - compare with upstream before relying on it */
+    fprintf(stderr, "META (?=");
+    fprintf(stderr, "%d.", *pptr++);
+    fprintf(stderr, "%d)", *pptr++);
+    break;
+
+    case META_COND_NAME:       /* NOTE(review): the "(?()" label looks as if a bracketed word is missing - confirm against upstream */
+    fprintf(stderr, "META (?() length=%d offset=", *pptr++);
+    GETOFFSET(offset, pptr);
+    fprintf(stderr, "%zd", offset);
+    break;
+
+    case META_COND_RNAME:
+    fprintf(stderr, "META (?(R&name) length=%d offset=", *pptr++);
+    GETOFFSET(offset, pptr);
+    fprintf(stderr, "%zd", offset);
+    break;
+
+    /* This is kept as a name, because it might be. */
+
+    case META_COND_RNUMBER:
+    fprintf(stderr, "META (?(Rnumber) length=%d offset=", *pptr++);
+    GETOFFSET(offset, pptr);
+    fprintf(stderr, "%zd", offset);
+    break;
+
+    case META_MARK:
+    fprintf(stderr, "META (*MARK:");
+    goto SHOWARG;
+
+    case META_COMMIT_ARG:
+    fprintf(stderr, "META (*COMMIT:");
+    goto SHOWARG;
+
+    case META_PRUNE_ARG:
+    fprintf(stderr, "META (*PRUNE:");
+    goto SHOWARG;
+
+    case META_SKIP_ARG:
+    fprintf(stderr, "META (*SKIP:");
+    goto SHOWARG;
+
+    case META_THEN_ARG:
+    fprintf(stderr, "META (*THEN:");
+    SHOWARG:                   /* Shared tail: a length word, then that many characters */
+    length = *pptr++;
+    for (i = 0; i < length; i++)
+      {
+      uint32_t cc = *pptr++;
+      if (cc > 32 && cc < 128) fprintf(stderr, "%c", cc);
+        else fprintf(stderr, "\\x{%x}", cc);
+      }
+    fprintf(stderr, ") length=%u", length);
+    break;
+    }
+  fprintf(stderr, "\n");
+  }
+return;
+}
+#endif  /* DEBUG_SHOW_PARSED */
+
+
+
+/*************************************************
+*               Copy compiled code               *
+*************************************************/
+
+/* Compiled JIT code cannot be copied, so the new compiled block has no
+associated JIT data.
+
+The new block is obtained from the malloc() function held in the original's
+memctl and is a byte-for-byte copy of code->blocksize bytes, apart from the
+cleared executable_jit field. The character tables are not duplicated: the
+copy holds the same tables pointer as the original.
+
+Argument:   code    the compiled pattern to copy, or NULL
+Returns:    the new block, or NULL if code is NULL or the allocation fails
+*/
+
+PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
+pcre2_code_copy(const pcre2_code *code)
+{
+PCRE2_SIZE* ref_count;
+pcre2_code *newcode;
+
+if (code == NULL) return NULL;
+newcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data);
+if (newcode == NULL) return NULL;
+memcpy(newcode, code, code->blocksize);
+newcode->executable_jit = NULL;
+
+/* If the code is one that has been deserialized, increment the reference count
+in the decoded tables. The count is stored immediately after the TABLES_LENGTH
+bytes of table data. */
+
+if ((code->flags & PCRE2_DEREF_TABLES) != 0)
+  {
+  ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
+  (*ref_count)++;
+  }
+
+return newcode;
+}
+
+
+
+/*************************************************
+*     Copy compiled code and character tables    *
+*************************************************/
+
+/* Compiled JIT code cannot be copied, so the new compiled block has no
+associated JIT data.
This version of code_copy also makes a separate copy of +the character tables. */ + +PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION +pcre2_code_copy_with_tables(const pcre2_code *code) +{ +PCRE2_SIZE* ref_count; +pcre2_code *newcode; +uint8_t *newtables; + +if (code == NULL) return NULL; +newcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data); +if (newcode == NULL) return NULL; +memcpy(newcode, code, code->blocksize); +newcode->executable_jit = NULL; + +newtables = code->memctl.malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), + code->memctl.memory_data); +if (newtables == NULL) + { + code->memctl.free((void *)newcode, code->memctl.memory_data); + return NULL; + } +memcpy(newtables, code->tables, TABLES_LENGTH); +ref_count = (PCRE2_SIZE *)(newtables + TABLES_LENGTH); +*ref_count = 1; + +newcode->tables = newtables; +newcode->flags |= PCRE2_DEREF_TABLES; +return newcode; +} + + + +/************************************************* +* Free compiled code * +*************************************************/ + +PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_code_free(pcre2_code *code) +{ +PCRE2_SIZE* ref_count; + +if (code != NULL) + { +#ifdef SUPPORT_JIT + if (code->executable_jit != NULL) + PRIV(jit_free)(code->executable_jit, &code->memctl); +#endif + + if ((code->flags & PCRE2_DEREF_TABLES) != 0) + { + /* Decoded tables belong to the codes after deserialization, and they must + be freed when there are no more references to them. The *ref_count should + always be > 0. */ + + ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH); + if (*ref_count > 0) + { + (*ref_count)--; + if (*ref_count == 0) + code->memctl.free((void *)code->tables, code->memctl.memory_data); + } + } + + code->memctl.free(code, code->memctl.memory_data); + } +} + + + +/************************************************* +* Read a number, possibly signed * +*************************************************/ + +/* This function is used to read numbers in the pattern. 
The initial pointer +must be at the sign or first digit of the number. When relative values +(introduced by + or -) are allowed, they are relative group numbers, and the +result must be greater than zero. + +Arguments: + ptrptr points to the character pointer variable + ptrend points to the end of the input string + allow_sign if < 0, sign not allowed; if >= 0, sign is relative to this + max_value the largest number allowed + max_error the error to give for an over-large number + intptr where to put the result + errcodeptr where to put an error code + +Returns: TRUE - a number was read + FALSE - errorcode == 0 => no number was found + errorcode != 0 => an error occurred +*/ + +static BOOL +read_number(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, int32_t allow_sign, + uint32_t max_value, uint32_t max_error, int *intptr, int *errorcodeptr) +{ +int sign = 0; +uint32_t n = 0; +PCRE2_SPTR ptr = *ptrptr; +BOOL yield = FALSE; + +*errorcodeptr = 0; + +if (allow_sign >= 0 && ptr < ptrend) + { + if (*ptr == CHAR_PLUS) + { + sign = +1; + max_value -= allow_sign; + ptr++; + } + else if (*ptr == CHAR_MINUS) + { + sign = -1; + ptr++; + } + } + +if (ptr >= ptrend || !IS_DIGIT(*ptr)) return FALSE; +while (ptr < ptrend && IS_DIGIT(*ptr)) + { + n = n * 10 + *ptr++ - CHAR_0; + if (n > max_value) + { + *errorcodeptr = max_error; + goto EXIT; + } + } + +if (allow_sign >= 0 && sign != 0) + { + if (n == 0) + { + *errorcodeptr = ERR26; /* +0 and -0 are not allowed */ + goto EXIT; + } + + if (sign > 0) n += allow_sign; + else if ((int)n > allow_sign) + { + *errorcodeptr = ERR15; /* Non-existent subpattern */ + goto EXIT; + } + else n = allow_sign + 1 - n; + } + +yield = TRUE; + +EXIT: +*intptr = n; +*ptrptr = ptr; +return yield; +} + + + +/************************************************* +* Read repeat counts * +*************************************************/ + +/* Read an item of the form {n,m} and return the values when non-NULL pointers +are supplied. 
Repeat counts must be less than 65536 (MAX_REPEAT_COUNT); a +larger value is used for "unlimited". We have to use signed arguments for +read_number() because it is capable of returning a signed value. As of Perl +5.34.0 either n or m may be absent, but not both. Perl also allows spaces and +tabs after { and before } and between the numbers and the comma, so we do too. + +Arguments: + ptrptr points to pointer to character after '{' + ptrend pointer to end of input + minp if not NULL, pointer to int for min + maxp if not NULL, pointer to int for max + errorcodeptr points to error code variable + +Returns: FALSE if not a repeat quantifier, errorcode set zero + FALSE on error, with errorcode set non-zero + TRUE on success, with pointer updated to point after '}' +*/ + +static BOOL +read_repeat_counts(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *minp, + uint32_t *maxp, int *errorcodeptr) +{ +PCRE2_SPTR p = *ptrptr; +PCRE2_SPTR pp; +BOOL yield = FALSE; +BOOL had_minimum = FALSE; +int32_t min = 0; +int32_t max = REPEAT_UNLIMITED; /* This value is larger than MAX_REPEAT_COUNT */ + +*errorcodeptr = 0; +while (p < ptrend && (*p == CHAR_SPACE || *p == CHAR_HT)) p++; + +/* Check the syntax before interpreting. Otherwise, a non-quantifier sequence +such as "X{123456ABC" would incorrectly give a "number too big in quantifier" +error. 
*/ + +pp = p; +if (pp < ptrend && IS_DIGIT(*pp)) + { + had_minimum = TRUE; + while (++pp < ptrend && IS_DIGIT(*pp)) {} + } + +while (pp < ptrend && (*pp == CHAR_SPACE || *pp == CHAR_HT)) pp++; +if (pp >= ptrend) return FALSE; + +if (*pp == CHAR_RIGHT_CURLY_BRACKET) + { + if (!had_minimum) return FALSE; + } +else + { + if (*pp++ != CHAR_COMMA) return FALSE; + while (pp < ptrend && (*pp == CHAR_SPACE || *pp == CHAR_HT)) pp++; + if (pp >= ptrend) return FALSE; + if (IS_DIGIT(*pp)) + { + while (++pp < ptrend && IS_DIGIT(*pp)) {} + } + else if (!had_minimum) return FALSE; + while (pp < ptrend && (*pp == CHAR_SPACE || *pp == CHAR_HT)) pp++; + if (pp >= ptrend || *pp != CHAR_RIGHT_CURLY_BRACKET) return FALSE; + } + +/* Now process the quantifier for real. We know it must be {n} or (n,} or {,m} +or {n,m}. The only error that read_number() can return is for a number that is +too big. If *errorcodeptr is returned as zero it means no number was found. */ + +/* Deal with {,m} or n too big. If we successfully read m there is no need to +check m >= n because n defaults to zero. */ + +if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &min, errorcodeptr)) + { + if (*errorcodeptr != 0) goto EXIT; /* n too big */ + p++; /* Skip comma and subsequent spaces */ + while (p < ptrend && (*p == CHAR_SPACE || *p == CHAR_HT)) p++; + if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &max, errorcodeptr)) + { + if (*errorcodeptr != 0) goto EXIT; /* m too big */ + } + } + +/* Have read one number. 
Deal with {n} or {n,} or {n,m} */ + +else + { + while (p < ptrend && (*p == CHAR_SPACE || *p == CHAR_HT)) p++; + if (*p == CHAR_RIGHT_CURLY_BRACKET) + { + max = min; + } + else /* Handle {n,} or {n,m} */ + { + p++; /* Skip comma and subsequent spaces */ + while (p < ptrend && (*p == CHAR_SPACE || *p == CHAR_HT)) p++; + if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &max, errorcodeptr)) + { + if (*errorcodeptr != 0) goto EXIT; /* m too big */ + } + + if (max < min) + { + *errorcodeptr = ERR4; + goto EXIT; + } + } + } + +/* Valid quantifier exists */ + +while (p < ptrend && (*p == CHAR_SPACE || *p == CHAR_HT)) p++; +p++; +yield = TRUE; +if (minp != NULL) *minp = (uint32_t)min; +if (maxp != NULL) *maxp = (uint32_t)max; + +/* Update the pattern pointer */ + +EXIT: +*ptrptr = p; +return yield; +} + + + +/************************************************* +* Handle escapes * +*************************************************/ + +/* This function is called when a \ has been encountered. It either returns a +positive value for a simple escape such as \d, or 0 for a data character, which +is placed in chptr. A backreference to group n is returned as negative n. On +entry, ptr is pointing at the character after \. On exit, it points after the +final code unit of the escape sequence. + +This function is also called from pcre2_substitute() to handle escape sequences +in replacement strings. In this case, the cb argument is NULL, and in the case +of escapes that have further processing, only sequences that define a data +character are recognised. The isclass argument is not relevant; the options +argument is the final value of the compiled pattern's options. 
+ +Arguments: + ptrptr points to the input position pointer + ptrend points to the end of the input + chptr points to a returned data character + errorcodeptr points to the errorcode variable (containing zero) + options the current options bits + xoptions the current extra options bits + isclass TRUE if inside a character class + cb compile data block or NULL when called from pcre2_substitute() + +Returns: zero => a data character + positive => a special escape sequence + negative => a numerical back reference + on error, errorcodeptr is set non-zero +*/ + +int +PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr, + int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclass, + compile_block *cb) +{ +BOOL utf = (options & PCRE2_UTF) != 0; +BOOL alt_bsux = + ((options & PCRE2_ALT_BSUX) | (xoptions & PCRE2_EXTRA_ALT_BSUX)) != 0; +PCRE2_SPTR ptr = *ptrptr; +uint32_t c, cc; +int escape = 0; +int i; + +/* If backslash is at the end of the string, it's an error. */ + +if (ptr >= ptrend) + { + *errorcodeptr = ERR1; + return 0; + } + +GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */ +*errorcodeptr = 0; /* Be optimistic */ + +/* Non-alphanumerics are literals, so we just leave the value in c. An initial +value test saves a memory lookup for code points outside the alphanumeric +range. */ + +if (c < ESCAPES_FIRST || c > ESCAPES_LAST) {} /* Definitely literal */ + +/* Otherwise, do a table lookup. Non-zero values need little processing here. A +positive value is a literal value for something like \n. A negative value is +the negation of one of the ESC_ macros that is passed back for handling by the +calling function. Some extra checking is needed for \N because only \N{U+dddd} +is supported. If the value is zero, further processing is handled below. 
*/ + +else if ((i = escapes[c - ESCAPES_FIRST]) != 0) + { + if (i > 0) + { + c = (uint32_t)i; + if (c == CHAR_CR && (xoptions & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0) + c = CHAR_LF; + } + else /* Negative table entry */ + { + escape = -i; /* Else return a special escape */ + if (cb != NULL && (escape == ESC_P || escape == ESC_p || escape == ESC_X)) + cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */ + + /* Perl supports \N{name} for character names and \N{U+dddd} for numerical + Unicode code points, as well as plain \N for "not newline". PCRE does not + support \N{name}. However, it does support quantification such as \N{2,3}, + so if \N{ is not followed by U+dddd we check for a quantifier. */ + + if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET) + { + PCRE2_SPTR p = ptr + 1; + + /* Perl ignores spaces and tabs after { */ + + while (p < ptrend && (*p == CHAR_SPACE || *p == CHAR_HT)) p++; + + /* \N{U+ can be handled by the \x{ code. However, this construction is + not valid in EBCDIC environments because it specifies a Unicode + character, not a codepoint in the local code. For example \N{U+0041} + must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode + casing semantics for the entire pattern, so allow it only in UTF (i.e. + Unicode) mode. */ + + if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS) + { +#ifdef EBCDIC + *errorcodeptr = ERR93; +#else + if (utf) + { + ptr = p + 2; + escape = 0; /* Not a fancy escape after all */ + goto COME_FROM_NU; + } + else *errorcodeptr = ERR93; +#endif + } + + /* Give an error if what follows is not a quantifier, but don't override + an error set by the quantifier reader (e.g. number overflow). */ + + else + { + if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) && + *errorcodeptr == 0) + *errorcodeptr = ERR37; + } + } + } + } + +/* Escapes that need further processing, including those that are unknown, have +a zero entry in the lookup table. 
When called from pcre2_substitute(), only \c, +\o, and \x are recognized (\u and \U can never appear as they are used for case +forcing). */ + +else + { + int s; + PCRE2_SPTR oldptr; + BOOL overflow; + + /* Filter calls from pcre2_substitute(). */ + + if (cb == NULL) + { + if (c != CHAR_c && c != CHAR_o && c != CHAR_x) + { + *errorcodeptr = ERR3; + return 0; + } + alt_bsux = FALSE; /* Do not modify \x handling */ + } + + switch (c) + { + /* A number of Perl escapes are not handled by PCRE. We give an explicit + error. */ + + case CHAR_F: + case CHAR_l: + case CHAR_L: + *errorcodeptr = ERR37; + break; + + /* \u is unrecognized when neither PCRE2_ALT_BSUX nor PCRE2_EXTRA_ALT_BSUX + is set. Otherwise, \u must be followed by exactly four hex digits or, if + PCRE2_EXTRA_ALT_BSUX is set, by any number of hex digits in braces. + Otherwise it is a lowercase u letter. This gives some compatibility with + ECMAScript (aka JavaScript). Unlike other braced items, white space is NOT + allowed. When \u{ is not followed by hex digits, a special return is given + because otherwise \u{ 12} (for example) would be treated as u{12}. 
*/ + + case CHAR_u: + if (!alt_bsux) *errorcodeptr = ERR37; else + { + uint32_t xc; + + if (ptr >= ptrend) break; + if (*ptr == CHAR_LEFT_CURLY_BRACKET && + (xoptions & PCRE2_EXTRA_ALT_BSUX) != 0) + { + PCRE2_SPTR hptr = ptr + 1; + + cc = 0; + while (hptr < ptrend && (xc = XDIGIT(*hptr)) != 0xff) + { + if ((cc & 0xf0000000) != 0) /* Test for 32-bit overflow */ + { + *errorcodeptr = ERR77; + ptr = hptr; /* Show where */ + break; /* *hptr != } will cause another break below */ + } + cc = (cc << 4) | xc; + hptr++; + } + + if (hptr == ptr + 1 || /* No hex digits */ + hptr >= ptrend || /* Hit end of input */ + *hptr != CHAR_RIGHT_CURLY_BRACKET) /* No } terminator */ + { + escape = ESC_ub; /* Special return */ + ptr++; /* Skip { */ + break; /* Hex escape not recognized */ + } + + c = cc; /* Accept the code point */ + ptr = hptr + 1; + } + + else /* Must be exactly 4 hex digits */ + { + if (ptrend - ptr < 4) break; /* Less than 4 chars */ + if ((cc = XDIGIT(ptr[0])) == 0xff) break; /* Not a hex digit */ + if ((xc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */ + cc = (cc << 4) | xc; + if ((xc = XDIGIT(ptr[2])) == 0xff) break; /* Not a hex digit */ + cc = (cc << 4) | xc; + if ((xc = XDIGIT(ptr[3])) == 0xff) break; /* Not a hex digit */ + c = (cc << 4) | xc; + ptr += 4; + } + + if (utf) + { + if (c > 0x10ffffU) *errorcodeptr = ERR77; + else + if (c >= 0xd800 && c <= 0xdfff && + (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) + *errorcodeptr = ERR73; + } + else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77; + } + break; + + /* \U is unrecognized unless PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set, + in which case it is an upper case letter. */ + + case CHAR_U: + if (!alt_bsux) *errorcodeptr = ERR37; + break; + + /* In a character class, \g is just a literal "g". Outside a character + class, \g must be followed by one of a number of specific things: + + (1) A number, either plain or braced. If positive, it is an absolute + backreference. 
If negative, it is a relative backreference. This is a Perl + 5.10 feature. + + (2) Perl 5.10 also supports \g{name} as a reference to a named group. This + is part of Perl's movement towards a unified syntax for back references. As + this is synonymous with \k{name}, we fudge it up by pretending it really + was \k{name}. + + (3) For Oniguruma compatibility we also support \g followed by a name or a + number either in angle brackets or in single quotes. However, these are + (possibly recursive) subroutine calls, _not_ backreferences. We return + the ESC_g code. + + Summary: Return a negative number for a numerical back reference, ESC_k for + a named back reference, and ESC_g for a named or numbered subroutine call. + */ + + case CHAR_g: + if (isclass) break; + + if (ptr >= ptrend) + { + *errorcodeptr = ERR57; + break; + } + + if (*ptr == CHAR_LESS_THAN_SIGN || *ptr == CHAR_APOSTROPHE) + { + escape = ESC_g; + break; + } + + /* If there is a brace delimiter, try to read a numerical reference. If + there isn't one, assume we have a name and treat it as \k. 
*/ + + if (*ptr == CHAR_LEFT_CURLY_BRACKET) + { + PCRE2_SPTR p = ptr + 1; + + while (p < ptrend && (*p == CHAR_SPACE || *p == CHAR_HT)) p++; + if (!read_number(&p, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &s, + errorcodeptr)) + { + if (*errorcodeptr == 0) escape = ESC_k; /* No number found */ + break; + } + while (p < ptrend && (*p == CHAR_SPACE || *p == CHAR_HT)) p++; + + if (p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET) + { + *errorcodeptr = ERR57; + break; + } + ptr = p + 1; + } + + /* Read an undelimited number */ + + else + { + if (!read_number(&ptr, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &s, + errorcodeptr)) + { + if (*errorcodeptr == 0) *errorcodeptr = ERR57; /* No number found */ + break; + } + } + + if (s <= 0) + { + *errorcodeptr = ERR15; + break; + } + + escape = -s; + break; + + /* The handling of escape sequences consisting of a string of digits + starting with one that is not zero is not straightforward. Perl has changed + over the years. Nowadays \g{} for backreferences and \o{} for octal are + recommended to avoid the ambiguities in the old syntax. + + Outside a character class, the digits are read as a decimal number. If the + number is less than 10, or if there are that many previous extracting left + brackets, it is a back reference. Otherwise, up to three octal digits are + read to form an escaped character code. Thus \123 is likely to be octal 123 + (cf \0123, which is octal 012 followed by the literal 3). + + Inside a character class, \ followed by a digit is always either a literal + 8 or 9 or an octal number. */ + + case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5: + case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: + + if (!isclass) + { + oldptr = ptr; + ptr--; /* Back to the digit */ + + /* As we know we are at a digit, the only possible error from + read_number() is a number that is too large to be a group number. In this + case we fall through handle this as not a group reference. 
If we have + read a small enough number, check for a back reference. + + \1 to \9 are always back references. \8x and \9x are too; \1x to \7x + are octal escapes if there are not that many previous captures. */ + + if (read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, 0, &s, errorcodeptr) && + (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount)) + { + if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61; + else escape = -s; /* Indicates a back reference */ + break; + } + + ptr = oldptr; /* Put the pointer back and fall through */ + } + + /* Handle a digit following \ when the number is not a back reference, or + we are within a character class. If the first digit is 8 or 9, Perl used to + generate a binary zero and then treat the digit as a following literal. At + least by Perl 5.18 this changed so as not to insert the binary zero. */ + + if (c >= CHAR_8) break; + + /* Fall through */ + + /* \0 always starts an octal number, but we may drop through to here with a + larger first octal digit. The original code used just to take the least + significant 8 bits of octal numbers (I think this is what early Perls used + to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode, + but no more than 3 octal digits. */ + + case CHAR_0: + c -= CHAR_0; + while(i++ < 2 && ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7) + c = c * 8 + *ptr++ - CHAR_0; +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (!utf && c > 0xff) *errorcodeptr = ERR51; +#endif + break; + + /* \o is a relatively new Perl feature, supporting a more general way of + specifying character codes in octal. The only supported form is \o{ddd}, + with optional spaces or tabs after { and before }. 
*/ + + case CHAR_o: + if (ptr >= ptrend || *ptr++ != CHAR_LEFT_CURLY_BRACKET) + { + ptr--; + *errorcodeptr = ERR55; + break; + } + + while (ptr < ptrend && (*ptr == CHAR_SPACE || *ptr == CHAR_HT)) ptr++; + if (ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET) + { + *errorcodeptr = ERR78; + break; + } + + c = 0; + overflow = FALSE; + while (ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7) + { + cc = *ptr++; + if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c >= 0x20000000l) { overflow = TRUE; break; } +#endif + c = (c << 3) + (cc - CHAR_0); +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; } +#elif PCRE2_CODE_UNIT_WIDTH == 16 + if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; } +#elif PCRE2_CODE_UNIT_WIDTH == 32 + if (utf && c > 0x10ffffU) { overflow = TRUE; break; } +#endif + } + + while (ptr < ptrend && (*ptr == CHAR_SPACE || *ptr == CHAR_HT)) ptr++; + + if (overflow) + { + while (ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++; + *errorcodeptr = ERR34; + } + else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET) + { + if (utf && c >= 0xd800 && c <= 0xdfff && + (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) + { + ptr--; + *errorcodeptr = ERR73; + } + } + else + { + ptr--; + *errorcodeptr = ERR64; + } + break; + + /* When PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set, \x must be followed + by two hexadecimal digits. Otherwise it is a lowercase x letter. */ + + case CHAR_x: + if (alt_bsux) + { + uint32_t xc; + if (ptrend - ptr < 2) break; /* Less than 2 characters */ + if ((cc = XDIGIT(ptr[0])) == 0xff) break; /* Not a hex digit */ + if ((xc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */ + c = (cc << 4) | xc; + ptr += 2; + } + + /* Handle \x in Perl's style. \x{ddd} is a character code which can be + greater than 0xff in UTF-8 or non-8bit mode, but only if the ddd are hex + digits. 
If not, { used to be treated as a data character. However, Perl + seems to read hex digits up to the first non-such, and ignore the rest, so + that, for example \x{zz} matches a binary zero. This seems crazy, so PCRE + now gives an error. */ + + else + { + if (ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET) + { + ptr++; + while (ptr < ptrend && (*ptr == CHAR_SPACE || *ptr == CHAR_HT)) ptr++; + +#ifndef EBCDIC + COME_FROM_NU: +#endif + if (ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET) + { + *errorcodeptr = ERR78; + break; + } + c = 0; + overflow = FALSE; + + while (ptr < ptrend && (cc = XDIGIT(*ptr)) != 0xff) + { + ptr++; + if (c == 0 && cc == 0) continue; /* Leading zeroes */ +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c >= 0x10000000l) { overflow = TRUE; break; } +#endif + c = (c << 4) | cc; + if ((utf && c > 0x10ffffU) || (!utf && c > MAX_NON_UTF_CHAR)) + { + overflow = TRUE; + break; + } + } + + /* Perl ignores spaces and tabs before } */ + + while (ptr < ptrend && (*ptr == CHAR_SPACE || *ptr == CHAR_HT)) ptr++; + + /* On overflow, skip remaining hex digits */ + + if (overflow) + { + while (ptr < ptrend && XDIGIT(*ptr) != 0xff) ptr++; + *errorcodeptr = ERR34; + } + else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET) + { + if (utf && c >= 0xd800 && c <= 0xdfff && + (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) + { + ptr--; + *errorcodeptr = ERR73; + } + } + + /* If the sequence of hex digits (followed by optional space) does not + end with '}', give an error. We used just to recognize this construct + and fall through to the normal \x handling, but nowadays Perl gives an + error, which seems much more sensible, so we do too. 
*/ + + else + { + ptr--; + *errorcodeptr = ERR67; + } + } /* End of \x{} processing */ + + /* Read a up to two hex digits after \x */ + + else + { + c = 0; + if (ptr >= ptrend || (cc = XDIGIT(*ptr)) == 0xff) break; /* Not a hex digit */ + ptr++; + c = cc; + if (ptr >= ptrend || (cc = XDIGIT(*ptr)) == 0xff) break; /* Not a hex digit */ + ptr++; + c = (c << 4) | cc; + } /* End of \xdd handling */ + } /* End of Perl-style \x handling */ + break; + + /* The handling of \c is different in ASCII and EBCDIC environments. In an + ASCII (or Unicode) environment, an error is given if the character + following \c is not a printable ASCII character. Otherwise, the following + character is upper-cased if it is a letter, and after that the 0x40 bit is + flipped. The result is the value of the escape. + + In an EBCDIC environment the handling of \c is compatible with the + specification in the perlebcdic document. The following character must be + a letter or one of small number of special characters. These provide a + means of defining the character values 0-31. + + For testing the EBCDIC handling of \c in an ASCII environment, recognize + the EBCDIC value of 'c' explicitly. */ + +#if defined EBCDIC && 'a' != 0x81 + case 0x83: +#else + case CHAR_c: +#endif + if (ptr >= ptrend) + { + *errorcodeptr = ERR2; + break; + } + c = *ptr; + if (c >= CHAR_a && c <= CHAR_z) c = UPPER_CASE(c); + + /* Handle \c in an ASCII/Unicode environment. */ + +#ifndef EBCDIC /* ASCII/UTF-8 coding */ + if (c < 32 || c > 126) /* Excludes all non-printable ASCII */ + { + *errorcodeptr = ERR68; + break; + } + c ^= 0x40; + + /* Handle \c in an EBCDIC environment. The special case \c? is converted to + 255 (0xff) or 95 (0x5f) if other characters suggest we are using the + POSIX-BC encoding. (This is the way Perl indicates that it handles \c?.) + The other valid sequences correspond to a list of specific characters. */ + +#else + if (c == CHAR_QUESTION_MARK) + c = ('\\' == 188 && '`' == 74)? 
0x5f : 0xff; + else + { + for (i = 0; i < 32; i++) + { + if (c == ebcdic_escape_c[i]) break; + } + if (i < 32) c = i; else *errorcodeptr = ERR68; + } +#endif /* EBCDIC */ + + ptr++; + break; + + /* Any other alphanumeric following \ is an error. Perl gives an error only + if in warning mode, but PCRE doesn't have a warning mode. */ + + default: + *errorcodeptr = ERR3; + *ptrptr = ptr - 1; /* Point to the character at fault */ + return 0; + } + } + +/* Set the pointer to the next character before returning. */ + +*ptrptr = ptr; +*chptr = c; +return escape; +} + + + +#ifdef SUPPORT_UNICODE +/************************************************* +* Handle \P and \p * +*************************************************/ + +/* This function is called after \P or \p has been encountered, provided that +PCRE2 is compiled with support for UTF and Unicode properties. On entry, the +contents of ptrptr are pointing after the P or p. On exit, it is left pointing +after the final code unit of the escape sequence. + +Arguments: + ptrptr the pattern position pointer + negptr a boolean that is set TRUE for negation else FALSE + ptypeptr an unsigned int that is set to the type value + pdataptr an unsigned int that is set to the detailed property value + errorcodeptr the error code variable + cb the compile data + +Returns: TRUE if the type value was found, or FALSE for an invalid type +*/ + +static BOOL +get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, uint16_t *ptypeptr, + uint16_t *pdataptr, int *errorcodeptr, compile_block *cb) +{ +PCRE2_UCHAR c; +PCRE2_SIZE i, bot, top; +PCRE2_SPTR ptr = *ptrptr; +PCRE2_UCHAR name[50]; +PCRE2_UCHAR *vptr = NULL; +uint16_t ptscript = PT_NOTSCRIPT; + +if (ptr >= cb->end_pattern) goto ERROR_RETURN; +c = *ptr++; +*negptr = FALSE; + +/* \P or \p can be followed by a name in {}, optionally preceded by ^ for +negation. 
*/ + +if (c == CHAR_LEFT_CURLY_BRACKET) + { + if (ptr >= cb->end_pattern) goto ERROR_RETURN; + + if (*ptr == CHAR_CIRCUMFLEX_ACCENT) + { + *negptr = TRUE; + ptr++; + } + + for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++) + { + if (ptr >= cb->end_pattern) goto ERROR_RETURN; + c = *ptr++; +#if PCRE2_CODE_UNIT_WIDTH != 8 + while (c == '_' || c == '-' || (c <= 0xff && isspace(c))) +#else + while (c == '_' || c == '-' || isspace(c)) +#endif + { + if (ptr >= cb->end_pattern) goto ERROR_RETURN; + c = *ptr++; + } + if (c == CHAR_NUL) goto ERROR_RETURN; + if (c == CHAR_RIGHT_CURLY_BRACKET) break; + name[i] = tolower(c); + if ((c == ':' || c == '=') && vptr == NULL) vptr = name + i; + } + + if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN; + name[i] = 0; + } + +/* If { doesn't follow \p or \P there is just one following character, which +must be an ASCII letter. */ + +else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0) + { + name[0] = tolower(c); + name[1] = 0; + } +else goto ERROR_RETURN; + +*ptrptr = ptr; + +/* If the property contains ':' or '=' we have class name and value separately +specified. The following are supported: + + . Bidi_Class (synonym bc), for which the property names are "bidi". + . Script (synonym sc) for which the property name is the script name + . Script_Extensions (synonym scx), ditto + +As this is a small number, we currently just check the names directly. If this +grows, a sorted table and a switch will be neater. + +For both the script properties, set a PT_xxx value so that (1) they can be +distinguished and (2) invalid script names that happen to be the name of +another property can be diagnosed. 
*/ + +if (vptr != NULL) + { + int offset = 0; + PCRE2_UCHAR sname[8]; + + *vptr = 0; /* Terminate property name */ + if (PRIV(strcmp_c8)(name, STRING_bidiclass) == 0 || + PRIV(strcmp_c8)(name, STRING_bc) == 0) + { + offset = 4; + sname[0] = CHAR_b; + sname[1] = CHAR_i; /* There is no strcpy_c8 function */ + sname[2] = CHAR_d; + sname[3] = CHAR_i; + } + + else if (PRIV(strcmp_c8)(name, STRING_script) == 0 || + PRIV(strcmp_c8)(name, STRING_sc) == 0) + ptscript = PT_SC; + + else if (PRIV(strcmp_c8)(name, STRING_scriptextensions) == 0 || + PRIV(strcmp_c8)(name, STRING_scx) == 0) + ptscript = PT_SCX; + + else + { + *errorcodeptr = ERR47; + return FALSE; + } + + /* Adjust the string in name[] as needed */ + + memmove(name + offset, vptr + 1, (name + i - vptr)*sizeof(PCRE2_UCHAR)); + if (offset != 0) memmove(name, sname, offset*sizeof(PCRE2_UCHAR)); + } + +/* Search for a recognized property using binary chop. */ + +bot = 0; +top = PRIV(utt_size); + +while (bot < top) + { + int r; + i = (bot + top) >> 1; + r = PRIV(strcmp_c8)(name, PRIV(utt_names) + PRIV(utt)[i].name_offset); + + /* When a matching property is found, some extra checking is needed when the + \p{xx:yy} syntax is used and xx is either sc or scx. */ + + if (r == 0) + { + *pdataptr = PRIV(utt)[i].value; + if (vptr == NULL || ptscript == PT_NOTSCRIPT) + { + *ptypeptr = PRIV(utt)[i].type; + return TRUE; + } + + switch (PRIV(utt)[i].type) + { + case PT_SC: + *ptypeptr = PT_SC; + return TRUE; + + case PT_SCX: + *ptypeptr = ptscript; + return TRUE; + } + + break; /* Non-script found */ + } + + if (r > 0) bot = i + 1; else top = i; + } + +*errorcodeptr = ERR47; /* Unrecognized property */ +return FALSE; + +ERROR_RETURN: /* Malformed \P or \p */ +*errorcodeptr = ERR46; +*ptrptr = ptr; +return FALSE; +} +#endif + + + +/************************************************* +* Check for POSIX class syntax * +*************************************************/ + +/* This function is called when the sequence "[:" or "[." 
or "[=" is +encountered in a character class. It checks whether this is followed by a +sequence of characters terminated by a matching ":]" or ".]" or "=]". If we +reach an unescaped ']' without the special preceding character, return FALSE. + +Originally, this function only recognized a sequence of letters between the +terminators, but it seems that Perl recognizes any sequence of characters, +though of course unknown POSIX names are subsequently rejected. Perl gives an +"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE +didn't consider this to be a POSIX class. Likewise for [:1234:]. + +The problem in trying to be exactly like Perl is in the handling of escapes. We +have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX +class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code +below handles the special cases \\ and \], but does not try to do any other +escape processing. This makes it different from Perl for cases such as +[:l\ower:] where Perl recognizes it as the POSIX class "lower" but PCRE does +not recognize "l\ower". This is a lesser evil than not diagnosing bad classes +when Perl does, I think. + +A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not. +It seems that the appearance of a nested POSIX class supersedes an apparent +external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or +a digit. This is handled by returning FALSE if the start of a new group with +the same terminator is encountered, since the next closing sequence must close +the nested group, not the outer one. + +In Perl, unescaped square brackets may also appear as part of class names. For +example, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for +[:a[:abc]b][b:] it gives unknown POSIX class "[:abc]b][b:]", which does not +seem right at all. PCRE does not allow closing square brackets in POSIX class +names. 
+ +Arguments: + ptr pointer to the character after the initial [ (colon, dot, equals) + ptrend pointer to the end of the pattern + endptr where to return a pointer to the terminating ':', '.', or '=' + +Returns: TRUE or FALSE +*/ + +static BOOL +check_posix_syntax(PCRE2_SPTR ptr, PCRE2_SPTR ptrend, PCRE2_SPTR *endptr) +{ +PCRE2_UCHAR terminator; /* Don't combine these lines; the Solaris cc */ +terminator = *ptr++; /* compiler warns about "non-constant" initializer. */ + +for (; ptrend - ptr >= 2; ptr++) + { + if (*ptr == CHAR_BACKSLASH && + (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET || ptr[1] == CHAR_BACKSLASH)) + ptr++; + + else if ((*ptr == CHAR_LEFT_SQUARE_BRACKET && ptr[1] == terminator) || + *ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE; + + else if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) + { + *endptr = ptr; + return TRUE; + } + } + +return FALSE; +} + + + +/************************************************* +* Check POSIX class name * +*************************************************/ + +/* This function is called to check the name given in a POSIX-style class entry +such as [:alnum:]. + +Arguments: + ptr points to the first letter + len the length of the name + +Returns: a value representing the name, or -1 if unknown +*/ + +static int +check_posix_name(PCRE2_SPTR ptr, int len) +{ +const char *pn = posix_names; +int yield = 0; +while (posix_name_lengths[yield] != 0) + { + if (len == posix_name_lengths[yield] && + PRIV(strncmp_c8)(ptr, pn, (unsigned int)len) == 0) return yield; + pn += posix_name_lengths[yield] + 1; + yield++; + } +return -1; +} + + + +/************************************************* +* Read a subpattern or VERB name * +*************************************************/ + +/* This function is called from parse_regex() below whenever it needs to read +the name of a subpattern or a (*VERB) or an (*alpha_assertion). The initial +pointer must be to the preceding character. 
If that character is '*' we are
+reading a verb or alpha assertion name. The pointer is updated to point after
+the name, for a VERB or alpha assertion name, or after the name's terminator
+for a subpattern name. Returning both the offset and the name pointer is
+redundant information, but some callers use one and some the other, so it is
+simplest just to return both. When the name is in braces, spaces and tabs are
+allowed (and ignored) at either end.
+
+On failure the pointer is still updated: it is left where scanning stopped.
+The name pointer and offset are written as soon as the start of the name has
+been located, so they may have been set even when FALSE is returned. An empty
+name is rejected only for subpattern names; for a verb or alpha assertion an
+empty name (with at least one more character left in the pattern) yields TRUE
+with a length of zero.
+
+Arguments:
+  ptrptr      points to the character pointer variable
+  ptrend      points to the end of the input string
+  utf         true if the input is UTF-encoded
+  terminator  the terminator of a subpattern name must be this
+  offsetptr   where to put the offset from the start of the pattern
+  nameptr     where to put a pointer to the name in the input
+  namelenptr  where to put the length of the name
+  errorcodeptr where to put an error code
+  cb          pointer to the compile data block
+
+Returns:    TRUE if a name was read
+            FALSE otherwise, with error code set
+*/
+
+static BOOL
+read_name(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, BOOL utf, uint32_t terminator,
+  PCRE2_SIZE *offsetptr, PCRE2_SPTR *nameptr, uint32_t *namelenptr,
+  int *errorcodeptr, compile_block *cb)
+{
+PCRE2_SPTR ptr = *ptrptr;
+BOOL is_group = (*ptr++ != CHAR_ASTERISK);   /* Always steps past the preceding character */
+BOOL is_braced = terminator == CHAR_RIGHT_CURLY_BRACKET;
+
+if (is_braced)
+  while (ptr < ptrend && (*ptr == CHAR_SPACE || *ptr == CHAR_HT)) ptr++;
+
+if (ptr >= ptrend)                 /* No characters in name */
+  {
+  *errorcodeptr = is_group? ERR62: /* Subpattern name expected */
+                            ERR60; /* Verb not recognized or malformed */
+  goto FAILED;
+  }
+
+*nameptr = ptr;
+*offsetptr = (PCRE2_SIZE)(ptr - cb->start_pattern);
+
+/* In UTF mode, a group name may contain letters and decimal digits as defined
+by Unicode properties, and underscores, but must not start with a digit. The
+first character is classified before the loop so that a leading digit can be
+rejected; after that the loop stops at the first character that is not a
+letter, a decimal digit, or an underscore, or at the end of the pattern. */
+
+#ifdef SUPPORT_UNICODE
+if (utf && is_group)
+  {
+  uint32_t c, type;
+
+  GETCHAR(c, ptr);
+  type = UCD_CHARTYPE(c);
+
+  if (type == ucp_Nd)
+    {
+    *errorcodeptr = ERR44;
+    goto FAILED;
+    }
+
+  for(;;)
+    {
+    if (type != ucp_Nd && PRIV(ucp_gentype)[type] != ucp_L &&
+        c != CHAR_UNDERSCORE) break;
+    ptr++;
+    FORWARDCHARTEST(ptr, ptrend);
+    if (ptr >= ptrend) break;
+    GETCHAR(c, ptr);
+    type = UCD_CHARTYPE(c);
+    }
+  }
+else   /* Binds to the braced block that follows the #endif */
+#else
+(void)utf;  /* Avoid compiler warning */
+#endif  /* SUPPORT_UNICODE */
+
+/* Handle non-group names and group names in non-UTF modes. A group name must
+not start with a digit. If either of the others start with a digit it just
+won't be recognized. Without SUPPORT_UNICODE the block below is a plain
+compound statement that always runs; with it, the block is the else-branch of
+the test above. */
+
+  {
+  if (is_group && IS_DIGIT(*ptr))
+    {
+    *errorcodeptr = ERR44;
+    goto FAILED;
+    }
+
+  while (ptr < ptrend && MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0)
+    {
+    ptr++;
+    }
+  }
+
+/* Check name length (a pointer difference, so it is counted in code units) */
+
+if (ptr > *nameptr + MAX_NAME_SIZE)
+  {
+  *errorcodeptr = ERR48;
+  goto FAILED;
+  }
+*namelenptr = (uint32_t)(ptr - *nameptr);
+
+/* Subpattern names must not be empty, and their terminator is checked here.
+(What follows a verb or alpha assertion name is checked separately.) */
+
+if (is_group)
+  {
+  if (ptr == *nameptr)
+    {
+    *errorcodeptr = ERR62;   /* Subpattern name expected */
+    goto FAILED;
+    }
+  if (is_braced)
+    while (ptr < ptrend && (*ptr == CHAR_SPACE || *ptr == CHAR_HT)) ptr++;
+  if (ptr >= ptrend || *ptr != (PCRE2_UCHAR)terminator)
+    {
+    *errorcodeptr = ERR42;
+    goto FAILED;
+    }
+  ptr++;   /* Step past the terminator */
+  }
+
+*ptrptr = ptr;
+return TRUE;
+
+FAILED:
+*ptrptr = ptr;   /* Report how far scanning got */
+return FALSE;
+} + + + +/************************************************* +* Manage callouts at start of cycle * +*************************************************/ + +/* At the start of a new item in parse_regex() we are able to record the +details of the previous item in a prior callout, and also to set up an +automatic callout if enabled. 
Avoid having two adjacent automatic callouts, +which would otherwise happen for items such as \Q that contribute nothing to +the parsed pattern. + +Arguments: + ptr current pattern pointer + pcalloutptr points to a pointer to previous callout, or NULL + auto_callout TRUE if auto_callouts are enabled + parsed_pattern the parsed pattern pointer + cb compile block + +Returns: possibly updated parsed_pattern pointer. +*/ + +static uint32_t * +manage_callouts(PCRE2_SPTR ptr, uint32_t **pcalloutptr, BOOL auto_callout, + uint32_t *parsed_pattern, compile_block *cb) +{ +uint32_t *previous_callout = *pcalloutptr; + +if (previous_callout != NULL) previous_callout[2] = (uint32_t)(ptr - + cb->start_pattern - (PCRE2_SIZE)previous_callout[1]); + +if (!auto_callout) previous_callout = NULL; else + { + if (previous_callout == NULL || + previous_callout != parsed_pattern - 4 || + previous_callout[3] != 255) + { + previous_callout = parsed_pattern; /* Set up new automatic callout */ + parsed_pattern += 4; + previous_callout[0] = META_CALLOUT_NUMBER; + previous_callout[2] = 0; + previous_callout[3] = 255; + } + previous_callout[1] = (uint32_t)(ptr - cb->start_pattern); + } + +*pcalloutptr = previous_callout; +return parsed_pattern; +} + + + +/************************************************* +* Handle \d, \D, \s, \S, \w, \W * +*************************************************/ + +/* This function is called from parse_regex() below, both for freestanding +escapes, and those within classes, to handle those escapes that may change when +Unicode property support is requested. Note that PCRE2_UCP will never be set +without Unicode support because that is checked when pcre2_compile() is called. + +Arguments: + escape the ESC_... 
value
+  parsed_pattern  where to add the code
+  options         options bits
+  xoptions        extra options bits
+
+Returns:          updated value of parsed_pattern
+
+One word, META_ESCAPE plus the escape itself, is written when PCRE2_UCP is not
+set or when the PCRE2_EXTRA_ASCII_BSx bit that belongs to the escape is set.
+Otherwise the escape is turned into a property test: META_ESCAPE plus ESC_p
+(ESC_P for the upper case forms), followed by a word holding the property type
+in its upper 16 bits and the property value in its lower 16 bits. An escape
+other than the six handled here has no ASCII bit and no property word.
+*/
+static uint32_t *
+handle_escdsw(int escape, uint32_t *parsed_pattern, uint32_t options,
+  uint32_t xoptions)
+{
+uint32_t ascii_bit = 0;      /* PCRE2_EXTRA_ASCII_BSx bit for this escape */
+uint32_t propdata = 0;       /* Second word of the property form */
+BOOL has_propdata = TRUE;
+BOOL negated = (escape == ESC_D || escape == ESC_S || escape == ESC_W);
+
+switch(escape)
+  {
+  case ESC_d:
+  case ESC_D:
+  ascii_bit = PCRE2_EXTRA_ASCII_BSD;
+  propdata = (PT_PC << 16) | ucp_Nd;
+  break;
+
+  case ESC_s:
+  case ESC_S:
+  ascii_bit = PCRE2_EXTRA_ASCII_BSS;
+  propdata = PT_SPACE << 16;
+  break;
+
+  case ESC_w:
+  case ESC_W:
+  ascii_bit = PCRE2_EXTRA_ASCII_BSW;
+  propdata = PT_WORD << 16;
+  break;
+
+  default:
+  has_propdata = FALSE;
+  break;
+  }
+
+/* The plain form: pass the escape through unchanged. */
+
+if ((options & PCRE2_UCP) == 0 || (xoptions & ascii_bit) != 0)
+  {
+  *parsed_pattern++ = META_ESCAPE + escape;
+  return parsed_pattern;
+  }
+
+/* The Unicode form: a property escape plus its data word. */
+
+*parsed_pattern++ = META_ESCAPE + (uint32_t)(negated? ESC_P : ESC_p);
+if (has_propdata) *parsed_pattern++ = propdata;
+
+return parsed_pattern;
+} + + + +/************************************************* +* Parse regex and identify named groups * +*************************************************/ + +/* This function is called first of all. It scans the pattern and does two +things: (1) It identifies capturing groups and makes a table of named capturing +groups so that information about them is fully available to both the compiling +scans. (2) It writes a parsed version of the pattern with comments omitted and +escapes processed into the parsed_pattern vector. 
+ +Arguments: + ptr points to the start of the pattern + options compiling dynamic options (may change during the scan) + has_lookbehind points to a boolean, set TRUE if a lookbehind is found + cb pointer to the compile data block + +Returns: zero on success or a non-zero error code, with the + error offset placed in the cb field +*/ +
+/* A structure and some flags for dealing with nested groups. parse_regex()
+addresses records of this type inside the compile workspace: it computes an
+end_nests limit from cb->start_workspace and cb->workspace_size, rounded down
+so that no record can straddle the end of the workspace. */
+
+typedef struct nest_save {
+  uint16_t  nest_depth;
+  uint16_t  reset_group;
+  uint16_t  max_group;
+  uint16_t  flags;       /* Presumably a combination of the NSF_ bits below - confirm in parse_regex() */
+  uint32_t  options;     /* Presumably the saved options/xoptions values that */
+  uint32_t  xoptions;    /* parse_regex() tracks - confirm in parse_regex() */
+} nest_save;
+
+#define NSF_RESET          0x0001u
+#define NSF_CONDASSERT     0x0002u
+#define NSF_ATOMICSR       0x0004u
+
+/* Options that are changeable within the pattern must be tracked during
+parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing,
+but all must be tracked so that META_OPTIONS items set the correct values for
+the main compiling phase. */
+
+#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
+  PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
+  PCRE2_UNGREEDY)
+
+#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT| \
+  PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW| \
+  PCRE2_EXTRA_ASCII_DIGIT|PCRE2_EXTRA_ASCII_POSIX)
+
+/* States used for analyzing ranges in character classes. The two OK values
+must be last. */
+
+enum { RANGE_NO, RANGE_STARTED, RANGE_OK_ESCAPED, RANGE_OK_LITERAL };
+
+/* Only in 32-bit mode can there be literals > META_END. A macro encapsulates
+the storing of literal values in the main parsed pattern, where they can always
+be quantified.
+
+Usage notes: both forms assign to a variable called okquantifier, so the macro
+can be used only where one is in scope. Neither form is wrapped in
+do { } while (0): the 32-bit form is a brace block and the other form is two
+separate statements, so the macro must not be used as the unbraced body of an
+if, else or loop. The arguments are not parenthesized in the expansion and, in
+the 32-bit form, c is evaluated twice, so pass simple operands without side
+effects. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+#define PARSED_LITERAL(c, p) \
+  { \
+  if (c >= META_END) *p++ = META_BIGVALUE; \
+  *p++ = c; \
+  okquantifier = TRUE; \
+  }
+#else
+#define PARSED_LITERAL(c, p) *p++ = c; okquantifier = TRUE;
+#endif
+
+/* Here's the actual function. 
*/ + +static int parse_regex(PCRE2_SPTR ptr, uint32_t options, BOOL *has_lookbehind, + compile_block *cb) +{ +uint32_t c; +uint32_t delimiter; +uint32_t namelen; +uint32_t class_range_state; +uint32_t *verblengthptr = NULL; /* Value avoids compiler warning */ +uint32_t *verbstartptr = NULL; +uint32_t *previous_callout = NULL; +uint32_t *parsed_pattern = cb->parsed_pattern; +uint32_t *parsed_pattern_end = cb->parsed_pattern_end; +uint32_t *this_parsed_item = NULL; +uint32_t *prev_parsed_item = NULL; +uint32_t meta_quantifier = 0; +uint32_t add_after_mark = 0; +uint32_t xoptions = cb->cx->extra_options; +uint16_t nest_depth = 0; +int after_manual_callout = 0; +int expect_cond_assert = 0; +int errorcode = 0; +int escape; +int i; +BOOL inescq = FALSE; +BOOL inverbname = FALSE; +BOOL utf = (options & PCRE2_UTF) != 0; +BOOL auto_callout = (options & PCRE2_AUTO_CALLOUT) != 0; +BOOL isdupname; +BOOL negate_class; +BOOL okquantifier = FALSE; +PCRE2_SPTR thisptr; +PCRE2_SPTR name; +PCRE2_SPTR ptrend = cb->end_pattern; +PCRE2_SPTR verbnamestart = NULL; /* Value avoids compiler warning */ +named_group *ng; +nest_save *top_nest, *end_nests; + +/* Insert leading items for word and line matching (features provided for the +benefit of pcre2grep). */ + +if ((xoptions & PCRE2_EXTRA_MATCH_LINE) != 0) + { + *parsed_pattern++ = META_CIRCUMFLEX; + *parsed_pattern++ = META_NOCAPTURE; + } +else if ((xoptions & PCRE2_EXTRA_MATCH_WORD) != 0) + { + *parsed_pattern++ = META_ESCAPE + ESC_b; + *parsed_pattern++ = META_NOCAPTURE; + } + +/* If the pattern is actually a literal string, process it separately to avoid +cluttering up the main loop. 
*/ + +if ((options & PCRE2_LITERAL) != 0) + { + while (ptr < ptrend) + { + if (parsed_pattern >= parsed_pattern_end) + { + errorcode = ERR63; /* Internal error (parsed pattern overflow) */ + goto FAILED; + } + thisptr = ptr; + GETCHARINCTEST(c, ptr); + if (auto_callout) + parsed_pattern = manage_callouts(thisptr, &previous_callout, + auto_callout, parsed_pattern, cb); + PARSED_LITERAL(c, parsed_pattern); + } + goto PARSED_END; + } + +/* Process a real regex which may contain meta-characters. */ + +top_nest = NULL; +end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size); + +/* The size of the nest_save structure might not be a factor of the size of the +workspace. Therefore we must round down end_nests so as to correctly avoid +creating a nest_save that spans the end of the workspace. */ + +end_nests = (nest_save *)((char *)end_nests - + ((cb->workspace_size * sizeof(PCRE2_UCHAR)) % sizeof(nest_save))); + +/* PCRE2_EXTENDED_MORE implies PCRE2_EXTENDED */ + +if ((options & PCRE2_EXTENDED_MORE) != 0) options |= PCRE2_EXTENDED; + +/* Now scan the pattern */ + +while (ptr < ptrend) + { + int prev_expect_cond_assert; + uint32_t min_repeat = 0, max_repeat = 0; + uint32_t set, unset, *optset; + uint32_t xset, xunset, *xoptset; + uint32_t terminator; + uint32_t prev_meta_quantifier; + BOOL prev_okquantifier; + PCRE2_SPTR tempptr; + PCRE2_SIZE offset; + + if (parsed_pattern >= parsed_pattern_end) + { + errorcode = ERR63; /* Internal error (parsed pattern overflow) */ + goto FAILED; + } + + if (nest_depth > cb->cx->parens_nest_limit) + { + errorcode = ERR19; + goto FAILED; /* Parentheses too deeply nested */ + } + + /* If the last time round this loop something was added, parsed_pattern will + no longer be equal to this_parsed_item. Remember where the previous item + started and reset for the next item. Note that sometimes round the loop, + nothing gets added (e.g. for ignored white space). 
*/ + + if (this_parsed_item != parsed_pattern) + { + prev_parsed_item = this_parsed_item; + this_parsed_item = parsed_pattern; + } + + /* Get next input character, save its position for callout handling. */ + + thisptr = ptr; + GETCHARINCTEST(c, ptr); + + /* Copy quoted literals until \E, allowing for the possibility of automatic + callouts, except when processing a (*VERB) "name". */ + + if (inescq) + { + if (c == CHAR_BACKSLASH && ptr < ptrend && *ptr == CHAR_E) + { + inescq = FALSE; + ptr++; /* Skip E */ + } + else + { + if (expect_cond_assert > 0) /* A literal is not allowed if we are */ + { /* expecting a conditional assertion, */ + ptr--; /* but an empty \Q\E sequence is OK. */ + errorcode = ERR28; + goto FAILED; + } + if (inverbname) + { /* Don't use PARSED_LITERAL() because it */ +#if PCRE2_CODE_UNIT_WIDTH == 32 /* sets okquantifier. */ + if (c >= META_END) *parsed_pattern++ = META_BIGVALUE; +#endif + *parsed_pattern++ = c; + } + else + { + if (after_manual_callout-- <= 0) + parsed_pattern = manage_callouts(thisptr, &previous_callout, + auto_callout, parsed_pattern, cb); + PARSED_LITERAL(c, parsed_pattern); + } + meta_quantifier = 0; + } + continue; /* Next character */ + } + + /* If we are processing the "name" part of a (*VERB:NAME) item, all + characters up to the closing parenthesis are literals except when + PCRE2_ALT_VERBNAMES is set. That causes backslash interpretation, but only \Q + and \E and escaped characters are allowed (no character types such as \d). If + PCRE2_EXTENDED is also set, we must ignore white space and # comments. Do + this by not entering the special (*VERB:NAME) processing - they are then + picked up below. Note that c is a character, not a code unit, so we must not + use MAX_255 to test its size because MAX_255 tests code units and is assumed + TRUE in 8-bit mode. 
*/ + + if (inverbname && + ( + /* EITHER: not both options set */ + ((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) != + (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) || +#ifdef SUPPORT_UNICODE + /* OR: character > 255 AND not Unicode Pattern White Space */ + (c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) || +#endif + /* OR: not a # comment or isspace() white space */ + (c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0 +#ifdef SUPPORT_UNICODE + /* and not CHAR_NEL when Unicode is supported */ + && c != CHAR_NEL +#endif + ))) + { + PCRE2_SIZE verbnamelength; + + switch(c) + { + default: /* Don't use PARSED_LITERAL() because it */ +#if PCRE2_CODE_UNIT_WIDTH == 32 /* sets okquantifier. */ + if (c >= META_END) *parsed_pattern++ = META_BIGVALUE; +#endif + *parsed_pattern++ = c; + break; + + case CHAR_RIGHT_PARENTHESIS: + inverbname = FALSE; + /* This is the length in characters */ + verbnamelength = (PCRE2_SIZE)(parsed_pattern - verblengthptr - 1); + /* But the limit on the length is in code units */ + if (ptr - verbnamestart - 1 > (int)MAX_MARK) + { + ptr--; + errorcode = ERR76; + goto FAILED; + } + *verblengthptr = (uint32_t)verbnamelength; + + /* If this name was on a verb such as (*ACCEPT) which does not continue, + a (*MARK) was generated for the name. We now add the original verb as the + next item. */ + + if (add_after_mark != 0) + { + *parsed_pattern++ = add_after_mark; + add_after_mark = 0; + } + break; + + case CHAR_BACKSLASH: + if ((options & PCRE2_ALT_VERBNAMES) != 0) + { + escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, + xoptions, FALSE, cb); + if (errorcode != 0) goto FAILED; + } + else escape = 0; /* Treat all as literal */ + + switch(escape) + { + case 0: /* Don't use PARSED_LITERAL() because it */ +#if PCRE2_CODE_UNIT_WIDTH == 32 /* sets okquantifier. 
*/ + if (c >= META_END) *parsed_pattern++ = META_BIGVALUE; +#endif + *parsed_pattern++ = c; + break; + + case ESC_ub: + *parsed_pattern++ = CHAR_u; + PARSED_LITERAL(CHAR_LEFT_CURLY_BRACKET, parsed_pattern); + break; + + case ESC_Q: + inescq = TRUE; + break; + + case ESC_E: /* Ignore */ + break; + + default: + errorcode = ERR40; /* Invalid in verb name */ + goto FAILED; + } + } + continue; /* Next character in pattern */ + } + + /* Not a verb name character. At this point we must process everything that + must not change the quantification state. This is mainly comments, but we + handle \Q and \E here as well, so that an item such as A\Q\E+ is treated as + A+, as in Perl. An isolated \E is ignored. */ + + if (c == CHAR_BACKSLASH && ptr < ptrend) + { + if (*ptr == CHAR_Q || *ptr == CHAR_E) + { + inescq = *ptr == CHAR_Q; + ptr++; + continue; + } + } + + /* Skip over whitespace and # comments in extended mode. Note that c is a + character, not a code unit, so we must not use MAX_255 to test its size + because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The + whitespace characters are those designated as "Pattern White Space" by + Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is + U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a + subset of space characters that match \h and \v. */ + + if ((options & PCRE2_EXTENDED) != 0) + { + if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue; +#ifdef SUPPORT_UNICODE + if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue; +#endif + if (c == CHAR_NUMBER_SIGN) + { + while (ptr < ptrend) + { + if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */ + { /* IS_NEWLINE sets cb->nllen. 
*/ + ptr += cb->nllen; + break; + } + ptr++; +#ifdef SUPPORT_UNICODE + if (utf) FORWARDCHARTEST(ptr, ptrend); +#endif + } + continue; /* Next character in pattern */ + } + } + + /* Skip over bracketed comments */ + + if (c == CHAR_LEFT_PARENTHESIS && ptrend - ptr >= 2 && + ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN) + { + while (++ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS); + if (ptr >= ptrend) + { + errorcode = ERR18; /* A special error for missing ) in a comment */ + goto FAILED; /* to make it easier to debug. */ + } + ptr++; + continue; /* Next character in pattern */ + } + + /* If the next item is not a quantifier, fill in length of any previous + callout and create an auto callout if required. */ + + if (c != CHAR_ASTERISK && c != CHAR_PLUS && c != CHAR_QUESTION_MARK && + (c != CHAR_LEFT_CURLY_BRACKET || + (tempptr = ptr, + !read_repeat_counts(&tempptr, ptrend, NULL, NULL, &errorcode)))) + { + if (after_manual_callout-- <= 0) + { + parsed_pattern = manage_callouts(thisptr, &previous_callout, auto_callout, + parsed_pattern, cb); + this_parsed_item = parsed_pattern; /* New start for current item */ + } + } + + /* If expect_cond_assert is 2, we have just passed (?( and are expecting an + assertion, possibly preceded by a callout. If the value is 1, we have just + had the callout and expect an assertion. There must be at least 3 more + characters in all cases. When expect_cond_assert is 2, we know that the + current character is an opening parenthesis, as otherwise we wouldn't be + here. However, when it is 1, we need to check, and it's easiest just to check + always. Note that expect_cond_assert may be negative, since all callouts just + decrement it. 
*/ + + if (expect_cond_assert > 0) + { + BOOL ok = c == CHAR_LEFT_PARENTHESIS && ptrend - ptr >= 3 && + (ptr[0] == CHAR_QUESTION_MARK || ptr[0] == CHAR_ASTERISK); + if (ok) + { + if (ptr[0] == CHAR_ASTERISK) /* New alpha assertion format, possibly */ + { + ok = MAX_255(ptr[1]) && (cb->ctypes[ptr[1]] & ctype_lcletter) != 0; + } + else switch(ptr[1]) /* Traditional symbolic format */ + { + case CHAR_C: + ok = expect_cond_assert == 2; + break; + + case CHAR_EQUALS_SIGN: + case CHAR_EXCLAMATION_MARK: + break; + + case CHAR_LESS_THAN_SIGN: + ok = ptr[2] == CHAR_EQUALS_SIGN || ptr[2] == CHAR_EXCLAMATION_MARK; + break; + + default: + ok = FALSE; + } + } + + if (!ok) + { + ptr--; /* Adjust error offset */ + errorcode = ERR28; + goto FAILED; + } + } + + /* Remember whether we are expecting a conditional assertion, and set the + default for this item. */ + + prev_expect_cond_assert = expect_cond_assert; + expect_cond_assert = 0; + + /* Remember quantification status for the previous significant item, then set + default for this item. */ + + prev_okquantifier = okquantifier; + prev_meta_quantifier = meta_quantifier; + okquantifier = FALSE; + meta_quantifier = 0; + + /* If the previous significant item was a quantifier, adjust the parsed code + if there is a following modifier. The base meta value is always followed by + the PLUS and QUERY values, in that order. We do this here rather than after + reading a quantifier so that intervening comments and /x whitespace can be + ignored without having to replicate code. */ + + if (prev_meta_quantifier != 0 && (c == CHAR_QUESTION_MARK || c == CHAR_PLUS)) + { + parsed_pattern[(prev_meta_quantifier == META_MINMAX)? -3 : -1] = + prev_meta_quantifier + ((c == CHAR_QUESTION_MARK)? + 0x00020000u : 0x00010000u); + continue; /* Next character in pattern */ + } + + /* Process the next item in the main part of a pattern. 
*/ + + switch(c) + { + default: /* Non-special character */ + PARSED_LITERAL(c, parsed_pattern); + break; + + + /* ---- Escape sequence ---- */ + + case CHAR_BACKSLASH: + tempptr = ptr; + escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, + xoptions, FALSE, cb); + if (errorcode != 0) + { + ESCAPE_FAILED: + if ((xoptions & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0) + goto FAILED; + ptr = tempptr; + if (ptr >= ptrend) c = CHAR_BACKSLASH; else + { + GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */ + } + escape = 0; /* Treat as literal character */ + } + + /* The escape was a data escape or literal character. */ + + if (escape == 0) + { + PARSED_LITERAL(c, parsed_pattern); + } + + /* The escape was a back (or forward) reference. We keep the offset in + order to give a more useful diagnostic for a bad forward reference. For + references to groups numbered less than 10 we can't use more than two items + in parsed_pattern because they may be just two characters in the input (and + in a 64-bit world an offset may need two elements). So for them, the offset + of the first occurrent is held in a special vector. */ + + else if (escape < 0) + { + offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 1); + escape = -escape; + *parsed_pattern++ = META_BACKREF | (uint32_t)escape; + if (escape < 10) + { + if (cb->small_ref_offset[escape] == PCRE2_UNSET) + cb->small_ref_offset[escape] = offset; + } + else + { + PUTOFFSET(offset, parsed_pattern); + } + okquantifier = TRUE; + } + + /* The escape was a character class such as \d etc. or other special + escape indicator such as \A or \X. Most of them generate just a single + parsed item, but \P and \p are followed by a 16-bit type and a 16-bit + value. They are supported only when Unicode is available. The type and + value are packed into a single 32-bit value so that the whole sequences + uses only two elements in the parsed_vector. 
This is because the same + coding is used if \d (for example) is turned into \p{Nd} when PCRE2_UCP is + set. + + There are also some cases where the escape sequence is followed by a name: + \k{name}, \k<name>, and \k'name' are backreferences by name, and \g<name> + and \g'name' are subroutine calls by name; \g{name} is a synonym for + \k{name}. Note that \g<number> and \g'number' are handled by check_escape() + and returned as a negative value (handled above). A name is coded as an + offset into the pattern and a length. */ + + else switch (escape) + { + case ESC_C: +#ifdef NEVER_BACKSLASH_C + errorcode = ERR85; + goto ESCAPE_FAILED; +#else + if ((options & PCRE2_NEVER_BACKSLASH_C) != 0) + { + errorcode = ERR83; + goto ESCAPE_FAILED; + } +#endif + okquantifier = TRUE; + *parsed_pattern++ = META_ESCAPE + escape; + break; + + /* This is a special return that happens only in EXTRA_ALT_BSUX mode, + when \u{ is not followed by hex digits and }. It requests two literal + characters, u and { and we need this, as otherwise \u{ 12} (for example) + would be treated as u{12} now that spaces are allowed in quantifiers. */ + + case ESC_ub: + *parsed_pattern++ = CHAR_u; + PARSED_LITERAL(CHAR_LEFT_CURLY_BRACKET, parsed_pattern); + break; + + case ESC_X: +#ifndef SUPPORT_UNICODE + errorcode = ERR45; /* Supported only with Unicode support */ + goto ESCAPE_FAILED; +#endif + case ESC_H: + case ESC_h: + case ESC_N: + case ESC_R: + case ESC_V: + case ESC_v: + okquantifier = TRUE; + *parsed_pattern++ = META_ESCAPE + escape; + break; + + default: /* \A, \B, \b, \G, \K, \Z, \z cannot be quantified. */ + *parsed_pattern++ = META_ESCAPE + escape; + break; + + /* Escapes that may change in UCP mode. 
*/ + + case ESC_d: + case ESC_D: + case ESC_s: + case ESC_S: + case ESC_w: + case ESC_W: + okquantifier = TRUE; + parsed_pattern = handle_escdsw(escape, parsed_pattern, options, + xoptions); + break; + + /* Unicode property matching */ + + case ESC_P: + case ESC_p: +#ifdef SUPPORT_UNICODE + { + BOOL negated; + uint16_t ptype = 0, pdata = 0; + if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcode, cb)) + goto ESCAPE_FAILED; + if (negated) escape = (escape == ESC_P)? ESC_p : ESC_P; + *parsed_pattern++ = META_ESCAPE + escape; + *parsed_pattern++ = (ptype << 16) | pdata; + okquantifier = TRUE; + } +#else + errorcode = ERR45; + goto ESCAPE_FAILED; +#endif + break; /* End \P and \p */ + + /* When \g is used with quotes or angle brackets as delimiters, it is a + numerical or named subroutine call, and control comes here. When used + with brace delimiters it is a numberical back reference and does not come + here because check_escape() returns it directly as a reference. \k is + always a named back reference. */ + + case ESC_g: + case ESC_k: + if (ptr >= ptrend || (*ptr != CHAR_LEFT_CURLY_BRACKET && + *ptr != CHAR_LESS_THAN_SIGN && *ptr != CHAR_APOSTROPHE)) + { + errorcode = (escape == ESC_g)? ERR57 : ERR69; + goto ESCAPE_FAILED; + } + terminator = (*ptr == CHAR_LESS_THAN_SIGN)? + CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)? + CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET; + + /* For a non-braced \g, check for a numerical recursion. */ + + if (escape == ESC_g && terminator != CHAR_RIGHT_CURLY_BRACKET) + { + PCRE2_SPTR p = ptr + 1; + + if (read_number(&p, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &i, + &errorcode)) + { + if (p >= ptrend || *p != terminator) + { + errorcode = ERR57; + goto ESCAPE_FAILED; + } + ptr = p; + goto SET_RECURSION; + } + if (errorcode != 0) goto ESCAPE_FAILED; + } + + /* Not a numerical recursion. Perl allows spaces and tabs after { and + before } but not for other delimiters. 
*/ + + if (!read_name(&ptr, ptrend, utf, terminator, &offset, &name, &namelen, + &errorcode, cb)) goto ESCAPE_FAILED; + + /* \k and \g when used with braces are back references, whereas \g used + with quotes or angle brackets is a recursion */ + + *parsed_pattern++ = + (escape == ESC_k || terminator == CHAR_RIGHT_CURLY_BRACKET)? + META_BACKREF_BYNAME : META_RECURSE_BYNAME; + *parsed_pattern++ = namelen; + + PUTOFFSET(offset, parsed_pattern); + okquantifier = TRUE; + break; /* End special escape processing */ + } + break; /* End escape sequence processing */ + + + /* ---- Single-character special items ---- */ + + case CHAR_CIRCUMFLEX_ACCENT: + *parsed_pattern++ = META_CIRCUMFLEX; + break; + + case CHAR_DOLLAR_SIGN: + *parsed_pattern++ = META_DOLLAR; + break; + + case CHAR_DOT: + *parsed_pattern++ = META_DOT; + okquantifier = TRUE; + break; + + + /* ---- Single-character quantifiers ---- */ + + case CHAR_ASTERISK: + meta_quantifier = META_ASTERISK; + goto CHECK_QUANTIFIER; + + case CHAR_PLUS: + meta_quantifier = META_PLUS; + goto CHECK_QUANTIFIER; + + case CHAR_QUESTION_MARK: + meta_quantifier = META_QUERY; + goto CHECK_QUANTIFIER; + + + /* ---- Potential {n,m} quantifier ---- */ + + case CHAR_LEFT_CURLY_BRACKET: + if (!read_repeat_counts(&ptr, ptrend, &min_repeat, &max_repeat, + &errorcode)) + { + if (errorcode != 0) goto FAILED; /* Error in quantifier. */ + PARSED_LITERAL(c, parsed_pattern); /* Not a quantifier */ + break; /* No more quantifier processing */ + } + meta_quantifier = META_MINMAX; + /* Fall through */ + + + /* ---- Quantifier post-processing ---- */ + + /* Check that a quantifier is allowed after the previous item. This + guarantees that there is a previous item. */ + + CHECK_QUANTIFIER: + if (!prev_okquantifier) + { + errorcode = ERR9; + goto FAILED_BACK; + } + + /* Most (*VERB)s are not allowed to be quantified, but an ungreedy + quantifier can be useful for (*ACCEPT) - meaning "succeed on backtrack", a + sort of negated (*COMMIT). 
We therefore allow (*ACCEPT) to be quantified by + wrapping it in non-capturing brackets, but we have to allow for a preceding + (*MARK) for when (*ACCEPT) has an argument. */ + + if (*prev_parsed_item == META_ACCEPT) + { + uint32_t *p; + for (p = parsed_pattern - 1; p >= verbstartptr; p--) p[1] = p[0]; + *verbstartptr = META_NOCAPTURE; + parsed_pattern[1] = META_KET; + parsed_pattern += 2; + } + + /* Now we can put the quantifier into the parsed pattern vector. At this + stage, we have only the basic quantifier. The check for a following + or ? + modifier happens at the top of the loop, after any intervening comments + have been removed. */ + + *parsed_pattern++ = meta_quantifier; + if (c == CHAR_LEFT_CURLY_BRACKET) + { + *parsed_pattern++ = min_repeat; + *parsed_pattern++ = max_repeat; + } + break; + + + /* ---- Character class ---- */ + + case CHAR_LEFT_SQUARE_BRACKET: + okquantifier = TRUE; + + /* In another (POSIX) regex library, the ugly syntax [[:<:]] and [[:>:]] is + used for "start of word" and "end of word". As these are otherwise illegal + sequences, we don't break anything by recognizing them. They are replaced + by \b(?=\w) and \b(?<=\w) respectively. Sequences like [a[:<:]] are + erroneous and are handled by the normal code below. */ + + if (ptrend - ptr >= 6 && + (PRIV(strncmp_c8)(ptr, STRING_WEIRD_STARTWORD, 6) == 0 || + PRIV(strncmp_c8)(ptr, STRING_WEIRD_ENDWORD, 6) == 0)) + { + *parsed_pattern++ = META_ESCAPE + ESC_b; + + if (ptr[2] == CHAR_LESS_THAN_SIGN) + { + *parsed_pattern++ = META_LOOKAHEAD; + } + else + { + *parsed_pattern++ = META_LOOKBEHIND; + *has_lookbehind = TRUE; + + /* The offset is used only for the "non-fixed length" error; this won't + occur here, so just store zero. 
*/ + + PUTOFFSET((PCRE2_SIZE)0, parsed_pattern); + } + + if ((options & PCRE2_UCP) == 0) + *parsed_pattern++ = META_ESCAPE + ESC_w; + else + { + *parsed_pattern++ = META_ESCAPE + ESC_p; + *parsed_pattern++ = PT_WORD << 16; + } + *parsed_pattern++ = META_KET; + ptr += 6; + break; + } + + /* PCRE supports POSIX class stuff inside a class. Perl gives an error if + they are encountered at the top level, so we'll do that too. */ + + if (ptr < ptrend && (*ptr == CHAR_COLON || *ptr == CHAR_DOT || + *ptr == CHAR_EQUALS_SIGN) && + check_posix_syntax(ptr, ptrend, &tempptr)) + { + errorcode = (*ptr-- == CHAR_COLON)? ERR12 : ERR13; + goto FAILED; + } + + /* Process a regular character class. If the first character is '^', set + the negation flag. If the first few characters (either before or after ^) + are \Q\E or \E or space or tab in extended-more mode, we skip them too. + This makes for compatibility with Perl. */ + + negate_class = FALSE; + while (ptr < ptrend) + { + GETCHARINCTEST(c, ptr); + if (c == CHAR_BACKSLASH) + { + if (ptr < ptrend && *ptr == CHAR_E) ptr++; + else if (ptrend - ptr >= 3 && + PRIV(strncmp_c8)(ptr, STR_Q STR_BACKSLASH STR_E, 3) == 0) + ptr += 3; + else + break; + } + else if ((options & PCRE2_EXTENDED_MORE) != 0 && + (c == CHAR_SPACE || c == CHAR_HT)) /* Note: just these two */ + continue; + else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) + negate_class = TRUE; + else break; + } + + /* Now the real contents of the class; c has the first "real" character. + Empty classes are permitted only if the option is set. */ + + if (c == CHAR_RIGHT_SQUARE_BRACKET && + (cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0) + { + *parsed_pattern++ = negate_class? META_CLASS_EMPTY_NOT : META_CLASS_EMPTY; + break; /* End of class processing */ + } + + /* Process a non-empty class. */ + + *parsed_pattern++ = negate_class? 
META_CLASS_NOT : META_CLASS; + class_range_state = RANGE_NO; + + /* In an EBCDIC environment, Perl treats alphabetic ranges specially + because there are holes in the encoding, and simply using the range A-Z + (for example) would include the characters in the holes. This applies only + to ranges where both values are literal; [\xC1-\xE9] is different to [A-Z] + in this respect. In order to accommodate this, we keep track of whether + character values are literal or not, and a state variable for handling + ranges. */ + + /* Loop for the contents of the class */ + + for (;;) + { + BOOL char_is_literal = TRUE; + + /* Inside \Q...\E everything is literal except \E */ + + if (inescq) + { + if (c == CHAR_BACKSLASH && ptr < ptrend && *ptr == CHAR_E) + { + inescq = FALSE; /* Reset literal state */ + ptr++; /* Skip the 'E' */ + goto CLASS_CONTINUE; + } + goto CLASS_LITERAL; + } + + /* Skip over space and tab (only) in extended-more mode. */ + + if ((options & PCRE2_EXTENDED_MORE) != 0 && + (c == CHAR_SPACE || c == CHAR_HT)) + goto CLASS_CONTINUE; + + /* Handle POSIX class names. Perl allows a negation extension of the + form [:^name:]. A square bracket that doesn't match the syntax is + treated as a literal. We also recognize the POSIX constructions + [.ch.] and [=ch=] ("collating elements") and fault them, as Perl + 5.6 and 5.8 do. */ + + if (c == CHAR_LEFT_SQUARE_BRACKET && + ptrend - ptr >= 3 && + (*ptr == CHAR_COLON || *ptr == CHAR_DOT || + *ptr == CHAR_EQUALS_SIGN) && + check_posix_syntax(ptr, ptrend, &tempptr)) + { + BOOL posix_negate = FALSE; + int posix_class; + + /* Perl treats a hyphen before a POSIX class as a literal, not the + start of a range. However, it gives a warning in its warning mode. PCRE + does not have a warning mode, so we give an error, because this is + likely an error on the user's part. 
*/ + + if (class_range_state == RANGE_STARTED) + { + errorcode = ERR50; + goto FAILED; + } + + if (*ptr != CHAR_COLON) + { + errorcode = ERR13; + goto FAILED_BACK; + } + + if (*(++ptr) == CHAR_CIRCUMFLEX_ACCENT) + { + posix_negate = TRUE; + ptr++; + } + + posix_class = check_posix_name(ptr, (int)(tempptr - ptr)); + if (posix_class < 0) + { + errorcode = ERR30; + goto FAILED; + } + ptr = tempptr + 2; + + /* Perl treats a hyphen after a POSIX class as a literal, not the + start of a range. However, it gives a warning in its warning mode + unless the hyphen is the last character in the class. PCRE does not + have a warning mode, so we give an error, because this is likely an + error on the user's part. */ + + if (ptr < ptrend - 1 && *ptr == CHAR_MINUS && + ptr[1] != CHAR_RIGHT_SQUARE_BRACKET) + { + errorcode = ERR50; + goto FAILED; + } + + /* Set "a hyphen is not the start of a range" for the -] case, and also + in case the POSIX class is followed by \E or \Q\E (possibly repeated - + fuzzers do that kind of thing) and *then* a hyphen. This causes that + hyphen to be treated as a literal. I don't think it's worth setting up + special apparatus to do otherwise. */ + + class_range_state = RANGE_NO; + + /* When PCRE2_UCP is set, unless PCRE2_EXTRA_ASCII_POSIX is set, some + of the POSIX classes are converted to use Unicode properties \p or \P + or, in one case, \h or \H. The substitutes table has two values per + class, containing the type and value of a \p or \P item. The special + cases are specified with a negative type: a non-zero value causes \h or + \H to be used, and a zero value falls through to behave like a non-UCP + POSIX class. There are now also some extra options that force ASCII for + some classes. 
*/ + +#ifdef SUPPORT_UNICODE + if ((options & PCRE2_UCP) != 0 && + (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0 && + !((xoptions & PCRE2_EXTRA_ASCII_DIGIT) != 0 && + (posix_class == PC_DIGIT || posix_class == PC_XDIGIT))) + { + int ptype = posix_substitutes[2*posix_class]; + int pvalue = posix_substitutes[2*posix_class + 1]; + + if (ptype >= 0) + { + *parsed_pattern++ = META_ESCAPE + (posix_negate? ESC_P : ESC_p); + *parsed_pattern++ = (ptype << 16) | pvalue; + goto CLASS_CONTINUE; + } + + if (pvalue != 0) + { + *parsed_pattern++ = META_ESCAPE + (posix_negate? ESC_H : ESC_h); + goto CLASS_CONTINUE; + } + + /* Fall through */ + } +#endif /* SUPPORT_UNICODE */ + + /* Non-UCP POSIX class */ + + *parsed_pattern++ = posix_negate? META_POSIX_NEG : META_POSIX; + *parsed_pattern++ = posix_class; + } + + /* Handle potential start of range */ + + else if (c == CHAR_MINUS && class_range_state >= RANGE_OK_ESCAPED) + { + *parsed_pattern++ = (class_range_state == RANGE_OK_LITERAL)? + META_RANGE_LITERAL : META_RANGE_ESCAPED; + class_range_state = RANGE_STARTED; + } + + /* Handle a literal character */ + + else if (c != CHAR_BACKSLASH) + { + CLASS_LITERAL: + if (class_range_state == RANGE_STARTED) + { + if (c == parsed_pattern[-2]) /* Optimize one-char range */ + parsed_pattern--; + else if (parsed_pattern[-2] > c) /* Check range is in order */ + { + errorcode = ERR8; + goto FAILED_BACK; + } + else + { + if (!char_is_literal && parsed_pattern[-1] == META_RANGE_LITERAL) + parsed_pattern[-1] = META_RANGE_ESCAPED; + PARSED_LITERAL(c, parsed_pattern); + } + class_range_state = RANGE_NO; + } + else /* Potential start of range */ + { + class_range_state = char_is_literal? 
+ RANGE_OK_LITERAL : RANGE_OK_ESCAPED; + PARSED_LITERAL(c, parsed_pattern); + } + } + + /* Handle escapes in a class */ + + else + { + tempptr = ptr; + escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, + xoptions, TRUE, cb); + + if (errorcode != 0) + { + if ((xoptions & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0) + goto FAILED; + ptr = tempptr; + if (ptr >= ptrend) c = CHAR_BACKSLASH; else + { + GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */ + } + escape = 0; /* Treat as literal character */ + } + + switch(escape) + { + case 0: /* Escaped character code point is in c */ + char_is_literal = FALSE; + goto CLASS_LITERAL; /* (a few lines above) */ + + case ESC_b: + c = CHAR_BS; /* \b is backspace in a class */ + char_is_literal = FALSE; + goto CLASS_LITERAL; + + case ESC_Q: + inescq = TRUE; /* Enter literal mode */ + goto CLASS_CONTINUE; + + case ESC_E: /* Ignore orphan \E */ + goto CLASS_CONTINUE; + + case ESC_B: /* Always an error in a class */ + case ESC_R: + case ESC_X: + errorcode = ERR7; + ptr--; + goto FAILED; + } + + /* The second part of a range can be a single-character escape + sequence (detected above), but not any of the other escapes. Perl + treats a hyphen as a literal in such circumstances. However, in Perl's + warning mode, a warning is given, so PCRE now faults it, as it is + almost certainly a mistake on the user's part. */ + + if (class_range_state == RANGE_STARTED) + { + errorcode = ERR50; + goto FAILED; /* Not CLASS_ESCAPE_FAILED; always an error */ + } + + /* Of the remaining escapes, only those that define characters are + allowed in a class. None may start a range. */ + + class_range_state = RANGE_NO; + switch(escape) + { + case ESC_N: + errorcode = ERR71; + goto FAILED; + + case ESC_H: + case ESC_h: + case ESC_V: + case ESC_v: + *parsed_pattern++ = META_ESCAPE + escape; + break; + + /* These escapes may be converted to Unicode property tests when + PCRE2_UCP is set. 
*/ + + case ESC_d: + case ESC_D: + case ESC_s: + case ESC_S: + case ESC_w: + case ESC_W: + parsed_pattern = handle_escdsw(escape, parsed_pattern, options, + xoptions); + break; + + /* Explicit Unicode property matching */ + + case ESC_P: + case ESC_p: +#ifdef SUPPORT_UNICODE + { + BOOL negated; + uint16_t ptype = 0, pdata = 0; + if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcode, cb)) + goto FAILED; + if (negated) escape = (escape == ESC_P)? ESC_p : ESC_P; + *parsed_pattern++ = META_ESCAPE + escape; + *parsed_pattern++ = (ptype << 16) | pdata; + } +#else + errorcode = ERR45; + goto FAILED; +#endif + break; /* End \P and \p */ + + default: /* All others are not allowed in a class */ + errorcode = ERR7; + ptr--; + goto FAILED; + } + + /* Perl gives a warning unless a following hyphen is the last character + in the class. PCRE throws an error. */ + + if (ptr < ptrend - 1 && *ptr == CHAR_MINUS && + ptr[1] != CHAR_RIGHT_SQUARE_BRACKET) + { + errorcode = ERR50; + goto FAILED; + } + } + + /* Proceed to next thing in the class. */ + + CLASS_CONTINUE: + if (ptr >= ptrend) + { + errorcode = ERR6; /* Missing terminating ']' */ + goto FAILED; + } + GETCHARINCTEST(c, ptr); + if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break; + } /* End of class-processing loop */ + + /* -] at the end of a class is a literal '-' */ + + if (class_range_state == RANGE_STARTED) + { + parsed_pattern[-1] = CHAR_MINUS; + class_range_state = RANGE_NO; + } + + *parsed_pattern++ = META_CLASS_END; + break; /* End of character class */ + + + /* ---- Opening parenthesis ---- */ + + case CHAR_LEFT_PARENTHESIS: + if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + /* If ( is not followed by ? it is either a capture or a special verb or an + alpha assertion or a positive non-atomic lookahead. */ + + if (*ptr != CHAR_QUESTION_MARK) + { + const char *vn; + + /* Handle capturing brackets (or non-capturing if auto-capture is turned + off). 
*/ + + if (*ptr != CHAR_ASTERISK) + { + nest_depth++; + if ((options & PCRE2_NO_AUTO_CAPTURE) == 0) + { + if (cb->bracount >= MAX_GROUP_NUMBER) + { + errorcode = ERR97; + goto FAILED; + } + cb->bracount++; + *parsed_pattern++ = META_CAPTURE | cb->bracount; + } + else *parsed_pattern++ = META_NOCAPTURE; + } + + /* Do nothing for (* followed by end of pattern or ) so it gives a "bad + quantifier" error rather than "(*MARK) must have an argument". */ + + else if (ptrend - ptr <= 1 || (c = ptr[1]) == CHAR_RIGHT_PARENTHESIS) + break; + + /* Handle "alpha assertions" such as (*pla:...). Most of these are + synonyms for the historical symbolic assertions, but the script run and + non-atomic lookaround ones are new. They are distinguished by starting + with a lower case letter. Checking both ends of the alphabet makes this + work in all character codes. */ + + else if (CHMAX_255(c) && (cb->ctypes[c] & ctype_lcletter) != 0) + { + uint32_t meta; + + vn = alasnames; + if (!read_name(&ptr, ptrend, utf, 0, &offset, &name, &namelen, + &errorcode, cb)) goto FAILED; + if (ptr >= ptrend || *ptr != CHAR_COLON) + { + errorcode = ERR95; /* Malformed */ + goto FAILED; + } + + /* Scan the table of alpha assertion names */ + + for (i = 0; i < alascount; i++) + { + if (namelen == alasmeta[i].len && + PRIV(strncmp_c8)(name, vn, namelen) == 0) + break; + vn += alasmeta[i].len + 1; + } + + if (i >= alascount) + { + errorcode = ERR95; /* Alpha assertion not recognized */ + goto FAILED; + } + + /* Check for expecting an assertion condition. If so, only atomic + lookaround assertions are valid. */ + + meta = alasmeta[i].meta; + if (prev_expect_cond_assert > 0 && + (meta < META_LOOKAHEAD || meta > META_LOOKBEHINDNOT)) + { + errorcode = (meta == META_LOOKAHEAD_NA || meta == META_LOOKBEHIND_NA)? + ERR98 : ERR28; /* (Atomic) assertion expected */ + goto FAILED; + } + + /* The lookaround alphabetic synonyms can mostly be handled by jumping + to the code that handles the traditional symbolic forms. 
*/ + + switch(meta) + { + default: + errorcode = ERR89; /* Unknown code; should never occur because */ + goto FAILED; /* the meta values come from a table above. */ + + case META_ATOMIC: + goto ATOMIC_GROUP; + + case META_LOOKAHEAD: + goto POSITIVE_LOOK_AHEAD; + + case META_LOOKAHEAD_NA: + goto POSITIVE_NONATOMIC_LOOK_AHEAD; + + case META_LOOKAHEADNOT: + goto NEGATIVE_LOOK_AHEAD; + + case META_LOOKBEHIND: + case META_LOOKBEHINDNOT: + case META_LOOKBEHIND_NA: + *parsed_pattern++ = meta; + ptr--; + goto POST_LOOKBEHIND; + + /* The script run facilities are handled here. Unicode support is + required (give an error if not, as this is a security issue). Always + record a META_SCRIPT_RUN item. Then, for the atomic version, insert + META_ATOMIC and remember that we need two META_KETs at the end. */ + + case META_SCRIPT_RUN: + case META_ATOMIC_SCRIPT_RUN: +#ifdef SUPPORT_UNICODE + *parsed_pattern++ = META_SCRIPT_RUN; + nest_depth++; + ptr++; + if (meta == META_ATOMIC_SCRIPT_RUN) + { + *parsed_pattern++ = META_ATOMIC; + if (top_nest == NULL) top_nest = (nest_save *)(cb->start_workspace); + else if (++top_nest >= end_nests) + { + errorcode = ERR84; + goto FAILED; + } + top_nest->nest_depth = nest_depth; + top_nest->flags = NSF_ATOMICSR; + top_nest->options = options & PARSE_TRACKED_OPTIONS; + top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS; + } + break; +#else /* SUPPORT_UNICODE */ + errorcode = ERR96; + goto FAILED; +#endif + } + } + + + /* ---- Handle (*VERB) and (*VERB:NAME) ---- */ + + else + { + vn = verbnames; + if (!read_name(&ptr, ptrend, utf, 0, &offset, &name, &namelen, + &errorcode, cb)) goto FAILED; + if (ptr >= ptrend || (*ptr != CHAR_COLON && + *ptr != CHAR_RIGHT_PARENTHESIS)) + { + errorcode = ERR60; /* Malformed */ + goto FAILED; + } + + /* Scan the table of verb names */ + + for (i = 0; i < verbcount; i++) + { + if (namelen == verbs[i].len && + PRIV(strncmp_c8)(name, vn, namelen) == 0) + break; + vn += verbs[i].len + 1; + } + + if (i >= 
verbcount) + { + errorcode = ERR60; /* Verb not recognized */ + goto FAILED; + } + + /* An empty argument is treated as no argument. */ + + if (*ptr == CHAR_COLON && ptr + 1 < ptrend && + ptr[1] == CHAR_RIGHT_PARENTHESIS) + ptr++; /* Advance to the closing parens */ + + /* Check for mandatory non-empty argument; this is (*MARK) */ + + if (verbs[i].has_arg > 0 && *ptr != CHAR_COLON) + { + errorcode = ERR66; + goto FAILED; + } + + /* Remember where this verb, possibly with a preceding (*MARK), starts, + for handling quantified (*ACCEPT). */ + + verbstartptr = parsed_pattern; + okquantifier = (verbs[i].meta == META_ACCEPT); + + /* It appears that Perl allows any characters whatsoever, other than a + closing parenthesis, to appear in arguments ("names"), so we no longer + insist on letters, digits, and underscores. Perl does not, however, do + any interpretation within arguments, and has no means of including a + closing parenthesis. PCRE supports escape processing but only when it + is requested by an option. We set inverbname TRUE here, and let the + main loop take care of this so that escape and \x processing is done by + the main code above. */ + + if (*ptr++ == CHAR_COLON) /* Skip past : or ) */ + { + /* Some optional arguments can be treated as a preceding (*MARK) */ + + if (verbs[i].has_arg < 0) + { + add_after_mark = verbs[i].meta; + *parsed_pattern++ = META_MARK; + } + + /* The remaining verbs with arguments (except *MARK) need a different + opcode. */ + + else + { + *parsed_pattern++ = verbs[i].meta + + ((verbs[i].meta != META_MARK)? 0x00010000u:0); + } + + /* Set up for reading the name in the main loop. */ + + verblengthptr = parsed_pattern++; + verbnamestart = ptr; + inverbname = TRUE; + } + else /* No verb "name" argument */ + { + *parsed_pattern++ = verbs[i].meta; + } + } /* End of (*VERB) handling */ + break; /* Done with this parenthesis */ + } /* End of groups that don't start with (? */ + + + /* ---- Items starting (? 
---- */ + + /* The type of item is determined by what follows (?. Handle (?| and option + changes under "default" because both need a new block on the nest stack. + Comments starting with (?# are handled above. Note that there is some + ambiguity about the sequence (?- because if a digit follows it's a relative + recursion or subroutine call whereas otherwise it's an option unsetting. */ + + if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + switch(*ptr) + { + default: + if (*ptr == CHAR_MINUS && ptrend - ptr > 1 && IS_DIGIT(ptr[1])) + goto RECURSION_BYNUMBER; /* The + case is handled by CHAR_PLUS */ + + /* We now have either (?| or a (possibly empty) option setting, + optionally followed by a non-capturing group. */ + + nest_depth++; + if (top_nest == NULL) top_nest = (nest_save *)(cb->start_workspace); + else if (++top_nest >= end_nests) + { + errorcode = ERR84; + goto FAILED; + } + top_nest->nest_depth = nest_depth; + top_nest->flags = 0; + top_nest->options = options & PARSE_TRACKED_OPTIONS; + top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS; + + /* Start of non-capturing group that resets the capture count for each + branch. */ + + if (*ptr == CHAR_VERTICAL_LINE) + { + top_nest->reset_group = (uint16_t)cb->bracount; + top_nest->max_group = (uint16_t)cb->bracount; + top_nest->flags |= NSF_RESET; + cb->external_flags |= PCRE2_DUPCAPUSED; + *parsed_pattern++ = META_NOCAPTURE; + ptr++; + } + + /* Scan for options imnrsxJU to be set or unset. 
*/ + + else + { + BOOL hyphenok = TRUE; + uint32_t oldoptions = options; + uint32_t oldxoptions = xoptions; + + top_nest->reset_group = 0; + top_nest->max_group = 0; + set = unset = 0; + optset = &set; + xset = xunset = 0; + xoptset = &xset; + + /* ^ at the start unsets irmnsx and disables the subsequent use of - */ + + if (ptr < ptrend && *ptr == CHAR_CIRCUMFLEX_ACCENT) + { + options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| + PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE); + xoptions &= ~(PCRE2_EXTRA_CASELESS_RESTRICT); + hyphenok = FALSE; + ptr++; + } + + while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS && + *ptr != CHAR_COLON) + { + switch (*ptr++) + { + case CHAR_MINUS: + if (!hyphenok) + { + errorcode = ERR94; + ptr--; /* Correct the offset */ + goto FAILED; + } + optset = &unset; + xoptset = &xunset; + hyphenok = FALSE; + break; + + /* There are some two-character sequences that start with 'a'. */ + + case CHAR_a: + if (ptr < ptrend) + { + if (*ptr == CHAR_D) + { + *xoptset |= PCRE2_EXTRA_ASCII_BSD; + ptr++; + break; + } + if (*ptr == CHAR_P) + { + *xoptset |= (PCRE2_EXTRA_ASCII_POSIX|PCRE2_EXTRA_ASCII_DIGIT); + ptr++; + break; + } + if (*ptr == CHAR_S) + { + *xoptset |= PCRE2_EXTRA_ASCII_BSS; + ptr++; + break; + } + if (*ptr == CHAR_T) + { + *xoptset |= PCRE2_EXTRA_ASCII_DIGIT; + ptr++; + break; + } + if (*ptr == CHAR_W) + { + *xoptset |= PCRE2_EXTRA_ASCII_BSW; + ptr++; + break; + } + } + *xoptset |= PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS| + PCRE2_EXTRA_ASCII_BSW| + PCRE2_EXTRA_ASCII_DIGIT|PCRE2_EXTRA_ASCII_POSIX; + break; + + case CHAR_J: /* Record that it changed in the external options */ + *optset |= PCRE2_DUPNAMES; + cb->external_flags |= PCRE2_JCHANGED; + break; + + case CHAR_i: *optset |= PCRE2_CASELESS; break; + case CHAR_m: *optset |= PCRE2_MULTILINE; break; + case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break; + case CHAR_r: *xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT; break; + case CHAR_s: *optset |= PCRE2_DOTALL; 
break; + case CHAR_U: *optset |= PCRE2_UNGREEDY; break; + + /* If x appears twice it sets the extended extended option. */ + + case CHAR_x: + *optset |= PCRE2_EXTENDED; + if (ptr < ptrend && *ptr == CHAR_x) + { + *optset |= PCRE2_EXTENDED_MORE; + ptr++; + } + break; + + default: + errorcode = ERR11; + ptr--; /* Correct the offset */ + goto FAILED; + } + } + + /* If we are setting extended without extended-more, ensure that any + existing extended-more gets unset. Also, unsetting extended must also + unset extended-more. */ + + if ((set & (PCRE2_EXTENDED|PCRE2_EXTENDED_MORE)) == PCRE2_EXTENDED || + (unset & PCRE2_EXTENDED) != 0) + unset |= PCRE2_EXTENDED_MORE; + + options = (options | set) & (~unset); + xoptions = (xoptions | xset) & (~xunset); + + /* If the options ended with ')' this is not the start of a nested + group with option changes, so the options change at this level. + In this case, if the previous level set up a nest block, discard the + one we have just created. Otherwise adjust it for the previous level. + If the options ended with ':' we are starting a non-capturing group, + possibly with an options setting. */ + + if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + if (*ptr++ == CHAR_RIGHT_PARENTHESIS) + { + nest_depth--; /* This is not a nested group after all. */ + if (top_nest > (nest_save *)(cb->start_workspace) && + (top_nest-1)->nest_depth == nest_depth) top_nest--; + else top_nest->nest_depth = nest_depth; + } + else *parsed_pattern++ = META_NOCAPTURE; + + /* If nothing changed, no need to record. */ + + if (options != oldoptions || xoptions != oldxoptions) + { + *parsed_pattern++ = META_OPTIONS; + *parsed_pattern++ = options; + *parsed_pattern++ = xoptions; + } + } /* End options processing */ + break; /* End default case after (? */ + + + /* ---- Python syntax support ---- */ + + case CHAR_P: + if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + /* (?P is the same as (?, which defines a named group. 
*/ + + if (*ptr == CHAR_LESS_THAN_SIGN) + { + terminator = CHAR_GREATER_THAN_SIGN; + goto DEFINE_NAME; + } + + /* (?P>name) is the same as (?&name), which is a recursion or subroutine + call. */ + + if (*ptr == CHAR_GREATER_THAN_SIGN) goto RECURSE_BY_NAME; + + /* (?P=name) is the same as \k, a back reference by name. Anything + else after (?P is an error. */ + + if (*ptr != CHAR_EQUALS_SIGN) + { + errorcode = ERR41; + goto FAILED; + } + if (!read_name(&ptr, ptrend, utf, CHAR_RIGHT_PARENTHESIS, &offset, &name, + &namelen, &errorcode, cb)) goto FAILED; + *parsed_pattern++ = META_BACKREF_BYNAME; + *parsed_pattern++ = namelen; + PUTOFFSET(offset, parsed_pattern); + okquantifier = TRUE; + break; /* End of (?P processing */ + + + /* ---- Recursion/subroutine calls by number ---- */ + + case CHAR_R: + i = 0; /* (?R) == (?R0) */ + ptr++; + if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) + { + errorcode = ERR58; + goto FAILED; + } + goto SET_RECURSION; + + /* An item starting (?- followed by a digit comes here via the "default" + case because (?- followed by a non-digit is an options setting. */ + + case CHAR_PLUS: + if (ptrend - ptr < 2 || !IS_DIGIT(ptr[1])) + { + errorcode = ERR29; /* Missing number */ + goto FAILED; + } + /* Fall through */ + + case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: + case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: + RECURSION_BYNUMBER: + if (!read_number(&ptr, ptrend, + (IS_DIGIT(*ptr))? 
-1:(int)(cb->bracount), /* + and - are relative */ + MAX_GROUP_NUMBER, ERR61, + &i, &errorcode)) goto FAILED; + if (i < 0) /* NB (?0) is permitted */ + { + errorcode = ERR15; /* Unknown group */ + goto FAILED_BACK; + } + if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) + goto UNCLOSED_PARENTHESIS; + + SET_RECURSION: + *parsed_pattern++ = META_RECURSE | (uint32_t)i; + offset = (PCRE2_SIZE)(ptr - cb->start_pattern); + ptr++; + PUTOFFSET(offset, parsed_pattern); + okquantifier = TRUE; + break; /* End of recursive call by number handling */ + + + /* ---- Recursion/subroutine calls by name ---- */ + + case CHAR_AMPERSAND: + RECURSE_BY_NAME: + if (!read_name(&ptr, ptrend, utf, CHAR_RIGHT_PARENTHESIS, &offset, &name, + &namelen, &errorcode, cb)) goto FAILED; + *parsed_pattern++ = META_RECURSE_BYNAME; + *parsed_pattern++ = namelen; + PUTOFFSET(offset, parsed_pattern); + okquantifier = TRUE; + break; + + /* ---- Callout with numerical or string argument ---- */ + + case CHAR_C: + if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + + /* If the previous item was a condition starting (?(? an assertion, + optionally preceded by a callout, is expected. This is checked later on, + during actual compilation. However we need to identify this kind of + assertion in this pass because it must not be qualified. The value of + expect_cond_assert is set to 2 after (?(? is processed. We decrement it + for a callout - still leaving a positive value that identifies the + assertion. Multiple callouts or any other items will make it zero or + less, which doesn't matter because they will cause an error later. */ + + expect_cond_assert = prev_expect_cond_assert - 1; + + /* If previous_callout is not NULL, it means this follows a previous + callout. If it was a manual callout, do nothing; this means its "length + of next pattern item" field will remain zero. If it was an automatic + callout, abolish it. 
*/ + + if (previous_callout != NULL && (options & PCRE2_AUTO_CALLOUT) != 0 && + previous_callout == parsed_pattern - 4 && + parsed_pattern[-1] == 255) + parsed_pattern = previous_callout; + + /* Save for updating next pattern item length, and skip one item before + completing. */ + + previous_callout = parsed_pattern; + after_manual_callout = 1; + + /* Handle a string argument; specific delimiter is required. */ + + if (*ptr != CHAR_RIGHT_PARENTHESIS && !IS_DIGIT(*ptr)) + { + PCRE2_SIZE calloutlength; + PCRE2_SPTR startptr = ptr; + + delimiter = 0; + for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) + { + if (*ptr == PRIV(callout_start_delims)[i]) + { + delimiter = PRIV(callout_end_delims)[i]; + break; + } + } + if (delimiter == 0) + { + errorcode = ERR82; + goto FAILED; + } + + *parsed_pattern = META_CALLOUT_STRING; + parsed_pattern += 3; /* Skip pattern info */ + + for (;;) + { + if (++ptr >= ptrend) + { + errorcode = ERR81; + ptr = startptr; /* To give a more useful message */ + goto FAILED; + } + if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter)) + break; + } + + calloutlength = (PCRE2_SIZE)(ptr - startptr); + if (calloutlength > UINT32_MAX) + { + errorcode = ERR72; + goto FAILED; + } + *parsed_pattern++ = (uint32_t)calloutlength; + offset = (PCRE2_SIZE)(startptr - cb->start_pattern); + PUTOFFSET(offset, parsed_pattern); + } + + /* Handle a callout with an optional numerical argument, which must be + less than or equal to 255. A missing argument gives 0. 
*/ + + else + { + int n = 0; + *parsed_pattern = META_CALLOUT_NUMBER; /* Numerical callout */ + parsed_pattern += 3; /* Skip pattern info */ + while (ptr < ptrend && IS_DIGIT(*ptr)) + { + n = n * 10 + *ptr++ - CHAR_0; + if (n > 255) + { + errorcode = ERR38; + goto FAILED; + } + } + *parsed_pattern++ = n; + } + + /* Both formats must have a closing parenthesis */ + + if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) + { + errorcode = ERR39; + goto FAILED; + } + ptr++; + + /* Remember the offset to the next item in the pattern, and set a default + length. This should get updated after the next item is read. */ + + previous_callout[1] = (uint32_t)(ptr - cb->start_pattern); + previous_callout[2] = 0; + break; /* End callout */ + + + /* ---- Conditional group ---- */ + + /* A condition can be an assertion, a number (referring to a numbered + group's having been set), a name (referring to a named group), or 'R', + referring to overall recursion. R and R&name are also permitted + for recursion state tests. Numbers may be preceded by + or - to specify a + relative group number. + + There are several syntaxes for testing a named group: (?(name)) is used + by Python; Perl 5.10 onwards uses (?() or (?('name')). + + There are two unfortunate ambiguities. 'R' can be the recursive thing or + the name 'R' (and similarly for 'R' followed by digits). 'DEFINE' can be + the Perl DEFINE feature or the Python named test. We look for a name + first; if not found, we try the other case. + + For compatibility with auto-callouts, we allow a callout to be specified + before a condition that is an assertion. */ + + case CHAR_LEFT_PARENTHESIS: + if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + nest_depth++; + + /* If the next character is ? or * there must be an assertion next + (optionally preceded by a callout). We do not check this here, but + instead we set expect_cond_assert to 2. 
If this is still greater than + zero (callouts decrement it) when the next assertion is read, it will be + marked as a condition that must not be repeated. A value greater than + zero also causes checking that an assertion (possibly with callout) + follows. */ + + if (*ptr == CHAR_QUESTION_MARK || *ptr == CHAR_ASTERISK) + { + *parsed_pattern++ = META_COND_ASSERT; + ptr--; /* Pull pointer back to the opening parenthesis. */ + expect_cond_assert = 2; + break; /* End of conditional */ + } + + /* Handle (?([+-]number)... */ + + if (read_number(&ptr, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &i, + &errorcode)) + { + if (i <= 0) + { + errorcode = ERR15; + goto FAILED; + } + *parsed_pattern++ = META_COND_NUMBER; + offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 2); + PUTOFFSET(offset, parsed_pattern); + *parsed_pattern++ = i; + } + else if (errorcode != 0) goto FAILED; /* Number too big */ + + /* No number found. Handle the special case (?(VERSION[>]=n.m)... */ + + else if (ptrend - ptr >= 10 && + PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 && + ptr[7] != CHAR_RIGHT_PARENTHESIS) + { + uint32_t ge = 0; + int major = 0; + int minor = 0; + + ptr += 7; + if (*ptr == CHAR_GREATER_THAN_SIGN) + { + ge = 1; + ptr++; + } + + /* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT + references its argument twice. 
*/ + + if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr))) + goto BAD_VERSION_CONDITION; + + if (!read_number(&ptr, ptrend, -1, 1000, ERR79, &major, &errorcode)) + goto FAILED; + + if (ptr >= ptrend) goto BAD_VERSION_CONDITION; + if (*ptr == CHAR_DOT) + { + if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION; + minor = (*ptr++ - CHAR_0) * 10; + if (ptr >= ptrend) goto BAD_VERSION_CONDITION; + if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0; + if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) + goto BAD_VERSION_CONDITION; + } + + *parsed_pattern++ = META_COND_VERSION; + *parsed_pattern++ = ge; + *parsed_pattern++ = major; + *parsed_pattern++ = minor; + } + + /* All the remaining cases now require us to read a name. We cannot at + this stage distinguish ambiguous cases such as (?(R12) which might be a + recursion test by number or a name, because the named groups have not yet + all been identified. Those cases are treated as names, but given a + different META code. */ + + else + { + BOOL was_r_ampersand = FALSE; + + if (*ptr == CHAR_R && ptrend - ptr > 1 && ptr[1] == CHAR_AMPERSAND) + { + terminator = CHAR_RIGHT_PARENTHESIS; + was_r_ampersand = TRUE; + ptr++; + } + else if (*ptr == CHAR_LESS_THAN_SIGN) + terminator = CHAR_GREATER_THAN_SIGN; + else if (*ptr == CHAR_APOSTROPHE) + terminator = CHAR_APOSTROPHE; + else + { + terminator = CHAR_RIGHT_PARENTHESIS; + ptr--; /* Point to char before name */ + } + if (!read_name(&ptr, ptrend, utf, terminator, &offset, &name, &namelen, + &errorcode, cb)) goto FAILED; + + /* Handle (?(R&name) */ + + if (was_r_ampersand) + { + *parsed_pattern = META_COND_RNAME; + ptr--; /* Back to closing parens */ + } + + /* Handle (?(name). If the name is "DEFINE" we identify it with a + special code. Likewise if the name consists of R followed only by + digits. Otherwise, handle it like a quoted name. 
*/ + + else if (terminator == CHAR_RIGHT_PARENTHESIS) + { + if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0) + *parsed_pattern = META_COND_DEFINE; + else + { + for (i = 1; i < (int)namelen; i++) + if (!IS_DIGIT(name[i])) break; + *parsed_pattern = (*name == CHAR_R && i >= (int)namelen)? + META_COND_RNUMBER : META_COND_NAME; + } + ptr--; /* Back to closing parens */ + } + + /* Handle (?('name') or (?() */ + + else *parsed_pattern = META_COND_NAME; + + /* All these cases except DEFINE end with the name length and offset; + DEFINE just has an offset (for the "too many branches" error). */ + + if (*parsed_pattern++ != META_COND_DEFINE) *parsed_pattern++ = namelen; + PUTOFFSET(offset, parsed_pattern); + } /* End cases that read a name */ + + /* Check the closing parenthesis of the condition */ + + if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) + { + errorcode = ERR24; + goto FAILED; + } + ptr++; + break; /* End of condition processing */ + + + /* ---- Atomic group ---- */ + + case CHAR_GREATER_THAN_SIGN: + ATOMIC_GROUP: /* Come from (*atomic: */ + *parsed_pattern++ = META_ATOMIC; + nest_depth++; + ptr++; + break; + + + /* ---- Lookahead assertions ---- */ + + case CHAR_EQUALS_SIGN: + POSITIVE_LOOK_AHEAD: /* Come from (*pla: */ + *parsed_pattern++ = META_LOOKAHEAD; + ptr++; + goto POST_ASSERTION; + + case CHAR_ASTERISK: + POSITIVE_NONATOMIC_LOOK_AHEAD: /* Come from (?* */ + *parsed_pattern++ = META_LOOKAHEAD_NA; + ptr++; + goto POST_ASSERTION; + + case CHAR_EXCLAMATION_MARK: + NEGATIVE_LOOK_AHEAD: /* Come from (*nla: */ + *parsed_pattern++ = META_LOOKAHEADNOT; + ptr++; + goto POST_ASSERTION; + + + /* ---- Lookbehind assertions ---- */ + + /* (?< followed by = or ! or * is a lookbehind assertion. Otherwise (?< + is the start of the name of a capturing group. 
*/ + + case CHAR_LESS_THAN_SIGN: + if (ptrend - ptr <= 1 || + (ptr[1] != CHAR_EQUALS_SIGN && + ptr[1] != CHAR_EXCLAMATION_MARK && + ptr[1] != CHAR_ASTERISK)) + { + terminator = CHAR_GREATER_THAN_SIGN; + goto DEFINE_NAME; + } + *parsed_pattern++ = (ptr[1] == CHAR_EQUALS_SIGN)? + META_LOOKBEHIND : (ptr[1] == CHAR_EXCLAMATION_MARK)? + META_LOOKBEHINDNOT : META_LOOKBEHIND_NA; + + POST_LOOKBEHIND: /* Come from (*plb: (*naplb: and (*nlb: */ + *has_lookbehind = TRUE; + offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 2); + PUTOFFSET(offset, parsed_pattern); + ptr += 2; + /* Fall through */ + + /* If the previous item was a condition starting (?(? an assertion, + optionally preceded by a callout, is expected. This is checked later on, + during actual compilation. However we need to identify this kind of + assertion in this pass because it must not be qualified. The value of + expect_cond_assert is set to 2 after (?(? is processed. We decrement it + for a callout - still leaving a positive value that identifies the + assertion. Multiple callouts or any other items will make it zero or + less, which doesn't matter because they will cause an error later. */ + + POST_ASSERTION: + nest_depth++; + if (prev_expect_cond_assert > 0) + { + if (top_nest == NULL) top_nest = (nest_save *)(cb->start_workspace); + else if (++top_nest >= end_nests) + { + errorcode = ERR84; + goto FAILED; + } + top_nest->nest_depth = nest_depth; + top_nest->flags = NSF_CONDASSERT; + top_nest->options = options & PARSE_TRACKED_OPTIONS; + top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS; + } + break; + + + /* ---- Define a named group ---- */ + + /* A named group may be defined as (?'name') or (?). In the latter + case we jump to DEFINE_NAME from the disambiguation of (?< above with the + terminator set to '>'. 
*/ + + case CHAR_APOSTROPHE: + terminator = CHAR_APOSTROPHE; /* Terminator */ + + DEFINE_NAME: + if (!read_name(&ptr, ptrend, utf, terminator, &offset, &name, &namelen, + &errorcode, cb)) goto FAILED; + + /* We have a name for this capturing group. It is also assigned a number, + which is its primary means of identification. */ + + if (cb->bracount >= MAX_GROUP_NUMBER) + { + errorcode = ERR97; + goto FAILED; + } + cb->bracount++; + *parsed_pattern++ = META_CAPTURE | cb->bracount; + nest_depth++; + + /* Check not too many names */ + + if (cb->names_found >= MAX_NAME_COUNT) + { + errorcode = ERR49; + goto FAILED; + } + + /* Adjust the entry size to accommodate the longest name found. */ + + if (namelen + IMM2_SIZE + 1 > cb->name_entry_size) + cb->name_entry_size = (uint16_t)(namelen + IMM2_SIZE + 1); + + /* Scan the list to check for duplicates. For duplicate names, if the + number is the same, break the loop, which causes the name to be + discarded; otherwise, if DUPNAMES is not set, give an error. + If it is set, allow the name with a different number, but continue + scanning in case this is a duplicate with the same number. For + non-duplicate names, give an error if the number is duplicated. 
*/ + + isdupname = FALSE; + ng = cb->named_groups; + for (i = 0; i < cb->names_found; i++, ng++) + { + if (namelen == ng->length && + PRIV(strncmp)(name, ng->name, (PCRE2_SIZE)namelen) == 0) + { + if (ng->number == cb->bracount) break; + if ((options & PCRE2_DUPNAMES) == 0) + { + errorcode = ERR43; + goto FAILED; + } + isdupname = ng->isdup = TRUE; /* Mark as a duplicate */ + cb->dupnames = TRUE; /* Duplicate names exist */ + } + else if (ng->number == cb->bracount) + { + errorcode = ERR65; + goto FAILED; + } + } + + if (i < cb->names_found) break; /* Ignore duplicate with same number */ + + /* Increase the list size if necessary */ + + if (cb->names_found >= cb->named_group_list_size) + { + uint32_t newsize = cb->named_group_list_size * 2; + named_group *newspace = + cb->cx->memctl.malloc(newsize * sizeof(named_group), + cb->cx->memctl.memory_data); + if (newspace == NULL) + { + errorcode = ERR21; + goto FAILED; + } + + memcpy(newspace, cb->named_groups, + cb->named_group_list_size * sizeof(named_group)); + if (cb->named_group_list_size > NAMED_GROUP_LIST_SIZE) + cb->cx->memctl.free((void *)cb->named_groups, + cb->cx->memctl.memory_data); + cb->named_groups = newspace; + cb->named_group_list_size = newsize; + } + + /* Add this name to the list */ + + cb->named_groups[cb->names_found].name = name; + cb->named_groups[cb->names_found].length = (uint16_t)namelen; + cb->named_groups[cb->names_found].number = cb->bracount; + cb->named_groups[cb->names_found].isdup = (uint16_t)isdupname; + cb->names_found++; + break; + } /* End of (? switch */ + break; /* End of ( handling */ + + + /* ---- Branch terminators ---- */ + + /* Alternation: reset the capture count if we are in a (?| group. 
*/ + + case CHAR_VERTICAL_LINE: + if (top_nest != NULL && top_nest->nest_depth == nest_depth && + (top_nest->flags & NSF_RESET) != 0) + { + if (cb->bracount > top_nest->max_group) + top_nest->max_group = (uint16_t)cb->bracount; + cb->bracount = top_nest->reset_group; + } + *parsed_pattern++ = META_ALT; + break; + + /* End of group; reset the capture count to the maximum if we are in a (?| + group and/or reset the options that are tracked during parsing. Disallow + quantifier for a condition that is an assertion. */ + + case CHAR_RIGHT_PARENTHESIS: + okquantifier = TRUE; + if (top_nest != NULL && top_nest->nest_depth == nest_depth) + { + options = (options & ~PARSE_TRACKED_OPTIONS) | top_nest->options; + xoptions = (xoptions & ~PARSE_TRACKED_EXTRA_OPTIONS) | top_nest->xoptions; + if ((top_nest->flags & NSF_RESET) != 0 && + top_nest->max_group > cb->bracount) + cb->bracount = top_nest->max_group; + if ((top_nest->flags & NSF_CONDASSERT) != 0) + okquantifier = FALSE; + + if ((top_nest->flags & NSF_ATOMICSR) != 0) + { + *parsed_pattern++ = META_KET; + } + + if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL; + else top_nest--; + } + if (nest_depth == 0) /* Unmatched closing parenthesis */ + { + errorcode = ERR22; + goto FAILED_BACK; + } + nest_depth--; + *parsed_pattern++ = META_KET; + break; + } /* End of switch on pattern character */ + } /* End of main character scan loop */ + +/* End of pattern reached. Check for missing ) at the end of a verb name. */ + +if (inverbname && ptr >= ptrend) + { + errorcode = ERR60; + goto FAILED; + } + +/* Manage callout for the final item */ + +PARSED_END: +parsed_pattern = manage_callouts(ptr, &previous_callout, auto_callout, + parsed_pattern, cb); + +/* Insert trailing items for word and line matching (features provided for the +benefit of pcre2grep). 
*/ + +if ((xoptions & PCRE2_EXTRA_MATCH_LINE) != 0) + { + *parsed_pattern++ = META_KET; + *parsed_pattern++ = META_DOLLAR; + } +else if ((xoptions & PCRE2_EXTRA_MATCH_WORD) != 0) + { + *parsed_pattern++ = META_KET; + *parsed_pattern++ = META_ESCAPE + ESC_b; + } + +/* Terminate the parsed pattern, then return success if all groups are closed. +Otherwise we have unclosed parentheses. */ + +if (parsed_pattern >= parsed_pattern_end) + { + errorcode = ERR63; /* Internal error (parsed pattern overflow) */ + goto FAILED; + } + +*parsed_pattern = META_END; +if (nest_depth == 0) return 0; + +UNCLOSED_PARENTHESIS: +errorcode = ERR14; + +/* Come here for all failures. */ + +FAILED: +cb->erroroffset = (PCRE2_SIZE)(ptr - cb->start_pattern); +return errorcode; + +/* Some errors need to indicate the previous character. */ + +FAILED_BACK: +ptr--; +goto FAILED; + +/* This failure happens several times. */ + +BAD_VERSION_CONDITION: +errorcode = ERR79; +goto FAILED; +} + + + +/************************************************* +* Find first significant opcode * +*************************************************/ + +/* This is called by several functions that scan a compiled expression looking +for a fixed first character, or an anchoring opcode etc. It skips over things +that do not influence this. For some calls, it makes sense to skip negative +forward and all backward assertions, and also the \b assertion; for others it +does not. 
+ +Arguments: + code pointer to the start of the group + skipassert TRUE if certain assertions are to be skipped + +Returns: pointer to the first significant opcode +*/ + +static const PCRE2_UCHAR* +first_significant_code(PCRE2_SPTR code, BOOL skipassert) +{ +for (;;) + { + switch ((int)*code) + { + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERTBACK_NA: + if (!skipassert) return code; + do code += GET(code, 1); while (*code == OP_ALT); + code += PRIV(OP_lengths)[*code]; + break; + + case OP_WORD_BOUNDARY: + case OP_NOT_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + case OP_NOT_UCP_WORD_BOUNDARY: + if (!skipassert) return code; + /* Fall through */ + + case OP_CALLOUT: + case OP_CREF: + case OP_DNCREF: + case OP_RREF: + case OP_DNRREF: + case OP_FALSE: + case OP_TRUE: + code += PRIV(OP_lengths)[*code]; + break; + + case OP_CALLOUT_STR: + code += GET(code, 1 + 2*LINK_SIZE); + break; + + case OP_SKIPZERO: + code += 2 + GET(code, 2) + LINK_SIZE; + break; + + case OP_COND: + case OP_SCOND: + if (code[1+LINK_SIZE] != OP_FALSE || /* Not DEFINE */ + code[GET(code, 1)] != OP_KET) /* More than one branch */ + return code; + code += GET(code, 1) + 1 + LINK_SIZE; + break; + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + code += code[1] + PRIV(OP_lengths)[*code]; + break; + + default: + return code; + } + } +/* Control never reaches here */ +} + + + +#ifdef SUPPORT_UNICODE +/************************************************* +* Get othercase range * +*************************************************/ + +/* This function is passed the start and end of a class range in UCP mode. For +single characters the range may be just one character long. The function +searches up the characters, looking for ranges of characters in the "other" +case. Each call returns the next one, updating the start address. A character +with multiple other cases is returned on its own with a special return value. 
+ +Arguments: + cptr points to starting character value; updated + d end value + ocptr where to put start of othercase range + odptr where to put end of othercase range + restricted TRUE if caseless restriction applies + +Yield: -1 when no more + 0 when a range is returned + >0 the CASESET offset for char with multiple other cases; + for this return, *ocptr contains the original +*/ + +static int +get_othercase_range(uint32_t *cptr, uint32_t d, uint32_t *ocptr, + uint32_t *odptr, BOOL restricted) +{ +uint32_t c, othercase, next; +unsigned int co; + +/* Find the first character that has an other case. If it has multiple other +cases, return its case offset value. When CASELESS_RESTRICT is set, ignore the +multi-case entries that begin with ASCII values. In 32-bit mode, a value +greater than the Unicode maximum ends the range. */ + +for (c = *cptr; c <= d; c++) + { +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) return -1; +#endif + if ((co = UCD_CASESET(c)) != 0 && + (!restricted || PRIV(ucd_caseless_sets)[co] > 127)) + { + *ocptr = c++; /* Character that has the set */ + *cptr = c; /* Rest of input range */ + return (int)co; + } + + /* This is not a valid multiple-case character. Check that the single other + case is different to the original. We don't need to check "restricted" here + because the non-ASCII characters with multiple cases that include an ASCII + character don't have a different "othercase". */ + + if ((othercase = UCD_OTHERCASE(c)) != c) break; + } + +if (c > d) return -1; /* Reached end of range */ + +/* Found a character that has a single other case. Search for the end of the +range, which is either the end of the input range, or a character that has zero +or more than one other cases. 
*/ + +*ocptr = othercase; +next = othercase + 1; + +for (++c; c <= d; c++) + { + if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break; + next++; + } + +*odptr = next - 1; /* End of othercase range */ +*cptr = c; /* Rest of input range */ +return 0; +} +#endif /* SUPPORT_UNICODE */ + + + +/************************************************* +* Add a character or range to a class (internal) * +*************************************************/ + +/* This function packages up the logic of adding a character or range of +characters to a class. The character values in the arguments will be within the +valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is +called only from within the "add to class" group of functions, some of which +are recursive and mutually recursive. The external entry point is +add_to_class(). + +Arguments: + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data + options the options bits + xoptions the extra options bits + cb compile data + start start of range character + end end of range character + +Returns: the number of < 256 characters added + the pointer to extra data is updated +*/ + +static unsigned int +add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, + uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start, + uint32_t end) +{ +uint32_t c; +uint32_t classbits_end = (end <= 0xff ? end : 0xff); +unsigned int n8 = 0; + +/* If caseless matching is required, scan the range and process alternate +cases. In Unicode, there are 8-bit characters that have alternate cases that +are greater than 255 and vice-versa (though these may be ignored if caseless +restriction is in force). Sometimes we can just extend the original range. 
*/ + +if ((options & PCRE2_CASELESS) != 0) + { +#ifdef SUPPORT_UNICODE + if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0) + { + int rc; + uint32_t oc, od; + + options &= ~PCRE2_CASELESS; /* Remove for recursive calls */ + c = start; + + while ((rc = get_othercase_range(&c, end, &oc, &od, + (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)) >= 0) + { + /* Handle a single character that has more than one other case. */ + + if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, + options, xoptions, cb, PRIV(ucd_caseless_sets) + rc, oc); + + /* Do nothing if the other case range is within the original range. */ + + else if (oc >= cb->class_range_start && od <= cb->class_range_end) + continue; + + /* Extend the original range if there is overlap, noting that if oc < c, + we can't have od > end because a subrange is always shorter than the + basic range. Otherwise, use a recursive call to add the additional range. + */ + + else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */ + else if (od > end && oc <= end + 1) + { + end = od; /* Extend upwards */ + if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff); + } + else n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, + cb, oc, od); + } + } + else +#else + (void)xoptions; /* Avoid compiler warning */ +#endif /* SUPPORT_UNICODE */ + + /* Not UTF mode */ + + for (c = start; c <= classbits_end; c++) + { + SETBIT(classbits, cb->fcc[c]); + n8++; + } + } + +/* Now handle the originally supplied range. Adjust the final value according +to the bit length - this means that the same lists of (e.g.) horizontal spaces +can be used in all cases. */ + +if ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR) + end = MAX_NON_UTF_CHAR; + +if (start > cb->class_range_start && end < cb->class_range_end) return n8; + +/* Use the bitmap for characters < 256. Otherwise use extra data.*/ + +for (c = start; c <= classbits_end; c++) + { + /* Regardless of start, c will always be <= 255. 
*/ + SETBIT(classbits, c); + n8++; + } + +#ifdef SUPPORT_WIDE_CHARS +if (start <= 0xff) start = 0xff + 1; + +if (end >= start) + { + PCRE2_UCHAR *uchardata = *uchardptr; + +#ifdef SUPPORT_UNICODE + if ((options & PCRE2_UTF) != 0) + { + if (start < end) + { + *uchardata++ = XCL_RANGE; + uchardata += PRIV(ord2utf)(start, uchardata); + uchardata += PRIV(ord2utf)(end, uchardata); + } + else if (start == end) + { + *uchardata++ = XCL_SINGLE; + uchardata += PRIV(ord2utf)(start, uchardata); + } + } + else +#endif /* SUPPORT_UNICODE */ + + /* Without UTF support, character values are constrained by the bit length, + and can only be > 256 for 16-bit and 32-bit libraries. */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 + {} +#else + if (start < end) + { + *uchardata++ = XCL_RANGE; + *uchardata++ = start; + *uchardata++ = end; + } + else if (start == end) + { + *uchardata++ = XCL_SINGLE; + *uchardata++ = start; + } +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + *uchardptr = uchardata; /* Updata extra data pointer */ + } +#else /* SUPPORT_WIDE_CHARS */ + (void)uchardptr; /* Avoid compiler warning */ +#endif /* SUPPORT_WIDE_CHARS */ + +return n8; /* Number of 8-bit characters */ +} + + + +#ifdef SUPPORT_UNICODE +/************************************************* +* Add a list of characters to a class (internal) * +*************************************************/ + +/* This function is used for adding a list of case-equivalent characters to a +class when in UTF mode. This function is called only from within +add_to_class_internal(), with which it is mutually recursive. + +Arguments: + classbits the bit map for characters < 256 + uchardptr points to the pointer for extra data + options the options bits + xoptions the extra options bits + cb contains pointers to tables etc. 
+  p             points to row of 32-bit values, terminated by NOTACHAR
+  except        character to omit; this is used when adding lists of
+                  case-equivalent characters to avoid including the one we
+                  already know about
+
+Returns:        the number of < 256 characters added
+                the pointer to extra data is updated
+*/
+
+static unsigned int
+add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
+  uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p,
+  unsigned int except)
+{
+unsigned int total = 0;
+
+/* Each pass consumes one maximal run of consecutive values from the list and
+hands it on as a single range. A run that begins with the excepted character
+is not extended: that one entry is stepped over and scanning resumes at the
+entry that follows it. */
+
+for (;;)
+  {
+  const uint32_t first = p[0];
+  unsigned int extra = 0;      /* Entries in this run beyond the first */
+
+  if (first >= NOTACHAR) break;
+
+  if (first != except)
+    {
+    while (p[extra + 1] == first + extra + 1) extra++;
+    total += add_to_class_internal(classbits, uchardptr, options, xoptions,
+      cb, first, p[extra]);
+    }
+
+  p += extra + 1;
+  }
+
+return total;
+}
+#endif
+
+
+
+/*************************************************
+*   External entry point for add range to class  *
+*************************************************/
+
+/* Records the caller's range in the compile block as class_range_start and
+class_range_end, and then passes the same range to add_to_class_internal(). The
+recorded limits are what the internal function compares other-case ranges
+against, so that it can avoid adding a range that is already covered.
+
+Arguments:
+  classbits     the bit map for characters < 256
+  uchardptr     points to the pointer for extra data
+  options       the options bits
+  xoptions      the extra options bits
+  cb            compile data
+  start         start of range character
+  end           end of range character
+
+Returns:        the number of < 256 characters added
+                the pointer to extra data is updated
+*/
+
+static unsigned int
+add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
+  uint32_t xoptions, compile_block *cb, uint32_t start, uint32_t end)
+{
+unsigned int added;
+
+/* The limits must be in place before the internal function runs. */
+
+cb->class_range_end = end;
+cb->class_range_start = start;
+
+added = add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
+  start, end);
+return added;
+}
+
+
+/*************************************************
+*   External entry point for add list to class   *
+*************************************************/
+
+/* This function is used for adding a list of horizontal or vertical whitespace
+characters to a class.
The list must be in order so that ranges of characters
+can be detected and handled appropriately. This function sets the overall range
+so that the internal functions can try to avoid duplication when handling
+case-independence.
+
+Arguments:
+  classbits     the bit map for characters < 256
+  uchardptr     points to the pointer for extra data
+  options       the options bits
+  xoptions      the extra options bits
+  cb            contains pointers to tables etc.
+  p             points to row of 32-bit values, terminated by NOTACHAR
+  except        character to omit; this is used when adding lists of
+                  case-equivalent characters to avoid including the one we
+                  already know about
+
+Returns:        the number of < 256 characters added
+                the pointer to extra data is updated
+*/
+
+static unsigned int
+add_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
+  uint32_t xoptions, compile_block *cb, const uint32_t *p, unsigned int except)
+{
+unsigned int total = 0;
+
+while (p[0] < NOTACHAR)
+  {
+  unsigned int last = 0;       /* Offset of the final entry of this run */
+
+  /* An entry equal to the excepted character is passed over on its own. */
+
+  if (p[0] == except)
+    {
+    p++;
+    continue;
+    }
+
+  /* Find how far the run of consecutive values extends, record it as the
+  overall range for this addition, and then add it. */
+
+  while (p[last + 1] == p[0] + last + 1) last++;
+
+  cb->class_range_start = p[0];
+  cb->class_range_end = p[last];
+  total += add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
+    p[0], p[last]);
+
+  p += last + 1;
+  }
+
+return total;
+}
+
+
+
+/*************************************************
+*   Add characters not in a list to a class      *
+*************************************************/
+
+/* This function is used for adding the complement of a list of horizontal or
+vertical whitespace to a class. The list must be in order.
+
+Arguments:
+  classbits     the bit map for characters < 256
+  uchardptr     points to the pointer for extra data
+  options       the options bits
+  xoptions      the extra options bits
+  cb            contains pointers to tables etc.
+  p             points to row of 32-bit values, terminated by NOTACHAR
+
+Returns:        the number of < 256 characters added
+                the pointer to extra data is updated
+*/
+
+static unsigned int
+add_not_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
+  uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p)
+{
+/* Upper limit used for the gap that follows the last entry in the list. */
+
+const uint32_t last_cp = ((options & PCRE2_UTF) != 0)? 0x10ffffu : 0xffffffffu;
+unsigned int total = 0;
+
+/* Everything below the first listed value, if there is anything. */
+
+if (p[0] != 0)
+  total += add_to_class(classbits, uchardptr, options, xoptions, cb, 0,
+    p[0] - 1);
+
+/* Then each gap between one run of consecutive listed values and the next
+listed value, or the upper limit after the final run. */
+
+for (; p[0] < NOTACHAR; p++)
+  {
+  uint32_t gap_start, gap_end;
+
+  while (p[1] == p[0] + 1) p++;
+
+  gap_start = p[0] + 1;
+  gap_end = (p[1] == NOTACHAR)? last_cp : p[1] - 1;
+  total += add_to_class(classbits, uchardptr, options, xoptions, cb,
+    gap_start, gap_end);
+  }
+
+return total;
+}
+
+
+
+/*************************************************
+*    Find details of duplicate group names       *
+*************************************************/
+
+/* Looks a name up in the names table of the compile block and reports the
+index of its first entry and the number of adjacent entries that carry the same
+name. While counting, the backref map and the highest back reference number in
+the compile block are updated from the group number of each entry. It is called
+from compile_branch() when processing named backreferences, either directly, or
+as conditions.
+
+Arguments:
+  name          points to the name
+  length        the length of the name
+  indexptr      where to put the index
+  countptr      where to put the count of duplicates
+  errorcodeptr  where to put an error code
+  cb            the compile block
+
+Returns:        TRUE if OK, FALSE if not, error code set
+*/
+
+static BOOL
+find_dupname_details(PCRE2_SPTR name, uint32_t length, int *indexptr,
+  int *countptr, int *errorcodeptr, compile_block *cb)
+{
+PCRE2_UCHAR *entry = cb->name_table;
+uint32_t idx = 0;
+int ndups = 0;
+
+/* Step through the table until an entry holds exactly this name. */
+
+while (idx < cb->names_found)
+  {
+  if (PRIV(strncmp)(name, entry + IMM2_SIZE, length) == 0 &&
+      entry[IMM2_SIZE + length] == 0)
+    break;
+  entry += cb->name_entry_size;
+  idx++;
+  }
+
+/* Running off the end should not occur, because this function is called only
+when we know we have duplicate names. Give an internal error. */
+
+if (idx >= cb->names_found)
+  {
+  *errorcodeptr = ERR53;
+  cb->erroroffset = name - cb->start_pattern;
+  return FALSE;
+  }
+
+*indexptr = idx;
+
+/* Count this entry and each following entry with the same name, noting every
+group number in the backref map and in the maximum back reference. Group
+numbers of 32 or more all share bit 0 of the map. */
+
+do
+  {
+  uint32_t groupnumber = GET2(entry, 0);
+
+  ndups++;
+  if (groupnumber < 32) cb->backref_map |= 1u << groupnumber;
+    else cb->backref_map |= 1;
+  if (cb->top_backref < groupnumber) cb->top_backref = groupnumber;
+
+  if (++idx >= cb->names_found) break;
+  entry += cb->name_entry_size;
+  }
+while (PRIV(strncmp)(name, entry + IMM2_SIZE, length) == 0 &&
+       entry[IMM2_SIZE + length] == 0);
+
+*countptr = ndups;
+return TRUE;
+}
+
+
+
+/*************************************************
+*           Compile one branch                   *
+*************************************************/
+
+/* Scan the parsed pattern, compiling it into a vector of PCRE2_UCHAR. If
+the options are changed during the branch, the pointer is used to change the
+external options bits. This function is used during the pre-compile phase when
+we are trying to find out the amount of memory needed, as well as during the
+real compile phase. The value of lengthptr distinguishes the two phases.
+
+Arguments:
+  optionsptr        pointer to the option bits
+  xoptionsptr       pointer to the extra option bits
+  codeptr           points to the pointer to the current code point
+  pptrptr           points to the current parsed pattern pointer
+  errorcodeptr      points to error code variable
+  firstcuptr        place to put the first required code unit
+  firstcuflagsptr   place to put the first code unit flags
+  reqcuptr          place to put the last required code unit
+  reqcuflagsptr     place to put the last required code unit flags
+  bcptr             points to current branch chain
+  open_caps         points to current capitem
+  cb                contains pointers to tables etc.
+ lengthptr NULL during the real compile phase + points to length accumulator during pre-compile phase + +Returns: 0 There's been an error, *errorcodeptr is non-zero + +1 Success, this branch must match at least one character + -1 Success, this branch may match an empty string +*/ + +static int +compile_branch(uint32_t *optionsptr, uint32_t *xoptionsptr, + PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr, + uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, + uint32_t *reqcuflagsptr, branch_chain *bcptr, open_capitem *open_caps, + compile_block *cb, PCRE2_SIZE *lengthptr) +{ +int bravalue = 0; +int okreturn = -1; +int group_return = 0; +uint32_t repeat_min = 0, repeat_max = 0; /* To please picky compilers */ +uint32_t greedy_default, greedy_non_default; +uint32_t repeat_type, op_type; +uint32_t options = *optionsptr; /* May change dynamically */ +uint32_t xoptions = *xoptionsptr; /* May change dynamically */ +uint32_t firstcu, reqcu; +uint32_t zeroreqcu, zerofirstcu; +uint32_t escape; +uint32_t *pptr = *pptrptr; +uint32_t meta, meta_arg; +uint32_t firstcuflags, reqcuflags; +uint32_t zeroreqcuflags, zerofirstcuflags; +uint32_t req_caseopt, reqvary, tempreqvary; +PCRE2_SIZE offset = 0; +PCRE2_SIZE length_prevgroup = 0; +PCRE2_UCHAR *code = *codeptr; +PCRE2_UCHAR *last_code = code; +PCRE2_UCHAR *orig_code = code; +PCRE2_UCHAR *tempcode; +PCRE2_UCHAR *previous = NULL; +PCRE2_UCHAR op_previous; +BOOL groupsetfirstcu = FALSE; +BOOL had_accept = FALSE; +BOOL matched_char = FALSE; +BOOL previous_matched_char = FALSE; +BOOL reset_caseful = FALSE; +const uint8_t *cbits = cb->cbits; +uint8_t classbits[32]; + +/* We can fish out the UTF setting once and for all into a BOOL, but we must +not do this for other options (e.g. PCRE2_EXTENDED) that may change dynamically +as we process the pattern. 
*/ + +#ifdef SUPPORT_UNICODE +BOOL utf = (options & PCRE2_UTF) != 0; +BOOL ucp = (options & PCRE2_UCP) != 0; +#else /* No Unicode support */ +BOOL utf = FALSE; +#endif + +/* Helper variables for OP_XCLASS opcode (for characters > 255). We define +class_uchardata always so that it can be passed to add_to_class() always, +though it will not be used in non-UTF 8-bit cases. This avoids having to supply +alternative calls for the different cases. */ + +PCRE2_UCHAR *class_uchardata; +#ifdef SUPPORT_WIDE_CHARS +BOOL xclass; +PCRE2_UCHAR *class_uchardata_base; +#endif + +/* Set up the default and non-default settings for greediness */ + +greedy_default = ((options & PCRE2_UNGREEDY) != 0); +greedy_non_default = greedy_default ^ 1; + +/* Initialize no first unit, no required unit. REQ_UNSET means "no char +matching encountered yet". It gets changed to REQ_NONE if we hit something that +matches a non-fixed first unit; reqcu just remains unset if we never find one. + +When we hit a repeat whose minimum is zero, we may have to adjust these values +to take the zero repeat into account. This is implemented by setting them to +zerofirstcu and zeroreqcu when such a repeat is encountered. The individual +item types that can be repeated set these backoff variables appropriately. */ + +firstcu = reqcu = zerofirstcu = zeroreqcu = 0; +firstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET; + +/* The variable req_caseopt contains either the REQ_CASELESS bit or zero, +according to the current setting of the caseless flag. The REQ_CASELESS value +leaves the lower 28 bit empty. It is added into the firstcu or reqcu variables +to record the case status of the value. This is used only for ASCII characters. +*/ + +req_caseopt = ((options & PCRE2_CASELESS) != 0)? 
REQ_CASELESS : 0; + +/* Switch on next META item until the end of the branch */ + +for (;; pptr++) + { +#ifdef SUPPORT_WIDE_CHARS + BOOL xclass_has_prop; +#endif + BOOL negate_class; + BOOL should_flip_negation; + BOOL match_all_or_no_wide_chars; + BOOL possessive_quantifier; + BOOL note_group_empty; + int class_has_8bitchar; + uint32_t mclength; + uint32_t skipunits; + uint32_t subreqcu, subfirstcu; + uint32_t groupnumber; + uint32_t verbarglen, verbculen; + uint32_t subreqcuflags, subfirstcuflags; + open_capitem *oc; + PCRE2_UCHAR mcbuffer[8]; + + /* Get next META item in the pattern and its potential argument. */ + + meta = META_CODE(*pptr); + meta_arg = META_DATA(*pptr); + + /* If we are in the pre-compile phase, accumulate the length used for the + previous cycle of this loop, unless the next item is a quantifier. */ + + if (lengthptr != NULL) + { + if (code > cb->start_workspace + cb->workspace_size - + WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */ + { + *errorcodeptr = (code >= cb->start_workspace + cb->workspace_size)? + ERR52 : ERR86; + return 0; + } + + /* There is at least one situation where code goes backwards: this is the + case of a zero quantifier after a class (e.g. [ab]{0}). When the quantifier + is processed, the whole class is eliminated. However, it is created first, + so we have to allow memory for it. Therefore, don't ever reduce the length + at this point. */ + + if (code < last_code) code = last_code; + + /* If the next thing is not a quantifier, we add the length of the previous + item into the total, and reset the code pointer to the start of the + workspace. Otherwise leave the previous item available to be quantified. 
*/ + + if (meta < META_ASTERISK || meta > META_MINMAX_QUERY) + { + if (OFLOW_MAX - *lengthptr < (PCRE2_SIZE)(code - orig_code)) + { + *errorcodeptr = ERR20; /* Integer overflow */ + return 0; + } + *lengthptr += (PCRE2_SIZE)(code - orig_code); + if (*lengthptr > MAX_PATTERN_SIZE) + { + *errorcodeptr = ERR20; /* Pattern is too large */ + return 0; + } + code = orig_code; + } + + /* Remember where this code item starts so we can catch the "backwards" + case above next time round. */ + + last_code = code; + } + + /* Process the next parsed pattern item. If it is not a quantifier, remember + where it starts so that it can be quantified when a quantifier follows. + Checking for the legality of quantifiers happens in parse_regex(), except for + a quantifier after an assertion that is a condition. */ + + if (meta < META_ASTERISK || meta > META_MINMAX_QUERY) + { + previous = code; + if (matched_char && !had_accept) okreturn = 1; + } + + previous_matched_char = matched_char; + matched_char = FALSE; + note_group_empty = FALSE; + skipunits = 0; /* Default value for most subgroups */ + + switch(meta) + { + /* ===================================================================*/ + /* The branch terminates at pattern end or | or ) */ + + case META_END: + case META_ALT: + case META_KET: + *firstcuptr = firstcu; + *firstcuflagsptr = firstcuflags; + *reqcuptr = reqcu; + *reqcuflagsptr = reqcuflags; + *codeptr = code; + *pptrptr = pptr; + return okreturn; + + + /* ===================================================================*/ + /* Handle single-character metacharacters. In multiline mode, ^ disables + the setting of any following char as a first character. */ + + case META_CIRCUMFLEX: + if ((options & PCRE2_MULTILINE) != 0) + { + if (firstcuflags == REQ_UNSET) + zerofirstcuflags = firstcuflags = REQ_NONE; + *code++ = OP_CIRCM; + } + else *code++ = OP_CIRC; + break; + + case META_DOLLAR: + *code++ = ((options & PCRE2_MULTILINE) != 0)? 
OP_DOLLM : OP_DOLL; + break; + + /* There can never be a first char if '.' is first, whatever happens about + repeats. The value of reqcu doesn't change either. */ + + case META_DOT: + matched_char = TRUE; + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + *code++ = ((options & PCRE2_DOTALL) != 0)? OP_ALLANY: OP_ANY; + break; + + + /* ===================================================================*/ + /* Empty character classes are allowed if PCRE2_ALLOW_EMPTY_CLASS is set. + Otherwise, an initial ']' is taken as a data character. When empty classes + are allowed, [] must always fail, so generate OP_FAIL, whereas [^] must + match any character, so generate OP_ALLANY. */ + + case META_CLASS_EMPTY: + case META_CLASS_EMPTY_NOT: + matched_char = TRUE; + *code++ = (meta == META_CLASS_EMPTY_NOT)? OP_ALLANY : OP_FAIL; + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + break; + + + /* ===================================================================*/ + /* Non-empty character class. If the included characters are all < 256, we + build a 32-byte bitmap of the permitted characters, except in the special + case where there is only one such character. For negated classes, we build + the map as usual, then invert it at the end. However, we use a different + opcode so that data characters > 255 can be handled correctly. + + If the class contains characters outside the 0-255 range, a different + opcode is compiled. It may optionally have a bit map for characters < 256, + but those above are explicitly listed afterwards. A flag code unit tells + whether the bitmap is present, and whether this is a negated class or + not. 
*/ + + case META_CLASS_NOT: + case META_CLASS: + matched_char = TRUE; + negate_class = meta == META_CLASS_NOT; + + /* We can optimize the case of a single character in a class by generating + OP_CHAR or OP_CHARI if it's positive, or OP_NOT or OP_NOTI if it's + negative. In the negative case there can be no first char if this item is + first, whatever repeat count may follow. In the case of reqcu, save the + previous value for reinstating. */ + + /* NOTE: at present this optimization is not effective if the only + character in a class in 32-bit, non-UCP mode has its top bit set. */ + + if (pptr[1] < META_END && pptr[2] == META_CLASS_END) + { +#ifdef SUPPORT_UNICODE + uint32_t d; +#endif + uint32_t c = pptr[1]; + + pptr += 2; /* Move on to class end */ + if (meta == META_CLASS) /* A positive one-char class can be */ + { /* handled as a normal literal character. */ + meta = c; /* Set up the character */ + goto NORMAL_CHAR_SET; + } + + /* Handle a negative one-character class */ + + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + + /* For caseless UTF or UCP mode, check whether this character has more + than one other case. If so, generate a special OP_NOTPROP item instead of + OP_NOTI. When restricted by PCRE2_EXTRA_CASELESS_RESTRICT, ignore any + caseless set that starts with an ASCII character. */ + +#ifdef SUPPORT_UNICODE + if ((utf||ucp) && (options & PCRE2_CASELESS) != 0 && + (d = UCD_CASESET(c)) != 0 && + ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || + PRIV(ucd_caseless_sets)[d] > 127)) + { + *code++ = OP_NOTPROP; + *code++ = PT_CLIST; + *code++ = d; + break; /* We are finished with this class */ + } +#endif + /* Char has only one other (usable) case, or UCP not available */ + + *code++ = ((options & PCRE2_CASELESS) != 0)? 
OP_NOTI: OP_NOT; + code += PUTCHAR(c, code); + break; /* We are finished with this class */ + } /* End of 1-char optimization */ + + /* Handle character classes that contain more than just one literal + character. If there are exactly two characters in a positive class, see if + they are case partners. This can be optimized to generate a caseless single + character match (which also sets first/required code units if relevant). + When casing restrictions apply, ignore a caseless set if both characters + are ASCII. */ + + if (meta == META_CLASS && pptr[1] < META_END && pptr[2] < META_END && + pptr[3] == META_CLASS_END) + { + uint32_t c = pptr[1]; + +#ifdef SUPPORT_UNICODE + if (UCD_CASESET(c) == 0 || + ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 && + c < 128 && pptr[2] < 128)) +#endif + { + uint32_t d; + +#ifdef SUPPORT_UNICODE + if ((utf || ucp) && c > 127) d = UCD_OTHERCASE(c); else +#endif + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (c > 255) d = c; else +#endif + d = TABLE_GET(c, cb->fcc, c); + } + + if (c != d && pptr[2] == d) + { + pptr += 3; /* Move on to class end */ + meta = c; + if ((options & PCRE2_CASELESS) == 0) + { + reset_caseful = TRUE; + options |= PCRE2_CASELESS; + req_caseopt = REQ_CASELESS; + } + goto CLASS_CASELESS_CHAR; + } + } + } + + /* If a non-extended class contains a negative special such as \S, we need + to flip the negation flag at the end, so that support for characters > 255 + works correctly (they are all included in the class). An extended class may + need to insert specific matching or non-matching code for wide characters. + */ + + should_flip_negation = match_all_or_no_wide_chars = FALSE; + + /* Extended class (xclass) will be used when characters > 255 + might match. 
*/ + +#ifdef SUPPORT_WIDE_CHARS + xclass = FALSE; + class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ + class_uchardata_base = class_uchardata; /* Save the start */ +#endif + + /* For optimization purposes, we track some properties of the class: + class_has_8bitchar will be non-zero if the class contains at least one + character with a code point less than 256; xclass_has_prop will be TRUE if + Unicode property checks are present in the class. */ + + class_has_8bitchar = 0; +#ifdef SUPPORT_WIDE_CHARS + xclass_has_prop = FALSE; +#endif + + /* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map + in a temporary bit of memory, in case the class contains fewer than two + 8-bit characters because in that case the compiled code doesn't use the bit + map. */ + + memset(classbits, 0, 32 * sizeof(uint8_t)); + + /* Process items until META_CLASS_END is reached. */ + + while ((meta = *(++pptr)) != META_CLASS_END) + { + /* Handle POSIX classes such as [:alpha:] etc. */ + + if (meta == META_POSIX || meta == META_POSIX_NEG) + { + BOOL local_negate = (meta == META_POSIX_NEG); + int posix_class = *(++pptr); + int taboffset, tabopt; + uint8_t pbits[32]; + + should_flip_negation = local_negate; /* Note negative special */ + + /* If matching is caseless, upper and lower are converted to alpha. + This relies on the fact that the class table starts with alpha, + lower, upper as the first 3 entries. */ + + if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2) + posix_class = 0; + + /* When PCRE2_UCP is set, some of the POSIX classes are converted to + different escape sequences that use Unicode properties \p or \P. + Others that are not available via \p or \P have to generate + XCL_PROP/XCL_NOTPROP directly, which is done here. */ + +#ifdef SUPPORT_UNICODE + if ((options & PCRE2_UCP) != 0 && + (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0) + { + switch(posix_class) + { + case PC_GRAPH: + case PC_PRINT: + case PC_PUNCT: + *class_uchardata++ = local_negate? 
XCL_NOTPROP : XCL_PROP; + *class_uchardata++ = (PCRE2_UCHAR) + ((posix_class == PC_GRAPH)? PT_PXGRAPH : + (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT); + *class_uchardata++ = 0; + xclass_has_prop = TRUE; + goto CONTINUE_CLASS; + + /* For the other POSIX classes (ex: ascii) we are going to + fall through to the non-UCP case and build a bit map for + characters with code points less than 256. However, if we are in + a negated POSIX class, characters with code points greater than + 255 must either all match or all not match, depending on whether + the whole class is not or is negated. For example, for + [[:^ascii:]... they must all match, whereas for [^[:^ascii:]... + they must not. + + In the special case where there are no xclass items, this is + automatically handled by the use of OP_CLASS or OP_NCLASS, but an + explicit range is needed for OP_XCLASS. Setting a flag here + causes the range to be generated later when it is known that + OP_XCLASS is required. In the 8-bit library this is relevant only in + utf mode, since no wide characters can exist otherwise. */ + + default: +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (utf) +#endif + match_all_or_no_wide_chars |= local_negate; + break; + } + } +#endif /* SUPPORT_UNICODE */ + + /* In the non-UCP case, or when UCP makes no difference, we build the + bit map for the POSIX class in a chunk of local store because we may + be adding and subtracting from it, and we don't want to subtract bits + that may be in the main map already. At the end we or the result into + the bit map that is being built. */ + + posix_class *= 3; + + /* Copy in the first table (always present) */ + + memcpy(pbits, cbits + posix_class_maps[posix_class], + 32 * sizeof(uint8_t)); + + /* If there is a second table, add or remove it as required. 
*/ + + taboffset = posix_class_maps[posix_class + 1]; + tabopt = posix_class_maps[posix_class + 2]; + + if (taboffset >= 0) + { + if (tabopt >= 0) + for (int i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset]; + else + for (int i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset]; + } + + /* Now see if we need to remove any special characters. An option + value of 1 removes vertical space and 2 removes underscore. */ + + if (tabopt < 0) tabopt = -tabopt; + if (tabopt == 1) pbits[1] &= ~0x3c; + else if (tabopt == 2) pbits[11] &= 0x7f; + + /* Add the POSIX table or its complement into the main table that is + being built and we are done. */ + + if (local_negate) + for (int i = 0; i < 32; i++) classbits[i] |= (uint8_t)(~pbits[i]); + else + for (int i = 0; i < 32; i++) classbits[i] |= pbits[i]; + + /* Every class contains at least one < 256 character. */ + + class_has_8bitchar = 1; + goto CONTINUE_CLASS; /* End of POSIX handling */ + } + + /* Other than POSIX classes, the only items we should encounter are + \d-type escapes and literal characters (possibly as ranges). */ + + if (meta == META_BIGVALUE) + { + meta = *(++pptr); + goto CLASS_LITERAL; + } + + /* Any other non-literal must be an escape */ + + if (meta >= META_END) + { + if (META_CODE(meta) != META_ESCAPE) + { +#ifdef DEBUG_SHOW_PARSED + fprintf(stderr, "** Unrecognized parsed pattern item 0x%.8x " + "in character class\n", meta); +#endif + *errorcodeptr = ERR89; /* Internal error - unrecognized. */ + return 0; + } + escape = META_DATA(meta); + + /* Every class contains at least one < 256 character. 
*/ + + class_has_8bitchar++; + + switch(escape) + { + case ESC_d: + for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit]; + break; + + case ESC_D: + should_flip_negation = TRUE; + for (int i = 0; i < 32; i++) + classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]); + break; + + case ESC_w: + for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word]; + break; + + case ESC_W: + should_flip_negation = TRUE; + for (int i = 0; i < 32; i++) + classbits[i] |= (uint8_t)(~cbits[i+cbit_word]); + break; + + /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl + 5.18. Before PCRE 8.34, we had to preserve the VT bit if it was + previously set by something earlier in the character class. + Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so + we could just adjust the appropriate bit. From PCRE 8.34 we no + longer treat \s and \S specially. */ + + case ESC_s: + for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space]; + break; + + case ESC_S: + should_flip_negation = TRUE; + for (int i = 0; i < 32; i++) + classbits[i] |= (uint8_t)(~cbits[i+cbit_space]); + break; + + /* When adding the horizontal or vertical space lists to a class, or + their complements, disable PCRE2_CASELESS, because it justs wastes + time, and in the "not-x" UTF cases can create unwanted duplicates in + the XCLASS list (provoked by characters that have more than one other + case and by both cases being in the same "not-x" sublist). 
*/ + + case ESC_h: + (void)add_list_to_class(classbits, &class_uchardata, + options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list), + NOTACHAR); + break; + + case ESC_H: + (void)add_not_list_to_class(classbits, &class_uchardata, + options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list)); + break; + + case ESC_v: + (void)add_list_to_class(classbits, &class_uchardata, + options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list), + NOTACHAR); + break; + + case ESC_V: + (void)add_not_list_to_class(classbits, &class_uchardata, + options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list)); + break; + + /* If Unicode is not supported, \P and \p are not allowed and are + faulted at parse time, so will never appear here. */ + +#ifdef SUPPORT_UNICODE + case ESC_p: + case ESC_P: + { + uint32_t ptype = *(++pptr) >> 16; + uint32_t pdata = *pptr & 0xffff; + *class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP; + *class_uchardata++ = ptype; + *class_uchardata++ = pdata; + xclass_has_prop = TRUE; + class_has_8bitchar--; /* Undo! */ + } + break; +#endif + } + + goto CONTINUE_CLASS; + } /* End handling \d-type escapes */ + + /* A literal character may be followed by a range meta. At parse time + there are checks for out-of-order characters, for ranges where the two + characters are equal, and for hyphens that cannot indicate a range. At + this point, therefore, no checking is needed. */ + + else + { + uint32_t c, d; + + CLASS_LITERAL: + c = d = meta; + + /* Remember if \r or \n were explicitly used */ + + if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; + + /* Process a character range */ + + if (pptr[1] == META_RANGE_LITERAL || pptr[1] == META_RANGE_ESCAPED) + { +#ifdef EBCDIC + BOOL range_is_literal = (pptr[1] == META_RANGE_LITERAL); +#endif + pptr += 2; + d = *pptr; + if (d == META_BIGVALUE) d = *(++pptr); + + /* Remember an explicit \r or \n, and add the range to the class. 
*/ + + if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; + + /* In an EBCDIC environment, Perl treats alphabetic ranges specially + because there are holes in the encoding, and simply using the range + A-Z (for example) would include the characters in the holes. This + applies only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */ + +#ifdef EBCDIC + if (range_is_literal && + (cb->ctypes[c] & ctype_letter) != 0 && + (cb->ctypes[d] & ctype_letter) != 0 && + (c <= CHAR_z) == (d <= CHAR_z)) + { + uint32_t uc = (d <= CHAR_z)? 0 : 64; + uint32_t C = c - uc; + uint32_t D = d - uc; + + if (C <= CHAR_i) + { + class_has_8bitchar += + add_to_class(classbits, &class_uchardata, options, xoptions, + cb, C + uc, ((D < CHAR_i)? D : CHAR_i) + uc); + C = CHAR_j; + } + + if (C <= D && C <= CHAR_r) + { + class_has_8bitchar += + add_to_class(classbits, &class_uchardata, options, xoptions, + cb, C + uc, ((D < CHAR_r)? D : CHAR_r) + uc); + C = CHAR_s; + } + + if (C <= D) + { + class_has_8bitchar += + add_to_class(classbits, &class_uchardata, options, xoptions, + cb, C + uc, D + uc); + } + } + else +#endif + /* Not an EBCDIC special range */ + + class_has_8bitchar += add_to_class(classbits, &class_uchardata, + options, xoptions, cb, c, d); + goto CONTINUE_CLASS; /* Go get the next char in the class */ + } /* End of range handling */ + + + /* Handle a single character. */ + + class_has_8bitchar += + add_to_class(classbits, &class_uchardata, options, xoptions, cb, + meta, meta); + } + + /* Continue to the next item in the class. */ + + CONTINUE_CLASS: + +#ifdef SUPPORT_WIDE_CHARS + /* If any wide characters or Unicode properties have been encountered, + set xclass = TRUE. Then, in the pre-compile phase, accumulate the length + of the extra data and reset the pointer. This is so that very large + classes that contain a zillion wide characters or Unicode property tests + do not overwrite the workspace (which is on the stack). 
*/ + + if (class_uchardata > class_uchardata_base) + { + xclass = TRUE; + if (lengthptr != NULL) + { + *lengthptr += class_uchardata - class_uchardata_base; + class_uchardata = class_uchardata_base; + } + } +#endif + + continue; /* Needed to avoid error when not supporting wide chars */ + } /* End of main class-processing loop */ + + /* If this class is the first thing in the branch, there can be no first + char setting, whatever the repeat count. Any reqcu setting must remain + unchanged after any kind of repeat. */ + + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + + /* If there are characters with values > 255, or Unicode property settings + (\p or \P), we have to compile an extended class, with its own opcode, + unless there were no property settings and there was a negated special such + as \S in the class, and PCRE2_UCP is not set, because in that case all + characters > 255 are in or not in the class, so any that were explicitly + given as well can be ignored. + + In the UCP case, if certain negated POSIX classes (ex: [:^ascii:]) were + were present in a class, we either have to match or not match all wide + characters (depending on whether the whole class is or is not negated). + This requirement is indicated by match_all_or_no_wide_chars being true. + We do this by including an explicit range, which works in both cases. + This applies only in UTF and 16-bit and 32-bit non-UTF modes, since there + cannot be any wide characters in 8-bit non-UTF mode. + + When there *are* properties in a positive UTF-8 or any 16-bit or 32_bit + class where \S etc is present without PCRE2_UCP, causing an extended class + to be compiled, we make sure that all characters > 255 are included by + forcing match_all_or_no_wide_chars to be true. + + If, when generating an xclass, there are no characters < 256, we can omit + the bitmap in the actual compiled code. 
*/ + +#ifdef SUPPORT_WIDE_CHARS /* Defined for 16/32 bits, or 8-bit with Unicode */ + if (xclass && ( +#ifdef SUPPORT_UNICODE + (options & PCRE2_UCP) != 0 || +#endif + xclass_has_prop || !should_flip_negation)) + { + if (match_all_or_no_wide_chars || ( +#if PCRE2_CODE_UNIT_WIDTH == 8 + utf && +#endif + should_flip_negation && !negate_class && (options & PCRE2_UCP) == 0)) + { + *class_uchardata++ = XCL_RANGE; + if (utf) /* Will always be utf in the 8-bit library */ + { + class_uchardata += PRIV(ord2utf)(0x100, class_uchardata); + class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT, class_uchardata); + } + else /* Can only happen for the 16-bit & 32-bit libraries */ + { +#if PCRE2_CODE_UNIT_WIDTH == 16 + *class_uchardata++ = 0x100; + *class_uchardata++ = 0xffffu; +#elif PCRE2_CODE_UNIT_WIDTH == 32 + *class_uchardata++ = 0x100; + *class_uchardata++ = 0xffffffffu; +#endif + } + } + *class_uchardata++ = XCL_END; /* Marks the end of extra data */ + *code++ = OP_XCLASS; + code += LINK_SIZE; + *code = negate_class? XCL_NOT:0; + if (xclass_has_prop) *code |= XCL_HASPROP; + + /* If the map is required, move up the extra data to make room for it; + otherwise just move the code pointer to the end of the extra data. */ + + if (class_has_8bitchar > 0) + { + *code++ |= XCL_MAP; + (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, + CU2BYTES(class_uchardata - code)); + if (negate_class && !xclass_has_prop) + { + /* Using 255 ^ instead of ~ avoids clang sanitize warning. 
*/ + for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; + } + memcpy(code, classbits, 32); + code = class_uchardata + (32 / sizeof(PCRE2_UCHAR)); + } + else code = class_uchardata; + + /* Now fill in the complete length of the item */ + + PUT(previous, 1, (int)(code - previous)); + break; /* End of class handling */ + } +#endif /* SUPPORT_WIDE_CHARS */ + + /* If there are no characters > 255, or they are all to be included or + excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the + whole class was negated and whether there were negative specials such as \S + (non-UCP) in the class. Then copy the 32-byte map into the code vector, + negating it if necessary. */ + + *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; + if (lengthptr == NULL) /* Save time in the pre-compile phase */ + { + if (negate_class) + { + /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ + for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; + } + memcpy(code, classbits, 32); + } + code += 32 / sizeof(PCRE2_UCHAR); + break; /* End of class processing */ + + + /* ===================================================================*/ + /* Deal with (*VERB)s. */ + + /* Check for open captures before ACCEPT and close those that are within + the same assertion level, also converting ACCEPT to ASSERT_ACCEPT in an + assertion. In the first pass, just accumulate the length required; + otherwise hitting (*ACCEPT) inside many nested parentheses can cause + workspace overflow. Do not set firstcu after *ACCEPT. */ + + case META_ACCEPT: + cb->had_accept = had_accept = TRUE; + for (oc = open_caps; + oc != NULL && oc->assert_depth >= cb->assert_depth; + oc = oc->next) + { + if (lengthptr != NULL) + { + *lengthptr += CU2BYTES(1) + IMM2_SIZE; + } + else + { + *code++ = OP_CLOSE; + PUT2INC(code, 0, oc->number); + } + } + *code++ = (cb->assert_depth > 0)? 
OP_ASSERT_ACCEPT : OP_ACCEPT; + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + break; + + case META_PRUNE: + case META_SKIP: + cb->had_pruneorskip = TRUE; + /* Fall through */ + case META_COMMIT: + case META_FAIL: + *code++ = verbops[(meta - META_MARK) >> 16]; + break; + + case META_THEN: + cb->external_flags |= PCRE2_HASTHEN; + *code++ = OP_THEN; + break; + + /* Handle verbs with arguments. Arguments can be very long, especially in + 16- and 32-bit modes, and can overflow the workspace in the first pass. + However, the argument length is constrained to be small enough to fit in + one code unit. This check happens in parse_regex(). In the first pass, + instead of putting the argument into memory, we just update the length + counter and set up an empty argument. */ + + case META_THEN_ARG: + cb->external_flags |= PCRE2_HASTHEN; + goto VERB_ARG; + + case META_PRUNE_ARG: + case META_SKIP_ARG: + cb->had_pruneorskip = TRUE; + /* Fall through */ + case META_MARK: + case META_COMMIT_ARG: + VERB_ARG: + *code++ = verbops[(meta - META_MARK) >> 16]; + /* The length is in characters. */ + verbarglen = *(++pptr); + verbculen = 0; + tempcode = code++; + for (int i = 0; i < (int)verbarglen; i++) + { + meta = *(++pptr); +#ifdef SUPPORT_UNICODE + if (utf) mclength = PRIV(ord2utf)(meta, mcbuffer); else +#endif + { + mclength = 1; + mcbuffer[0] = meta; + } + if (lengthptr != NULL) *lengthptr += mclength; else + { + memcpy(code, mcbuffer, CU2BYTES(mclength)); + code += mclength; + verbculen += mclength; + } + } + + *tempcode = verbculen; /* Fill in the code unit length */ + *code++ = 0; /* Terminating zero */ + break; + + + /* ===================================================================*/ + /* Handle options change. The new setting must be passed back for use in + subsequent branches. Reset the greedy defaults and the case value for + firstcu and reqcu. 
*/ + + case META_OPTIONS: + *optionsptr = options = *(++pptr); + *xoptionsptr = xoptions = *(++pptr); + greedy_default = ((options & PCRE2_UNGREEDY) != 0); + greedy_non_default = greedy_default ^ 1; + req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0; + break; + + + /* ===================================================================*/ + /* Handle conditional subpatterns. The case of (?(Rdigits) is ambiguous + because it could be a numerical check on recursion, or a name check on a + group's being set. The pre-pass sets up META_COND_RNUMBER as a name so that + we can handle it either way. We first try for a name; if not found, process + the number. */ + + case META_COND_RNUMBER: /* (?(Rdigits) */ + case META_COND_NAME: /* (?(name) or (?'name') or ?() */ + case META_COND_RNAME: /* (?(R&name) - test for recursion */ + bravalue = OP_COND; + { + int count, index; + unsigned int i; + PCRE2_SPTR name; + named_group *ng = cb->named_groups; + uint32_t length = *(++pptr); + + GETPLUSOFFSET(offset, pptr); + name = cb->start_pattern + offset; + + /* In the first pass, the names generated in the pre-pass are available, + but the main name table has not yet been created. Scan the list of names + generated in the pre-pass in order to get a number and whether or not + this name is duplicated. If it is not duplicated, we can handle it as a + numerical group. */ + + for (i = 0; i < cb->names_found; i++, ng++) + { + if (length == ng->length && + PRIV(strncmp)(name, ng->name, length) == 0) + { + if (!ng->isdup) + { + code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF; + PUT2(code, 2+LINK_SIZE, ng->number); + if (ng->number > cb->top_backref) cb->top_backref = ng->number; + skipunits = 1+IMM2_SIZE; + goto GROUP_PROCESS_NOTE_EMPTY; + } + break; /* Found a duplicated name */ + } + } + + /* If the name was not found we have a bad reference, unless we are + dealing with R, which is treated as a recursion test by number. 
+ */ + + if (i >= cb->names_found) + { + groupnumber = 0; + if (meta == META_COND_RNUMBER) + { + for (i = 1; i < length; i++) + { + groupnumber = groupnumber * 10 + name[i] - CHAR_0; + if (groupnumber > MAX_GROUP_NUMBER) + { + *errorcodeptr = ERR61; + cb->erroroffset = offset + i; + return 0; + } + } + } + + if (meta != META_COND_RNUMBER || groupnumber > cb->bracount) + { + *errorcodeptr = ERR15; + cb->erroroffset = offset; + return 0; + } + + /* (?Rdigits) treated as a recursion reference by number. A value of + zero (which is the result of both (?R) and (?R0)) means "any", and is + translated into RREF_ANY (which is 0xffff). */ + + if (groupnumber == 0) groupnumber = RREF_ANY; + code[1+LINK_SIZE] = OP_RREF; + PUT2(code, 2+LINK_SIZE, groupnumber); + skipunits = 1+IMM2_SIZE; + goto GROUP_PROCESS_NOTE_EMPTY; + } + + /* A duplicated name was found. Note that if an R name is found + (META_COND_RNUMBER), it is a reference test, not a recursion test. */ + + code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF; + + /* We have a duplicated name. In the compile pass we have to search the + main table in order to get the index and count values. */ + + count = 0; /* Values for first pass (avoids compiler warning) */ + index = 0; + if (lengthptr == NULL && !find_dupname_details(name, length, &index, + &count, errorcodeptr, cb)) return 0; + + /* Add one to the opcode to change CREF/RREF into DNCREF/DNRREF and + insert appropriate data values. */ + + code[1+LINK_SIZE]++; + skipunits = 1+2*IMM2_SIZE; + PUT2(code, 2+LINK_SIZE, index); + PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count); + } + goto GROUP_PROCESS_NOTE_EMPTY; + + /* The DEFINE condition is always false. Its internal groups may never + be called, so matched_char must remain false, hence the jump to + GROUP_PROCESS rather than GROUP_PROCESS_NOTE_EMPTY. 
*/ + + case META_COND_DEFINE: + bravalue = OP_COND; + GETPLUSOFFSET(offset, pptr); + code[1+LINK_SIZE] = OP_DEFINE; + skipunits = 1; + goto GROUP_PROCESS; + + /* Conditional test of a group's being set. */ + + case META_COND_NUMBER: + bravalue = OP_COND; + GETPLUSOFFSET(offset, pptr); + groupnumber = *(++pptr); + if (groupnumber > cb->bracount) + { + *errorcodeptr = ERR15; + cb->erroroffset = offset; + return 0; + } + if (groupnumber > cb->top_backref) cb->top_backref = groupnumber; + offset -= 2; /* Point at initial ( for too many branches error */ + code[1+LINK_SIZE] = OP_CREF; + skipunits = 1+IMM2_SIZE; + PUT2(code, 2+LINK_SIZE, groupnumber); + goto GROUP_PROCESS_NOTE_EMPTY; + + /* Test for the PCRE2 version. */ + + case META_COND_VERSION: + bravalue = OP_COND; + if (pptr[1] > 0) + code[1+LINK_SIZE] = ((PCRE2_MAJOR > pptr[2]) || + (PCRE2_MAJOR == pptr[2] && PCRE2_MINOR >= pptr[3]))? + OP_TRUE : OP_FALSE; + else + code[1+LINK_SIZE] = (PCRE2_MAJOR == pptr[2] && PCRE2_MINOR == pptr[3])? + OP_TRUE : OP_FALSE; + skipunits = 1; + pptr += 3; + goto GROUP_PROCESS_NOTE_EMPTY; + + /* The condition is an assertion, possibly preceded by a callout. */ + + case META_COND_ASSERT: + bravalue = OP_COND; + goto GROUP_PROCESS_NOTE_EMPTY; + + + /* ===================================================================*/ + /* Handle all kinds of nested bracketed groups. The non-capturing, + non-conditional cases are here; others come to GROUP_PROCESS via goto. */ + + case META_LOOKAHEAD: + bravalue = OP_ASSERT; + cb->assert_depth += 1; + goto GROUP_PROCESS; + + case META_LOOKAHEAD_NA: + bravalue = OP_ASSERT_NA; + cb->assert_depth += 1; + goto GROUP_PROCESS; + + /* Optimize (?!) to (*FAIL) unless it is quantified - which is a weird + thing to do, but Perl allows all assertions to be quantified, and when + they contain capturing parentheses there may be a potential use for + this feature. Not that that applies to a quantified (?!) but we allow + it for uniformity. 
*/ + + case META_LOOKAHEADNOT: + if (pptr[1] == META_KET && + (pptr[2] < META_ASTERISK || pptr[2] > META_MINMAX_QUERY)) + { + *code++ = OP_FAIL; + pptr++; + } + else + { + bravalue = OP_ASSERT_NOT; + cb->assert_depth += 1; + goto GROUP_PROCESS; + } + break; + + case META_LOOKBEHIND: + bravalue = OP_ASSERTBACK; + cb->assert_depth += 1; + goto GROUP_PROCESS; + + case META_LOOKBEHINDNOT: + bravalue = OP_ASSERTBACK_NOT; + cb->assert_depth += 1; + goto GROUP_PROCESS; + + case META_LOOKBEHIND_NA: + bravalue = OP_ASSERTBACK_NA; + cb->assert_depth += 1; + goto GROUP_PROCESS; + + case META_ATOMIC: + bravalue = OP_ONCE; + goto GROUP_PROCESS_NOTE_EMPTY; + + case META_SCRIPT_RUN: + bravalue = OP_SCRIPT_RUN; + goto GROUP_PROCESS_NOTE_EMPTY; + + case META_NOCAPTURE: + bravalue = OP_BRA; + /* Fall through */ + + /* Process nested bracketed regex. The nesting depth is maintained for the + benefit of the stackguard function. The test for too deep nesting is now + done in parse_regex(). Assertion and DEFINE groups come to GROUP_PROCESS; + others come to GROUP_PROCESS_NOTE_EMPTY, to indicate that we need to take + note of whether or not they may match an empty string. 
*/ + + GROUP_PROCESS_NOTE_EMPTY: + note_group_empty = TRUE; + + GROUP_PROCESS: + cb->parens_depth += 1; + *code = bravalue; + pptr++; + tempcode = code; + tempreqvary = cb->req_varyopt; /* Save value before group */ + length_prevgroup = 0; /* Initialize for pre-compile phase */ + + if ((group_return = + compile_regex( + options, /* The options state */ + xoptions, /* The extra options state */ + &tempcode, /* Where to put code (updated) */ + &pptr, /* Input pointer (updated) */ + errorcodeptr, /* Where to put an error message */ + skipunits, /* Skip over bracket number */ + &subfirstcu, /* For possible first char */ + &subfirstcuflags, + &subreqcu, /* For possible last char */ + &subreqcuflags, + bcptr, /* Current branch chain */ + open_caps, /* Pointer to capture stack */ + cb, /* Compile data block */ + (lengthptr == NULL)? NULL : /* Actual compile phase */ + &length_prevgroup /* Pre-compile phase */ + )) == 0) + return 0; /* Error */ + + cb->parens_depth -= 1; + + /* If that was a non-conditional significant group (not an assertion, not a + DEFINE) that matches at least one character, then the current item matches + a character. Conditionals are handled below. */ + + if (note_group_empty && bravalue != OP_COND && group_return > 0) + matched_char = TRUE; + + /* If we've just compiled an assertion, pop the assert depth. */ + + if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NA) + cb->assert_depth -= 1; + + /* At the end of compiling, code is still pointing to the start of the + group, while tempcode has been updated to point past the end of the group. + The parsed pattern pointer (pptr) is on the closing META_KET. + + If this is a conditional bracket, check that there are no more than + two branches in the group, or just one if it's a DEFINE group. We do this + in the real compile phase, not in the pre-pass, where the whole group may + not be available. 
*/ + + if (bravalue == OP_COND && lengthptr == NULL) + { + PCRE2_UCHAR *tc = code; + int condcount = 0; + + do { + condcount++; + tc += GET(tc,1); + } + while (*tc != OP_KET); + + /* A DEFINE group is never obeyed inline (the "condition" is always + false). It must have only one branch. Having checked this, change the + opcode to OP_FALSE. */ + + if (code[LINK_SIZE+1] == OP_DEFINE) + { + if (condcount > 1) + { + cb->erroroffset = offset; + *errorcodeptr = ERR54; + return 0; + } + code[LINK_SIZE+1] = OP_FALSE; + bravalue = OP_DEFINE; /* A flag to suppress char handling below */ + } + + /* A "normal" conditional group. If there is just one branch, we must not + make use of its firstcu or reqcu, because this is equivalent to an + empty second branch. Also, it may match an empty string. If there are two + branches, this item must match a character if the group must. */ + + else + { + if (condcount > 2) + { + cb->erroroffset = offset; + *errorcodeptr = ERR27; + return 0; + } + if (condcount == 1) subfirstcuflags = subreqcuflags = REQ_NONE; + else if (group_return > 0) matched_char = TRUE; + } + } + + /* In the pre-compile phase, update the length by the length of the group, + less the brackets at either end. Then reduce the compiled code to just a + set of non-capturing brackets so that it doesn't use much memory if it is + duplicated by a quantifier.*/ + + if (lengthptr != NULL) + { + if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE) + { + *errorcodeptr = ERR20; + return 0; + } + *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE; + code++; /* This already contains bravalue */ + PUTINC(code, 0, 1 + LINK_SIZE); + *code++ = OP_KET; + PUTINC(code, 0, 1 + LINK_SIZE); + break; /* No need to waste time with special character handling */ + } + + /* Otherwise update the main code pointer to the end of the group. */ + + code = tempcode; + + /* For a DEFINE group, required and first character settings are not + relevant. 
*/ + + if (bravalue == OP_DEFINE) break; + + /* Handle updating of the required and first code units for other types of + group. Update for normal brackets of all kinds, and conditions with two + branches (see code above). If the bracket is followed by a quantifier with + zero repeat, we have to back off. Hence the definition of zeroreqcu and + zerofirstcu outside the main loop so that they can be accessed for the back + off. */ + + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + groupsetfirstcu = FALSE; + + if (bravalue >= OP_ONCE) /* Not an assertion */ + { + /* If we have not yet set a firstcu in this branch, take it from the + subpattern, remembering that it was set here so that a repeat of more + than one can replicate it as reqcu if necessary. If the subpattern has + no firstcu, set "none" for the whole branch. In both cases, a zero + repeat forces firstcu to "none". */ + + if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET) + { + if (subfirstcuflags < REQ_NONE) + { + firstcu = subfirstcu; + firstcuflags = subfirstcuflags; + groupsetfirstcu = TRUE; + } + else firstcuflags = REQ_NONE; + zerofirstcuflags = REQ_NONE; + } + + /* If firstcu was previously set, convert the subpattern's firstcu + into reqcu if there wasn't one, using the vary flag that was in + existence beforehand. */ + + else if (subfirstcuflags < REQ_NONE && subreqcuflags >= REQ_NONE) + { + subreqcu = subfirstcu; + subreqcuflags = subfirstcuflags | tempreqvary; + } + + /* If the subpattern set a required code unit (or set a first code unit + that isn't really the first code unit - see above), set it. */ + + if (subreqcuflags < REQ_NONE) + { + reqcu = subreqcu; + reqcuflags = subreqcuflags; + } + } + + /* For a forward assertion, we take the reqcu, if set, provided that the + group has also set a firstcu. This can be helpful if the pattern that + follows the assertion doesn't set a different char. 
For example, it's + useful for /(?=abcde).+/. We can't set firstcu for an assertion, however + because it leads to incorrect effect for patterns such as /(?=a)a.+/ when + the "real" "a" would then become a reqcu instead of a firstcu. This is + overcome by a scan at the end if there's no firstcu, looking for an + asserted first char. A similar effect for patterns like /(?=.*X)X$/ means + we must only take the reqcu when the group also set a firstcu. Otherwise, + in that example, 'X' ends up set for both. */ + + else if ((bravalue == OP_ASSERT || bravalue == OP_ASSERT_NA) && + subreqcuflags < REQ_NONE && subfirstcuflags < REQ_NONE) + { + reqcu = subreqcu; + reqcuflags = subreqcuflags; + } + + break; /* End of nested group handling */ + + + /* ===================================================================*/ + /* Handle named backreferences and recursions. */ + + case META_BACKREF_BYNAME: + case META_RECURSE_BYNAME: + { + int count, index; + PCRE2_SPTR name; + BOOL is_dupname = FALSE; + named_group *ng = cb->named_groups; + uint32_t length = *(++pptr); + + GETPLUSOFFSET(offset, pptr); + name = cb->start_pattern + offset; + + /* In the first pass, the names generated in the pre-pass are available, + but the main name table has not yet been created. Scan the list of names + generated in the pre-pass in order to get a number and whether or not + this name is duplicated. */ + + groupnumber = 0; + for (unsigned int i = 0; i < cb->names_found; i++, ng++) + { + if (length == ng->length && + PRIV(strncmp)(name, ng->name, length) == 0) + { + is_dupname = ng->isdup; + groupnumber = ng->number; + + /* For a recursion, that's all that is needed. We can now go to + the code that handles numerical recursion, applying it to the first + group with the given name. */ + + if (meta == META_RECURSE_BYNAME) + { + meta_arg = groupnumber; + goto HANDLE_NUMERICAL_RECURSION; + } + + /* For a back reference, update the back reference map and the + maximum back reference. 
*/ + + cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1; + if (groupnumber > cb->top_backref) + cb->top_backref = groupnumber; + } + } + + /* If the name was not found we have a bad reference. */ + + if (groupnumber == 0) + { + *errorcodeptr = ERR15; + cb->erroroffset = offset; + return 0; + } + + /* If a back reference name is not duplicated, we can handle it as + a numerical reference. */ + + if (!is_dupname) + { + meta_arg = groupnumber; + goto HANDLE_SINGLE_REFERENCE; + } + + /* If a back reference name is duplicated, we generate a different + opcode to a numerical back reference. In the second pass we must + search for the index and count in the final name table. */ + + count = 0; /* Values for first pass (avoids compiler warning) */ + index = 0; + if (lengthptr == NULL && !find_dupname_details(name, length, &index, + &count, errorcodeptr, cb)) return 0; + + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + *code++ = ((options & PCRE2_CASELESS) != 0)? OP_DNREFI : OP_DNREF; + PUT2INC(code, 0, index); + PUT2INC(code, 0, count); + } + break; + + + /* ===================================================================*/ + /* Handle a numerical callout. */ + + case META_CALLOUT_NUMBER: + code[0] = OP_CALLOUT; + PUT(code, 1, pptr[1]); /* Offset to next pattern item */ + PUT(code, 1 + LINK_SIZE, pptr[2]); /* Length of next pattern item */ + code[1 + 2*LINK_SIZE] = pptr[3]; + pptr += 3; + code += PRIV(OP_lengths)[OP_CALLOUT]; + break; + + + /* ===================================================================*/ + /* Handle a callout with a string argument. In the pre-pass we just compute + the length without generating anything. The length in pptr[3] includes both + delimiters; in the actual compile only the first one is copied, but a + terminating zero is added. Any doubled delimiters within the string make + this an overestimate, but it is not worth bothering about. 
*/ + + case META_CALLOUT_STRING: + if (lengthptr != NULL) + { + *lengthptr += pptr[3] + (1 + 4*LINK_SIZE); + pptr += 3; + SKIPOFFSET(pptr); + } + + /* In the real compile we can copy the string. The starting delimiter is + included so that the client can discover it if they want. We also pass the + start offset to help a script language give better error messages. */ + + else + { + PCRE2_SPTR pp; + uint32_t delimiter; + uint32_t length = pptr[3]; + PCRE2_UCHAR *callout_string = code + (1 + 4*LINK_SIZE); + + code[0] = OP_CALLOUT_STR; + PUT(code, 1, pptr[1]); /* Offset to next pattern item */ + PUT(code, 1 + LINK_SIZE, pptr[2]); /* Length of next pattern item */ + + pptr += 3; + GETPLUSOFFSET(offset, pptr); /* Offset to string in pattern */ + pp = cb->start_pattern + offset; + delimiter = *callout_string++ = *pp++; + if (delimiter == CHAR_LEFT_CURLY_BRACKET) + delimiter = CHAR_RIGHT_CURLY_BRACKET; + PUT(code, 1 + 3*LINK_SIZE, (int)(offset + 1)); /* One after delimiter */ + + /* The syntax of the pattern was checked in the parsing scan. The length + includes both delimiters, but we have passed the opening one just above, + so we reduce length before testing it. The test is for > 1 because we do + not want to copy the final delimiter. This also ensures that pp[1] is + accessible. */ + + while (--length > 1) + { + if (*pp == delimiter && pp[1] == delimiter) + { + *callout_string++ = delimiter; + pp += 2; + length--; + } + else *callout_string++ = *pp++; + } + *callout_string++ = CHAR_NUL; + + /* Set the length of the entire item, the advance to its end. */ + + PUT(code, 1 + 2*LINK_SIZE, (int)(callout_string - code)); + code = callout_string; + } + break; + + + /* ===================================================================*/ + /* Handle repetition. The different types are all sorted out in the parsing + pass. 
*/ + + case META_MINMAX_PLUS: + case META_MINMAX_QUERY: + case META_MINMAX: + repeat_min = *(++pptr); + repeat_max = *(++pptr); + goto REPEAT; + + case META_ASTERISK: + case META_ASTERISK_PLUS: + case META_ASTERISK_QUERY: + repeat_min = 0; + repeat_max = REPEAT_UNLIMITED; + goto REPEAT; + + case META_PLUS: + case META_PLUS_PLUS: + case META_PLUS_QUERY: + repeat_min = 1; + repeat_max = REPEAT_UNLIMITED; + goto REPEAT; + + case META_QUERY: + case META_QUERY_PLUS: + case META_QUERY_QUERY: + repeat_min = 0; + repeat_max = 1; + + REPEAT: + if (previous_matched_char && repeat_min > 0) matched_char = TRUE; + + /* Remember whether this is a variable length repeat, and default to + single-char opcodes. */ + + reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; + op_type = 0; + + /* Adjust first and required code units for a zero repeat. */ + + if (repeat_min == 0) + { + firstcu = zerofirstcu; + firstcuflags = zerofirstcuflags; + reqcu = zeroreqcu; + reqcuflags = zeroreqcuflags; + } + + /* Note the greediness and possessiveness. */ + + switch (meta) + { + case META_MINMAX_PLUS: + case META_ASTERISK_PLUS: + case META_PLUS_PLUS: + case META_QUERY_PLUS: + repeat_type = 0; /* Force greedy */ + possessive_quantifier = TRUE; + break; + + case META_MINMAX_QUERY: + case META_ASTERISK_QUERY: + case META_PLUS_QUERY: + case META_QUERY_QUERY: + repeat_type = greedy_non_default; + possessive_quantifier = FALSE; + break; + + default: + repeat_type = greedy_default; + possessive_quantifier = FALSE; + break; + } + + /* Save start of previous item, in case we have to move it up in order to + insert something before it, and remember what it was. */ + + tempcode = previous; + op_previous = *previous; + + /* Now handle repetition for the different types of item. If the repeat + minimum and the repeat maximum are both 1, we can ignore the quantifier for + non-parenthesized items, as they have only one alternative. For anything in + parentheses, we must not ignore if {1} is possessive. 
*/ + + switch (op_previous) + { + /* If previous was a character or negated character match, abolish the + item and generate a repeat item instead. If a char item has a minimum of + more than one, ensure that it is set in reqcu - it might not be if a + sequence such as x{3} is the first thing in a branch because the x will + have gone into firstcu instead. */ + + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT; + op_type = chartypeoffset[op_previous - OP_CHAR]; + + /* Deal with UTF characters that take up more than one code unit. */ + +#ifdef MAYBE_UTF_MULTI + if (utf && NOT_FIRSTCU(code[-1])) + { + PCRE2_UCHAR *lastchar = code - 1; + BACKCHAR(lastchar); + mclength = (uint32_t)(code - lastchar); /* Length of UTF character */ + memcpy(mcbuffer, lastchar, CU2BYTES(mclength)); /* Save the char */ + } + else +#endif /* MAYBE_UTF_MULTI */ + + /* Handle the case of a single code unit - either with no UTF support, or + with UTF disabled, or for a single-code-unit UTF character. In the latter + case, for a repeated positive match, get the caseless flag for the + required code unit from the previous character, because a class like [Aa] + sets a caseless A but by now the req_caseopt flag has been reset. */ + + { + mcbuffer[0] = code[-1]; + mclength = 1; + if (op_previous <= OP_CHARI && repeat_min > 1) + { + reqcu = mcbuffer[0]; + reqcuflags = cb->req_varyopt; + if (op_previous == OP_CHARI) reqcuflags |= REQ_CASELESS; + } + } + goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ + + /* If previous was a character class or a back reference, we put the + repeat stuff after it, but just skip the item if the repeat was {0,0}. 
*/ + +#ifdef SUPPORT_WIDE_CHARS + case OP_XCLASS: +#endif + case OP_CLASS: + case OP_NCLASS: + case OP_REF: + case OP_REFI: + case OP_DNREF: + case OP_DNREFI: + + if (repeat_max == 0) + { + code = previous; + goto END_REPEAT; + } + if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT; + + if (repeat_min == 0 && repeat_max == REPEAT_UNLIMITED) + *code++ = OP_CRSTAR + repeat_type; + else if (repeat_min == 1 && repeat_max == REPEAT_UNLIMITED) + *code++ = OP_CRPLUS + repeat_type; + else if (repeat_min == 0 && repeat_max == 1) + *code++ = OP_CRQUERY + repeat_type; + else + { + *code++ = OP_CRRANGE + repeat_type; + PUT2INC(code, 0, repeat_min); + if (repeat_max == REPEAT_UNLIMITED) repeat_max = 0; /* 2-byte encoding for max */ + PUT2INC(code, 0, repeat_max); + } + break; + + /* If previous is OP_FAIL, it was generated by an empty class [] + (PCRE2_ALLOW_EMPTY_CLASS is set). The other ways in which OP_FAIL can be + generated, that is by (*FAIL) or (?!), disallow a quantifier at parse + time. We can just ignore this repeat. */ + + case OP_FAIL: + goto END_REPEAT; + + /* Prior to 10.30, repeated recursions were wrapped in OP_ONCE brackets + because pcre2_match() could not handle backtracking into recursively + called groups. Now that this backtracking is available, we no longer need + to do this. However, we still need to replicate recursions as we do for + groups so as to have independent backtracking points. We can replicate + for the minimum number of repeats directly. For optional repeats we now + wrap the recursion in OP_BRA brackets and make use of the bracket + repetition. */ + + case OP_RECURSE: + if (repeat_max == 1 && repeat_min == 1 && !possessive_quantifier) + goto END_REPEAT; + + /* Generate unwrapped repeats for a non-zero minimum, except when the + minimum is 1 and the maximum unlimited, because that can be handled with + OP_BRA terminated by OP_KETRMAX/MIN. 
When the maximum is equal to the + minimum, we just need to generate the appropriate additional copies. + Otherwise we need to generate one more, to simulate the situation when + the minimum is zero. */ + + if (repeat_min > 0 && (repeat_min != 1 || repeat_max != REPEAT_UNLIMITED)) + { + int replicate = repeat_min; + if (repeat_min == repeat_max) replicate--; + + /* In the pre-compile phase, we don't actually do the replication. We + just adjust the length as if we had. Do some paranoid checks for + potential integer overflow. */ + + if (lengthptr != NULL) + { + PCRE2_SIZE delta; + if (PRIV(ckd_smul)(&delta, replicate, 1 + LINK_SIZE) || + OFLOW_MAX - *lengthptr < delta) + { + *errorcodeptr = ERR20; + return 0; + } + *lengthptr += delta; + } + + else for (int i = 0; i < replicate; i++) + { + memcpy(code, previous, CU2BYTES(1 + LINK_SIZE)); + previous = code; + code += 1 + LINK_SIZE; + } + + /* If the number of repeats is fixed, we are done. Otherwise, adjust + the counts and fall through. */ + + if (repeat_min == repeat_max) break; + if (repeat_max != REPEAT_UNLIMITED) repeat_max -= repeat_min; + repeat_min = 0; + } + + /* Wrap the recursion call in OP_BRA brackets. */ + + (void)memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE)); + op_previous = *previous = OP_BRA; + PUT(previous, 1, 2 + 2*LINK_SIZE); + previous[2 + 2*LINK_SIZE] = OP_KET; + PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE); + code += 2 + 2 * LINK_SIZE; + length_prevgroup = 3 + 3*LINK_SIZE; + group_return = -1; /* Set "may match empty string" */ + + /* Now treat as a repeated OP_BRA. */ + /* Fall through */ + + /* If previous was a bracket group, we may have to replicate it in + certain cases. Note that at this point we can encounter only the "basic" + bracket opcodes such as BRA and CBRA, as this is the place where they get + converted into the more special varieties such as BRAPOS and SBRA. 
+ Originally, PCRE did not allow repetition of assertions, but now it does, + for Perl compatibility. */ + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_BRA: + case OP_CBRA: + case OP_COND: + { + int len = (int)(code - previous); + PCRE2_UCHAR *bralink = NULL; + PCRE2_UCHAR *brazeroptr = NULL; + + if (repeat_max == 1 && repeat_min == 1 && !possessive_quantifier) + goto END_REPEAT; + + /* Repeating a DEFINE group (or any group where the condition is always + FALSE and there is only one branch) is pointless, but Perl allows the + syntax, so we just ignore the repeat. */ + + if (op_previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE && + previous[GET(previous, 1)] != OP_ALT) + goto END_REPEAT; + + /* Perl allows all assertions to be quantified, and when they contain + capturing parentheses and/or are optional there are potential uses for + this feature. PCRE2 used to force the maximum quantifier to 1 on the + invalid grounds that further repetition was never useful. This was + always a bit pointless, since an assertion could be wrapped with a + repeated group to achieve the effect. General repetition is now + permitted, but if the maximum is unlimited it is set to one more than + the minimum. */ + + if (op_previous < OP_ONCE) /* Assertion */ + { + if (repeat_max == REPEAT_UNLIMITED) repeat_max = repeat_min + 1; + } + + /* The case of a zero minimum is special because of the need to stick + OP_BRAZERO in front of it, and because the group appears once in the + data, whereas in other cases it appears the minimum number of times. For + this reason, it is simplest to treat this case separately, as otherwise + the code gets far too messy. There are several special subcases when the + minimum is zero. 
*/ + + if (repeat_min == 0) + { + /* If the maximum is also zero, we used to just omit the group from + the output altogether, like this: + + ** if (repeat_max == 0) + ** { + ** code = previous; + ** goto END_REPEAT; + ** } + + However, that fails when a group or a subgroup within it is + referenced as a subroutine from elsewhere in the pattern, so now we + stick in OP_SKIPZERO in front of it so that it is skipped on + execution. As we don't have a list of which groups are referenced, we + cannot do this selectively. + + If the maximum is 1 or unlimited, we just have to stick in the + BRAZERO and do no more at this point. */ + + if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED) + { + (void)memmove(previous + 1, previous, CU2BYTES(len)); + code++; + if (repeat_max == 0) + { + *previous++ = OP_SKIPZERO; + goto END_REPEAT; + } + brazeroptr = previous; /* Save for possessive optimizing */ + *previous++ = OP_BRAZERO + repeat_type; + } + + /* If the maximum is greater than 1 and limited, we have to replicate + in a nested fashion, sticking OP_BRAZERO before each set of brackets. + The first one has to be handled carefully because it's the original + copy, which has to be moved up. The remainder can be handled by code + that is common with the non-zero minimum case below. We have to + adjust the value or repeat_max, since one less copy is required. */ + + else + { + int linkoffset; + (void)memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len)); + code += 2 + LINK_SIZE; + *previous++ = OP_BRAZERO + repeat_type; + *previous++ = OP_BRA; + + /* We chain together the bracket link offset fields that have to be + filled in later when the ends of the brackets are reached. */ + + linkoffset = (bralink == NULL)? 
0 : (int)(previous - bralink); + bralink = previous; + PUTINC(previous, 0, linkoffset); + } + + if (repeat_max != REPEAT_UNLIMITED) repeat_max--; + } + + /* If the minimum is greater than zero, replicate the group as many + times as necessary, and adjust the maximum to the number of subsequent + copies that we need. */ + + else + { + if (repeat_min > 1) + { + /* In the pre-compile phase, we don't actually do the replication. + We just adjust the length as if we had. Do some paranoid checks for + potential integer overflow. */ + + if (lengthptr != NULL) + { + PCRE2_SIZE delta; + if (PRIV(ckd_smul)(&delta, repeat_min - 1, + (int)length_prevgroup) || + OFLOW_MAX - *lengthptr < delta) + { + *errorcodeptr = ERR20; + return 0; + } + *lengthptr += delta; + } + + /* This is compiling for real. If there is a set first code unit + for the group, and we have not yet set a "required code unit", set + it. */ + + else + { + if (groupsetfirstcu && reqcuflags >= REQ_NONE) + { + reqcu = firstcu; + reqcuflags = firstcuflags; + } + for (uint32_t i = 1; i < repeat_min; i++) + { + memcpy(code, previous, CU2BYTES(len)); + code += len; + } + } + } + + if (repeat_max != REPEAT_UNLIMITED) repeat_max -= repeat_min; + } + + /* This code is common to both the zero and non-zero minimum cases. If + the maximum is limited, it replicates the group in a nested fashion, + remembering the bracket starts on a stack. In the case of a zero + minimum, the first one was set up above. In all cases the repeat_max + now specifies the number of additional copies needed. Again, we must + remember to replicate entries on the forward reference list. */ + + if (repeat_max != REPEAT_UNLIMITED) + { + /* In the pre-compile phase, we don't actually do the replication. We + just adjust the length as if we had. For each repetition we must add + 1 to the length for BRAZERO and for all but the last repetition we + must add 2 + 2*LINKSIZE to allow for the nesting that occurs. 
Do some + paranoid checks to avoid integer overflow. */ + + if (lengthptr != NULL && repeat_max > 0) + { + PCRE2_SIZE delta; + if (PRIV(ckd_smul)(&delta, repeat_max, + (int)length_prevgroup + 1 + 2 + 2*LINK_SIZE) || + OFLOW_MAX + (2 + 2*LINK_SIZE) - *lengthptr < delta) + { + *errorcodeptr = ERR20; + return 0; + } + delta -= (2 + 2*LINK_SIZE); /* Last one doesn't nest */ + *lengthptr += delta; + } + + /* This is compiling for real */ + + else for (uint32_t i = repeat_max; i >= 1; i--) + { + *code++ = OP_BRAZERO + repeat_type; + + /* All but the final copy start a new nesting, maintaining the + chain of brackets outstanding. */ + + if (i != 1) + { + int linkoffset; + *code++ = OP_BRA; + linkoffset = (bralink == NULL)? 0 : (int)(code - bralink); + bralink = code; + PUTINC(code, 0, linkoffset); + } + + memcpy(code, previous, CU2BYTES(len)); + code += len; + } + + /* Now chain through the pending brackets, and fill in their length + fields (which are holding the chain links pro tem). */ + + while (bralink != NULL) + { + int oldlinkoffset; + int linkoffset = (int)(code - bralink + 1); + PCRE2_UCHAR *bra = code - linkoffset; + oldlinkoffset = GET(bra, 1); + bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset; + *code++ = OP_KET; + PUTINC(code, 0, linkoffset); + PUT(bra, 1, linkoffset); + } + } + + /* If the maximum is unlimited, set a repeater in the final copy. For + SCRIPT_RUN and ONCE brackets, that's all we need to do. However, + possessively repeated ONCE brackets can be converted into non-capturing + brackets, as the behaviour of (?:xx)++ is the same as (?>xx)++ and this + saves having to deal with possessive ONCEs specially. + + Otherwise, when we are doing the actual compile phase, check to see + whether this group is one that could match an empty string. If so, + convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so + that runtime checking can be done. 
[This check is also applied to ONCE + and SCRIPT_RUN groups at runtime, but in a different way.] + + Then, if the quantifier was possessive and the bracket is not a + conditional, we convert the BRA code to the POS form, and the KET code + to KETRPOS. (It turns out to be convenient at runtime to detect this + kind of subpattern at both the start and at the end.) The use of + special opcodes makes it possible to reduce greatly the stack usage in + pcre2_match(). If the group is preceded by OP_BRAZERO, convert this to + OP_BRAPOSZERO. + + Then, if the minimum number of matches is 1 or 0, cancel the possessive + flag so that the default action below, of wrapping everything inside + atomic brackets, does not happen. When the minimum is greater than 1, + there will be earlier copies of the group, and so we still have to wrap + the whole thing. */ + + else + { + PCRE2_UCHAR *ketcode = code - 1 - LINK_SIZE; + PCRE2_UCHAR *bracode = ketcode - GET(ketcode, 1); + + /* Convert possessive ONCE brackets to non-capturing */ + + if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA; + + /* For non-possessive ONCE and for SCRIPT_RUN brackets, all we need + to do is to set the KET. */ + + if (*bracode == OP_ONCE || *bracode == OP_SCRIPT_RUN) + *ketcode = OP_KETRMAX + repeat_type; + + /* Handle non-SCRIPT_RUN and non-ONCE brackets and possessive ONCEs + (which have been converted to non-capturing above). */ + + else + { + /* In the compile phase, adjust the opcode if the group can match + an empty string. For a conditional group with only one branch, the + value of group_return will not show "could be empty", so we must + check that separately. */ + + if (lengthptr == NULL) + { + if (group_return < 0) *bracode += OP_SBRA - OP_BRA; + if (*bracode == OP_COND && bracode[GET(bracode,1)] != OP_ALT) + *bracode = OP_SCOND; + } + + /* Handle possessive quantifiers. 
*/ + + if (possessive_quantifier) + { + /* For COND brackets, we wrap the whole thing in a possessively + repeated non-capturing bracket, because we have not invented POS + versions of the COND opcodes. */ + + if (*bracode == OP_COND || *bracode == OP_SCOND) + { + int nlen = (int)(code - bracode); + (void)memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen)); + code += 1 + LINK_SIZE; + nlen += 1 + LINK_SIZE; + *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS; + *code++ = OP_KETRPOS; + PUTINC(code, 0, nlen); + PUT(bracode, 1, nlen); + } + + /* For non-COND brackets, we modify the BRA code and use KETRPOS. */ + + else + { + *bracode += 1; /* Switch to xxxPOS opcodes */ + *ketcode = OP_KETRPOS; + } + + /* If the minimum is zero, mark it as possessive, then unset the + possessive flag when the minimum is 0 or 1. */ + + if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; + if (repeat_min < 2) possessive_quantifier = FALSE; + } + + /* Non-possessive quantifier */ + + else *ketcode = OP_KETRMAX + repeat_type; + } + } + } + break; + + /* If previous was a character type match (\d or similar), abolish it and + create a suitable repeat item. The code is shared with single-character + repeats by setting op_type to add a suitable offset into repeat_type. + Note the the Unicode property types will be present only when + SUPPORT_UNICODE is defined, but we don't wrap the little bits of code + here because it just makes it horribly messy. 
*/ + + default: + if (op_previous >= OP_EODN) /* Not a character type - internal error */ + { + *errorcodeptr = ERR10; + return 0; + } + else + { + int prop_type, prop_value; + PCRE2_UCHAR *oldcode; + + if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT; + + op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ + mclength = 0; /* Not a character */ + + if (op_previous == OP_PROP || op_previous == OP_NOTPROP) + { + prop_type = previous[1]; + prop_value = previous[2]; + } + else + { + /* Come here from just above with a character in mcbuffer/mclength. */ + OUTPUT_SINGLE_REPEAT: + prop_type = prop_value = -1; + } + + /* At this point, if prop_type == prop_value == -1 we either have a + character in mcbuffer when mclength is greater than zero, or we have + mclength zero, in which case there is a non-property character type in + op_previous. If prop_type/value are not negative, we have a property + character type in op_previous. */ + + oldcode = code; /* Save where we were */ + code = previous; /* Usually overwrite previous item */ + + /* If the maximum is zero then the minimum must also be zero; Perl allows + this case, so we do too - by simply omitting the item altogether. */ + + if (repeat_max == 0) goto END_REPEAT; + + /* Combine the op_type with the repeat_type */ + + repeat_type += op_type; + + /* A minimum of zero is handled either as the special case * or ?, or as + an UPTO, with the maximum given. */ + + if (repeat_min == 0) + { + if (repeat_max == REPEAT_UNLIMITED) *code++ = OP_STAR + repeat_type; + else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type; + else + { + *code++ = OP_UPTO + repeat_type; + PUT2INC(code, 0, repeat_max); + } + } + + /* A repeat minimum of 1 is optimized into some special cases. If the + maximum is unlimited, we use OP_PLUS. Otherwise, the original item is + left in place and, if the maximum is greater than 1, we use OP_UPTO with + one less than the maximum. 
*/ + + else if (repeat_min == 1) + { + if (repeat_max == REPEAT_UNLIMITED) + *code++ = OP_PLUS + repeat_type; + else + { + code = oldcode; /* Leave previous item in place */ + if (repeat_max == 1) goto END_REPEAT; + *code++ = OP_UPTO + repeat_type; + PUT2INC(code, 0, repeat_max - 1); + } + } + + /* The case {n,n} is just an EXACT, while the general case {n,m} is + handled as an EXACT followed by an UPTO or STAR or QUERY. */ + + else + { + *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */ + PUT2INC(code, 0, repeat_min); + + /* Unless repeat_max equals repeat_min, fill in the data for EXACT, + and then generate the second opcode. For a repeated Unicode property + match, there are two extra values that define the required property, + and mclength is set zero to indicate this. */ + + if (repeat_max != repeat_min) + { + if (mclength > 0) + { + memcpy(code, mcbuffer, CU2BYTES(mclength)); + code += mclength; + } + else + { + *code++ = op_previous; + if (prop_type >= 0) + { + *code++ = prop_type; + *code++ = prop_value; + } + } + + /* Now set up the following opcode */ + + if (repeat_max == REPEAT_UNLIMITED) + *code++ = OP_STAR + repeat_type; + else + { + repeat_max -= repeat_min; + if (repeat_max == 1) + { + *code++ = OP_QUERY + repeat_type; + } + else + { + *code++ = OP_UPTO + repeat_type; + PUT2INC(code, 0, repeat_max); + } + } + } + } + + /* Fill in the character or character type for the final opcode. */ + + if (mclength > 0) + { + memcpy(code, mcbuffer, CU2BYTES(mclength)); + code += mclength; + } + else + { + *code++ = op_previous; + if (prop_type >= 0) + { + *code++ = prop_type; + *code++ = prop_value; + } + } + } + break; + } /* End of switch on different op_previous values */ + + + /* If the character following a repeat is '+', possessive_quantifier is + TRUE. For some opcodes, there are special alternative opcodes for this + case. For anything else, we wrap the entire repeated item inside OP_ONCE + brackets. 
Logically, the '+' notation is just syntactic sugar, taken from + Sun's Java package, but the special opcodes can optimize it. + + Some (but not all) possessively repeated subpatterns have already been + completely handled in the code just above. For them, possessive_quantifier + is always FALSE at this stage. Note that the repeated item starts at + tempcode, not at previous, which might be the first part of a string whose + (former) last char we repeated. */ + + if (possessive_quantifier) + { + int len; + + /* Possessifying an EXACT quantifier has no effect, so we can ignore it. + However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6}, + {5,}, or {5,10}). We skip over an EXACT item; if the length of what + remains is greater than zero, there's a further opcode that can be + handled. If not, do nothing, leaving the EXACT alone. */ + + switch(*tempcode) + { + case OP_TYPEEXACT: + tempcode += PRIV(OP_lengths)[*tempcode] + + ((tempcode[1 + IMM2_SIZE] == OP_PROP + || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0); + break; + + /* CHAR opcodes are used for exacts whose count is 1. */ + + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + case OP_EXACT: + case OP_EXACTI: + case OP_NOTEXACT: + case OP_NOTEXACTI: + tempcode += PRIV(OP_lengths)[*tempcode]; +#ifdef SUPPORT_UNICODE + if (utf && HAS_EXTRALEN(tempcode[-1])) + tempcode += GET_EXTRALEN(tempcode[-1]); +#endif + break; + + /* For the class opcodes, the repeat operator appears at the end; + adjust tempcode to point to it. */ + + case OP_CLASS: + case OP_NCLASS: + tempcode += 1 + 32/sizeof(PCRE2_UCHAR); + break; + +#ifdef SUPPORT_WIDE_CHARS + case OP_XCLASS: + tempcode += GET(tempcode, 1); + break; +#endif + } + + /* If tempcode is equal to code (which points to the end of the repeated + item), it means we have skipped an EXACT item but there is no following + QUERY, STAR, or UPTO; the value of len will be 0, and we do nothing. 
In + all other cases, tempcode will be pointing to the repeat opcode, and will + be less than code, so the value of len will be greater than 0. */ + + len = (int)(code - tempcode); + if (len > 0) + { + unsigned int repcode = *tempcode; + + /* There is a table for possessifying opcodes, all of which are less + than OP_CALLOUT. A zero entry means there is no possessified version. + */ + + if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0) + *tempcode = opcode_possessify[repcode]; + + /* For opcode without a special possessified version, wrap the item in + ONCE brackets. */ + + else + { + (void)memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len)); + code += 1 + LINK_SIZE; + len += 1 + LINK_SIZE; + tempcode[0] = OP_ONCE; + *code++ = OP_KET; + PUTINC(code, 0, len); + PUT(tempcode, 1, len); + } + } + } + + /* We set the "follows varying string" flag for subsequently encountered + reqcus if it isn't already set and we have just passed a varying length + item. */ + + END_REPEAT: + cb->req_varyopt |= reqvary; + break; + + + /* ===================================================================*/ + /* Handle a 32-bit data character with a value greater than META_END. */ + + case META_BIGVALUE: + pptr++; + goto NORMAL_CHAR; + + + /* ===============================================================*/ + /* Handle a back reference by number, which is the meta argument. The + pattern offsets for back references to group numbers less than 10 are held + in a special vector, to avoid using more than two parsed pattern elements + in 64-bit environments. We only need the offset to the first occurrence, + because if that doesn't fail, subsequent ones will also be OK. 
*/ + + case META_BACKREF: + if (meta_arg < 10) offset = cb->small_ref_offset[meta_arg]; + else GETPLUSOFFSET(offset, pptr); + + if (meta_arg > cb->bracount) + { + cb->erroroffset = offset; + *errorcodeptr = ERR15; /* Non-existent subpattern */ + return 0; + } + + /* Come here from named backref handling when the reference is to a + single group (that is, not to a duplicated name). The back reference + data will have already been updated. We must disable firstcu if not + set, to cope with cases like (?=(\w+))\1: which would otherwise set ':' + later. */ + + HANDLE_SINGLE_REFERENCE: + if (firstcuflags == REQ_UNSET) zerofirstcuflags = firstcuflags = REQ_NONE; + *code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF; + PUT2INC(code, 0, meta_arg); + + /* Update the map of back references, and keep the highest one. We + could do this in parse_regex() for numerical back references, but not + for named back references, because we don't know the numbers to which + named back references refer. So we do it all in this function. */ + + cb->backref_map |= (meta_arg < 32)? (1u << meta_arg) : 1; + if (meta_arg > cb->top_backref) cb->top_backref = meta_arg; + break; + + + /* ===============================================================*/ + /* Handle recursion by inserting the number of the called group (which is + the meta argument) after OP_RECURSE. At the end of compiling the pattern is + scanned and these numbers are replaced by offsets within the pattern. It is + done like this to avoid problems with forward references and adjusting + offsets when groups are duplicated and moved (as discovered in previous + implementations). Note that a recursion does not have a set first + character. 
*/ + + case META_RECURSE: + GETPLUSOFFSET(offset, pptr); + if (meta_arg > cb->bracount) + { + cb->erroroffset = offset; + *errorcodeptr = ERR15; /* Non-existent subpattern */ + return 0; + } + HANDLE_NUMERICAL_RECURSION: + *code = OP_RECURSE; + PUT(code, 1, meta_arg); + code += 1 + LINK_SIZE; + groupsetfirstcu = FALSE; + cb->had_recurse = TRUE; + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + break; + + + /* ===============================================================*/ + /* Handle capturing parentheses; the number is the meta argument. */ + + case META_CAPTURE: + bravalue = OP_CBRA; + skipunits = IMM2_SIZE; + PUT2(code, 1+LINK_SIZE, meta_arg); + cb->lastcapture = meta_arg; + goto GROUP_PROCESS_NOTE_EMPTY; + + + /* ===============================================================*/ + /* Handle escape sequence items. For ones like \d, the ESC_values are + arranged to be the same as the corresponding OP_values in the default case + when PCRE2_UCP is not set (which is the only case in which they will appear + here). + + Note: \Q and \E are never seen here, as they were dealt with in + parse_pattern(). Neither are numerical back references or recursions, which + were turned into META_BACKREF or META_RECURSE items, respectively. \k and + \g, when followed by names, are turned into META_BACKREF_BYNAME or + META_RECURSE_BYNAME. */ + + case META_ESCAPE: + + /* We can test for escape sequences that consume a character because their + values lie between ESC_b and ESC_Z; this may have to change if any new ones + are ever created. For these sequences, we disable the setting of a first + character if it hasn't already been set. */ + + if (meta_arg > ESC_b && meta_arg < ESC_Z) + { + matched_char = TRUE; + if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + } + + /* Set values to reset to if this is followed by a zero repeat. 
*/ + + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + + /* If Unicode is not supported, \P and \p are not allowed and are + faulted at parse time, so will never appear here. */ + +#ifdef SUPPORT_UNICODE + if (meta_arg == ESC_P || meta_arg == ESC_p) + { + uint32_t ptype = *(++pptr) >> 16; + uint32_t pdata = *pptr & 0xffff; + + /* The special case of \p{Any} is compiled to OP_ALLANY so as to benefit + from the auto-anchoring code. */ + + if (meta_arg == ESC_p && ptype == PT_ANY) + { + *code++ = OP_ALLANY; + } + else + { + *code++ = (meta_arg == ESC_p)? OP_PROP : OP_NOTPROP; + *code++ = ptype; + *code++ = pdata; + } + break; /* End META_ESCAPE */ + } +#endif + + /* \K is forbidden in lookarounds since 10.38 because that's what Perl has + done. However, there's an option, in case anyone was relying on it. */ + + if (cb->assert_depth > 0 && meta_arg == ESC_K && + (xoptions & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) == 0) + { + *errorcodeptr = ERR99; + return 0; + } + + /* For the rest (including \X when Unicode is supported - if not it's + faulted at parse time), the OP value is the escape value when PCRE2_UCP is + not set; if it is set, most of them do not show up here because they are + converted into Unicode property tests in parse_regex(). + + In non-UTF mode, and for both 32-bit modes, we turn \C into OP_ALLANY + instead of OP_ANYBYTE so that it works in DFA mode and in lookbehinds. + There are special UCP codes for \B and \b which are used in UCP mode unless + "word" matching is being forced to ASCII. + + Note that \b and \B do a one-character lookbehind, and \A also behaves as + if it does. 
*/ + + switch(meta_arg) + { + case ESC_C: + cb->external_flags |= PCRE2_HASBKC; /* Record */ +#if PCRE2_CODE_UNIT_WIDTH == 32 + meta_arg = OP_ALLANY; +#else + if (!utf) meta_arg = OP_ALLANY; +#endif + break; + + case ESC_B: + case ESC_b: + if ((options & PCRE2_UCP) != 0 && (xoptions & PCRE2_EXTRA_ASCII_BSW) == 0) + meta_arg = (meta_arg == ESC_B)? OP_NOT_UCP_WORD_BOUNDARY : + OP_UCP_WORD_BOUNDARY; + /* Fall through */ + + case ESC_A: + if (cb->max_lookbehind == 0) cb->max_lookbehind = 1; + break; + } + + *code++ = meta_arg; + break; /* End META_ESCAPE */ + + + /* ===================================================================*/ + /* Handle an unrecognized meta value. A parsed pattern value less than + META_END is a literal. Otherwise we have a problem. */ + + default: + if (meta >= META_END) + { +#ifdef DEBUG_SHOW_PARSED + fprintf(stderr, "** Unrecognized parsed pattern item 0x%.8x\n", *pptr); +#endif + *errorcodeptr = ERR89; /* Internal error - unrecognized. */ + return 0; + } + + /* Handle a literal character. We come here by goto in the case of a + 32-bit, non-UTF character whose value is greater than META_END. */ + + NORMAL_CHAR: + meta = *pptr; /* Get the full 32 bits */ + NORMAL_CHAR_SET: /* Character is already in meta */ + matched_char = TRUE; + + /* For caseless UTF or UCP mode, check whether this character has more than + one other case. If so, generate a special OP_PROP item instead of OP_CHARI. + When casing restrictions apply, ignore caseless sets that start with an + ASCII character. 
*/ + +#ifdef SUPPORT_UNICODE + if ((utf||ucp) && (options & PCRE2_CASELESS) != 0) + { + uint32_t caseset = UCD_CASESET(meta); + if (caseset != 0 && + ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 || + PRIV(ucd_caseless_sets)[caseset] > 127)) + { + *code++ = OP_PROP; + *code++ = PT_CLIST; + *code++ = caseset; + if (firstcuflags == REQ_UNSET) + firstcuflags = zerofirstcuflags = REQ_NONE; + break; /* End handling this meta item */ + } + } +#endif + + /* Caseful matches, or caseless and not one of the multicase characters. We + come here by goto in the case of a positive class that contains only + case-partners of a character with just two cases; matched_char has already + been set TRUE and options fudged if necessary. */ + + CLASS_CASELESS_CHAR: + + /* Get the character's code units into mcbuffer, with the length in + mclength. When not in UTF mode, the length is always 1. */ + +#ifdef SUPPORT_UNICODE + if (utf) mclength = PRIV(ord2utf)(meta, mcbuffer); else +#endif + { + mclength = 1; + mcbuffer[0] = meta; + } + + /* Generate the appropriate code */ + + *code++ = ((options & PCRE2_CASELESS) != 0)? OP_CHARI : OP_CHAR; + memcpy(code, mcbuffer, CU2BYTES(mclength)); + code += mclength; + + /* Remember if \r or \n were seen */ + + if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL) + cb->external_flags |= PCRE2_HASCRORLF; + + /* Set the first and required code units appropriately. If no previous + first code unit, set it from this character, but revert to none on a zero + repeat. Otherwise, leave the firstcu value alone, and don't change it on + a zero repeat. */ + + if (firstcuflags == REQ_UNSET) + { + zerofirstcuflags = REQ_NONE; + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + + /* If the character is more than one code unit long, we can set a single + firstcu only if it is not to be matched caselessly. Multiple possible + starting code units may be picked up later in the studying code. 
*/ + + if (mclength == 1 || req_caseopt == 0) + { + firstcu = mcbuffer[0]; + firstcuflags = req_caseopt; + if (mclength != 1) + { + reqcu = code[-1]; + reqcuflags = cb->req_varyopt; + } + } + else firstcuflags = reqcuflags = REQ_NONE; + } + + /* firstcu was previously set; we can set reqcu only if the length is + 1 or the matching is caseful. */ + + else + { + zerofirstcu = firstcu; + zerofirstcuflags = firstcuflags; + zeroreqcu = reqcu; + zeroreqcuflags = reqcuflags; + if (mclength == 1 || req_caseopt == 0) + { + reqcu = code[-1]; + reqcuflags = req_caseopt | cb->req_varyopt; + } + } + + /* If caselessness was temporarily instated, reset it. */ + + if (reset_caseful) + { + options &= ~PCRE2_CASELESS; + req_caseopt = 0; + reset_caseful = FALSE; + } + + break; /* End literal character handling */ + } /* End of big switch */ + } /* End of big loop */ + +/* Control never reaches here. */ +} + + + +/************************************************* +* Compile regex: a sequence of alternatives * +*************************************************/ + +/* On entry, pptr is pointing past the bracket meta, but on return it points to +the closing bracket or META_END. The code variable is pointing at the code unit +into which the BRA operator has been stored. This function is used during the +pre-compile phase when we are trying to find out the amount of memory needed, +as well as during the real compile phase. The value of lengthptr distinguishes +the two phases. 
+ +Arguments: + options option bits, including any changes for this subpattern + xoptions extra option bits, ditto + codeptr -> the address of the current code pointer + pptrptr -> the address of the current parsed pattern pointer + errorcodeptr -> pointer to error code variable + skipunits skip this many code units at start (for brackets and OP_COND) + firstcuptr place to put the first required code unit + firstcuflagsptr place to put the first code unit flags + reqcuptr place to put the last required code unit + reqcuflagsptr place to put the last required code unit flags + bcptr pointer to the chain of currently open branches + cb points to the data block with tables pointers etc. + lengthptr NULL during the real compile phase + points to length accumulator during pre-compile phase + +Returns: 0 There has been an error + +1 Success, this group must match at least one character + -1 Success, this group may match an empty string +*/ + +static int +compile_regex(uint32_t options, uint32_t xoptions, PCRE2_UCHAR **codeptr, + uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits, + uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr, + uint32_t *reqcuflagsptr, branch_chain *bcptr, open_capitem *open_caps, + compile_block *cb, PCRE2_SIZE *lengthptr) +{ +PCRE2_UCHAR *code = *codeptr; +PCRE2_UCHAR *last_branch = code; +PCRE2_UCHAR *start_bracket = code; +BOOL lookbehind; +open_capitem capitem; +int capnumber = 0; +int okreturn = 1; +uint32_t *pptr = *pptrptr; +uint32_t firstcu, reqcu; +uint32_t lookbehindlength; +uint32_t lookbehindminlength; +uint32_t firstcuflags, reqcuflags; +uint32_t branchfirstcu, branchreqcu; +uint32_t branchfirstcuflags, branchreqcuflags; +PCRE2_SIZE length; +branch_chain bc; + +/* If set, call the external function that checks for stack availability. 
*/ + +if (cb->cx->stack_guard != NULL && + cb->cx->stack_guard(cb->parens_depth, cb->cx->stack_guard_data)) + { + *errorcodeptr= ERR33; + return 0; + } + +/* Miscellaneous initialization */ + +bc.outer = bcptr; +bc.current_branch = code; + +firstcu = reqcu = 0; +firstcuflags = reqcuflags = REQ_UNSET; + +/* Accumulate the length for use in the pre-compile phase. Start with the +length of the BRA and KET and any extra code units that are required at the +beginning. We accumulate in a local variable to save frequent testing of +lengthptr for NULL. We cannot do this by looking at the value of 'code' at the +start and end of each alternative, because compiled items are discarded during +the pre-compile phase so that the workspace is not exceeded. */ + +length = 2 + 2*LINK_SIZE + skipunits; + +/* Remember if this is a lookbehind assertion, and if it is, save its length +and skip over the pattern offset. */ + +lookbehind = *code == OP_ASSERTBACK || + *code == OP_ASSERTBACK_NOT || + *code == OP_ASSERTBACK_NA; + +if (lookbehind) + { + lookbehindlength = META_DATA(pptr[-1]); + lookbehindminlength = *pptr; + pptr += SIZEOFFSET; + } +else lookbehindlength = lookbehindminlength = 0; + +/* If this is a capturing subpattern, add to the chain of open capturing items +so that we can detect them if (*ACCEPT) is encountered. Note that only OP_CBRA +need be tested here; changing this opcode to one of its variants, e.g. +OP_SCBRAPOS, happens later, after the group has been compiled. */ + +if (*code == OP_CBRA) + { + capnumber = GET2(code, 1 + LINK_SIZE); + capitem.number = capnumber; + capitem.next = open_caps; + capitem.assert_depth = cb->assert_depth; + open_caps = &capitem; + } + +/* Offset is set zero to mark that this bracket is still open */ + +PUT(code, 1, 0); +code += 1 + LINK_SIZE + skipunits; + +/* Loop for each alternative branch */ + +for (;;) + { + int branch_return; + + /* Insert OP_REVERSE or OP_VREVERSE if this is a lookbehind assertion. 
There + is only a single mimimum length for the whole assertion. When the mimimum + length is LOOKBEHIND_MAX it means that all branches are of fixed length, + though not necessarily the same length. In this case, the original OP_REVERSE + can be used. It can also be used if a branch in a variable length lookbehind + has the same maximum and minimum. Otherwise, use OP_VREVERSE, which has both + maximum and minimum values. */ + + if (lookbehind && lookbehindlength > 0) + { + if (lookbehindminlength == LOOKBEHIND_MAX || + lookbehindminlength == lookbehindlength) + { + *code++ = OP_REVERSE; + PUT2INC(code, 0, lookbehindlength); + length += 1 + IMM2_SIZE; + } + else + { + *code++ = OP_VREVERSE; + PUT2INC(code, 0, lookbehindminlength); + PUT2INC(code, 0, lookbehindlength); + length += 1 + 2*IMM2_SIZE; + } + } + + /* Now compile the branch; in the pre-compile phase its length gets added + into the length. */ + + if ((branch_return = + compile_branch(&options, &xoptions, &code, &pptr, errorcodeptr, + &branchfirstcu, &branchfirstcuflags, &branchreqcu, &branchreqcuflags, + &bc, open_caps, cb, (lengthptr == NULL)? NULL : &length)) == 0) + return 0; + + /* If a branch can match an empty string, so can the whole group. */ + + if (branch_return < 0) okreturn = -1; + + /* In the real compile phase, there is some post-processing to be done. */ + + if (lengthptr == NULL) + { + /* If this is the first branch, the firstcu and reqcu values for the + branch become the values for the regex. */ + + if (*last_branch != OP_ALT) + { + firstcu = branchfirstcu; + firstcuflags = branchfirstcuflags; + reqcu = branchreqcu; + reqcuflags = branchreqcuflags; + } + + /* If this is not the first branch, the first char and reqcu have to + match the values from all the previous branches, except that if the + previous value for reqcu didn't have REQ_VARY set, it can still match, + and we set REQ_VARY for the group from this branch's value. 
*/ + + else + { + /* If we previously had a firstcu, but it doesn't match the new branch, + we have to abandon the firstcu for the regex, but if there was + previously no reqcu, it takes on the value of the old firstcu. */ + + if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu) + { + if (firstcuflags < REQ_NONE) + { + if (reqcuflags >= REQ_NONE) + { + reqcu = firstcu; + reqcuflags = firstcuflags; + } + } + firstcuflags = REQ_NONE; + } + + /* If we (now or from before) have no firstcu, a firstcu from the + branch becomes a reqcu if there isn't a branch reqcu. */ + + if (firstcuflags >= REQ_NONE && branchfirstcuflags < REQ_NONE && + branchreqcuflags >= REQ_NONE) + { + branchreqcu = branchfirstcu; + branchreqcuflags = branchfirstcuflags; + } + + /* Now ensure that the reqcus match */ + + if (((reqcuflags & ~REQ_VARY) != (branchreqcuflags & ~REQ_VARY)) || + reqcu != branchreqcu) + reqcuflags = REQ_NONE; + else + { + reqcu = branchreqcu; + reqcuflags |= branchreqcuflags; /* To "or" REQ_VARY if present */ + } + } + } + + /* Handle reaching the end of the expression, either ')' or end of pattern. + In the real compile phase, go back through the alternative branches and + reverse the chain of offsets, with the field in the BRA item now becoming an + offset to the first alternative. If there are no alternatives, it points to + the end of the group. The length in the terminating ket is always the length + of the whole bracketed item. Return leaving the pointer at the terminating + char. 
*/ + + if (META_CODE(*pptr) != META_ALT) + { + if (lengthptr == NULL) + { + PCRE2_SIZE branch_length = code - last_branch; + do + { + PCRE2_SIZE prev_length = GET(last_branch, 1); + PUT(last_branch, 1, branch_length); + branch_length = prev_length; + last_branch -= branch_length; + } + while (branch_length > 0); + } + + /* Fill in the ket */ + + *code = OP_KET; + PUT(code, 1, (int)(code - start_bracket)); + code += 1 + LINK_SIZE; + + /* Set values to pass back */ + + *codeptr = code; + *pptrptr = pptr; + *firstcuptr = firstcu; + *firstcuflagsptr = firstcuflags; + *reqcuptr = reqcu; + *reqcuflagsptr = reqcuflags; + if (lengthptr != NULL) + { + if (OFLOW_MAX - *lengthptr < length) + { + *errorcodeptr = ERR20; + return 0; + } + *lengthptr += length; + } + return okreturn; + } + + /* Another branch follows. In the pre-compile phase, we can move the code + pointer back to where it was for the start of the first branch. (That is, + pretend that each branch is the only one.) + + In the real compile phase, insert an ALT node. Its length field points back + to the previous branch while the bracket remains open. At the end the chain + is reversed. It's done like this so that the start of the bracket has a + zero offset until it is closed, making it possible to detect recursion. */ + + if (lengthptr != NULL) + { + code = *codeptr + 1 + LINK_SIZE + skipunits; + length += 1 + LINK_SIZE; + } + else + { + *code = OP_ALT; + PUT(code, 1, (int)(code - last_branch)); + bc.current_branch = last_branch = code; + code += 1 + LINK_SIZE; + } + + /* Set the maximum lookbehind length for the next branch (if not in a + lookbehind the value will be zero) and then advance past the vertical bar. */ + + lookbehindlength = META_DATA(*pptr); + pptr++; + } +/* Control never reaches here */ +} + + + +/************************************************* +* Check for anchored pattern * +*************************************************/ + +/* Try to find out if this is an anchored regular expression. 
Consider each +alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket +all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then +it's anchored. However, if this is a multiline pattern, then only OP_SOD will +be found, because ^ generates OP_CIRCM in that mode. + +We can also consider a regex to be anchored if OP_SOM starts all its branches. +This is the code for \G, which means "match at start of match position, taking +into account the match offset". + +A branch is also implicitly anchored if it starts with .* and DOTALL is set, +because that will try the rest of the pattern at all possible matching points, +so there is no point trying again.... er .... + +.... except when the .* appears inside capturing parentheses, and there is a +subsequent back reference to those parentheses. We haven't enough information +to catch that case precisely. + +At first, the best we could do was to detect when .* was in capturing brackets +and the highest back reference was greater than or equal to that level. +However, by keeping a bitmap of the first 31 back references, we can catch some +of the more common cases more precisely. + +... A second exception is when the .* appears inside an atomic group, because +this prevents the number of characters it matches from being adjusted. 
+ +Arguments: + code points to start of the compiled pattern + bracket_map a bitmap of which brackets we are inside while testing; this + handles up to substring 31; after that we just have to take + the less precise approach + cb points to the compile data block + atomcount atomic group level + inassert TRUE if in an assertion + +Returns: TRUE or FALSE +*/ + +static BOOL +is_anchored(PCRE2_SPTR code, uint32_t bracket_map, compile_block *cb, + int atomcount, BOOL inassert) +{ +do { + PCRE2_SPTR scode = first_significant_code( + code + PRIV(OP_lengths)[*code], FALSE); + int op = *scode; + + /* Non-capturing brackets */ + + if (op == OP_BRA || op == OP_BRAPOS || + op == OP_SBRA || op == OP_SBRAPOS) + { + if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) + return FALSE; + } + + /* Capturing brackets */ + + else if (op == OP_CBRA || op == OP_CBRAPOS || + op == OP_SCBRA || op == OP_SCBRAPOS) + { + int n = GET2(scode, 1+LINK_SIZE); + uint32_t new_map = bracket_map | ((n < 32)? (1u << n) : 1); + if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE; + } + + /* Positive forward assertion */ + + else if (op == OP_ASSERT || op == OP_ASSERT_NA) + { + if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE; + } + + /* Condition. If there is no second branch, it can't be anchored. */ + + else if (op == OP_COND || op == OP_SCOND) + { + if (scode[GET(scode,1)] != OP_ALT) return FALSE; + if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) + return FALSE; + } + + /* Atomic groups */ + + else if (op == OP_ONCE) + { + if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert)) + return FALSE; + } + + /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and + it isn't in brackets that are or may be referenced or inside an atomic + group or an assertion. Also the pattern must not contain *PRUNE or *SKIP, + because these break the feature. 
Consider, for example, /(?s).*?(*PRUNE)b/ + with the subject "aab", which matches "b", i.e. not at the start of a line. + There is also an option that disables auto-anchoring. */ + + else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR || + op == OP_TYPEPOSSTAR)) + { + if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 || + atomcount > 0 || cb->had_pruneorskip || inassert || + (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) + return FALSE; + } + + /* Check for explicit anchoring */ + + else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE; + + code += GET(code, 1); + } +while (*code == OP_ALT); /* Loop for each alternative */ +return TRUE; +} + + + +/************************************************* +* Check for starting with ^ or .* * +*************************************************/ + +/* This is called to find out if every branch starts with ^ or .* so that +"first char" processing can be done to speed things up in multiline +matching and for non-DOTALL patterns that start with .* (which must start at +the beginning or after \n). As in the case of is_anchored() (see above), we +have to take account of back references to capturing brackets that contain .* +because in that case we can't make the assumption. Also, the appearance of .* +inside atomic brackets or in an assertion, or in a pattern that contains *PRUNE +or *SKIP does not count, because once again the assumption no longer holds. 
+ +Arguments: + code points to start of the compiled pattern or a group + bracket_map a bitmap of which brackets we are inside while testing; this + handles up to substring 31; after that we just have to take + the less precise approach + cb points to the compile data + atomcount atomic group level + inassert TRUE if in an assertion + +Returns: TRUE or FALSE +*/ + +static BOOL +is_startline(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb, + int atomcount, BOOL inassert) +{ +do { + PCRE2_SPTR scode = first_significant_code( + code + PRIV(OP_lengths)[*code], FALSE); + int op = *scode; + + /* If we are at the start of a conditional assertion group, *both* the + conditional assertion *and* what follows the condition must satisfy the test + for start of line. Other kinds of condition fail. Note that there may be an + auto-callout at the start of a condition. */ + + if (op == OP_COND) + { + scode += 1 + LINK_SIZE; + + if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT]; + else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE); + + switch (*scode) + { + case OP_CREF: + case OP_DNCREF: + case OP_RREF: + case OP_DNRREF: + case OP_FAIL: + case OP_FALSE: + case OP_TRUE: + return FALSE; + + default: /* Assertion */ + if (!is_startline(scode, bracket_map, cb, atomcount, TRUE)) return FALSE; + do scode += GET(scode, 1); while (*scode == OP_ALT); + scode += 1 + LINK_SIZE; + break; + } + scode = first_significant_code(scode, FALSE); + op = *scode; + } + + /* Non-capturing brackets */ + + if (op == OP_BRA || op == OP_BRAPOS || + op == OP_SBRA || op == OP_SBRAPOS) + { + if (!is_startline(scode, bracket_map, cb, atomcount, inassert)) + return FALSE; + } + + /* Capturing brackets */ + + else if (op == OP_CBRA || op == OP_CBRAPOS || + op == OP_SCBRA || op == OP_SCBRAPOS) + { + int n = GET2(scode, 1+LINK_SIZE); + unsigned int new_map = bracket_map | ((n < 32)? 
(1u << n) : 1); + if (!is_startline(scode, new_map, cb, atomcount, inassert)) return FALSE; + } + + /* Positive forward assertions */ + + else if (op == OP_ASSERT || op == OP_ASSERT_NA) + { + if (!is_startline(scode, bracket_map, cb, atomcount, TRUE)) + return FALSE; + } + + /* Atomic brackets */ + + else if (op == OP_ONCE) + { + if (!is_startline(scode, bracket_map, cb, atomcount + 1, inassert)) + return FALSE; + } + + /* .* means "start at start or after \n" if it isn't in atomic brackets or + brackets that may be referenced or an assertion, and as long as the pattern + does not contain *PRUNE or *SKIP, because these break the feature. Consider, + for example, /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", + i.e. not at the start of a line. There is also an option that disables this + optimization. */ + + else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR) + { + if (scode[1] != OP_ANY || (bracket_map & cb->backref_map) != 0 || + atomcount > 0 || cb->had_pruneorskip || inassert || + (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) + return FALSE; + } + + /* Check for explicit circumflex; anything else gives a FALSE result. Note + in particular that this includes atomic brackets OP_ONCE because the number + of characters matched by .* cannot be adjusted inside them. */ + + else if (op != OP_CIRC && op != OP_CIRCM) return FALSE; + + /* Move on to the next alternative */ + + code += GET(code, 1); + } +while (*code == OP_ALT); /* Loop for each alternative */ +return TRUE; +} + + + +/************************************************* +* Scan compiled regex for recursion reference * +*************************************************/ + +/* This function scans through a compiled pattern until it finds an instance of +OP_RECURSE. 
+ +Arguments: + code points to start of expression + utf TRUE in UTF mode + +Returns: pointer to the opcode for OP_RECURSE, or NULL if not found +*/ + +static PCRE2_SPTR +find_recurse(PCRE2_SPTR code, BOOL utf) +{ +for (;;) + { + PCRE2_UCHAR c = *code; + if (c == OP_END) return NULL; + if (c == OP_RECURSE) return code; + + /* XCLASS is used for classes that cannot be represented just by a bit map. + This includes negated single high-valued characters. CALLOUT_STR is used for + callouts with string arguments. In both cases the length in the table is + zero; the actual length is stored in the compiled code. */ + + if (c == OP_XCLASS) code += GET(code, 1); + else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); + + /* Otherwise, we can get the item's length from the table, except that for + repeated character types, we have to test for \p and \P, which have an extra + two code units of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, + we must add in its length. */ + + else + { + switch(c) + { + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; + break; + + case OP_TYPEPOSUPTO: + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEEXACT: + if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) + code += 2; + break; + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + code += code[1]; + break; + } + + /* Add in the fixed length from the table */ + + code += PRIV(OP_lengths)[c]; + + /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may + be followed by a multi-unit character. The length in the table is a + minimum, so we have to arrange to skip the extra units. 
*/ + +#ifdef MAYBE_UTF_MULTI + if (utf) switch(c) + { + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + case OP_EXACT: + case OP_EXACTI: + case OP_NOTEXACT: + case OP_NOTEXACTI: + case OP_UPTO: + case OP_UPTOI: + case OP_NOTUPTO: + case OP_NOTUPTOI: + case OP_MINUPTO: + case OP_MINUPTOI: + case OP_NOTMINUPTO: + case OP_NOTMINUPTOI: + case OP_POSUPTO: + case OP_POSUPTOI: + case OP_NOTPOSUPTO: + case OP_NOTPOSUPTOI: + case OP_STAR: + case OP_STARI: + case OP_NOTSTAR: + case OP_NOTSTARI: + case OP_MINSTAR: + case OP_MINSTARI: + case OP_NOTMINSTAR: + case OP_NOTMINSTARI: + case OP_POSSTAR: + case OP_POSSTARI: + case OP_NOTPOSSTAR: + case OP_NOTPOSSTARI: + case OP_PLUS: + case OP_PLUSI: + case OP_NOTPLUS: + case OP_NOTPLUSI: + case OP_MINPLUS: + case OP_MINPLUSI: + case OP_NOTMINPLUS: + case OP_NOTMINPLUSI: + case OP_POSPLUS: + case OP_POSPLUSI: + case OP_NOTPOSPLUS: + case OP_NOTPOSPLUSI: + case OP_QUERY: + case OP_QUERYI: + case OP_NOTQUERY: + case OP_NOTQUERYI: + case OP_MINQUERY: + case OP_MINQUERYI: + case OP_NOTMINQUERY: + case OP_NOTMINQUERYI: + case OP_POSQUERY: + case OP_POSQUERYI: + case OP_NOTPOSQUERY: + case OP_NOTPOSQUERYI: + if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); + break; + } +#else + (void)(utf); /* Keep compiler happy by referencing function argument */ +#endif /* MAYBE_UTF_MULTI */ + } + } +} + + + +/************************************************* +* Check for asserted fixed first code unit * +*************************************************/ + +/* During compilation, the "first code unit" settings from forward assertions +are discarded, because they can cause conflicts with actual literals that +follow. However, if we end up without a first code unit setting for an +unanchored pattern, it is worth scanning the regex to see if there is an +initial asserted first code unit. 
If all branches start with the same asserted +code unit, or with a non-conditional bracket all of whose alternatives start +with the same asserted code unit (recurse ad lib), then we return that code +unit, with the flags set to zero or REQ_CASELESS; otherwise return zero with +REQ_NONE in the flags. + +Arguments: + code points to start of compiled pattern + flags points to the first code unit flags + inassert non-zero if in an assertion + +Returns: the fixed first code unit, or 0 with REQ_NONE in flags +*/ + +static uint32_t +find_firstassertedcu(PCRE2_SPTR code, uint32_t *flags, uint32_t inassert) +{ +uint32_t c = 0; +uint32_t cflags = REQ_NONE; + +*flags = REQ_NONE; +do { + uint32_t d; + uint32_t dflags; + int xl = (*code == OP_CBRA || *code == OP_SCBRA || + *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0; + PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE); + PCRE2_UCHAR op = *scode; + + switch(op) + { + default: + return 0; + + case OP_BRA: + case OP_BRAPOS: + case OP_CBRA: + case OP_SCBRA: + case OP_CBRAPOS: + case OP_SCBRAPOS: + case OP_ASSERT: + case OP_ASSERT_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + d = find_firstassertedcu(scode, &dflags, inassert + + ((op == OP_ASSERT || op == OP_ASSERT_NA)?1:0)); + if (dflags >= REQ_NONE) return 0; + if (cflags >= REQ_NONE) { c = d; cflags = dflags; } + else if (c != d || cflags != dflags) return 0; + break; + + case OP_EXACT: + scode += IMM2_SIZE; + /* Fall through */ + + case OP_CHAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_POSPLUS: + if (inassert == 0) return 0; + if (cflags >= REQ_NONE) { c = scode[1]; cflags = 0; } + else if (c != scode[1]) return 0; + break; + + case OP_EXACTI: + scode += IMM2_SIZE; + /* Fall through */ + + case OP_CHARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_POSPLUSI: + if (inassert == 0) return 0; + + /* If the character is more than one code unit long, we cannot set its + first code unit when matching caselessly. 
Later scanning may pick up + multiple code units. */ + +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (scode[1] >= 0x80) return 0; +#elif PCRE2_CODE_UNIT_WIDTH == 16 + if (scode[1] >= 0xd800 && scode[1] <= 0xdfff) return 0; +#endif +#endif + + if (cflags >= REQ_NONE) { c = scode[1]; cflags = REQ_CASELESS; } + else if (c != scode[1]) return 0; + break; + } + + code += GET(code, 1); + } +while (*code == OP_ALT); + +*flags = cflags; +return c; +} + + + +/************************************************* +* Add an entry to the name/number table * +*************************************************/ + +/* This function is called between compiling passes to add an entry to the +name/number table, maintaining alphabetical order. Checking for permitted +and forbidden duplicates has already been done. + +Arguments: + cb the compile data block + name the name to add + length the length of the name + groupno the group number + tablecount the count of names in the table so far + +Returns: nothing +*/ + +static void +add_name_to_table(compile_block *cb, PCRE2_SPTR name, int length, + unsigned int groupno, uint32_t tablecount) +{ +uint32_t i; +PCRE2_UCHAR *slot = cb->name_table; + +for (i = 0; i < tablecount; i++) + { + int crc = memcmp(name, slot+IMM2_SIZE, CU2BYTES(length)); + if (crc == 0 && slot[IMM2_SIZE+length] != 0) + crc = -1; /* Current name is a substring */ + + /* Make space in the table and break the loop for an earlier name. For a + duplicate or later name, carry on. We do this for duplicates so that in the + simple case (when ?(| is not used) they are in order of their numbers. In all + cases they are in the order in which they appear in the pattern. 
*/ + + if (crc < 0) + { + (void)memmove(slot + cb->name_entry_size, slot, + CU2BYTES((tablecount - i) * cb->name_entry_size)); + break; + } + + /* Continue the loop for a later or duplicate name */ + + slot += cb->name_entry_size; + } + +PUT2(slot, 0, groupno); +memcpy(slot + IMM2_SIZE, name, CU2BYTES(length)); + +/* Add a terminating zero and fill the rest of the slot with zeroes so that +the memory is all initialized. Otherwise valgrind moans about uninitialized +memory when saving serialized compiled patterns. */ + +memset(slot + IMM2_SIZE + length, 0, + CU2BYTES(cb->name_entry_size - length - IMM2_SIZE)); +} + + + +/************************************************* +* Skip in parsed pattern * +*************************************************/ + +/* This function is called to skip parts of the parsed pattern when finding the +length of a lookbehind branch. It is called after (*ACCEPT) and (*FAIL) to find +the end of the branch, it is called to skip over an internal lookaround or +(DEFINE) group, and it is also called to skip to the end of a class, during +which it will never encounter nested groups (but there's no need to have +special code for that). + +When called to find the end of a branch or group, pptr must point to the first +meta code inside the branch, not the branch-starting code. In other cases it +can point to the item that causes the function to be called. 
+ +Arguments: + pptr current pointer to skip from + skiptype PSKIP_CLASS when skipping to end of class + PSKIP_ALT when META_ALT ends the skip + PSKIP_KET when only META_KET ends the skip + +Returns: new value of pptr + NULL if META_END is reached - should never occur + or for an unknown meta value - likewise +*/ + +static uint32_t * +parsed_skip(uint32_t *pptr, uint32_t skiptype) +{ +uint32_t nestlevel = 0; + +for (;; pptr++) + { + uint32_t meta = META_CODE(*pptr); + + switch(meta) + { + default: /* Just skip over most items */ + if (meta < META_END) continue; /* Literal */ + break; + + /* This should never occur. */ + + case META_END: + return NULL; + + /* The data for these items is variable in length. */ + + case META_BACKREF: /* Offset is present only if group >= 10 */ + if (META_DATA(*pptr) >= 10) pptr += SIZEOFFSET; + break; + + case META_ESCAPE: /* A few escapes are followed by data items. */ + switch (META_DATA(*pptr)) + { + case ESC_P: + case ESC_p: + pptr += 1; + break; + + case ESC_g: + case ESC_k: + pptr += 1 + SIZEOFFSET; + break; + } + break; + + case META_MARK: /* Add the length of the name. */ + case META_COMMIT_ARG: + case META_PRUNE_ARG: + case META_SKIP_ARG: + case META_THEN_ARG: + pptr += pptr[1]; + break; + + /* These are the "active" items in this loop. 
*/ + + case META_CLASS_END: + if (skiptype == PSKIP_CLASS) return pptr; + break; + + case META_ATOMIC: + case META_CAPTURE: + case META_COND_ASSERT: + case META_COND_DEFINE: + case META_COND_NAME: + case META_COND_NUMBER: + case META_COND_RNAME: + case META_COND_RNUMBER: + case META_COND_VERSION: + case META_LOOKAHEAD: + case META_LOOKAHEADNOT: + case META_LOOKAHEAD_NA: + case META_LOOKBEHIND: + case META_LOOKBEHINDNOT: + case META_LOOKBEHIND_NA: + case META_NOCAPTURE: + case META_SCRIPT_RUN: + nestlevel++; + break; + + case META_ALT: + if (nestlevel == 0 && skiptype == PSKIP_ALT) return pptr; + break; + + case META_KET: + if (nestlevel == 0) return pptr; + nestlevel--; + break; + } + + /* The extra data item length for each meta is in a table. */ + + meta = (meta >> 16) & 0x7fff; + if (meta >= sizeof(meta_extra_lengths)) return NULL; + pptr += meta_extra_lengths[meta]; + } +/* Control never reaches here */ +return pptr; +} + + + +/************************************************* +* Find length of a parsed group * +*************************************************/ + +/* This is called for nested groups within a branch of a lookbehind whose +length is being computed. On entry, the pointer must be at the first element +after the group initializing code. On exit it points to OP_KET. Caching is used +to improve processing speed when the same capturing group occurs many times. 
+ +Arguments: + pptrptr pointer to pointer in the parsed pattern + minptr where to return the minimum length + isinline FALSE if a reference or recursion; TRUE for inline group + errcodeptr pointer to the errorcode + lcptr pointer to the loop counter + group number of captured group or -1 for a non-capturing group + recurses chain of recurse_check to catch mutual recursion + cb pointer to the compile data + +Returns: the maximum group length or a negative number +*/ + +static int +get_grouplength(uint32_t **pptrptr, int *minptr, BOOL isinline, int *errcodeptr, + int *lcptr, int group, parsed_recurse_check *recurses, compile_block *cb) +{ +uint32_t *gi = cb->groupinfo + 2 * group; +int branchlength, branchminlength; +int grouplength = -1; +int groupminlength = INT_MAX; + +/* The cache can be used only if there is no possibility of there being two +groups with the same number. We do not need to set the end pointer for a group +that is being processed as a back reference or recursion, but we must do so for +an inline group. */ + +if (group > 0 && (cb->external_flags & PCRE2_DUPCAPUSED) == 0) + { + uint32_t groupinfo = gi[0]; + if ((groupinfo & GI_NOT_FIXED_LENGTH) != 0) return -1; + if ((groupinfo & GI_SET_FIXED_LENGTH) != 0) + { + if (isinline) *pptrptr = parsed_skip(*pptrptr, PSKIP_KET); + *minptr = gi[1]; + return groupinfo & GI_FIXED_LENGTH_MASK; + } + } + +/* Scan the group. In this case we find the end pointer of necessity. 
*/ + +for(;;) + { + branchlength = get_branchlength(pptrptr, &branchminlength, errcodeptr, lcptr, + recurses, cb); + if (branchlength < 0) goto ISNOTFIXED; + if (branchlength > grouplength) grouplength = branchlength; + if (branchminlength < groupminlength) groupminlength = branchminlength; + if (**pptrptr == META_KET) break; + *pptrptr += 1; /* Skip META_ALT */ + } + +if (group > 0) + { + gi[0] |= (uint32_t)(GI_SET_FIXED_LENGTH | grouplength); + gi[1] = groupminlength; + } + +*minptr = groupminlength; +return grouplength; + +ISNOTFIXED: +if (group > 0) gi[0] |= GI_NOT_FIXED_LENGTH; +return -1; +} + + + +/************************************************* +* Find length of a parsed branch * +*************************************************/ + +/* Return fixed maximum and minimum lengths for a branch in a lookbehind, +giving an error if the length is not limited. On entry, *pptrptr points to the +first element inside the branch. On exit it is set to point to the ALT or KET. + +Arguments: + pptrptr pointer to pointer in the parsed pattern + minptr where to return the minimum length + errcodeptr pointer to error code + lcptr pointer to loop counter + recurses chain of recurse_check to catch mutual recursion + cb pointer to compile block + +Returns: the maximum length, or a negative value on error +*/ + +static int +get_branchlength(uint32_t **pptrptr, int *minptr, int *errcodeptr, int *lcptr, + parsed_recurse_check *recurses, compile_block *cb) +{ +int branchlength = 0; +int branchminlength = 0; +int grouplength, groupminlength; +uint32_t lastitemlength = 0; +uint32_t lastitemminlength = 0; +uint32_t *pptr = *pptrptr; +PCRE2_SIZE offset; +parsed_recurse_check this_recurse; + +/* A large and/or complex regex can take too long to process. This can happen +more often when (?| groups are present in the pattern because their length +cannot be cached. 
*/ + +if ((*lcptr)++ > 2000) + { + *errcodeptr = ERR35; /* Lookbehind is too complicated */ + return -1; + } + +/* Scan the branch, accumulating the length. */ + +for (;; pptr++) + { + parsed_recurse_check *r; + uint32_t *gptr, *gptrend; + uint32_t escape; + uint32_t group = 0; + uint32_t itemlength = 0; + uint32_t itemminlength = 0; + uint32_t min, max; + + if (*pptr < META_END) + { + itemlength = itemminlength = 1; + } + + else switch (META_CODE(*pptr)) + { + case META_KET: + case META_ALT: + goto EXIT; + + /* (*ACCEPT) and (*FAIL) terminate the branch, but we must skip to the + actual termination. */ + + case META_ACCEPT: + case META_FAIL: + pptr = parsed_skip(pptr, PSKIP_ALT); + if (pptr == NULL) goto PARSED_SKIP_FAILED; + goto EXIT; + + case META_MARK: + case META_COMMIT_ARG: + case META_PRUNE_ARG: + case META_SKIP_ARG: + case META_THEN_ARG: + pptr += pptr[1] + 1; + break; + + case META_CIRCUMFLEX: + case META_COMMIT: + case META_DOLLAR: + case META_PRUNE: + case META_SKIP: + case META_THEN: + break; + + case META_OPTIONS: + pptr += 2; + break; + + case META_BIGVALUE: + itemlength = itemminlength = 1; + pptr += 1; + break; + + case META_CLASS: + case META_CLASS_NOT: + itemlength = itemminlength = 1; + pptr = parsed_skip(pptr, PSKIP_CLASS); + if (pptr == NULL) goto PARSED_SKIP_FAILED; + break; + + case META_CLASS_EMPTY_NOT: + case META_DOT: + itemlength = itemminlength = 1; + break; + + case META_CALLOUT_NUMBER: + pptr += 3; + break; + + case META_CALLOUT_STRING: + pptr += 3 + SIZEOFFSET; + break; + + /* Only some escapes consume a character. Of those, \R can match one or two + characters, but \X is never allowed because it matches an unknown number of + characters. \C is allowed only in 32-bit and non-UTF 8/16-bit modes. 
*/ + + case META_ESCAPE: + escape = META_DATA(*pptr); + if (escape == ESC_X) return -1; + if (escape == ESC_R) + { + itemminlength = 1; + itemlength = 2; + } + else if (escape > ESC_b && escape < ESC_Z) + { +#if PCRE2_CODE_UNIT_WIDTH != 32 + if ((cb->external_options & PCRE2_UTF) != 0 && escape == ESC_C) + { + *errcodeptr = ERR36; + return -1; + } +#endif + itemlength = itemminlength = 1; + if (escape == ESC_p || escape == ESC_P) pptr++; /* Skip prop data */ + } + break; + + /* Lookaheads do not contribute to the length of this branch, but they may + contain lookbehinds within them whose lengths need to be set. */ + + case META_LOOKAHEAD: + case META_LOOKAHEADNOT: + case META_LOOKAHEAD_NA: + *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb, lcptr); + if (*errcodeptr != 0) return -1; + + /* Ignore any qualifiers that follow a lookahead assertion. */ + + switch (pptr[1]) + { + case META_ASTERISK: + case META_ASTERISK_PLUS: + case META_ASTERISK_QUERY: + case META_PLUS: + case META_PLUS_PLUS: + case META_PLUS_QUERY: + case META_QUERY: + case META_QUERY_PLUS: + case META_QUERY_QUERY: + pptr++; + break; + + case META_MINMAX: + case META_MINMAX_PLUS: + case META_MINMAX_QUERY: + pptr += 3; + break; + + default: + break; + } + break; + + /* A nested lookbehind does not contribute any length to this lookbehind, + but must itself be checked and have its lengths set. */ + + case META_LOOKBEHIND: + case META_LOOKBEHINDNOT: + case META_LOOKBEHIND_NA: + if (!set_lookbehind_lengths(&pptr, errcodeptr, lcptr, recurses, cb)) + return -1; + break; + + /* Back references and recursions are handled by very similar code. At this + stage, the names generated in the parsing pass are available, but the main + name table has not yet been created. So for the named varieties, scan the + list of names in order to get the number of the first one in the pattern, + and whether or not this name is duplicated. 
*/ + + case META_BACKREF_BYNAME: + if ((cb->external_options & PCRE2_MATCH_UNSET_BACKREF) != 0) + goto ISNOTFIXED; + /* Fall through */ + + case META_RECURSE_BYNAME: + { + int i; + PCRE2_SPTR name; + BOOL is_dupname = FALSE; + named_group *ng = cb->named_groups; + uint32_t meta_code = META_CODE(*pptr); + uint32_t length = *(++pptr); + + GETPLUSOFFSET(offset, pptr); + name = cb->start_pattern + offset; + for (i = 0; i < cb->names_found; i++, ng++) + { + if (length == ng->length && PRIV(strncmp)(name, ng->name, length) == 0) + { + group = ng->number; + is_dupname = ng->isdup; + break; + } + } + + if (group == 0) + { + *errcodeptr = ERR15; /* Non-existent subpattern */ + cb->erroroffset = offset; + return -1; + } + + /* A numerical back reference can be fixed length if duplicate capturing + groups are not being used. A non-duplicate named back reference can also + be handled. */ + + if (meta_code == META_RECURSE_BYNAME || + (!is_dupname && (cb->external_flags & PCRE2_DUPCAPUSED) == 0)) + goto RECURSE_OR_BACKREF_LENGTH; /* Handle as a numbered version. */ + } + goto ISNOTFIXED; /* Duplicate name or number */ + + /* The offset values for back references < 10 are in a separate vector + because otherwise they would use more than two parsed pattern elements on + 64-bit systems. */ + + case META_BACKREF: + if ((cb->external_options & PCRE2_MATCH_UNSET_BACKREF) != 0 || + (cb->external_flags & PCRE2_DUPCAPUSED) != 0) + goto ISNOTFIXED; + group = META_DATA(*pptr); + if (group < 10) + { + offset = cb->small_ref_offset[group]; + goto RECURSE_OR_BACKREF_LENGTH; + } + + /* Fall through */ + /* For groups >= 10 - picking up group twice does no harm. */ + + /* A true recursion implies not fixed length, but a subroutine call may + be OK. Back reference "recursions" are also failed. 
*/ + + case META_RECURSE: + group = META_DATA(*pptr); + GETPLUSOFFSET(offset, pptr); + + RECURSE_OR_BACKREF_LENGTH: + if (group > cb->bracount) + { + cb->erroroffset = offset; + *errcodeptr = ERR15; /* Non-existent subpattern */ + return -1; + } + if (group == 0) goto ISNOTFIXED; /* Local recursion */ + for (gptr = cb->parsed_pattern; *gptr != META_END; gptr++) + { + if (META_CODE(*gptr) == META_BIGVALUE) gptr++; + else if (*gptr == (META_CAPTURE | group)) break; + } + + /* We must start the search for the end of the group at the first meta code + inside the group. Otherwise it will be treated as an enclosed group. */ + + gptrend = parsed_skip(gptr + 1, PSKIP_KET); + if (gptrend == NULL) goto PARSED_SKIP_FAILED; + if (pptr > gptr && pptr < gptrend) goto ISNOTFIXED; /* Local recursion */ + for (r = recurses; r != NULL; r = r->prev) if (r->groupptr == gptr) break; + if (r != NULL) goto ISNOTFIXED; /* Mutual recursion */ + this_recurse.prev = recurses; + this_recurse.groupptr = gptr; + + /* We do not need to know the position of the end of the group, that is, + gptr is not used after the call to get_grouplength(). Setting the second + argument FALSE stops it scanning for the end when the length can be found + in the cache. */ + + gptr++; + grouplength = get_grouplength(&gptr, &groupminlength, FALSE, errcodeptr, + lcptr, group, &this_recurse, cb); + if (grouplength < 0) + { + if (*errcodeptr == 0) goto ISNOTFIXED; + return -1; /* Error already set */ + } + itemlength = grouplength; + itemminlength = groupminlength; + break; + + /* A (DEFINE) group is never obeyed inline and so it does not contribute to + the length of this branch. Skip from the following item to the next + unpaired ket. */ + + case META_COND_DEFINE: + pptr = parsed_skip(pptr + 1, PSKIP_KET); + break; + + /* Check other nested groups - advance past the initial data for each type + and then seek a fixed length with get_grouplength(). 
*/ + + case META_COND_NAME: + case META_COND_NUMBER: + case META_COND_RNAME: + case META_COND_RNUMBER: + pptr += 2 + SIZEOFFSET; + goto CHECK_GROUP; + + case META_COND_ASSERT: + pptr += 1; + goto CHECK_GROUP; + + case META_COND_VERSION: + pptr += 4; + goto CHECK_GROUP; + + case META_CAPTURE: + group = META_DATA(*pptr); + /* Fall through */ + + case META_ATOMIC: + case META_NOCAPTURE: + case META_SCRIPT_RUN: + pptr++; + CHECK_GROUP: + grouplength = get_grouplength(&pptr, &groupminlength, TRUE, errcodeptr, + lcptr, group, recurses, cb); + if (grouplength < 0) return -1; + itemlength = grouplength; + itemminlength = groupminlength; + break; + + case META_QUERY: + case META_QUERY_PLUS: + case META_QUERY_QUERY: + min = 0; + max = 1; + goto REPETITION; + + /* Exact repetition is OK; variable repetition is not. A repetition of zero + must subtract the length that has already been added. */ + + case META_MINMAX: + case META_MINMAX_PLUS: + case META_MINMAX_QUERY: + min = pptr[1]; + max = pptr[2]; + pptr += 2; + + REPETITION: + if (max != REPEAT_UNLIMITED) + { + if (lastitemlength != 0 && /* Should not occur, but just in case */ + max != 0 && + (INT_MAX - branchlength)/lastitemlength < max - 1) + { + *errcodeptr = ERR87; /* Integer overflow; lookbehind too big */ + return -1; + } + if (min == 0) branchminlength -= lastitemminlength; + else itemminlength = (min - 1) * lastitemminlength; + if (max == 0) branchlength -= lastitemlength; + else itemlength = (max - 1) * lastitemlength; + break; + } + /* Fall through */ + + /* Any other item means this branch does not have a fixed length. */ + + default: + ISNOTFIXED: + *errcodeptr = ERR25; /* Not fixed length */ + return -1; + } + + /* Add the item length to the branchlength, checking for integer overflow and + for the branch length exceeding the overall limit. Later, if there is at + least one variable-length branch in the group, there is a test for the + (smaller) variable-length branch length limit. 
*/ + + if (INT_MAX - branchlength < (int)itemlength || + (branchlength += itemlength) > LOOKBEHIND_MAX) + { + *errcodeptr = ERR87; + return -1; + } + + branchminlength += itemminlength; + + /* Save this item length for use if the next item is a quantifier. */ + + lastitemlength = itemlength; + lastitemminlength = itemminlength; + } + +EXIT: +*pptrptr = pptr; +*minptr = branchminlength; +return branchlength; + +PARSED_SKIP_FAILED: +*errcodeptr = ERR90; +return -1; +} + + + +/************************************************* +* Set lengths in a lookbehind * +*************************************************/ + +/* This function is called for each lookbehind, to set the lengths in its +branches. An error occurs if any branch does not have a limited maximum length +that is less than the limit (65535). On exit, the pointer must be left on the +final ket. + +The function also maintains the max_lookbehind value. Any lookbehind branch +that contains a nested lookbehind may actually look further back than the +length of the branch. The additional amount is passed back from +get_branchlength() as an "extra" value. (NOTE(review): no "extra" value is visible in the get_branchlength() call below - possibly stale; confirm.) + +Arguments: + pptrptr pointer to pointer in the parsed pattern + errcodeptr pointer to error code + lcptr pointer to loop counter + recurses chain of recurse_check to catch mutual recursion + cb pointer to compile block + +Returns: TRUE if all is well + FALSE otherwise, with error code and offset set +*/ + +static BOOL +set_lookbehind_lengths(uint32_t **pptrptr, int *errcodeptr, int *lcptr, + parsed_recurse_check *recurses, compile_block *cb) +{ +PCRE2_SIZE offset; +uint32_t *bptr = *pptrptr; +uint32_t *gbptr = bptr; +int maxlength = 0; +int minlength = INT_MAX; +BOOL variable = FALSE; + +READPLUSOFFSET(offset, bptr); /* Offset for error messages */ +*pptrptr += SIZEOFFSET; + +/* Each branch can have a different maximum length, but we can keep only a +single minimum for the whole group, because there's nowhere to save individual +values in the META_ALT item.
*/ + +do + { + int branchlength, branchminlength; + + *pptrptr += 1; /* Step past the offset data (first branch) or the META_ALT (later branches) */ + branchlength = get_branchlength(pptrptr, &branchminlength, errcodeptr, lcptr, + recurses, cb); + + if (branchlength < 0) + { + /* The errorcode and offset may already be set from a nested lookbehind. */ + if (*errcodeptr == 0) *errcodeptr = ERR25; + if (cb->erroroffset == PCRE2_UNSET) cb->erroroffset = offset; + return FALSE; + } + + if (branchlength != branchminlength) variable = TRUE; + if (branchminlength < minlength) minlength = branchminlength; + if (branchlength > maxlength) maxlength = branchlength; + if (branchlength > cb->max_lookbehind) cb->max_lookbehind = branchlength; + *bptr |= branchlength; /* branchlength never more than 65535 */ + bptr = *pptrptr; + } +while (META_CODE(*bptr) == META_ALT); + +/* If any branch is of variable length, the whole lookbehind is of variable +length. If the maximum length of any branch exceeds the maximum for variable +lookbehinds, give an error. Otherwise, the minimum length is set in the word +that follows the original group META value. For a fixed-length lookbehind, this +is set to LOOKBEHIND_MAX, to indicate that each branch is of a fixed (but +possibly different) length. */ + +if (variable) + { + gbptr[1] = minlength; + if ((uint32_t)maxlength > cb->max_varlookbehind) + { + *errcodeptr = ERR100; + cb->erroroffset = offset; + return FALSE; + } + } +else gbptr[1] = LOOKBEHIND_MAX; + + +gbptr[1] = variable? minlength : LOOKBEHIND_MAX; /* NOTE(review): repeats the assignment already made in both arms above */ +return TRUE; +} + + + +/************************************************* +* Check parsed pattern lookbehinds * +*************************************************/ + +/* This function is called at the end of parsing a pattern if any lookbehinds +were encountered. It scans the parsed pattern for them, calling +set_lookbehind_lengths() for each one. At the start, the errorcode is zero and +the error offset is marked unset. This enables the functions above not to +override settings from deeper nestings.
+ +This function is called recursively from get_branchlength() for lookaheads in +order to process any lookbehinds that they may contain. It stops when it hits a +non-nested closing parenthesis in this case, returning a pointer to it. + +Arguments: + pptr points to where to start (start of pattern or start of lookahead) + retptr if not NULL, return the ket pointer here + recurses chain of recurse_check to catch mutual recursion + cb points to the compile block + lcptr points to loop counter + +Returns: 0 on success, or an errorcode (cb->erroroffset will be set) +*/ + +static int +check_lookbehinds(uint32_t *pptr, uint32_t **retptr, + parsed_recurse_check *recurses, compile_block *cb, int *lcptr) +{ +int errorcode = 0; +int nestlevel = 0; + +cb->erroroffset = PCRE2_UNSET; /* Marked unset so a failing branch can tell whether an offset was already recorded */ + +for (; *pptr != META_END; pptr++) + { + if (*pptr < META_END) continue; /* Literal */ + + switch (META_CODE(*pptr)) + { + default: + return ERR70; /* Unrecognized meta code */ + + case META_ESCAPE: + if (*pptr - META_ESCAPE == ESC_P || *pptr - META_ESCAPE == ESC_p) + pptr += 1; /* Skip prop data */ + break; + + case META_KET: + if (--nestlevel < 0) /* Non-nested ket: stop here */ + { + if (retptr != NULL) *retptr = pptr; + return 0; + } + break; + + case META_ATOMIC: + case META_CAPTURE: + case META_COND_ASSERT: + case META_LOOKAHEAD: + case META_LOOKAHEADNOT: + case META_LOOKAHEAD_NA: + case META_NOCAPTURE: + case META_SCRIPT_RUN: + nestlevel++; + break; + + case META_ACCEPT: + case META_ALT: + case META_ASTERISK: + case META_ASTERISK_PLUS: + case META_ASTERISK_QUERY: + case META_BACKREF: /* NOTE(review): for groups >= 10 an offset follows (see get_branchlength()); it is not skipped here - presumably its words pass the literal test above; confirm */ + case META_CIRCUMFLEX: + case META_CLASS: + case META_CLASS_EMPTY: + case META_CLASS_EMPTY_NOT: + case META_CLASS_END: + case META_CLASS_NOT: + case META_COMMIT: + case META_DOLLAR: + case META_DOT: + case META_FAIL: + case META_PLUS: + case META_PLUS_PLUS: + case META_PLUS_QUERY: + case META_PRUNE: + case META_QUERY: + case META_QUERY_PLUS: + case META_QUERY_QUERY: + case META_RANGE_ESCAPED: + case META_RANGE_LITERAL: + case META_SKIP: + case META_THEN: +
break; + + case META_RECURSE: + pptr += SIZEOFFSET; /* Skip the pattern offset */ + break; + + case META_BACKREF_BYNAME: + case META_RECURSE_BYNAME: + pptr += 1 + SIZEOFFSET; /* Skip the name length and the pattern offset */ + break; + + case META_COND_DEFINE: + pptr += SIZEOFFSET; + nestlevel++; /* Opens a group; its META_KET will decrement this */ + break; + + case META_COND_NAME: + case META_COND_NUMBER: + case META_COND_RNAME: + case META_COND_RNUMBER: + pptr += 1 + SIZEOFFSET; + nestlevel++; + break; + + case META_COND_VERSION: + pptr += 3; + nestlevel++; + break; + + case META_CALLOUT_STRING: + pptr += 3 + SIZEOFFSET; + break; + + case META_BIGVALUE: + case META_POSIX: + case META_POSIX_NEG: + pptr += 1; + break; + + case META_MINMAX: + case META_MINMAX_QUERY: + case META_MINMAX_PLUS: + case META_OPTIONS: + pptr += 2; /* Two data elements (min and max for the MINMAX items) */ + break; + + case META_CALLOUT_NUMBER: + pptr += 3; + break; + + case META_MARK: + case META_COMMIT_ARG: + case META_PRUNE_ARG: + case META_SKIP_ARG: + case META_THEN_ARG: + pptr += 1 + pptr[1]; /* Skip the count held in pptr[1] plus that many following elements */ + break; + + case META_LOOKBEHIND: + case META_LOOKBEHINDNOT: + case META_LOOKBEHIND_NA: + if (!set_lookbehind_lengths(&pptr, &errorcode, lcptr, recurses, cb)) + return errorcode; /* On success the callee has moved pptr on; its header says it is left on the final ket */ + break; + } + } + +return 0; +} + + + +/************************************************* +* External function to compile a pattern * +*************************************************/ + +/* This function reads a regular expression in the form of a string and returns +a pointer to a block of store holding a compiled version of the expression.
+ +Arguments: + pattern the regular expression + patlen the length of the pattern, or PCRE2_ZERO_TERMINATED + options option bits + errorptr pointer to errorcode + erroroffset pointer to error offset + ccontext points to a compile context or is NULL + +Returns: pointer to compiled data block, or NULL on error, + with errorcode and erroroffset set +*/ + +PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION +pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options, + int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext) +{ +BOOL utf; /* Set TRUE for UTF mode */ +BOOL ucp; /* Set TRUE for UCP mode */ +BOOL has_lookbehind = FALSE; /* Set TRUE if a lookbehind is found */ +BOOL zero_terminated; /* Set TRUE for zero-terminated pattern */ +pcre2_real_code *re = NULL; /* What we will return */ +compile_block cb; /* "Static" compile-time data */ +const uint8_t *tables; /* Char tables base pointer */ + +PCRE2_UCHAR *code; /* Current pointer in compiled code */ +PCRE2_SPTR codestart; /* Start of compiled code */ +PCRE2_SPTR ptr; /* Current pointer in pattern */ +uint32_t *pptr; /* Current pointer in parsed pattern */ + +PCRE2_SIZE length = 1; /* Allow for final END opcode */ +PCRE2_SIZE usedlength; /* Actual length used */ +PCRE2_SIZE re_blocksize; /* Size of memory block */ +PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */ +PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */ + +uint32_t firstcuflags, reqcuflags; /* Type of first/req code unit */ +uint32_t firstcu, reqcu; /* Value of first/req code unit */ +uint32_t setflags = 0; /* NL and BSR set flags */ + +uint32_t skipatstart; /* When checking (*UTF) etc */ +uint32_t limit_heap = UINT32_MAX; +uint32_t limit_match = UINT32_MAX; /* Unset match limits */ +uint32_t limit_depth = UINT32_MAX; + +int newline = 0; /* Unset; can be set by the pattern */ +int bsr = 0; /* Unset; can be set by the pattern */ +int errorcode = 0; /* Initialize to avoid compiler warn */ +int regexrc; 
/* Return from compile */ + +uint32_t i; /* Local loop counter */ + +/* Comments at the head of this file explain about these variables. */ + +uint32_t stack_groupinfo[GROUPINFO_DEFAULT_SIZE]; +uint32_t stack_parsed_pattern[PARSED_PATTERN_DEFAULT_SIZE]; +named_group named_groups[NAMED_GROUP_LIST_SIZE]; + +/* The workspace is used in different ways in the different compiling phases. +It needs to be 16-bit aligned for the preliminary parsing scan. */ + +uint32_t c16workspace[C16_WORK_SIZE]; +PCRE2_UCHAR *cworkspace = (PCRE2_UCHAR *)c16workspace; + + +/* -------------- Check arguments and set up the pattern ----------------- */ + +/* There must be error code and offset pointers. */ + +if (errorptr == NULL || erroroffset == NULL) return NULL; +*errorptr = ERR0; +*erroroffset = 0; + +/* There must be a pattern, but NULL is allowed with zero length. */ + +if (pattern == NULL) + { + if (patlen == 0) pattern = (PCRE2_SPTR)""; else + { + *errorptr = ERR16; + return NULL; + } + } + +/* A NULL compile context means "use a default context" */ + +if (ccontext == NULL) + ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context)); + +/* PCRE2_MATCH_INVALID_UTF implies UTF */ + +if ((options & PCRE2_MATCH_INVALID_UTF) != 0) options |= PCRE2_UTF; + +/* Check that all undefined public option bits are zero. */ + +if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0 || + (ccontext->extra_options & ~PUBLIC_COMPILE_EXTRA_OPTIONS) != 0) + { + *errorptr = ERR17; + return NULL; + } + +if ((options & PCRE2_LITERAL) != 0 && + ((options & ~PUBLIC_LITERAL_COMPILE_OPTIONS) != 0 || + (ccontext->extra_options & ~PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS) != 0)) + { + *errorptr = ERR92; + return NULL; + } + +/* A zero-terminated pattern is indicated by the special length value +PCRE2_ZERO_TERMINATED. Check for an overlong pattern. 
*/ + +if ((zero_terminated = (patlen == PCRE2_ZERO_TERMINATED))) + patlen = PRIV(strlen)(pattern); + +if (patlen > ccontext->max_pattern_length) + { + *errorptr = ERR88; + return NULL; + } + +/* From here on, all returns from this function should end up going via the +EXIT label. */ + + +/* ------------ Initialize the "static" compile data -------------- */ + +tables = (ccontext->tables != NULL)? ccontext->tables : PRIV(default_tables); + +cb.lcc = tables + lcc_offset; /* Individual */ +cb.fcc = tables + fcc_offset; /* character */ +cb.cbits = tables + cbits_offset; /* tables */ +cb.ctypes = tables + ctypes_offset; + +cb.assert_depth = 0; +cb.bracount = 0; +cb.cx = ccontext; +cb.dupnames = FALSE; +cb.end_pattern = pattern + patlen; +cb.erroroffset = 0; +cb.external_flags = 0; +cb.external_options = options; +cb.groupinfo = stack_groupinfo; +cb.had_recurse = FALSE; +cb.lastcapture = 0; +cb.max_lookbehind = 0; /* Max encountered */ +cb.max_varlookbehind = ccontext->max_varlookbehind; /* Limit */ +cb.name_entry_size = 0; +cb.name_table = NULL; +cb.named_groups = named_groups; +cb.named_group_list_size = NAMED_GROUP_LIST_SIZE; +cb.names_found = 0; +cb.parens_depth = 0; +cb.parsed_pattern = stack_parsed_pattern; +cb.req_varyopt = 0; +cb.start_code = cworkspace; +cb.start_pattern = pattern; +cb.start_workspace = cworkspace; +cb.workspace_size = COMPILE_WORK_SIZE; + +/* Maximum back reference and backref bitmap. The bitmap records up to 31 back +references to help in deciding whether (.*) can be treated as anchored or not. +*/ + +cb.top_backref = 0; +cb.backref_map = 0; + +/* Escape sequences \1 to \9 are always back references, but as they are only +two characters long, only two elements can be used in the parsed_pattern +vector. The first contains the reference, and we'd like to use the second to +record the offset in the pattern, so that forward references to non-existent +groups can be diagnosed later with an offset. However, on 64-bit systems, +PCRE2_SIZE won't fit. 
Instead, we have a vector of offsets for the first +occurrence of \1 to \9, indexed by the second parsed_pattern value. All other +references have enough space for the offset to be put into the parsed pattern. +*/ + +for (i = 0; i < 10; i++) cb.small_ref_offset[i] = PCRE2_UNSET; + + +/* --------------- Start looking at the pattern --------------- */ + +/* Unless PCRE2_LITERAL is set, check for global one-time option settings at +the start of the pattern, and remember the offset to the actual regex. With +valgrind support, make the terminator of a zero-terminated pattern +inaccessible. This catches bugs that would otherwise only show up for +non-zero-terminated patterns. */ + +#ifdef SUPPORT_VALGRIND +if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1)); +#endif + +ptr = pattern; +skipatstart = 0; + +if ((options & PCRE2_LITERAL) == 0) + { + while (patlen - skipatstart >= 2 && + ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && + ptr[skipatstart+1] == CHAR_ASTERISK) + { + for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++) + { + uint32_t c, pp; + const pso *p = pso_list + i; + + if (patlen - skipatstart - 2 >= p->length && + PRIV(strncmp_c8)(ptr + skipatstart + 2, (char *)(p->name), + p->length) == 0) + { + skipatstart += p->length + 2; + switch(p->type) + { + case PSO_OPT: + cb.external_options |= p->value; + break; + + case PSO_FLG: + setflags |= p->value; + break; + + case PSO_NL: + newline = p->value; + setflags |= PCRE2_NL_SET; + break; + + case PSO_BSR: + bsr = p->value; + setflags |= PCRE2_BSR_SET; + break; + + case PSO_LIMM: + case PSO_LIMD: + case PSO_LIMH: + c = 0; + pp = skipatstart; + if (!IS_DIGIT(ptr[pp])) + { + errorcode = ERR60; + ptr += pp; + goto HAD_EARLY_ERROR; + } + while (IS_DIGIT(ptr[pp])) + { + if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */ + c = c*10 + (ptr[pp++] - CHAR_0); + } + if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS) + { + errorcode = ERR60; + ptr += pp; + goto HAD_EARLY_ERROR; + } + if (p->type == 
PSO_LIMH) limit_heap = c; + else if (p->type == PSO_LIMM) limit_match = c; + else limit_depth = c; + skipatstart += pp - skipatstart; + break; + } + break; /* Out of the table scan loop */ + } + } + if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */ + } + } + +/* End of pattern-start options; advance to start of real regex. */ + +ptr += skipatstart; + +/* Can't support UTF or UCP if PCRE2 was built without Unicode support. */ + +#ifndef SUPPORT_UNICODE +if ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0) + { + errorcode = ERR32; + goto HAD_EARLY_ERROR; + } +#endif + +/* Check UTF. We have the original options in 'options', with that value as +modified by (*UTF) etc in cb->external_options. The extra option +PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not permitted in UTF-16 mode because the +surrogate code points cannot be represented in UTF-16. */ + +utf = (cb.external_options & PCRE2_UTF) != 0; +if (utf) + { + if ((options & PCRE2_NEVER_UTF) != 0) + { + errorcode = ERR74; + goto HAD_EARLY_ERROR; + } + if ((options & PCRE2_NO_UTF_CHECK) == 0 && + (errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0) + goto HAD_ERROR; /* Offset was set by valid_utf() */ + +#if PCRE2_CODE_UNIT_WIDTH == 16 + if ((ccontext->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0) + { + errorcode = ERR91; + goto HAD_EARLY_ERROR; + } +#endif + } + +/* Check UCP lockout. */ + +ucp = (cb.external_options & PCRE2_UCP) != 0; +if (ucp && (cb.external_options & PCRE2_NEVER_UCP) != 0) + { + errorcode = ERR75; + goto HAD_EARLY_ERROR; + } + +/* Process the BSR setting. */ + +if (bsr == 0) bsr = ccontext->bsr_convention; + +/* Process the newline setting. 
*/ + +if (newline == 0) newline = ccontext->newline_convention; +cb.nltype = NLTYPE_FIXED; +switch(newline) + { + case PCRE2_NEWLINE_CR: + cb.nllen = 1; + cb.nl[0] = CHAR_CR; + break; + + case PCRE2_NEWLINE_LF: + cb.nllen = 1; + cb.nl[0] = CHAR_NL; + break; + + case PCRE2_NEWLINE_NUL: + cb.nllen = 1; + cb.nl[0] = CHAR_NUL; + break; + + case PCRE2_NEWLINE_CRLF: + cb.nllen = 2; + cb.nl[0] = CHAR_CR; + cb.nl[1] = CHAR_NL; + break; + + case PCRE2_NEWLINE_ANY: + cb.nltype = NLTYPE_ANY; + break; + + case PCRE2_NEWLINE_ANYCRLF: + cb.nltype = NLTYPE_ANYCRLF; + break; + + default: + errorcode = ERR56; + goto HAD_EARLY_ERROR; + } + +/* Pre-scan the pattern to do two things: (1) Discover the named groups and +their numerical equivalents, so that this information is always available for +the remaining processing. (2) At the same time, parse the pattern and put a +processed version into the parsed_pattern vector. This has escapes interpreted +and comments removed (amongst other things). + +In all but one case, when PCRE2_AUTO_CALLOUT is not set, the number of unsigned +32-bit ints in the parsed pattern is bounded by the length of the pattern plus +one (for the terminator) plus four if PCRE2_EXTRA_WORD or PCRE2_EXTRA_LINE is +set. The exceptional case is when running in 32-bit, non-UTF mode, when literal +characters greater than META_END (0x80000000) have to be coded as two units. In +this case, therefore, we scan the pattern to check for such values. */ + +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (!utf) + { + PCRE2_SPTR p; + for (p = ptr; p < cb.end_pattern; p++) if (*p >= META_END) big32count++; + } +#endif + +/* Ensure that the parsed pattern buffer is big enough. When PCRE2_AUTO_CALLOUT +is set we have to assume a numerical callout (4 elements) for each character +plus one at the end. This is overkill, but memory is plentiful these days. For +many smaller patterns the vector on the stack (which was set up above) can be +used. 
*/ + +parsed_size_needed = patlen - skipatstart + big32count; + +if ((ccontext->extra_options & + (PCRE2_EXTRA_MATCH_WORD|PCRE2_EXTRA_MATCH_LINE)) != 0) + parsed_size_needed += 4; + +if ((options & PCRE2_AUTO_CALLOUT) != 0) + parsed_size_needed = (parsed_size_needed + 1) * 5; + +if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE) + { + uint32_t *heap_parsed_pattern = ccontext->memctl.malloc( + (parsed_size_needed + 1) * sizeof(uint32_t), ccontext->memctl.memory_data); + if (heap_parsed_pattern == NULL) + { + *errorptr = ERR21; + goto EXIT; + } + cb.parsed_pattern = heap_parsed_pattern; + } +cb.parsed_pattern_end = cb.parsed_pattern + parsed_size_needed + 1; + +/* Do the parsing scan. */ + +errorcode = parse_regex(ptr, cb.external_options, &has_lookbehind, &cb); +if (errorcode != 0) goto HAD_CB_ERROR; + +/* If there are any lookbehinds, scan the parsed pattern to figure out their +lengths. Workspace is needed to remember whether numbered groups are or are not +of limited length, and if limited, what the minimum and maximum lengths are. +This caching saves re-computing the length of any group that is referenced more +than once, which is particularly relevant when recursion is involved. +Unnumbered groups do not have this exposure because they cannot be referenced. +If there are sufficiently few groups, the default index vector on the stack, as +set up above, can be used. Otherwise we have to get/free some heap memory. The +vector must be initialized to zero. 
*/ + +if (has_lookbehind) + { + int loopcount = 0; + if (cb.bracount >= GROUPINFO_DEFAULT_SIZE/2) + { + cb.groupinfo = ccontext->memctl.malloc( + (2 * (cb.bracount + 1))*sizeof(uint32_t), ccontext->memctl.memory_data); + if (cb.groupinfo == NULL) + { + errorcode = ERR21; + cb.erroroffset = 0; + goto HAD_CB_ERROR; + } + } + memset(cb.groupinfo, 0, (2 * cb.bracount + 1) * sizeof(uint32_t)); + errorcode = check_lookbehinds(cb.parsed_pattern, NULL, NULL, &cb, &loopcount); + if (errorcode != 0) goto HAD_CB_ERROR; + } + +/* For debugging, there is a function that shows the parsed pattern vector. */ + +#ifdef DEBUG_SHOW_PARSED +fprintf(stderr, "+++ Pre-scan complete:\n"); +show_parsed(&cb); +#endif + +/* For debugging capturing information this code can be enabled. */ + +#ifdef DEBUG_SHOW_CAPTURES + { + named_group *ng = cb.named_groups; + fprintf(stderr, "+++Captures: %d\n", cb.bracount); + for (i = 0; i < cb.names_found; i++, ng++) + { + fprintf(stderr, "+++%3d %.*s\n", ng->number, ng->length, ng->name); + } + } +#endif + +/* Pretend to compile the pattern while actually just accumulating the amount +of memory required in the 'length' variable. This behaviour is triggered by +passing a non-NULL final argument to compile_regex(). We pass a block of +workspace (cworkspace) for it to compile parts of the pattern into; the +compiled code is discarded when it is no longer needed, so hopefully this +workspace will never overflow, though there is a test for its doing so. + +On error, errorcode will be set non-zero, so we don't need to look at the +result of the function. The initial options have been put into the cb block, +but we still have to pass a separate options variable (the first argument) +because the options may change as the pattern is processed. 
*/ + +cb.erroroffset = patlen; /* For any subsequent errors that do not set it */ +pptr = cb.parsed_pattern; +code = cworkspace; +*code = OP_BRA; + +(void)compile_regex(cb.external_options, ccontext->extra_options, &code, &pptr, + &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, NULL, + &cb, &length); + +if (errorcode != 0) goto HAD_CB_ERROR; /* Offset is in cb.erroroffset */ + +/* This should be caught in compile_regex(), but just in case... */ + +if (length > MAX_PATTERN_SIZE) + { + errorcode = ERR20; + goto HAD_CB_ERROR; + } + +/* Compute the size of, then, if not too large, get and initialize the data +block for storing the compiled pattern and names table. Integer overflow should +no longer be possible because nowadays we limit the maximum value of +cb.names_found and cb.name_entry_size. */ + +re_blocksize = sizeof(pcre2_real_code) + + CU2BYTES(length + + (PCRE2_SIZE)cb.names_found * (PCRE2_SIZE)cb.name_entry_size); + +if (re_blocksize > ccontext->max_pattern_compiled_length) + { + errorcode = ERR101; + goto HAD_CB_ERROR; + } + +re = (pcre2_real_code *) + ccontext->memctl.malloc(re_blocksize, ccontext->memctl.memory_data); +if (re == NULL) + { + errorcode = ERR21; + goto HAD_CB_ERROR; + } + +/* The compiler may put padding at the end of the pcre2_real_code structure in +order to round it up to a multiple of 4 or 8 bytes. This means that when a +compiled pattern is copied (for example, when serialized) undefined bytes are +read, and this annoys debuggers such as valgrind. To avoid this, we explicitly +write to the last 8 bytes of the structure before setting the fields. 
*/ + +memset((char *)re + sizeof(pcre2_real_code) - 8, 0, 8); +re->memctl = ccontext->memctl; +re->tables = tables; +re->executable_jit = NULL; +memset(re->start_bitmap, 0, 32 * sizeof(uint8_t)); +re->blocksize = re_blocksize; +re->magic_number = MAGIC_NUMBER; +re->compile_options = options; +re->overall_options = cb.external_options; +re->extra_options = ccontext->extra_options; +re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags; +re->limit_heap = limit_heap; +re->limit_match = limit_match; +re->limit_depth = limit_depth; +re->first_codeunit = 0; +re->last_codeunit = 0; +re->bsr_convention = bsr; +re->newline_convention = newline; +re->max_lookbehind = 0; +re->minlength = 0; +re->top_bracket = 0; +re->top_backref = 0; +re->name_entry_size = cb.name_entry_size; +re->name_count = cb.names_found; + +/* The basic block is immediately followed by the name table, and the compiled +code follows after that. */ + +codestart = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)) + + re->name_entry_size * re->name_count; + +/* Update the compile data block for the actual compile. The starting points of +the name/number translation table and of the code are passed around in the +compile data block. The start/end pattern and initial options are already set +from the pre-compile phase, as is the name_entry_size field. */ + +cb.parens_depth = 0; +cb.assert_depth = 0; +cb.lastcapture = 0; +cb.name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)); +cb.start_code = codestart; +cb.req_varyopt = 0; +cb.had_accept = FALSE; +cb.had_pruneorskip = FALSE; + +/* If any named groups were found, create the name/number table from the list +created in the pre-pass. */ + +if (cb.names_found > 0) + { + named_group *ng = cb.named_groups; + for (i = 0; i < cb.names_found; i++, ng++) + add_name_to_table(&cb, ng->name, ng->length, ng->number, i); + } + +/* Set up a starting, non-extracting bracket, then compile the expression. 
On +error, errorcode will be set non-zero, so we don't need to look at the result +of the function here. */ + +pptr = cb.parsed_pattern; +code = (PCRE2_UCHAR *)codestart; +*code = OP_BRA; +regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code, + &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, + NULL, &cb, NULL); +if (regexrc < 0) re->flags |= PCRE2_MATCH_EMPTY; +re->top_bracket = cb.bracount; +re->top_backref = cb.top_backref; +re->max_lookbehind = cb.max_lookbehind; + +if (cb.had_accept) + { + reqcu = 0; /* Must disable after (*ACCEPT) */ + reqcuflags = REQ_NONE; + re->flags |= PCRE2_HASACCEPT; /* Disables minimum length */ + } + +/* Fill in the final opcode and check for disastrous overflow. If no overflow, +but the estimated length exceeds the really used length, adjust the value of +re->blocksize, and if valgrind support is configured, mark the extra allocated +memory as unaddressable, so that any out-of-bound reads can be detected. */ + +*code++ = OP_END; +usedlength = code - codestart; +if (usedlength > length) errorcode = ERR23; else + { + re->blocksize -= CU2BYTES(length - usedlength); +#ifdef SUPPORT_VALGRIND + VALGRIND_MAKE_MEM_NOACCESS(code, CU2BYTES(length - usedlength)); +#endif + } + +/* Scan the pattern for recursion/subroutine calls and convert the group +numbers into offsets. Maintain a small cache so that repeated groups containing +recursions are efficiently handled. 
*/ + +#define RSCAN_CACHE_SIZE 8 + +if (errorcode == 0 && cb.had_recurse) + { + PCRE2_UCHAR *rcode; + PCRE2_SPTR rgroup; + unsigned int ccount = 0; + int start = RSCAN_CACHE_SIZE; + recurse_cache rc[RSCAN_CACHE_SIZE]; + + for (rcode = (PCRE2_UCHAR *)find_recurse(codestart, utf); + rcode != NULL; + rcode = (PCRE2_UCHAR *)find_recurse(rcode + 1 + LINK_SIZE, utf)) + { + int p, groupnumber; + + groupnumber = (int)GET(rcode, 1); + if (groupnumber == 0) rgroup = codestart; else + { + PCRE2_SPTR search_from = codestart; + rgroup = NULL; + for (i = 0, p = start; i < ccount; i++, p = (p + 1) & 7) + { + if (groupnumber == rc[p].groupnumber) + { + rgroup = rc[p].group; + break; + } + + /* Group n+1 must always start to the right of group n, so we can save + search time below when the new group number is greater than any of the + previously found groups. */ + + if (groupnumber > rc[p].groupnumber) search_from = rc[p].group; + } + + if (rgroup == NULL) + { + rgroup = PRIV(find_bracket)(search_from, utf, groupnumber); + if (rgroup == NULL) + { + errorcode = ERR53; + break; + } + if (--start < 0) start = RSCAN_CACHE_SIZE - 1; + rc[start].groupnumber = groupnumber; + rc[start].group = rgroup; + if (ccount < RSCAN_CACHE_SIZE) ccount++; + } + } + + PUT(rcode, 1, rgroup - codestart); + } + } + +/* In rare debugging situations we sometimes need to look at the compiled code +at this stage. */ + +#ifdef DEBUG_CALL_PRINTINT +pcre2_printint(re, stderr, TRUE); +fprintf(stderr, "Length=%lu Used=%lu\n", length, usedlength); +#endif + +/* Unless disabled, check whether any single character iterators can be +auto-possessified. The function overwrites the appropriate opcode values, so +the type of the pointer must be cast. NOTE: the intermediate variable "temp" is +used in this code because at least one compiler gives a warning about loss of +"const" attribute if the cast (PCRE2_UCHAR *)codestart is used directly in the +function call. 
*/ + +if (errorcode == 0 && (re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0) + { + PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart; + if (PRIV(auto_possessify)(temp, &cb) != 0) errorcode = ERR80; + } + +/* Failed to compile, or error while post-processing. */ + +if (errorcode != 0) goto HAD_CB_ERROR; + +/* Successful compile. If the anchored option was not passed, set it if +we can determine that the pattern is anchored by virtue of ^ characters or \A +or anything else, such as starting with non-atomic .* when DOTALL is set and +there are no occurrences of *PRUNE or *SKIP (though there is an option to +disable this case). */ + +if ((re->overall_options & PCRE2_ANCHORED) == 0 && + is_anchored(codestart, 0, &cb, 0, FALSE)) + re->overall_options |= PCRE2_ANCHORED; + +/* Set up the first code unit or startline flag, the required code unit, and +then study the pattern. This code need not be obeyed if PCRE2_NO_START_OPTIMIZE +is set, as the data it would create will not be used. Note that a first code +unit (but not the startline flag) is useful for anchored patterns because it +can still give a quick "no match" and also avoid searching for a last code +unit. */ + +if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) + { + int minminlength = 0; /* For minimal minlength from first/required CU */ + + /* If we do not have a first code unit, see if there is one that is asserted + (these are not saved during the compile because they can cause conflicts with + actual literals that follow). */ + + if (firstcuflags >= REQ_NONE) + firstcu = find_firstassertedcu(codestart, &firstcuflags, 0); + + /* Save the data for a first code unit. The existence of one means the + minimum length must be at least 1. */ + + if (firstcuflags < REQ_NONE) + { + re->first_codeunit = firstcu; + re->flags |= PCRE2_FIRSTSET; + minminlength++; + + /* Handle caseless first code units. 
*/ + + if ((firstcuflags & REQ_CASELESS) != 0) + { + if (firstcu < 128 || (!utf && !ucp && firstcu < 255)) + { + if (cb.fcc[firstcu] != firstcu) re->flags |= PCRE2_FIRSTCASELESS; + } + + /* The first code unit is > 128 in UTF or UCP mode, or > 255 otherwise. + In 8-bit UTF mode, codepoints in the range 128-255 are introductory code + points and cannot have another case, but if UCP is set they may do. */ + +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 + else if (ucp && !utf && UCD_OTHERCASE(firstcu) != firstcu) + re->flags |= PCRE2_FIRSTCASELESS; +#else + else if ((utf || ucp) && firstcu <= MAX_UTF_CODE_POINT && + UCD_OTHERCASE(firstcu) != firstcu) + re->flags |= PCRE2_FIRSTCASELESS; +#endif +#endif /* SUPPORT_UNICODE */ + } + } + + /* When there is no first code unit, for non-anchored patterns, see if we can + set the PCRE2_STARTLINE flag. This is helpful for multiline matches when all + branches start with ^ and also when all branches start with non-atomic .* for + non-DOTALL matches when *PRUNE and SKIP are not present. (There is an option + that disables this case.) */ + + else if ((re->overall_options & PCRE2_ANCHORED) == 0 && + is_startline(codestart, 0, &cb, 0, FALSE)) + re->flags |= PCRE2_STARTLINE; + + /* Handle the "required code unit", if one is set. In the UTF case we can + increment the minimum minimum length only if we are sure this really is a + different character and not a non-starting code unit of the first character, + because the minimum length count is in characters, not code units. 
*/ + + if (reqcuflags < REQ_NONE) + { +#if PCRE2_CODE_UNIT_WIDTH == 16 + if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */ + firstcuflags >= REQ_NONE || /* First not set */ + (firstcu & 0xf800) != 0xd800 || /* First not surrogate */ + (reqcu & 0xfc00) != 0xdc00) /* Req not low surrogate */ +#elif PCRE2_CODE_UNIT_WIDTH == 8 + if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */ + firstcuflags >= REQ_NONE || /* First not set */ + (firstcu & 0x80) == 0 || /* First is ASCII */ + (reqcu & 0x80) == 0) /* Req is ASCII */ +#endif + { + minminlength++; + } + + /* In the case of an anchored pattern, set up the value only if it follows + a variable length item in the pattern. */ + + if ((re->overall_options & PCRE2_ANCHORED) == 0 || + (reqcuflags & REQ_VARY) != 0) + { + re->last_codeunit = reqcu; + re->flags |= PCRE2_LASTSET; + + /* Handle caseless required code units as for first code units (above). */ + + if ((reqcuflags & REQ_CASELESS) != 0) + { + if (reqcu < 128 || (!utf && !ucp && reqcu < 255)) + { + if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS; + } +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 + else if (ucp && !utf && UCD_OTHERCASE(reqcu) != reqcu) + re->flags |= PCRE2_LASTCASELESS; +#else + else if ((utf || ucp) && reqcu <= MAX_UTF_CODE_POINT && + UCD_OTHERCASE(reqcu) != reqcu) + re->flags |= PCRE2_LASTCASELESS; +#endif +#endif /* SUPPORT_UNICODE */ + } + } + } + + /* Study the compiled pattern to set up information such as a bitmap of + starting code units and a minimum matching length. */ + + if (PRIV(study)(re) != 0) + { + errorcode = ERR31; + goto HAD_CB_ERROR; + } + + /* If study() set a bitmap of starting code units, it implies a minimum + length of at least one. */ + + if ((re->flags & PCRE2_FIRSTMAPSET) != 0 && minminlength == 0) + minminlength = 1; + + /* If the minimum length set (or not set) by study() is less than the minimum + implied by required code units, override it. 
*/ + + if (re->minlength < minminlength) re->minlength = minminlength; + } /* End of start-of-match optimizations. */ + +/* Control ends up here in all cases. When running under valgrind, make a +pattern's terminating zero defined again. If memory was obtained for the parsed +version of the pattern, free it before returning. Also free the list of named +groups if a larger one had to be obtained, and likewise the group information +vector. */ + +EXIT: +#ifdef SUPPORT_VALGRIND +if (zero_terminated) VALGRIND_MAKE_MEM_DEFINED(pattern + patlen, CU2BYTES(1)); +#endif +if (cb.parsed_pattern != stack_parsed_pattern) + ccontext->memctl.free(cb.parsed_pattern, ccontext->memctl.memory_data); +if (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE) + ccontext->memctl.free((void *)cb.named_groups, ccontext->memctl.memory_data); +if (cb.groupinfo != stack_groupinfo) + ccontext->memctl.free((void *)cb.groupinfo, ccontext->memctl.memory_data); +return re; /* Will be NULL after an error */ + +/* Errors discovered in parse_regex() set the offset value in the compile +block. Errors discovered before it is called must compute it from the ptr +value. After parse_regex() is called, the offset in the compile block is set to +the end of the pattern, but certain errors in compile_regex() may reset it if +an offset is available in the parsed pattern. */ + +HAD_CB_ERROR: +ptr = pattern + cb.erroroffset; + +HAD_EARLY_ERROR: +*erroroffset = ptr - pattern; + +HAD_ERROR: +*errorptr = errorcode; +pcre2_code_free(re); +re = NULL; +goto EXIT; +} + +/* These #undefs are here to enable unity builds with CMake. 
*/ + +#undef NLBLOCK /* Block containing newline information */ +#undef PSSTART /* Field containing processed string start */ +#undef PSEND /* Field containing processed string end */ + +/* End of pcre2_compile.c */ diff --git a/src/pcre2_config.c b/src/pcre2_config.c new file mode 100644 index 0000000..5ef103c --- /dev/null +++ b/src/pcre2_config.c @@ -0,0 +1,252 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2020 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
+its value gets changed by pcre2_intmodedep.h (included by pcre2_internal.h) to
+be in code units. */
+
+static int configured_link_size = LINK_SIZE;
+
+#include "pcre2_internal.h"
+
+/* These macros are the standard way of turning unquoted text into C strings.
+They allow macros like PCRE2_MAJOR to be defined without quotes, which is
+convenient for user programs that want to test their values. */
+
+#define STRING(a)  # a
+#define XSTRING(s) STRING(s)
+
+
+/*************************************************
+* Return info about what features are configured *
+*************************************************/
+
+/* If where is NULL, the length of memory required is returned.
+
+Arguments:
+  what             what information is required
+  where            where to put the information, or NULL to request a length
+
+Returns:           0 if a numerical value is returned
+                   > 0 if a string value or a required length is returned
+                   PCRE2_ERROR_BADOPTION if "what" is not recognized
+                     or JIT target requested when JIT not enabled
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_config(uint32_t what, void *where)
+{
+if (where == NULL)  /* Requests a length */
+  {
+  switch(what)
+    {
+    default:                           /* Unknown item */
+    return PCRE2_ERROR_BADOPTION;
+
+    case PCRE2_CONFIG_BSR:
+    case PCRE2_CONFIG_COMPILED_WIDTHS:
+    case PCRE2_CONFIG_DEPTHLIMIT:
+    case PCRE2_CONFIG_HEAPLIMIT:
+    case PCRE2_CONFIG_JIT:
+    case PCRE2_CONFIG_LINKSIZE:
+    case PCRE2_CONFIG_MATCHLIMIT:
+    case PCRE2_CONFIG_NEVER_BACKSLASH_C:
+    case PCRE2_CONFIG_NEWLINE:
+    case PCRE2_CONFIG_PARENSLIMIT:
+    case PCRE2_CONFIG_STACKRECURSE:    /* Obsolete */
+    case PCRE2_CONFIG_TABLES_LENGTH:
+    case PCRE2_CONFIG_UNICODE:
+    return sizeof(uint32_t);           /* Every numerical item is a uint32_t */
+
+    /* String items: their lengths are computed in the main switch below */
+
+    case PCRE2_CONFIG_JITTARGET:
+    case PCRE2_CONFIG_UNICODE_VERSION:
+    case PCRE2_CONFIG_VERSION:
+    break;
+    }
+  }
+/* From here on, where can be NULL only for one of the string items. */
+switch (what)
+  {
+  default:
+  return PCRE2_ERROR_BADOPTION;
+
+  case PCRE2_CONFIG_BSR:
+#ifdef BSR_ANYCRLF
+  *((uint32_t *)where) = PCRE2_BSR_ANYCRLF;
+#else
+  *((uint32_t *)where) = PCRE2_BSR_UNICODE;
+#endif
+  break;
+
+  case PCRE2_CONFIG_COMPILED_WIDTHS:   /* Bit mask: 1, 2, 4 for 8, 16, 32 bits */
+  *((uint32_t *)where) = 0
+#ifdef SUPPORT_PCRE2_8
+  + 1
+#endif
+#ifdef SUPPORT_PCRE2_16
+  + 2
+#endif
+#ifdef SUPPORT_PCRE2_32
+  + 4
+#endif
+  ;
+  break;
+
+  case PCRE2_CONFIG_DEPTHLIMIT:
+  *((uint32_t *)where) = MATCH_LIMIT_DEPTH;
+  break;
+
+  case PCRE2_CONFIG_HEAPLIMIT:
+  *((uint32_t *)where) = HEAP_LIMIT;
+  break;
+
+  case PCRE2_CONFIG_JIT:
+#ifdef SUPPORT_JIT
+  *((uint32_t *)where) = 1;
+#else
+  *((uint32_t *)where) = 0;
+#endif
+  break;
+
+  case PCRE2_CONFIG_JITTARGET:
+#ifdef SUPPORT_JIT
+    {
+    const char *v = PRIV(jit_get_target)();
+    return (int)(1 + ((where == NULL)?
+      strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
+    }
+#else
+  return PCRE2_ERROR_BADOPTION;
+#endif
+
+  case PCRE2_CONFIG_LINKSIZE:          /* The value saved before the include */
+  *((uint32_t *)where) = (uint32_t)configured_link_size;
+  break;
+
+  case PCRE2_CONFIG_MATCHLIMIT:
+  *((uint32_t *)where) = MATCH_LIMIT;
+  break;
+
+  case PCRE2_CONFIG_NEWLINE:
+  *((uint32_t *)where) = NEWLINE_DEFAULT;
+  break;
+
+  case PCRE2_CONFIG_NEVER_BACKSLASH_C:
+#ifdef NEVER_BACKSLASH_C
+  *((uint32_t *)where) = 1;
+#else
+  *((uint32_t *)where) = 0;
+#endif
+  break;
+
+  case PCRE2_CONFIG_PARENSLIMIT:
+  *((uint32_t *)where) = PARENS_NEST_LIMIT;
+  break;
+
+  /* This is now obsolete. The stack is no longer used via recursion for
+  handling backtracking in pcre2_match(). */
+
+  case PCRE2_CONFIG_STACKRECURSE:
+  *((uint32_t *)where) = 0;
+  break;
+
+  case PCRE2_CONFIG_TABLES_LENGTH:
+  *((uint32_t *)where) = TABLES_LENGTH;
+  break;
+
+  case PCRE2_CONFIG_UNICODE_VERSION:
+    {
+#if defined SUPPORT_UNICODE
+    const char *v = PRIV(unicode_version);
+#else
+    const char *v = "Unicode not supported";
+#endif
+    return (int)(1 + ((where == NULL)?
+      strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
+   }
+  break;                               /* Not reached: the block above returns */
+
+  case PCRE2_CONFIG_UNICODE:
+#if defined SUPPORT_UNICODE
+  *((uint32_t *)where) = 1;
+#else
+  *((uint32_t *)where) = 0;
+#endif
+  break;
+
+  /* The hackery in setting "v" below is to cope with the case when
+  PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
+  If the second alternative is used in this case, it does not leave a space
+  before the date. On the other hand, if all four macros are put into a single
+  XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
+  There are problems using an "obvious" approach like this:
+
+     XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE2_MINOR)
+     XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE2_DATE)
+
+  because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
+  of STRING(). The C standard states: "If (before argument substitution) any
+  argument consists of no preprocessing tokens, the behavior is undefined." It
+  turns out that gcc treats this case as a single empty string - which is what
+  we really want - but Visual C grumbles about the lack of an argument for the
+  macro. Unfortunately, both are within their rights. As there seems to be no
+  way to test for a macro's value being empty at compile time, we have to
+  resort to a runtime test. */
+
+  case PCRE2_CONFIG_VERSION:
+    {
+    const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
+      XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
+      XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
+    return (int)(1 + ((where == NULL)?
+      strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
+    }
+  }
+
+return 0;                              /* A numerical value was stored */
+}
+
+/* End of pcre2_config.c */
diff --git a/src/pcre2_context.c b/src/pcre2_context.c
new file mode 100644
index 0000000..9edbd1b
--- /dev/null
+++ b/src/pcre2_context.c
@@ -0,0 +1,510 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + + + +/************************************************* +* Default malloc/free functions * +*************************************************/ + +/* Ignore the "user data" argument in each case. 
*/
+
+static void *default_malloc(size_t size, void *data)
+{
+(void)data;                            /* Unused */
+return malloc(size);
+}
+
+/* The matching release function: the block is handed straight to free(). */
+static void default_free(void *block, void *data)
+{
+(void)data;                            /* Unused */
+free(block);
+}
+
+
+
+/*************************************************
+*      Get a block and save memory control       *
+*************************************************/
+
+/* This internal function is called to get a block of memory in which the
+memory control data is to be stored at the start for future use.
+
+Arguments:
+  size        amount of memory required; must cover a pcre2_memctl (unchecked)
+  memctl      pointer to a memctl block to use and copy, or NULL for malloc()
+
+Returns:      pointer to memory or NULL on failure
+*/
+
+extern void *
+PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
+{
+pcre2_memctl *newmemctl;
+void *yield = (memctl == NULL)? malloc(size) :
+  memctl->malloc(size, memctl->memory_data);
+if (yield == NULL) return NULL;
+newmemctl = (pcre2_memctl *)yield;     /* Control data sits at the start */
+if (memctl == NULL)
+  {
+  newmemctl->malloc = default_malloc;  /* No memctl given: record defaults */
+  newmemctl->free = default_free;
+  newmemctl->memory_data = NULL;
+  }
+else *newmemctl = *memctl;             /* Inherit the supplied functions */
+return yield;
+}
+
+
+
+/*************************************************
+*          Create and initialize contexts        *
+*************************************************/
+
+/* Initializing for compile and match contexts is done in separate, private
+functions so that these can be called from functions such as pcre2_compile()
+when an external context is not supplied. The initializing functions have an
+option to set up default memory management.
*/
+
+PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
+pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
+  void (*private_free)(void *, void *), void *memory_data)
+{
+pcre2_general_context *gcontext;
+if (private_malloc == NULL) private_malloc = default_malloc;  /* Default */
+if (private_free == NULL) private_free = default_free;        /* Default */
+gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data);
+if (gcontext == NULL) return NULL;           /* Allocation failed */
+gcontext->memctl.malloc = private_malloc;    /* Record the functions in use */
+gcontext->memctl.free = private_free;
+gcontext->memctl.memory_data = memory_data;
+return gcontext;
+}
+
+
+/* A default compile context is set up to save having to initialize at run time
+when no context is supplied to the compile function. */
+
+const pcre2_compile_context PRIV(default_compile_context) = {
+  { default_malloc, default_free, NULL },    /* Default memory handling */
+  NULL,                                      /* Stack guard */
+  NULL,                                      /* Stack guard data */
+  PRIV(default_tables),                      /* Character tables */
+  PCRE2_UNSET,                               /* Max pattern length */
+  PCRE2_UNSET,                               /* Max pattern compiled length */
+  BSR_DEFAULT,                               /* Backslash R default */
+  NEWLINE_DEFAULT,                           /* Newline convention */
+  PARENS_NEST_LIMIT,                         /* Parentheses nesting limit */
+  0,                                         /* Extra options */
+  MAX_VARLOOKBEHIND                          /* Max variable lookbehind */
+  };
+
+/* The create function copies the default into the new memory, but must
+override the default memory handling functions if a gcontext was provided. */
+
+PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
+pcre2_compile_context_create(pcre2_general_context *gcontext)
+{
+pcre2_compile_context *ccontext = PRIV(memctl_malloc)(
+  sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext);
+if (ccontext == NULL) return NULL;
+*ccontext = PRIV(default_compile_context);   /* Also resets the memctl */
+if (gcontext != NULL)                        /* So put the given one back */
+  *((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
+return ccontext;
+}
+
+
+/* A default match context is set up to save having to initialize at run time
+when no context is supplied to a match function.
*/
+
+const pcre2_match_context PRIV(default_match_context) = {
+  { default_malloc, default_free, NULL },    /* Default memory handling */
+#ifdef SUPPORT_JIT
+  NULL,          /* JIT callback */
+  NULL,          /* JIT callback data */
+#endif
+  NULL,          /* Callout function */
+  NULL,          /* Callout data */
+  NULL,          /* Substitute callout function */
+  NULL,          /* Substitute callout data */
+  PCRE2_UNSET,   /* Offset limit */
+  HEAP_LIMIT,    /* Heap limit */
+  MATCH_LIMIT,   /* Match limit */
+  MATCH_LIMIT_DEPTH };   /* Depth limit */
+
+/* The create function copies the default into the new memory, but must
+override the default memory handling functions if a gcontext was provided. */
+
+PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
+pcre2_match_context_create(pcre2_general_context *gcontext)
+{
+pcre2_match_context *mcontext = PRIV(memctl_malloc)(
+  sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext);
+if (mcontext == NULL) return NULL;
+*mcontext = PRIV(default_match_context);     /* Also resets the memctl */
+if (gcontext != NULL)                        /* So put the given one back */
+  *((pcre2_memctl *)mcontext) = *((pcre2_memctl *)gcontext);
+return mcontext;
+}
+
+
+/* A default convert context is set up to save having to initialize at run time
+when no context is supplied to the convert function. */
+
+const pcre2_convert_context PRIV(default_convert_context) = {
+  { default_malloc, default_free, NULL },    /* Default memory handling */
+#ifdef _WIN32
+  CHAR_BACKSLASH,                            /* Default path separator */
+  CHAR_GRAVE_ACCENT                          /* Default escape character */
+#else  /* Not Windows */
+  CHAR_SLASH,                                /* Default path separator */
+  CHAR_BACKSLASH                             /* Default escape character */
+#endif
+  };
+
+/* The create function copies the default into the new memory, but must
+override the default memory handling functions if a gcontext was provided.
*/
+
+PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
+pcre2_convert_context_create(pcre2_general_context *gcontext)
+{
+pcre2_convert_context *ccontext = PRIV(memctl_malloc)(
+  sizeof(pcre2_real_convert_context), (pcre2_memctl *)gcontext);
+if (ccontext == NULL) return NULL;
+*ccontext = PRIV(default_convert_context);   /* Also resets the memctl */
+if (gcontext != NULL)                        /* So put the given one back */
+  *((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
+return ccontext;
+}
+
+
+/*************************************************
+*              Context copy functions            *
+*************************************************/
+/* Each copy uses the allocator held in its source, which must not be NULL. */
+PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
+pcre2_general_context_copy(pcre2_general_context *gcontext)
+{
+pcre2_general_context *newcontext =
+  gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
+  gcontext->memctl.memory_data);
+if (newcontext == NULL) return NULL;         /* Allocation failed */
+memcpy(newcontext, gcontext, sizeof(pcre2_real_general_context));
+return newcontext;
+}
+
+
+PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
+pcre2_compile_context_copy(pcre2_compile_context *ccontext)
+{
+pcre2_compile_context *newcontext =
+  ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
+  ccontext->memctl.memory_data);
+if (newcontext == NULL) return NULL;         /* Allocation failed */
+memcpy(newcontext, ccontext, sizeof(pcre2_real_compile_context));
+return newcontext;
+}
+
+
+PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
+pcre2_match_context_copy(pcre2_match_context *mcontext)
+{
+pcre2_match_context *newcontext =
+  mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
+  mcontext->memctl.memory_data);
+if (newcontext == NULL) return NULL;         /* Allocation failed */
+memcpy(newcontext, mcontext, sizeof(pcre2_real_match_context));
+return newcontext;
+}
+
+
+PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
+pcre2_convert_context_copy(pcre2_convert_context *ccontext)
+{
+pcre2_convert_context *newcontext =
+  ccontext->memctl.malloc(sizeof(pcre2_real_convert_context),
+  ccontext->memctl.memory_data);
+if (newcontext == NULL) return NULL;         /* Allocation failed */
+memcpy(newcontext, ccontext, sizeof(pcre2_real_convert_context));
+return newcontext;
+}
+
+
+/*************************************************
+*              Context free functions            *
+*************************************************/
+/* Each block is released with the free function it holds; NULL is ignored. */
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_general_context_free(pcre2_general_context *gcontext)
+{
+if (gcontext != NULL)
+  gcontext->memctl.free(gcontext, gcontext->memctl.memory_data);
+}
+
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_compile_context_free(pcre2_compile_context *ccontext)
+{
+if (ccontext != NULL)
+  ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
+}
+
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_match_context_free(pcre2_match_context *mcontext)
+{
+if (mcontext != NULL)
+  mcontext->memctl.free(mcontext, mcontext->memctl.memory_data);
+}
+
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_convert_context_free(pcre2_convert_context *ccontext)
+{
+if (ccontext != NULL)
+  ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
+}
+
+
+/*************************************************
+*             Set values in contexts             *
+*************************************************/
+
+/* All these functions return 0 for success or PCRE2_ERROR_BADDATA if invalid
+data is given. Only some of the functions are able to test the validity of the
+data.
*/
+
+
+/* ------------ Compile context ------------ */
+/* Every setter below dereferences the context without a NULL check. */
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_character_tables(pcre2_compile_context *ccontext,
+  const uint8_t *tables)
+{
+ccontext->tables = tables;             /* Pointer stored; nothing is copied */
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
+{
+switch(value)
+  {
+  case PCRE2_BSR_ANYCRLF:
+  case PCRE2_BSR_UNICODE:
+  ccontext->bsr_convention = value;
+  return 0;
+
+  default:                             /* Any other value is rejected */
+  return PCRE2_ERROR_BADDATA;
+  }
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
+{
+ccontext->max_pattern_length = length;           /* Stored unvalidated */
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_max_pattern_compiled_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
+{
+ccontext->max_pattern_compiled_length = length;  /* Stored unvalidated */
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
+{
+switch(newline)
+  {
+  case PCRE2_NEWLINE_CR:
+  case PCRE2_NEWLINE_LF:
+  case PCRE2_NEWLINE_CRLF:
+  case PCRE2_NEWLINE_ANY:
+  case PCRE2_NEWLINE_ANYCRLF:
+  case PCRE2_NEWLINE_NUL:
+  ccontext->newline_convention = newline;
+  return 0;
+
+  default:                             /* Any other value is rejected */
+  return PCRE2_ERROR_BADDATA;
+  }
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, uint32_t limit)
+{
+ccontext->max_varlookbehind = limit;   /* Stored unvalidated */
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
+{
+ccontext->parens_nest_limit = limit;   /* Stored unvalidated */
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, uint32_t options)
+{
+ccontext->extra_options = options;     /* Replaces, does not OR in; unchecked */
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
+  int (*guard)(uint32_t, void *), void *user_data)
+{
+ccontext->stack_guard = guard;
+ccontext->stack_guard_data = user_data;
+return 0;
+}
+
+
+/* ------------ Match context ------------ */
+/* Every setter below dereferences the context without a NULL check. */
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_callout(pcre2_match_context *mcontext,
+  int (*callout)(pcre2_callout_block *, void *), void *callout_data)
+{
+mcontext->callout = callout;
+mcontext->callout_data = callout_data;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_substitute_callout(pcre2_match_context *mcontext,
+  int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
+    void *substitute_callout_data)
+{
+mcontext->substitute_callout = substitute_callout;
+mcontext->substitute_callout_data = substitute_callout_data;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
+{
+mcontext->heap_limit = limit;          /* Stored unvalidated */
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
+{
+mcontext->match_limit = limit;         /* Stored unvalidated */
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_depth_limit(pcre2_match_context *mcontext, uint32_t limit)
+{
+mcontext->depth_limit = limit;         /* Stored unvalidated */
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
+{
+mcontext->offset_limit = limit;        /* Stored unvalidated */
+return 0;
+}
+
+/* These functions became obsolete at release 10.30. The first is kept as a
+synonym for backwards compatibility. The second now does nothing. Exclude both
+from coverage reports.
*/
+
+/* LCOV_EXCL_START */
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
+{
+return pcre2_set_depth_limit(mcontext, limit);   /* Plain synonym */
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
+  void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
+  void *mydata)
+{
+(void)mcontext;
+(void)mymalloc;
+(void)myfree;
+(void)mydata;
+return 0;                              /* All arguments are ignored */
+}
+
+/* LCOV_EXCL_STOP */
+
+
+/* ------------ Convert context ------------ */
+/* Both setters dereference the context without a NULL check. */
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_glob_separator(pcre2_convert_context *ccontext, uint32_t separator)
+{
+if (separator != CHAR_SLASH && separator != CHAR_BACKSLASH &&
+    separator != CHAR_DOT) return PCRE2_ERROR_BADDATA;  /* Only these three */
+ccontext->glob_separator = separator;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape)
+{
+if (escape > 255 || (escape != 0 && !ispunct(escape)))
+  return PCRE2_ERROR_BADDATA;          /* Must be zero or satisfy ispunct() */
+ccontext->glob_escape = escape;
+return 0;
+}
+
+/* End of pcre2_context.c */
+
diff --git a/src/pcre2_convert.c b/src/pcre2_convert.c
new file mode 100644
index 0000000..fe396ae
--- /dev/null
+++ b/src/pcre2_convert.c
@@ -0,0 +1,1189 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+----------------------------------------------------------------------------- +*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + +#define TYPE_OPTIONS (PCRE2_CONVERT_GLOB| \ + PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED) + +#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \ + PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \ + PCRE2_CONVERT_GLOB_NO_STARSTAR| \ + TYPE_OPTIONS) + +#define DUMMY_BUFFER_SIZE 100 + +/* Generated pattern fragments */ + +#define STR_BACKSLASH_A STR_BACKSLASH STR_A +#define STR_BACKSLASH_z STR_BACKSLASH STR_z +#define STR_COLON_RIGHT_SQUARE_BRACKET STR_COLON STR_RIGHT_SQUARE_BRACKET +#define STR_DOT_STAR_LOOKBEHIND STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN +#define STR_LOOKAHEAD_NOT_DOT STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS +#define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS +#define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS + +/* States for POSIX processing */ + +enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET, + POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED }; + +/* Macro to add a character string to the output buffer, checking for overflow. */ + +#define PUTCHARS(string) \ + { \ + for (s = (char *)(string); *s != 0; s++) \ + { \ + if (p >= endp) return PCRE2_ERROR_NOMEMORY; \ + *p++ = *s; \ + } \ + } + +/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */ + +static const char *pcre2_escaped_literals = + STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS + STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN + STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET + STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET + STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS; + +/* Recognized escaped metacharacters in POSIX basic patterns. 
*/ + +static const char *posix_meta_escapes = + STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS + STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET + STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9; + + + +/************************************************* +* Convert a POSIX pattern * +*************************************************/ + +/* This function handles both basic and extended POSIX patterns. + +Arguments: + pattype the pattern type + pattern the pattern + plength length in code units + utf TRUE if UTF + use_buffer where to put the output + use_length length of use_buffer + bufflenptr where to put the used length + dummyrun TRUE if a dummy run + ccontext the convert context + +Returns: 0 => success + !0 => error code +*/ + +static int +convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength, + BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length, + PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext) +{ +char *s; +PCRE2_SPTR posix = pattern; +PCRE2_UCHAR *p = use_buffer; +PCRE2_UCHAR *pp = p; +PCRE2_UCHAR *endp = p + use_length - 1; /* Allow for trailing zero */ +PCRE2_SIZE convlength = 0; + +uint32_t bracount = 0; +uint32_t posix_state = POSIX_START_REGEX; +uint32_t lastspecial = 0; +BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0; +BOOL nextisliteral = FALSE; + +(void)utf; /* Not used when Unicode not supported */ +(void)ccontext; /* Not currently used */ + +/* Initialize default for error offset as end of input. */ + +*bufflenptr = plength; +PUTCHARS(STR_STAR_NUL); + +/* Now scan the input. */ + +while (plength > 0) + { + uint32_t c, sc; + int clength = 1; + + /* Add in the length of the last item, then, if in the dummy run, pull the + pointer back to the start of the (temporary) buffer and then remember the + start of the next item. 
*/ + + convlength += p - pp; + if (dummyrun) p = use_buffer; + pp = p; + + /* Pick up the next character */ + +#ifndef SUPPORT_UNICODE + c = *posix; +#else + GETCHARLENTEST(c, posix, clength); +#endif + posix += clength; + plength -= clength; + + sc = nextisliteral? 0 : c; + nextisliteral = FALSE; + + /* Handle a character within a class. */ + + if (posix_state >= POSIX_CLASS_NOT_STARTED) + { + if (c == CHAR_RIGHT_SQUARE_BRACKET) + { + PUTCHARS(STR_RIGHT_SQUARE_BRACKET); + posix_state = POSIX_NOT_BRACKET; + } + + /* Not the end of the class */ + + else + { + switch (posix_state) + { + case POSIX_CLASS_STARTED: + if (c <= 127 && islower(c)) break; /* Remain in started state */ + posix_state = POSIX_CLASS_NOT_STARTED; + if (c == CHAR_COLON && plength > 0 && + *posix == CHAR_RIGHT_SQUARE_BRACKET) + { + PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET); + plength--; + posix++; + continue; /* With next character after :] */ + } + /* Fall through */ + + case POSIX_CLASS_NOT_STARTED: + if (c == CHAR_LEFT_SQUARE_BRACKET) + posix_state = POSIX_CLASS_STARTING; + break; + + case POSIX_CLASS_STARTING: + if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED; + break; + } + + if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH); + if (p + clength > endp) return PCRE2_ERROR_NOMEMORY; + memcpy(p, posix - clength, CU2BYTES(clength)); + p += clength; + } + } + + /* Handle a character not within a class. */ + + else switch(sc) + { + case CHAR_LEFT_SQUARE_BRACKET: + PUTCHARS(STR_LEFT_SQUARE_BRACKET); + +#ifdef NEVER + /* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does + support) but they are not part of POSIX 1003.1. 
*/ + + if (plength >= 6) + { + if (posix[0] == CHAR_LEFT_SQUARE_BRACKET && + posix[1] == CHAR_COLON && + (posix[2] == CHAR_LESS_THAN_SIGN || + posix[2] == CHAR_GREATER_THAN_SIGN) && + posix[3] == CHAR_COLON && + posix[4] == CHAR_RIGHT_SQUARE_BRACKET && + posix[5] == CHAR_RIGHT_SQUARE_BRACKET) + { + if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY; + memcpy(p, posix, CU2BYTES(6)); + p += 6; + posix += 6; + plength -= 6; + continue; /* With next character */ + } + } +#endif + + /* Handle start of "normal" character classes */ + + posix_state = POSIX_CLASS_NOT_STARTED; + + /* Handle ^ and ] as first characters */ + + if (plength > 0) + { + if (*posix == CHAR_CIRCUMFLEX_ACCENT) + { + posix++; + plength--; + PUTCHARS(STR_CIRCUMFLEX_ACCENT); + } + if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET) + { + posix++; + plength--; + PUTCHARS(STR_RIGHT_SQUARE_BRACKET); + } + } + break; + + case CHAR_BACKSLASH: + if (plength == 0) return PCRE2_ERROR_END_BACKSLASH; + if (extended) nextisliteral = TRUE; else + { + if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL) + { + if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH); + if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY; + lastspecial = *p++ = *posix++; + plength--; + } + else nextisliteral = TRUE; + } + break; + + case CHAR_RIGHT_PARENTHESIS: + if (!extended || bracount == 0) goto ESCAPE_LITERAL; + bracount--; + goto COPY_SPECIAL; + + case CHAR_LEFT_PARENTHESIS: + bracount++; + /* Fall through */ + + case CHAR_QUESTION_MARK: + case CHAR_PLUS: + case CHAR_LEFT_CURLY_BRACKET: + case CHAR_RIGHT_CURLY_BRACKET: + case CHAR_VERTICAL_LINE: + if (!extended) goto ESCAPE_LITERAL; + /* Fall through */ + + case CHAR_DOT: + case CHAR_DOLLAR_SIGN: + posix_state = POSIX_NOT_BRACKET; + COPY_SPECIAL: + lastspecial = c; + if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY; + *p++ = c; + break; + + case CHAR_ASTERISK: + if (lastspecial != CHAR_ASTERISK) + { + if (!extended && (posix_state < POSIX_NOT_BRACKET || + lastspecial == 
CHAR_LEFT_PARENTHESIS)) + goto ESCAPE_LITERAL; + goto COPY_SPECIAL; + } + break; /* Ignore second and subsequent asterisks */ + + case CHAR_CIRCUMFLEX_ACCENT: + if (extended) goto COPY_SPECIAL; + if (posix_state == POSIX_START_REGEX || + lastspecial == CHAR_LEFT_PARENTHESIS) + { + posix_state = POSIX_ANCHORED; + goto COPY_SPECIAL; + } + /* Fall through */ + + default: + if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL) + { + ESCAPE_LITERAL: + PUTCHARS(STR_BACKSLASH); + } + lastspecial = 0xff; /* Indicates nothing special */ + if (p + clength > endp) return PCRE2_ERROR_NOMEMORY; + memcpy(p, posix - clength, CU2BYTES(clength)); + p += clength; + posix_state = POSIX_NOT_BRACKET; + break; + } + } + +if (posix_state >= POSIX_CLASS_NOT_STARTED) + return PCRE2_ERROR_MISSING_SQUARE_BRACKET; +convlength += p - pp; /* Final segment */ +*bufflenptr = convlength; +*p++ = 0; +return 0; +} + + +/************************************************* +* Convert a glob pattern * +*************************************************/ + +/* Context for writing the output into a buffer. */ + +typedef struct pcre2_output_context { + PCRE2_UCHAR *output; /* current output position */ + PCRE2_SPTR output_end; /* output end */ + PCRE2_SIZE output_size; /* size of the output */ + uint8_t out_str[8]; /* string copied to the output */ +} pcre2_output_context; + + +/* Write a character into the output. + +Arguments: + out output context + chr the next character +*/ + +static void +convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr) +{ +out->output_size++; + +if (out->output < out->output_end) + *out->output++ = chr; +} + + +/* Write a string into the output. 
+ +Arguments: + out output context + length length of out->out_str +*/ + +static void +convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length) +{ +uint8_t *out_str = out->out_str; +PCRE2_UCHAR *output = out->output; +PCRE2_SPTR output_end = out->output_end; +PCRE2_SIZE output_size = out->output_size; + +do + { + output_size++; + + if (output < output_end) + *output++ = *out_str++; + } +while (--length != 0); + +out->output = output; +out->output_size = output_size; +} + + +/* Prints the separator into the output. + +Arguments: + out output context + separator glob separator + with_escape backslash is needed before separator +*/ + +static void +convert_glob_print_separator(pcre2_output_context *out, + PCRE2_UCHAR separator, BOOL with_escape) +{ +if (with_escape) + convert_glob_write(out, CHAR_BACKSLASH); + +convert_glob_write(out, separator); +} + + +/* Prints a wildcard into the output. + +Arguments: + out output context + separator glob separator + with_escape backslash is needed before separator +*/ + +static void +convert_glob_print_wildcard(pcre2_output_context *out, + PCRE2_UCHAR separator, BOOL with_escape) +{ +out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET; +out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT; +convert_glob_write_str(out, 2); + +convert_glob_print_separator(out, separator, with_escape); + +convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET); +} + + +/* Parse a posix class. 
+ +Arguments: + from starting point of scanning the range + pattern_end end of pattern + out output context + +Returns: >0 => class index + 0 => malformed class +*/ + +static int +convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end, + pcre2_output_context *out) +{ +static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:" + "graph:lower:print:punct:space:upper:word:xdigit:"; +PCRE2_SPTR start = *from + 1; +PCRE2_SPTR pattern = start; +const char *class_ptr; +PCRE2_UCHAR c; +int class_index; + +while (TRUE) + { + if (pattern >= pattern_end) return 0; + + c = *pattern++; + + if (c < CHAR_a || c > CHAR_z) break; + } + +if (c != CHAR_COLON || pattern >= pattern_end || + *pattern != CHAR_RIGHT_SQUARE_BRACKET) + return 0; + +class_ptr = posix_classes; +class_index = 1; + +while (TRUE) + { + if (*class_ptr == CHAR_NUL) return 0; + + pattern = start; + + while (*pattern == (PCRE2_UCHAR) *class_ptr) + { + if (*pattern == CHAR_COLON) + { + pattern += 2; + start -= 2; + + do convert_glob_write(out, *start++); while (start < pattern); + + *from = pattern; + return class_index; + } + pattern++; + class_ptr++; + } + + while (*class_ptr != CHAR_COLON) class_ptr++; + class_ptr++; + class_index++; + } +} + +/* Checks whether the character is in the class. 
+ +Arguments: + class_index class index + c character + +Returns: !0 => character is found in the class + 0 => otherwise +*/ + +static BOOL +convert_glob_char_in_class(int class_index, PCRE2_UCHAR c) +{ +#if PCRE2_CODE_UNIT_WIDTH != 8 +if (c > 0xff) + { + /* ctype functions are not sane for c > 0xff */ + return 0; + } +#endif + +switch (class_index) + { + case 1: return isalnum(c); + case 2: return isalpha(c); + case 3: return 1; + case 4: return c == CHAR_HT || c == CHAR_SPACE; + case 5: return iscntrl(c); + case 6: return isdigit(c); + case 7: return isgraph(c); + case 8: return islower(c); + case 9: return isprint(c); + case 10: return ispunct(c); + case 11: return isspace(c); + case 12: return isupper(c); + case 13: return isalnum(c) || c == CHAR_UNDERSCORE; + default: return isxdigit(c); + } +} + +/* Parse a range of characters. + +Arguments: + from starting point of scanning the range + pattern_end end of pattern + out output context + separator glob separator + with_escape backslash is needed before separator + +Returns: 0 => success + !0 => error code +*/ + +static int +convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end, + pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator, + BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep) +{ +BOOL is_negative = FALSE; +BOOL separator_seen = FALSE; +BOOL has_prev_c; +PCRE2_SPTR pattern = *from; +PCRE2_SPTR char_start = NULL; +uint32_t c, prev_c; +int len, class_index; + +(void)utf; /* Avoid compiler warning. 
*/ + +if (pattern >= pattern_end) + { + *from = pattern; + return PCRE2_ERROR_MISSING_SQUARE_BRACKET; + } + +if (*pattern == CHAR_EXCLAMATION_MARK + || *pattern == CHAR_CIRCUMFLEX_ACCENT) + { + pattern++; + + if (pattern >= pattern_end) + { + *from = pattern; + return PCRE2_ERROR_MISSING_SQUARE_BRACKET; + } + + is_negative = TRUE; + + out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET; + out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT; + len = 2; + + if (!no_wildsep) + { + if (with_escape) + { + out->out_str[len] = CHAR_BACKSLASH; + len++; + } + out->out_str[len] = (uint8_t) separator; + } + + convert_glob_write_str(out, len + 1); + } +else + convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET); + +has_prev_c = FALSE; +prev_c = 0; + +if (*pattern == CHAR_RIGHT_SQUARE_BRACKET) + { + out->out_str[0] = CHAR_BACKSLASH; + out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET; + convert_glob_write_str(out, 2); + has_prev_c = TRUE; + prev_c = CHAR_RIGHT_SQUARE_BRACKET; + pattern++; + } + +while (pattern < pattern_end) + { + char_start = pattern; + GETCHARINCTEST(c, pattern); + + if (c == CHAR_RIGHT_SQUARE_BRACKET) + { + convert_glob_write(out, c); + + if (!is_negative && !no_wildsep && separator_seen) + { + out->out_str[0] = CHAR_LEFT_PARENTHESIS; + out->out_str[1] = CHAR_QUESTION_MARK; + out->out_str[2] = CHAR_LESS_THAN_SIGN; + out->out_str[3] = CHAR_EXCLAMATION_MARK; + convert_glob_write_str(out, 4); + + convert_glob_print_separator(out, separator, with_escape); + convert_glob_write(out, CHAR_RIGHT_PARENTHESIS); + } + + *from = pattern; + return 0; + } + + if (pattern >= pattern_end) break; + + if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON) + { + *from = pattern; + class_index = convert_glob_parse_class(from, pattern_end, out); + + if (class_index != 0) + { + pattern = *from; + + has_prev_c = FALSE; + prev_c = 0; + + if (!is_negative && + convert_glob_char_in_class (class_index, separator)) + separator_seen = TRUE; + continue; + } + } + else if (c == CHAR_MINUS && has_prev_c && + 
*pattern != CHAR_RIGHT_SQUARE_BRACKET) + { + convert_glob_write(out, CHAR_MINUS); + + char_start = pattern; + GETCHARINCTEST(c, pattern); + + if (pattern >= pattern_end) break; + + if (escape != 0 && c == escape) + { + char_start = pattern; + GETCHARINCTEST(c, pattern); + } + else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON) + { + *from = pattern; + return PCRE2_ERROR_CONVERT_SYNTAX; + } + + if (prev_c > c) + { + *from = pattern; + return PCRE2_ERROR_CONVERT_SYNTAX; + } + + if (prev_c < separator && separator < c) separator_seen = TRUE; + + has_prev_c = FALSE; + prev_c = 0; + } + else + { + if (escape != 0 && c == escape) + { + char_start = pattern; + GETCHARINCTEST(c, pattern); + + if (pattern >= pattern_end) break; + } + + has_prev_c = TRUE; + prev_c = c; + } + + if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET || + c == CHAR_BACKSLASH || c == CHAR_MINUS) + convert_glob_write(out, CHAR_BACKSLASH); + + if (c == separator) separator_seen = TRUE; + + do convert_glob_write(out, *char_start++); while (char_start < pattern); + } + +*from = pattern; +return PCRE2_ERROR_MISSING_SQUARE_BRACKET; +} + + +/* Prints a (*COMMIT) into the output. + +Arguments: + out output context +*/ + +static void +convert_glob_print_commit(pcre2_output_context *out) +{ +out->out_str[0] = CHAR_LEFT_PARENTHESIS; +out->out_str[1] = CHAR_ASTERISK; +out->out_str[2] = CHAR_C; +out->out_str[3] = CHAR_O; +out->out_str[4] = CHAR_M; +out->out_str[5] = CHAR_M; +out->out_str[6] = CHAR_I; +out->out_str[7] = CHAR_T; +convert_glob_write_str(out, 8); +convert_glob_write(out, CHAR_RIGHT_PARENTHESIS); +} + + +/* Bash glob converter. 
+ +Arguments: + pattype the pattern type + pattern the pattern + plength length in code units + utf TRUE if UTF + use_buffer where to put the output + use_length length of use_buffer + bufflenptr where to put the used length + dummyrun TRUE if a dummy run + ccontext the convert context + +Returns: 0 => success + !0 => error code +*/ + +static int +convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength, + BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length, + PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext) +{ +pcre2_output_context out; +PCRE2_SPTR pattern_start = pattern; +PCRE2_SPTR pattern_end = pattern + plength; +PCRE2_UCHAR separator = ccontext->glob_separator; +PCRE2_UCHAR escape = ccontext->glob_escape; +PCRE2_UCHAR c; +BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0; +BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0; +BOOL in_atomic = FALSE; +BOOL after_starstar = FALSE; +BOOL no_slash_z = FALSE; +BOOL with_escape, is_start, after_separator; +int result = 0; + +(void)utf; /* Avoid compiler warning. */ + +#ifdef SUPPORT_UNICODE +if (utf && (separator >= 128 || escape >= 128)) + { + /* Currently only ASCII characters are supported. */ + *bufflenptr = 0; + return PCRE2_ERROR_CONVERT_SYNTAX; + } +#endif + +with_escape = strchr(pcre2_escaped_literals, separator) != NULL; + +/* Initialize default for error offset as end of input. 
*/ +out.output = use_buffer; +out.output_end = use_buffer + use_length; +out.output_size = 0; + +out.out_str[0] = CHAR_LEFT_PARENTHESIS; +out.out_str[1] = CHAR_QUESTION_MARK; +out.out_str[2] = CHAR_s; +out.out_str[3] = CHAR_RIGHT_PARENTHESIS; +convert_glob_write_str(&out, 4); + +is_start = TRUE; + +if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK) + { + if (no_wildsep) + is_start = FALSE; + else if (!no_starstar && pattern + 1 < pattern_end && + pattern[1] == CHAR_ASTERISK) + is_start = FALSE; + } + +if (is_start) + { + out.out_str[0] = CHAR_BACKSLASH; + out.out_str[1] = CHAR_A; + convert_glob_write_str(&out, 2); + } + +while (pattern < pattern_end) + { + c = *pattern++; + + if (c == CHAR_ASTERISK) + { + is_start = pattern == pattern_start + 1; + + if (in_atomic) + { + convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS); + in_atomic = FALSE; + } + + if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK) + { + after_separator = is_start || (pattern[-2] == separator); + + do pattern++; while (pattern < pattern_end && + *pattern == CHAR_ASTERISK); + + if (pattern >= pattern_end) + { + no_slash_z = TRUE; + break; + } + + after_starstar = TRUE; + + if (after_separator && escape != 0 && *pattern == escape && + pattern + 1 < pattern_end && pattern[1] == separator) + pattern++; + + if (is_start) + { + if (*pattern != separator) continue; + + out.out_str[0] = CHAR_LEFT_PARENTHESIS; + out.out_str[1] = CHAR_QUESTION_MARK; + out.out_str[2] = CHAR_COLON; + out.out_str[3] = CHAR_BACKSLASH; + out.out_str[4] = CHAR_A; + out.out_str[5] = CHAR_VERTICAL_LINE; + convert_glob_write_str(&out, 6); + + convert_glob_print_separator(&out, separator, with_escape); + convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS); + + pattern++; + continue; + } + + convert_glob_print_commit(&out); + + if (!after_separator || *pattern != separator) + { + out.out_str[0] = CHAR_DOT; + out.out_str[1] = CHAR_ASTERISK; + out.out_str[2] = CHAR_QUESTION_MARK; + convert_glob_write_str(&out, 3); 
+ continue; + } + + out.out_str[0] = CHAR_LEFT_PARENTHESIS; + out.out_str[1] = CHAR_QUESTION_MARK; + out.out_str[2] = CHAR_COLON; + out.out_str[3] = CHAR_DOT; + out.out_str[4] = CHAR_ASTERISK; + out.out_str[5] = CHAR_QUESTION_MARK; + + convert_glob_write_str(&out, 6); + + convert_glob_print_separator(&out, separator, with_escape); + + out.out_str[0] = CHAR_RIGHT_PARENTHESIS; + out.out_str[1] = CHAR_QUESTION_MARK; + out.out_str[2] = CHAR_QUESTION_MARK; + convert_glob_write_str(&out, 3); + + pattern++; + continue; + } + + if (pattern < pattern_end && *pattern == CHAR_ASTERISK) + { + do pattern++; while (pattern < pattern_end && + *pattern == CHAR_ASTERISK); + } + + if (no_wildsep) + { + if (pattern >= pattern_end) + { + no_slash_z = TRUE; + break; + } + + /* Start check must be after the end check. */ + if (is_start) continue; + } + + if (!is_start) + { + if (after_starstar) + { + out.out_str[0] = CHAR_LEFT_PARENTHESIS; + out.out_str[1] = CHAR_QUESTION_MARK; + out.out_str[2] = CHAR_GREATER_THAN_SIGN; + convert_glob_write_str(&out, 3); + in_atomic = TRUE; + } + else + convert_glob_print_commit(&out); + } + + if (no_wildsep) + convert_glob_write(&out, CHAR_DOT); + else + convert_glob_print_wildcard(&out, separator, with_escape); + + out.out_str[0] = CHAR_ASTERISK; + out.out_str[1] = CHAR_QUESTION_MARK; + if (pattern >= pattern_end) + out.out_str[1] = CHAR_PLUS; + convert_glob_write_str(&out, 2); + continue; + } + + if (c == CHAR_QUESTION_MARK) + { + if (no_wildsep) + convert_glob_write(&out, CHAR_DOT); + else + convert_glob_print_wildcard(&out, separator, with_escape); + continue; + } + + if (c == CHAR_LEFT_SQUARE_BRACKET) + { + result = convert_glob_parse_range(&pattern, pattern_end, + &out, utf, separator, with_escape, escape, no_wildsep); + if (result != 0) break; + continue; + } + + if (escape != 0 && c == escape) + { + if (pattern >= pattern_end) + { + result = PCRE2_ERROR_CONVERT_SYNTAX; + break; + } + c = *pattern++; + } + + if (c < 128 && 
strchr(pcre2_escaped_literals, c) != NULL) + convert_glob_write(&out, CHAR_BACKSLASH); + + convert_glob_write(&out, c); + } + +if (result == 0) + { + if (!no_slash_z) + { + out.out_str[0] = CHAR_BACKSLASH; + out.out_str[1] = CHAR_z; + convert_glob_write_str(&out, 2); + } + + if (in_atomic) + convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS); + + convert_glob_write(&out, CHAR_NUL); + + if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer)) + result = PCRE2_ERROR_NOMEMORY; + } + +if (result != 0) + { + *bufflenptr = pattern - pattern_start; + return result; + } + +*bufflenptr = out.output_size - 1; +return 0; +} + + +/************************************************* +* Convert pattern * +*************************************************/ + +/* This is the external-facing function for converting other forms of pattern +into PCRE2 regular expression patterns. On error, the bufflenptr argument is +used to return an offset in the original pattern. + +Arguments: + pattern the input pattern + plength length of input, or PCRE2_ZERO_TERMINATED + options options bits + buffptr pointer to pointer to output buffer + bufflenptr pointer to length of output buffer + ccontext convert context or NULL + +Returns: 0 for success, else an error code (+ve or -ve) +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options, + PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr, + pcre2_convert_context *ccontext) +{ +int i, rc; +PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE]; +PCRE2_UCHAR *use_buffer = dummy_buffer; +PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE; +BOOL utf = (options & PCRE2_CONVERT_UTF) != 0; +uint32_t pattype = options & TYPE_OPTIONS; + +if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL; + +if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */ + (pattype & (~pattype+1)) != pattype || /* More than one type set */ + pattype == 0) /* No type set */ + { + *bufflenptr = 0; /* 
Error offset */ + return PCRE2_ERROR_BADOPTION; + } + +if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern); +if (ccontext == NULL) ccontext = + (pcre2_convert_context *)(&PRIV(default_convert_context)); + +/* Check UTF if required. */ + +#ifndef SUPPORT_UNICODE +if (utf) + { + *bufflenptr = 0; /* Error offset */ + return PCRE2_ERROR_UNICODE_NOT_SUPPORTED; + } +#else +if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0) + { + PCRE2_SIZE erroroffset; + rc = PRIV(valid_utf)(pattern, plength, &erroroffset); + if (rc != 0) + { + *bufflenptr = erroroffset; + return rc; + } + } +#endif + +/* If buffptr is not NULL, and what it points to is not NULL, we are being +provided with a buffer and a length, so set them as the buffer to use. */ + +if (buffptr != NULL && *buffptr != NULL) + { + use_buffer = *buffptr; + use_length = *bufflenptr; + } + +/* Call an individual converter, either just once (if a buffer was provided or +just the length is needed), or twice (if a memory allocation is required). */ + +for (i = 0; i < 2; i++) + { + PCRE2_UCHAR *allocated; + BOOL dummyrun = buffptr == NULL || *buffptr == NULL; + + switch(pattype) + { + case PCRE2_CONVERT_GLOB: + rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf, + use_buffer, use_length, bufflenptr, dummyrun, ccontext); + break; + + case PCRE2_CONVERT_POSIX_BASIC: + case PCRE2_CONVERT_POSIX_EXTENDED: + rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length, + bufflenptr, dummyrun, ccontext); + break; + + default: + *bufflenptr = 0; /* Error offset */ + return PCRE2_ERROR_INTERNAL; + } + + if (rc != 0 || /* Error */ + buffptr == NULL || /* Just the length is required */ + *buffptr != NULL) /* Buffer was provided or allocated */ + return rc; + + /* Allocate memory for the buffer, with hidden space for an allocator at + the start. The next time round the loop runs the conversion for real. 
*/ + + allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) + + (*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext); + if (allocated == NULL) return PCRE2_ERROR_NOMEMORY; + *buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl)); + + use_buffer = *buffptr; + use_length = *bufflenptr + 1; + } + +/* Control should never get here. */ + +return PCRE2_ERROR_INTERNAL; +} + + +/************************************************* +* Free converted pattern * +*************************************************/ + +/* This frees a converted pattern that was put in newly-allocated memory. + +Argument: the converted pattern +Returns: nothing +*/ + +PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_converted_pattern_free(PCRE2_UCHAR *converted) +{ +if (converted != NULL) + { + pcre2_memctl *memctl = + (pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl)); + memctl->free(memctl, memctl->memory_data); + } +} + +/* End of pcre2_convert.c */ diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c new file mode 100644 index 0000000..caae652 --- /dev/null +++ b/src/pcre2_dfa_match.c @@ -0,0 +1,4119 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2023 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre2_dfa_match(), which is an +alternative matching function that uses a sort of DFA algorithm (not a true +FSM). This is NOT Perl-compatible, but it has advantages in certain +applications. */ + + +/* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved +the performance of his patterns greatly. I could not use it as it stood, as it +was not thread safe, and made assumptions about pattern sizes. Also, it caused +test 7 to loop, and test 9 to crash with a segfault. + +The issue is the check for duplicate states, which is done by a simple linear +search up the state list. (Grep for "duplicate" below to find the code.) 
For +many patterns, there will never be many states active at one time, so a simple +linear search is fine. In patterns that have many active states, it might be a +bottleneck. The suggested code used an indexing scheme to remember which states +had previously been used for each character, and avoided the linear search when +it knew there was no chance of a duplicate. This was implemented when adding +states to the state lists. + +I wrote some thread-safe, not-limited code to try something similar at the time +of checking for duplicates (instead of when adding states), using index vectors +on the stack. It did give a 13% improvement with one specially constructed +pattern for certain subject strings, but on other strings and on many of the +simpler patterns in the test suite it did worse. The major problem, I think, +was the extra time to initialize the index. This had to be done for each call +of internal_dfa_match(). (The supplied patch used a static vector, initialized +only once - I suspect this was the cause of the problems with the tests.) + +Overall, I concluded that the gains in some cases did not outweigh the losses +in others, so I abandoned this code. 
*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#define NLBLOCK mb /* Block containing newline information */ +#define PSSTART start_subject /* Field containing processed string start */ +#define PSEND end_subject /* Field containing processed string end */ + +#include "pcre2_internal.h" + +#define PUBLIC_DFA_MATCH_OPTIONS \ + (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ + PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \ + PCRE2_PARTIAL_SOFT|PCRE2_DFA_SHORTEST|PCRE2_DFA_RESTART| \ + PCRE2_COPY_MATCHED_SUBJECT) + + +/************************************************* +* Code parameters and static tables * +*************************************************/ + +/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes +into others, under special conditions. A gap of 20 between the blocks should be +enough. The resulting opcodes don't have to be less than 256 because they are +never stored, so we push them well clear of the normal opcodes. */ + +#define OP_PROP_EXTRA 300 +#define OP_EXTUNI_EXTRA 320 +#define OP_ANYNL_EXTRA 340 +#define OP_HSPACE_EXTRA 360 +#define OP_VSPACE_EXTRA 380 + + +/* This table identifies those opcodes that are followed immediately by a +character that is to be tested in some way. This makes it possible to +centralize the loading of these characters. In the case of Type * etc, the +"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a +small value. Non-zero values in the table are the offsets from the opcode where +the character is to be found. ***NOTE*** If the start of this table is +modified, the three tables that follow must also be modified. 
*/ + +static const uint8_t coptable[] = { + 0, /* End */ + 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */ + 0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */ + 0, 0, 0, /* Any, AllAny, Anybyte */ + 0, 0, /* \P, \p */ + 0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */ + 0, /* \X */ + 0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */ + 1, /* Char */ + 1, /* Chari */ + 1, /* not */ + 1, /* noti */ + /* Positive single-char repeats */ + 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */ + 1+IMM2_SIZE, /* exact */ + 1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */ + 1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */ + 1+IMM2_SIZE, /* exact I */ + 1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ + /* Negative single-char repeats - only for chars < 256 */ + 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */ + 1+IMM2_SIZE, /* NOT exact */ + 1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */ + 1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */ + 1+IMM2_SIZE, /* NOT exact I */ + 1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */ + /* Positive type repeats */ + 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */ + 1+IMM2_SIZE, /* Type exact */ + 1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */ + /* Character class & ref repeats */ + 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? 
*/ + 0, 0, /* CRRANGE, CRMINRANGE */ + 0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */ + 0, /* CLASS */ + 0, /* NCLASS */ + 0, /* XCLASS - variable length */ + 0, /* REF */ + 0, /* REFI */ + 0, /* DNREF */ + 0, /* DNREFI */ + 0, /* RECURSE */ + 0, /* CALLOUT */ + 0, /* CALLOUT_STR */ + 0, /* Alt */ + 0, /* Ket */ + 0, /* KetRmax */ + 0, /* KetRmin */ + 0, /* KetRpos */ + 0, 0, /* Reverse, Vreverse */ + 0, /* Assert */ + 0, /* Assert not */ + 0, /* Assert behind */ + 0, /* Assert behind not */ + 0, /* NA assert */ + 0, /* NA assert behind */ + 0, /* ONCE */ + 0, /* SCRIPT_RUN */ + 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */ + 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */ + 0, 0, /* CREF, DNCREF */ + 0, 0, /* RREF, DNRREF */ + 0, 0, /* FALSE, TRUE */ + 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ + 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */ + 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */ + 0, 0, /* COMMIT, COMMIT_ARG */ + 0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */ + 0, 0, 0, /* CLOSE, SKIPZERO, DEFINE */ + 0, 0 /* \B and \b in UCP mode */ +}; + +/* This table identifies those opcodes that inspect a character. It is used to +remember the fact that a character could have been inspected when the end of +the subject is reached. ***NOTE*** If the start of this table is modified, the +two tables that follow must also be modified. */ + +static const uint8_t poptable[] = { + 0, /* End */ + 0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */ + 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ + 1, 1, 1, /* Any, AllAny, Anybyte */ + 1, 1, /* \P, \p */ + 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ + 1, /* \X */ + 0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */ + 1, /* Char */ + 1, /* Chari */ + 1, /* not */ + 1, /* noti */ + /* Positive single-char repeats */ + 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? 
*/ + 1, 1, 1, /* upto, minupto, exact */ + 1, 1, 1, 1, /* *+, ++, ?+, upto+ */ + 1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */ + 1, 1, 1, /* upto I, minupto I, exact I */ + 1, 1, 1, 1, /* *+I, ++I, ?+I, upto+I */ + /* Negative single-char repeats - only for chars < 256 */ + 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ + 1, 1, 1, /* NOT upto, minupto, exact */ + 1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */ + 1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */ + 1, 1, 1, /* NOT upto I, minupto I, exact I */ + 1, 1, 1, 1, /* NOT *+I, ++I, ?+I, upto+I */ + /* Positive type repeats */ + 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ + 1, 1, 1, /* Type upto, minupto, exact */ + 1, 1, 1, 1, /* Type *+, ++, ?+, upto+ */ + /* Character class & ref repeats */ + 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ + 1, 1, /* CRRANGE, CRMINRANGE */ + 1, 1, 1, 1, /* Possessive *+, ++, ?+, CRPOSRANGE */ + 1, /* CLASS */ + 1, /* NCLASS */ + 1, /* XCLASS - variable length */ + 0, /* REF */ + 0, /* REFI */ + 0, /* DNREF */ + 0, /* DNREFI */ + 0, /* RECURSE */ + 0, /* CALLOUT */ + 0, /* CALLOUT_STR */ + 0, /* Alt */ + 0, /* Ket */ + 0, /* KetRmax */ + 0, /* KetRmin */ + 0, /* KetRpos */ + 0, 0, /* Reverse, Vreverse */ + 0, /* Assert */ + 0, /* Assert not */ + 0, /* Assert behind */ + 0, /* Assert behind not */ + 0, /* NA assert */ + 0, /* NA assert behind */ + 0, /* ONCE */ + 0, /* SCRIPT_RUN */ + 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */ + 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */ + 0, 0, /* CREF, DNCREF */ + 0, 0, /* RREF, DNRREF */ + 0, 0, /* FALSE, TRUE */ + 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ + 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */ + 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */ + 0, 0, /* COMMIT, COMMIT_ARG */ + 0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */ + 0, 0, 0, /* CLOSE, SKIPZERO, DEFINE */ + 1, 1 /* \B and \b in UCP mode */ +}; + +/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, +and \w */ + +static const 
uint8_t toptable1[] = { /* ctype mask, indexed by opcode; tested as (ctypes[c] & toptable1[op]) ^ toptable2[op] */ + 0, 0, 0, 0, 0, 0, /* End, \A, \G, \K, \B, \b */ + ctype_digit, ctype_digit, /* \D, \d */ + ctype_space, ctype_space, /* \S, \s */ + ctype_word, ctype_word, /* \W, \w */ + 0, 0 /* OP_ANY, OP_ALLANY */ +}; + +static const uint8_t toptable2[] = { /* value XORed with the masked ctype bits; a non-zero result means a match */ + 0, 0, 0, 0, 0, 0, /* End, \A, \G, \K, \B, \b */ + ctype_digit, 0, /* \D inverts the digit test, \d does not */ + ctype_space, 0, /* \S inverts the space test, \s does not */ + ctype_word, 0, /* \W inverts the word test, \w does not */ + 1, 1 /* OP_ANY, OP_ALLANY: mask is 0, so the result is always non-zero */ +}; + + +/* Structure for holding data about a particular state, which is in effect the +current data for an active path through the match tree. It must consist +entirely of ints because the working vector we are passed, and which we put +these structures in, is a vector of ints. */ + +typedef struct stateblock { + int offset; /* Offset to opcode; a negative value marks a held-off state (see data) */ + int count; /* Count for repeats */ + int data; /* Some use extra data, e.g. characters still to skip while held off */ +} stateblock; + +#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int)) /* stateblock size in ints */ + + +/* Before version 10.32 the recursive calls of internal_dfa_match() were passed +local working space and output vectors that were created on the stack. This has +caused issues for some patterns, especially in small-stack environments such as +Windows. A new scheme is now in use which sets up a vector on the stack, but if +this is too small, heap memory is used, up to the heap_limit. The main +parameters are all numbers of ints because the workspace is a vector of ints. + +The size of the starting stack vector, DFA_START_RWS_SIZE, is in bytes, and is +defined in pcre2_internal.h so as to be available to pcre2test when it is +finding the minimum heap requirement for a match. */ + +#define OVEC_UNIT (sizeof(PCRE2_SIZE)/sizeof(int)) /* ints per PCRE2_SIZE ovector entry */ + +#define RWS_BASE_SIZE (DFA_START_RWS_SIZE/sizeof(int)) /* Stack vector */ +#define RWS_RSIZE 1000 /* Work size for recursion */ +#define RWS_OVEC_RSIZE (1000*OVEC_UNIT) /* Ovector for recursion */ +#define RWS_OVEC_OSIZE (2*OVEC_UNIT) /* Ovector in other cases */ + +/* This structure is at the start of each workspace block. 
*/ + +typedef struct RWS_anchor { + struct RWS_anchor *next; /* Next block in the chain, or NULL */ + uint32_t size; /* Size of this block, in ints */ + uint32_t free; /* Number of ints still free */ +} RWS_anchor; + +#define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int)) /* Anchor size in ints */ + + + +/************************************************* +* Process a callout * +*************************************************/ + +/* This function is called to perform a callout. The callout length is always +stored through lengthptr, even when no callout function is set. + +Arguments: + code current code pointer + offsets points to current capture offsets + current_subject start of current subject match + ptr current position in subject + mb the match block + extracode extra code offset when called from condition + lengthptr where to return the callout length + +Returns: 0 if no callout function is set, otherwise the return from the callout +*/ + +static int +do_callout_dfa(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject, + PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode, + PCRE2_SIZE *lengthptr) +{ +pcre2_callout_block *cb = mb->cb; + +*lengthptr = (code[extracode] == OP_CALLOUT)? + (PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT] : + (PCRE2_SIZE)GET(code, 1 + 2*LINK_SIZE + extracode); /* else the item holds its own length */ + +if (mb->callout == NULL) return 0; /* No callout provided */ + +/* Fixed fields in the callout block are set once and for all at the start of +matching. 
*/ + +cb->offset_vector = offsets; +cb->start_match = (PCRE2_SIZE)(current_subject - mb->start_subject); +cb->current_position = (PCRE2_SIZE)(ptr - mb->start_subject); +cb->pattern_position = GET(code, 1 + extracode); +cb->next_item_length = GET(code, 1 + LINK_SIZE + extracode); + +if (code[extracode] == OP_CALLOUT) /* Numbered callout: no string fields */ + { + cb->callout_number = code[1 + 2*LINK_SIZE + extracode]; + cb->callout_string_offset = 0; + cb->callout_string = NULL; + cb->callout_string_length = 0; + } +else /* Callout with a string argument: number is 0 */ + { + cb->callout_number = 0; + cb->callout_string_offset = GET(code, 1 + 3*LINK_SIZE + extracode); + cb->callout_string = code + (1 + 4*LINK_SIZE + extracode) + 1; + cb->callout_string_length = *lengthptr - (1 + 4*LINK_SIZE) - 2; /* NOTE(review): the +1 above and the -2 here presumably skip the string's delimiter and terminator - confirm */ + } + +return (mb->callout)(cb, mb->callout_data); +} + + + +/************************************************* +* Expand local workspace memory * +*************************************************/ + +/* This function is called when internal_dfa_match() is about to be called +recursively and there is insufficient working space left in the current +workspace block. If there's an existing next block, use it; otherwise get a new +block unless the heap limit is reached. + +Arguments: + rwsptr pointer to block pointer (updated) + ovecsize space needed for an ovector (in ints) + mb the match block + +Returns: 0 rwsptr has been updated + !0 an error code (PCRE2_ERROR_HEAPLIMIT or PCRE2_ERROR_NOMEMORY) +*/ + +static int +more_workspace(RWS_anchor **rwsptr, unsigned int ovecsize, dfa_match_block *mb) +{ +RWS_anchor *rws = *rwsptr; +RWS_anchor *new; + +if (rws->next != NULL) + { + new = rws->next; /* Reuse a block that was obtained earlier */ + } + +/* Sizes in the RWS_anchor blocks are in units of sizeof(int), but +mb->heap_limit and mb->heap_used are in kibibytes. Play carefully, to avoid +overflow. */ + +else + { + uint32_t newsize = (rws->size >= UINT32_MAX/(sizeof(int)*2))? 
UINT32_MAX/sizeof(int) : rws->size * 2; /* Double the current size, with a cap */ + uint32_t newsizeK = newsize/(1024/sizeof(int)); /* The same size in kibibytes */ + + if (newsizeK + mb->heap_used > mb->heap_limit) /* Would exceed the heap limit: */ + newsizeK = (uint32_t)(mb->heap_limit - mb->heap_used); /* take only what remains */ + newsize = newsizeK*(1024/sizeof(int)); /* Back to ints (a whole number of kibibytes) */ + + if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE) /* Too small for workspace + ovector + anchor */ + return PCRE2_ERROR_HEAPLIMIT; + new = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data); + if (new == NULL) return PCRE2_ERROR_NOMEMORY; + mb->heap_used += newsizeK; /* Account for the new block */ + new->next = NULL; + new->size = newsize; + rws->next = new; /* Chain it after the current block */ + } + +new->free = new->size - RWS_ANCHOR_SIZE; /* Everything after the anchor is free, for new and reused blocks alike */ +*rwsptr = new; +return 0; +} + + + +/************************************************* +* Match a Regular Expression - DFA engine * +*************************************************/ + +/* This internal function applies a compiled pattern to a subject string, +starting at a given point, using a DFA engine. This function is called from the +external one, possibly multiple times if the pattern is not anchored. The +function calls itself recursively for some kinds of subpattern. + +Arguments: + mb the match_data block with fixed information + this_start_code the opening bracket of this subexpression's code + current_subject where we currently are in the subject string + start_offset start offset in the subject string + offsets vector to contain the matching string offsets + offsetcount size of same + workspace vector of workspace + wscount size of same + rlevel function call recursion level + +Returns: > 0 => number of match offset pairs placed in offsets + = 0 => offsets overflowed; longest matches are present + -1 => failed to match + < -1 => some kind of unexpected problem + +The following macros are used for adding states to the two state vectors (one +for the current character, one for the following character). 
*/ + +#define ADD_ACTIVE(x,y) \ + if (active_count++ < wscount) \ + { \ + next_active_state->offset = (x); \ + next_active_state->count = (y); \ + next_active_state++; \ + } \ + else return PCRE2_ERROR_DFA_WSSIZE + +#define ADD_ACTIVE_DATA(x,y,z) \ + if (active_count++ < wscount) \ + { \ + next_active_state->offset = (x); \ + next_active_state->count = (y); \ + next_active_state->data = (z); \ + next_active_state++; \ + } \ + else return PCRE2_ERROR_DFA_WSSIZE + +#define ADD_NEW(x,y) \ + if (new_count++ < wscount) \ + { \ + next_new_state->offset = (x); \ + next_new_state->count = (y); \ + next_new_state++; \ + } \ + else return PCRE2_ERROR_DFA_WSSIZE + +#define ADD_NEW_DATA(x,y,z) \ + if (new_count++ < wscount) \ + { \ + next_new_state->offset = (x); \ + next_new_state->count = (y); \ + next_new_state->data = (z); \ + next_new_state++; \ + } \ + else return PCRE2_ERROR_DFA_WSSIZE + +/* And now, here is the code */ + +static int +internal_dfa_match( + dfa_match_block *mb, + PCRE2_SPTR this_start_code, + PCRE2_SPTR current_subject, + PCRE2_SIZE start_offset, + PCRE2_SIZE *offsets, + uint32_t offsetcount, + int *workspace, + int wscount, + uint32_t rlevel, + int *RWS) +{ +stateblock *active_states, *new_states, *temp_states; +stateblock *next_active_state, *next_new_state; +const uint8_t *ctypes, *lcc, *fcc; +PCRE2_SPTR ptr; +PCRE2_SPTR end_code; +dfa_recursion_info new_recursive; +int active_count, new_count, match_count; + +/* Some fields in the mb block are frequently referenced, so we load them into +independent variables in the hope that this will perform better. 
*/ + +PCRE2_SPTR start_subject = mb->start_subject; +PCRE2_SPTR end_subject = mb->end_subject; +PCRE2_SPTR start_code = mb->start_code; + +#ifdef SUPPORT_UNICODE +BOOL utf = (mb->poptions & PCRE2_UTF) != 0; +BOOL utf_or_ucp = utf || (mb->poptions & PCRE2_UCP) != 0; +#else +BOOL utf = FALSE; +#endif + +BOOL reset_could_continue = FALSE; + +if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT; +if (rlevel++ > mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT; +offsetcount &= (uint32_t)(-2); /* Round down */ + +wscount -= 2; +wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) / + (2 * INTS_PER_STATEBLOCK); + +ctypes = mb->tables + ctypes_offset; +lcc = mb->tables + lcc_offset; +fcc = mb->tables + fcc_offset; + +match_count = PCRE2_ERROR_NOMATCH; /* A negative number */ + +active_states = (stateblock *)(workspace + 2); +next_new_state = new_states = active_states + wscount; +new_count = 0; + +/* The first thing in any (sub) pattern is a bracket of some sort. Push all +the alternative states onto the list, and find out where the end is. This +makes it possible to use this function recursively, when we want to stop at a +matching internal ket rather than at the end. + +If we are dealing with a backward assertion we have to find out the maximum +amount to move back, and set up each alternative appropriately. */ + +if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT) + { + size_t max_back = 0; + size_t gone_back; + + end_code = this_start_code; + do + { + size_t back = (size_t)GET2(end_code, 2+LINK_SIZE); + if (back > max_back) max_back = back; + end_code += GET(end_code, 1); + } + while (*end_code == OP_ALT); + + /* If we can't go back the amount required for the longest lookbehind + pattern, go back as far as we can; some alternatives may still be viable. 
*/ + +#ifdef SUPPORT_UNICODE + /* In character mode we have to step back character by character */ + + if (utf) + { + for (gone_back = 0; gone_back < max_back; gone_back++) + { + if (current_subject <= start_subject) break; + current_subject--; + ACROSSCHAR(current_subject > start_subject, current_subject, + current_subject--); + } + } + else +#endif + + /* In byte-mode we can do this quickly. */ + + { + size_t current_offset = (size_t)(current_subject - start_subject); + gone_back = (current_offset < max_back)? current_offset : max_back; + current_subject -= gone_back; + } + + /* Save the earliest consulted character */ + + if (current_subject < mb->start_used_ptr) + mb->start_used_ptr = current_subject; + + /* Now we can process the individual branches. There will be an OP_REVERSE at + the start of each branch, except when the length of the branch is zero. */ + + end_code = this_start_code; + do + { + uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + IMM2_SIZE : 0; + size_t back = (revlen == 0)? 0 : (size_t)GET2(end_code, 2+LINK_SIZE); + if (back <= gone_back) + { + int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen); + ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back)); + } + end_code += GET(end_code, 1); + } + while (*end_code == OP_ALT); + } + +/* This is the code for a "normal" subpattern (not a backward assertion). The +start of a whole pattern is always one of these. If we are at the top level, +we may be asked to restart matching from the same point that we reached for a +previous partial match. We still have to scan through the top-level branches to +find the end state. 
*/ + +else + { + end_code = this_start_code; + + /* Restarting */ + + if (rlevel == 1 && (mb->moptions & PCRE2_DFA_RESTART) != 0) + { + do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT); + new_count = workspace[1]; + if (!workspace[0]) + memcpy(new_states, active_states, (size_t)new_count * sizeof(stateblock)); + } + + /* Not restarting */ + + else + { + int length = 1 + LINK_SIZE + + ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || + *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS) + ? IMM2_SIZE:0); + do + { + ADD_NEW((int)(end_code - start_code + length), 0); + end_code += GET(end_code, 1); + length = 1 + LINK_SIZE; + } + while (*end_code == OP_ALT); + } + } + +workspace[0] = 0; /* Bit indicating which vector is current */ + +/* Loop for scanning the subject */ + +ptr = current_subject; +for (;;) + { + int i, j; + int clen, dlen; + uint32_t c, d; + int forced_fail = 0; + BOOL partial_newline = FALSE; + BOOL could_continue = reset_could_continue; + reset_could_continue = FALSE; + + if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr; + + /* Make the new state list into the active state list and empty the + new state list. */ + + temp_states = active_states; + active_states = new_states; + new_states = temp_states; + active_count = new_count; + new_count = 0; + + workspace[0] ^= 1; /* Remember for the restarting feature */ + workspace[1] = active_count; + + /* Set the pointers for adding new states */ + + next_active_state = active_states + active_count; + next_new_state = new_states; + + /* Load the current character from the subject outside the loop, as many + different states may want to look at it, and we assume that at least one + will. 
*/ + + if (ptr < end_subject) + { + clen = 1; /* Number of data items in the character */ +#ifdef SUPPORT_UNICODE + GETCHARLENTEST(c, ptr, clen); +#else + c = *ptr; +#endif /* SUPPORT_UNICODE */ + } + else + { + clen = 0; /* This indicates the end of the subject */ + c = NOTACHAR; /* This value should never actually be used */ + } + + /* Scan up the active states and act on each one. The result of an action + may be to add more states to the currently active list (e.g. on hitting a + parenthesis) or it may be to put states on the new list, for considering + when we move the character pointer on. */ + + for (i = 0; i < active_count; i++) + { + stateblock *current_state = active_states + i; + BOOL caseless = FALSE; + PCRE2_SPTR code; + uint32_t codevalue; + int state_offset = current_state->offset; + int rrc; + int count; + + /* A negative offset is a special case meaning "hold off going to this + (negated) state until the number of characters in the data field have + been skipped". If the could_continue flag was passed over from a previous + state, arrange for it to be passed on. */ + + if (state_offset < 0) + { + if (current_state->data > 0) + { + ADD_NEW_DATA(state_offset, current_state->count, + current_state->data - 1); + if (could_continue) reset_could_continue = TRUE; + continue; + } + else + { + current_state->offset = state_offset = -state_offset; + } + } + + /* Check for a duplicate state with the same count, and skip if found. + See the note at the head of this module about the possibility of improving + performance here. */ + + for (j = 0; j < i; j++) + { + if (active_states[j].offset == state_offset && + active_states[j].count == current_state->count) + goto NEXT_ACTIVE_STATE; + } + + /* The state offset is the offset to the opcode */ + + code = start_code + state_offset; + codevalue = *code; + + /* If this opcode inspects a character, but we are at the end of the + subject, remember the fact for use when testing for a partial match. 
*/ + + if (clen == 0 && poptable[codevalue] != 0) + could_continue = TRUE; + + /* If this opcode is followed by an inline character, load it. It is + tempting to test for the presence of a subject character here, but that + is wrong, because sometimes zero repetitions of the subject are + permitted. + + We also use this mechanism for opcodes such as OP_TYPEPLUS that take an + argument that is not a data character - but is always one byte long because + the values are small. We have to take special action to deal with \P, \p, + \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert + these ones to new opcodes. */ + + if (coptable[codevalue] > 0) + { + dlen = 1; +#ifdef SUPPORT_UNICODE + if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else +#endif /* SUPPORT_UNICODE */ + d = code[coptable[codevalue]]; + if (codevalue >= OP_TYPESTAR) + { + switch(d) + { + case OP_ANYBYTE: return PCRE2_ERROR_DFA_UITEM; + case OP_NOTPROP: + case OP_PROP: codevalue += OP_PROP_EXTRA; break; + case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break; + case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break; + case OP_NOT_HSPACE: + case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break; + case OP_NOT_VSPACE: + case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break; + default: break; + } + } + } + else + { + dlen = 0; /* Not strictly necessary, but compilers moan */ + d = NOTACHAR; /* if these variables are not set. */ + } + + + /* Now process the individual opcodes */ + + switch (codevalue) + { +/* ========================================================================== */ + /* These cases are never obeyed. This is a fudge that causes a compile- + time error if the vectors coptable or poptable, which are indexed by + opcode, are not the correct length. It seems to be the only way to do + such a check at compile time, as the sizeof() operator does not work + in the C preprocessor. 
*/ + + case OP_TABLE_LENGTH: + case OP_TABLE_LENGTH + + ((sizeof(coptable) == OP_TABLE_LENGTH) && + (sizeof(poptable) == OP_TABLE_LENGTH)): + return 0; + +/* ========================================================================== */ + /* Reached a closing bracket. If not at the end of the pattern, carry + on with the next opcode. For repeating opcodes, also add the repeat + state. Note that KETRPOS will always be encountered at the end of the + subpattern, because the possessive subpattern repeats are always handled + using recursive calls. Thus, it never adds any new states. + + At the end of the (sub)pattern, unless we have an empty string and + PCRE2_NOTEMPTY is set, or PCRE2_NOTEMPTY_ATSTART is set and we are at the + start of the subject, save the match data, shifting up all previous + matches so we always have the longest first. */ + + case OP_KET: + case OP_KETRMIN: + case OP_KETRMAX: + case OP_KETRPOS: + if (code != end_code) + { + ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0); + if (codevalue != OP_KET) + { + ADD_ACTIVE(state_offset - (int)GET(code, 1), 0); + } + } + else + { + if (ptr > current_subject || + ((mb->moptions & PCRE2_NOTEMPTY) == 0 && + ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) == 0 || + current_subject > start_subject + mb->start_offset))) + { + if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0; + else if (match_count > 0 && ++match_count * 2 > (int)offsetcount) + match_count = 0; + count = ((match_count == 0)? 
(int)offsetcount : match_count * 2) - 2; + if (count > 0) (void)memmove(offsets + 2, offsets, + (size_t)count * sizeof(PCRE2_SIZE)); + if (offsetcount >= 2) + { + offsets[0] = (PCRE2_SIZE)(current_subject - start_subject); + offsets[1] = (PCRE2_SIZE)(ptr - start_subject); + } + if ((mb->moptions & PCRE2_DFA_SHORTEST) != 0) return match_count; + } + } + break; + +/* ========================================================================== */ + /* These opcodes add to the current list of states without looking + at the current character. */ + + /*-----------------------------------------------------------------*/ + case OP_ALT: + do { code += GET(code, 1); } while (*code == OP_ALT); + ADD_ACTIVE((int)(code - start_code), 0); + break; + + /*-----------------------------------------------------------------*/ + case OP_BRA: + case OP_SBRA: + do + { + ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0); + code += GET(code, 1); + } + while (*code == OP_ALT); + break; + + /*-----------------------------------------------------------------*/ + case OP_CBRA: + case OP_SCBRA: + ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0); + code += GET(code, 1); + while (*code == OP_ALT) + { + ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0); + code += GET(code, 1); + } + break; + + /*-----------------------------------------------------------------*/ + case OP_BRAZERO: + case OP_BRAMINZERO: + ADD_ACTIVE(state_offset + 1, 0); + code += 1 + GET(code, 2); + while (*code == OP_ALT) code += GET(code, 1); + ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0); + break; + + /*-----------------------------------------------------------------*/ + case OP_SKIPZERO: + code += 1 + GET(code, 2); + while (*code == OP_ALT) code += GET(code, 1); + ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0); + break; + + /*-----------------------------------------------------------------*/ + case OP_CIRC: + if (ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == 0) + { 
ADD_ACTIVE(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_CIRCM: + if ((ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == 0) || + ((ptr != end_subject || (mb->poptions & PCRE2_ALT_CIRCUMFLEX) != 0 ) + && WAS_NEWLINE(ptr))) + { ADD_ACTIVE(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_EOD: + if (ptr >= end_subject) + { + if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0) + return PCRE2_ERROR_PARTIAL; + else { ADD_ACTIVE(state_offset + 1, 0); } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_SOD: + if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_SOM: + if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); } + break; + + +/* ========================================================================== */ + /* These opcodes inspect the next subject character, and sometimes + the previous one as well, but do not have an argument. The variable + clen contains the length of the current character and is zero if we are + at the end of the subject. 
*/ + + /*-----------------------------------------------------------------*/ + case OP_ANY: + if (clen > 0 && !IS_NEWLINE(ptr)) + { + if (ptr + 1 >= mb->end_subject && + (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + could_continue = partial_newline = TRUE; + } + else + { + ADD_NEW(state_offset + 1, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_ALLANY: + if (clen > 0) + { ADD_NEW(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_EODN: + if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - mb->nllen)) + { + if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0) + return PCRE2_ERROR_PARTIAL; + ADD_ACTIVE(state_offset + 1, 0); + } + break; + + /*-----------------------------------------------------------------*/ + case OP_DOLL: + if ((mb->moptions & PCRE2_NOTEOL) == 0) + { + if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0) + could_continue = TRUE; + else if (clen == 0 || + ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) && + (ptr == end_subject - mb->nllen) + )) + { ADD_ACTIVE(state_offset + 1, 0); } + else if (ptr + 1 >= mb->end_subject && + (mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0) + { + reset_could_continue = TRUE; + ADD_NEW_DATA(-(state_offset + 1), 0, 1); + } + else could_continue = partial_newline = TRUE; + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_DOLLM: + if ((mb->moptions & PCRE2_NOTEOL) == 0) + { + if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0) + could_continue = TRUE; + else if (clen == 0 || + ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr))) + { ADD_ACTIVE(state_offset + 1, 0); } + 
else if (ptr + 1 >= mb->end_subject && + (mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0) + { + reset_could_continue = TRUE; + ADD_NEW_DATA(-(state_offset + 1), 0, 1); + } + else could_continue = partial_newline = TRUE; + } + } + else if (IS_NEWLINE(ptr)) + { ADD_ACTIVE(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + + case OP_DIGIT: + case OP_WHITESPACE: + case OP_WORDCHAR: + if (clen > 0 && c < 256 && + ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0) + { ADD_NEW(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_NOT_DIGIT: + case OP_NOT_WHITESPACE: + case OP_NOT_WORDCHAR: + if (clen > 0 && (c >= 256 || + ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)) + { ADD_NEW(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_WORD_BOUNDARY: + case OP_NOT_WORD_BOUNDARY: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + { + int left_word, right_word; + + if (ptr > start_subject) + { + PCRE2_SPTR temp = ptr - 1; + if (temp < mb->start_used_ptr) mb->start_used_ptr = temp; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (utf) { BACKCHAR(temp); } +#endif + GETCHARTEST(d, temp); +#ifdef SUPPORT_UNICODE + if (codevalue == OP_UCP_WORD_BOUNDARY || + codevalue == OP_NOT_UCP_WORD_BOUNDARY) + { + int chartype = UCD_CHARTYPE(d); + int category = PRIV(ucp_gentype)[chartype]; + left_word = (category == ucp_L || category == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc); + } + else +#endif + left_word = d < 256 && (ctypes[d] & ctype_word) != 0; + } + else left_word = FALSE; + + if (clen > 0) + { + if (ptr >= mb->last_used_ptr) + { + PCRE2_SPTR temp = ptr + 1; +#if defined 
SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (utf) { FORWARDCHARTEST(temp, mb->end_subject); } +#endif + mb->last_used_ptr = temp; + } +#ifdef SUPPORT_UNICODE + if (codevalue == OP_UCP_WORD_BOUNDARY || + codevalue == OP_NOT_UCP_WORD_BOUNDARY) + { + int chartype = UCD_CHARTYPE(c); + int category = PRIV(ucp_gentype)[chartype]; + right_word = (category == ucp_L || category == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc); + } + else +#endif + right_word = c < 256 && (ctypes[c] & ctype_word) != 0; + } + else right_word = FALSE; + + if ((left_word == right_word) == + (codevalue == OP_NOT_WORD_BOUNDARY || + codevalue == OP_NOT_UCP_WORD_BOUNDARY)) + { ADD_ACTIVE(state_offset + 1, 0); } + } + break; + + + /*-----------------------------------------------------------------*/ + /* Check the next character by Unicode property. We will get here only + if the support is in the binary; otherwise a compile-time error occurs. + */ + +#ifdef SUPPORT_UNICODE + case OP_PROP: + case OP_NOTPROP: + if (clen > 0) + { + BOOL OK; + int chartype; + const uint32_t *cp; + const ucd_record * prop = GET_UCD(c); + switch(code[1]) + { + case PT_ANY: + OK = TRUE; + break; + + case PT_LAMP: + chartype = prop->chartype; + OK = chartype == ucp_Lu || chartype == ucp_Ll || + chartype == ucp_Lt; + break; + + case PT_GC: + OK = PRIV(ucp_gentype)[prop->chartype] == code[2]; + break; + + case PT_PC: + OK = prop->chartype == code[2]; + break; + + case PT_SC: + OK = prop->script == code[2]; + break; + + case PT_SCX: + OK = (prop->script == code[2] || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[2]) != 0); + break; + + /* These are specials for combination cases. */ + + case PT_ALNUM: + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N; + break; + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. 
*/ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + switch(c) + { + HSPACE_CASES: + VSPACE_CASES: + OK = TRUE; + break; + + default: + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z; + break; + } + break; + + case PT_WORD: + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc; + break; + + case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) + { + OK = FALSE; + break; + } +#endif + cp = PRIV(ucd_caseless_sets) + code[2]; + for (;;) + { + if (c < *cp) { OK = FALSE; break; } + if (c == *cp++) { OK = TRUE; break; } + } + break; + + case PT_UCNC: + OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || + c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) || + c >= 0xe000; + break; + + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[2]; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code[2]) != 0; + break; + + /* Should never occur, but keep compilers from grumbling. */ + + default: + OK = codevalue != OP_PROP; + break; + } + + if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); } + } + break; +#endif + + + +/* ========================================================================== */ + /* These opcodes likewise inspect the subject character, but have an + argument that is not a data character. It is one of these opcodes: + OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, + OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. 
*/ + + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } + if (clen > 0) + { + if (d == OP_ANY && ptr + 1 >= mb->end_subject && + (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + could_continue = partial_newline = TRUE; + } + else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + (c < 256 && + (d != OP_ANY || !IS_NEWLINE(ptr)) && + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { + if (count > 0 && codevalue == OP_TYPEPOSPLUS) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + count++; + ADD_NEW(state_offset, count); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSQUERY: + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0) + { + if (d == OP_ANY && ptr + 1 >= mb->end_subject && + (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + could_continue = partial_newline = TRUE; + } + else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + (c < 256 && + (d != OP_ANY || !IS_NEWLINE(ptr)) && + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { + if (codevalue == OP_TYPEPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW(state_offset + 2, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPOSSTAR: + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0) + { + if (d == OP_ANY && ptr + 1 >= mb->end_subject && + (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { 
+ could_continue = partial_newline = TRUE; + } + else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + (c < 256 && + (d != OP_ANY || !IS_NEWLINE(ptr)) && + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { + if (codevalue == OP_TYPEPOSSTAR) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW(state_offset, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_TYPEEXACT: + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + if (d == OP_ANY && ptr + 1 >= mb->end_subject && + (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + could_continue = partial_newline = TRUE; + } + else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + (c < 256 && + (d != OP_ANY || !IS_NEWLINE(ptr)) && + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { + if (++count >= (int)GET2(code, 1)) + { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); } + else + { ADD_NEW(state_offset, count); } + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEPOSUPTO: + ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + if (d == OP_ANY && ptr + 1 >= mb->end_subject && + (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + c == NLBLOCK->nl[0]) + { + could_continue = partial_newline = TRUE; + } + else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + (c < 256 && + (d != OP_ANY || !IS_NEWLINE(ptr)) && + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { + if (codevalue == OP_TYPEPOSUPTO) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + if (++count >= 
(int)GET2(code, 1)) + { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); } + else + { ADD_NEW(state_offset, count); } + } + } + break; + +/* ========================================================================== */ + /* These are virtual opcodes that are used when something like + OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its + argument. It keeps the code above fast for the other cases. The argument + is in the d variable. */ + +#ifdef SUPPORT_UNICODE + case OP_PROP_EXTRA + OP_TYPEPLUS: + case OP_PROP_EXTRA + OP_TYPEMINPLUS: + case OP_PROP_EXTRA + OP_TYPEPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); } + if (clen > 0) + { + BOOL OK; + int chartype; + const uint32_t *cp; + const ucd_record * prop = GET_UCD(c); + switch(code[2]) + { + case PT_ANY: + OK = TRUE; + break; + + case PT_LAMP: + chartype = prop->chartype; + OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; + break; + + case PT_GC: + OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; + break; + + case PT_PC: + OK = prop->chartype == code[3]; + break; + + case PT_SC: + OK = prop->script == code[3]; + break; + + case PT_SCX: + OK = (prop->script == code[3] || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0); + break; + + /* These are specials for combination cases. */ + + case PT_ALNUM: + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N; + break; + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. 
*/ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + switch(c) + { + HSPACE_CASES: + VSPACE_CASES: + OK = TRUE; + break; + + default: + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z; + break; + } + break; + + case PT_WORD: + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc; + break; + + case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) + { + OK = FALSE; + break; + } +#endif + cp = PRIV(ucd_caseless_sets) + code[3]; + for (;;) + { + if (c < *cp) { OK = FALSE; break; } + if (c == *cp++) { OK = TRUE; break; } + } + break; + + case PT_UCNC: + OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || + c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) || + c >= 0xe000; + break; + + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[3]; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code[3]) != 0; + break; + + /* Should never occur, but keep compilers from grumbling. 
*/ + + default: + OK = codevalue != OP_PROP; + break; + } + + if (OK == (d == OP_PROP)) + { + if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + count++; + ADD_NEW(state_offset, count); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_EXTUNI_EXTRA + OP_TYPEPLUS: + case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS: + case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } + if (clen > 0) + { + int ncount = 0; + if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf, + &ncount); + count++; + ADD_NEW_DATA(-state_offset, count, ncount); + } + break; +#endif + + /*-----------------------------------------------------------------*/ + case OP_ANYNL_EXTRA + OP_TYPEPLUS: + case OP_ANYNL_EXTRA + OP_TYPEMINPLUS: + case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } + if (clen > 0) + { + int ncount = 0; + switch (c) + { + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#ifndef EBCDIC + case 0x2028: + case 0x2029: +#endif /* Not EBCDIC */ + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break; + goto ANYNL01; + + case CHAR_CR: + if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1; + /* Fall through */ + + ANYNL01: + case CHAR_LF: + if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + count++; + ADD_NEW_DATA(-state_offset, count, ncount); + break; + + default: + break; + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_VSPACE_EXTRA + 
OP_TYPEPLUS: + case OP_VSPACE_EXTRA + OP_TYPEMINPLUS: + case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } + if (clen > 0) + { + BOOL OK; + switch (c) + { + VSPACE_CASES: + OK = TRUE; + break; + + default: + OK = FALSE; + break; + } + + if (OK == (d == OP_VSPACE)) + { + if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + count++; + ADD_NEW_DATA(-state_offset, count, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_HSPACE_EXTRA + OP_TYPEPLUS: + case OP_HSPACE_EXTRA + OP_TYPEMINPLUS: + case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } + if (clen > 0) + { + BOOL OK; + switch (c) + { + HSPACE_CASES: + OK = TRUE; + break; + + default: + OK = FALSE; + break; + } + + if (OK == (d == OP_HSPACE)) + { + if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + count++; + ADD_NEW_DATA(-state_offset, count, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ +#ifdef SUPPORT_UNICODE + case OP_PROP_EXTRA + OP_TYPEQUERY: + case OP_PROP_EXTRA + OP_TYPEMINQUERY: + case OP_PROP_EXTRA + OP_TYPEPOSQUERY: + count = 4; + goto QS1; + + case OP_PROP_EXTRA + OP_TYPESTAR: + case OP_PROP_EXTRA + OP_TYPEMINSTAR: + case OP_PROP_EXTRA + OP_TYPEPOSSTAR: + count = 0; + + QS1: + + ADD_ACTIVE(state_offset + 4, 0); + if (clen > 0) + { + BOOL OK; + int chartype; + const uint32_t *cp; + const ucd_record * prop = GET_UCD(c); + switch(code[2]) + { + case PT_ANY: + OK = TRUE; + break; + + case PT_LAMP: + chartype = prop->chartype; + OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; + break; + + case PT_GC: + OK = 
PRIV(ucp_gentype)[prop->chartype] == code[3]; + break; + + case PT_PC: + OK = prop->chartype == code[3]; + break; + + case PT_SC: + OK = prop->script == code[3]; + break; + + case PT_SCX: + OK = (prop->script == code[3] || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0); + break; + + /* These are specials for combination cases. */ + + case PT_ALNUM: + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N; + break; + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. */ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + switch(c) + { + HSPACE_CASES: + VSPACE_CASES: + OK = TRUE; + break; + + default: + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z; + break; + } + break; + + case PT_WORD: + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc; + break; + + case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) + { + OK = FALSE; + break; + } +#endif + cp = PRIV(ucd_caseless_sets) + code[3]; + for (;;) + { + if (c < *cp) { OK = FALSE; break; } + if (c == *cp++) { OK = TRUE; break; } + } + break; + + case PT_UCNC: + OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || + c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) || + c >= 0xe000; + break; + + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[3]; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code[3]) != 0; + break; + + /* Should never occur, but keep compilers from grumbling. 
*/ + + default: + OK = codevalue != OP_PROP; + break; + } + + if (OK == (d == OP_PROP)) + { + if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR || + codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW(state_offset + count, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_EXTUNI_EXTRA + OP_TYPEQUERY: + case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY: + case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY: + count = 2; + goto QS2; + + case OP_EXTUNI_EXTRA + OP_TYPESTAR: + case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR: + case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR: + count = 0; + + QS2: + + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0) + { + int ncount = 0; + if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR || + codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf, + &ncount); + ADD_NEW_DATA(-(state_offset + count), 0, ncount); + } + break; +#endif + + /*-----------------------------------------------------------------*/ + case OP_ANYNL_EXTRA + OP_TYPEQUERY: + case OP_ANYNL_EXTRA + OP_TYPEMINQUERY: + case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY: + count = 2; + goto QS3; + + case OP_ANYNL_EXTRA + OP_TYPESTAR: + case OP_ANYNL_EXTRA + OP_TYPEMINSTAR: + case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR: + count = 0; + + QS3: + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0) + { + int ncount = 0; + switch (c) + { + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#ifndef EBCDIC + case 0x2028: + case 0x2029: +#endif /* Not EBCDIC */ + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break; + goto ANYNL02; + + case CHAR_CR: + if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1; + /* Fall through */ + + ANYNL02: + case CHAR_LF: + if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR || + codevalue == OP_ANYNL_EXTRA + 
OP_TYPEPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount); + break; + + default: + break; + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_VSPACE_EXTRA + OP_TYPEQUERY: + case OP_VSPACE_EXTRA + OP_TYPEMINQUERY: + case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY: + count = 2; + goto QS4; + + case OP_VSPACE_EXTRA + OP_TYPESTAR: + case OP_VSPACE_EXTRA + OP_TYPEMINSTAR: + case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR: + count = 0; + + QS4: + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0) + { + BOOL OK; + switch (c) + { + VSPACE_CASES: + OK = TRUE; + break; + + default: + OK = FALSE; + break; + } + if (OK == (d == OP_VSPACE)) + { + if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR || + codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW_DATA(-(state_offset + (int)count), 0, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_HSPACE_EXTRA + OP_TYPEQUERY: + case OP_HSPACE_EXTRA + OP_TYPEMINQUERY: + case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY: + count = 2; + goto QS5; + + case OP_HSPACE_EXTRA + OP_TYPESTAR: + case OP_HSPACE_EXTRA + OP_TYPEMINSTAR: + case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR: + count = 0; + + QS5: + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0) + { + BOOL OK; + switch (c) + { + HSPACE_CASES: + OK = TRUE; + break; + + default: + OK = FALSE; + break; + } + + if (OK == (d == OP_HSPACE)) + { + if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR || + codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW_DATA(-(state_offset + (int)count), 0, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ +#ifdef SUPPORT_UNICODE + case OP_PROP_EXTRA + OP_TYPEEXACT: + case 
OP_PROP_EXTRA + OP_TYPEUPTO: + case OP_PROP_EXTRA + OP_TYPEMINUPTO: + case OP_PROP_EXTRA + OP_TYPEPOSUPTO: + if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT) + { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); } + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + BOOL OK; + int chartype; + const uint32_t *cp; + const ucd_record * prop = GET_UCD(c); + switch(code[1 + IMM2_SIZE + 1]) + { + case PT_ANY: + OK = TRUE; + break; + + case PT_LAMP: + chartype = prop->chartype; + OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; + break; + + case PT_GC: + OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2]; + break; + + case PT_PC: + OK = prop->chartype == code[1 + IMM2_SIZE + 2]; + break; + + case PT_SC: + OK = prop->script == code[1 + IMM2_SIZE + 2]; + break; + + case PT_SCX: + OK = (prop->script == code[1 + IMM2_SIZE + 2] || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), + code[1 + IMM2_SIZE + 2]) != 0); + break; + + /* These are specials for combination cases. */ + + case PT_ALNUM: + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N; + break; + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. 
*/ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + switch(c) + { + HSPACE_CASES: + VSPACE_CASES: + OK = TRUE; + break; + + default: + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z; + break; + } + break; + + case PT_WORD: + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc; + break; + + case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) + { + OK = FALSE; + break; + } +#endif + cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2]; + for (;;) + { + if (c < *cp) { OK = FALSE; break; } + if (c == *cp++) { OK = TRUE; break; } + } + break; + + case PT_UCNC: + OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || + c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) || + c >= 0xe000; + break; + + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[1 + IMM2_SIZE + 2]; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code[1 + IMM2_SIZE + 2]) != 0; + break; + + /* Should never occur, but keep compilers from grumbling. 
*/ + + default: + OK = codevalue != OP_PROP; + break; + } + + if (OK == (d == OP_PROP)) + { + if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + if (++count >= (int)GET2(code, 1)) + { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); } + else + { ADD_NEW(state_offset, count); } + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_EXTUNI_EXTRA + OP_TYPEEXACT: + case OP_EXTUNI_EXTRA + OP_TYPEUPTO: + case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO: + case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO: + if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT) + { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + PCRE2_SPTR nptr; + int ncount = 0; + if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf, + &ncount); + if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0) + reset_could_continue = TRUE; + if (++count >= (int)GET2(code, 1)) + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } + else + { ADD_NEW_DATA(-state_offset, count, ncount); } + } + break; +#endif + + /*-----------------------------------------------------------------*/ + case OP_ANYNL_EXTRA + OP_TYPEEXACT: + case OP_ANYNL_EXTRA + OP_TYPEUPTO: + case OP_ANYNL_EXTRA + OP_TYPEMINUPTO: + case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO: + if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT) + { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + int ncount = 0; + switch (c) + { + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#ifndef EBCDIC + case 0x2028: + case 0x2029: +#endif /* Not EBCDIC */ + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break; + goto ANYNL03; + + case CHAR_CR: + if (ptr + 
1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1; + /* Fall through */ + + ANYNL03: + case CHAR_LF: + if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + if (++count >= (int)GET2(code, 1)) + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } + else + { ADD_NEW_DATA(-state_offset, count, ncount); } + break; + + default: + break; + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_VSPACE_EXTRA + OP_TYPEEXACT: + case OP_VSPACE_EXTRA + OP_TYPEUPTO: + case OP_VSPACE_EXTRA + OP_TYPEMINUPTO: + case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO: + if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT) + { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + BOOL OK; + switch (c) + { + VSPACE_CASES: + OK = TRUE; + break; + + default: + OK = FALSE; + } + + if (OK == (d == OP_VSPACE)) + { + if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + if (++count >= (int)GET2(code, 1)) + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } + else + { ADD_NEW_DATA(-state_offset, count, 0); } + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_HSPACE_EXTRA + OP_TYPEEXACT: + case OP_HSPACE_EXTRA + OP_TYPEUPTO: + case OP_HSPACE_EXTRA + OP_TYPEMINUPTO: + case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO: + if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT) + { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + BOOL OK; + switch (c) + { + HSPACE_CASES: + OK = TRUE; + break; + + default: + OK = FALSE; + break; + } + + if (OK == (d == OP_HSPACE)) + { + if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + 
} + if (++count >= (int)GET2(code, 1)) + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } + else + { ADD_NEW_DATA(-state_offset, count, 0); } + } + } + break; + +/* ========================================================================== */ + /* These opcodes are followed by a character that is usually compared + to the current subject character; it is loaded into d. We still get + here even if there is no subject character, because in some cases zero + repetitions are permitted. */ + + /*-----------------------------------------------------------------*/ + case OP_CHAR: + if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_CHARI: + if (clen == 0) break; + +#ifdef SUPPORT_UNICODE + if (utf_or_ucp) + { + if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else + { + unsigned int othercase; + if (c < 128) + othercase = fcc[c]; + else + othercase = UCD_OTHERCASE(c); + if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); } + } + } + else +#endif /* SUPPORT_UNICODE */ + /* Not UTF or UCP mode */ + { + if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d)) + { ADD_NEW(state_offset + 2, 0); } + } + break; + + +#ifdef SUPPORT_UNICODE + /*-----------------------------------------------------------------*/ + /* This is a tricky one because it can match more than one character. + Find out how many characters to skip, and then set up a negative state + to wait for them to pass before continuing. 
*/ + + case OP_EXTUNI: + if (clen > 0) + { + int ncount = 0; + PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, + end_subject, utf, &ncount); + if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0) + reset_could_continue = TRUE; + ADD_NEW_DATA(-(state_offset + 1), 0, ncount); + } + break; +#endif + + /*-----------------------------------------------------------------*/ + /* This is a tricky like EXTUNI because it too can match more than one + character (when CR is followed by LF). In this case, set up a negative + state to wait for one character to pass before continuing. */ + + case OP_ANYNL: + if (clen > 0) switch(c) + { + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#ifndef EBCDIC + case 0x2028: + case 0x2029: +#endif /* Not EBCDIC */ + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break; + /* Fall through */ + + case CHAR_LF: + ADD_NEW(state_offset + 1, 0); + break; + + case CHAR_CR: + if (ptr + 1 >= end_subject) + { + ADD_NEW(state_offset + 1, 0); + if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0) + reset_could_continue = TRUE; + } + else if (UCHAR21TEST(ptr + 1) == CHAR_LF) + { + ADD_NEW_DATA(-(state_offset + 1), 0, 1); + } + else + { + ADD_NEW(state_offset + 1, 0); + } + break; + } + break; + + /*-----------------------------------------------------------------*/ + case OP_NOT_VSPACE: + if (clen > 0) switch(c) + { + VSPACE_CASES: + break; + + default: + ADD_NEW(state_offset + 1, 0); + break; + } + break; + + /*-----------------------------------------------------------------*/ + case OP_VSPACE: + if (clen > 0) switch(c) + { + VSPACE_CASES: + ADD_NEW(state_offset + 1, 0); + break; + + default: + break; + } + break; + + /*-----------------------------------------------------------------*/ + case OP_NOT_HSPACE: + if (clen > 0) switch(c) + { + HSPACE_CASES: + break; + + default: + ADD_NEW(state_offset + 1, 0); + break; + } + break; + + /*-----------------------------------------------------------------*/ + case OP_HSPACE: + if (clen 
> 0) switch(c) + { + HSPACE_CASES: + ADD_NEW(state_offset + 1, 0); + break; + + default: + break; + } + break; + + /*-----------------------------------------------------------------*/ + /* Match a negated single character casefully. */ + + case OP_NOT: + if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + /* Match a negated single character caselessly. */ + + case OP_NOTI: + if (clen > 0) + { + uint32_t otherd; +#ifdef SUPPORT_UNICODE + if (utf_or_ucp && d >= 128) + otherd = UCD_OTHERCASE(d); + else +#endif /* SUPPORT_UNICODE */ + otherd = TABLE_GET(d, fcc, d); + if (c != d && c != otherd) + { ADD_NEW(state_offset + dlen + 1, 0); } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_PLUSI: + case OP_MINPLUSI: + case OP_POSPLUSI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTPOSPLUSI: + caseless = TRUE; + codevalue -= OP_STARI - OP_STAR; + + /* Fall through */ + case OP_PLUS: + case OP_MINPLUS: + case OP_POSPLUS: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); } + if (clen > 0) + { + uint32_t otherd = NOTACHAR; + if (caseless) + { +#ifdef SUPPORT_UNICODE + if (utf_or_ucp && d >= 128) + otherd = UCD_OTHERCASE(d); + else +#endif /* SUPPORT_UNICODE */ + otherd = TABLE_GET(d, fcc, d); + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { + if (count > 0 && + (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS)) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + count++; + ADD_NEW(state_offset, count); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_QUERYI: + case OP_MINQUERYI: + case OP_POSQUERYI: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTPOSQUERYI: + caseless = TRUE; + 
codevalue -= OP_STARI - OP_STAR; + /* Fall through */ + case OP_QUERY: + case OP_MINQUERY: + case OP_POSQUERY: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTPOSQUERY: + ADD_ACTIVE(state_offset + dlen + 1, 0); + if (clen > 0) + { + uint32_t otherd = NOTACHAR; + if (caseless) + { +#ifdef SUPPORT_UNICODE + if (utf_or_ucp && d >= 128) + otherd = UCD_OTHERCASE(d); + else +#endif /* SUPPORT_UNICODE */ + otherd = TABLE_GET(d, fcc, d); + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { + if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW(state_offset + dlen + 1, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_STARI: + case OP_MINSTARI: + case OP_POSSTARI: + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPOSSTARI: + caseless = TRUE; + codevalue -= OP_STARI - OP_STAR; + /* Fall through */ + case OP_STAR: + case OP_MINSTAR: + case OP_POSSTAR: + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPOSSTAR: + ADD_ACTIVE(state_offset + dlen + 1, 0); + if (clen > 0) + { + uint32_t otherd = NOTACHAR; + if (caseless) + { +#ifdef SUPPORT_UNICODE + if (utf_or_ucp && d >= 128) + otherd = UCD_OTHERCASE(d); + else +#endif /* SUPPORT_UNICODE */ + otherd = TABLE_GET(d, fcc, d); + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { + if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW(state_offset, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_EXACTI: + case OP_NOTEXACTI: + caseless = TRUE; + codevalue -= OP_STARI - OP_STAR; + /* Fall through */ + case OP_EXACT: + case OP_NOTEXACT: + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + uint32_t otherd = NOTACHAR; + if (caseless) + { +#ifdef 
SUPPORT_UNICODE + if (utf_or_ucp && d >= 128) + otherd = UCD_OTHERCASE(d); + else +#endif /* SUPPORT_UNICODE */ + otherd = TABLE_GET(d, fcc, d); + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { + if (++count >= (int)GET2(code, 1)) + { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } + else + { ADD_NEW(state_offset, count); } + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_UPTOI: + case OP_MINUPTOI: + case OP_POSUPTOI: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTPOSUPTOI: + caseless = TRUE; + codevalue -= OP_STARI - OP_STAR; + /* Fall through */ + case OP_UPTO: + case OP_MINUPTO: + case OP_POSUPTO: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTPOSUPTO: + ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0); + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + uint32_t otherd = NOTACHAR; + if (caseless) + { +#ifdef SUPPORT_UNICODE + if (utf_or_ucp && d >= 128) + otherd = UCD_OTHERCASE(d); + else +#endif /* SUPPORT_UNICODE */ + otherd = TABLE_GET(d, fcc, d); + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { + if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + if (++count >= (int)GET2(code, 1)) + { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } + else + { ADD_NEW(state_offset, count); } + } + } + break; + + +/* ========================================================================== */ + /* These are the class-handling opcodes */ + + case OP_CLASS: + case OP_NCLASS: + case OP_XCLASS: + { + BOOL isinclass = FALSE; + int next_state_offset; + PCRE2_SPTR ecode; + + /* For a simple class, there is always just a 32-byte table, and we + can set isinclass from it. */ + + if (codevalue != OP_XCLASS) + { + ecode = code + 1 + (32 / sizeof(PCRE2_UCHAR)); + if (clen > 0) + { + isinclass = (c > 255)? 
(codevalue == OP_NCLASS) : + ((((uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0); + } + } + + /* An extended class may have a table or a list of single characters, + ranges, or both, and it may be positive or negative. There's a + function that sorts all this out. */ + + else + { + ecode = code + GET(code, 1); + if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf); + } + + /* At this point, isinclass is set for all kinds of class, and ecode + points to the byte after the end of the class. If there is a + quantifier, this is where it will be. */ + + next_state_offset = (int)(ecode - start_code); + + switch (*ecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPOSSTAR: + ADD_ACTIVE(next_state_offset + 1, 0); + if (isinclass) + { + if (*ecode == OP_CRPOSSTAR) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW(state_offset, 0); + } + break; + + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRPOSPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); } + if (isinclass) + { + if (count > 0 && *ecode == OP_CRPOSPLUS) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + count++; + ADD_NEW(state_offset, count); + } + break; + + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSQUERY: + ADD_ACTIVE(next_state_offset + 1, 0); + if (isinclass) + { + if (*ecode == OP_CRPOSQUERY) + { + active_count--; /* Remove non-match possibility */ + next_active_state--; + } + ADD_NEW(next_state_offset + 1, 0); + } + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + count = current_state->count; /* Already matched */ + if (count >= (int)GET2(ecode, 1)) + { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } + if (isinclass) + { + int max = (int)GET2(ecode, 1 + IMM2_SIZE); + + if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1)) + { + active_count--; /* Remove non-match possibility */ + 
next_active_state--; + } + + if (++count >= max && max != 0) /* Max 0 => no limit */ + { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } + else + { ADD_NEW(state_offset, count); } + } + break; + + default: + if (isinclass) { ADD_NEW(next_state_offset, 0); } + break; + } + } + break; + +/* ========================================================================== */ + /* These are the opcodes for fancy brackets of various kinds. We have + to use recursion in order to handle them. The "always failing" assertion + (?!) is optimised to OP_FAIL when compiling, so we have to support that, + though the other "backtracking verbs" are not supported. */ + + case OP_FAIL: + forced_fail++; /* Count FAILs for multiple states */ + break; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + { + int rc; + int *local_workspace; + PCRE2_SIZE *local_offsets; + PCRE2_SPTR endasscode = code + GET(code, 1); + RWS_anchor *rws = (RWS_anchor *)RWS; + + if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE) + { + rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb); + if (rc != 0) return rc; + RWS = (int *)rws; + } + + local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free); + local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE; + rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE; + + while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); + + rc = internal_dfa_match( + mb, /* static match data */ + code, /* this subexpression's code */ + ptr, /* where we currently are */ + (PCRE2_SIZE)(ptr - start_subject), /* start offset */ + local_offsets, /* offset vector */ + RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */ + local_workspace, /* workspace vector */ + RWS_RSIZE, /* size of same */ + rlevel, /* function recursion level */ + RWS); /* recursion workspace */ + + rws->free += RWS_RSIZE + RWS_OVEC_OSIZE; + + if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc; + if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK)) + { 
ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_COND: + case OP_SCOND: + { + int codelink = (int)GET(code, 1); + PCRE2_UCHAR condcode; + + /* Because of the way auto-callout works during compile, a callout item + is inserted between OP_COND and an assertion condition. This does not + happen for the other conditions. */ + + if (code[LINK_SIZE + 1] == OP_CALLOUT + || code[LINK_SIZE + 1] == OP_CALLOUT_STR) + { + PCRE2_SIZE callout_length; + rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb, + 1 + LINK_SIZE, &callout_length); + if (rrc < 0) return rrc; /* Abandon */ + if (rrc > 0) break; /* Fail this thread */ + code += callout_length; /* Skip callout data */ + } + + condcode = code[LINK_SIZE+1]; + + /* Back reference conditions and duplicate named recursion conditions + are not supported */ + + if (condcode == OP_CREF || condcode == OP_DNCREF || + condcode == OP_DNRREF) + return PCRE2_ERROR_DFA_UCOND; + + /* The DEFINE condition is always false, and the assertion (?!) is + converted to OP_FAIL. */ + + if (condcode == OP_FALSE || condcode == OP_FAIL) + { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } + + /* There is also an always-true condition */ + + else if (condcode == OP_TRUE) + { ADD_ACTIVE(state_offset + LINK_SIZE + 2, 0); } + + /* The only supported version of OP_RREF is for the value RREF_ANY, + which means "test if in any recursion". We can't test for specifically + recursed groups. 
*/ + + else if (condcode == OP_RREF) + { + unsigned int value = GET2(code, LINK_SIZE + 2); + if (value != RREF_ANY) return PCRE2_ERROR_DFA_UCOND; + if (mb->recursive != NULL) + { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); } + else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } + } + + /* Otherwise, the condition is an assertion */ + + else + { + int rc; + int *local_workspace; + PCRE2_SIZE *local_offsets; + PCRE2_SPTR asscode = code + LINK_SIZE + 1; + PCRE2_SPTR endasscode = asscode + GET(asscode, 1); + RWS_anchor *rws = (RWS_anchor *)RWS; + + if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE) + { + rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb); + if (rc != 0) return rc; + RWS = (int *)rws; + } + + local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free); + local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE; + rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE; + + while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); + + rc = internal_dfa_match( + mb, /* fixed match data */ + asscode, /* this subexpression's code */ + ptr, /* where we currently are */ + (PCRE2_SIZE)(ptr - start_subject), /* start offset */ + local_offsets, /* offset vector */ + RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */ + local_workspace, /* workspace vector */ + RWS_RSIZE, /* size of same */ + rlevel, /* function recursion level */ + RWS); /* recursion workspace */ + + rws->free += RWS_RSIZE + RWS_OVEC_OSIZE; + + if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc; + if ((rc >= 0) == + (condcode == OP_ASSERT || condcode == OP_ASSERTBACK)) + { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); } + else + { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_RECURSE: + { + int rc; + int *local_workspace; + PCRE2_SIZE *local_offsets; + RWS_anchor *rws = (RWS_anchor *)RWS; + PCRE2_SPTR callpat = start_code + GET(code, 1); + uint32_t recno = (callpat == 
mb->start_code)? 0 : + GET2(callpat, 1 + LINK_SIZE); + + if (rws->free < RWS_RSIZE + RWS_OVEC_RSIZE) + { + rc = more_workspace(&rws, RWS_OVEC_RSIZE, mb); + if (rc != 0) return rc; + RWS = (int *)rws; + } + + local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free); + local_workspace = ((int *)local_offsets) + RWS_OVEC_RSIZE; + rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE; + + /* Check for repeating a recursion without advancing the subject + pointer or last used character. This should catch convoluted mutual + recursions. (Some simple cases are caught at compile time.) */ + + for (dfa_recursion_info *ri = mb->recursive; + ri != NULL; + ri = ri->prevrec) + { + if (recno == ri->group_num && ptr == ri->subject_position && + mb->last_used_ptr == ri->last_used_ptr) + return PCRE2_ERROR_RECURSELOOP; + } + + /* Remember this recursion and where we started it so as to + catch infinite loops. */ + + new_recursive.group_num = recno; + new_recursive.subject_position = ptr; + new_recursive.last_used_ptr = mb->last_used_ptr; + new_recursive.prevrec = mb->recursive; + mb->recursive = &new_recursive; + + rc = internal_dfa_match( + mb, /* fixed match data */ + callpat, /* this subexpression's code */ + ptr, /* where we currently are */ + (PCRE2_SIZE)(ptr - start_subject), /* start offset */ + local_offsets, /* offset vector */ + RWS_OVEC_RSIZE/OVEC_UNIT, /* size of same */ + local_workspace, /* workspace vector */ + RWS_RSIZE, /* size of same */ + rlevel, /* function recursion level */ + RWS); /* recursion workspace */ + + rws->free += RWS_RSIZE + RWS_OVEC_RSIZE; + mb->recursive = new_recursive.prevrec; /* Done this recursion */ + + /* Ran out of internal offsets */ + + if (rc == 0) return PCRE2_ERROR_DFA_RECURSE; + + /* For each successful matched substring, set up the next state with a + count of characters to skip before trying it. Note that the count is in + characters, not bytes. 
*/ + + if (rc > 0) + { + for (rc = rc*2 - 2; rc >= 0; rc -= 2) + { + PCRE2_SIZE charcount = local_offsets[rc+1] - local_offsets[rc]; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (utf) + { + PCRE2_SPTR p = start_subject + local_offsets[rc]; + PCRE2_SPTR pp = start_subject + local_offsets[rc+1]; + while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--; + } +#endif + if (charcount > 0) + { + ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, + (int)(charcount - 1)); + } + else + { + ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0); + } + } + } + else if (rc != PCRE2_ERROR_NOMATCH) return rc; + } + break; + + /*-----------------------------------------------------------------*/ + case OP_BRAPOS: + case OP_SBRAPOS: + case OP_CBRAPOS: + case OP_SCBRAPOS: + case OP_BRAPOSZERO: + { + int rc; + int *local_workspace; + PCRE2_SIZE *local_offsets; + PCRE2_SIZE charcount, matched_count; + PCRE2_SPTR local_ptr = ptr; + RWS_anchor *rws = (RWS_anchor *)RWS; + BOOL allow_zero; + + if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE) + { + rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb); + if (rc != 0) return rc; + RWS = (int *)rws; + } + + local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free); + local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE; + rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE; + + if (codevalue == OP_BRAPOSZERO) + { + allow_zero = TRUE; + codevalue = *(++code); /* Codevalue will be one of above BRAs */ + } + else allow_zero = FALSE; + + /* Loop to match the subpattern as many times as possible as if it were + a complete pattern. 
*/ + + for (matched_count = 0;; matched_count++) + { + rc = internal_dfa_match( + mb, /* fixed match data */ + code, /* this subexpression's code */ + local_ptr, /* where we currently are */ + (PCRE2_SIZE)(ptr - start_subject), /* start offset */ + local_offsets, /* offset vector */ + RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */ + local_workspace, /* workspace vector */ + RWS_RSIZE, /* size of same */ + rlevel, /* function recursion level */ + RWS); /* recursion workspace */ + + /* Failed to match */ + + if (rc < 0) + { + if (rc != PCRE2_ERROR_NOMATCH) return rc; + break; + } + + /* Matched: break the loop if zero characters matched. */ + + charcount = local_offsets[1] - local_offsets[0]; + if (charcount == 0) break; + local_ptr += charcount; /* Advance temporary position ptr */ + } + + rws->free += RWS_RSIZE + RWS_OVEC_OSIZE; + + /* At this point we have matched the subpattern matched_count + times, and local_ptr is pointing to the character after the end of the + last match. */ + + if (matched_count > 0 || allow_zero) + { + PCRE2_SPTR end_subpattern = code; + int next_state_offset; + + do { end_subpattern += GET(end_subpattern, 1); } + while (*end_subpattern == OP_ALT); + next_state_offset = + (int)(end_subpattern - start_code + LINK_SIZE + 1); + + /* Optimization: if there are no more active states, and there + are no new states yet set up, then skip over the subject string + right here, to save looping. Otherwise, set up the new state to swing + into action when the end of the matched substring is reached. 
*/ + + if (i + 1 >= active_count && new_count == 0) + { + ptr = local_ptr; + clen = 0; + ADD_NEW(next_state_offset, 0); + } + else + { + PCRE2_SPTR p = ptr; + PCRE2_SPTR pp = local_ptr; + charcount = (PCRE2_SIZE)(pp - p); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (utf) while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--; +#endif + ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1)); + } + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_ONCE: + { + int rc; + int *local_workspace; + PCRE2_SIZE *local_offsets; + RWS_anchor *rws = (RWS_anchor *)RWS; + + if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE) + { + rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb); + if (rc != 0) return rc; + RWS = (int *)rws; + } + + local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free); + local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE; + rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE; + + rc = internal_dfa_match( + mb, /* fixed match data */ + code, /* this subexpression's code */ + ptr, /* where we currently are */ + (PCRE2_SIZE)(ptr - start_subject), /* start offset */ + local_offsets, /* offset vector */ + RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */ + local_workspace, /* workspace vector */ + RWS_RSIZE, /* size of same */ + rlevel, /* function recursion level */ + RWS); /* recursion workspace */ + + rws->free += RWS_RSIZE + RWS_OVEC_OSIZE; + + if (rc >= 0) + { + PCRE2_SPTR end_subpattern = code; + PCRE2_SIZE charcount = local_offsets[1] - local_offsets[0]; + int next_state_offset, repeat_state_offset; + + do { end_subpattern += GET(end_subpattern, 1); } + while (*end_subpattern == OP_ALT); + next_state_offset = + (int)(end_subpattern - start_code + LINK_SIZE + 1); + + /* If the end of this subpattern is KETRMAX or KETRMIN, we must + arrange for the repeat state also to be added to the relevant list. + Calculate the offset, or set -1 for no repeat. 
*/ + + repeat_state_offset = (*end_subpattern == OP_KETRMAX || + *end_subpattern == OP_KETRMIN)? + (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1; + + /* If we have matched an empty string, add the next state at the + current character pointer. This is important so that the duplicate + checking kicks in, which is what breaks infinite loops that match an + empty string. */ + + if (charcount == 0) + { + ADD_ACTIVE(next_state_offset, 0); + } + + /* Optimization: if there are no more active states, and there + are no new states yet set up, then skip over the subject string + right here, to save looping. Otherwise, set up the new state to swing + into action when the end of the matched substring is reached. */ + + else if (i + 1 >= active_count && new_count == 0) + { + ptr += charcount; + clen = 0; + ADD_NEW(next_state_offset, 0); + + /* If we are adding a repeat state at the new character position, + we must fudge things so that it is the only current state. + Otherwise, it might be a duplicate of one we processed before, and + that would cause it to be skipped. 
*/ + + if (repeat_state_offset >= 0) + { + next_active_state = active_states; + active_count = 0; + i = -1; + ADD_ACTIVE(repeat_state_offset, 0); + } + } + else + { +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (utf) + { + PCRE2_SPTR p = start_subject + local_offsets[0]; + PCRE2_SPTR pp = start_subject + local_offsets[1]; + while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--; + } +#endif + ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1)); + if (repeat_state_offset >= 0) + { ADD_NEW_DATA(-repeat_state_offset, 0, (int)(charcount - 1)); } + } + } + else if (rc != PCRE2_ERROR_NOMATCH) return rc; + } + break; + + +/* ========================================================================== */ + /* Handle callouts */ + + case OP_CALLOUT: + case OP_CALLOUT_STR: + { + PCRE2_SIZE callout_length; + rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb, 0, + &callout_length); + if (rrc < 0) return rrc; /* Abandon */ + if (rrc == 0) + { ADD_ACTIVE(state_offset + (int)callout_length, 0); } + } + break; + + +/* ========================================================================== */ + default: /* Unsupported opcode */ + return PCRE2_ERROR_DFA_UITEM; + } + + NEXT_ACTIVE_STATE: continue; + + } /* End of loop scanning active states */ + + /* We have finished the processing at the current subject character. If no + new states have been set for the next character, we have found all the + matches that we are going to find. If partial matching has been requested, + check for appropriate conditions. + + The "forced_ fail" variable counts the number of (*F) encountered for the + character. If it is equal to the original active_count (saved in + workspace[1]) it means that (*F) was found on every active state. In this + case we don't want to give a partial match. + + The "could_continue" variable is true if a state could have continued but + for the fact that the end of the subject was reached. 
*/ + + if (new_count <= 0) + { + if (could_continue && /* Some could go on, and */ + forced_fail != workspace[1] && /* Not all forced fail & */ + ( /* either... */ + (mb->moptions & PCRE2_PARTIAL_HARD) != 0 /* Hard partial */ + || /* or... */ + ((mb->moptions & PCRE2_PARTIAL_SOFT) != 0 && /* Soft partial and */ + match_count < 0) /* no matches */ + ) && /* And... */ + ( + partial_newline || /* Either partial NL */ + ( /* or ... */ + ptr >= end_subject && /* End of subject and */ + ( /* either */ + ptr > mb->start_used_ptr || /* Inspected non-empty string */ + mb->allowemptypartial /* or pattern has lookbehind */ + ) /* or could match empty */ + ) + )) + match_count = PCRE2_ERROR_PARTIAL; + break; /* Exit from loop along the subject string */ + } + + /* One or more states are active for the next character. */ + + ptr += clen; /* Advance to next subject character */ + } /* Loop to move along the subject string */ + +/* Control gets here from "break" a few lines above. If we have a match and +PCRE2_ENDANCHORED is set, the match fails. */ + +if (match_count >= 0 && + ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0 && + ptr < end_subject) + match_count = PCRE2_ERROR_NOMATCH; + +return match_count; +} + + + +/************************************************* +* Match a pattern using the DFA algorithm * +*************************************************/ + +/* This function matches a compiled pattern to a subject string, using the +alternate matching algorithm that finds all matches at once. 
+ +Arguments: + code points to the compiled pattern + subject subject string + length length of subject string + startoffset where to start matching in the subject + options option bits + match_data points to a match data structure + gcontext points to a match context + workspace pointer to workspace + wscount size of workspace + +Returns: > 0 => number of match offset pairs placed in offsets + = 0 => offsets overflowed; longest matches are present + -1 => failed to match + < -1 => some kind of unexpected problem +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, + PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount) +{ +int rc; +int was_zero_terminated = 0; + +const pcre2_real_code *re = (const pcre2_real_code *)code; + +PCRE2_SPTR start_match; +PCRE2_SPTR end_subject; +PCRE2_SPTR bumpalong_limit; +PCRE2_SPTR req_cu_ptr; + +BOOL utf, anchored, startline, firstline; +BOOL has_first_cu = FALSE; +BOOL has_req_cu = FALSE; + +#if PCRE2_CODE_UNIT_WIDTH == 8 +PCRE2_SPTR memchr_found_first_cu = NULL; +PCRE2_SPTR memchr_found_first_cu2 = NULL; +#endif + +PCRE2_UCHAR first_cu = 0; +PCRE2_UCHAR first_cu2 = 0; +PCRE2_UCHAR req_cu = 0; +PCRE2_UCHAR req_cu2 = 0; + +const uint8_t *start_bits = NULL; + +/* We need to have mb pointing to a match block, because the IS_NEWLINE macro +is used below, and it expects NLBLOCK to be defined as a pointer. */ + +pcre2_callout_block cb; +dfa_match_block actual_match_block; +dfa_match_block *mb = &actual_match_block; + +/* Set up a starting block of memory for use during recursive calls to +internal_dfa_match(). By putting this on the stack, it minimizes resource use +in the case when it is not needed. If this is too small, more memory is +obtained from the heap. 
At the start of each block is an anchor structure.*/ + +int base_recursion_workspace[RWS_BASE_SIZE]; +RWS_anchor *rws = (RWS_anchor *)base_recursion_workspace; +rws->next = NULL; +rws->size = RWS_BASE_SIZE; +rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE; + +/* Recognize NULL, length 0 as an empty string. */ + +if (subject == NULL && length == 0) subject = (PCRE2_SPTR)""; + +/* Plausibility checks */ + +if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; +if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL) + return PCRE2_ERROR_NULL; + +if (length == PCRE2_ZERO_TERMINATED) + { + length = PRIV(strlen)(subject); + was_zero_terminated = 1; + } + +if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE; +if (start_offset > length) return PCRE2_ERROR_BADOFFSET; + +/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same +time. */ + +if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 && + ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0) + return PCRE2_ERROR_BADOPTION; + +/* Invalid UTF support is not available for DFA matching. */ + +if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0) + return PCRE2_ERROR_DFA_UINVALID_UTF; + +/* Check that the first field in the block is the magic number. If it is not, +return with PCRE2_ERROR_BADMAGIC. */ + +if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; + +/* Check the code unit width. */ + +if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) + return PCRE2_ERROR_BADMODE; + +/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the +options variable for this function. Users of PCRE2 who are not calling the +function directly would like to have a way of setting these flags, in the same +way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with +constructions like (*NO_AUTOPOSSESS). 
To enable this, (*NOTEMPTY) and +(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be +transferred to the options for this function. The bits are guaranteed to be +adjacent, but do not have the same values. This bit of Boolean trickery assumes +that the match-time bits are not more significant than the flag bits. If by +accident this is not the case, a compile-time division by zero error will +occur. */ + +#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET) +#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART) +options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1))); +#undef FF +#undef OO + +/* If restarting after a partial match, do some sanity checks on the contents +of the workspace. */ + +if ((options & PCRE2_DFA_RESTART) != 0) + { + if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 || + workspace[1] > (int)((wscount - 2)/INTS_PER_STATEBLOCK)) + return PCRE2_ERROR_DFA_BADRESTART; + } + +/* Set some local values */ + +utf = (re->overall_options & PCRE2_UTF) != 0; +start_match = subject + start_offset; +end_subject = subject + length; +req_cu_ptr = start_match - 1; +anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 || + (re->overall_options & PCRE2_ANCHORED) != 0; + +/* The "must be at the start of a line" flags are used in a loop when finding +where to start. */ + +startline = (re->flags & PCRE2_STARTLINE) != 0; +firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0; +bumpalong_limit = end_subject; + +/* Initialize and set up the fixed fields in the callout block, with a pointer +in the match block. */ + +mb->cb = &cb; +cb.version = 2; +cb.subject = subject; +cb.subject_length = (PCRE2_SIZE)(end_subject - subject); +cb.callout_flags = 0; +cb.capture_top = 1; /* No capture support */ +cb.capture_last = 0; +cb.mark = NULL; /* No (*MARK) support */ + +/* Get data from the match context, if present, and fill in the remaining +fields in the match block. 
It is an error to set an offset limit without +setting the flag at compile time. */ + +if (mcontext == NULL) + { + mb->callout = NULL; + mb->memctl = re->memctl; + mb->match_limit = PRIV(default_match_context).match_limit; + mb->match_limit_depth = PRIV(default_match_context).depth_limit; + mb->heap_limit = PRIV(default_match_context).heap_limit; + } +else + { + if (mcontext->offset_limit != PCRE2_UNSET) + { + if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0) + return PCRE2_ERROR_BADOFFSETLIMIT; + bumpalong_limit = subject + mcontext->offset_limit; + } + mb->callout = mcontext->callout; + mb->callout_data = mcontext->callout_data; + mb->memctl = mcontext->memctl; + mb->match_limit = mcontext->match_limit; + mb->match_limit_depth = mcontext->depth_limit; + mb->heap_limit = mcontext->heap_limit; + } + +if (mb->match_limit > re->limit_match) + mb->match_limit = re->limit_match; + +if (mb->match_limit_depth > re->limit_depth) + mb->match_limit_depth = re->limit_depth; + +if (mb->heap_limit > re->limit_heap) + mb->heap_limit = re->limit_heap; + +mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + + re->name_count * re->name_entry_size; +mb->tables = re->tables; +mb->start_subject = subject; +mb->end_subject = end_subject; +mb->start_offset = start_offset; +mb->allowemptypartial = (re->max_lookbehind > 0) || + (re->flags & PCRE2_MATCH_EMPTY) != 0; +mb->moptions = options; +mb->poptions = re->overall_options; +mb->match_call_count = 0; +mb->heap_used = 0; + +/* Process the \R and newline settings. 
*/ + +mb->bsr_convention = re->bsr_convention; +mb->nltype = NLTYPE_FIXED; +switch(re->newline_convention) + { + case PCRE2_NEWLINE_CR: + mb->nllen = 1; + mb->nl[0] = CHAR_CR; + break; + + case PCRE2_NEWLINE_LF: + mb->nllen = 1; + mb->nl[0] = CHAR_NL; + break; + + case PCRE2_NEWLINE_NUL: + mb->nllen = 1; + mb->nl[0] = CHAR_NUL; + break; + + case PCRE2_NEWLINE_CRLF: + mb->nllen = 2; + mb->nl[0] = CHAR_CR; + mb->nl[1] = CHAR_NL; + break; + + case PCRE2_NEWLINE_ANY: + mb->nltype = NLTYPE_ANY; + break; + + case PCRE2_NEWLINE_ANYCRLF: + mb->nltype = NLTYPE_ANYCRLF; + break; + + default: return PCRE2_ERROR_INTERNAL; + } + +/* Check a UTF string for validity if required. For 8-bit and 16-bit strings, +we must also check that a starting offset does not point into the middle of a +multiunit character. We check only the portion of the subject that is going to +be inspected during matching - from the offset minus the maximum back reference +to the given length. This saves time when a small part of a large subject is +being matched by the use of a starting offset. Note that the maximum lookbehind +is a number of characters, not code units. */ + +#ifdef SUPPORT_UNICODE +if (utf && (options & PCRE2_NO_UTF_CHECK) == 0) + { + PCRE2_SPTR check_subject = start_match; /* start_match includes offset */ + + if (start_offset > 0) + { +#if PCRE2_CODE_UNIT_WIDTH != 32 + unsigned int i; + if (start_match < end_subject && NOT_FIRSTCU(*start_match)) + return PCRE2_ERROR_BADUTFOFFSET; + for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--) + { + check_subject--; + while (check_subject > subject && +#if PCRE2_CODE_UNIT_WIDTH == 8 + (*check_subject & 0xc0) == 0x80) +#else /* 16-bit */ + (*check_subject & 0xfc00) == 0xdc00) +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + check_subject--; + } +#else /* In the 32-bit library, one code unit equals one character. 
*/ + check_subject -= re->max_lookbehind; + if (check_subject < subject) check_subject = subject; +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + } + + /* Validate the relevant portion of the subject. After an error, adjust the + offset to be an absolute offset in the whole string. */ + + match_data->rc = PRIV(valid_utf)(check_subject, + length - (PCRE2_SIZE)(check_subject - subject), &(match_data->startchar)); + if (match_data->rc != 0) + { + match_data->startchar += (PCRE2_SIZE)(check_subject - subject); + return match_data->rc; + } + } +#endif /* SUPPORT_UNICODE */ + +/* Set up the first code unit to match, if available. If there's no first code +unit there may be a bitmap of possible first characters. */ + +if ((re->flags & PCRE2_FIRSTSET) != 0) + { + has_first_cu = TRUE; + first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit); + if ((re->flags & PCRE2_FIRSTCASELESS) != 0) + { + first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu); +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (first_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0) + first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu); +#else + if (first_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0)) + first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu); +#endif +#endif /* SUPPORT_UNICODE */ + } + } +else + if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0) + start_bits = re->start_bitmap; + +/* There may be a "last known required code unit" set. 
*/ + +if ((re->flags & PCRE2_LASTSET) != 0) + { + has_req_cu = TRUE; + req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit); + if ((re->flags & PCRE2_LASTCASELESS) != 0) + { + req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu); +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (req_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0) + req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu); +#else + if (req_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0)) + req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu); +#endif +#endif /* SUPPORT_UNICODE */ + } + } + +/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT, +free the memory that was obtained. */ + +if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0) + { + match_data->memctl.free((void *)match_data->subject, + match_data->memctl.memory_data); + match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT; + } + +/* Fill in fields that are always returned in the match data. */ + +match_data->code = re; +match_data->subject = NULL; /* Default for no match */ +match_data->mark = NULL; +match_data->matchedby = PCRE2_MATCHEDBY_DFA_INTERPRETER; + +/* Call the main matching function, looping for a non-anchored regex after a +failed match. If not restarting, perform certain optimizations at the start of +a match. */ + +for (;;) + { + /* ----------------- Start of match optimizations ---------------- */ + + /* There are some optimizations that avoid running the match if a known + starting point is not found, or if a known later code unit is not present. + However, there is an option (settable at compile time) that disables + these, for testing and for ensuring that all callouts do actually occur. + The optimizations must also be avoided when restarting a DFA match. */ + + if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && + (options & PCRE2_DFA_RESTART) == 0) + { + /* If firstline is TRUE, the start of the match is constrained to the first + line of a multiline string. 
That is, the match must be before or at the + first newline following the start of matching. Temporarily adjust + end_subject so that we stop the optimization scans for a first code unit + immediately after the first character of a newline (the first code unit can + legitimately be a newline). If the match fails at the newline, later code + breaks this loop. */ + + if (firstline) + { + PCRE2_SPTR t = start_match; +#ifdef SUPPORT_UNICODE + if (utf) + { + while (t < end_subject && !IS_NEWLINE(t)) + { + t++; + ACROSSCHAR(t < end_subject, t, t++); + } + } + else +#endif + while (t < end_subject && !IS_NEWLINE(t)) t++; + end_subject = t; + } + + /* Anchored: check the first code unit if one is recorded. This may seem + pointless but it can help in detecting a no match case without scanning for + the required code unit. */ + + if (anchored) + { + if (has_first_cu || start_bits != NULL) + { + BOOL ok = start_match < end_subject; + if (ok) + { + PCRE2_UCHAR c = UCHAR21TEST(start_match); + ok = has_first_cu && (c == first_cu || c == first_cu2); + if (!ok && start_bits != NULL) + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (c > 255) c = 255; +#endif + ok = (start_bits[c/8] & (1u << (c&7))) != 0; + } + } + if (!ok) break; + } + } + + /* Not anchored. Advance to a unique first code unit if there is one. */ + + else + { + if (has_first_cu) + { + if (first_cu != first_cu2) /* Caseless */ + { + /* In 16-bit and 32_bit modes we have to do our own search, so can + look for both cases at once. */ + +#if PCRE2_CODE_UNIT_WIDTH != 8 + PCRE2_UCHAR smc; + while (start_match < end_subject && + (smc = UCHAR21TEST(start_match)) != first_cu && + smc != first_cu2) + start_match++; +#else + /* In 8-bit mode, the use of memchr() gives a big speed up, even + though we have to call it twice in order to find the earliest + occurrence of the code unit in either of its cases. Caching is used + to remember the positions of previously found code units. 
This can + make a huge difference when the strings are very long and only one + case is actually present. */ + + PCRE2_SPTR pp1 = NULL; + PCRE2_SPTR pp2 = NULL; + PCRE2_SIZE searchlength = end_subject - start_match; + + /* If we haven't got a previously found position for first_cu, or if + the current starting position is later, we need to do a search. If + the code unit is not found, set it to the end. */ + + if (memchr_found_first_cu == NULL || + start_match > memchr_found_first_cu) + { + pp1 = memchr(start_match, first_cu, searchlength); + memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1; + } + + /* If the start is before a previously found position, use the + previous position, or NULL if a previous search failed. */ + + else pp1 = (memchr_found_first_cu == end_subject)? NULL : + memchr_found_first_cu; + + /* Do the same thing for the other case. */ + + if (memchr_found_first_cu2 == NULL || + start_match > memchr_found_first_cu2) + { + pp2 = memchr(start_match, first_cu2, searchlength); + memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2; + } + + else pp2 = (memchr_found_first_cu2 == end_subject)? NULL : + memchr_found_first_cu2; + + /* Set the start to the end of the subject if neither case was found. + Otherwise, use the earlier found point. */ + + if (pp1 == NULL) + start_match = (pp2 == NULL)? end_subject : pp2; + else + start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2; + +#endif /* 8-bit handling */ + } + + /* The caseful case is much simpler. 
*/ + + else + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + while (start_match < end_subject && UCHAR21TEST(start_match) != + first_cu) + start_match++; +#else /* 8-bit code units */ + start_match = memchr(start_match, first_cu, end_subject - start_match); + if (start_match == NULL) start_match = end_subject; +#endif + } + + /* If we can't find the required code unit, having reached the true end + of the subject, break the bumpalong loop, to force a match failure, + except when doing partial matching, when we let the next cycle run at + the end of the subject. To see why, consider the pattern /(?<=abc)def/, + which partially matches "abc", even though the string does not contain + the starting character "d". If we have not reached the true end of the + subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified) + we also let the cycle run, because the matching string is legitimately + allowed to start with the first code unit of a newline. */ + + if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 && + start_match >= mb->end_subject) + break; + } + + /* If there's no first code unit, advance to just after a linebreak for a + multiline match if required. */ + + else if (startline) + { + if (start_match > mb->start_subject + start_offset) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + while (start_match < end_subject && !WAS_NEWLINE(start_match)) + { + start_match++; + ACROSSCHAR(start_match < end_subject, start_match, start_match++); + } + } + else +#endif + while (start_match < end_subject && !WAS_NEWLINE(start_match)) + start_match++; + + /* If we have just passed a CR and the newline option is ANY or + ANYCRLF, and we are now at a LF, advance the match position by one + more code unit. 
*/ + + if (start_match[-1] == CHAR_CR && + (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) && + start_match < end_subject && + UCHAR21TEST(start_match) == CHAR_NL) + start_match++; + } + } + + /* If there's no first code unit or a requirement for a multiline line + start, advance to a non-unique first code unit if any have been + identified. The bitmap contains only 256 bits. When code units are 16 or + 32 bits wide, all code units greater than 254 set the 255 bit. */ + + else if (start_bits != NULL) + { + while (start_match < end_subject) + { + uint32_t c = UCHAR21TEST(start_match); +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (c > 255) c = 255; +#endif + if ((start_bits[c/8] & (1u << (c&7))) != 0) break; + start_match++; + } + + /* See comment above in first_cu checking about the next line. */ + + if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 && + start_match >= mb->end_subject) + break; + } + } /* End of first code unit handling */ + + /* Restore fudged end_subject */ + + end_subject = mb->end_subject; + + /* The following two optimizations are disabled for partial matching. */ + + if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0) + { + PCRE2_SPTR p; + + /* The minimum matching length is a lower bound; no actual string of that + length may actually match the pattern. Although the value is, strictly, + in characters, we treat it as code units to avoid spending too much time + in this optimization. */ + + if (end_subject - start_match < re->minlength) goto NOMATCH_EXIT; + + /* If req_cu is set, we know that that code unit must appear in the + subject for the match to succeed. If the first code unit is set, req_cu + must be later in the subject; otherwise the test starts at the match + point. This optimization can save a huge amount of backtracking in + patterns with nested unlimited repeats that aren't going to match. 
+ Writing separate code for cased/caseless versions makes it go faster, as + does using an autoincrement and backing off on a match. As in the case of + the first code unit, using memchr() in the 8-bit library gives a big + speed up. Unlike the first_cu check above, we do not need to call + memchr() twice in the caseless case because we only need to check for the + presence of the character in either case, not find the first occurrence. + + The search can be skipped if the code unit was found later than the + current starting point in a previous iteration of the bumpalong loop. + + HOWEVER: when the subject string is very, very long, searching to its end + can take a long time, and give bad performance on quite ordinary + patterns. This showed up when somebody was matching something like + /^\d+C/ on a 32-megabyte string... so we don't do this when the string is + sufficiently long, but it's worth searching a lot more for unanchored + patterns. */ + + p = start_match + (has_first_cu? 1:0); + if (has_req_cu && p > req_cu_ptr) + { + PCRE2_SIZE check_length = end_subject - start_match; + + if (check_length < REQ_CU_MAX || + (!anchored && check_length < REQ_CU_MAX * 1000)) + { + if (req_cu != req_cu2) /* Caseless */ + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + while (p < end_subject) + { + uint32_t pp = UCHAR21INCTEST(p); + if (pp == req_cu || pp == req_cu2) { p--; break; } + } +#else /* 8-bit code units */ + PCRE2_SPTR pp = p; + p = memchr(pp, req_cu, end_subject - pp); + if (p == NULL) + { + p = memchr(pp, req_cu2, end_subject - pp); + if (p == NULL) p = end_subject; + } +#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */ + } + + /* The caseful case */ + + else + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + while (p < end_subject) + { + if (UCHAR21INCTEST(p) == req_cu) { p--; break; } + } + +#else /* 8-bit code units */ + p = memchr(p, req_cu, end_subject - p); + if (p == NULL) p = end_subject; +#endif + } + + /* If we can't find the required code unit, break the matching loop, + forcing a 
match failure. */ + + if (p >= end_subject) break; + + /* If we have found the required code unit, save the point where we + found it, so that we don't search again next time round the loop if + the start hasn't passed this code unit yet. */ + + req_cu_ptr = p; + } + } + } + } + + /* ------------ End of start of match optimizations ------------ */ + + /* Give no match if we have passed the bumpalong limit. */ + + if (start_match > bumpalong_limit) break; + + /* OK, now we can do the business */ + + mb->start_used_ptr = start_match; + mb->last_used_ptr = start_match; + mb->recursive = NULL; + + rc = internal_dfa_match( + mb, /* fixed match data */ + mb->start_code, /* this subexpression's code */ + start_match, /* where we currently are */ + start_offset, /* start offset in subject */ + match_data->ovector, /* offset vector */ + (uint32_t)match_data->oveccount * 2, /* actual size of same */ + workspace, /* workspace vector */ + (int)wscount, /* size of same */ + 0, /* function recurse level */ + base_recursion_workspace); /* initial workspace for recursion */ + + /* Anything other than "no match" means we are done, always; otherwise, carry + on only if not anchored. 
*/ + + if (rc != PCRE2_ERROR_NOMATCH || anchored) + { + if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0) + { + match_data->ovector[0] = (PCRE2_SIZE)(start_match - subject); + match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject); + } + match_data->subject_length = length; + match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject); + match_data->rightchar = (PCRE2_SIZE)(mb->last_used_ptr - subject); + match_data->startchar = (PCRE2_SIZE)(start_match - subject); + match_data->rc = rc; + + if (rc >= 0 &&(options & PCRE2_COPY_MATCHED_SUBJECT) != 0) + { + length = CU2BYTES(length + was_zero_terminated); + match_data->subject = match_data->memctl.malloc(length, + match_data->memctl.memory_data); + if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY; + memcpy((void *)match_data->subject, subject, length); + match_data->flags |= PCRE2_MD_COPIED_SUBJECT; + } + else + { + if (rc >= 0 || rc == PCRE2_ERROR_PARTIAL) match_data->subject = subject; + } + goto EXIT; + } + + /* Advance to the next subject character unless we are at the end of a line + and firstline is set. */ + + if (firstline && IS_NEWLINE(start_match)) break; + start_match++; +#ifdef SUPPORT_UNICODE + if (utf) + { + ACROSSCHAR(start_match < end_subject, start_match, start_match++); + } +#endif + if (start_match > end_subject) break; + + /* If we have just passed a CR and we are now at a LF, and the pattern does + not contain any explicit matches for \r or \n, and the newline option is CRLF + or ANY or ANYCRLF, advance the match position by one more character. 
*/ + + if (UCHAR21TEST(start_match - 1) == CHAR_CR && + start_match < end_subject && + UCHAR21TEST(start_match) == CHAR_NL && + (re->flags & PCRE2_HASCRORLF) == 0 && + (mb->nltype == NLTYPE_ANY || + mb->nltype == NLTYPE_ANYCRLF || + mb->nllen == 2)) + start_match++; + + } /* "Bumpalong" loop */ + +NOMATCH_EXIT: +rc = PCRE2_ERROR_NOMATCH; + +EXIT: +while (rws->next != NULL) + { + RWS_anchor *next = rws->next; + rws->next = next->next; + mb->memctl.free(next, mb->memctl.memory_data); + } + +return rc; +} + +/* These #undefs are here to enable unity builds with CMake. */ + +#undef NLBLOCK /* Block containing newline information */ +#undef PSSTART /* Field containing processed string start */ +#undef PSEND /* Field containing processed string end */ + +/* End of pcre2_dfa_match.c */ diff --git a/src/pcre2_dftables.c b/src/pcre2_dftables.c new file mode 100644 index 0000000..0f9aedf --- /dev/null +++ b/src/pcre2_dftables.c @@ -0,0 +1,297 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2020 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This is a freestanding support program to generate a file containing +character tables for PCRE2. The tables are built using the pcre2_maketables() +function, which is part of the PCRE2 API. By default, the system's "C" locale +is used rather than what the building user happens to have set, but the -L +option can be used to select the current locale from the LC_ALL environment +variable. By default, the tables are written in source form, but if -b is +given, they are written in binary. 
*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include + +#define PCRE2_DFTABLES /* for pcre2_internal.h, pcre2_maketables.c */ + +#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */ +#include "pcre2_internal.h" + +#include "pcre2_maketables.c" + + +static const char *classlist[] = + { + "space", "xdigit", "digit", "upper", "lower", + "word", "graph", "print", "punct", "cntrl" + }; + + + +/************************************************* +* Usage * +*************************************************/ + +static void +usage(void) +{ +(void)fprintf(stderr, + "Usage: pcre2_dftables [options] \n" + " -b Write output in binary (default is source code)\n" + " -L Use locale from LC_ALL (default is \"C\" locale)\n" + ); +} + + + +/************************************************* +* Entry point * +*************************************************/ + +int main(int argc, char **argv) +{ +FILE *f; +int i; +int nclass = 0; +BOOL binary = FALSE; +char *env = (char *)"C"; +const uint8_t *tables; +const uint8_t *base_of_tables; + +/* Process options */ + +for (i = 1; i < argc; i++) + { + char *arg = argv[i]; + if (*arg != '-') break; + + if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0) + { + usage(); + return 0; + } + + else if (strcmp(arg, "-L") == 0) + { + if (setlocale(LC_ALL, "") == NULL) + { + (void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n"); + return 1; + } + env = getenv("LC_ALL"); + } + + else if (strcmp(arg, "-b") == 0) + binary = TRUE; + + else + { + (void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg); + return 1; + } + } + +if (i != argc - 1) + { + (void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n"); + return 1; + } + +/* Make the tables */ + +tables = maketables(); +base_of_tables = tables; + +f = fopen(argv[i], "wb"); +if (f == NULL) + { + fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]); + return 1; + } + +/* 
If -b was specified, we write the tables in binary. */ + +if (binary) + { + int yield = 0; + size_t len = fwrite(tables, 1, TABLES_LENGTH, f); + if (len != TABLES_LENGTH) + { + (void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d " + "instead of %d\n", (int)len, TABLES_LENGTH); + yield = 1; + } + fclose(f); + free((void *)base_of_tables); + return yield; + } + +/* Write the tables as source code for inclusion in the PCRE2 library. There +are several fprintf() calls here, because gcc in pedantic mode complains about +the very long string otherwise. */ + +(void)fprintf(f, + "/*************************************************\n" + "* Perl-Compatible Regular Expressions *\n" + "*************************************************/\n\n" + "/* This file was automatically written by the pcre2_dftables auxiliary\n" + "program. It contains character tables that are used when no external\n" + "tables are passed to PCRE2 by the application that calls it. The tables\n" + "are used only for characters whose code values are less than 256, and\n" + "only relevant if not in UCP mode. */\n\n"); + +(void)fprintf(f, + "/* This set of tables was written in the %s locale. */\n\n", env); + +(void)fprintf(f, + "/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n" + "to build alternative versions of this file. This is necessary if you are\n" + "running in an EBCDIC environment, or if you want to default to a different\n" + "encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n" + "these tables in the \"C\" locale by default. This happens automatically if\n" + "PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n" + "pcre2_dftables manually with the -L option to build tables using the LC_ALL\n" + "locale. 
*/\n\n"); + +/* Force config.h in z/OS */ + +#if defined NATIVE_ZOS +(void)fprintf(f, + "/* For z/OS, config.h is forced */\n" + "#ifndef HAVE_CONFIG_H\n" + "#define HAVE_CONFIG_H 1\n" + "#endif\n\n"); +#endif + +(void)fprintf(f, + "#ifdef HAVE_CONFIG_H\n" + "#include \"config.h\"\n" + "#endif\n\n" + "#include \"pcre2_internal.h\"\n\n"); + +(void)fprintf(f, + "const uint8_t PRIV(default_tables)[] = {\n\n" + "/* This table is a lower casing table. */\n\n"); + +(void)fprintf(f, " "); +for (i = 0; i < 256; i++) + { + if ((i & 7) == 0 && i != 0) fprintf(f, "\n "); + fprintf(f, "%3d", *tables++); + if (i != 255) fprintf(f, ","); + } +(void)fprintf(f, ",\n\n"); + +(void)fprintf(f, "/* This table is a case flipping table. */\n\n"); + +(void)fprintf(f, " "); +for (i = 0; i < 256; i++) + { + if ((i & 7) == 0 && i != 0) fprintf(f, "\n "); + fprintf(f, "%3d", *tables++); + if (i != 255) fprintf(f, ","); + } +(void)fprintf(f, ",\n\n"); + +(void)fprintf(f, + "/* This table contains bit maps for various character classes. Each map is 32\n" + "bytes long and the bits run from the least significant end of each byte. The\n" + "classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n" + "graph, print, punct, and cntrl. Other classes are built from combinations. 
*/\n\n"); + +(void)fprintf(f, " "); +for (i = 0; i < cbit_length; i++) + { + if ((i & 7) == 0 && i != 0) + { + if ((i & 31) == 0) (void)fprintf(f, "\n"); + if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]); + (void)fprintf(f, "\n "); + } + (void)fprintf(f, "0x%02x", *tables++); + if (i != cbit_length - 1) (void)fprintf(f, ","); + } +(void)fprintf(f, ",\n\n"); + +(void)fprintf(f, + "/* This table identifies various classes of character by individual bits:\n" + " 0x%02x white space character\n" + " 0x%02x letter\n" + " 0x%02x lower case letter\n" + " 0x%02x decimal digit\n" + " 0x%02x word (alphanumeric or '_')\n*/\n\n", + ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word); + +(void)fprintf(f, " "); +for (i = 0; i < 256; i++) + { + if ((i & 7) == 0 && i != 0) + { + (void)fprintf(f, " /* "); + if (isprint(i-8)) (void)fprintf(f, " %c -", i-8); + else (void)fprintf(f, "%3d-", i-8); + if (isprint(i-1)) (void)fprintf(f, " %c ", i-1); + else (void)fprintf(f, "%3d", i-1); + (void)fprintf(f, " */\n "); + } + (void)fprintf(f, "0x%02x", *tables++); + if (i != 255) (void)fprintf(f, ","); + } + +(void)fprintf(f, "};/* "); +if (isprint(i-8)) (void)fprintf(f, " %c -", i-8); + else (void)fprintf(f, "%3d-", i-8); +if (isprint(i-1)) (void)fprintf(f, " %c ", i-1); + else (void)fprintf(f, "%3d", i-1); +(void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n"); + +fclose(f); +free((void *)base_of_tables); +return 0; +} + +/* End of pcre2_dftables.c */ diff --git a/src/pcre2_error.c b/src/pcre2_error.c new file mode 100644 index 0000000..7fa997a --- /dev/null +++ b/src/pcre2_error.c @@ -0,0 +1,345 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + +#define STRING(a) # a +#define XSTRING(s) STRING(s) + +/* The texts of compile-time error messages. 
Compile-time error numbers start +at COMPILE_ERROR_BASE (100). + +This used to be a table of strings, but in order to reduce the number of +relocations needed when a shared library is loaded dynamically, it is now one +long string. We cannot use a table of offsets, because the lengths of inserts +such as XSTRING(MAX_NAME_SIZE) are not known. Instead, +pcre2_get_error_message() counts through to the one it wants - this isn't a +performance issue because these strings are used only when there is an error. + +Each substring ends with \0 to insert a null character. This includes the final +substring, so that the whole string ends with \0\0, which can be detected when +counting through. */ + +static const unsigned char compile_error_texts[] = + "no error\0" + "\\ at end of pattern\0" + "\\c at end of pattern\0" + "unrecognized character follows \\\0" + "numbers out of order in {} quantifier\0" + /* 5 */ + "number too big in {} quantifier\0" + "missing terminating ] for character class\0" + "escape sequence is invalid in character class\0" + "range out of order in character class\0" + "quantifier does not follow a repeatable item\0" + /* 10 */ + "internal error: unexpected repeat\0" + "unrecognized character after (? 
or (?-\0" + "POSIX named classes are supported only within a class\0" + "POSIX collating elements are not supported\0" + "missing closing parenthesis\0" + /* 15 */ + "reference to non-existent subpattern\0" + "pattern passed as NULL with non-zero length\0" + "unrecognised compile-time option bit(s)\0" + "missing ) after (?# comment\0" + "parentheses are too deeply nested\0" + /* 20 */ + "regular expression is too large\0" + "failed to allocate heap memory\0" + "unmatched closing parenthesis\0" + "internal error: code overflow\0" + "missing closing parenthesis for condition\0" + /* 25 */ + "length of lookbehind assertion is not limited\0" + "a relative value of zero is not allowed\0" + "conditional subpattern contains more than two branches\0" + "assertion expected after (?( or (?(?C)\0" + "digit expected after (?+ or (?-\0" + /* 30 */ + "unknown POSIX class name\0" + "internal error in pcre2_study(): should not occur\0" + "this version of PCRE2 does not have Unicode support\0" + "parentheses are too deeply nested (stack check)\0" + "character code point value in \\x{} or \\o{} is too large\0" + /* 35 */ + "lookbehind is too complicated\0" + "\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0" + "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0" + "number after (?C is greater than 255\0" + "closing parenthesis for (?C expected\0" + /* 40 */ + "invalid escape sequence in (*VERB) name\0" + "unrecognized character after (?P\0" + "syntax error in subpattern name (missing terminator?)\0" + "two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0" + "subpattern name must start with a non-digit\0" + /* 45 */ + "this version of PCRE2 does not have support for \\P, \\p, or \\X\0" + "malformed \\P or \\p sequence\0" + "unknown property after \\P or \\p\0" + "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0" + "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0" + 
/* 50 */ + "invalid range in character class\0" + "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0" + "internal error: overran compiling workspace\0" + "internal error: previously-checked referenced subpattern not found\0" + "DEFINE subpattern contains more than one branch\0" + /* 55 */ + "missing opening brace after \\o\0" + "internal error: unknown newline setting\0" + "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0" + "(?R (recursive pattern call) must be followed by a closing parenthesis\0" + /* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */ + "obsolete error (should not occur)\0" /* Was the above */ + /* 60 */ + "(*VERB) not recognized or malformed\0" + "subpattern number is too big\0" + "subpattern name expected\0" + "internal error: parsed pattern overflow\0" + "non-octal character in \\o{} (closing brace missing?)\0" + /* 65 */ + "different names for subpatterns of the same number are not allowed\0" + "(*MARK) must have an argument\0" + "non-hex character in \\x{} (closing brace missing?)\0" +#ifndef EBCDIC + "\\c must be followed by a printable ASCII character\0" +#else + "\\c must be followed by a letter or one of [\\]^_?\0" +#endif + "\\k is not followed by a braced, angle-bracketed, or quoted name\0" + /* 70 */ + "internal error: unknown meta code in check_lookbehinds()\0" + "\\N is not supported in a class\0" + "callout string is too long\0" + "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0" + "using UTF is disabled by the application\0" + /* 75 */ + "using UCP is disabled by the application\0" + "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" + "character code point value in \\u.... 
sequence is too large\0" + "digits missing in \\x{} or \\o{} or \\N{U+}\0" + "syntax error or number too big in (?(VERSION condition\0" + /* 80 */ + "internal error: unknown opcode in auto_possessify()\0" + "missing terminating delimiter for callout with string argument\0" + "unrecognized string delimiter follows (?C\0" + "using \\C is disabled by the application\0" + "(?| and/or (?J: or (?x: parentheses are too deeply nested\0" + /* 85 */ + "using \\C is disabled in this PCRE2 library\0" + "regular expression is too complicated\0" + "lookbehind assertion is too long\0" + "pattern string is longer than the limit set by the application\0" + "internal error: unknown code in parsed pattern\0" + /* 90 */ + "internal error: bad code value in parsed_skip()\0" + "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0" + "invalid option bits with PCRE2_LITERAL\0" + "\\N{U+dddd} is supported only in Unicode (UTF) mode\0" + "invalid hyphen in option setting\0" + /* 95 */ + "(*alpha_assertion) not recognized\0" + "script runs require Unicode support, which this version of PCRE2 does not have\0" + "too many capturing groups (maximum 65535)\0" + "atomic assertion expected after (?( or (?(?C)\0" + "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0" + /* 100 */ + "branch too long in variable-length lookbehind assertion\0" + "compiled pattern would be longer than the limit set by the application\0" + ; + +/* Match-time and UTF error texts are in the same format. 
/* Match-time and UTF error texts. The layout is the one that
pcre2_get_error_message() walks: every message is closed by an explicit \0, and
the terminator that the compiler appends after the last message produces an
empty string that marks the end of the list. For these errors the lookup skips
-enumber messages, so the entry at index N (counting from zero) is the text for
error number -N. Each numbered comment gives the index of the entry that
follows it. */

static const unsigned char match_error_texts[] =
  "no error\0"
  "no match\0"
  "partial match\0"
  "UTF-8 error: 1 byte missing at end\0"
  "UTF-8 error: 2 bytes missing at end\0"
  /* 5 */
  "UTF-8 error: 3 bytes missing at end\0"
  "UTF-8 error: 4 bytes missing at end\0"
  "UTF-8 error: 5 bytes missing at end\0"
  "UTF-8 error: byte 2 top bits not 0x80\0"
  "UTF-8 error: byte 3 top bits not 0x80\0"
  /* 10 */
  "UTF-8 error: byte 4 top bits not 0x80\0"
  "UTF-8 error: byte 5 top bits not 0x80\0"
  "UTF-8 error: byte 6 top bits not 0x80\0"
  "UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
  "UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
  /* 15 */
  "UTF-8 error: code points greater than 0x10ffff are not defined\0"
  "UTF-8 error: code points 0xd800-0xdfff are not defined\0"
  "UTF-8 error: overlong 2-byte sequence\0"
  "UTF-8 error: overlong 3-byte sequence\0"
  "UTF-8 error: overlong 4-byte sequence\0"
  /* 20 */
  "UTF-8 error: overlong 5-byte sequence\0"
  "UTF-8 error: overlong 6-byte sequence\0"
  "UTF-8 error: isolated byte with 0x80 bit set\0"
  "UTF-8 error: illegal byte (0xfe or 0xff)\0"
  "UTF-16 error: missing low surrogate at end\0"
  /* 25 */
  "UTF-16 error: invalid low surrogate\0"
  "UTF-16 error: isolated low surrogate\0"
  "UTF-32 error: code points 0xd800-0xdfff are not defined\0"
  "UTF-32 error: code points greater than 0x10ffff are not defined\0"
  "bad data value\0"
  /* 30 */
  "patterns do not all use the same character tables\0"
  "magic number missing\0"
  "pattern compiled in wrong mode: 8/16/32-bit error\0"
  "bad offset value\0"
  "bad option value\0"
  /* 35 */
  "invalid replacement string\0"
  "bad offset into UTF string\0"
  "callout error code\0"              /* Never returned by PCRE2 itself */
  "invalid data in workspace for DFA restart\0"
  "too much recursion for DFA matching\0"
  /* 40 */
  "backreference condition or recursion test is not supported for DFA matching\0"
  "function is not supported for DFA matching\0"
  "pattern contains an item that is not supported for DFA matching\0"
  "workspace size exceeded in DFA matching\0"
  "internal error - pattern overwritten?\0"
  /* 45 */
  "bad JIT option\0"
  "JIT stack limit reached\0"
  "match limit exceeded\0"
  "no more memory\0"
  "unknown substring\0"
  /* 50 */
  "non-unique substring name\0"
  "NULL argument passed with non-zero length\0"
  "nested recursion at the same subject position\0"
  "matching depth limit exceeded\0"
  "requested value is not available\0"
  /* 55 */
  "requested value is not set\0"
  "offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
  "bad escape sequence in replacement string\0"
  "expected closing curly bracket in replacement string\0"
  "bad substitution in replacement string\0"
  /* 60 */
  "match with end before start or start moved backwards is not supported\0"
  "too many replacements (more than INT_MAX)\0"
  "bad serialized data\0"
  "heap limit exceeded\0"
  "invalid syntax\0"
  /* 65 */
  "internal error - duplicate substitution match\0"
  "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
  "INTERNAL ERROR: invalid substring offset\0"
  ;
+ +Arguments: + enumber error number + buffer where to put the message (zero terminated) + size size of the buffer in code units + +Returns: length of message if all is well + negative on error +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size) +{ +const unsigned char *message; +PCRE2_SIZE i; +int n; + +if (size == 0) return PCRE2_ERROR_NOMEMORY; + +if (enumber >= COMPILE_ERROR_BASE) /* Compile error */ + { + message = compile_error_texts; + n = enumber - COMPILE_ERROR_BASE; + } +else if (enumber < 0) /* Match or UTF error */ + { + message = match_error_texts; + n = -enumber; + } +else /* Invalid error number */ + { + message = (unsigned char *)"\0"; /* Empty message list */ + n = 1; + } + +for (; n > 0; n--) + { + while (*message++ != CHAR_NUL) {}; + if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA; + } + +for (i = 0; *message != 0; i++) + { + if (i >= size - 1) + { + buffer[i] = 0; /* Terminate partial message */ + return PCRE2_ERROR_NOMEMORY; + } + buffer[i] = *message++; + } + +buffer[i] = 0; +return (int)i; +} + +/* End of pcre2_error.c */ diff --git a/src/pcre2_extuni.c b/src/pcre2_extuni.c new file mode 100644 index 0000000..4ed9f00 --- /dev/null +++ b/src/pcre2_extuni.c @@ -0,0 +1,162 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* This module contains an internal function that is used to match a Unicode +extended grapheme sequence. It is used by both pcre2_match() and +pcre2_dfa_match(). 
However, it is called only when Unicode support is being +compiled. Nevertheless, we provide a dummy function when there is no Unicode +support, because some compilers do not like functionless source files. */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#include "pcre2_internal.h" + + +/* Dummy function */ + +#ifndef SUPPORT_UNICODE +PCRE2_SPTR +PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject, + PCRE2_SPTR end_subject, BOOL utf, int *xcount) +{ +(void)c; +(void)eptr; +(void)start_subject; +(void)end_subject; +(void)utf; +(void)xcount; +return NULL; +} +#else + + +/************************************************* +* Match an extended grapheme sequence * +*************************************************/ + +/* NOTE: The logic contained in this function is replicated in three special- +purpose functions in the pcre2_jit_compile.c module. If the logic below is +changed, they must be kept in step so that the interpreter and the JIT have the +same behaviour. + +Arguments: + c the first character + eptr pointer to next character + start_subject pointer to start of subject + end_subject pointer to end of subject + utf TRUE if in UTF mode + xcount pointer to count of additional characters, + or NULL if count not needed + +Returns: pointer after the end of the sequence +*/ + +PCRE2_SPTR +PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject, + PCRE2_SPTR end_subject, BOOL utf, int *xcount) +{ +BOOL was_ep_ZWJ = FALSE; +int lgb = UCD_GRAPHBREAK(c); + +while (eptr < end_subject) + { + int rgb; + int len = 1; + if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } + rgb = UCD_GRAPHBREAK(c); + if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break; + + /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was + preceded by Extended Pictographic. 
*/ + + if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) + break; + + /* Not breaking between Regional Indicators is allowed only if there + are an even number of preceding RIs. */ + + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) + { + int ricount = 0; + PCRE2_SPTR bptr = eptr - 1; + if (utf) BACKCHAR(bptr); + + /* bptr is pointing to the left-hand character */ + + while (bptr > start_subject) + { + bptr--; + if (utf) + { + BACKCHAR(bptr); + GETCHAR(c, bptr); + } + else + c = *bptr; + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; + ricount++; + } + if ((ricount & 1) != 0) break; /* Grapheme break required */ + } + + /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in + between; see next statement). */ + + was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); + + /* If Extend follows Extended_Pictographic, do not update lgb; this allows + any number of them before a following ZWJ. */ + + if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb; + + eptr += len; + if (xcount != NULL) *xcount += 1; + } + +return eptr; +} + +#endif /* SUPPORT_UNICODE */ + +/* End of pcre2_extuni.c */ diff --git a/src/pcre2_find_bracket.c b/src/pcre2_find_bracket.c new file mode 100644 index 0000000..1290c5e --- /dev/null +++ b/src/pcre2_find_bracket.c @@ -0,0 +1,219 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2023 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
/*************************************************
*    Scan compiled regex for specific bracket    *
*************************************************/

/* Walk a compiled pattern one item at a time, starting at "code", until the
wanted item is found or OP_END is reached. For a non-negative "number" the
wanted item is a capturing bracket (OP_CBRA, OP_SCBRA, OP_CBRAPOS or
OP_SCBRAPOS) whose stored group number equals it; for a negative "number" it is
the first OP_REVERSE or OP_VREVERSE.

Arguments:
  code        points to start of expression
  utf         TRUE in UTF mode
  number      the required bracket number or negative to find a lookbehind

Returns:      pointer to the opcode for the bracket, or NULL if not found
*/

PCRE2_SPTR
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
{
for (;;)
  {
  PCRE2_UCHAR c = *code;

  if (c == OP_END) return NULL;

  /* XCLASS is used for classes that cannot be represented just by a bit map.
  This includes negated single high-valued characters. CALLOUT_STR is used for
  callouts with string arguments. In both cases the length in the table is
  zero; the actual length is stored in the compiled code. */

  if (c == OP_XCLASS) code += GET(code, 1);
  else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);

  /* Handle lookbehind. Any lookbehind satisfies a negative request; otherwise
  it is skipped using its table length. */

  else if (c == OP_REVERSE || c == OP_VREVERSE)
    {
    if (number < 0) return (PCRE2_UCHAR *)code;
    code += PRIV(OP_lengths)[c];
    }

  /* Handle capturing bracket. The group number is read from the compiled code
  at offset 1+LINK_SIZE. */

  else if (c == OP_CBRA || c == OP_SCBRA ||
           c == OP_CBRAPOS || c == OP_SCBRAPOS)
    {
    int n = (int)GET2(code, 1+LINK_SIZE);
    if (n == number) return (PCRE2_UCHAR *)code;
    code += PRIV(OP_lengths)[c];
    }

  /* Otherwise, we can get the item's length from the table, except that for
  repeated character types, we have to test for \p and \P, which have an extra
  two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
  must add in its length. */

  else
    {
    switch(c)
      {
      /* Repeats whose character type immediately follows the opcode. */

      case OP_TYPESTAR:
      case OP_TYPEMINSTAR:
      case OP_TYPEPLUS:
      case OP_TYPEMINPLUS:
      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      case OP_TYPEPOSQUERY:
      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
      break;

      /* Repeats with an IMM2_SIZE field between the opcode and the character
      type. */

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEEXACT:
      case OP_TYPEPOSUPTO:
      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
        code += 2;
      break;

      /* Verbs with an argument: the code unit after the opcode is added to the
      skip distance. */

      case OP_MARK:
      case OP_COMMIT_ARG:
      case OP_PRUNE_ARG:
      case OP_SKIP_ARG:
      case OP_THEN_ARG:
      code += code[1];
      break;
      }

    /* Add in the fixed length from the table */

    code += PRIV(OP_lengths)[c];

  /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
  followed by a multi-byte character. The length in the table is a minimum, so
  we have to arrange to skip the extra bytes. This must run after the table
  length has been added, because the test is made on code[-1], the last code
  unit that the table length covered. */

#ifdef MAYBE_UTF_MULTI
    if (utf) switch(c)
      {
      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      case OP_EXACT:
      case OP_EXACTI:
      case OP_NOTEXACT:
      case OP_NOTEXACTI:
      case OP_UPTO:
      case OP_UPTOI:
      case OP_NOTUPTO:
      case OP_NOTUPTOI:
      case OP_MINUPTO:
      case OP_MINUPTOI:
      case OP_NOTMINUPTO:
      case OP_NOTMINUPTOI:
      case OP_POSUPTO:
      case OP_POSUPTOI:
      case OP_NOTPOSUPTO:
      case OP_NOTPOSUPTOI:
      case OP_STAR:
      case OP_STARI:
      case OP_NOTSTAR:
      case OP_NOTSTARI:
      case OP_MINSTAR:
      case OP_MINSTARI:
      case OP_NOTMINSTAR:
      case OP_NOTMINSTARI:
      case OP_POSSTAR:
      case OP_POSSTARI:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSSTARI:
      case OP_PLUS:
      case OP_PLUSI:
      case OP_NOTPLUS:
      case OP_NOTPLUSI:
      case OP_MINPLUS:
      case OP_MINPLUSI:
      case OP_NOTMINPLUS:
      case OP_NOTMINPLUSI:
      case OP_POSPLUS:
      case OP_POSPLUSI:
      case OP_NOTPOSPLUS:
      case OP_NOTPOSPLUSI:
      case OP_QUERY:
      case OP_QUERYI:
      case OP_NOTQUERY:
      case OP_NOTQUERYI:
      case OP_MINQUERY:
      case OP_MINQUERYI:
      case OP_NOTMINQUERY:
      case OP_NOTMINQUERYI:
      case OP_POSQUERY:
      case OP_POSQUERYI:
      case OP_NOTPOSQUERY:
      case OP_NOTPOSQUERYI:
      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
      break;
      }
#else
    (void)(utf);  /* Keep compiler happy by referencing function argument */
#endif  /* MAYBE_UTF_MULTI */
    }
  }
}

/* End of pcre2_find_bracket.c */
+If an argument starts with '=' the rest of it it is taken as a literal string +rather than a file name. This allows easy testing of short strings. + +Written by Philip Hazel, October 2016 +Updated February 2024 (Addison Crump added 16-bit/32-bit and JIT support) +Further updates March/April/May 2024 by PH +***************************************************************************/ + +#include +#include +#include +#include +#include +#include + +/* stack size adjustment */ +#include +#include + +#define STACK_SIZE_MB 256 +#define JIT_SIZE_LIMIT (200 * 1024) + +#ifndef PCRE2_CODE_UNIT_WIDTH +#define PCRE2_CODE_UNIT_WIDTH 8 +#endif + +#include "config.h" +#include "pcre2.h" +#include "pcre2_internal.h" + +#define MAX_MATCH_SIZE 1000 + +#define DFA_WORKSPACE_COUNT 100 + +/* When adding new compile or match options, remember to update the functions +below that output them. */ + +#define ALLOWED_COMPILE_OPTIONS \ + (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \ + PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \ + PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED| \ + PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE| \ + PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \ + PCRE2_NO_AUTO_CAPTURE| \ + PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \ + PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \ + PCRE2_UTF) + +#define ALLOWED_MATCH_OPTIONS \ + (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ + PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \ + PCRE2_PARTIAL_SOFT) + +#define BASE_MATCH_OPTIONS \ + (PCRE2_NO_JIT|PCRE2_DISABLE_RECURSELOOP_CHECK) + + +#if defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE) +static void print_compile_options(FILE *stream, uint32_t compile_options) +{ +fprintf(stream, "Compile options %s%.8x =", + (compile_options == PCRE2_NEVER_BACKSLASH_C)? 
"(base) " : "", + compile_options); + +fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + ((compile_options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "", + ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "", + ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "", + ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "", + ((compile_options & PCRE2_ANCHORED) != 0)? " anchored" : "", + ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "", + ((compile_options & PCRE2_CASELESS) != 0)? " caseless" : "", + ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "", + ((compile_options & PCRE2_DOTALL) != 0)? " dotall" : "", + ((compile_options & PCRE2_DUPNAMES) != 0)? " dupnames" : "", + ((compile_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "", + ((compile_options & PCRE2_EXTENDED) != 0)? " extended" : "", + ((compile_options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "", + ((compile_options & PCRE2_FIRSTLINE) != 0)? " firstline" : "", + ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "", + ((compile_options & PCRE2_MULTILINE) != 0)? " multiline" : "", + ((compile_options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "", + ((compile_options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "", + ((compile_options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "", + ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "", + ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "", + ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "", + ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "", + ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "", + ((compile_options & PCRE2_UCP) != 0)? " ucp" : "", + ((compile_options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "", + ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? 
" use_offset_limit" : "", + ((compile_options & PCRE2_UTF) != 0)? " utf" : ""); +} + +static void print_match_options(FILE *stream, uint32_t match_options) +{ +fprintf(stream, "Match options %s%.8x =", + (match_options == BASE_MATCH_OPTIONS)? "(base) " : "", match_options); + +fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s\n", + ((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "", + ((match_options & PCRE2_DISABLE_RECURSELOOP_CHECK) != 0)? " disable_recurseloop_check" : "", + ((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "", + ((match_options & PCRE2_NO_JIT) != 0)? " no_jit" : "", + ((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "", + ((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "", + ((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "", + ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "", + ((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "", + ((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "", + ((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : ""); +} + + +/* This function can print an error message at all code unit widths. */ + +static void print_error(FILE *f, int errorcode, const char *text, ...) 
+{ +PCRE2_UCHAR buffer[256]; +PCRE2_UCHAR *p = buffer; +va_list ap; +va_start(ap, text); +vfprintf(f, text, ap); +va_end(ap); +pcre2_get_error_message(errorcode, buffer, 256); +while (*p != 0) fprintf(f, "%c", *p++); +printf("\n"); +} +#endif /* defined(SUPPORT_DIFF_FUZZ || defined(STANDALONE) */ + + +#ifdef SUPPORT_JIT +#ifdef SUPPORT_DIFF_FUZZ +static void dump_matches(FILE *stream, int count, pcre2_match_data *match_data) +{ +int errorcode; + +for (int index = 0; index < count; index++) + { + PCRE2_UCHAR *bufferptr = NULL; + PCRE2_SIZE bufflen = 0; + + errorcode = pcre2_substring_get_bynumber(match_data, index, &bufferptr, + &bufflen); + + if (errorcode >= 0) + { + fprintf(stream, "Match %d (hex encoded): ", index); + for (PCRE2_SIZE i = 0; i < bufflen; i++) + { + fprintf(stream, "%02x", bufferptr[i]); + } + fprintf(stream, "\n"); + } + else + { + print_error(stream, errorcode, "Match %d failed: ", index); + } + } +} + +/* This function describes the current test case being evaluated, then aborts */ + +static void describe_failure( + const char *task, + const unsigned char *data, + size_t size, + uint32_t compile_options, + uint32_t match_options, + int errorcode, + int errorcode_jit, + int matches, + int matches_jit, + pcre2_match_data *match_data, + pcre2_match_data *match_data_jit +) { + +fprintf(stderr, "Encountered failure while performing %s; context:\n", task); + +fprintf(stderr, "Pattern/sample string (hex encoded): "); +for (size_t i = 0; i < size; i++) + { + fprintf(stderr, "%02x", data[i]); + } +fprintf(stderr, "\n"); + +print_compile_options(stderr, compile_options); +print_match_options(stderr, match_options); + +if (errorcode < 0) + { + print_error(stderr, errorcode, "Non-JIT'd operation emitted an error: "); + } + +if (matches >= 0) + { + fprintf(stderr, "Non-JIT'd operation did not emit an error.\n"); + if (match_data != NULL) + { + fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", matches); + dump_matches(stderr, matches, 
match_data); + fprintf(stderr, "\n"); + } + } + +if (errorcode_jit < 0) + { + print_error(stderr, errorcode_jit, "JIT'd operation emitted error %d:", + errorcode_jit); + } + +if (matches_jit >= 0) + { + fprintf(stderr, "JIT'd operation did not emit an error.\n"); + if (match_data_jit != NULL) + { + fprintf(stderr, "%d matches discovered by JIT'd regex:\n", matches_jit); + dump_matches(stderr, matches_jit, match_data_jit); + fprintf(stderr, "\n"); + } + } + +abort(); +} +#endif /* SUPPORT_DIFF_FUZZ */ +#endif /* SUPPORT_JIT */ + +/* This is the callout function. Its only purpose is to halt matching if there +are more than 100 callouts, as one way of stopping too much time being spent on +fruitless matches. The callout data is a pointer to the counter. */ + +static int callout_function(pcre2_callout_block *cb, void *callout_data) +{ +(void)cb; /* Avoid unused parameter warning */ +*((uint32_t *)callout_data) += 1; +return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0; +} + +/* Putting in this apparently unnecessary prototype prevents gcc from giving a +"no previous prototype" warning when compiling at high warning level. */ + +int LLVMFuzzerInitialize(int *, char ***); + +int LLVMFuzzerTestOneInput(unsigned char *, size_t); + +int LLVMFuzzerInitialize(int *argc, char ***argv) +{ +int rc; +struct rlimit rlim; +getrlimit(RLIMIT_STACK, &rlim); +rlim.rlim_cur = STACK_SIZE_MB * 1024 * 1024; +if (rlim.rlim_cur > rlim.rlim_max) + { + fprintf(stderr, "Hard stack size limit is too small (needed 8MiB)!\n"); + _exit(1); + } +rc = setrlimit(RLIMIT_STACK, &rlim); +if (rc != 0) + { + fprintf(stderr, "Failed to expand stack size\n"); + _exit(1); + } + +(void)argc; /* Avoid "unused parameter" warnings */ +(void)argv; +return 0; +} + +/* Here's the driving function. 
*/ + +int LLVMFuzzerTestOneInput(unsigned char *data, size_t size) +{ +PCRE2_UCHAR *wdata; +PCRE2_UCHAR *newwdata = NULL; +uint32_t compile_options; +uint32_t match_options; +uint64_t random_options; +pcre2_match_data *match_data = NULL; +#ifdef SUPPORT_JIT +pcre2_match_data *match_data_jit = NULL; +#endif +pcre2_compile_context *compile_context = NULL; +pcre2_match_context *match_context = NULL; +size_t match_size; +int dfa_workspace[DFA_WORKSPACE_COUNT]; + +if (size < sizeof(random_options)) return -1; + +random_options = *(uint64_t *)(data); +data += sizeof(random_options); +wdata = (PCRE2_UCHAR *)data; +size -= sizeof(random_options); +size /= PCRE2_CODE_UNIT_WIDTH / 8; + +/* PCRE2 compiles quantified groups by replicating them. In certain cases of +very large quantifiers this can lead to unacceptably long JIT compile times. To +get around this, we scan the data string for large quantifiers that follow a +closing parenthesis, and reduce the value of the quantifier to 10, assuming +that this will make minimal difference to the detection of bugs. + +Do the same for quantifiers that follow a closing square bracket, because +classes that contain a number of non-ascii characters can take a lot of time +when matching. + +We have to make a copy of the input because oss-fuzz complains if we overwrite +the original. Start the scan at the second character so there can be a +lookbehind for a backslash, and end it before the end so that the next +character can be checked for an opening brace. */ + +if (size > 3) + { + newwdata = malloc(size * sizeof(PCRE2_UCHAR)); + memcpy(newwdata, wdata, size * sizeof(PCRE2_UCHAR)); + wdata = newwdata; + + for (size_t i = 1; i < size - 2; i++) + { + size_t j; + + if ((wdata[i] != ')' && wdata[i] != ']') || wdata[i-1] == '\\' || + wdata[i+1] != '{') + continue; + i++; /* Points to '{' */ + + /* Loop for two values a quantifier. 
Offset i points to brace or comma at the + start of the loop.*/ + + for (int ii = 0; ii < 2; ii++) + { + int q = 0; + + if (i >= size - 1) goto END_QSCAN; /* Can happen for , */ + + /* Ignore leading spaces */ + + while (wdata[i+1] == ' ' || wdata[i+1] == '\t') + { + i++; + if (i >= size - 1) goto END_QSCAN; + } + + /* Scan for a number ending in brace or comma in the first iteration, + optionally preceded by space. */ + + for (j = i + 1; j < size && j < i + 7; j++) + { + if (wdata[j] == ' ' || wdata[j] == '\t') + { + j++; + while (j < size && (wdata[j] == ' ' || wdata[j] == '\t')) j++; + if (j >= size) goto OUTERLOOP; + if (wdata[j] != '}' && wdata[j] != ',') goto OUTERLOOP; + } + if (wdata[j] == '}' || (ii == 0 && wdata[j] == ',')) break; + if (wdata[j] < '0' || wdata[j] > '9') + { + j--; /* Ensure this character is checked next. The */ + goto OUTERLOOP; /* string might be (e.g.) "){9){234}" */ + } + q = q * 10 + wdata[j] - '0'; + } + + if (j >= size) goto END_QSCAN; /* End of data */ + + /* Hit ',' or '}' or read 6 digits. Six digits is a number > 65536 which is + the maximum quantifier. Leave such numbers alone. */ + + if (j >= i + 7 || q > 65535) goto OUTERLOOP; + + /* Limit the quantifier size to 10 */ + + if (q > 10) + { +#ifdef STANDALONE + printf("Reduced quantifier value %d to 10.\n", q); +#endif + for (size_t k = i + 1; k < j; k++) wdata[k] = '0'; + wdata[j - 2] = '1'; + } + + /* Advance to end of number and break if reached closing brace (continue + after comma, which is only valid in the first time round this loop). */ + + i = j; + if (wdata[i] == '}') break; + } + + /* Continue along the data string */ + + OUTERLOOP: + i = j; + continue; + } + } +END_QSCAN: + +/* Limiting the length of the subject for matching stops fruitless searches +in large trees taking too much time. */ + +match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size; + +/* Create a compile context, and set a limit on the size of the compiled +pattern. 
This stops the fuzzer using vast amounts of memory. */ + +compile_context = pcre2_compile_context_create(NULL); +if (compile_context == NULL) + { +#ifdef STANDALONE + fprintf(stderr, "** Failed to create compile context block\n"); +#endif + abort(); + } +pcre2_set_max_pattern_compiled_length(compile_context, 10*1024*1024); + +/* Ensure that all undefined option bits are zero (waste of time trying them) +and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the +input is valid UTF. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is +no reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set +because \C in random patterns is highly likely to cause a crash. */ + +compile_options = ((random_options >> 32) & ALLOWED_COMPILE_OPTIONS) | + PCRE2_NEVER_BACKSLASH_C; +match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) | + BASE_MATCH_OPTIONS; + +/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not +allowed together and just give an immediate error return. */ + +if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0) + match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT); + +/* Do the compile with and without the options, and after a successful compile, +likewise do the match with and without the options. */ + +for (int i = 0; i < 2; i++) + { + uint32_t callout_count; + int errorcode; +#ifdef SUPPORT_JIT + int errorcode_jit; +#ifdef SUPPORT_DIFF_FUZZ + int matches = 0; + int matches_jit = 0; +#endif +#endif + PCRE2_SIZE erroroffset; + pcre2_code *code; + +#ifdef STANDALONE + printf("\n"); + print_compile_options(stdout, compile_options); +#endif + + code = pcre2_compile((PCRE2_SPTR)wdata, (PCRE2_SIZE)size, compile_options, + &errorcode, &erroroffset, compile_context); + + /* Compilation succeeded */ + + if (code != NULL) + { + int j; + uint32_t save_match_options = match_options; + + /* Call JIT compile only if the compiled pattern is not too big. 
*/ + +#ifdef SUPPORT_JIT + int jit_ret = -1; + if (((struct pcre2_real_code *)code)->blocksize <= JIT_SIZE_LIMIT) + { +#ifdef STANDALONE + printf("Compile succeeded; calling JIT compile\n"); +#endif + jit_ret = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE); +#ifdef STANDALONE + if (jit_ret < 0) printf("JIT compile error %d\n", jit_ret); +#endif + } + else + { +#ifdef STANDALONE + printf("Not calling JIT: compiled pattern is too long " + "(%ld bytes; limit=%d)\n", + ((struct pcre2_real_code *)code)->blocksize, JIT_SIZE_LIMIT); +#endif + } +#endif /* SUPPORT_JIT */ + + /* Create match data and context blocks only when we first need them. Set + low match and depth limits to avoid wasting too much searching large + pattern trees. Almost all matches are going to fail. */ + + if (match_data == NULL) + { + match_data = pcre2_match_data_create(32, NULL); +#ifdef SUPPORT_JIT + match_data_jit = pcre2_match_data_create(32, NULL); + if (match_data == NULL || match_data_jit == NULL) +#else + if (match_data == NULL) +#endif + { +#ifdef STANDALONE + fprintf(stderr, "** Failed to create match data block\n"); +#endif + abort(); + } + } + + if (match_context == NULL) + { + match_context = pcre2_match_context_create(NULL); + if (match_context == NULL) + { +#ifdef STANDALONE + fprintf(stderr, "** Failed to create match context block\n"); +#endif + abort(); + } + (void)pcre2_set_match_limit(match_context, 100); + (void)pcre2_set_depth_limit(match_context, 100); + (void)pcre2_set_callout(match_context, callout_function, &callout_count); + } + + /* Match twice, with and without options. 
*/ + +#ifdef STANDALONE + printf("\n"); +#endif + for (j = 0; j < 2; j++) + { +#ifdef STANDALONE + print_match_options(stdout, match_options); +#endif + + callout_count = 0; + errorcode = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0, + match_options, match_data, match_context); + +#ifdef STANDALONE + if (errorcode >= 0) printf("Match returned %d\n", errorcode); else + print_error(stdout, errorcode, "Match failed: error %d: ", errorcode); +#endif + +/* If JIT is enabled, do a JIT match and, if appropriately compiled, compare +with the interpreter. */ + +#ifdef SUPPORT_JIT + if (jit_ret >= 0) + { +#ifdef STANDALONE + printf("Matching with JIT\n"); +#endif + callout_count = 0; + errorcode_jit = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0, + match_options & ~PCRE2_NO_JIT, match_data_jit, match_context); + +#ifdef STANDALONE + if (errorcode_jit >= 0) + printf("Match returned %d\n", errorcode_jit); + else + print_error(stdout, errorcode_jit, "JIT match failed: error %d: ", + errorcode_jit); +#else + (void)errorcode_jit; /* Avoid compiler warning */ +#endif /* STANDALONE */ + +/* With differential matching enabled, compare with interpreter. 
*/ + +#ifdef SUPPORT_DIFF_FUZZ + matches = errorcode; + matches_jit = errorcode_jit; + + if (errorcode_jit != errorcode) + { + if (!(errorcode < 0 && errorcode_jit < 0) && + errorcode != PCRE2_ERROR_MATCHLIMIT && errorcode != PCRE2_ERROR_CALLOUT && + errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT && errorcode_jit != PCRE2_ERROR_CALLOUT) + { + describe_failure("match errorcode comparison", wdata, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit); + } + } + else + { + for (int index = 0; index < errorcode; index++) + { + PCRE2_UCHAR *bufferptr, *bufferptr_jit; + PCRE2_SIZE bufflen, bufflen_jit; + + bufferptr = bufferptr_jit = NULL; + bufflen = bufflen_jit = 0; + + errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, &bufferptr, &bufflen); + errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit); + + if (errorcode != errorcode_jit) + { + describe_failure("match entry errorcode comparison", wdata, size, + compile_options, match_options, errorcode, errorcode_jit, + matches, matches_jit, match_data, match_data_jit); + } + + if (errorcode >= 0) + { + if (bufflen != bufflen_jit) + { + describe_failure("match entry length comparison", wdata, size, + compile_options, match_options, errorcode, errorcode_jit, + matches, matches_jit, match_data, match_data_jit); + } + + if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0) + { + describe_failure("match entry content comparison", wdata, size, + compile_options, match_options, errorcode, errorcode_jit, + matches, matches_jit, match_data, match_data_jit); + } + } + + pcre2_substring_free(bufferptr); + pcre2_substring_free(bufferptr_jit); + } + } +#endif /* SUPPORT_DIFF_FUZZ */ + } +#endif /* SUPPORT_JIT */ + + if (match_options == BASE_MATCH_OPTIONS) break; /* Don't do same twice */ + match_options = BASE_MATCH_OPTIONS; /* For second time */ + } + + /* Match 
with DFA twice, with and without options, but remove options that + are not allowed with DFA. */ + + match_options = save_match_options & ~BASE_MATCH_OPTIONS; + +#ifdef STANDALONE + printf("\n"); +#endif + + for (j = 0; j < 2; j++) + { +#ifdef STANDALONE + printf("DFA match options %.8x =", match_options); + printf("%s%s%s%s%s%s%s%s%s\n", + ((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "", + ((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "", + ((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "", + ((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "", + ((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "", + ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "", + ((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "", + ((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "", + ((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : ""); +#endif + + callout_count = 0; + errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)wdata, + (PCRE2_SIZE)match_size, 0, match_options, match_data, + match_context, dfa_workspace, DFA_WORKSPACE_COUNT); + +#ifdef STANDALONE + if (errorcode >= 0) + printf("Match returned %d\n", errorcode); + else + print_error(stdout, errorcode, "DFA match failed: error %d: ", errorcode); +#endif + + if (match_options == 0) break; /* No point doing same twice */ + match_options = 0; /* For second time */ + } + + match_options = save_match_options; /* Reset for the second compile */ + pcre2_code_free(code); + } + + /* Compilation failed */ + + else + { +#ifdef STANDALONE + print_error(stdout, errorcode, "Error %d at offset %lu: ", errorcode, + erroroffset); +#else + if (errorcode == PCRE2_ERROR_INTERNAL) abort(); +#endif + } + + if (compile_options == PCRE2_NEVER_BACKSLASH_C) break; /* Avoid same twice */ + compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */ + } + +/* Tidy up before exiting */ + +if (match_data != NULL) pcre2_match_data_free(match_data); +#ifdef 
SUPPORT_JIT +if (match_data_jit != NULL) pcre2_match_data_free(match_data_jit); +free(newwdata); +#endif +if (match_context != NULL) pcre2_match_context_free(match_context); +if (compile_context != NULL) pcre2_compile_context_free(compile_context); +return 0; +} + + +/* Optional main program, compiled only when STANDALONE is defined. It calls LLVMFuzzerInitialize() and then treats each argument either as a literal string (when it starts with an equals sign) or as the name of a file, passing the data to LLVMFuzzerTestOneInput() in a heap buffer of exactly the data's length. It returns 0 in all cases. NOTE(review): the size_t values filelen and readsize are printed with %lu, which presumes that size_t is unsigned long - confirm for targets where that does not hold. */ + +#ifdef STANDALONE +int main(int argc, char **argv) +{ +LLVMFuzzerInitialize(&argc, &argv); + +if (argc < 2) + { + printf("** No arguments given\n"); + return 0; + } + +for (int i = 1; i < argc; i++) + { + size_t filelen; + size_t readsize; + unsigned char *buffer; + FILE *f; + + /* Handle a literal string. Copy to an exact size buffer so that checks for + overrunning work. */ + + if (argv[i][0] == '=') + { + readsize = strlen(argv[i]) - 1; + printf("------ ------\n"); + printf("Length = %lu\n", readsize); + printf("%.*s\n", (int)readsize, argv[i]+1); + buffer = (unsigned char *)malloc(readsize); + if (buffer == NULL) + printf("** Failed to allocate %lu bytes of memory\n", readsize); + else + { + memcpy(buffer, argv[i]+1, readsize); + LLVMFuzzerTestOneInput(buffer, readsize); + free(buffer); + } + continue; + } + + /* Handle a string given in a file. The length is obtained by seeking to the end and calling ftell(); the results of fseek() and ftell() are not checked. */ + + f = fopen(argv[i], "rb"); + if (f == NULL) + { + printf("** Failed to open %s: %s\n", argv[i], strerror(errno)); + continue; + } + + printf("------ %s ------\n", argv[i]); + + fseek(f, 0, SEEK_END); + filelen = ftell(f); + fseek(f, 0, SEEK_SET); + + buffer = (unsigned char *)malloc(filelen); + if (buffer == NULL) + { + printf("** Failed to allocate %lu bytes of memory\n", filelen); + fclose(f); + continue; + } + + readsize = fread(buffer, 1, filelen, f); + fclose(f); + + if (readsize != filelen) + printf("** File size is %lu but fread() returned %lu\n", filelen, readsize); + else + { + printf("Length = %lu\n", filelen); + LLVMFuzzerTestOneInput(buffer, filelen); + } + free(buffer); + } + +return 0; +} +#endif /* STANDALONE */ + +/* End */ diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h new file mode 100644 
index 0000000..e580818 --- /dev/null +++ b/src/pcre2_internal.h @@ -0,0 +1,2084 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE2 is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2023 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifndef PCRE2_INTERNAL_H_IDEMPOTENT_GUARD +#define PCRE2_INTERNAL_H_IDEMPOTENT_GUARD + +/* We do not support both EBCDIC and Unicode at the same time. The "configure" +script prevents both being selected, but not everybody uses "configure". EBCDIC +is only supported for the 8-bit library, but the check for this has to be later +in this file, because the first part is not width-dependent, and is included by +pcre2test.c with CODE_UNIT_WIDTH == 0. */ + +#if defined EBCDIC && defined SUPPORT_UNICODE +#error The use of both EBCDIC and SUPPORT_UNICODE is not supported. +#endif + +/* When compiling one of the libraries, the value of PCRE2_CODE_UNIT_WIDTH must +be 8, 16, or 32. AutoTools and CMake ensure that this is always the case, but +other building methods may not, so here is a check. It is cut out when +building pcre2test, because that sets the value to zero. No other source should +be including this file. There is no explicit way of forcing a compile to be +abandoned, but trying to include a non-existent file seems cleanest. Otherwise +there will be many irrelevant consequential errors. */ + +#if (!defined PCRE2_BUILDING_PCRE2TEST && !defined PCRE2_DFTABLES) && \ + (!defined PCRE2_CODE_UNIT_WIDTH || \ + (PCRE2_CODE_UNIT_WIDTH != 8 && \ + PCRE2_CODE_UNIT_WIDTH != 16 && \ + PCRE2_CODE_UNIT_WIDTH != 32)) +#error PCRE2_CODE_UNIT_WIDTH must be defined as 8, 16, or 32. 
+#include +#endif + + +/* Standard C headers */ + +#include +#include +#include +#include +#include +#include + +/* Macros to make boolean values more obvious. The #ifndef is to pacify +compiler warnings in environments where these macros are defined elsewhere. +Unfortunately, there is no way to do the same for the typedef. */ + +typedef int BOOL; +#ifndef FALSE +#define FALSE 0 +#define TRUE 1 +#endif + +/* Valgrind (memcheck) support */ + +#ifdef SUPPORT_VALGRIND +#include +#endif + +/* The -ftrivial-auto-var-init option supports initializing all local variables +to avoid some classes of bug, but this can cause an unacceptable slowdown +for large on-stack arrays in hot functions. This macro lets us annotate +such arrays. */ + +#ifdef HAVE_ATTRIBUTE_UNINITIALIZED +#define PCRE2_KEEP_UNINITIALIZED __attribute__((uninitialized)) +#else +#define PCRE2_KEEP_UNINITIALIZED +#endif + +/* Older versions of MSVC lack snprintf(). This define allows for +warning/error-free compilation and testing with MSVC compilers back to at least +MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */ + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define snprintf _snprintf +#endif + +/* When compiling a DLL for Windows, the exported symbols have to be declared +using some MS magic. I found some useful information on this web page: +http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the +information there, using __declspec(dllexport) without "extern" we have a +definition; with "extern" we have a declaration. The settings here override the +setting in pcre2.h (which is included below); it defines only PCRE2_EXP_DECL, +which is all that is needed for applications (they just import the symbols). 
We +use: + + PCRE2_EXP_DECL for declarations + PCRE2_EXP_DEFN for definitions + +The reason for wrapping this in #ifndef PCRE2_EXP_DECL is so that pcre2test, +which is an application, but needs to import this file in order to "peek" at +internals, can #include pcre2.h first to get an application's-eye view. + +In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon, +special-purpose environments) might want to stick other stuff in front of +exported symbols. That's why, in the non-Windows case, we set PCRE2_EXP_DEFN +only if it is not already set. */ + +#ifndef PCRE2_EXP_DECL +# ifdef _WIN32 +# ifndef PCRE2_STATIC +# define PCRE2_EXP_DECL extern __declspec(dllexport) +# define PCRE2_EXP_DEFN __declspec(dllexport) +# else +# define PCRE2_EXP_DECL extern PCRE2_EXPORT +# define PCRE2_EXP_DEFN +# endif +# else +# ifdef __cplusplus +# define PCRE2_EXP_DECL extern "C" PCRE2_EXPORT +# else +# define PCRE2_EXP_DECL extern PCRE2_EXPORT +# endif +# ifndef PCRE2_EXP_DEFN +# define PCRE2_EXP_DEFN PCRE2_EXP_DECL +# endif +# endif +#endif + +/* Include the public PCRE2 header and the definitions of UCP character +property values. This must follow the setting of PCRE2_EXP_DECL above. */ + +#include "pcre2.h" +#include "pcre2_ucp.h" + +/* When PCRE2 is compiled as a C++ library, the subject pointer can be replaced +with a custom type. This makes it possible, for example, to allow pcre2_match() +to process subject strings that are discontinuous by using a smart pointer +class. It must always be possible to inspect all of the subject string in +pcre2_match() because of the way it backtracks. */ + +/* WARNING: This is as yet untested for PCRE2. */ + +#ifdef CUSTOM_SUBJECT_PTR +#undef PCRE2_SPTR +#define PCRE2_SPTR CUSTOM_SUBJECT_PTR +#endif + +/* When checking for integer overflow, we need to handle large integers. +If a 64-bit integer type is available, we can use that. +Otherwise we have to cast to double, which of course requires floating point +arithmetic. 
Handle this by defining a macro for the appropriate type. */ + +#if defined INT64_MAX || defined int64_t +#define INT64_OR_DOUBLE int64_t +#else +#define INT64_OR_DOUBLE double +#endif + +/* External (in the C sense) functions and tables that are private to the +libraries are always referenced using the PRIV macro. This makes it possible +for pcre2test.c to include some of the source files from the libraries using a +different PRIV definition to avoid name clashes. It also makes it clear in the +code that a non-static object is being referenced. */ + +#ifndef PRIV +#define PRIV(name) _pcre2_##name +#endif + +/* When compiling for use with the Virtual Pascal compiler, these functions +need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT +option on the command line. */ + +#ifdef VPCOMPAT +#define strlen(s) _strlen(s) +#define strncmp(s1,s2,m) _strncmp(s1,s2,m) +#define memcmp(s,c,n) _memcmp(s,c,n) +#define memcpy(d,s,n) _memcpy(d,s,n) +#define memmove(d,s,n) _memmove(d,s,n) +#define memset(s,c,n) _memset(s,c,n) +#else /* VPCOMPAT */ + +/* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define +a macro that calls an emulating function. */ + +#ifndef HAVE_MEMMOVE +#undef memmove /* Some systems may have a macro */ +#define memmove(a, b, c) PRIV(memmove)(a, b, c) +#endif /* not HAVE_MEMMOVE */ +#endif /* not VPCOMPAT */ + +/* This is an unsigned int value that no UTF character can ever have, as +Unicode doesn't go beyond 0x0010ffff. */ + +#define NOTACHAR 0xffffffff + +/* This is the largest valid UTF/Unicode code point. */ + +#define MAX_UTF_CODE_POINT 0x10ffff + +/* Compile-time positive error numbers (all except UTF errors, which are +negative) start at this value. It should probably never be changed, in case +some application is checking for specific numbers. There is a copy of this +#define in pcre2posix.c (which now no longer includes this file). 
Ideally, a +way of having a single definition should be found, but as the number is +unlikely to change, this is not a pressing issue. The original reason for +having a base other than 0 was to keep the absolute values of compile-time and +run-time error numbers numerically different, but in the event the code does +not rely on this. */ + +#define COMPILE_ERROR_BASE 100 + +/* The initial frames vector for remembering pcre2_match() backtracking points +is allocated on the heap, of this size (bytes) or ten times the frame size if +larger, unless the heap limit is smaller. Typical frame sizes are a few hundred +bytes (it depends on the number of capturing parentheses) so 20KiB handles +quite a few frames. A larger vector on the heap is obtained for matches that +need more frames, subject to the heap limit. */ + +#define START_FRAMES_SIZE 20480 + +/* For DFA matching, an initial internal workspace vector is allocated on the +stack. The heap is used only if this turns out to be too small. */ + +#define DFA_START_RWS_SIZE 30720 + +/* Define the default BSR convention. */ + +#ifdef BSR_ANYCRLF +#define BSR_DEFAULT PCRE2_BSR_ANYCRLF +#else +#define BSR_DEFAULT PCRE2_BSR_UNICODE +#endif + + +/* ---------------- Basic UTF-8 macros ---------------- */ + +/* These UTF-8 macros are always defined because they are used in pcre2test for +handling wide characters in 16-bit and 32-bit modes, even if an 8-bit library +is not supported. */ + +/* Tests whether a UTF-8 code point needs extra bytes to decode. */ + +#define HASUTF8EXTRALEN(c) ((c) >= 0xc0) + +/* The following macros were originally written in the form of loops that used +data from the tables whose names start with PRIV(utf8_table). They were +rewritten by a user so as not to use loops, because in some environments this +gives a significant performance advantage, and it seems never to do any harm. +*/ + +/* Base macro to pick up the remaining bytes of a UTF-8 character, not +advancing the pointer. 
*/ + +#define GETUTF8(c, eptr) \ + { \ + if ((c & 0x20u) == 0) \ + c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \ + else if ((c & 0x10u) == 0) \ + c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ + else if ((c & 0x08u) == 0) \ + c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \ + ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \ + else if ((c & 0x04u) == 0) \ + c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \ + ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \ + (eptr[4] & 0x3fu); \ + else \ + c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \ + ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \ + ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \ + } + +/* Base macro to pick up the remaining bytes of a UTF-8 character, advancing +the pointer. */ + +#define GETUTF8INC(c, eptr) \ + { \ + if ((c & 0x20u) == 0) \ + c = ((c & 0x1fu) << 6) | (*eptr++ & 0x3fu); \ + else if ((c & 0x10u) == 0) \ + { \ + c = ((c & 0x0fu) << 12) | ((*eptr & 0x3fu) << 6) | (eptr[1] & 0x3fu); \ + eptr += 2; \ + } \ + else if ((c & 0x08u) == 0) \ + { \ + c = ((c & 0x07u) << 18) | ((*eptr & 0x3fu) << 12) | \ + ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ + eptr += 3; \ + } \ + else if ((c & 0x04u) == 0) \ + { \ + c = ((c & 0x03u) << 24) | ((*eptr & 0x3fu) << 18) | \ + ((eptr[1] & 0x3fu) << 12) | ((eptr[2] & 0x3fu) << 6) | \ + (eptr[3] & 0x3fu); \ + eptr += 4; \ + } \ + else \ + { \ + c = ((c & 0x01u) << 30) | ((*eptr & 0x3fu) << 24) | \ + ((eptr[1] & 0x3fu) << 18) | ((eptr[2] & 0x3fu) << 12) | \ + ((eptr[3] & 0x3fu) << 6) | (eptr[4] & 0x3fu); \ + eptr += 5; \ + } \ + } + +/* Base macro to pick up the remaining bytes of a UTF-8 character, not +advancing the pointer, incrementing the length. 
*/ + +#define GETUTF8LEN(c, eptr, len) \ + { \ + if ((c & 0x20u) == 0) \ + { \ + c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \ + len++; \ + } \ + else if ((c & 0x10u) == 0) \ + { \ + c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ + len += 2; \ + } \ + else if ((c & 0x08u) == 0) \ + {\ + c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \ + ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \ + len += 3; \ + } \ + else if ((c & 0x04u) == 0) \ + { \ + c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \ + ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \ + (eptr[4] & 0x3fu); \ + len += 4; \ + } \ + else \ + {\ + c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \ + ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \ + ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \ + len += 5; \ + } \ + } + +/* --------------- Whitespace macros ---------------- */ + +/* Tests for Unicode horizontal and vertical whitespace characters must check a +number of different values. Using a switch statement for this generates the +fastest code (no loop, no memory access), and there are several places in the +interpreter code where this happens. In order to ensure that all the case lists +remain in step, we use macros so that there is only one place where the lists +are defined. + +These values are also required as lists in pcre2_compile.c when processing \h, +\H, \v and \V in a character class. The lists are defined in pcre2_tables.c, +but macros that define the values are here so that all the definitions are +together. The lists must be in ascending character order, terminated by +NOTACHAR (which is 0xffffffff). + +Any changes should ensure that the various macros are kept in step with each +other. NOTE: The values also appear in pcre2_jit_compile.c. 
*/ + +/* -------------- ASCII/Unicode environments -------------- */ + +#ifndef EBCDIC + +/* Character U+180E (Mongolian Vowel Separator) is not included in the list of +spaces in the Unicode file PropList.txt, and Perl does not recognize it as a +space. However, in many other sources it is listed as a space and has been in +PCRE (both APIs) for a long time. */ + +#define HSPACE_LIST \ + CHAR_HT, CHAR_SPACE, CHAR_NBSP, \ + 0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \ + 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \ + NOTACHAR + +#define HSPACE_MULTIBYTE_CASES \ + case 0x1680: /* OGHAM SPACE MARK */ \ + case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ \ + case 0x2000: /* EN QUAD */ \ + case 0x2001: /* EM QUAD */ \ + case 0x2002: /* EN SPACE */ \ + case 0x2003: /* EM SPACE */ \ + case 0x2004: /* THREE-PER-EM SPACE */ \ + case 0x2005: /* FOUR-PER-EM SPACE */ \ + case 0x2006: /* SIX-PER-EM SPACE */ \ + case 0x2007: /* FIGURE SPACE */ \ + case 0x2008: /* PUNCTUATION SPACE */ \ + case 0x2009: /* THIN SPACE */ \ + case 0x200A: /* HAIR SPACE */ \ + case 0x202f: /* NARROW NO-BREAK SPACE */ \ + case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ \ + case 0x3000 /* IDEOGRAPHIC SPACE */ + +#define HSPACE_BYTE_CASES \ + case CHAR_HT: \ + case CHAR_SPACE: \ + case CHAR_NBSP + +#define HSPACE_CASES \ + HSPACE_BYTE_CASES: \ + HSPACE_MULTIBYTE_CASES + +#define VSPACE_LIST \ + CHAR_LF, CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, 0x2028, 0x2029, NOTACHAR + +#define VSPACE_MULTIBYTE_CASES \ + case 0x2028: /* LINE SEPARATOR */ \ + case 0x2029 /* PARAGRAPH SEPARATOR */ + +#define VSPACE_BYTE_CASES \ + case CHAR_LF: \ + case CHAR_VT: \ + case CHAR_FF: \ + case CHAR_CR: \ + case CHAR_NEL + +#define VSPACE_CASES \ + VSPACE_BYTE_CASES: \ + VSPACE_MULTIBYTE_CASES + +/* -------------- EBCDIC environments -------------- */ + +#else +#define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP, NOTACHAR + +#define HSPACE_BYTE_CASES \ + case CHAR_HT: \ + case CHAR_SPACE: \ + case 
CHAR_NBSP + +#define HSPACE_CASES HSPACE_BYTE_CASES + +#ifdef EBCDIC_NL25 +#define VSPACE_LIST \ + CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, CHAR_LF, NOTACHAR +#else +#define VSPACE_LIST \ + CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR +#endif + +#define VSPACE_BYTE_CASES \ + case CHAR_LF: \ + case CHAR_VT: \ + case CHAR_FF: \ + case CHAR_CR: \ + case CHAR_NEL + +#define VSPACE_CASES VSPACE_BYTE_CASES +#endif /* EBCDIC */ + +/* -------------- End of whitespace macros -------------- */ + + +/* PCRE2 is able to support several different kinds of newline (CR, LF, CRLF, +"any" and "anycrlf" at present). The following macros are used to package up +testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various +modules to indicate in which datablock the parameters exist, and what the +start/end of string field names are. */ + +#define NLTYPE_FIXED 0 /* Newline is a fixed length string */ +#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */ +#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */ + +/* This macro checks for a newline at the given position */ + +#define IS_NEWLINE(p) \ + ((NLBLOCK->nltype != NLTYPE_FIXED)? \ + ((p) < NLBLOCK->PSEND && \ + PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \ + &(NLBLOCK->nllen), utf)) \ + : \ + ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ + UCHAR21TEST(p) == NLBLOCK->nl[0] && \ + (NLBLOCK->nllen == 1 || UCHAR21TEST(p+1) == NLBLOCK->nl[1]) \ + ) \ + ) + +/* This macro checks for a newline immediately preceding the given position */ + +#define WAS_NEWLINE(p) \ + ((NLBLOCK->nltype != NLTYPE_FIXED)? \ + ((p) > NLBLOCK->PSSTART && \ + PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ + &(NLBLOCK->nllen), utf)) \ + : \ + ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ + UCHAR21TEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] && \ + (NLBLOCK->nllen == 1 || UCHAR21TEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \ + ) \ + ) + +/* Private flags containing information about the compiled pattern. 
The first +three must not be changed, because whichever is set is actually the number of +bytes in a code unit in that mode. */ + +#define PCRE2_MODE8 0x00000001 /* compiled in 8 bit mode */ +#define PCRE2_MODE16 0x00000002 /* compiled in 16 bit mode */ +#define PCRE2_MODE32 0x00000004 /* compiled in 32 bit mode */ +#define PCRE2_FIRSTSET 0x00000010 /* first_code unit is set */ +#define PCRE2_FIRSTCASELESS 0x00000020 /* caseless first code unit */ +#define PCRE2_FIRSTMAPSET 0x00000040 /* bitmap of first code units is set */ +#define PCRE2_LASTSET 0x00000080 /* last code unit is set */ +#define PCRE2_LASTCASELESS 0x00000100 /* caseless last code unit */ +#define PCRE2_STARTLINE 0x00000200 /* start after \n for multiline */ +#define PCRE2_JCHANGED 0x00000400 /* j option used in pattern */ +#define PCRE2_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */ +#define PCRE2_HASTHEN 0x00001000 /* pattern contains (*THEN) */ +#define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */ +#define PCRE2_BSR_SET 0x00004000 /* BSR was set in the pattern */ +#define PCRE2_NL_SET 0x00008000 /* newline was set in the pattern */ +#define PCRE2_NOTEMPTY_SET 0x00010000 /* (*NOTEMPTY) used ) keep */ +#define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */ +#define PCRE2_DEREF_TABLES 0x00040000 /* release character tables */ +#define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */ +#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */ +#define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */ +#define PCRE2_HASBKC 0x00400000 /* contains \C */ +#define PCRE2_HASACCEPT 0x00800000 /* contains (*ACCEPT) */ + +#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) + +/* Values for the matchedby field in a match data block. 
*/ + +enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */ + PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */ + PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */ + +/* Values for the flags field in a match data block. */ + +#define PCRE2_MD_COPIED_SUBJECT 0x01u + +/* Magic number to provide a small check against being handed junk. */ + +#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */ + +/* The maximum remaining length of subject we are prepared to search for a +req_unit match from an anchored pattern. In 8-bit mode, memchr() is used and is +much faster than the search loop that has to be used in 16-bit and 32-bit +modes. */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define REQ_CU_MAX 5000 +#else +#define REQ_CU_MAX 2000 +#endif + +/* Offsets for the bitmap tables in the cbits set of tables. Each table +contains a set of bits for a class map. Some classes are built by combining +these tables. */ + +#define cbit_space 0 /* [:space:] or \s */ +#define cbit_xdigit 32 /* [:xdigit:] */ +#define cbit_digit 64 /* [:digit:] or \d */ +#define cbit_upper 96 /* [:upper:] */ +#define cbit_lower 128 /* [:lower:] */ +#define cbit_word 160 /* [:word:] or \w */ +#define cbit_graph 192 /* [:graph:] */ +#define cbit_print 224 /* [:print:] */ +#define cbit_punct 256 /* [:punct:] */ +#define cbit_cntrl 288 /* [:cntrl:] */ +#define cbit_length 320 /* Length of the cbits table */ + +/* Bit definitions for entries in the ctypes table. Do not change these values +without checking pcre2_jit_compile.c, which has an assertion to ensure that +ctype_word has the value 16. */ + +#define ctype_space 0x01 +#define ctype_letter 0x02 +#define ctype_lcletter 0x04 +#define ctype_digit 0x08 +#define ctype_word 0x10 /* alphanumeric or '_' */ + +/* Offsets of the various tables from the base tables pointer, and +total length of the tables. 
*/ + +#define lcc_offset 0 /* Lower case */ +#define fcc_offset 256 /* Flip case */ +#define cbits_offset 512 /* Character classes */ +#define ctypes_offset (cbits_offset + cbit_length) /* Character types */ +#define TABLES_LENGTH (ctypes_offset + 256) + + +/* -------------------- Character and string names ------------------------ */ + +/* If PCRE2 is to support UTF-8 on EBCDIC platforms, we cannot use normal +character constants like '*' because the compiler would emit their EBCDIC code, +which is different from their ASCII/UTF-8 code. Instead we define macros for +the characters so that they always use the ASCII/UTF-8 code when UTF-8 support +is enabled. When UTF-8 support is not enabled, the definitions use character +literals. Both character and string versions of each character are needed, and +there are some longer strings as well. + +This means that, on EBCDIC platforms, the PCRE2 library can handle either +EBCDIC, or UTF-8, but not both. To support both in the same compiled library +would need different lookups depending on whether PCRE2_UTF was set or not. +This would make it impossible to use characters in switch/case statements, +which would reduce performance. For a theoretical use (which nobody has asked +for) in a minority area (EBCDIC platforms), this is not sensible. Any +application that did need both could compile two versions of the library, using +macros to give the functions distinct names. */ + +#ifndef SUPPORT_UNICODE + +/* UTF-8 support is not enabled; use the platform-dependent character literals +so that PCRE2 works in both ASCII and EBCDIC environments, but only in non-UTF +mode. Newline characters are problematic in EBCDIC. Though it has CR and LF +characters, a common practice has been to use its NL (0x15) character as the +line terminator in C-like processing environments. 
However, sometimes the LF +(0x25) character is used instead, according to this Unicode document: + +http://unicode.org/standard/reports/tr13/tr13-5.html + +PCRE2 defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25 +instead. Whichever is *not* chosen is defined as NEL. + +In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the +same code point. */ + +#ifdef EBCDIC + +#ifndef EBCDIC_NL25 +#define CHAR_NL '\x15' +#define CHAR_NEL '\x25' +#define STR_NL "\x15" +#define STR_NEL "\x25" +#else +#define CHAR_NL '\x25' +#define CHAR_NEL '\x15' +#define STR_NL "\x25" +#define STR_NEL "\x15" +#endif + +#define CHAR_LF CHAR_NL +#define STR_LF STR_NL + +#define CHAR_ESC '\047' +#define CHAR_DEL '\007' +#define CHAR_NBSP ((unsigned char)'\x41') +#define STR_ESC "\047" +#define STR_DEL "\007" + +#else /* Not EBCDIC */ + +/* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for +compatibility. NEL is the Unicode newline character; make sure it is +a positive value. */ + +#define CHAR_LF '\n' +#define CHAR_NL CHAR_LF +#define CHAR_NEL ((unsigned char)'\x85') +#define CHAR_ESC '\033' +#define CHAR_DEL '\177' +#define CHAR_NBSP ((unsigned char)'\xa0') + +#define STR_LF "\n" +#define STR_NL STR_LF +#define STR_NEL "\x85" +#define STR_ESC "\033" +#define STR_DEL "\177" + +#endif /* EBCDIC */ + +/* The remaining definitions work in both environments. */ + +#define CHAR_NUL '\0' +#define CHAR_HT '\t' +#define CHAR_VT '\v' +#define CHAR_FF '\f' +#define CHAR_CR '\r' +#define CHAR_BS '\b' +#define CHAR_BEL '\a' + +#define CHAR_SPACE ' ' +#define CHAR_EXCLAMATION_MARK '!' +#define CHAR_QUOTATION_MARK '"' +#define CHAR_NUMBER_SIGN '#' +#define CHAR_DOLLAR_SIGN '$' +#define CHAR_PERCENT_SIGN '%' +#define CHAR_AMPERSAND '&' +#define CHAR_APOSTROPHE '\'' +#define CHAR_LEFT_PARENTHESIS '(' +#define CHAR_RIGHT_PARENTHESIS ')' +#define CHAR_ASTERISK '*' +#define CHAR_PLUS '+' +#define CHAR_COMMA ',' +#define CHAR_MINUS '-' +#define CHAR_DOT '.' 
+#define CHAR_SLASH '/' +#define CHAR_0 '0' +#define CHAR_1 '1' +#define CHAR_2 '2' +#define CHAR_3 '3' +#define CHAR_4 '4' +#define CHAR_5 '5' +#define CHAR_6 '6' +#define CHAR_7 '7' +#define CHAR_8 '8' +#define CHAR_9 '9' +#define CHAR_COLON ':' +#define CHAR_SEMICOLON ';' +#define CHAR_LESS_THAN_SIGN '<' +#define CHAR_EQUALS_SIGN '=' +#define CHAR_GREATER_THAN_SIGN '>' +#define CHAR_QUESTION_MARK '?' +#define CHAR_COMMERCIAL_AT '@' +#define CHAR_A 'A' +#define CHAR_B 'B' +#define CHAR_C 'C' +#define CHAR_D 'D' +#define CHAR_E 'E' +#define CHAR_F 'F' +#define CHAR_G 'G' +#define CHAR_H 'H' +#define CHAR_I 'I' +#define CHAR_J 'J' +#define CHAR_K 'K' +#define CHAR_L 'L' +#define CHAR_M 'M' +#define CHAR_N 'N' +#define CHAR_O 'O' +#define CHAR_P 'P' +#define CHAR_Q 'Q' +#define CHAR_R 'R' +#define CHAR_S 'S' +#define CHAR_T 'T' +#define CHAR_U 'U' +#define CHAR_V 'V' +#define CHAR_W 'W' +#define CHAR_X 'X' +#define CHAR_Y 'Y' +#define CHAR_Z 'Z' +#define CHAR_LEFT_SQUARE_BRACKET '[' +#define CHAR_BACKSLASH '\\' +#define CHAR_RIGHT_SQUARE_BRACKET ']' +#define CHAR_CIRCUMFLEX_ACCENT '^' +#define CHAR_UNDERSCORE '_' +#define CHAR_GRAVE_ACCENT '`' +#define CHAR_a 'a' +#define CHAR_b 'b' +#define CHAR_c 'c' +#define CHAR_d 'd' +#define CHAR_e 'e' +#define CHAR_f 'f' +#define CHAR_g 'g' +#define CHAR_h 'h' +#define CHAR_i 'i' +#define CHAR_j 'j' +#define CHAR_k 'k' +#define CHAR_l 'l' +#define CHAR_m 'm' +#define CHAR_n 'n' +#define CHAR_o 'o' +#define CHAR_p 'p' +#define CHAR_q 'q' +#define CHAR_r 'r' +#define CHAR_s 's' +#define CHAR_t 't' +#define CHAR_u 'u' +#define CHAR_v 'v' +#define CHAR_w 'w' +#define CHAR_x 'x' +#define CHAR_y 'y' +#define CHAR_z 'z' +#define CHAR_LEFT_CURLY_BRACKET '{' +#define CHAR_VERTICAL_LINE '|' +#define CHAR_RIGHT_CURLY_BRACKET '}' +#define CHAR_TILDE '~' + +#define STR_HT "\t" +#define STR_VT "\v" +#define STR_FF "\f" +#define STR_CR "\r" +#define STR_BS "\b" +#define STR_BEL "\a" + +#define STR_SPACE " " +#define STR_EXCLAMATION_MARK "!" 
+#define STR_QUOTATION_MARK "\"" +#define STR_NUMBER_SIGN "#" +#define STR_DOLLAR_SIGN "$" +#define STR_PERCENT_SIGN "%" +#define STR_AMPERSAND "&" +#define STR_APOSTROPHE "'" +#define STR_LEFT_PARENTHESIS "(" +#define STR_RIGHT_PARENTHESIS ")" +#define STR_ASTERISK "*" +#define STR_PLUS "+" +#define STR_COMMA "," +#define STR_MINUS "-" +#define STR_DOT "." +#define STR_SLASH "/" +#define STR_0 "0" +#define STR_1 "1" +#define STR_2 "2" +#define STR_3 "3" +#define STR_4 "4" +#define STR_5 "5" +#define STR_6 "6" +#define STR_7 "7" +#define STR_8 "8" +#define STR_9 "9" +#define STR_COLON ":" +#define STR_SEMICOLON ";" +#define STR_LESS_THAN_SIGN "<" +#define STR_EQUALS_SIGN "=" +#define STR_GREATER_THAN_SIGN ">" +#define STR_QUESTION_MARK "?" +#define STR_COMMERCIAL_AT "@" +#define STR_A "A" +#define STR_B "B" +#define STR_C "C" +#define STR_D "D" +#define STR_E "E" +#define STR_F "F" +#define STR_G "G" +#define STR_H "H" +#define STR_I "I" +#define STR_J "J" +#define STR_K "K" +#define STR_L "L" +#define STR_M "M" +#define STR_N "N" +#define STR_O "O" +#define STR_P "P" +#define STR_Q "Q" +#define STR_R "R" +#define STR_S "S" +#define STR_T "T" +#define STR_U "U" +#define STR_V "V" +#define STR_W "W" +#define STR_X "X" +#define STR_Y "Y" +#define STR_Z "Z" +#define STR_LEFT_SQUARE_BRACKET "[" +#define STR_BACKSLASH "\\" +#define STR_RIGHT_SQUARE_BRACKET "]" +#define STR_CIRCUMFLEX_ACCENT "^" +#define STR_UNDERSCORE "_" +#define STR_GRAVE_ACCENT "`" +#define STR_a "a" +#define STR_b "b" +#define STR_c "c" +#define STR_d "d" +#define STR_e "e" +#define STR_f "f" +#define STR_g "g" +#define STR_h "h" +#define STR_i "i" +#define STR_j "j" +#define STR_k "k" +#define STR_l "l" +#define STR_m "m" +#define STR_n "n" +#define STR_o "o" +#define STR_p "p" +#define STR_q "q" +#define STR_r "r" +#define STR_s "s" +#define STR_t "t" +#define STR_u "u" +#define STR_v "v" +#define STR_w "w" +#define STR_x "x" +#define STR_y "y" +#define STR_z "z" +#define STR_LEFT_CURLY_BRACKET 
"{" +#define STR_VERTICAL_LINE "|" +#define STR_RIGHT_CURLY_BRACKET "}" +#define STR_TILDE "~" + +#define STRING_ACCEPT0 "ACCEPT\0" +#define STRING_COMMIT0 "COMMIT\0" +#define STRING_F0 "F\0" +#define STRING_FAIL0 "FAIL\0" +#define STRING_MARK0 "MARK\0" +#define STRING_PRUNE0 "PRUNE\0" +#define STRING_SKIP0 "SKIP\0" +#define STRING_THEN "THEN" + +#define STRING_atomic0 "atomic\0" +#define STRING_pla0 "pla\0" +#define STRING_plb0 "plb\0" +#define STRING_napla0 "napla\0" +#define STRING_naplb0 "naplb\0" +#define STRING_nla0 "nla\0" +#define STRING_nlb0 "nlb\0" +#define STRING_sr0 "sr\0" +#define STRING_asr0 "asr\0" +#define STRING_positive_lookahead0 "positive_lookahead\0" +#define STRING_positive_lookbehind0 "positive_lookbehind\0" +#define STRING_non_atomic_positive_lookahead0 "non_atomic_positive_lookahead\0" +#define STRING_non_atomic_positive_lookbehind0 "non_atomic_positive_lookbehind\0" +#define STRING_negative_lookahead0 "negative_lookahead\0" +#define STRING_negative_lookbehind0 "negative_lookbehind\0" +#define STRING_script_run0 "script_run\0" +#define STRING_atomic_script_run "atomic_script_run" + +#define STRING_alpha0 "alpha\0" +#define STRING_lower0 "lower\0" +#define STRING_upper0 "upper\0" +#define STRING_alnum0 "alnum\0" +#define STRING_ascii0 "ascii\0" +#define STRING_blank0 "blank\0" +#define STRING_cntrl0 "cntrl\0" +#define STRING_digit0 "digit\0" +#define STRING_graph0 "graph\0" +#define STRING_print0 "print\0" +#define STRING_punct0 "punct\0" +#define STRING_space0 "space\0" +#define STRING_word0 "word\0" +#define STRING_xdigit "xdigit" + +#define STRING_DEFINE "DEFINE" +#define STRING_VERSION "VERSION" +#define STRING_WEIRD_STARTWORD "[:<:]]" +#define STRING_WEIRD_ENDWORD "[:>:]]" + +#define STRING_CR_RIGHTPAR "CR)" +#define STRING_LF_RIGHTPAR "LF)" +#define STRING_CRLF_RIGHTPAR "CRLF)" +#define STRING_ANY_RIGHTPAR "ANY)" +#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)" +#define STRING_NUL_RIGHTPAR "NUL)" +#define STRING_BSR_ANYCRLF_RIGHTPAR 
"BSR_ANYCRLF)" +#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)" +#define STRING_UTF8_RIGHTPAR "UTF8)" +#define STRING_UTF16_RIGHTPAR "UTF16)" +#define STRING_UTF32_RIGHTPAR "UTF32)" +#define STRING_UTF_RIGHTPAR "UTF)" +#define STRING_UCP_RIGHTPAR "UCP)" +#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)" +#define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR "NO_DOTSTAR_ANCHOR)" +#define STRING_NO_JIT_RIGHTPAR "NO_JIT)" +#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" +#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)" +#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)" +#define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP=" +#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" +#define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH=" +#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" +#define STRING_MARK "MARK" + +#define STRING_bc "bc" +#define STRING_bidiclass "bidiclass" +#define STRING_sc "sc" +#define STRING_script "script" +#define STRING_scriptextensions "scriptextensions" +#define STRING_scx "scx" + +#else /* SUPPORT_UNICODE */ + +/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This +works in both modes non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode +only. 
*/ + +#define CHAR_HT '\011' +#define CHAR_VT '\013' +#define CHAR_FF '\014' +#define CHAR_CR '\015' +#define CHAR_LF '\012' +#define CHAR_NL CHAR_LF +#define CHAR_NEL ((unsigned char)'\x85') +#define CHAR_BS '\010' +#define CHAR_BEL '\007' +#define CHAR_ESC '\033' +#define CHAR_DEL '\177' + +#define CHAR_NUL '\0' +#define CHAR_SPACE '\040' +#define CHAR_EXCLAMATION_MARK '\041' +#define CHAR_QUOTATION_MARK '\042' +#define CHAR_NUMBER_SIGN '\043' +#define CHAR_DOLLAR_SIGN '\044' +#define CHAR_PERCENT_SIGN '\045' +#define CHAR_AMPERSAND '\046' +#define CHAR_APOSTROPHE '\047' +#define CHAR_LEFT_PARENTHESIS '\050' +#define CHAR_RIGHT_PARENTHESIS '\051' +#define CHAR_ASTERISK '\052' +#define CHAR_PLUS '\053' +#define CHAR_COMMA '\054' +#define CHAR_MINUS '\055' +#define CHAR_DOT '\056' +#define CHAR_SLASH '\057' +#define CHAR_0 '\060' +#define CHAR_1 '\061' +#define CHAR_2 '\062' +#define CHAR_3 '\063' +#define CHAR_4 '\064' +#define CHAR_5 '\065' +#define CHAR_6 '\066' +#define CHAR_7 '\067' +#define CHAR_8 '\070' +#define CHAR_9 '\071' +#define CHAR_COLON '\072' +#define CHAR_SEMICOLON '\073' +#define CHAR_LESS_THAN_SIGN '\074' +#define CHAR_EQUALS_SIGN '\075' +#define CHAR_GREATER_THAN_SIGN '\076' +#define CHAR_QUESTION_MARK '\077' +#define CHAR_COMMERCIAL_AT '\100' +#define CHAR_A '\101' +#define CHAR_B '\102' +#define CHAR_C '\103' +#define CHAR_D '\104' +#define CHAR_E '\105' +#define CHAR_F '\106' +#define CHAR_G '\107' +#define CHAR_H '\110' +#define CHAR_I '\111' +#define CHAR_J '\112' +#define CHAR_K '\113' +#define CHAR_L '\114' +#define CHAR_M '\115' +#define CHAR_N '\116' +#define CHAR_O '\117' +#define CHAR_P '\120' +#define CHAR_Q '\121' +#define CHAR_R '\122' +#define CHAR_S '\123' +#define CHAR_T '\124' +#define CHAR_U '\125' +#define CHAR_V '\126' +#define CHAR_W '\127' +#define CHAR_X '\130' +#define CHAR_Y '\131' +#define CHAR_Z '\132' +#define CHAR_LEFT_SQUARE_BRACKET '\133' +#define CHAR_BACKSLASH '\134' +#define CHAR_RIGHT_SQUARE_BRACKET '\135' 
+#define CHAR_CIRCUMFLEX_ACCENT '\136' +#define CHAR_UNDERSCORE '\137' +#define CHAR_GRAVE_ACCENT '\140' +#define CHAR_a '\141' +#define CHAR_b '\142' +#define CHAR_c '\143' +#define CHAR_d '\144' +#define CHAR_e '\145' +#define CHAR_f '\146' +#define CHAR_g '\147' +#define CHAR_h '\150' +#define CHAR_i '\151' +#define CHAR_j '\152' +#define CHAR_k '\153' +#define CHAR_l '\154' +#define CHAR_m '\155' +#define CHAR_n '\156' +#define CHAR_o '\157' +#define CHAR_p '\160' +#define CHAR_q '\161' +#define CHAR_r '\162' +#define CHAR_s '\163' +#define CHAR_t '\164' +#define CHAR_u '\165' +#define CHAR_v '\166' +#define CHAR_w '\167' +#define CHAR_x '\170' +#define CHAR_y '\171' +#define CHAR_z '\172' +#define CHAR_LEFT_CURLY_BRACKET '\173' +#define CHAR_VERTICAL_LINE '\174' +#define CHAR_RIGHT_CURLY_BRACKET '\175' +#define CHAR_TILDE '\176' +#define CHAR_NBSP ((unsigned char)'\xa0') + +#define STR_HT "\011" +#define STR_VT "\013" +#define STR_FF "\014" +#define STR_CR "\015" +#define STR_NL "\012" +#define STR_BS "\010" +#define STR_BEL "\007" +#define STR_ESC "\033" +#define STR_DEL "\177" + +#define STR_SPACE "\040" +#define STR_EXCLAMATION_MARK "\041" +#define STR_QUOTATION_MARK "\042" +#define STR_NUMBER_SIGN "\043" +#define STR_DOLLAR_SIGN "\044" +#define STR_PERCENT_SIGN "\045" +#define STR_AMPERSAND "\046" +#define STR_APOSTROPHE "\047" +#define STR_LEFT_PARENTHESIS "\050" +#define STR_RIGHT_PARENTHESIS "\051" +#define STR_ASTERISK "\052" +#define STR_PLUS "\053" +#define STR_COMMA "\054" +#define STR_MINUS "\055" +#define STR_DOT "\056" +#define STR_SLASH "\057" +#define STR_0 "\060" +#define STR_1 "\061" +#define STR_2 "\062" +#define STR_3 "\063" +#define STR_4 "\064" +#define STR_5 "\065" +#define STR_6 "\066" +#define STR_7 "\067" +#define STR_8 "\070" +#define STR_9 "\071" +#define STR_COLON "\072" +#define STR_SEMICOLON "\073" +#define STR_LESS_THAN_SIGN "\074" +#define STR_EQUALS_SIGN "\075" +#define STR_GREATER_THAN_SIGN "\076" +#define STR_QUESTION_MARK 
"\077" +#define STR_COMMERCIAL_AT "\100" +#define STR_A "\101" +#define STR_B "\102" +#define STR_C "\103" +#define STR_D "\104" +#define STR_E "\105" +#define STR_F "\106" +#define STR_G "\107" +#define STR_H "\110" +#define STR_I "\111" +#define STR_J "\112" +#define STR_K "\113" +#define STR_L "\114" +#define STR_M "\115" +#define STR_N "\116" +#define STR_O "\117" +#define STR_P "\120" +#define STR_Q "\121" +#define STR_R "\122" +#define STR_S "\123" +#define STR_T "\124" +#define STR_U "\125" +#define STR_V "\126" +#define STR_W "\127" +#define STR_X "\130" +#define STR_Y "\131" +#define STR_Z "\132" +#define STR_LEFT_SQUARE_BRACKET "\133" +#define STR_BACKSLASH "\134" +#define STR_RIGHT_SQUARE_BRACKET "\135" +#define STR_CIRCUMFLEX_ACCENT "\136" +#define STR_UNDERSCORE "\137" +#define STR_GRAVE_ACCENT "\140" +#define STR_a "\141" +#define STR_b "\142" +#define STR_c "\143" +#define STR_d "\144" +#define STR_e "\145" +#define STR_f "\146" +#define STR_g "\147" +#define STR_h "\150" +#define STR_i "\151" +#define STR_j "\152" +#define STR_k "\153" +#define STR_l "\154" +#define STR_m "\155" +#define STR_n "\156" +#define STR_o "\157" +#define STR_p "\160" +#define STR_q "\161" +#define STR_r "\162" +#define STR_s "\163" +#define STR_t "\164" +#define STR_u "\165" +#define STR_v "\166" +#define STR_w "\167" +#define STR_x "\170" +#define STR_y "\171" +#define STR_z "\172" +#define STR_LEFT_CURLY_BRACKET "\173" +#define STR_VERTICAL_LINE "\174" +#define STR_RIGHT_CURLY_BRACKET "\175" +#define STR_TILDE "\176" + +#define STRING_ACCEPT0 STR_A STR_C STR_C STR_E STR_P STR_T "\0" +#define STRING_COMMIT0 STR_C STR_O STR_M STR_M STR_I STR_T "\0" +#define STRING_F0 STR_F "\0" +#define STRING_FAIL0 STR_F STR_A STR_I STR_L "\0" +#define STRING_MARK0 STR_M STR_A STR_R STR_K "\0" +#define STRING_PRUNE0 STR_P STR_R STR_U STR_N STR_E "\0" +#define STRING_SKIP0 STR_S STR_K STR_I STR_P "\0" +#define STRING_THEN STR_T STR_H STR_E STR_N + +#define STRING_atomic0 STR_a STR_t STR_o 
STR_m STR_i STR_c "\0" +#define STRING_pla0 STR_p STR_l STR_a "\0" +#define STRING_plb0 STR_p STR_l STR_b "\0" +#define STRING_napla0 STR_n STR_a STR_p STR_l STR_a "\0" +#define STRING_naplb0 STR_n STR_a STR_p STR_l STR_b "\0" +#define STRING_nla0 STR_n STR_l STR_a "\0" +#define STRING_nlb0 STR_n STR_l STR_b "\0" +#define STRING_sr0 STR_s STR_r "\0" +#define STRING_asr0 STR_a STR_s STR_r "\0" +#define STRING_positive_lookahead0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0" +#define STRING_positive_lookbehind0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0" +#define STRING_non_atomic_positive_lookahead0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0" +#define STRING_non_atomic_positive_lookbehind0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0" +#define STRING_negative_lookahead0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0" +#define STRING_negative_lookbehind0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0" +#define STRING_script_run0 STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0" +#define STRING_atomic_script_run STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n + +#define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0" +#define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0" +#define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0" +#define 
STRING_alnum0 STR_a STR_l STR_n STR_u STR_m "\0" +#define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0" +#define STRING_blank0 STR_b STR_l STR_a STR_n STR_k "\0" +#define STRING_cntrl0 STR_c STR_n STR_t STR_r STR_l "\0" +#define STRING_digit0 STR_d STR_i STR_g STR_i STR_t "\0" +#define STRING_graph0 STR_g STR_r STR_a STR_p STR_h "\0" +#define STRING_print0 STR_p STR_r STR_i STR_n STR_t "\0" +#define STRING_punct0 STR_p STR_u STR_n STR_c STR_t "\0" +#define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0" +#define STRING_word0 STR_w STR_o STR_r STR_d "\0" +#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t + +#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E +#define STRING_VERSION STR_V STR_E STR_R STR_S STR_I STR_O STR_N +#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET +#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET + +#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS +#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS +#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS +#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS +#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS +#define STRING_NUL_RIGHTPAR STR_N STR_U STR_L STR_RIGHT_PARENTHESIS +#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS +#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS +#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS +#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS +#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS +#define 
STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS +#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS +#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS +#define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_D STR_O STR_T STR_S STR_T STR_A STR_R STR_UNDERSCORE STR_A STR_N STR_C STR_H STR_O STR_R STR_RIGHT_PARENTHESIS +#define STRING_NO_JIT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_J STR_I STR_T STR_RIGHT_PARENTHESIS +#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS +#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS +#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS +#define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN +#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN +#define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN +#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN +#define STRING_MARK STR_M STR_A STR_R STR_K + +#define STRING_bc STR_b STR_c +#define STRING_bidiclass STR_b STR_i STR_d STR_i STR_c STR_l STR_a STR_s STR_s +#define STRING_sc STR_s STR_c +#define STRING_script STR_s STR_c STR_r STR_i STR_p STR_t +#define STRING_scriptextensions STR_s STR_c STR_r STR_i STR_p STR_t STR_e STR_x STR_t STR_e STR_n STR_s STR_i STR_o STR_n STR_s +#define STRING_scx STR_s STR_c STR_x + + +#endif /* SUPPORT_UNICODE */ + +/* -------------------- End of 
character and string names -------------------*/ + +/* -------------------- Definitions for compiled patterns -------------------*/ + +/* Codes for different types of Unicode property. If these definitions are +changed, the autopossessifying table in pcre2_auto_possess.c must be updated to +match. */ + +#define PT_ANY 0 /* Any property - matches all chars */ +#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */ +#define PT_GC 2 /* Specified general characteristic (e.g. L) */ +#define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */ +#define PT_SC 4 /* Script only (e.g. Han) */ +#define PT_SCX 5 /* Script extensions (includes SC) */ +#define PT_ALNUM 6 /* Alphanumeric - the union of L and N */ +#define PT_SPACE 7 /* Perl space - general category Z plus 9,10,12,13 */ +#define PT_PXSPACE 8 /* POSIX space - Z plus 9,10,11,12,13 */ +#define PT_WORD 9 /* Word - L, N, Mn, or Pc */ +#define PT_CLIST 10 /* Pseudo-property: match character list */ +#define PT_UCNC 11 /* Universal Character nameable character */ +#define PT_BIDICL 12 /* Specified bidi class */ +#define PT_BOOL 13 /* Boolean property */ +#define PT_TABSIZE 14 /* Size of square table for autopossessify tests (covers the codes 0-13 above) */ + +/* The following special properties are used only in XCLASS items, when POSIX +classes are specified and PCRE2_UCP is set - in other words, for Unicode +handling of these classes. They are not available via the \p or \P escapes like +those in the above list, and so they do not take part in the autopossessifying +table. Their values start at PT_TABSIZE, just past the last code the table covers. */ + +#define PT_PXGRAPH 14 /* [:graph:] - characters that mark the paper */ +#define PT_PXPRINT 15 /* [:print:] - [:graph:] plus non-control spaces */ +#define PT_PXPUNCT 16 /* [:punct:] - punctuation characters */ +#define PT_PXXDIGIT 17 /* [:xdigit:] - hex digits */ + +/* This value is used when parsing \p and \P escapes to indicate that neither +\p{script:...} nor \p{scx:...} has been encountered. 
*/ + +#define PT_NOTSCRIPT 255 + +/* Flag bits and data types for the extended class (OP_XCLASS) for classes that +contain characters with values greater than 255. */ + +#define XCL_NOT 0x01 /* Flag: this is a negative class */ +#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ +#define XCL_HASPROP 0x04 /* Flag: property checks are present */ + +#define XCL_END 0 /* Marks end of individual items */ +#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ +#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ +#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ +#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ + +/* These are escaped items that aren't just an encoding of a particular data +value such as \n. They must have non-zero values, as check_escape() returns 0 +for a data character. In the escapes[] table in pcre2_compile.c their values +are negated in order to distinguish them from data values. + +They must appear here in the same order as in the opcode definitions below, up +to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL +mode rather than an escape sequence. It is also used for [^] in JavaScript +compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves +like \N. + +ESC_ub is a special return from check_escape() when, in BSUX mode, \u{ is not +followed by hex digits and }, in which case it should mean a literal "u" +followed by a literal "{". This hack is necessary for cases like \u{ 12} +because without it, this is interpreted as u{12} now that spaces are allowed in +quantifiers. + +Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in +check_escape(). There are tests in the code for an escape greater than ESC_b +and less than ESC_Z to detect the types that may be repeated. These are the +types that consume characters. If any new escapes are put in between that don't +consume a character, that code will have to change. 
*/ + +enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, /* values 1-9 */ + ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, /* values 10-18; ESC_dum (13) is the dummy that lines up with OP_ALLANY */ + ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, /* values 19-24; ESC_z (24) lines up with OP_EOD */ + ESC_E, ESC_Q, ESC_g, ESC_k, ESC_ub }; /* values 25-29; past ESC_z, so not tied to the opcode order */ + + +/********************** Opcode definitions ******************/ + +/****** NOTE NOTE NOTE ****** + +Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in +order to the list of escapes immediately above. Furthermore, values up to +OP_DOLLM must not be changed without adjusting the table called autoposstab in +pcre2_auto_possess.c. + +Whenever this list is updated, the two macro definitions that follow must be +updated to match. The possessification table called "opcode_possessify" in +pcre2_compile.c must also be updated, and also the tables called "coptable" +and "poptable" in pcre2_dfa_match.c. + +****** NOTE NOTE NOTE ******/ + + +/* The values between FIRST_AUTOTAB_OP and LAST_AUTOTAB_RIGHT_OP, inclusive, +are used in a table for deciding whether a repeated character type can be +auto-possessified. 
*/ + +#define FIRST_AUTOTAB_OP OP_NOT_DIGIT +#define LAST_AUTOTAB_LEFT_OP OP_EXTUNI +#define LAST_AUTOTAB_RIGHT_OP OP_DOLLM + +enum { + OP_END, /* 0 End of pattern */ + + /* Values corresponding to backslashed metacharacters */ + + OP_SOD, /* 1 Start of data: \A */ + OP_SOM, /* 2 Start of match (subject + offset): \G */ + OP_SET_SOM, /* 3 Set start of match (\K) */ + OP_NOT_WORD_BOUNDARY, /* 4 \B -- see also OP_NOT_UCP_WORD_BOUNDARY */ + OP_WORD_BOUNDARY, /* 5 \b -- see also OP_UCP_WORD_BOUNDARY */ + OP_NOT_DIGIT, /* 6 \D */ + OP_DIGIT, /* 7 \d */ + OP_NOT_WHITESPACE, /* 8 \S */ + OP_WHITESPACE, /* 9 \s */ + OP_NOT_WORDCHAR, /* 10 \W */ + OP_WORDCHAR, /* 11 \w */ + + OP_ANY, /* 12 Match any character except newline (\N) */ + OP_ALLANY, /* 13 Match any character */ + OP_ANYBYTE, /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */ + OP_NOTPROP, /* 15 \P (not Unicode property) */ + OP_PROP, /* 16 \p (Unicode property) */ + OP_ANYNL, /* 17 \R (any newline sequence) */ + OP_NOT_HSPACE, /* 18 \H (not horizontal whitespace) */ + OP_HSPACE, /* 19 \h (horizontal whitespace) */ + OP_NOT_VSPACE, /* 20 \V (not vertical whitespace) */ + OP_VSPACE, /* 21 \v (vertical whitespace) */ + OP_EXTUNI, /* 22 \X (extended Unicode sequence */ + OP_EODN, /* 23 End of data or \n at end of data (\Z) */ + OP_EOD, /* 24 End of data (\z) */ + + /* Line end assertions */ + + OP_DOLL, /* 25 End of line - not multiline */ + OP_DOLLM, /* 26 End of line - multiline */ + OP_CIRC, /* 27 Start of line - not multiline */ + OP_CIRCM, /* 28 Start of line - multiline */ + + /* Single characters; caseful must precede the caseless ones, and these + must remain in this order, and adjacent. 
*/ + + OP_CHAR, /* 29 Match one character, casefully */ + OP_CHARI, /* 30 Match one character, caselessly */ + OP_NOT, /* 31 Match one character, not the given one, casefully */ + OP_NOTI, /* 32 Match one character, not the given one, caselessly */ + + /* The following sets of 13 opcodes must always be kept in step because + the offset from the first one is used to generate the others. */ + + /* Repeated characters; caseful must precede the caseless ones */ + + OP_STAR, /* 33 The maximizing and minimizing versions of */ + OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */ + OP_PLUS, /* 35 the minimizing one second. */ + OP_MINPLUS, /* 36 */ + OP_QUERY, /* 37 */ + OP_MINQUERY, /* 38 */ + + OP_UPTO, /* 39 From 0 to n matches of one character, caseful*/ + OP_MINUPTO, /* 40 */ + OP_EXACT, /* 41 Exactly n matches */ + + OP_POSSTAR, /* 42 Possessified star, caseful */ + OP_POSPLUS, /* 43 Possessified plus, caseful */ + OP_POSQUERY, /* 44 Posesssified query, caseful */ + OP_POSUPTO, /* 45 Possessified upto, caseful */ + + /* Repeated characters; caseless must follow the caseful ones */ + + OP_STARI, /* 46 */ + OP_MINSTARI, /* 47 */ + OP_PLUSI, /* 48 */ + OP_MINPLUSI, /* 49 */ + OP_QUERYI, /* 50 */ + OP_MINQUERYI, /* 51 */ + + OP_UPTOI, /* 52 From 0 to n matches of one character, caseless */ + OP_MINUPTOI, /* 53 */ + OP_EXACTI, /* 54 */ + + OP_POSSTARI, /* 55 Possessified star, caseless */ + OP_POSPLUSI, /* 56 Possessified plus, caseless */ + OP_POSQUERYI, /* 57 Posesssified query, caseless */ + OP_POSUPTOI, /* 58 Possessified upto, caseless */ + + /* The negated ones must follow the non-negated ones, and match them */ + /* Negated repeated character, caseful; must precede the caseless ones */ + + OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */ + OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */ + OP_NOTPLUS, /* 61 the minimizing one second. They must be in */ + OP_NOTMINPLUS, /* 62 exactly the same order as those above. 
*/ + OP_NOTQUERY, /* 63 */ + OP_NOTMINQUERY, /* 64 */ + + OP_NOTUPTO, /* 65 From 0 to n matches, caseful */ + OP_NOTMINUPTO, /* 66 */ + OP_NOTEXACT, /* 67 Exactly n matches */ + + OP_NOTPOSSTAR, /* 68 Possessified versions, caseful */ + OP_NOTPOSPLUS, /* 69 */ + OP_NOTPOSQUERY, /* 70 */ + OP_NOTPOSUPTO, /* 71 */ + + /* Negated repeated character, caseless; must follow the caseful ones */ + + OP_NOTSTARI, /* 72 */ + OP_NOTMINSTARI, /* 73 */ + OP_NOTPLUSI, /* 74 */ + OP_NOTMINPLUSI, /* 75 */ + OP_NOTQUERYI, /* 76 */ + OP_NOTMINQUERYI, /* 77 */ + + OP_NOTUPTOI, /* 78 From 0 to n matches, caseless */ + OP_NOTMINUPTOI, /* 79 */ + OP_NOTEXACTI, /* 80 Exactly n matches */ + + OP_NOTPOSSTARI, /* 81 Possessified versions, caseless */ + OP_NOTPOSPLUSI, /* 82 */ + OP_NOTPOSQUERYI, /* 83 */ + OP_NOTPOSUPTOI, /* 84 */ + + /* Character types */ + + OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */ + OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */ + OP_TYPEPLUS, /* 87 the minimizing one second. These codes must */ + OP_TYPEMINPLUS, /* 88 be in exactly the same order as those above. */ + OP_TYPEQUERY, /* 89 */ + OP_TYPEMINQUERY, /* 90 */ + + OP_TYPEUPTO, /* 91 From 0 to n matches */ + OP_TYPEMINUPTO, /* 92 */ + OP_TYPEEXACT, /* 93 Exactly n matches */ + + OP_TYPEPOSSTAR, /* 94 Possessified versions */ + OP_TYPEPOSPLUS, /* 95 */ + OP_TYPEPOSQUERY, /* 96 */ + OP_TYPEPOSUPTO, /* 97 */ + + /* These are used for character classes and back references; only the + first six are the same as the sets above. */ + + OP_CRSTAR, /* 98 The maximizing and minimizing versions of */ + OP_CRMINSTAR, /* 99 all these opcodes must come in pairs, with */ + OP_CRPLUS, /* 100 the minimizing one second. These codes must */ + OP_CRMINPLUS, /* 101 be in exactly the same order as those above. */ + OP_CRQUERY, /* 102 */ + OP_CRMINQUERY, /* 103 */ + + OP_CRRANGE, /* 104 These are different to the three sets above. 
*/ + OP_CRMINRANGE, /* 105 */ + + OP_CRPOSSTAR, /* 106 Possessified versions */ + OP_CRPOSPLUS, /* 107 */ + OP_CRPOSQUERY, /* 108 */ + OP_CRPOSRANGE, /* 109 */ + + /* End of quantifier opcodes */ + + OP_CLASS, /* 110 Match a character class, chars < 256 only */ + OP_NCLASS, /* 111 Same, but the bitmap was created from a negative + class - the difference is relevant only when a + character > 255 is encountered. */ + OP_XCLASS, /* 112 Extended class for handling > 255 chars within the + class. This does both positive and negative. */ + OP_REF, /* 113 Match a back reference, casefully */ + OP_REFI, /* 114 Match a back reference, caselessly */ + OP_DNREF, /* 115 Match a duplicate name backref, casefully */ + OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */ + OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */ + OP_CALLOUT, /* 118 Call out to external function if provided */ + OP_CALLOUT_STR, /* 119 Call out with string argument */ + + OP_ALT, /* 120 Start of alternation */ + OP_KET, /* 121 End of group that doesn't have an unbounded repeat */ + OP_KETRMAX, /* 122 These two must remain together and in this */ + OP_KETRMIN, /* 123 order. They are for groups the repeat for ever. */ + OP_KETRPOS, /* 124 Possessive unlimited repeat. */ + + /* The assertions must come before BRA, CBRA, ONCE, and COND. */ + + OP_REVERSE, /* 125 Move pointer back - used in lookbehind assertions */ + OP_VREVERSE, /* 126 Move pointer back - variable */ + OP_ASSERT, /* 127 Positive lookahead */ + OP_ASSERT_NOT, /* 128 Negative lookahead */ + OP_ASSERTBACK, /* 129 Positive lookbehind */ + OP_ASSERTBACK_NOT, /* 130 Negative lookbehind */ + OP_ASSERT_NA, /* 131 Positive non-atomic lookahead */ + OP_ASSERTBACK_NA, /* 132 Positive non-atomic lookbehind */ + + /* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come + immediately after the assertions, with ONCE first, as there's a test for >= + ONCE for a subpattern that isn't an assertion. 
The POS versions must + immediately follow the non-POS versions in each case. */ + + OP_ONCE, /* 133 Atomic group, contains captures */ + OP_SCRIPT_RUN, /* 134 Non-capture, but check characters' scripts */ + OP_BRA, /* 135 Start of non-capturing bracket */ + OP_BRAPOS, /* 136 Ditto, with unlimited, possessive repeat */ + OP_CBRA, /* 137 Start of capturing bracket */ + OP_CBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */ + OP_COND, /* 139 Conditional group */ + + /* These five must follow the previous five, in the same order. There's a + check for >= SBRA to distinguish the two sets. */ + + OP_SBRA, /* 140 Start of non-capturing bracket, check empty */ + OP_SBRAPOS, /* 141 Ditto, with unlimited, possessive repeat */ + OP_SCBRA, /* 142 Start of capturing bracket, check empty */ + OP_SCBRAPOS, /* 143 Ditto, with unlimited, possessive repeat */ + OP_SCOND, /* 144 Conditional group, check empty */ + + /* The next two pairs must (respectively) be kept together. */ + + OP_CREF, /* 145 Used to hold a capture number as condition */ + OP_DNCREF, /* 146 Used to point to duplicate names as a condition */ + OP_RREF, /* 147 Used to hold a recursion number as condition */ + OP_DNRREF, /* 148 Used to point to duplicate names as a condition */ + OP_FALSE, /* 149 Always false (used by DEFINE and VERSION) */ + OP_TRUE, /* 150 Always true (used by VERSION) */ + + OP_BRAZERO, /* 151 These two must remain together and in this */ + OP_BRAMINZERO, /* 152 order. */ + OP_BRAPOSZERO, /* 153 */ + + /* These are backtracking control verbs */ + + OP_MARK, /* 154 always has an argument */ + OP_PRUNE, /* 155 */ + OP_PRUNE_ARG, /* 156 same, but with argument */ + OP_SKIP, /* 157 */ + OP_SKIP_ARG, /* 158 same, but with argument */ + OP_THEN, /* 159 */ + OP_THEN_ARG, /* 160 same, but with argument */ + OP_COMMIT, /* 161 */ + OP_COMMIT_ARG, /* 162 same, but with argument */ + + /* These are forced failure and success verbs. 
FAIL and ACCEPT do accept an + argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL) + without the need for a special opcode. */ + + OP_FAIL, /* 163 */ + OP_ACCEPT, /* 164 */ + OP_ASSERT_ACCEPT, /* 165 Used inside assertions */ + OP_CLOSE, /* 166 Used before OP_ACCEPT to close open captures */ + + /* This is used to skip a subpattern with a {0} quantifier */ + + OP_SKIPZERO, /* 167 */ + + /* This is used to identify a DEFINE group during compilation so that it can + be checked for having only one branch. It is changed to OP_FALSE before + compilation finishes. */ + + OP_DEFINE, /* 168 */ + + /* These opcodes replace their normal counterparts in UCP mode when + PCRE2_EXTRA_ASCII_BSW is not set. */ + + OP_NOT_UCP_WORD_BOUNDARY, /* 169 */ + OP_UCP_WORD_BOUNDARY, /* 170 */ + + /* This is not an opcode, but is used to check that tables indexed by opcode + are the correct length, in order to catch updating errors - there have been + some in the past. */ + + OP_TABLE_LENGTH + +}; + +/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro +definitions that follow must also be updated to match. There are also tables +called "opcode_possessify" in pcre2_compile.c and "coptable" and "poptable" in +pcre2_dfa_match.c that must be updated. */ + + +/* This macro defines textual names for all the opcodes. These are used only +for debugging, and some of them are only partial names. The macro is referenced +only in pcre2_printint.c, which fills out the full names in many cases (and in +some cases doesn't actually use these names at all). 
*/ + +#define OP_NAME_LIST \ + "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \ + "\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \ + "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \ + "extuni", "\\Z", "\\z", \ + "$", "$", "^", "^", "char", "chari", "not", "noti", \ + "*", "*?", "+", "+?", "?", "??", \ + "{", "{", "{", \ + "*+","++", "?+", "{", \ + "*", "*?", "+", "+?", "?", "??", \ + "{", "{", "{", \ + "*+","++", "?+", "{", \ + "*", "*?", "+", "+?", "?", "??", \ + "{", "{", "{", \ + "*+","++", "?+", "{", \ + "*", "*?", "+", "+?", "?", "??", \ + "{", "{", "{", \ + "*+","++", "?+", "{", \ + "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ + "*+","++", "?+", "{", \ + "*", "*?", "+", "+?", "?", "??", "{", "{", \ + "*+","++", "?+", "{", \ + "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \ + "Recurse", "Callout", "CalloutStr", \ + "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \ + "Reverse", "VReverse", "Assert", "Assert not", \ + "Assert back", "Assert back not", \ + "Non-atomic assert", "Non-atomic assert back", \ + "Once", \ + "Script run", \ + "Bra", "BraPos", "CBra", "CBraPos", \ + "Cond", \ + "SBra", "SBraPos", "SCBra", "SCBraPos", \ + "SCond", \ + "Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", \ + "Cond false", "Cond true", \ + "Brazero", "Braminzero", "Braposzero", \ + "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \ + "*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL", \ + "*ACCEPT", "*ASSERT_ACCEPT", \ + "Close", "Skip zero", "Define", "\\B (ucp)", "\\b (ucp)" + + +/* This macro defines the length of fixed length operations in the compiled +regex. The lengths are used when searching for specific things, and also in the +debugging printing of a compiled regex. We use a macro so that it can be +defined close to the definitions of the opcodes themselves. + +As things have been extended, some of these are no longer fixed lengths, but are +minima instead.
For example, the length of a single-character repeat may vary +in UTF-8 mode. The code that uses this table must know about such things. */ + +#define OP_LENGTHS \ + 1, /* End */ \ + 1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \ + 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \ + 1, 1, 1, /* Any, AllAny, Anybyte */ \ + 3, 3, /* \P, \p */ \ + 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \ + 1, /* \X */ \ + 1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M ^, ^M */ \ + 2, /* Char - the minimum length */ \ + 2, /* Chari - the minimum length */ \ + 2, /* not */ \ + 2, /* noti */ \ + /* Positive single-char repeats ** These are */ \ + 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto, minupto ** mode */ \ + 2+IMM2_SIZE, /* exact */ \ + 2, 2, 2, 2+IMM2_SIZE, /* *+, ++, ?+, upto+ */ \ + 2, 2, 2, 2, 2, 2, /* *I, *?I, +I, +?I, ?I, ??I ** UTF-8 */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto I, minupto I */ \ + 2+IMM2_SIZE, /* exact I */ \ + 2, 2, 2, 2+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ \ + /* Negative single-char repeats - only for chars < 256 */ \ + 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto, minupto */ \ + 2+IMM2_SIZE, /* NOT exact */ \ + 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *, +, ?, upto */ \ + 2, 2, 2, 2, 2, 2, /* NOT *I, *?I, +I, +?I, ?I, ??I */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto I, minupto I */ \ + 2+IMM2_SIZE, /* NOT exact I */ \ + 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *I, +I, ?I, upto I */ \ + /* Positive type repeats */ \ + 2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* Type upto, minupto */ \ + 2+IMM2_SIZE, /* Type exact */ \ + 2, 2, 2, 2+IMM2_SIZE, /* Possessive *+, ++, ?+, upto+ */ \ + /* Character class & ref repeats */ \ + 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? 
*/ \ + 1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \ + 1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \ + 1+(32/sizeof(PCRE2_UCHAR)), /* CLASS */ \ + 1+(32/sizeof(PCRE2_UCHAR)), /* NCLASS */ \ + 0, /* XCLASS - variable length */ \ + 1+IMM2_SIZE, /* REF */ \ + 1+IMM2_SIZE, /* REFI */ \ + 1+2*IMM2_SIZE, /* DNREF */ \ + 1+2*IMM2_SIZE, /* DNREFI */ \ + 1+LINK_SIZE, /* RECURSE */ \ + 1+2*LINK_SIZE+1, /* CALLOUT */ \ + 0, /* CALLOUT_STR - variable length */ \ + 1+LINK_SIZE, /* Alt */ \ + 1+LINK_SIZE, /* Ket */ \ + 1+LINK_SIZE, /* KetRmax */ \ + 1+LINK_SIZE, /* KetRmin */ \ + 1+LINK_SIZE, /* KetRpos */ \ + 1+IMM2_SIZE, /* Reverse */ \ + 1+2*IMM2_SIZE, /* VReverse */ \ + 1+LINK_SIZE, /* Assert */ \ + 1+LINK_SIZE, /* Assert not */ \ + 1+LINK_SIZE, /* Assert behind */ \ + 1+LINK_SIZE, /* Assert behind not */ \ + 1+LINK_SIZE, /* NA Assert */ \ + 1+LINK_SIZE, /* NA Assert behind */ \ + 1+LINK_SIZE, /* ONCE */ \ + 1+LINK_SIZE, /* SCRIPT_RUN */ \ + 1+LINK_SIZE, /* BRA */ \ + 1+LINK_SIZE, /* BRAPOS */ \ + 1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \ + 1+LINK_SIZE+IMM2_SIZE, /* CBRAPOS */ \ + 1+LINK_SIZE, /* COND */ \ + 1+LINK_SIZE, /* SBRA */ \ + 1+LINK_SIZE, /* SBRAPOS */ \ + 1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \ + 1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \ + 1+LINK_SIZE, /* SCOND */ \ + 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \ + 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \ + 1, 1, /* FALSE, TRUE */ \ + 1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \ + 3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ + 1, 3, /* SKIP, SKIP_ARG */ \ + 1, 3, /* THEN, THEN_ARG */ \ + 1, 3, /* COMMIT, COMMIT_ARG */ \ + 1, 1, 1, /* FAIL, ACCEPT, ASSERT_ACCEPT */ \ + 1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \ + 1, /* DEFINE */ \ + 1, 1 /* \B and \b in UCP mode */ + +/* A magic value for OP_RREF to indicate the "any recursion" condition. */ + +#define RREF_ANY 0xffff + + +/* ---------- Private structures that are mode-independent. 
---------- */ + +/* Structure to hold data for custom memory management. */ + +typedef struct pcre2_memctl { + void * (*malloc)(size_t, void *); + void (*free)(void *, void *); + void *memory_data; +} pcre2_memctl; + +/* Structure for building a chain of open capturing subpatterns during +compiling, so that instructions to close them can be compiled when (*ACCEPT) is +encountered. */ + +typedef struct open_capitem { + struct open_capitem *next; /* Chain link */ + uint16_t number; /* Capture number */ + uint16_t assert_depth; /* Assertion depth when opened */ +} open_capitem; + +/* Layout of the UCP type table that translates property names into types and +codes. Each entry used to point directly to a name, but to reduce the number of +relocations in shared libraries, it now has an offset into a single string +instead. */ + +typedef struct { + uint16_t name_offset; + uint16_t type; + uint16_t value; +} ucp_type_table; + +/* Unicode character database (UCD) record format */ + +typedef struct { + uint8_t script; /* ucp_Arabic, etc. */ + uint8_t chartype; /* ucp_Cc, etc. (general categories) */ + uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */ + uint8_t caseset; /* offset to multichar other cases or zero */ + int32_t other_case; /* offset to other case, or zero if none */ + uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */ + uint16_t bprops; /* binary properties offset */ +} ucd_record; + +/* UCD access macros */ + +#define UCD_BLOCK_SIZE 128 +#define REAL_GET_UCD(ch) (PRIV(ucd_records) + \ + PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \ + UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE]) + +#if PCRE2_CODE_UNIT_WIDTH == 32 +#define GET_UCD(ch) ((ch > MAX_UTF_CODE_POINT)? 
\ + PRIV(dummy_ucd_record) : REAL_GET_UCD(ch)) +#else +#define GET_UCD(ch) REAL_GET_UCD(ch) +#endif + +#define UCD_SCRIPTX_MASK 0x3ff +#define UCD_BIDICLASS_SHIFT 11 +#define UCD_BPROPS_MASK 0xfff + +#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK) +#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT) +#define UCD_BPROPS_PROP(prop) ((prop)->bprops & UCD_BPROPS_MASK) + +#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype +#define UCD_SCRIPT(ch) GET_UCD(ch)->script +#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)] +#define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop +#define UCD_CASESET(ch) GET_UCD(ch)->caseset +#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case))) +#define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch)) +#define UCD_BPROPS(ch) UCD_BPROPS_PROP(GET_UCD(ch)) +#define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch)) + +/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words +that form a bitmap representing a list of scripts or boolean properties. These +macros test or set a bit in the map by number. */ + +#define MAPBIT(map,n) ((map)[(n)/32]&(1u<<((n)%32))) +#define MAPSET(map,n) ((map)[(n)/32]|=(1u<<((n)%32))) + +/* Header for serialized pcre2 codes. */ + +typedef struct pcre2_serialized_data { + uint32_t magic; + uint32_t version; + uint32_t config; + int32_t number_of_codes; +} pcre2_serialized_data; + + + +/* ----------------- Items that need PCRE2_CODE_UNIT_WIDTH ----------------- */ + +/* When this file is included by pcre2test, PCRE2_CODE_UNIT_WIDTH is defined as +0, so the following items are omitted. */ + +#if defined PCRE2_CODE_UNIT_WIDTH && PCRE2_CODE_UNIT_WIDTH != 0 + +/* EBCDIC is supported only for the 8-bit library. */ + +#if defined EBCDIC && PCRE2_CODE_UNIT_WIDTH != 8 +#error EBCDIC is not supported for the 16-bit or 32-bit libraries +#endif + +/* This is the largest non-UTF code point. 
*/ + +#define MAX_NON_UTF_CHAR (0xffffffffU >> (32 - PCRE2_CODE_UNIT_WIDTH)) + +/* Internal shared data tables and variables. These are used by more than one +of the exported public functions. They have to be "external" in the C sense, +but are not part of the PCRE2 public API. Although the data for some of them is +identical in all libraries, they must have different names so that multiple +libraries can be simultaneously linked to a single application. However, UTF-8 +tables are needed only when compiling the 8-bit library. */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +extern const int PRIV(utf8_table1)[]; +extern const int PRIV(utf8_table1_size); +extern const int PRIV(utf8_table2)[]; +extern const int PRIV(utf8_table3)[]; +extern const uint8_t PRIV(utf8_table4)[]; +#endif + +#define _pcre2_OP_lengths PCRE2_SUFFIX(_pcre2_OP_lengths_) +#define _pcre2_callout_end_delims PCRE2_SUFFIX(_pcre2_callout_end_delims_) +#define _pcre2_callout_start_delims PCRE2_SUFFIX(_pcre2_callout_start_delims_) +#define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_) +#define _pcre2_default_convert_context PCRE2_SUFFIX(_pcre2_default_convert_context_) +#define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_) +#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_) +#if PCRE2_CODE_UNIT_WIDTH == 32 +#define _pcre2_dummy_ucd_record PCRE2_SUFFIX(_pcre2_dummy_ucd_record_) +#endif +#define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_) +#define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_) +#define _pcre2_ucd_boolprop_sets PCRE2_SUFFIX(_pcre2_ucd_boolprop_sets_) +#define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_) +#define _pcre2_ucd_digit_sets PCRE2_SUFFIX(_pcre2_ucd_digit_sets_) +#define _pcre2_ucd_script_sets PCRE2_SUFFIX(_pcre2_ucd_script_sets_) +#define _pcre2_ucd_records PCRE2_SUFFIX(_pcre2_ucd_records_) +#define _pcre2_ucd_stage1 PCRE2_SUFFIX(_pcre2_ucd_stage1_) +#define _pcre2_ucd_stage2 
PCRE2_SUFFIX(_pcre2_ucd_stage2_) +#define _pcre2_ucp_gbtable PCRE2_SUFFIX(_pcre2_ucp_gbtable_) +#define _pcre2_ucp_gentype PCRE2_SUFFIX(_pcre2_ucp_gentype_) +#define _pcre2_ucp_typerange PCRE2_SUFFIX(_pcre2_ucp_typerange_) +#define _pcre2_unicode_version PCRE2_SUFFIX(_pcre2_unicode_version_) +#define _pcre2_utt PCRE2_SUFFIX(_pcre2_utt_) +#define _pcre2_utt_names PCRE2_SUFFIX(_pcre2_utt_names_) +#define _pcre2_utt_size PCRE2_SUFFIX(_pcre2_utt_size_) + +extern const uint8_t PRIV(OP_lengths)[]; +extern const uint32_t PRIV(callout_end_delims)[]; +extern const uint32_t PRIV(callout_start_delims)[]; +extern const pcre2_compile_context PRIV(default_compile_context); +extern const pcre2_convert_context PRIV(default_convert_context); +extern const pcre2_match_context PRIV(default_match_context); +extern const uint8_t PRIV(default_tables)[]; +extern const uint32_t PRIV(hspace_list)[]; +extern const uint32_t PRIV(vspace_list)[]; +extern const uint32_t PRIV(ucd_boolprop_sets)[]; +extern const uint32_t PRIV(ucd_caseless_sets)[]; +extern const uint32_t PRIV(ucd_digit_sets)[]; +extern const uint32_t PRIV(ucd_script_sets)[]; +extern const ucd_record PRIV(ucd_records)[]; +#if PCRE2_CODE_UNIT_WIDTH == 32 +extern const ucd_record PRIV(dummy_ucd_record)[]; +#endif +extern const uint16_t PRIV(ucd_stage1)[]; +extern const uint16_t PRIV(ucd_stage2)[]; +extern const uint32_t PRIV(ucp_gbtable)[]; +extern const uint32_t PRIV(ucp_gentype)[]; +#ifdef SUPPORT_JIT +extern const int PRIV(ucp_typerange)[]; +#endif +extern const char *PRIV(unicode_version); +extern const ucp_type_table PRIV(utt)[]; +extern const char PRIV(utt_names)[]; +extern const size_t PRIV(utt_size); + +/* Mode-dependent macros and hidden and private structures are defined in a +separate file so that pcre2test can include them at all supported widths. 
When +compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, and we can +include them at the appropriate width, after setting up suffix macros for the +private structures. */ + +#define branch_chain PCRE2_SUFFIX(branch_chain_) +#define compile_block PCRE2_SUFFIX(compile_block_) +#define dfa_match_block PCRE2_SUFFIX(dfa_match_block_) +#define match_block PCRE2_SUFFIX(match_block_) +#define named_group PCRE2_SUFFIX(named_group_) + +#include "pcre2_intmodedep.h" + +/* Private "external" functions. These are internal functions that are called +from modules other than the one in which they are defined. They have to be +"external" in the C sense, but are not part of the PCRE2 public API. They are +not referenced from pcre2test, and must not be defined when no code unit width +is available. */ + +#define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_) +#define _pcre2_check_escape PCRE2_SUFFIX(_pcre2_check_escape_) +#define _pcre2_extuni PCRE2_SUFFIX(_pcre2_extuni_) +#define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_) +#define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_) +#define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_) +#define _pcre2_jit_free PCRE2_SUFFIX(_pcre2_jit_free_) +#define _pcre2_jit_get_size PCRE2_SUFFIX(_pcre2_jit_get_size_) +#define _pcre2_jit_get_target PCRE2_SUFFIX(_pcre2_jit_get_target_) +#define _pcre2_memctl_malloc PCRE2_SUFFIX(_pcre2_memctl_malloc_) +#define _pcre2_ord2utf PCRE2_SUFFIX(_pcre2_ord2utf_) +#define _pcre2_script_run PCRE2_SUFFIX(_pcre2_script_run_) +#define _pcre2_strcmp PCRE2_SUFFIX(_pcre2_strcmp_) +#define _pcre2_strcmp_c8 PCRE2_SUFFIX(_pcre2_strcmp_c8_) +#define _pcre2_strcpy_c8 PCRE2_SUFFIX(_pcre2_strcpy_c8_) +#define _pcre2_strlen PCRE2_SUFFIX(_pcre2_strlen_) +#define _pcre2_strncmp PCRE2_SUFFIX(_pcre2_strncmp_) +#define _pcre2_strncmp_c8 PCRE2_SUFFIX(_pcre2_strncmp_c8_) +#define _pcre2_study PCRE2_SUFFIX(_pcre2_study_) +#define _pcre2_valid_utf PCRE2_SUFFIX(_pcre2_valid_utf_) 
+#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_) +#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_) + +extern int _pcre2_auto_possessify(PCRE2_UCHAR *, + const compile_block *); +extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *, + int *, uint32_t, uint32_t, BOOL, compile_block *); +extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR, + BOOL, int *); +extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int); +extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, + uint32_t *, BOOL); +extern void _pcre2_jit_free_rodata(void *, void *); +extern void _pcre2_jit_free(void *, pcre2_memctl *); +extern size_t _pcre2_jit_get_size(void *); +const char * _pcre2_jit_get_target(void); +extern void * _pcre2_memctl_malloc(size_t, pcre2_memctl *); +extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *); +extern BOOL _pcre2_script_run(PCRE2_SPTR, PCRE2_SPTR, BOOL); +extern int _pcre2_strcmp(PCRE2_SPTR, PCRE2_SPTR); +extern int _pcre2_strcmp_c8(PCRE2_SPTR, const char *); +extern PCRE2_SIZE _pcre2_strcpy_c8(PCRE2_UCHAR *, const char *); +extern PCRE2_SIZE _pcre2_strlen(PCRE2_SPTR); +extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t); +extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t); +extern int _pcre2_study(pcre2_real_code *); +extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *); +extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, + uint32_t *, BOOL); +extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL); + +/* This function is needed only when memmove() is not available. 
*/ + +#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE) +#define _pcre2_memmove PCRE2_SUFFIX(_pcre2_memmove) +extern void * _pcre2_memmove(void *, const void *, size_t); +#endif + +#endif /* PCRE2_CODE_UNIT_WIDTH */ + +extern BOOL PRIV(ckd_smul)(PCRE2_SIZE *, int, int); + +#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */ + +/* End of pcre2_internal.h */ diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h new file mode 100644 index 0000000..9bd9e69 --- /dev/null +++ b/src/pcre2_intmodedep.h @@ -0,0 +1,940 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains mode-dependent macro and structure definitions. The +file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined. +These mode-dependent items are kept in a separate file so that they can also be +#included multiple times for different code unit widths by pcre2test in order +to have access to the hidden structures at all supported widths. + +Some of the mode-dependent macros are required at different widths for +different parts of the pcre2test code (in particular, the included +pcre_printint.c file). We undefine them here so that they can be re-defined for +multiple inclusions. Not all of these are used in pcre2test, but it's easier +just to undefine them all. 
*/ + +#undef ACROSSCHAR +#undef BACKCHAR +#undef BYTES2CU +#undef CHMAX_255 +#undef CU2BYTES +#undef FORWARDCHAR +#undef FORWARDCHARTEST +#undef GET +#undef GET2 +#undef GETCHAR +#undef GETCHARINC +#undef GETCHARINCTEST +#undef GETCHARLEN +#undef GETCHARLENTEST +#undef GETCHARTEST +#undef GET_EXTRALEN +#undef HAS_EXTRALEN +#undef IMM2_SIZE +#undef MAX_255 +#undef MAX_MARK +#undef MAX_PATTERN_SIZE +#undef MAX_UTF_SINGLE_CU +#undef NOT_FIRSTCU +#undef PUT +#undef PUT2 +#undef PUT2INC +#undef PUTCHAR +#undef PUTINC +#undef TABLE_GET + + + +/* -------------------------- MACROS ----------------------------- */ + +/* PCRE keeps offsets in its compiled code as at least 16-bit quantities +(always stored in big-endian order in 8-bit mode) by default. These are used, +for example, to link from the start of a subpattern to its alternatives and its +end. The use of 16 bits per offset limits the size of an 8-bit compiled regex +to around 64K, which is big enough for almost everybody. However, I received a +request for an even bigger limit. For this reason, and also to make the code +easier to maintain, the storing and loading of offsets from the compiled code +unit string is now handled by the macros that are defined here. + +The macros are controlled by the value of LINK_SIZE. This defaults to 2, but +values of 3 or 4 are also supported. 
*/ + +/* ------------------- 8-bit support ------------------ */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 + +#if LINK_SIZE == 2 +#define PUT(a,n,d) \ + (a[n] = (PCRE2_UCHAR)((d) >> 8)), \ + (a[(n)+1] = (PCRE2_UCHAR)((d) & 255)) +#define GET(a,n) \ + (unsigned int)(((a)[n] << 8) | (a)[(n)+1]) +#define MAX_PATTERN_SIZE (1 << 16) + +#elif LINK_SIZE == 3 +#define PUT(a,n,d) \ + (a[n] = (PCRE2_UCHAR)((d) >> 16)), \ + (a[(n)+1] = (PCRE2_UCHAR)((d) >> 8)), \ + (a[(n)+2] = (PCRE2_UCHAR)((d) & 255)) +#define GET(a,n) \ + (unsigned int)(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]) +#define MAX_PATTERN_SIZE (1 << 24) + +#elif LINK_SIZE == 4 +#define PUT(a,n,d) \ + (a[n] = (PCRE2_UCHAR)((d) >> 24)), \ + (a[(n)+1] = (PCRE2_UCHAR)((d) >> 16)), \ + (a[(n)+2] = (PCRE2_UCHAR)((d) >> 8)), \ + (a[(n)+3] = (PCRE2_UCHAR)((d) & 255)) +#define GET(a,n) \ + (unsigned int)(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3]) +#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ + +#else +#error LINK_SIZE must be 2, 3, or 4 +#endif + + +/* ------------------- 16-bit support ------------------ */ + +#elif PCRE2_CODE_UNIT_WIDTH == 16 + +#if LINK_SIZE == 2 +#undef LINK_SIZE +#define LINK_SIZE 1 +#define PUT(a,n,d) \ + (a[n] = (PCRE2_UCHAR)(d)) +#define GET(a,n) \ + (a[n]) +#define MAX_PATTERN_SIZE (1 << 16) + +#elif LINK_SIZE == 3 || LINK_SIZE == 4 +#undef LINK_SIZE +#define LINK_SIZE 2 +#define PUT(a,n,d) \ + (a[n] = (PCRE2_UCHAR)((d) >> 16)), \ + (a[(n)+1] = (PCRE2_UCHAR)((d) & 65535)) +#define GET(a,n) \ + (unsigned int)(((a)[n] << 16) | (a)[(n)+1]) +#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ + +#else +#error LINK_SIZE must be 2, 3, or 4 +#endif + + +/* ------------------- 32-bit support ------------------ */ + +#elif PCRE2_CODE_UNIT_WIDTH == 32 +#undef LINK_SIZE +#define LINK_SIZE 1 +#define PUT(a,n,d) \ + (a[n] = (d)) +#define GET(a,n) \ + (a[n]) +#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ + +#else +#error Unsupported compiling mode +#endif 
+ + +/* --------------- Other mode-specific macros ----------------- */ + +/* PCRE uses some other (at least) 16-bit quantities that do not change when +the size of offsets changes. These are used for repeat counts and for other +things such as capturing parenthesis numbers in back references. + +Define the number of code units required to hold a 16-bit count/offset, and +macros to load and store such a value. For reasons that I do not understand, +the expression in the 8-bit GET2 macro is treated by gcc as a signed +expression, even when a is declared as unsigned. It seems that any kind of +arithmetic results in a signed value. Hence the cast. */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define IMM2_SIZE 2 +#define GET2(a,n) (unsigned int)(((a)[n] << 8) | (a)[(n)+1]) +#define PUT2(a,n,d) a[n] = (d) >> 8, a[(n)+1] = (d) & 255 + +#else /* Code units are 16 or 32 bits */ +#define IMM2_SIZE 1 +#define GET2(a,n) a[n] +#define PUT2(a,n,d) a[n] = d +#endif + +/* Other macros that are different for 8-bit mode. The MAX_255 macro checks +whether its argument, which is assumed to be one code unit, is less than 256. +The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK +name must fit in one code unit; currently it is set to 255 or 65535. The +TABLE_GET macro is used to access elements of tables containing exactly 256 +items. Its argument is a code unit. When code points can be greater than 255, a +check is needed before accessing these tables.
*/ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define MAX_255(c) TRUE +#define MAX_MARK ((1u << 8) - 1) +#define TABLE_GET(c, table, default) ((table)[c]) +#ifdef SUPPORT_UNICODE +#define SUPPORT_WIDE_CHARS +#define CHMAX_255(c) ((c) <= 255u) +#else +#define CHMAX_255(c) TRUE +#endif /* SUPPORT_UNICODE */ + +#else /* Code units are 16 or 32 bits */ +#define CHMAX_255(c) ((c) <= 255u) +#define MAX_255(c) ((c) <= 255u) +#define MAX_MARK ((1u << 16) - 1) +#define SUPPORT_WIDE_CHARS +#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default)) +#endif + + +/* ----------------- Character-handling macros ----------------- */ + +/* There is a proposed future special "UTF-21" mode, in which only the lowest +21 bits of a 32-bit character are interpreted as UTF, with the remaining 11 +high-order bits available to the application for other uses. In preparation for +the future implementation of this mode, there are macros that load a data item +and, if in this special mode, mask it to 21 bits. These macros all have names +starting with UCHAR21. In all other modes, including the normal 32-bit +library, the macros all have the same simple definitions. When the new mode is +implemented, it is expected that these definitions will be varied appropriately +using #ifdef when compiling the library that supports the special mode. */ + +#define UCHAR21(eptr) (*(eptr)) +#define UCHAR21TEST(eptr) (*(eptr)) +#define UCHAR21INC(eptr) (*(eptr)++) +#define UCHAR21INCTEST(eptr) (*(eptr)++) + +/* When UTF encoding is being used, a character is no longer just a single +byte in 8-bit mode or a single short in 16-bit mode. The macros for character +handling generate simple sequences when used in the basic mode, and more +complicated ones for UTF characters. GETCHARLENTEST and other macros are not +used when UTF is not supported. To make sure they can never even appear when +UTF support is omitted, we don't even define them. 
*/ + +#ifndef SUPPORT_UNICODE + +/* #define MAX_UTF_SINGLE_CU */ +/* #define HAS_EXTRALEN(c) */ +/* #define GET_EXTRALEN(c) */ +/* #define NOT_FIRSTCU(c) */ +#define GETCHAR(c, eptr) c = *eptr; +#define GETCHARTEST(c, eptr) c = *eptr; +#define GETCHARINC(c, eptr) c = *eptr++; +#define GETCHARINCTEST(c, eptr) c = *eptr++; +#define GETCHARLEN(c, eptr, len) c = *eptr; +#define PUTCHAR(c, p) (*p = c, 1) +/* #define GETCHARLENTEST(c, eptr, len) */ +/* #define BACKCHAR(eptr) */ +/* #define FORWARDCHAR(eptr) */ +/* #define FORWARDCHARTEST(eptr,end) */ +/* #define ACROSSCHAR(condition, eptr, action) */ + +#else /* SUPPORT_UNICODE */ + +/* ------------------- 8-bit support ------------------ */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */ + +/* The largest UTF code point that can be encoded as a single code unit. */ + +#define MAX_UTF_SINGLE_CU 127 + +/* Tests whether the code point needs extra characters to decode. */ + +#define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c) + +/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE. +Otherwise it has an undefined behaviour. */ + +#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu]) + +/* Returns TRUE, if the given value is not the first code unit of a UTF +sequence. */ + +#define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u) + +/* Get the next UTF-8 character, not advancing the pointer. This is called when +we know we are in UTF-8 mode. */ + +#define GETCHAR(c, eptr) \ + c = *eptr; \ + if (c >= 0xc0u) GETUTF8(c, eptr); + +/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the +pointer. */ + +#define GETCHARTEST(c, eptr) \ + c = *eptr; \ + if (utf && c >= 0xc0u) GETUTF8(c, eptr); + +/* Get the next UTF-8 character, advancing the pointer. This is called when we +know we are in UTF-8 mode.
*/ + +#define GETCHARINC(c, eptr) \ + c = *eptr++; \ + if (c >= 0xc0u) GETUTF8INC(c, eptr); + +/* Get the next character, testing for UTF-8 mode, and advancing the pointer. +This is called when we don't know if we are in UTF-8 mode. */ + +#define GETCHARINCTEST(c, eptr) \ + c = *eptr++; \ + if (utf && c >= 0xc0u) GETUTF8INC(c, eptr); + +/* Get the next UTF-8 character, not advancing the pointer, incrementing length +if there are extra bytes. This is called when we know we are in UTF-8 mode. */ + +#define GETCHARLEN(c, eptr, len) \ + c = *eptr; \ + if (c >= 0xc0u) GETUTF8LEN(c, eptr, len); + +/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the +pointer, incrementing length if there are extra bytes. This is called when we +do not know if we are in UTF-8 mode. */ + +#define GETCHARLENTEST(c, eptr, len) \ + c = *eptr; \ + if (utf && c >= 0xc0u) GETUTF8LEN(c, eptr, len); + +/* If the pointer is not at the start of a character, move it back until +it is. This is called only in UTF-8 mode - we don't put a test within the macro +because almost all calls are already within a block of UTF-8 only code. */ + +#define BACKCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr-- + +/* Same as above, just in the other direction. */ +#define FORWARDCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr++ +#define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0u) == 0x80u) eptr++ + +/* Same as above, but it allows a fully customizable form. */ +#define ACROSSCHAR(condition, eptr, action) \ + while((condition) && ((*eptr) & 0xc0u) == 0x80u) action + +/* Deposit a character into memory, returning the number of code units. */ + +#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \ + PRIV(ord2utf)(c,p) : (*p = c, 1)) + + +/* ------------------- 16-bit support ------------------ */ + +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */ + +/* The largest UTF code point that can be encoded as a single code unit. 
*/ + +#define MAX_UTF_SINGLE_CU 65535 + +/* Tests whether the code point needs extra characters to decode. */ + +#define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u) + +/* Returns with the additional number of characters if HAS_EXTRALEN(c) is TRUE. +Otherwise it has an undefined behaviour. */ + +#define GET_EXTRALEN(c) 1 + +/* Returns TRUE, if the given value is not the first code unit of a UTF +sequence. */ + +#define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u) + +/* Base macro to pick up the low surrogate of a UTF-16 character, not +advancing the pointer. */ + +#define GETUTF16(c, eptr) \ + { c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; } + +/* Get the next UTF-16 character, not advancing the pointer. This is called when +we know we are in UTF-16 mode. */ + +#define GETCHAR(c, eptr) \ + c = *eptr; \ + if ((c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr); + +/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the +pointer. */ + +#define GETCHARTEST(c, eptr) \ + c = *eptr; \ + if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr); + +/* Base macro to pick up the low surrogate of a UTF-16 character, advancing +the pointer. */ + +#define GETUTF16INC(c, eptr) \ + { c = (((c & 0x3ffu) << 10) | (*eptr++ & 0x3ffu)) + 0x10000u; } + +/* Get the next UTF-16 character, advancing the pointer. This is called when we +know we are in UTF-16 mode. */ + +#define GETCHARINC(c, eptr) \ + c = *eptr++; \ + if ((c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr); + +/* Get the next character, testing for UTF-16 mode, and advancing the pointer. +This is called when we don't know if we are in UTF-16 mode. */ + +#define GETCHARINCTEST(c, eptr) \ + c = *eptr++; \ + if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr); + +/* Base macro to pick up the low surrogate of a UTF-16 character, not +advancing the pointer, incrementing the length.
*/ + +#define GETUTF16LEN(c, eptr, len) \ + { c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; len++; } + +/* Get the next UTF-16 character, not advancing the pointer, incrementing +length if there is a low surrogate. This is called when we know we are in +UTF-16 mode. */ + +#define GETCHARLEN(c, eptr, len) \ + c = *eptr; \ + if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len); + +/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the +pointer, incrementing length if there is a low surrogate. This is called when +we do not know if we are in UTF-16 mode. */ + +#define GETCHARLENTEST(c, eptr, len) \ + c = *eptr; \ + if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len); + +/* If the pointer is not at the start of a character, move it back until +it is. This is called only in UTF-16 mode - we don't put a test within the +macro because almost all calls are already within a block of UTF-16 only +code. */ + +#define BACKCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr-- + +/* Same as above, just in the other direction. */ +#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr++ +#define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00u) == 0xdc00u) eptr++ + +/* Same as above, but it allows a fully customizable form. */ +#define ACROSSCHAR(condition, eptr, action) \ + if ((condition) && ((*eptr) & 0xfc00u) == 0xdc00u) action + +/* Deposit a character into memory, returning the number of code units. */ + +#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \ + PRIV(ord2utf)(c,p) : (*p = c, 1)) + + +/* ------------------- 32-bit support ------------------ */ + +#else + +/* These are trivial for the 32-bit library, since all UTF-32 characters fit +into one PCRE2_UCHAR unit. */ + +#define MAX_UTF_SINGLE_CU (0x10ffffu) +#define HAS_EXTRALEN(c) (0) +#define GET_EXTRALEN(c) (0) +#define NOT_FIRSTCU(c) (0) + +/* Get the next UTF-32 character, not advancing the pointer.
This is called when +we know we are in UTF-32 mode. */ + +#define GETCHAR(c, eptr) \ + c = *(eptr); + +/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the +pointer. */ + +#define GETCHARTEST(c, eptr) \ + c = *(eptr); + +/* Get the next UTF-32 character, advancing the pointer. This is called when we +know we are in UTF-32 mode. */ + +#define GETCHARINC(c, eptr) \ + c = *((eptr)++); + +/* Get the next character, testing for UTF-32 mode, and advancing the pointer. +This is called when we don't know if we are in UTF-32 mode. */ + +#define GETCHARINCTEST(c, eptr) \ + c = *((eptr)++); + +/* Get the next UTF-32 character, not advancing the pointer, not incrementing +length (since all UTF-32 is of length 1). This is called when we know we are in +UTF-32 mode. */ + +#define GETCHARLEN(c, eptr, len) \ + GETCHAR(c, eptr) + +/* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the +pointer, not incrementing the length (since all UTF-32 is of length 1). +This is called when we do not know if we are in UTF-32 mode. */ + +#define GETCHARLENTEST(c, eptr, len) \ + GETCHARTEST(c, eptr) + +/* If the pointer is not at the start of a character, move it back until +it is. This is called only in UTF-32 mode - we don't put a test within the +macro because almost all calls are already within a block of UTF-32 only +code. + +These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */ + +#define BACKCHAR(eptr) do { } while (0) + +/* Same as above, just in the other direction. */ + +#define FORWARDCHAR(eptr) do { } while (0) +#define FORWARDCHARTEST(eptr,end) do { } while (0) + +/* Same as above, but it allows a fully customizable form. */ + +#define ACROSSCHAR(condition, eptr, action) do { } while (0) + +/* Deposit a character into memory, returning the number of code units.
*/ + +#define PUTCHAR(c, p) (*p = c, 1) + +#endif /* UTF-32 character handling */ +#endif /* SUPPORT_UNICODE */ + + +/* Mode-dependent macros that have the same definition in all modes. */ + +#define CU2BYTES(x) ((x)*((PCRE2_CODE_UNIT_WIDTH/8))) +#define BYTES2CU(x) ((x)/((PCRE2_CODE_UNIT_WIDTH/8))) +#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE +#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE + + +/* ----------------------- HIDDEN STRUCTURES ----------------------------- */ + +/* NOTE: All these structures *must* start with a pcre2_memctl structure. The +code that uses them is simpler because it assumes this. */ + +/* The real general context structure. At present it holds only data for custom +memory control. */ + +typedef struct pcre2_real_general_context { + pcre2_memctl memctl; +} pcre2_real_general_context; + +/* The real compile context structure */ + +typedef struct pcre2_real_compile_context { + pcre2_memctl memctl; + int (*stack_guard)(uint32_t, void *); + void *stack_guard_data; + const uint8_t *tables; + PCRE2_SIZE max_pattern_length; + PCRE2_SIZE max_pattern_compiled_length; + uint16_t bsr_convention; + uint16_t newline_convention; + uint32_t parens_nest_limit; + uint32_t extra_options; + uint32_t max_varlookbehind; +} pcre2_real_compile_context; + +/* The real match context structure. */ + +typedef struct pcre2_real_match_context { + pcre2_memctl memctl; +#ifdef SUPPORT_JIT + pcre2_jit_callback jit_callback; + void *jit_callback_data; +#endif + int (*callout)(pcre2_callout_block *, void *); + void *callout_data; + int (*substitute_callout)(pcre2_substitute_callout_block *, void *); + void *substitute_callout_data; + PCRE2_SIZE offset_limit; + uint32_t heap_limit; + uint32_t match_limit; + uint32_t depth_limit; +} pcre2_real_match_context; + +/* The real convert context structure. 
*/ + +typedef struct pcre2_real_convert_context { + pcre2_memctl memctl; + uint32_t glob_separator; + uint32_t glob_escape; +} pcre2_real_convert_context; + +/* The real compiled code structure. The type for the blocksize field is +defined specially because it is required in pcre2_serialize_decode() when +copying the size from possibly unaligned memory into a variable of the same +type. Use a macro rather than a typedef to avoid compiler warnings when this +file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the +largest lookbehind that is supported. (OP_REVERSE and OP_VREVERSE in a pattern +have 16-bit arguments in 8-bit and 16-bit modes, so we need no more than a +16-bit field here.) */ + +#undef CODE_BLOCKSIZE_TYPE +#define CODE_BLOCKSIZE_TYPE PCRE2_SIZE + +#undef LOOKBEHIND_MAX +#define LOOKBEHIND_MAX UINT16_MAX + +typedef struct pcre2_real_code { + pcre2_memctl memctl; /* Memory control fields */ + const uint8_t *tables; /* The character tables */ + void *executable_jit; /* Pointer to JIT code */ + uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */ + CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */ + uint32_t magic_number; /* Paranoid and endianness check */ + uint32_t compile_options; /* Options passed to pcre2_compile() */ + uint32_t overall_options; /* Options after processing the pattern */ + uint32_t extra_options; /* Taken from compile_context */ + uint32_t flags; /* Various state flags */ + uint32_t limit_heap; /* Limit set in the pattern */ + uint32_t limit_match; /* Limit set in the pattern */ + uint32_t limit_depth; /* Limit set in the pattern */ + uint32_t first_codeunit; /* Starting code unit */ + uint32_t last_codeunit; /* This codeunit must be seen */ + uint16_t bsr_convention; /* What \R matches */ + uint16_t newline_convention; /* What is a newline? 
*/ + uint16_t max_lookbehind; /* Longest lookbehind (characters) */ + uint16_t minlength; /* Minimum length of match */ + uint16_t top_bracket; /* Highest numbered group */ + uint16_t top_backref; /* Highest numbered back reference */ + uint16_t name_entry_size; /* Size (code units) of table entries */ + uint16_t name_count; /* Number of name entries in the table */ +} pcre2_real_code; + +/* The real match data structure. Define ovector as large as it can ever +actually be so that array bound checkers don't grumble. Memory for this +structure is obtained by calling pcre2_match_data_create(), which sets the size +as the offset of ovector plus a pair of elements for each capturable string, so +the size varies from call to call. As the maximum number of capturing +subpatterns is 65535 we must allow for 65536 strings to include the overall +match. (See also the heapframe structure below.) */ + +struct heapframe; /* Forward reference */ + +typedef struct pcre2_real_match_data { + pcre2_memctl memctl; /* Memory control fields */ + const pcre2_real_code *code; /* The pattern used for the match */ + PCRE2_SPTR subject; /* The subject that was matched */ + PCRE2_SPTR mark; /* Pointer to last mark */ + struct heapframe *heapframes; /* Backtracking frames heap memory */ + PCRE2_SIZE heapframes_size; /* Malloc-ed size */ + PCRE2_SIZE subject_length; /* Subject length */ + PCRE2_SIZE leftchar; /* Offset to leftmost code unit */ + PCRE2_SIZE rightchar; /* Offset to rightmost code unit */ + PCRE2_SIZE startchar; /* Offset to starting code unit */ + uint8_t matchedby; /* Type of match (normal, JIT, DFA) */ + uint8_t flags; /* Various flags */ + uint16_t oveccount; /* Number of pairs */ + int rc; /* The return code from the match */ + PCRE2_SIZE ovector[131072]; /* Must be last in the structure */ +} pcre2_real_match_data; + + +/* ----------------------- PRIVATE STRUCTURES ----------------------------- */ + +/* These structures are not needed for pcre2test. 
*/ + +#ifndef PCRE2_PCRE2TEST + +/* Structures for checking for mutual function recursion when scanning compiled +or parsed code. */ + +typedef struct recurse_check { + struct recurse_check *prev; + PCRE2_SPTR group; +} recurse_check; + +typedef struct parsed_recurse_check { + struct parsed_recurse_check *prev; + uint32_t *groupptr; +} parsed_recurse_check; + +/* Structure for building a cache when filling in pattern recursion offsets. */ + +typedef struct recurse_cache { + PCRE2_SPTR group; + int groupnumber; +} recurse_cache; + +/* Structure for maintaining a chain of pointers to the currently incomplete +branches, for testing for left recursion while compiling. */ + +typedef struct branch_chain { + struct branch_chain *outer; + PCRE2_UCHAR *current_branch; +} branch_chain; + +/* Structure for building a list of named groups during the first pass of +compiling. */ + +typedef struct named_group { + PCRE2_SPTR name; /* Points to the name in the pattern */ + uint32_t number; /* Group number */ + uint16_t length; /* Length of the name */ + uint16_t isdup; /* TRUE if a duplicate */ +} named_group; + +/* Structure for passing "static" information around between the functions +doing the compiling, so that they are thread-safe. 
*/ + +typedef struct compile_block { + pcre2_real_compile_context *cx; /* Points to the compile context */ + const uint8_t *lcc; /* Points to lower casing table */ + const uint8_t *fcc; /* Points to case-flipping table */ + const uint8_t *cbits; /* Points to character type table */ + const uint8_t *ctypes; /* Points to table of type maps */ + PCRE2_SPTR start_workspace; /* The start of working space */ + PCRE2_SPTR start_code; /* The start of the compiled code */ + PCRE2_SPTR start_pattern; /* The start of the pattern */ + PCRE2_SPTR end_pattern; /* The end of the pattern */ + PCRE2_UCHAR *name_table; /* The name/number table */ + PCRE2_SIZE workspace_size; /* Size of workspace */ + PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */ + PCRE2_SIZE erroroffset; /* Offset of error in pattern */ + uint16_t names_found; /* Number of entries so far */ + uint16_t name_entry_size; /* Size of each entry */ + uint16_t parens_depth; /* Depth of nested parentheses */ + uint16_t assert_depth; /* Depth of nested assertions */ + named_group *named_groups; /* Points to vector in pre-compile */ + uint32_t named_group_list_size; /* Number of entries in the list */ + uint32_t external_options; /* External (initial) options */ + uint32_t external_flags; /* External flag bits to be set */ + uint32_t bracount; /* Count of capturing parentheses */ + uint32_t lastcapture; /* Last capture encountered */ + uint32_t *parsed_pattern; /* Parsed pattern buffer */ + uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */ + uint32_t *groupinfo; /* Group info vector */ + uint32_t top_backref; /* Maximum back reference */ + uint32_t backref_map; /* Bitmap of low back refs */ + uint32_t nltype; /* Newline type */ + uint32_t nllen; /* Newline string length */ + uint32_t class_range_start; /* Overall class range start */ + uint32_t class_range_end; /* Overall class range end */ + PCRE2_UCHAR nl[4]; /* Newline string when fixed length */ + uint32_t req_varyopt; /* "After variable 
item" flag for reqbyte */ + uint32_t max_varlookbehind; /* Limit for variable lookbehinds */ + int max_lookbehind; /* Maximum lookbehind encountered (characters) */ + BOOL had_accept; /* (*ACCEPT) encountered */ + BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ + BOOL had_recurse; /* Had a pattern recursion or subroutine call */ + BOOL dupnames; /* Duplicate names exist */ +} compile_block; + +/* Structure for keeping the properties of the in-memory stack used +by the JIT matcher. */ + +typedef struct pcre2_real_jit_stack { + pcre2_memctl memctl; + void* stack; +} pcre2_real_jit_stack; + +/* Structure for items in a linked list that represents an explicit recursive +call within the pattern when running pcre2_dfa_match(). */ + +typedef struct dfa_recursion_info { + struct dfa_recursion_info *prevrec; + PCRE2_SPTR subject_position; + PCRE2_SPTR last_used_ptr; + uint32_t group_num; +} dfa_recursion_info; + +/* Structure for "stack" frames that are used for remembering backtracking +positions during matching. As these are used in a vector, with the ovector item +being extended, the size of the structure must be a multiple of PCRE2_SIZE. The +only way to check this at compile time is to force an error by generating an +array with a negative size. By putting this in a typedef (which is never used), +we don't generate any code when all is well. */ + +typedef struct heapframe { + + /* The first set of fields are variables that have to be preserved over calls + to RRMATCH(), but which do not need to be copied to new frames. 
*/ + + PCRE2_SPTR ecode; /* The current position in the pattern */ + PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE2_SPTR values */ + PCRE2_SIZE length; /* Used for character, string, or code lengths */ + PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */ + PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */ + uint32_t rdepth; /* Function "recursion" depth within pcre2_match() */ + uint32_t group_frame_type; /* Type information for group frames */ + uint32_t temp_32[4]; /* Used for short-term 32-bit or BOOL values */ + uint8_t return_id; /* Where to go on in internal "return" */ + uint8_t op; /* Processing opcode */ + + /* At this point, the structure is 16-bit aligned. On most architectures + the alignment requirement for a pointer will ensure that the eptr field below + is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer + that is 16-bit aligned. We must therefore ensure that what comes between here + and eptr is an odd multiple of 16 bits so as to get back into 32-bit + alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs + fudges in the other cases. In the 32-bit case the padding comes first so that + the occu field itself is 32-bit aligned. Without the padding, this structure + is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails. */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 + PCRE2_UCHAR occu[6]; /* Used for other case code units */ +#elif PCRE2_CODE_UNIT_WIDTH == 16 + PCRE2_UCHAR occu[2]; /* Used for other case code units */ + uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */ +#else + uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */ + PCRE2_UCHAR occu[1]; /* Used for other case code units */ +#endif + + /* The rest have to be copied from the previous frame whenever a new frame + becomes current. The final field is specified as a large vector so that + runtime array bound checks don't catch references to it.
However, for any + specific call to pcre2_match() the memory allocated for each frame structure + allows for exactly the right size ovector for the number of capturing + parentheses. (See also the comment for pcre2_real_match_data above.) */ + + PCRE2_SPTR eptr; /* MUST BE FIRST */ + PCRE2_SPTR start_match; /* Can be adjusted by \K */ + PCRE2_SPTR mark; /* Most recent mark on the success path */ + PCRE2_SPTR recurse_last_used; /* Last character used at time of pattern recursion */ + uint32_t current_recurse; /* Group number of current (deepest) pattern recursion */ + uint32_t capture_last; /* Most recent capture */ + PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */ + PCRE2_SIZE offset_top; /* Offset after highest capture */ + PCRE2_SIZE ovector[131072]; /* Must be last in the structure */ +} heapframe; + +/* This typedef is a check that the size of the heapframe structure is a +multiple of PCRE2_SIZE. See various comments above. */ + +typedef char check_heapframe_size[ + ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)]; + +/* Structure for computing the alignment of heapframe. */ + +typedef struct heapframe_align { + char unalign; /* Completely unalign the current offset */ + heapframe frame; /* Offset is its alignment */ +} heapframe_align; + +/* This define is the minimum alignment required for a heapframe, in bytes. */ + +#define HEAPFRAME_ALIGNMENT offsetof(heapframe_align, frame) + +/* Structure for passing "static" information around between the functions +doing traditional NFA matching (pcre2_match() and friends). 
*/ + +typedef struct match_block { + pcre2_memctl memctl; /* For general use */ + uint32_t heap_limit; /* As it says */ + uint32_t match_limit; /* As it says */ + uint32_t match_limit_depth; /* As it says */ + uint32_t match_call_count; /* Number of times a new frame is created */ + BOOL hitend; /* Hit the end of the subject at some point */ + BOOL hasthen; /* Pattern contains (*THEN) */ + BOOL allowemptypartial; /* Allow empty hard partial */ + const uint8_t *lcc; /* Points to lower casing table */ + const uint8_t *fcc; /* Points to case-flipping table */ + const uint8_t *ctypes; /* Points to table of type maps */ + PCRE2_SIZE start_offset; /* The start offset value */ + PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */ + uint16_t partial; /* PARTIAL options */ + uint16_t bsr_convention; /* \R interpretation */ + uint16_t name_count; /* Number of names in name table */ + uint16_t name_entry_size; /* Size of entry in names table */ + PCRE2_SPTR name_table; /* Table of group names */ + PCRE2_SPTR start_code; /* For use in pattern recursion */ + PCRE2_SPTR start_subject; /* Start of the subject string */ + PCRE2_SPTR check_subject; /* Where UTF-checked from */ + PCRE2_SPTR end_subject; /* Usable end of the subject string */ + PCRE2_SPTR true_end_subject; /* Actual end of the subject string */ + PCRE2_SPTR end_match_ptr; /* Subject position at end match */ + PCRE2_SPTR start_used_ptr; /* Earliest consulted character */ + PCRE2_SPTR last_used_ptr; /* Latest consulted character */ + PCRE2_SPTR mark; /* Mark pointer to pass back on success */ + PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */ + PCRE2_SPTR verb_ecode_ptr; /* For passing back info */ + PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */ + uint32_t verb_current_recurse; /* Current recursion group when (*VERB) happens */ + uint32_t moptions; /* Match options */ + uint32_t poptions; /* Pattern options */ + uint32_t skip_arg_count; /* For counting SKIP_ARGs */ + 
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */ + uint32_t nltype; /* Newline type */ + uint32_t nllen; /* Newline string length */ + PCRE2_UCHAR nl[4]; /* Newline string when fixed */ + pcre2_callout_block *cb; /* Points to a callout block */ + void *callout_data; /* To pass back to callouts */ + int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */ +} match_block; + +/* A similar structure is used for the same purpose by the DFA matching +functions. */ + +typedef struct dfa_match_block { + pcre2_memctl memctl; /* For general use */ + PCRE2_SPTR start_code; /* Start of the compiled pattern */ + PCRE2_SPTR start_subject ; /* Start of the subject string */ + PCRE2_SPTR end_subject; /* End of subject string */ + PCRE2_SPTR start_used_ptr; /* Earliest consulted character */ + PCRE2_SPTR last_used_ptr; /* Latest consulted character */ + const uint8_t *tables; /* Character tables */ + PCRE2_SIZE start_offset; /* The start offset value */ + uint32_t heap_limit; /* As it says */ + PCRE2_SIZE heap_used; /* As it says */ + uint32_t match_limit; /* As it says */ + uint32_t match_limit_depth; /* As it says */ + uint32_t match_call_count; /* Number of calls of internal function */ + uint32_t moptions; /* Match options */ + uint32_t poptions; /* Pattern options */ + uint32_t nltype; /* Newline type */ + uint32_t nllen; /* Newline string length */ + BOOL allowemptypartial; /* Allow empty hard partial */ + PCRE2_UCHAR nl[4]; /* Newline string when fixed */ + uint16_t bsr_convention; /* \R interpretation */ + pcre2_callout_block *cb; /* Points to a callout block */ + void *callout_data; /* To pass back to callouts */ + int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */ + dfa_recursion_info *recursive; /* Linked list of pattern recursion data */ +} dfa_match_block; + +#endif /* PCRE2_PCRE2TEST */ + +/* End of pcre2_intmodedep.h */ diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c new file mode 
100644 index 0000000..92f4fb8 --- /dev/null +++ b/src/pcre2_jit_compile.c @@ -0,0 +1,14972 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + This module by Zoltan Herczeg + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#include +#endif /* __has_feature(memory_sanitizer) */ +#endif /* defined(__has_feature) */ + +#include "pcre2_internal.h" + +#ifdef SUPPORT_JIT + +/* All-in-one: Since we use the JIT compiler only from here, +we just include it. This way we don't need to touch the build +system files. */ + +#define SLJIT_CONFIG_AUTO 1 +#define SLJIT_CONFIG_STATIC 1 +#define SLJIT_VERBOSE 0 + +#ifdef PCRE2_DEBUG +#define SLJIT_DEBUG 1 +#else +#define SLJIT_DEBUG 0 +#endif + +#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data) +#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data) + +static void * pcre2_jit_malloc(size_t size, void *allocator_data) +{ +pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data); +return allocator->malloc(size, allocator->memory_data); +} + +static void pcre2_jit_free(void *ptr, void *allocator_data) +{ +pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data); +allocator->free(ptr, allocator->memory_data); +} + +#include "sljit/sljitLir.c" + +#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED +#error Unsupported architecture +#endif + +/* Defines for debugging purposes. */ + +/* 1 - Use unoptimized capturing brackets. + 2 - Enable capture_last_ptr (includes option 1). 
*/ +/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */ + +/* 1 - Always have a control head. */ +/* #define DEBUG_FORCE_CONTROL_HEAD 1 */ + +/* Allocate memory for the regex stack on the real machine stack. +Fast, but limited size. */ +#define MACHINE_STACK_SIZE 32768 + +/* Growth rate for stack allocated by the OS. Should be a multiple +of the page size. */ +#define STACK_GROWTH_RATE 8192 + +/* Enable to check that the allocation could destroy temporaries. */ +#if defined SLJIT_DEBUG && SLJIT_DEBUG +#define DESTROY_REGISTERS 1 +#endif + +/* +Short summary about the backtracking mechanism employed by the jit code generator: + +The code generator follows the recursive nature of the PERL compatible regular +expressions. The basic blocks of regular expressions are condition checkers +which execute different commands depending on the result of the condition check. +The relationship between the operators can be horizontal (concatenation) and +vertical (sub-expression) (See struct backtrack_common for more details). + + 'ab' - 'a' and 'b' regexps are concatenated + 'a+' - 'a' is the sub-expression of the '+' operator + +The condition checkers are boolean (true/false) checkers. Machine code is generated +for the checker itself and for the actions depending on the result of the checker. +The 'true' case is called as the matching path (expected path), and the other is called as +the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken +branches on the matching path. + + Greedy star operator (*) : + Matching path: match happens. + Backtrack path: match failed. + Non-greedy star operator (*?) : + Matching path: no need to perform a match. + Backtrack path: match is required. + +The following example shows the code generated for a capturing bracket +with two alternatives.
Let A, B, C, D be arbitrary regular expressions, and +we have the following regular expression: + + A(B|C)D + +The generated code will be the following: + + A matching path + '(' matching path (pushing arguments to the stack) + B matching path + ')' matching path (pushing arguments to the stack) + D matching path + return with successful match + + D backtrack path + ')' backtrack path (If we arrived from "C" jump to the backtrack of "C") + B backtrack path + C expected path + jump to D matching path + C backtrack path + A backtrack path + + Notice, that the order of backtrack code paths is the opposite of the fast + code paths. In this way the topmost value on the stack always belongs + to the current backtrack code path. The backtrack path must check + whether there is a next alternative. If so, it needs to jump back to + the matching path eventually. Otherwise it needs to clear out its own stack + frame and continue the execution on the backtrack code paths. +*/ + +/* +Saved stack frames: + +Atomic blocks and asserts require reloading the values of private data +when the backtrack mechanism is performed. Because of OP_RECURSE, the data +are not necessarily known at compile time, thus we need a dynamic restore +mechanism. + +The stack frames are stored in a chain list, and have the following format: +([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ] + +Thus we can restore the private data to a particular point in the stack. +*/ + +typedef struct jit_arguments { + /* Pointers first. */ + struct sljit_stack *stack; + PCRE2_SPTR str; + PCRE2_SPTR begin; + PCRE2_SPTR end; + pcre2_match_data *match_data; + PCRE2_SPTR startchar_ptr; + PCRE2_UCHAR *mark_ptr; + int (*callout)(pcre2_callout_block *, void *); + void *callout_data; + /* Everything else after.
*/ + sljit_uw offset_limit; + sljit_u32 limit_match; + sljit_u32 oveccount; + sljit_u32 options; +} jit_arguments; + +#define JIT_NUMBER_OF_COMPILE_MODES 3 + +typedef struct executable_functions { + void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES]; + void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES]; + sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; + sljit_u32 top_bracket; + sljit_u32 limit_match; +} executable_functions; + +typedef struct jump_list { + struct sljit_jump *jump; + struct jump_list *next; +} jump_list; + +typedef struct stub_list { + struct sljit_jump *start; + struct sljit_label *quit; + struct stub_list *next; +} stub_list; + +enum frame_types { + no_frame = -1, + no_stack = -2 +}; + +enum control_types { + type_mark = 0, + type_then_trap = 1 +}; + +enum early_fail_types { + type_skip = 0, + type_fail = 1, + type_fail_range = 2 +}; + +typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args); + +/* The following structure is the key data type for the recursive +code generator. It is allocated by compile_matchingpath, and contains +the arguments for compile_backtrackingpath. Must be the first member +of its descendants. */ +typedef struct backtrack_common { + /* Backtracking path of an opcode, which falls back + to our opcode, if it cannot resume matching. */ + struct backtrack_common *prev; + /* Backtracks for opcodes without backtracking path. + These opcodes are between 'prev' and the current + opcode, and they never resume the match. */ + jump_list *simple_backtracks; + /* Internal backtracking list for block constructs + which contains other opcodes, such as brackets, + asserts, conditionals, etc. */ + struct backtrack_common *top; + /* Backtracks used internally by the opcode. For component + opcodes, this list is also used by those opcodes without + backtracking path which follows the 'top' backtrack. */ + jump_list *own_backtracks; + /* Opcode pointer. 
*/ + PCRE2_SPTR cc; +} backtrack_common; + +typedef struct assert_backtrack { + backtrack_common common; + jump_list *condfailed; + /* Less than 0 if a frame is not needed. */ + int framesize; + /* Points to our private memory word on the stack. */ + int private_data_ptr; + /* For iterators. */ + struct sljit_label *matchingpath; +} assert_backtrack; + +typedef struct bracket_backtrack { + backtrack_common common; + /* Where to continue if an alternative is successfully matched. */ + struct sljit_label *alternative_matchingpath; + /* For rmin and rmax iterators. */ + struct sljit_label *recursive_matchingpath; + /* For greedy ? operator. */ + struct sljit_label *zero_matchingpath; + /* Contains the branches of a failed condition. */ + union { + /* Both for OP_COND, OP_SCOND. */ + jump_list *condfailed; + assert_backtrack *assert; + /* For OP_ONCE. Less than 0 if not needed. */ + int framesize; + /* For brackets with >3 alternatives. */ + struct sljit_jump *matching_mov_addr; + } u; + /* Points to our private memory word on the stack. */ + int private_data_ptr; +} bracket_backtrack; + +typedef struct bracketpos_backtrack { + backtrack_common common; + /* Points to our private memory word on the stack. */ + int private_data_ptr; + /* Reverting stack is needed. */ + int framesize; + /* Allocated stack size. */ + int stacksize; +} bracketpos_backtrack; + +typedef struct braminzero_backtrack { + backtrack_common common; + struct sljit_label *matchingpath; +} braminzero_backtrack; + +typedef struct char_iterator_backtrack { + backtrack_common common; + /* Next iteration. */ + struct sljit_label *matchingpath; + union { + jump_list *backtracks; + struct { + unsigned int othercasebit; + PCRE2_UCHAR chr; + BOOL enabled; + } charpos; + } u; +} char_iterator_backtrack; + +typedef struct ref_iterator_backtrack { + backtrack_common common; + /* Next iteration. 
*/ + struct sljit_label *matchingpath; +} ref_iterator_backtrack; + +typedef struct recurse_entry { + struct recurse_entry *next; + /* Contains the function entry label. */ + struct sljit_label *entry_label; + /* Contains the function backtrack label. */ + struct sljit_label *backtrack_label; + /* Collects the entry calls until the function is created. */ + jump_list *entry_calls; + /* Collects the backtrack calls until the function is created. */ + jump_list *backtrack_calls; + /* Points to the starting opcode. */ + sljit_sw start; +} recurse_entry; + +typedef struct recurse_backtrack { + backtrack_common common; + /* Return to the matching path. */ + struct sljit_label *matchingpath; + /* Recursive pattern. */ + recurse_entry *entry; + /* Pattern is inlined. */ + BOOL inlined_pattern; +} recurse_backtrack; + +typedef struct vreverse_backtrack { + backtrack_common common; + /* Return to the matching path. */ + struct sljit_label *matchingpath; +} vreverse_backtrack; + +#define OP_THEN_TRAP OP_TABLE_LENGTH + +typedef struct then_trap_backtrack { + backtrack_common common; + /* If then_trap is not NULL, this structure contains the real + then_trap for the backtracking path. */ + struct then_trap_backtrack *then_trap; + /* Points to the starting opcode. */ + sljit_sw start; + /* Exit point for the then opcodes of this alternative. */ + jump_list *quit; + /* Frame size of the current alternative. */ + int framesize; +} then_trap_backtrack; + +#define MAX_N_CHARS 12 +#define MAX_DIFF_CHARS 5 + +typedef struct fast_forward_char_data { + /* Number of characters in the chars array, 255 for any character. */ + sljit_u8 count; + /* Number of last UTF-8 characters in the chars array. */ + sljit_u8 last_count; + /* Available characters in the current position. */ + PCRE2_UCHAR chars[MAX_DIFF_CHARS]; +} fast_forward_char_data; + +#define MAX_CLASS_RANGE_SIZE 4 +#define MAX_CLASS_CHARS_SIZE 3 + +typedef struct compiler_common { + /* The sljit generic compiler. 
*/ + struct sljit_compiler *compiler; + /* Compiled regular expression. */ + pcre2_real_code *re; + /* First byte code. */ + PCRE2_SPTR start; + /* Maps private data offset to each opcode. */ + sljit_s32 *private_data_ptrs; + /* Chain list of read-only data ptrs. */ + void *read_only_data_head; + /* Tells whether the capturing bracket is optimized. */ + sljit_u8 *optimized_cbracket; + /* Tells whether the starting offset is a target of then. */ + sljit_u8 *then_offsets; + /* Current position where a THEN must jump. */ + then_trap_backtrack *then_trap; + /* Starting offset of private data for capturing brackets. */ + sljit_s32 cbra_ptr; + /* Output vector starting point. Must be divisible by 2. */ + sljit_s32 ovector_start; + /* Points to the starting character of the current match. */ + sljit_s32 start_ptr; + /* Last known position of the requested byte. */ + sljit_s32 req_char_ptr; + /* Head of the last recursion. */ + sljit_s32 recursive_head_ptr; + /* First inspected character for partial matching. + (Needed for avoiding zero length partial matches.) */ + sljit_s32 start_used_ptr; + /* Starting pointer for partial soft matches. */ + sljit_s32 hit_start; + /* Pointer of the match end position. */ + sljit_s32 match_end_ptr; + /* Points to the marked string. */ + sljit_s32 mark_ptr; + /* Head of the recursive control verb management chain. + Each item must have a previous offset and type + (see control_types) values. See do_search_mark. */ + sljit_s32 control_head_ptr; + /* Points to the last matched capture block index. */ + sljit_s32 capture_last_ptr; + /* Fast forward skipping byte code pointer. */ + PCRE2_SPTR fast_forward_bc_ptr; + /* Locals used by fast fail optimization. */ + sljit_s32 early_fail_start_ptr; + sljit_s32 early_fail_end_ptr; + /* Variables used by recursive call generator. */ + sljit_s32 recurse_bitset_size; + uint8_t *recurse_bitset; + + /* Flipped and lower case tables. 
*/ + const sljit_u8 *fcc; + sljit_sw lcc; + /* Mode can be PCRE2_JIT_COMPLETE and others. */ + int mode; + /* TRUE, when empty match is accepted for partial matching. */ + BOOL allow_empty_partial; + /* TRUE, when minlength is greater than 0. NOTE(review): this wording looks inverted relative to the field name, and check_opcode_types sets the flag to TRUE for OP_SET_SOM - confirm the intended meaning. */ + BOOL might_be_empty; + /* \K is found in the pattern. */ + BOOL has_set_som; + /* (*SKIP:arg) is found in the pattern. */ + BOOL has_skip_arg; + /* (*THEN) is found in the pattern. */ + BOOL has_then; + /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */ + BOOL has_skip_in_assert_back; + /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */ + BOOL local_quit_available; + /* Currently in a positive assertion. */ + BOOL in_positive_assertion; + /* Newline control. */ + int nltype; + sljit_u32 nlmax; + sljit_u32 nlmin; + int newline; + int bsr_nltype; + sljit_u32 bsr_nlmax; + sljit_u32 bsr_nlmin; + /* Dollar endonly. */ + int endonly; + /* Tables. */ + sljit_sw ctypes; + /* Named capturing brackets. */ + PCRE2_SPTR name_table; + sljit_sw name_count; + sljit_sw name_entry_size; + + /* Labels and jump lists. */ + struct sljit_label *partialmatchlabel; + struct sljit_label *quit_label; + struct sljit_label *abort_label; + struct sljit_label *accept_label; + struct sljit_label *ff_newline_shortcut; + stub_list *stubs; + recurse_entry *entries; + recurse_entry *currententry; + jump_list *partialmatch; + jump_list *quit; + jump_list *positive_assertion_quit; + jump_list *abort; + jump_list *failed_match; + jump_list *accept; + jump_list *calllimit; + jump_list *stackalloc; + jump_list *revertframes; + jump_list *wordboundary; + jump_list *ucp_wordboundary; + jump_list *anynewline; + jump_list *hspace; + jump_list *vspace; + jump_list *casefulcmp; + jump_list *caselesscmp; + jump_list *reset_match; + /* Same as reset_match, but resets the STR_PTR as well. 
*/ + jump_list *restart_match; + BOOL unset_backref; + BOOL alt_circumflex; +#ifdef SUPPORT_UNICODE + BOOL utf; + BOOL invalid_utf; + BOOL ucp; + /* Points to saving area for iref. */ + sljit_s32 iref_ptr; + jump_list *getucd; + jump_list *getucdtype; +#if PCRE2_CODE_UNIT_WIDTH == 8 + jump_list *utfreadchar; + jump_list *utfreadtype8; + jump_list *utfpeakcharback; +#endif +#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 + jump_list *utfreadchar_invalid; + jump_list *utfreadnewline_invalid; + jump_list *utfmoveback_invalid; + jump_list *utfpeakcharback_invalid; +#endif +#endif /* SUPPORT_UNICODE */ +} compiler_common; + +/* For byte_sequence_compare. */ + +typedef struct compare_context { + int length; + int sourcereg; +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + int ucharptr; + union { + sljit_s32 asint; + sljit_u16 asushort; +#if PCRE2_CODE_UNIT_WIDTH == 8 + sljit_u8 asbyte; + sljit_u8 asuchars[4]; +#elif PCRE2_CODE_UNIT_WIDTH == 16 + sljit_u16 asuchars[2]; +#elif PCRE2_CODE_UNIT_WIDTH == 32 + sljit_u32 asuchars[1]; +#endif + } c; + union { + sljit_s32 asint; + sljit_u16 asushort; +#if PCRE2_CODE_UNIT_WIDTH == 8 + sljit_u8 asbyte; + sljit_u8 asuchars[4]; +#elif PCRE2_CODE_UNIT_WIDTH == 16 + sljit_u16 asuchars[2]; +#elif PCRE2_CODE_UNIT_WIDTH == 32 + sljit_u32 asuchars[1]; +#endif + } oc; +#endif +} compare_context; + +/* Undefine sljit macros. */ +#undef CMP + +/* Used for accessing the elements of the stack. */ +#define STACK(i) ((i) * SSIZE_OF(sw)) + +#ifdef SLJIT_PREF_SHIFT_REG +#if SLJIT_PREF_SHIFT_REG == SLJIT_R2 +/* Nothing. 
*/ +#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3 +#define SHIFT_REG_IS_R3 +#else +#error "Unsupported shift register" +#endif +#endif + +#define TMP1 SLJIT_R0 +#ifdef SHIFT_REG_IS_R3 +#define TMP2 SLJIT_R3 +#define TMP3 SLJIT_R2 +#else +#define TMP2 SLJIT_R2 +#define TMP3 SLJIT_R3 +#endif +#define STR_PTR SLJIT_R1 +#define STR_END SLJIT_S0 +#define STACK_TOP SLJIT_S1 +#define STACK_LIMIT SLJIT_S2 +#define COUNT_MATCH SLJIT_S3 +#define ARGUMENTS SLJIT_S4 +#define RETURN_ADDR SLJIT_R4 + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#define HAS_VIRTUAL_REGISTERS 1 +#else +#define HAS_VIRTUAL_REGISTERS 0 +#endif + +/* Local space layout. */ +/* These two locals can be used by the current opcode. */ +#define LOCALS0 (0 * sizeof(sljit_sw)) +#define LOCALS1 (1 * sizeof(sljit_sw)) +/* Two local variables for possessive quantifiers (char1 cannot use them). */ +#define POSSESSIVE0 (2 * sizeof(sljit_sw)) +#define POSSESSIVE1 (3 * sizeof(sljit_sw)) +/* Max limit of recursions. */ +#define LIMIT_MATCH (4 * sizeof(sljit_sw)) +/* The output vector is stored on the stack, and contains pointers +to characters. The vector data is divided into two groups: the first +group contains the start / end character pointers, and the second contains +the start pointers when the end of the capturing group has not yet been reached. */ +#define OVECTOR_START (common->ovector_start) +#define OVECTOR(i) (OVECTOR_START + (i) * SSIZE_OF(sw)) +#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * SSIZE_OF(sw)) +#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define MOV_UCHAR SLJIT_MOV_U8 +#define IN_UCHARS(x) (x) +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#define MOV_UCHAR SLJIT_MOV_U16 +#define UCHAR_SHIFT (1) +#define IN_UCHARS(x) ((x) * 2) +#elif PCRE2_CODE_UNIT_WIDTH == 32 +#define MOV_UCHAR SLJIT_MOV_U32 +#define UCHAR_SHIFT (2) +#define IN_UCHARS(x) ((x) * 4) +#else +#error Unsupported compiling mode +#endif + +/* Shortcuts. 
*/ +#define DEFINE_COMPILER \ + struct sljit_compiler *compiler = common->compiler +#define OP1(op, dst, dstw, src, srcw) \ + sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw)) +#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \ + sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w)) +#define OP2U(op, src1, src1w, src2, src2w) \ + sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w)) +#define OP_SRC(op, src, srcw) \ + sljit_emit_op_src(compiler, (op), (src), (srcw)) +#define LABEL() \ + sljit_emit_label(compiler) +#define JUMP(type) \ + sljit_emit_jump(compiler, (type)) +#define JUMPTO(type, label) \ + sljit_set_label(sljit_emit_jump(compiler, (type)), (label)) +#define JUMPHERE(jump) \ + sljit_set_label((jump), sljit_emit_label(compiler)) +#define SET_LABEL(jump, label) \ + sljit_set_label((jump), (label)) +#define CMP(type, src1, src1w, src2, src2w) \ + sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)) +#define CMPTO(type, src1, src1w, src2, src2w, label) \ + sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label)) +#define OP_FLAGS(op, dst, dstw, type) \ + sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type)) +#define SELECT(type, dst_reg, src1, src1w, src2_reg) \ + sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg)) +#define GET_LOCAL_BASE(dst, dstw, offset) \ + sljit_get_local_base(compiler, (dst), (dstw), (offset)) + +#define READ_CHAR_MAX 0x7fffffff + +#define INVALID_UTF_CHAR -1 +#define UNASSIGNED_UTF_CHAR 888 + +#if defined SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 + +#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \ + { \ + if (ptr[0] <= 0x7f) \ + c = *ptr++; \ + else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \ + { \ + c = ptr[1] - 0x80; \ + \ + if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \ + { \ + c |= (ptr[0] - 0xc0) << 6; \ + ptr += 2; \ + } \ + else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 
0xc0) \ + { \ + c = c << 6 | (ptr[2] - 0x80); \ + \ + if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \ + { \ + c |= (ptr[0] - 0xe0) << 12; \ + ptr += 3; \ + \ + if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \ + { \ + invalid_action; \ + } \ + } \ + else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \ + { \ + c = c << 6 | (ptr[3] - 0x80); \ + \ + if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \ + { \ + c |= (ptr[0] - 0xf0) << 18; \ + ptr += 4; \ + \ + if (c >= 0x110000 || c < 0x10000) \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } + +#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \ + { \ + c = ptr[-1]; \ + if (c <= 0x7f) \ + ptr--; \ + else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \ + { \ + c -= 0x80; \ + \ + if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \ + { \ + c |= (ptr[-2] - 0xc0) << 6; \ + ptr -= 2; \ + } \ + else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \ + { \ + c = c << 6 | (ptr[-2] - 0x80); \ + \ + if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \ + { \ + c |= (ptr[-3] - 0xe0) << 12; \ + ptr -= 3; \ + \ + if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \ + { \ + invalid_action; \ + } \ + } \ + else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \ + { \ + c = c << 6 | (ptr[-3] - 0x80); \ + \ + if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \ + { \ + c |= (ptr[-4] - 0xf0) << 18; \ + ptr -= 4; \ + \ + if (c >= 0x110000 || c < 0x10000) \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } + +#elif PCRE2_CODE_UNIT_WIDTH == 16 + +#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \ + { \ + if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \ + c = *ptr++; \ + else if (ptr[0] < 0xdc00 && ptr + 1 < 
end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \ + { \ + c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \ + ptr += 2; \ + } \ + else \ + { \ + invalid_action; \ + } \ + } + +#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \ + { \ + c = ptr[-1]; \ + if (c < 0xd800 || c >= 0xe000) \ + ptr--; \ + else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \ + { \ + c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \ + ptr -= 2; \ + } \ + else \ + { \ + invalid_action; \ + } \ + } + + +#elif PCRE2_CODE_UNIT_WIDTH == 32 + +#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \ + { \ + if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \ + c = *ptr++; \ + else \ + { \ + invalid_action; \ + } \ + } + +#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \ + { \ + c = ptr[-1]; \ + if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \ + ptr--; \ + else \ + { \ + invalid_action; \ + } \ + } + +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ +#endif /* SUPPORT_UNICODE */ + +static PCRE2_SPTR bracketend(PCRE2_SPTR cc) +{ +SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); +do cc += GET(cc, 1); while (*cc == OP_ALT); +SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); +cc += 1 + LINK_SIZE; +return cc; +} + +static int no_alternatives(PCRE2_SPTR cc) +{ +int count = 0; +SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); +do + { + cc += GET(cc, 1); + count++; + } +while (*cc == OP_ALT); +SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); +return count; +} + +static BOOL find_vreverse(PCRE2_SPTR cc) +{ + SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT || *cc == OP_ASSERTBACK_NA); + + do + { + if (cc[1 + LINK_SIZE] == OP_VREVERSE) + return TRUE; + cc += GET(cc, 1); + } + while (*cc == OP_ALT); + + return FALSE; +} + +/* Functions whose might need modification for all new supported 
opcodes: + next_opcode + check_opcode_types + set_private_data_ptrs + get_framesize + init_frame + get_recurse_data_length + copy_recurse_data + compile_matchingpath + compile_backtrackingpath +*/ + +static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc) +{ +SLJIT_UNUSED_ARG(common); +switch(*cc) + { + case OP_SOD: + case OP_SOM: + case OP_SET_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ALLANY: + case OP_NOTPROP: + case OP_PROP: + case OP_ANYNL: + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: + case OP_EXTUNI: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + case OP_CRPOSRANGE: + case OP_CLASS: + case OP_NCLASS: + case OP_REF: + case OP_REFI: + case OP_DNREF: + case OP_DNREFI: + case OP_RECURSE: + case OP_CALLOUT: + case OP_ALT: + case OP_KET: + case OP_KETRMAX: + case OP_KETRMIN: + case OP_KETRPOS: + case OP_REVERSE: + case OP_VREVERSE: + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_BRA: + case OP_BRAPOS: + case OP_CBRA: + case OP_CBRAPOS: + case OP_COND: + case OP_SBRA: + case OP_SBRAPOS: + case OP_SCBRA: + case OP_SCBRAPOS: + case OP_SCOND: + case OP_CREF: + case OP_DNCREF: + case OP_RREF: + case OP_DNRREF: + case OP_FALSE: + case OP_TRUE: + case OP_BRAZERO: + case OP_BRAMINZERO: + case OP_BRAPOSZERO: + case OP_PRUNE: + case OP_SKIP: + case OP_THEN: + case OP_COMMIT: + case OP_FAIL: + case OP_ACCEPT: + case OP_ASSERT_ACCEPT: + case OP_CLOSE: + case 
OP_SKIPZERO: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + return cc + PRIV(OP_lengths)[*cc]; + + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_QUERY: + case OP_MINQUERY: + case OP_UPTO: + case OP_MINUPTO: + case OP_EXACT: + case OP_POSSTAR: + case OP_POSPLUS: + case OP_POSQUERY: + case OP_POSUPTO: + case OP_STARI: + case OP_MINSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_EXACTI: + case OP_POSSTARI: + case OP_POSPLUSI: + case OP_POSQUERYI: + case OP_POSUPTOI: + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTEXACT: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + case OP_NOTPOSQUERY: + case OP_NOTPOSUPTO: + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTEXACTI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERYI: + case OP_NOTPOSUPTOI: + cc += PRIV(OP_lengths)[*cc]; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + return cc; + + /* Special cases. 
*/ + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEEXACT: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + case OP_TYPEPOSUPTO: + return cc + PRIV(OP_lengths)[*cc] - 1; + + case OP_ANYBYTE: +#ifdef SUPPORT_UNICODE + if (common->utf) return NULL; +#endif + return cc + 1; + + case OP_CALLOUT_STR: + return cc + GET(cc, 1 + 2*LINK_SIZE); + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: + return cc + GET(cc, 1); +#endif + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + return cc + 1 + 2 + cc[1]; + + default: + SLJIT_UNREACHABLE(); + return NULL; + } +} + +static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend) +{ +int count; +PCRE2_SPTR slot; +PCRE2_SPTR assert_back_end = cc - 1; +PCRE2_SPTR assert_na_end = cc - 1; + +/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */ +while (cc < ccend) + { + switch(*cc) + { + case OP_SET_SOM: + common->has_set_som = TRUE; + common->might_be_empty = TRUE; + cc += 1; + break; + + case OP_REFI: +#ifdef SUPPORT_UNICODE + if (common->iref_ptr == 0) + { + common->iref_ptr = common->ovector_start; + common->ovector_start += 3 * sizeof(sljit_sw); + } +#endif /* SUPPORT_UNICODE */ + /* Fall through. */ + case OP_REF: + common->optimized_cbracket[GET2(cc, 1)] = 0; + cc += 1 + IMM2_SIZE; + break; + + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + slot = bracketend(cc); + if (slot > assert_na_end) + assert_na_end = slot; + cc += 1 + LINK_SIZE; + break; + + case OP_CBRAPOS: + case OP_SCBRAPOS: + common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0; + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_COND: + case OP_SCOND: + /* Only AUTO_CALLOUT can insert this opcode. We do + not intend to support this case. 
*/ + if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR) + return FALSE; + cc += 1 + LINK_SIZE; + break; + + case OP_CREF: + common->optimized_cbracket[GET2(cc, 1)] = 0; + cc += 1 + IMM2_SIZE; + break; + + case OP_DNREF: + case OP_DNREFI: + case OP_DNCREF: + count = GET2(cc, 1 + IMM2_SIZE); + slot = common->name_table + GET2(cc, 1) * common->name_entry_size; + while (count-- > 0) + { + common->optimized_cbracket[GET2(slot, 0)] = 0; + slot += common->name_entry_size; + } + cc += 1 + 2 * IMM2_SIZE; + break; + + case OP_RECURSE: + /* Set its value only once. */ + if (common->recursive_head_ptr == 0) + { + common->recursive_head_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + cc += 1 + LINK_SIZE; + break; + + case OP_CALLOUT: + case OP_CALLOUT_STR: + if (common->capture_last_ptr == 0) + { + common->capture_last_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE); + break; + + case OP_ASSERTBACK: + slot = bracketend(cc); + if (slot > assert_back_end) + assert_back_end = slot; + cc += 1 + LINK_SIZE; + break; + + case OP_THEN_ARG: + common->has_then = TRUE; + common->control_head_ptr = 1; + /* Fall through. 
*/ + + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + if (cc < assert_na_end) + return FALSE; + /* Fall through */ + case OP_MARK: + if (common->mark_ptr == 0) + { + common->mark_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + cc += 1 + 2 + cc[1]; + break; + + case OP_THEN: + common->has_then = TRUE; + common->control_head_ptr = 1; + cc += 1; + break; + + case OP_SKIP: + if (cc < assert_back_end) + common->has_skip_in_assert_back = TRUE; + if (cc < assert_na_end) + return FALSE; + cc += 1; + break; + + case OP_SKIP_ARG: + common->control_head_ptr = 1; + common->has_skip_arg = TRUE; + if (cc < assert_back_end) + common->has_skip_in_assert_back = TRUE; + if (cc < assert_na_end) + return FALSE; + cc += 1 + 2 + cc[1]; + break; + + case OP_PRUNE: + case OP_COMMIT: + case OP_ASSERT_ACCEPT: + if (cc < assert_na_end) + return FALSE; + cc++; + break; + + default: + cc = next_opcode(common, cc); + if (cc == NULL) + return FALSE; + break; + } + } +return TRUE; +} + +#define EARLY_FAIL_ENHANCE_MAX (3 + 3) + +/* + Start represents the number of allowed early fail enhancements + + The following values have a special meaning: + 0 - skip is allowed for all iterators + 1 - fail is allowed for all iterators + 2 - fail is allowed for greedy iterators + 3 - only ranged early fail is allowed + >3 - (start - 3) number of remaining ranged early fails allowed + +return: the updated value of start +*/ +static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, + int *private_data_start, sljit_s32 depth, int start) +{ +PCRE2_SPTR begin = cc; +PCRE2_SPTR next_alt; +PCRE2_SPTR end; +PCRE2_SPTR accelerated_start; +int result = 0; +int count, prev_count; + +SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA); +SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0); +SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX); + +next_alt = cc + GET(cc, 1); +if (*next_alt == OP_ALT && start < 1) + start = 1; + +do + { + count = start; 
+ cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0); + + while (TRUE) + { + accelerated_start = NULL; + + switch(*cc) + { + case OP_SOD: + case OP_SOM: + case OP_SET_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + /* Zero width assertions. */ + cc++; + continue; + + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ALLANY: + case OP_ANYBYTE: + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: + if (count < 1) + count = 1; + cc++; + continue; + + case OP_ANYNL: + case OP_EXTUNI: + if (count < 3) + count = 3; + cc++; + continue; + + case OP_NOTPROP: + case OP_PROP: + if (count < 1) + count = 1; + cc += 1 + 2; + continue; + + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + if (count < 1) + count = 1; + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + continue; + + case OP_TYPEMINSTAR: + case OP_TYPEMINPLUS: + if (count == 2) + count = 3; + /* Fall through */ + + case OP_TYPESTAR: + case OP_TYPEPLUS: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + /* The type or prop opcode is skipped in the next iteration. */ + cc += 1; + + if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI) + { + accelerated_start = cc - 1; + break; + } + + if (count < 3) + count = 3; + continue; + + case OP_TYPEEXACT: + if (count < 1) + count = 1; + cc += 1 + IMM2_SIZE; + continue; + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEPOSUPTO: + cc += IMM2_SIZE; + /* Fall through */ + + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSQUERY: + /* The type or prop opcode is skipped in the next iteration. 
*/ + if (count < 3) + count = 3; + cc += 1; + continue; + + case OP_MINSTAR: + case OP_MINPLUS: + case OP_MINSTARI: + case OP_MINPLUSI: + case OP_NOTMINSTAR: + case OP_NOTMINPLUS: + case OP_NOTMINSTARI: + case OP_NOTMINPLUSI: + if (count == 2) + count = 3; + /* Fall through */ + + case OP_STAR: + case OP_PLUS: + case OP_POSSTAR: + case OP_POSPLUS: + + case OP_STARI: + case OP_PLUSI: + case OP_POSSTARI: + case OP_POSPLUSI: + + case OP_NOTSTAR: + case OP_NOTPLUS: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + + case OP_NOTSTARI: + case OP_NOTPLUSI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + accelerated_start = cc; + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + case OP_EXACT: + if (count < 1) + count = 1; + cc += 2 + IMM2_SIZE; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + continue; + + case OP_UPTO: + case OP_MINUPTO: + case OP_POSUPTO: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_EXACTI: + case OP_POSUPTOI: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTEXACT: + case OP_NOTPOSUPTO: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTEXACTI: + case OP_NOTPOSUPTOI: + cc += IMM2_SIZE; + /* Fall through */ + + case OP_QUERY: + case OP_MINQUERY: + case OP_POSQUERY: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_POSQUERYI: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTPOSQUERY: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTPOSQUERYI: + if (count < 3) + count = 3; + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + continue; + + case OP_CLASS: + case OP_NCLASS: +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: + accelerated_start = cc; + cc += ((*cc == OP_XCLASS) ? 
GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR)))); +#else + accelerated_start = cc; + cc += (1 + (32 / sizeof(PCRE2_UCHAR))); +#endif + + switch (*cc) + { + case OP_CRMINSTAR: + case OP_CRMINPLUS: + if (count == 2) + count = 3; + /* Fall through */ + + case OP_CRSTAR: + case OP_CRPLUS: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + cc++; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE)) + { + /* Exact repeat. */ + cc += 1 + 2 * IMM2_SIZE; + if (count < 1) + count = 1; + continue; + } + + cc += 2 * IMM2_SIZE; + /* Fall through */ + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSQUERY: + cc++; + if (count < 3) + count = 3; + continue; + + default: + /* No repeat. */ + if (count < 1) + count = 1; + continue; + } + break; + + case OP_BRA: + case OP_CBRA: + prev_count = count; + if (count < 1) + count = 1; + + if (depth >= 4) + break; + + if (count < 3 && cc[GET(cc, 1)] == OP_ALT) + count = 3; + + end = bracketend(cc); + if (end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)) + break; + + prev_count = detect_early_fail(common, cc, private_data_start, depth + 1, prev_count); + + if (prev_count > count) + count = prev_count; + + if (PRIVATE_DATA(cc) != 0) + common->private_data_ptrs[begin - common->start] = 1; + + if (count < EARLY_FAIL_ENHANCE_MAX) + { + cc = end; + continue; + } + break; + + case OP_KET: + SLJIT_ASSERT(PRIVATE_DATA(cc) == 0); + if (cc >= next_alt) + break; + cc += 1 + LINK_SIZE; + continue; + } + + if (accelerated_start == NULL) + break; + + if (count == 0) + { + common->fast_forward_bc_ptr = accelerated_start; + common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip; + *private_data_start += sizeof(sljit_sw); + count = 4; + } + else if (count < 3) + { + common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | 
type_fail; + + if (common->early_fail_start_ptr == 0) + common->early_fail_start_ptr = *private_data_start; + + *private_data_start += sizeof(sljit_sw); + common->early_fail_end_ptr = *private_data_start; + + if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) + return EARLY_FAIL_ENHANCE_MAX; + + count = 4; + } + else + { + common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range; + + if (common->early_fail_start_ptr == 0) + common->early_fail_start_ptr = *private_data_start; + + *private_data_start += 2 * sizeof(sljit_sw); + common->early_fail_end_ptr = *private_data_start; + + if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) + return EARLY_FAIL_ENHANCE_MAX; + + count++; + } + + /* Cannot be part of a repeat. */ + common->private_data_ptrs[begin - common->start] = 1; + + if (count >= EARLY_FAIL_ENHANCE_MAX) + break; + } + + if (*cc != OP_ALT && *cc != OP_KET) + result = EARLY_FAIL_ENHANCE_MAX; + else if (result < count) + result = count; + + cc = next_alt; + next_alt = cc + GET(cc, 1); + } +while (*cc == OP_ALT); + +return result; +} + +static int get_class_iterator_size(PCRE2_SPTR cc) +{ +sljit_u32 min; +sljit_u32 max; +switch(*cc) + { + case OP_CRSTAR: + case OP_CRPLUS: + return 2; + + case OP_CRMINSTAR: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + return 1; + + case OP_CRRANGE: + case OP_CRMINRANGE: + min = GET2(cc, 1); + max = GET2(cc, 1 + IMM2_SIZE); + if (max == 0) + return (*cc == OP_CRRANGE) ? 2 : 1; + max -= min; + if (max > 2) + max = 2; + return max; + + default: + return 0; + } +} + +static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin) +{ +PCRE2_SPTR end = bracketend(begin); +PCRE2_SPTR next; +PCRE2_SPTR next_end; +PCRE2_SPTR max_end; +PCRE2_UCHAR type; +sljit_sw length = end - begin; +sljit_s32 min, max, i; + +/* Detect fixed iterations first. 
*/ +if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0) + return FALSE; + +/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/ + * Skip the check of the second part. */ +if (PRIVATE_DATA(end - LINK_SIZE) != 0) + return TRUE; + +next = end; +min = 1; +while (1) + { + if (*next != *begin) + break; + next_end = bracketend(next); + if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0) + break; + next = next_end; + min++; + } + +if (min == 2) + return FALSE; + +max = 0; +max_end = next; +if (*next == OP_BRAZERO || *next == OP_BRAMINZERO) + { + type = *next; + while (1) + { + if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin) + break; + next_end = bracketend(next + 2 + LINK_SIZE); + if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0) + break; + next = next_end; + max++; + } + + if (next[0] == type && next[1] == *begin && max >= 1) + { + next_end = bracketend(next + 1); + if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0) + { + for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE) + if (*next_end != OP_KET) + break; + + if (i == max) + { + common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end; + common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO; + /* +2 the original and the last. 
*/ + common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2; + if (min == 1) + return TRUE; + min--; + max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE); + } + } + } + } + +if (min >= 3) + { + common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end; + common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT; + common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min; + return TRUE; + } + +return FALSE; +} + +#define CASE_ITERATOR_PRIVATE_DATA_1 \ + case OP_MINSTAR: \ + case OP_MINPLUS: \ + case OP_QUERY: \ + case OP_MINQUERY: \ + case OP_MINSTARI: \ + case OP_MINPLUSI: \ + case OP_QUERYI: \ + case OP_MINQUERYI: \ + case OP_NOTMINSTAR: \ + case OP_NOTMINPLUS: \ + case OP_NOTQUERY: \ + case OP_NOTMINQUERY: \ + case OP_NOTMINSTARI: \ + case OP_NOTMINPLUSI: \ + case OP_NOTQUERYI: \ + case OP_NOTMINQUERYI: + +#define CASE_ITERATOR_PRIVATE_DATA_2A \ + case OP_STAR: \ + case OP_PLUS: \ + case OP_STARI: \ + case OP_PLUSI: \ + case OP_NOTSTAR: \ + case OP_NOTPLUS: \ + case OP_NOTSTARI: \ + case OP_NOTPLUSI: + +#define CASE_ITERATOR_PRIVATE_DATA_2B \ + case OP_UPTO: \ + case OP_MINUPTO: \ + case OP_UPTOI: \ + case OP_MINUPTOI: \ + case OP_NOTUPTO: \ + case OP_NOTMINUPTO: \ + case OP_NOTUPTOI: \ + case OP_NOTMINUPTOI: + +#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \ + case OP_TYPEMINSTAR: \ + case OP_TYPEMINPLUS: \ + case OP_TYPEQUERY: \ + case OP_TYPEMINQUERY: + +#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \ + case OP_TYPESTAR: \ + case OP_TYPEPLUS: + +#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \ + case OP_TYPEUPTO: \ + case OP_TYPEMINUPTO: + +static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend) +{ +PCRE2_SPTR cc = common->start; +PCRE2_SPTR alternative; +PCRE2_SPTR end = NULL; +int private_data_ptr = *private_data_start; +int space, size, bracketlen; +BOOL repeat_check = TRUE; + +while (cc < ccend) + { + space = 0; + size = 0; + bracketlen = 0; + if 
(private_data_ptr > SLJIT_MAX_LOCAL_SIZE) + break; + + /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */ + if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)) + { + if (detect_repeat(common, cc)) + { + /* These brackets are converted to repeats, so no global + based single character repeat is allowed. */ + if (cc >= end) + end = bracketend(cc); + } + } + repeat_check = TRUE; + + switch(*cc) + { + case OP_KET: + if (common->private_data_ptrs[cc + 1 - common->start] != 0) + { + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + cc += common->private_data_ptrs[cc + 1 - common->start]; + } + cc += 1 + LINK_SIZE; + break; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_BRAPOS: + case OP_SBRA: + case OP_SBRAPOS: + case OP_SCOND: + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + bracketlen = 1 + LINK_SIZE; + break; + + case OP_ASSERTBACK_NA: + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + + if (find_vreverse(cc)) + { + common->private_data_ptrs[cc + 1 - common->start] = 1; + private_data_ptr += sizeof(sljit_sw); + } + + bracketlen = 1 + LINK_SIZE; + break; + + case OP_CBRAPOS: + case OP_SCBRAPOS: + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + bracketlen = 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_COND: + /* Might be a hidden SCOND. 
*/ + common->private_data_ptrs[cc - common->start] = 0; + alternative = cc + GET(cc, 1); + if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) + { + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + } + bracketlen = 1 + LINK_SIZE; + break; + + case OP_BRA: + bracketlen = 1 + LINK_SIZE; + break; + + case OP_CBRA: + case OP_SCBRA: + bracketlen = 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_BRAZERO: + case OP_BRAMINZERO: + case OP_BRAPOSZERO: + size = 1; + repeat_check = FALSE; + break; + + CASE_ITERATOR_PRIVATE_DATA_1 + size = -2; + space = 1; + break; + + CASE_ITERATOR_PRIVATE_DATA_2A + size = -2; + space = 2; + break; + + CASE_ITERATOR_PRIVATE_DATA_2B + size = -(2 + IMM2_SIZE); + space = 2; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_1 + size = 1; + space = 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2A + size = 1; + if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) + space = 2; + break; + + case OP_TYPEUPTO: + size = 1 + IMM2_SIZE; + if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) + space = 2; + break; + + case OP_TYPEMINUPTO: + size = 1 + IMM2_SIZE; + space = 2; + break; + + case OP_CLASS: + case OP_NCLASS: + size = 1 + 32 / sizeof(PCRE2_UCHAR); + space = get_class_iterator_size(cc + size); + break; + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: + size = GET(cc, 1); + space = get_class_iterator_size(cc + size); + break; +#endif + + default: + cc = next_opcode(common, cc); + SLJIT_ASSERT(cc != NULL); + break; + } + + /* Character iterators, which are not inside a repeated bracket, + gets a private slot instead of allocating it on the stack. 
*/ + if (space > 0 && cc >= end) + { + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw) * space; + } + + if (size != 0) + { + if (size < 0) + { + cc += -size; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + } + else + cc += size; + } + + if (bracketlen > 0) + { + if (cc >= end) + { + end = bracketend(cc); + if (end[-1 - LINK_SIZE] == OP_KET) + end = NULL; + } + cc += bracketlen; + } + } +*private_data_start = private_data_ptr; +} + +/* Returns with a frame_types (always < 0) if no need for frame. */ +static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head) +{ +int length = 0; +int possessive = 0; +BOOL stack_restore = FALSE; +BOOL setsom_found = recursive; +BOOL setmark_found = recursive; +/* The last capture is a local variable even for recursions. */ +BOOL capture_last_found = FALSE; + +#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD +SLJIT_ASSERT(common->control_head_ptr != 0); +*needs_control_head = TRUE; +#else +*needs_control_head = FALSE; +#endif + +if (ccend == NULL) + { + ccend = bracketend(cc) - (1 + LINK_SIZE); + if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)) + { + possessive = length = (common->capture_last_ptr != 0) ? 5 : 3; + /* This is correct regardless of common->capture_last_ptr. 
*/ + capture_last_found = TRUE; + } + cc = next_opcode(common, cc); + } + +SLJIT_ASSERT(cc != NULL); +while (cc < ccend) + switch(*cc) + { + case OP_SET_SOM: + SLJIT_ASSERT(common->has_set_som); + stack_restore = TRUE; + if (!setsom_found) + { + length += 2; + setsom_found = TRUE; + } + cc += 1; + break; + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_THEN_ARG: + SLJIT_ASSERT(common->mark_ptr != 0); + stack_restore = TRUE; + if (!setmark_found) + { + length += 2; + setmark_found = TRUE; + } + if (common->control_head_ptr != 0) + *needs_control_head = TRUE; + cc += 1 + 2 + cc[1]; + break; + + case OP_RECURSE: + stack_restore = TRUE; + if (common->has_set_som && !setsom_found) + { + length += 2; + setsom_found = TRUE; + } + if (common->mark_ptr != 0 && !setmark_found) + { + length += 2; + setmark_found = TRUE; + } + if (common->capture_last_ptr != 0 && !capture_last_found) + { + length += 2; + capture_last_found = TRUE; + } + cc += 1 + LINK_SIZE; + break; + + case OP_CBRA: + case OP_CBRAPOS: + case OP_SCBRA: + case OP_SCBRAPOS: + stack_restore = TRUE; + if (common->capture_last_ptr != 0 && !capture_last_found) + { + length += 2; + capture_last_found = TRUE; + } + length += 3; + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_THEN: + stack_restore = TRUE; + if (common->control_head_ptr != 0) + *needs_control_head = TRUE; + cc ++; + break; + + default: + stack_restore = TRUE; + /* Fall through. 
*/ + + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ALLANY: + case OP_ANYBYTE: + case OP_NOTPROP: + case OP_PROP: + case OP_ANYNL: + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: + case OP_EXTUNI: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + + case OP_EXACT: + case OP_POSSTAR: + case OP_POSPLUS: + case OP_POSQUERY: + case OP_POSUPTO: + + case OP_EXACTI: + case OP_POSSTARI: + case OP_POSPLUSI: + case OP_POSQUERYI: + case OP_POSUPTOI: + + case OP_NOTEXACT: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + case OP_NOTPOSQUERY: + case OP_NOTPOSUPTO: + + case OP_NOTEXACTI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERYI: + case OP_NOTPOSUPTOI: + + case OP_TYPEEXACT: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + case OP_TYPEPOSUPTO: + + case OP_CLASS: + case OP_NCLASS: + case OP_XCLASS: + + case OP_CALLOUT: + case OP_CALLOUT_STR: + + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + + cc = next_opcode(common, cc); + SLJIT_ASSERT(cc != NULL); + break; + } + +/* Possessive quantifiers can use a special case. */ +if (SLJIT_UNLIKELY(possessive == length)) + return stack_restore ? no_frame : no_stack; + +if (length > 0) + return length + 1; +return stack_restore ? no_frame : no_stack; +} + +static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop) +{ +DEFINE_COMPILER; +BOOL setsom_found = FALSE; +BOOL setmark_found = FALSE; +/* The last capture is a local variable even for recursions. 
*/ +BOOL capture_last_found = FALSE; +int offset; + +/* >= 1 + shortest item size (2) */ +SLJIT_UNUSED_ARG(stacktop); +SLJIT_ASSERT(stackpos >= stacktop + 2); + +stackpos = STACK(stackpos); +if (ccend == NULL) + { + ccend = bracketend(cc) - (1 + LINK_SIZE); + if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS) + cc = next_opcode(common, cc); + } + +SLJIT_ASSERT(cc != NULL); +while (cc < ccend) + switch(*cc) + { + case OP_SET_SOM: + SLJIT_ASSERT(common->has_set_som); + if (!setsom_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); + stackpos -= SSIZE_OF(sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= SSIZE_OF(sw); + setsom_found = TRUE; + } + cc += 1; + break; + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_THEN_ARG: + SLJIT_ASSERT(common->mark_ptr != 0); + if (!setmark_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); + stackpos -= SSIZE_OF(sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= SSIZE_OF(sw); + setmark_found = TRUE; + } + cc += 1 + 2 + cc[1]; + break; + + case OP_RECURSE: + if (common->has_set_som && !setsom_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); + stackpos -= SSIZE_OF(sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= SSIZE_OF(sw); + setsom_found = TRUE; + } + if (common->mark_ptr != 0 && !setmark_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); + stackpos -= SSIZE_OF(sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= SSIZE_OF(sw); + setmark_found = TRUE; + } + if (common->capture_last_ptr != 0 && 
!capture_last_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); + stackpos -= SSIZE_OF(sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= SSIZE_OF(sw); + capture_last_found = TRUE; + } + cc += 1 + LINK_SIZE; + break; + + case OP_CBRA: + case OP_CBRAPOS: + case OP_SCBRA: + case OP_SCBRAPOS: + if (common->capture_last_ptr != 0 && !capture_last_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); + stackpos -= SSIZE_OF(sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= SSIZE_OF(sw); + capture_last_found = TRUE; + } + offset = (GET2(cc, 1 + LINK_SIZE)) << 1; + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset)); + stackpos -= SSIZE_OF(sw); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= SSIZE_OF(sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); + stackpos -= SSIZE_OF(sw); + + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + default: + cc = next_opcode(common, cc); + SLJIT_ASSERT(cc != NULL); + break; + } + +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0); +SLJIT_ASSERT(stackpos == STACK(stacktop)); +} + +#define RECURSE_TMP_REG_COUNT 3 + +typedef struct delayed_mem_copy_status { + struct sljit_compiler *compiler; + int store_bases[RECURSE_TMP_REG_COUNT]; + int store_offsets[RECURSE_TMP_REG_COUNT]; + int tmp_regs[RECURSE_TMP_REG_COUNT]; + int saved_tmp_regs[RECURSE_TMP_REG_COUNT]; + int next_tmp_reg; +} delayed_mem_copy_status; + +static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common) +{ +int i; + +for (i = 0; i < 
RECURSE_TMP_REG_COUNT; i++) + { + SLJIT_ASSERT(status->tmp_regs[i] >= 0); + SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]); + + status->store_bases[i] = -1; + } +status->next_tmp_reg = 0; +status->compiler = common->compiler; +} + +static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset, + int store_base, sljit_sw store_offset) +{ +struct sljit_compiler *compiler = status->compiler; +int next_tmp_reg = status->next_tmp_reg; +int tmp_reg = status->tmp_regs[next_tmp_reg]; + +SLJIT_ASSERT(load_base > 0 && store_base > 0); + +if (status->store_bases[next_tmp_reg] == -1) + { + /* Preserve virtual registers. */ + if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[next_tmp_reg]) < 0) + OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0); + } +else + OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0); + +OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset); +status->store_bases[next_tmp_reg] = store_base; +status->store_offsets[next_tmp_reg] = store_offset; + +status->next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT; +} + +static void delayed_mem_copy_finish(delayed_mem_copy_status *status) +{ +struct sljit_compiler *compiler = status->compiler; +int next_tmp_reg = status->next_tmp_reg; +int tmp_reg, saved_tmp_reg, i; + +for (i = 0; i < RECURSE_TMP_REG_COUNT; i++) + { + if (status->store_bases[next_tmp_reg] != -1) + { + tmp_reg = status->tmp_regs[next_tmp_reg]; + saved_tmp_reg = status->saved_tmp_regs[next_tmp_reg]; + + OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0); + + /* Restore virtual registers. 
*/ + if (sljit_get_register_index(SLJIT_GP_REGISTER, saved_tmp_reg) < 0) + OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0); + } + + next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT; + } +} + +#undef RECURSE_TMP_REG_COUNT + +static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index) +{ +uint8_t *byte; +uint8_t mask; + +SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0); + +bit_index >>= SLJIT_WORD_SHIFT; + +SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size); + +mask = 1 << (bit_index & 0x7); +byte = common->recurse_bitset + (bit_index >> 3); + +if (*byte & mask) + return FALSE; + +*byte |= mask; +return TRUE; +} + +enum get_recurse_flags { + recurse_flag_quit_found = (1 << 0), + recurse_flag_accept_found = (1 << 1), + recurse_flag_setsom_found = (1 << 2), + recurse_flag_setmark_found = (1 << 3), + recurse_flag_control_head_found = (1 << 4), +}; + +static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags) +{ +int length = 1; +int size, offset; +PCRE2_SPTR alternative; +uint32_t recurse_flags = 0; + +memset(common->recurse_bitset, 0, common->recurse_bitset_size); + +#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD +SLJIT_ASSERT(common->control_head_ptr != 0); +recurse_flags |= recurse_flag_control_head_found; +#endif + +/* Calculate the sum of the private machine words. 
*/ +while (cc < ccend) + { + size = 0; + switch(*cc) + { + case OP_SET_SOM: + SLJIT_ASSERT(common->has_set_som); + recurse_flags |= recurse_flag_setsom_found; + cc += 1; + break; + + case OP_RECURSE: + if (common->has_set_som) + recurse_flags |= recurse_flag_setsom_found; + if (common->mark_ptr != 0) + recurse_flags |= recurse_flag_setmark_found; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + length++; + cc += 1 + LINK_SIZE; + break; + + case OP_KET: + offset = PRIVATE_DATA(cc); + if (offset != 0) + { + if (recurse_check_bit(common, offset)) + length++; + SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); + cc += PRIVATE_DATA(cc + 1); + } + cc += 1 + LINK_SIZE; + break; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_BRAPOS: + case OP_SBRA: + case OP_SBRAPOS: + case OP_SCOND: + SLJIT_ASSERT(PRIVATE_DATA(cc) != 0); + if (recurse_check_bit(common, PRIVATE_DATA(cc))) + length++; + cc += 1 + LINK_SIZE; + break; + + case OP_CBRA: + case OP_SCBRA: + offset = GET2(cc, 1 + LINK_SIZE); + if (recurse_check_bit(common, OVECTOR(offset << 1))) + { + SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1))); + length += 2; + } + if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset))) + length++; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + length++; + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_CBRAPOS: + case OP_SCBRAPOS: + offset = GET2(cc, 1 + LINK_SIZE); + if (recurse_check_bit(common, OVECTOR(offset << 1))) + { + SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1))); + length += 2; + } + if (recurse_check_bit(common, OVECTOR_PRIV(offset))) + length++; + if (recurse_check_bit(common, PRIVATE_DATA(cc))) + length++; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, 
common->capture_last_ptr)) + length++; + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_COND: + /* Might be a hidden SCOND. */ + alternative = cc + GET(cc, 1); + if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc))) + length++; + cc += 1 + LINK_SIZE; + break; + + CASE_ITERATOR_PRIVATE_DATA_1 + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + length++; + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_PRIVATE_DATA_2A + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); + length += 2; + } + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_PRIVATE_DATA_2B + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); + length += 2; + } + cc += 2 + IMM2_SIZE; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_1 + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + length++; + cc += 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2A + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); + length += 2; + } + cc += 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2B + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); + length += 2; + } + cc += 1 + IMM2_SIZE; + break; + + case OP_CLASS: + case OP_NCLASS: +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + 
case OP_XCLASS: + size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR); +#else + size = 1 + 32 / (int)sizeof(PCRE2_UCHAR); +#endif + + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + length += get_class_iterator_size(cc + size); + cc += size; + break; + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_THEN_ARG: + SLJIT_ASSERT(common->mark_ptr != 0); + recurse_flags |= recurse_flag_setmark_found; + if (common->control_head_ptr != 0) + recurse_flags |= recurse_flag_control_head_found; + if (*cc != OP_MARK) + recurse_flags |= recurse_flag_quit_found; + + cc += 1 + 2 + cc[1]; + break; + + case OP_PRUNE: + case OP_SKIP: + case OP_COMMIT: + recurse_flags |= recurse_flag_quit_found; + cc++; + break; + + case OP_SKIP_ARG: + recurse_flags |= recurse_flag_quit_found; + cc += 1 + 2 + cc[1]; + break; + + case OP_THEN: + SLJIT_ASSERT(common->control_head_ptr != 0); + recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found; + cc++; + break; + + case OP_ACCEPT: + case OP_ASSERT_ACCEPT: + recurse_flags |= recurse_flag_accept_found; + cc++; + break; + + default: + cc = next_opcode(common, cc); + SLJIT_ASSERT(cc != NULL); + break; + } + } +SLJIT_ASSERT(cc == ccend); + +if (recurse_flags & recurse_flag_control_head_found) + length++; +if (recurse_flags & recurse_flag_quit_found) + { + if (recurse_flags & recurse_flag_setsom_found) + length++; + if (recurse_flags & recurse_flag_setmark_found) + length++; + } + +*result_flags = recurse_flags; +return length; +} + +enum copy_recurse_data_types { + recurse_copy_from_global, + recurse_copy_private_to_global, + recurse_copy_shared_to_global, + recurse_copy_kept_shared_to_global, + recurse_swap_global +}; + +static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, + int type, int stackptr, int stacktop, uint32_t recurse_flags) +{ +delayed_mem_copy_status status; +PCRE2_SPTR alternative; +sljit_sw private_srcw[2]; 
+sljit_sw shared_srcw[3]; +sljit_sw kept_shared_srcw[2]; +int private_count, shared_count, kept_shared_count; +int from_sp, base_reg, offset, i; + +memset(common->recurse_bitset, 0, common->recurse_bitset_size); + +#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD +SLJIT_ASSERT(common->control_head_ptr != 0); +recurse_check_bit(common, common->control_head_ptr); +#endif + +switch (type) + { + case recurse_copy_from_global: + from_sp = TRUE; + base_reg = STACK_TOP; + break; + + case recurse_copy_private_to_global: + case recurse_copy_shared_to_global: + case recurse_copy_kept_shared_to_global: + from_sp = FALSE; + base_reg = STACK_TOP; + break; + + default: + SLJIT_ASSERT(type == recurse_swap_global); + from_sp = FALSE; + base_reg = TMP2; + break; + } + +stackptr = STACK(stackptr); +stacktop = STACK(stacktop); + +status.tmp_regs[0] = TMP1; +status.saved_tmp_regs[0] = TMP1; + +if (base_reg != TMP2) + { + status.tmp_regs[1] = TMP2; + status.saved_tmp_regs[1] = TMP2; + } +else + { + status.saved_tmp_regs[1] = RETURN_ADDR; + if (HAS_VIRTUAL_REGISTERS) + status.tmp_regs[1] = STR_PTR; + else + status.tmp_regs[1] = RETURN_ADDR; + } + +status.saved_tmp_regs[2] = TMP3; +if (HAS_VIRTUAL_REGISTERS) + status.tmp_regs[2] = STR_END; +else + status.tmp_regs[2] = TMP3; + +delayed_mem_copy_init(&status, common); + +if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global) + { + SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global); + + if (!from_sp) + delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr); + + if (from_sp || type == recurse_swap_global) + delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr); + } + +stackptr += sizeof(sljit_sw); + +#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD +if (type != recurse_copy_shared_to_global) + { + if (!from_sp) + 
delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr); + + if (from_sp || type == recurse_swap_global) + delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr); + } + +stackptr += sizeof(sljit_sw); +#endif + +while (cc < ccend) + { + private_count = 0; + shared_count = 0; + kept_shared_count = 0; + + switch(*cc) + { + case OP_SET_SOM: + SLJIT_ASSERT(common->has_set_som); + if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0))) + { + kept_shared_srcw[0] = OVECTOR(0); + kept_shared_count = 1; + } + cc += 1; + break; + + case OP_RECURSE: + if (recurse_flags & recurse_flag_quit_found) + { + if (common->has_set_som && recurse_check_bit(common, OVECTOR(0))) + { + kept_shared_srcw[0] = OVECTOR(0); + kept_shared_count = 1; + } + if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr)) + { + kept_shared_srcw[kept_shared_count] = common->mark_ptr; + kept_shared_count++; + } + } + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + { + shared_srcw[0] = common->capture_last_ptr; + shared_count = 1; + } + cc += 1 + LINK_SIZE; + break; + + case OP_KET: + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0) + { + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; + SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); + cc += PRIVATE_DATA(cc + 1); + } + cc += 1 + LINK_SIZE; + break; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_BRAPOS: + case OP_SBRA: + case OP_SBRAPOS: + case OP_SCOND: + private_srcw[0] = PRIVATE_DATA(cc); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; + cc += 1 + LINK_SIZE; + break; + + case OP_CBRA: + case OP_SCBRA: + offset = GET2(cc, 1 + LINK_SIZE); + shared_srcw[0] = OVECTOR(offset << 1); + if (recurse_check_bit(common, 
shared_srcw[0])) + { + shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1])); + shared_count = 2; + } + + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + { + shared_srcw[shared_count] = common->capture_last_ptr; + shared_count++; + } + + if (common->optimized_cbracket[offset] == 0) + { + private_srcw[0] = OVECTOR_PRIV(offset); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; + } + + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_CBRAPOS: + case OP_SCBRAPOS: + offset = GET2(cc, 1 + LINK_SIZE); + shared_srcw[0] = OVECTOR(offset << 1); + if (recurse_check_bit(common, shared_srcw[0])) + { + shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1])); + shared_count = 2; + } + + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + { + shared_srcw[shared_count] = common->capture_last_ptr; + shared_count++; + } + + private_srcw[0] = PRIVATE_DATA(cc); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; + + offset = OVECTOR_PRIV(offset); + if (recurse_check_bit(common, offset)) + { + private_srcw[private_count] = offset; + private_count++; + } + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_COND: + /* Might be a hidden SCOND. 
*/ + alternative = cc + GET(cc, 1); + if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) + { + private_srcw[0] = PRIVATE_DATA(cc); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; + } + cc += 1 + LINK_SIZE; + break; + + CASE_ITERATOR_PRIVATE_DATA_1 + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) + private_count = 1; + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_PRIVATE_DATA_2A + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) + { + private_count = 2; + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); + } + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_PRIVATE_DATA_2B + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) + { + private_count = 2; + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); + } + cc += 2 + IMM2_SIZE; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_1 + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) + private_count = 1; + cc += 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2A + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) + { + private_count = 2; + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); + } + cc += 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2B + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && 
recurse_check_bit(common, private_srcw[0])) + { + private_count = 2; + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); + } + cc += 1 + IMM2_SIZE; + break; + + case OP_CLASS: + case OP_NCLASS: +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: + i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR); +#else + i = 1 + 32 / (int)sizeof(PCRE2_UCHAR); +#endif + if (PRIVATE_DATA(cc) != 0) + { + private_count = 1; + private_srcw[0] = PRIVATE_DATA(cc); + switch(get_class_iterator_size(cc + i)) + { + case 1: + break; + + case 2: + if (recurse_check_bit(common, private_srcw[0])) + { + private_count = 2; + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); + } + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + } + cc += i; + break; + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_THEN_ARG: + SLJIT_ASSERT(common->mark_ptr != 0); + if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr)) + { + kept_shared_srcw[0] = common->mark_ptr; + kept_shared_count = 1; + } + if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr)) + { + private_srcw[0] = common->control_head_ptr; + private_count = 1; + } + cc += 1 + 2 + cc[1]; + break; + + case OP_THEN: + SLJIT_ASSERT(common->control_head_ptr != 0); + if (recurse_check_bit(common, common->control_head_ptr)) + { + private_srcw[0] = common->control_head_ptr; + private_count = 1; + } + cc++; + break; + + default: + cc = next_opcode(common, cc); + SLJIT_ASSERT(cc != NULL); + continue; + } + + if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global) + { + SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global); + + for (i = 0; i < private_count; i++) + { + 
SLJIT_ASSERT(private_srcw[i] != 0); + + if (!from_sp) + delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]); + + if (from_sp || type == recurse_swap_global) + delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr); + + stackptr += sizeof(sljit_sw); + } + } + else + stackptr += sizeof(sljit_sw) * private_count; + + if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global) + { + SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global); + + for (i = 0; i < shared_count; i++) + { + SLJIT_ASSERT(shared_srcw[i] != 0); + + if (!from_sp) + delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]); + + if (from_sp || type == recurse_swap_global) + delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr); + + stackptr += sizeof(sljit_sw); + } + } + else + stackptr += sizeof(sljit_sw) * shared_count; + + if (type != recurse_copy_private_to_global && type != recurse_swap_global) + { + SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global); + + for (i = 0; i < kept_shared_count; i++) + { + SLJIT_ASSERT(kept_shared_srcw[i] != 0); + + if (!from_sp) + delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]); + + if (from_sp || type == recurse_swap_global) + delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr); + + stackptr += sizeof(sljit_sw); + } + } + else + stackptr += sizeof(sljit_sw) * kept_shared_count; + } + +SLJIT_ASSERT(cc == ccend && stackptr == stacktop); + +delayed_mem_copy_finish(&status); +} + +static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset) +{ +PCRE2_SPTR end = bracketend(cc); +BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT; + +/* Assert captures then. 
*/
+if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
+  current_offset = NULL;
+/* Conditional block does not. */
+if (*cc == OP_COND || *cc == OP_SCOND)
+  has_alternatives = FALSE;
+/* Step over the opcode that opens this bracket. */
+cc = next_opcode(common, cc);
+/* With alternatives, THEN opcodes are recorded in the then_offsets slot that
+belongs to the first opcode of the current alternative (a leading OP_REVERSE
+or OP_VREVERSE is skipped first). */
+if (has_alternatives)
+  {
+  if (*cc == OP_REVERSE)
+    cc += 1 + IMM2_SIZE;
+  else if (*cc == OP_VREVERSE)
+    cc += 1 + 2 * IMM2_SIZE;
+
+  current_offset = common->then_offsets + (cc - common->start);
+  }
+/* Scan every opcode up to the end of this bracket. */
+while (cc < end)
+  {
+  if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
+    cc = set_then_offsets(common, cc, current_offset); /* Nested bracket: recurse, then resume at the returned position. */
+  else
+    {
+    if (*cc == OP_ALT && has_alternatives)
+      { /* A new alternative starts: switch to its own then_offsets slot. */
+      cc += 1 + LINK_SIZE;
+
+      if (*cc == OP_REVERSE)
+        cc += 1 + IMM2_SIZE;
+      else if (*cc == OP_VREVERSE)
+        cc += 1 + 2 * IMM2_SIZE;
+
+      current_offset = common->then_offsets + (cc - common->start);
+      continue;
+      }
+    /* Flag the current slot when an opcode in the OP_THEN..OP_THEN_ARG range
+    is seen; nothing is recorded while current_offset is NULL. */
+    if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
+      *current_offset = 1;
+    cc = next_opcode(common, cc);
+    }
+  }
+
+return end;
+}
+/* The CASE_ITERATOR_* helper macros are dropped from here on. */
+#undef CASE_ITERATOR_PRIVATE_DATA_1
+#undef CASE_ITERATOR_PRIVATE_DATA_2A
+#undef CASE_ITERATOR_PRIVATE_DATA_2B
+#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
+#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
+#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
+/* Returns nonzero when value has at most one bit set. Note that a value of
+zero also yields nonzero. */
+static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
+{
+return (value & (value - 1)) == 0;
+}
+/* Applies SET_LABEL(jump, label) to every entry of the jump list. */
+static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
+{
+while (list != NULL)
+  {
+  /* sljit_set_label is clever enough to do nothing
+  if either the jump or the label is NULL. 
*/
+  SET_LABEL(list->jump, label);
+  list = list->next;
+  }
+}
+/* Prepends jump to *list. The node comes from sljit_alloc_memory(); when that
+returns NULL the jump is silently not recorded and *list is left unchanged.
+list itself must not be NULL because it is dereferenced. */
+static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
+{
+jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
+if (list_item)
+  {
+  list_item->next = *list;
+  list_item->jump = jump;
+  *list = list_item;
+  }
+}
+/* Pushes a new entry onto common->stubs that remembers the start jump and the
+label returned by LABEL() at this point (quit). Nothing is queued when the
+allocation fails. */
+static void add_stub(compiler_common *common, struct sljit_jump *start)
+{
+DEFINE_COMPILER;
+stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
+
+if (list_item)
+  {
+  list_item->start = start;
+  list_item->quit = LABEL();
+  list_item->next = common->stubs;
+  common->stubs = list_item;
+  }
+}
+/* Processes every queued stub: its start jump is bound here (JUMPHERE), a
+SLJIT_FAST_CALL jump is added to common->stackalloc, and a jump back to the
+stub's quit label follows. The stub list is emptied afterwards. */
+static void flush_stubs(compiler_common *common)
+{
+DEFINE_COMPILER;
+stub_list *list_item = common->stubs;
+
+while (list_item)
+  {
+  JUMPHERE(list_item->start);
+  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
+  JUMPTO(SLJIT_JUMP, list_item->quit);
+  list_item = list_item->next;
+  }
+common->stubs = NULL;
+}
+/* Subtracts one from COUNT_MATCH and adds a SLJIT_ZERO jump to
+common->calllimit, i.e. the jump is taken when the counter reaches zero. */
+static SLJIT_INLINE void count_match(compiler_common *common)
+{
+DEFINE_COMPILER;
+
+OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
+add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
+}
+/* Reserves size entries by lowering STACK_TOP; size must be positive. */
+static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
+{
+/* May destroy all locals and registers except TMP2. 
*/
+DEFINE_COMPILER;
+
+SLJIT_ASSERT(size > 0);
+OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw)); /* STACK_TOP moves down by size * SSIZE_OF(sw). */
+#ifdef DESTROY_REGISTERS /* Optional: overwrite TMP1, TMP3, RETURN_ADDR, LOCALS0 and LOCALS1 with 12345 (presumably a debugging aid - confirm). */
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
+OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
+OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
+#endif
+add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0)); /* Queue a stub for the STACK_TOP < STACK_LIMIT case. */
+}
+/* Counterpart of allocate_stack(): raises STACK_TOP by size * SSIZE_OF(sw).
+size must be positive. No limit check is made here. */
+static SLJIT_INLINE void free_stack(compiler_common *common, int size)
+{
+DEFINE_COMPILER;
+
+SLJIT_ASSERT(size > 0);
+OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
+}
+/* Allocates size bytes plus one leading sljit_uw with SLJIT_MALLOC. The
+leading word stores the previous common->read_only_data_head, the new
+allocation becomes the head, and the returned pointer is just past that link
+word. Returns NULL when the compiler already reports an error, or when the
+allocation fails (in which case the compiler memory error is also set). */
+static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
+{
+DEFINE_COMPILER;
+sljit_uw *result;
+
+if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+  return NULL;
+
+result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
+if (SLJIT_UNLIKELY(result == NULL))
+  {
+  sljit_set_compiler_memory_error(compiler);
+  return NULL;
+  }
+/* Link the new block in front of the existing chain. */
+*(void**)result = common->read_only_data_head;
+common->read_only_data_head = (void *)result;
+return result + 1;
+}
+/* Fills OVECTOR(1) .. OVECTOR(length - 1) with the value "begin - 1 code
+unit", where begin is read from the jit_arguments structure. length must be
+greater than 1. */
+static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
+{
+DEFINE_COMPILER;
+struct sljit_label *loop;
+sljit_s32 i;
+
+/* At this point we can freely use all temporary registers. */
+SLJIT_ASSERT(length > 1);
+/* TMP1 returns with begin - 1. 
*/
+OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
+if (length < 8)
+  { /* Short vectors: one store per entry, no loop. */
+  for (i = 1; i < length; i++)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
+  }
+else
+  { /* Long vectors: a loop of length - 1 stores counted down in SLJIT_R2. */
+  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
+    { /* The SLJIT_MEM_SUPP query succeeded: use the pre-update store form. */
+    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
+    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
+    loop = LABEL();
+    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
+    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
+    JUMPTO(SLJIT_NOT_ZERO, loop);
+    }
+  else
+    { /* Fallback: plain store followed by an explicit pointer increment. */
+    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
+    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
+    loop = LABEL();
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
+    OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
+    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
+    JUMPTO(SLJIT_NOT_ZERO, loop);
+    }
+  }
+}
+/* Stores zero into every sljit_sw sized slot of the local area between
+common->early_fail_start_ptr (inclusive) and common->early_fail_end_ptr
+(exclusive). The range must not be empty. Three strategies are used: a single
+store for one slot, unrolled stores for up to six slots, and a loop that
+clears three slots per iteration for anything larger. */
+static SLJIT_INLINE void reset_early_fail(compiler_common *common)
+{
+DEFINE_COMPILER;
+sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
+sljit_u32 uncleared_size;
+sljit_s32 src = SLJIT_IMM;
+sljit_s32 i;
+struct sljit_label *loop;
+
+SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
+
+if (size == sizeof(sljit_sw))
+  {
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
+  return;
+  }
+/* Use TMP3 as the zero source when it maps to a real register and the CPU
+has no zero register; otherwise the immediate 0 is stored directly. */
+if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
+  {
+  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
+  src = TMP3;
+  }
+
+if (size <= 6 * sizeof(sljit_sw))
+  {
+  for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
+    OP1(SLJIT_MOV, 
SLJIT_MEM1(SLJIT_SP), i, src, 0);
+  return;
+  }
+/* Large range: TMP1 walks the slots, TMP2 marks where the loop stops. */
+GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
+/* Bytes (0, 1 or 2 slots) left over after the three-slots-per-pass loop. */
+uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
+
+OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
+
+loop = LABEL();
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);
+CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
+/* Clear the one or two slots the loop did not reach. */
+if (uncleared_size >= sizeof(sljit_sw))
+  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
+
+if (uncleared_size >= 2 * sizeof(sljit_sw))
+  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
+}
+/* Copies the value held in OVECTOR(1) into OVECTOR(2) .. OVECTOR(length - 1),
+then reloads STACK_TOP from the jit_arguments stack, clears the mark and
+control-head slots when they are in use, and loads TMP1 from
+common->start_ptr. length must be greater than 1. */
+static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
+{
+DEFINE_COMPILER;
+struct sljit_label *loop;
+int i;
+
+SLJIT_ASSERT(length > 1);
+/* OVECTOR(1) contains the "string begin - 1" constant. */
+if (length > 2)
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
+if (length < 8)
+  { /* Short vectors: one store per entry, no loop. */
+  for (i = 2; i < length; i++)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
+  }
+else
+  { /* Long vectors: length - 2 stores; STACK_TOP is borrowed as the counter. */
+  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
+    {
+    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
+    loop = LABEL();
+    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
+    JUMPTO(SLJIT_NOT_ZERO, loop);
+    }
+  else
+    {
+    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
+    loop = LABEL();
+    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
+    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
+    OP2(SLJIT_SUB | 
SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
+    JUMPTO(SLJIT_NOT_ZERO, loop);
+    }
+  }
+/* STACK_TOP was used as a loop counter above, so it is reloaded here. With
+virtual registers the load is done in two steps through STACK_TOP itself. */
+if (!HAS_VIRTUAL_REGISTERS)
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
+else
+  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
+/* Zero the mark and control-head slots when the pattern uses them. */
+if (common->mark_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
+if (common->control_head_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
+if (HAS_VIRTUAL_REGISTERS)
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
+
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
+}
+/* Walks the chain of records linked through current[0], where current[1]
+holds the record type. For a type_mark record, current[2] is compared with
+skip_arg using PRIV(strcmp) and current[3] is returned on equality.
+type_then_trap records are skipped. Returns 0 when the chain ends without a
+match. */
+static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
+{
+while (current != NULL)
+  {
+  switch (current[1])
+    {
+    case type_then_trap:
+    break;
+
+    case type_mark:
+    if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
+      return current[3];
+    break;
+
+    default:
+    SLJIT_UNREACHABLE();
+    break;
+    }
+  SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]); /* Links only ever point to higher addresses. */
+  current = (sljit_sw*)current[0];
+  }
+return 0;
+}
+/* Copies the locally stored ovector into the ovector of the match data,
+converting each stored pointer into an offset from the subject start, and
+computes the return value from topbracket. Also stores the start character
+pointer and, when marks are in use, the mark pointer into jit_arguments. */
+static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
+{
+DEFINE_COMPILER;
+struct sljit_label *loop;
+BOOL has_pre;
+
+/* At this point we can freely use all registers. 
*/
+OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); /* Keep the old OVECTOR(1) value in SLJIT_S2 before it is overwritten. */
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
+/* Both branches below do the same work with different register choices:
+SLJIT_S0 = start_ptr slot, SLJIT_R1 = oveccount, SLJIT_R2 = address one
+PCRE2_SIZE before the output ovector. startchar_ptr and (if used) mark_ptr
+are written back to jit_arguments. */
+if (HAS_VIRTUAL_REGISTERS)
+  {
+  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
+  if (common->mark_ptr != 0)
+    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
+  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
+  if (common->mark_ptr != 0)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
+  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
+    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
+  }
+else
+  {
+  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
+  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
+  if (common->mark_ptr != 0)
+    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
+  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
+  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
+  if (common->mark_ptr != 0)
+    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
+  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
+  }
+/* has_pre records whether the pre-update load form is supported; if so the
+source base starts one word earlier to compensate for the pre-increment. */
+has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
+
+GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
+OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? 
SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
+/* Copy loop: one ovector entry per pass, counted down in SLJIT_R1. */
+loop = LABEL();
+/* Load the next stored pointer into SLJIT_S1 and advance the source. */
+if (has_pre)
+  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
+else
+  {
+  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
+  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
+  }
+/* Advance the destination, then turn the pointer into a byte offset from the
+subject start held in SLJIT_R0. */
+OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
+OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
+/* Copy the integer value to the output buffer */
+#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
+OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT); /* Byte offset to code unit offset. */
+#endif
+/* The store width follows sizeof(PCRE2_SIZE), which must be 4 or 8. */
+SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
+OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
+
+OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
+JUMPTO(SLJIT_NOT_ZERO, loop);
+
+/* Calculate the return value, which is the maximum ovector value. */
+if (topbracket > 1)
+  { /* Scan downwards in steps of two words, starting with SLJIT_R1 = topbracket + 1 and decrementing it while the loaded entry equals SLJIT_S2. */
+  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
+    {
+    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
+    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
+
+    /* OVECTOR(0) is never equal to SLJIT_S2. */
+    loop = LABEL();
+    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
+    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
+    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
+    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
+    }
+  else
+    {
+    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
+    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
+
+    /* OVECTOR(0) is never equal to SLJIT_S2. 
*/
+    loop = LABEL();
+    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
+    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
+    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
+    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
+    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
+    }
+  }
+else
+  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); /* topbracket <= 1: the return value is the constant 1. */
+}
+/* Sets the return value to PCRE2_ERROR_PARTIAL, stores the partial match
+start into jit_arguments.startchar_ptr, writes the start and end offsets
+(relative to jit_arguments.begin) into ovector[0] and ovector[1] of the match
+data, and jumps to quit. The start comes from common->hit_start in
+PCRE2_JIT_PARTIAL_SOFT mode and from common->start_ptr otherwise; the end is
+STR_END. */
+static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
+{
+DEFINE_COMPILER;
+sljit_s32 mov_opcode;
+sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
+
+SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
+SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
+  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
+/* With virtual registers ARGUMENTS is first copied into SLJIT_R1. */
+if (arguments_reg != ARGUMENTS)
+  OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
+  common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
+OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
+
+/* Store match begin and end. */
+OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
+OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
+OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
+/* The store width follows sizeof(PCRE2_SIZE). */
+mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? 
SLJIT_MOV_U32 : SLJIT_MOV;
+/* ovector[0]: start pointer minus subject begin, scaled to code units. */
+OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
+#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
+OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
+/* ovector[1]: STR_END minus subject begin, scaled the same way. */
+OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
+#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
+OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
+
+JUMPTO(SLJIT_JUMP, quit);
+}
+/* In the two partial matching modes, lowers the start_used_ptr slot to
+STR_PTR when STR_PTR is smaller than the stored value. Does nothing in any
+other mode. */
+static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
+{
+/* May destroy TMP1. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
+  {
+  /* The value of -1 must be kept for start_used_ptr! */
+  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
+  /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
+  is not necessary if start_used_ptr == STR_PTR, it does not hurt either. */
+  jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
+  JUMPHERE(jump);
+  }
+else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
+  { /* No -1 sentinel handling here: the slot is compared with STR_PTR directly. */
+  jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
+  JUMPHERE(jump);
+  }
+}
+/* Compile-time helper (no code is generated): reports whether the character
+at cc differs from its other case. */
+static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
+{
+/* Detects if the character has an othercase. In UTF or UCP mode characters
+above 127 are looked up with UCD_OTHERCASE and the rest in common->fcc. In
+other modes only characters accepted by MAX_255 are looked up in common->fcc;
+anything else reports FALSE. */
+unsigned int c;
+
+#ifdef SUPPORT_UNICODE
+if (common->utf || common->ucp)
+  {
+  if (common->utf)
+    {
+    GETCHAR(c, cc);
+    }
+  else
+    c = *cc;
+
+  if (c > 127)
+    return c != UCD_OTHERCASE(c);
+
+  return common->fcc[c] != c;
+  }
+else
+#endif
+  c = *cc;
+return MAX_255(c) ? 
common->fcc[c] != c : FALSE;
+}
+/* Compile-time helper: maps the character c to its other case. */
+static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
+{
+/* Returns with the othercase. Characters above 127 use UCD_OTHERCASE in UTF
+or UCP mode; everything else goes through TABLE_GET on common->fcc with c
+itself as the default. */
+#ifdef SUPPORT_UNICODE
+if ((common->utf || common->ucp) && c > 127)
+  return UCD_OTHERCASE(c);
+#endif
+return TABLE_GET(c, common->fcc, c);
+}
+/* Compile-time helper used for caseless matching of the character at cc. */
+static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
+{
+/* Detects if the character and its othercase have only 1 bit difference.
+Returns 0 when they differ in more than one bit. Otherwise the result is
+(index << 8) | mask, where mask is the differing bit reduced to a value below
+256 and index is computed below (presumably the position, inside the encoded
+character, of the byte that holds the bit - confirm against the callers).
+The character must have an other case (asserted below). */
+unsigned int c, oc, bit;
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+int n;
+#endif
+
+#ifdef SUPPORT_UNICODE
+if (common->utf || common->ucp)
+  {
+  if (common->utf)
+    {
+    GETCHAR(c, cc);
+    }
+  else
+    c = *cc;
+
+  if (c <= 127)
+    oc = common->fcc[c];
+  else
+    oc = UCD_OTHERCASE(c);
+  }
+else
+  {
+  c = *cc;
+  oc = TABLE_GET(c, common->fcc, c);
+  }
+#else
+c = *cc;
+oc = TABLE_GET(c, common->fcc, c);
+#endif
+
+SLJIT_ASSERT(c != oc);
+
+bit = c ^ oc;
+/* Optimized for English alphabet. */
+if (c <= 127 && bit == 0x20)
+  return (0 << 8) | 0x20;
+
+/* Since c != oc, they must have at least 1 bit difference. */
+if (!is_powerof2(bit))
+  return 0;
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+/* UTF-8: start from the extra length of the sequence and step the index down
+once for every 6 low bits of the difference that are zero. */
+#ifdef SUPPORT_UNICODE
+if (common->utf && c > 127)
+  {
+  n = GET_EXTRALEN(*cc);
+  while ((bit & 0x3f) == 0)
+    {
+    n--;
+    bit >>= 6;
+    }
+  return (n << 8) | bit;
+  }
+#endif /* SUPPORT_UNICODE */
+return (0 << 8) | bit;
+
+#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
+/* Characters above 65535 in UTF mode: a difference at bit 10 or above is
+shifted down by 10 and handled like the generic case; a lower difference
+yields index 2 or 3. */
+#ifdef SUPPORT_UNICODE
+if (common->utf && c > 65535)
+  {
+  if (bit >= (1u << 10))
+    bit >>= 10;
+  else
+    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
+  }
+#endif /* SUPPORT_UNICODE */
+return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));
+
+#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
+}
+/* force must only be TRUE in a partial matching mode (asserted below). */
+static void check_partial(compiler_common *common, BOOL force)
+{
+/* Checks whether a partial matching is occurred. Does not modify registers. 
*/
+DEFINE_COMPILER;
+struct sljit_jump *jump = NULL;
+
+SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);
+/* Nothing to do when partial matching is not requested. */
+if (common->mode == PCRE2_JIT_COMPLETE)
+  return;
+/* Optional guard that skips the partial-match action: taken when
+start_used_ptr >= STR_PTR (unless forced or empty partial matches are
+allowed), or, in soft mode, when start_used_ptr still holds -1. */
+if (!force && !common->allow_empty_partial)
+  jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
+else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
+  jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
+/* Soft mode stores 0 into the hit_start slot and continues; hard mode leaves
+through the partial match exit. */
+if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
+else
+  {
+  if (common->partialmatchlabel != NULL)
+    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
+  else
+    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
+  }
+
+if (jump != NULL)
+  JUMPHERE(jump);
+}
+/* Adds a jump to end_reached for the case STR_PTR >= STR_END. In the partial
+matching modes the partial match handling is done first, unless
+start_used_ptr >= STR_PTR: soft mode stores 0 into the hit_start slot, hard
+mode leaves through the partial match exit. */
+static void check_str_end(compiler_common *common, jump_list **end_reached)
+{
+/* Does not affect registers. Usually used in a tight spot. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+if (common->mode == PCRE2_JIT_COMPLETE)
+  {
+  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+  return;
+  }
+/* jump skips everything below while STR_PTR < STR_END. */
+jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
+  {
+  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
+  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
+  }
+else
+  {
+  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
+  if (common->partialmatchlabel != NULL)
+    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
+  else
+    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
+  }
+JUMPHERE(jump);
+}
+/* Like check_str_end(), but the end-of-subject case is added to backtracks
+and the allow_empty_partial setting is honoured. */
+static void detect_partial_match(compiler_common *common, jump_list **backtracks)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+if (common->mode == PCRE2_JIT_COMPLETE)
+  {
+  add_jump(compiler, backtracks, 
CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+  return;
+  }
+
+/* Partial matching mode. */
+jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); /* Skips everything below while STR_PTR < STR_END. */
+if (!common->allow_empty_partial)
+  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
+else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
+  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
+/* Soft mode stores 0 into the hit_start slot and then backtracks; hard mode
+leaves through the partial match exit. */
+if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
+  {
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
+  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+  }
+else
+  {
+  if (common->partialmatchlabel != NULL)
+    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
+  else
+    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
+  }
+JUMPHERE(jump);
+}
+/* Partial match handling without any STR_END comparison and without a
+backtrack list. Soft mode stores 0 into the hit_start slot unless
+start_used_ptr >= STR_PTR; hard mode leaves through the partial match exit
+when start_used_ptr < STR_PTR. Does nothing in any other mode. */
+static void process_partial_match(compiler_common *common)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+/* Partial matching mode. */
+if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
+  {
+  jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
+  JUMPHERE(jump);
+  }
+else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
+  {
+  if (common->partialmatchlabel != NULL)
+    CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
+  else
+    add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
+  }
+}
+/* Jumps to label while STR_PTR < STR_END; otherwise falls through into the
+partial match handling of process_partial_match(). */
+static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
+{
+DEFINE_COMPILER;
+
+CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
+process_partial_match(common);
+}
+/* max is an upper bound of the character values the caller cares about; it
+lets the multi-unit decoding be skipped. dst/dstw name the location used to
+save STR_PTR while decoding. backtracks may be NULL. */
+static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
+{
+/* Reads the character into TMP1, keeps STR_PTR.
+Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. 
*/
+DEFINE_COMPILER;
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+struct sljit_jump *jump;
+#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
+/* Depending on the build configuration some parameters are unused. */
+SLJIT_UNUSED_ARG(max);
+SLJIT_UNUSED_ARG(dst);
+SLJIT_UNUSED_ARG(dstw);
+SLJIT_UNUSED_ARG(backtracks);
+/* Load the code unit at STR_PTR; the rest only applies to UTF modes. */
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+if (common->utf)
+  {
+  if (max < 128) return; /* The caller only needs values below 128: no decoding. */
+  /* Values of 0x80 and above are decoded by the shared utfreadchar routine
+  (or its invalid-UTF variant). STR_PTR is saved in dst/dstw around the call
+  and restored afterwards. */
+  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
+  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
+  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
+  if (backtracks && common->invalid_utf)
+    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
+  JUMPHERE(jump);
+  }
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+if (common->utf)
+  {
+  if (max < 0xd800) return; /* The caller only needs values below the surrogate range. */
+  /* TMP2 = code unit - 0xd800, used for the surrogate range tests below. */
+  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+
+  if (common->invalid_utf)
+    { /* Surrogates are handed to the shared invalid-UTF reader. */
+    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
+    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
+    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
+    if (backtracks && common->invalid_utf)
+      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
+    }
+  else
+    {
+    /* TMP2 contains the high surrogate. 
*/
+    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800); /* Not a high surrogate: TMP1 is already the character. */
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); /* Second unit of the pair. */
+    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
+    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
+    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); /* ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000. */
+    }
+
+  JUMPHERE(jump);
+  }
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+if (common->invalid_utf)
+  {
+  if (max < 0xd800) return;
+  /* A code unit is invalid when it is 0x110000 or above, or lies in the
+  0xd800..0xdfff surrogate range. */
+  if (backtracks != NULL)
+    { /* Invalid values are routed to the backtrack list. */
+    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
+    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
+    }
+  else
+    { /* No backtrack list: invalid values are replaced by INVALID_UTF_CHAR. */
+    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
+    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
+    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
+    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
+    }
+  }
+#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
+#endif /* SUPPORT_UNICODE */
+}
+/* max is an upper bound of the character values the caller cares about; it
+lets the multi-unit decoding be skipped. The character is left in TMP1. */
+static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
+{
+/* Reads one character back without moving STR_PTR. TMP2 must
+contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR. 
*/
+DEFINE_COMPILER;
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+struct sljit_jump *jump;
+#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
+/* Depending on the build configuration some parameters are unused. */
+SLJIT_UNUSED_ARG(max);
+SLJIT_UNUSED_ARG(backtracks);
+/* Load the code unit just before STR_PTR; the rest only applies to UTF modes. */
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+if (common->utf)
+  {
+  if (max < 128) return; /* The caller only needs values below 128: no decoding. */
+  /* Values of 0x80 and above are decoded by the shared utfpeakcharback
+  routine (or its invalid-UTF variant). */
+  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
+  if (common->invalid_utf)
+    {
+    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
+    if (backtracks != NULL)
+      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
+    }
+  else
+    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
+  JUMPHERE(jump);
+  }
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+if (common->utf)
+  {
+  if (max < 0xd800) return; /* The caller only needs values below the surrogate range. */
+
+  if (common->invalid_utf)
+    { /* Units of 0xd800 and above go through the shared invalid-UTF routine. */
+    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
+    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
+    if (backtracks != NULL)
+      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
+    }
+  else
+    { /* Valid UTF-16: only a low surrogate (0xdc00..0xdfff) needs the preceding unit. */
+    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
+    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
+    /* TMP2 contains the low surrogate. 
*/
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); /* The unit before the low surrogate. */
+    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
+    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
+    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); /* ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000. */
+    }
+  JUMPHERE(jump);
+  }
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+if (common->invalid_utf)
+  { /* NOTE(review): unlike the 8 and 16 bit branches, backtracks is passed to add_jump() without a NULL check, and add_jump() dereferences its list argument - confirm that no caller passes NULL here when invalid_utf is set. */
+  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
+  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
+  }
+#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
+#endif /* SUPPORT_UNICODE */
+}
+/* Bit flags for the options argument of read_char(). */
+#define READ_CHAR_UPDATE_STR_PTR 0x1
+#define READ_CHAR_UTF8_NEWLINE 0x2
+#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
+#define READ_CHAR_VALID_UTF 0x4
+
+static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
+  jump_list **backtracks, sljit_u32 options)
+{
+/* Reads the precise value of a character into TMP1, if the character is
+between min and max (c >= min && c <= max). Otherwise it returns with a value
+outside the range. Does not check STR_END. 
*/ +DEFINE_COMPILER; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_jump *jump; +#endif +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +struct sljit_jump *jump2; +#endif + +SLJIT_UNUSED_ARG(min); +SLJIT_UNUSED_ARG(max); +SLJIT_UNUSED_ARG(backtracks); +SLJIT_UNUSED_ARG(options); +SLJIT_ASSERT(min <= max); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utf) + { + if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return; + + if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF)) + { + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80); + + if (options & READ_CHAR_UTF8_NEWLINE) + add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL)); + else + add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL)); + + if (backtracks != NULL) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); + JUMPHERE(jump); + return; + } + + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + if (min >= 0x10000) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0); + if (options & READ_CHAR_UPDATE_STR_PTR) + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); + if (!(options & READ_CHAR_UPDATE_STR_PTR)) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 
IN_UCHARS(3)); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + JUMPHERE(jump2); + if (options & READ_CHAR_UPDATE_STR_PTR) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); + } + else if (min >= 0x800 && max <= 0xffff) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0); + if (options & READ_CHAR_UPDATE_STR_PTR) + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + if (!(options & READ_CHAR_UPDATE_STR_PTR)) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + JUMPHERE(jump2); + if (options & READ_CHAR_UPDATE_STR_PTR) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); + } + else if (max >= 0x800) + { + add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); + } + else if (max < 128) + { + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + } + else + { + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (!(options & READ_CHAR_UPDATE_STR_PTR)) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + else + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + if (options & 
READ_CHAR_UPDATE_STR_PTR) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); + } + JUMPHERE(jump); + } +#elif PCRE2_CODE_UNIT_WIDTH == 16 +if (common->utf) + { + if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return; + + if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF)) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + + if (options & READ_CHAR_UTF8_NEWLINE) + add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL)); + else + add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL)); + + if (backtracks != NULL) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); + JUMPHERE(jump); + return; + } + + if (max >= 0x10000) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800); + /* TMP2 contains the high surrogate. */ + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + JUMPHERE(jump); + return; + } + + /* Skip low surrogate if necessary. 
*/ + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS) + { + if (options & READ_CHAR_UPDATE_STR_PTR) + OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400); + if (options & READ_CHAR_UPDATE_STR_PTR) + SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR); + if (max >= 0xd800) + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1); + } + else + { + jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400); + if (options & READ_CHAR_UPDATE_STR_PTR) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + if (max >= 0xd800) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000); + JUMPHERE(jump); + } + } +#elif PCRE2_CODE_UNIT_WIDTH == 32 +if (common->invalid_utf) + { + if (backtracks != NULL) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800)); + } + else + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); + } + } +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ +#endif /* SUPPORT_UNICODE */ +} + +static void skip_valid_char(compiler_common *common) +{ +DEFINE_COMPILER; +#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) +struct sljit_jump *jump; +#endif + +#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) + if (common->utf) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#if 
PCRE2_CODE_UNIT_WIDTH == 8 + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +#elif PCRE2_CODE_UNIT_WIDTH == 16 + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + JUMPHERE(jump); + return; + } +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */ + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +} + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + +static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass) +{ +/* Tells whether the character codes below 128 are enough +to determine a match. */ +const sljit_u8 value = nclass ? 0xff : 0; +const sljit_u8 *end = bitset + 32; + +bitset += 16; +do + { + if (*bitset++ != value) + return FALSE; + } +while (bitset < end); +return TRUE; +} + +static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated) +{ +/* Reads the precise character type of a character into TMP1, if the character +is less than 128. Otherwise it returns with zero. Does not check STR_END. The +full_read argument tells whether characters above max are accepted or not. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +SLJIT_ASSERT(common->utf); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +/* All values > 127 are zero in ctypes. 
*/ +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + +if (negated) + { + jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80); + + if (common->invalid_utf) + { + OP1(SLJIT_MOV, TMP1, 0, TMP2, 0); + add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); + } + else + { + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + } + JUMPHERE(jump); + } +} + +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */ + +static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated) +{ +/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */ +DEFINE_COMPILER; +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 +struct sljit_jump *jump; +#endif +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +struct sljit_jump *jump2; +#endif + +SLJIT_UNUSED_ARG(backtracks); +SLJIT_UNUSED_ARG(negated); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utf) + { + /* The result of this read may be unused, but saves an "else" part. 
*/ + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80); + + if (!negated) + { + if (common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2); + if (common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2)); + + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80); + if (common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40)); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); + jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + JUMPHERE(jump2); + } + else if (common->invalid_utf) + { + add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, TMP2, 0, TMP1, 0); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); + jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + JUMPHERE(jump2); + } + else + add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL)); + + JUMPHERE(jump); + return; + } +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */ + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 +if (common->invalid_utf && negated) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000)); +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */ + +#if PCRE2_CODE_UNIT_WIDTH != 8 +/* The ctypes array contains only 256 values. 
*/ +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255); +#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */ +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); +#if PCRE2_CODE_UNIT_WIDTH != 8 +JUMPHERE(jump); +#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */ + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 +if (common->utf && negated) + { + /* Skip low surrogate if necessary. */ + if (!common->invalid_utf) + { + OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800); + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS) + { + OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400); + SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR); + } + else + { + jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPHERE(jump); + } + return; + } + + OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800); + jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400)); + + JUMPHERE(jump); + return; + } +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */ +} + +static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid) +{ +/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE, +TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer, +and it is destroyed. Does not modify STR_PTR for invalid character sequences. 
*/ +DEFINE_COMPILER; + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_jump *jump; +#endif + +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 +struct sljit_label *label; + +if (common->utf) + { + if (!must_be_valid && common->invalid_utf) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80); + add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL)); + if (backtracks != NULL) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0)); + JUMPHERE(jump); + return; + } + + label = LABEL(); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label); + return; + } +#elif PCRE2_CODE_UNIT_WIDTH == 16 +if (common->utf) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + if (!must_be_valid && common->invalid_utf) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); + jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800); + add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL)); + if (backtracks != NULL) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0)); + JUMPHERE(jump); + return; + } + + /* Skip low surrogate if necessary. 
*/ + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + return; + } +#elif PCRE2_CODE_UNIT_WIDTH == 32 +if (common->invalid_utf && !must_be_valid) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); + if (backtracks != NULL) + { + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + return; + } + + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + return; + } +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ +#endif /* SUPPORT_UNICODE */ + +SLJIT_UNUSED_ARG(backtracks); +SLJIT_UNUSED_ARG(must_be_valid); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +} + +static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch) +{ +/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +if (nltype == NLTYPE_ANY) + { + add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); + sljit_set_current_flags(compiler, SLJIT_SET_Z); + add_jump(compiler, backtracks, JUMP(jumpifmatch ? 
SLJIT_NOT_ZERO : SLJIT_ZERO)); + } +else if (nltype == NLTYPE_ANYCRLF) + { + if (jumpifmatch) + { + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); + } + else + { + jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); + JUMPHERE(jump); + } + } +else + { + SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256); + add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); + } +} + +#ifdef SUPPORT_UNICODE + +#if PCRE2_CODE_UNIT_WIDTH == 8 +static void do_utfreadchar(compiler_common *common) +{ +/* Fast decoding a UTF-8 character. TMP1 contains the first byte +of the character (>= 0xc0). Return char value in TMP1. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +/* Searching for the first zero. */ +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800); +jump = JUMP(SLJIT_NOT_ZERO); +/* Two byte sequence. */ +OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000); +jump = JUMP(SLJIT_NOT_ZERO); +/* Three byte sequence. 
*/ +OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Four byte sequence. */ +JUMPHERE(jump); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); +OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfreadtype8(compiler_common *common) +{ +/* Fast decoding a UTF-8 character type. TMP2 contains the first byte +of the character (>= 0xc0). Return value in TMP1. */ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_jump *compare; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20); +jump = JUMP(SLJIT_NOT_ZERO); +/* Two byte sequence. */ +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f); +/* The upper 5 bits are known at this point. */ +compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); +OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); +OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(compare); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* We only have types for characters less than 256. 
*/ +JUMPHERE(jump); +OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfreadchar_invalid(compiler_common *common) +{ +/* Slow decoding a UTF-8 character. TMP1 contains the first byte +of the character (>= 0xc0). Return char value in TMP1. STR_PTR is +undefined for invalid characters. */ +DEFINE_COMPILER; +sljit_s32 i; +sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV); +struct sljit_jump *jump; +struct sljit_jump *buffer_end_close; +struct sljit_label *three_byte_entry; +struct sljit_label *exit_invalid_label; +struct sljit_jump *exit_invalid[11]; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2); + +/* Usually more than 3 characters remained in the subject buffer. */ +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); + +/* Not a valid start of a multi-byte sequence, no more bytes read. */ +exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2); + +buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */ +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800); +jump = JUMP(SLJIT_NOT_ZERO); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump); + +/* Three-byte sequence. 
*/ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); +if (has_cmov) + { + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1); + exit_invalid[2] = NULL; + } +else + exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000); +jump = JUMP(SLJIT_NOT_ZERO); + +three_byte_entry = LABEL(); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800); +if (has_cmov) + { + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1); + exit_invalid[3] = NULL; + } +else + exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +if (has_cmov) + { + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); + exit_invalid[4] = NULL; + } +else + exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump); + +/* Four-byte sequence. 
*/ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); +if (has_cmov) + { + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1); + exit_invalid[5] = NULL; + } +else + exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000); +if (has_cmov) + { + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1); + exit_invalid[6] = NULL; + } +else + exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(buffer_end_close); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); +exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0); + +/* Two-byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */ +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800); +jump = JUMP(SLJIT_NOT_ZERO); + +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Three-byte sequence. 
*/ +JUMPHERE(jump); +exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); +if (has_cmov) + { + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); + exit_invalid[10] = NULL; + } +else + exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + +/* One will be substracted from STR_PTR later. */ +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + +/* Four byte sequences are not possible. */ +CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry); + +exit_invalid_label = LABEL(); +for (i = 0; i < 11; i++) + sljit_set_label(exit_invalid[i], exit_invalid_label); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfreadnewline_invalid(compiler_common *common) +{ +/* Slow decoding a UTF-8 character, specialized for newlines. +TMP1 contains the first byte of the character (>= 0xc0). Return +char value in TMP1. */ +DEFINE_COMPILER; +struct sljit_label *loop; +struct sljit_label *skip_start; +struct sljit_label *three_byte_exit; +struct sljit_jump *jump[5]; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +if (common->nltype != NLTYPE_ANY) + { + SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128); + + /* All newlines are ascii, just skip intermediate octets. 
*/ + jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + loop = LABEL(); + if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS) + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + else + { + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } + + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0); + CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + JUMPHERE(jump[0]); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); + OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + return; + } + +jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2); +jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2); + +skip_start = LABEL(); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0); +jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80); + +/* Skip intermediate octets. */ +loop = LABEL(); +jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0); +CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop); + +JUMPHERE(jump[3]); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +three_byte_exit = LABEL(); +JUMPHERE(jump[0]); +JUMPHERE(jump[4]); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Two byte long newline: 0x85. 
*/ +JUMPHERE(jump[1]); +CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Three byte long newlines: 0x2028 and 0x2029. */ +JUMPHERE(jump[2]); +CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start); +CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80); +CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfmoveback_invalid(compiler_common *common) +{ +/* Goes one character back. */ +DEFINE_COMPILER; +sljit_s32 i; +struct sljit_jump *jump; +struct sljit_jump *buffer_start_close; +struct sljit_label *exit_ok_label; +struct sljit_label *exit_invalid_label; +struct sljit_jump *exit_invalid[7]; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); +exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0); + +/* Two-byte sequence. */ +buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); +jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Three-byte sequence. 
*/ +JUMPHERE(jump); +exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0); +jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Four-byte sequence. */ +JUMPHERE(jump); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80); +exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0); +exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05); + +exit_ok_label = LABEL(); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Two-byte sequence. */ +JUMPHERE(buffer_start_close); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + +exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); +CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label); + +/* Three-byte sequence. */ +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40); +exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0); +CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label); + +/* Four-byte sequences are not possible. 
*/ + +exit_invalid_label = LABEL(); +sljit_set_label(exit_invalid[5], exit_invalid_label); +sljit_set_label(exit_invalid[6], exit_invalid_label); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(exit_invalid[4]); +/* -2 + 4 = 2 */ +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + +exit_invalid_label = LABEL(); +for (i = 0; i < 4; i++) + sljit_set_label(exit_invalid[i], exit_invalid_label); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfpeakcharback(compiler_common *common) +{ +/* Peek a character back: the UTF-8 sequence of two to four code units ending just before STR_PTR is decoded into TMP1, with TMP2 used as scratch. No validity checks are emitted. Does not modify STR_PTR. */ +DEFINE_COMPILER; +struct sljit_jump *jump[2]; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); +jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3)); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0); +jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4)); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +JUMPHERE(jump[1]); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +JUMPHERE(jump[0]); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +OP_SRC(SLJIT_FAST_RETURN, 
RETURN_ADDR, 0); +} + +static void do_utfpeakcharback_invalid(compiler_common *common) +{ +/* Peek a character back with validation. The result is returned in TMP1, and INVALID_UTF_CHAR is returned on the invalid exits. TMP2 is advanced by three code units and compared with STR_PTR to decide how many preceding code units may be read. Does not modify STR_PTR. */ +DEFINE_COMPILER; +sljit_s32 i; +sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV); +struct sljit_jump *jump[2]; +struct sljit_label *two_byte_entry; +struct sljit_label *three_byte_entry; +struct sljit_label *exit_invalid_label; +struct sljit_jump *exit_invalid[8]; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); +exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0); +jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0); + +/* Two-byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2); +jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e); + +two_byte_entry = LABEL(); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); +/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */ +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump[1]); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80); +exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +/* Three-byte sequence. 
*/ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3)); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0); +jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10); + +three_byte_entry = LABEL(); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); /* Values that fall below 0x800 after this subtraction (0xd800-0xdfff) are rejected. */ +if (has_cmov) + { + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1); + exit_invalid[2] = NULL; + } +else + exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); /* After restoring the value, anything below 0x800 is not a valid three-byte result. */ +if (has_cmov) + { + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); + exit_invalid[3] = NULL; + } +else + exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800); + +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump[1]); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80); +exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +/* Four-byte sequence. 0x10000 is subtracted before the lead byte is merged so that the range check is one compare against 0x100000. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4)); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18); +/* ADD is used instead of OR because of the SUB 0x10000 above. 
*/ +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + +if (has_cmov) + { + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1); + exit_invalid[5] = NULL; + } +else + exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump[0]); /* Taken when TMP2 plus three code units is not below STR_PTR, so the four-byte case is not tried on this path. */ +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); +jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0); + +/* Two-byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2); +CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry); + +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80); +exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +/* Three-byte sequence. Only the lead byte check is emitted here; the rest is shared through three_byte_entry. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3)); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0); +CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump[0]); +exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0); + +/* Two-byte sequence. 
*/ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2); +CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry); + +exit_invalid_label = LABEL(); +for (i = 0; i < 8; i++) + sljit_set_label(exit_invalid[i], exit_invalid_label); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + +#if PCRE2_CODE_UNIT_WIDTH == 16 + +static void do_utfreadchar_invalid(compiler_common *common) +{ +/* Slow decoding a UTF-16 character. TMP1 contains the first half +of the character (>= 0xd800). Return char value in TMP1. STR_PTR is +undefined for invalid characters, and INVALID_UTF_CHAR is returned in TMP1 for them. */ +DEFINE_COMPILER; +struct sljit_jump *exit_invalid[3]; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +/* TMP2 contains the high surrogate. The code below shifts it left by 10, adds 0x10000 and then adds the low surrogate offset read from STR_PTR. */ +exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00); +exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000); +exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(exit_invalid[0]); +JUMPHERE(exit_invalid[1]); +JUMPHERE(exit_invalid[2]); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfreadnewline_invalid(compiler_common *common) +{ +/* Slow decoding a UTF-16 character, specialized for newlines. +TMP1 contains the first half of the character (>= 0xd800). Return +char value in TMP1. 
*/ + +DEFINE_COMPILER; +struct sljit_jump *exit_invalid[2]; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +/* TMP2 contains the high surrogate. */ +exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00); + +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00); +OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(exit_invalid[0]); +JUMPHERE(exit_invalid[1]); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfmoveback_invalid(compiler_common *common) +{ +/* Goes one character back (UTF-16 variant). On the valid exit STR_PTR is moved back by one more code unit and TMP1 is set to 1. On the invalid exits STR_PTR is advanced by one code unit and TMP1 is set to 0. TMP2 is compared with STR_PTR before the code unit at offset -1 is read. */ +DEFINE_COMPILER; +struct sljit_jump *exit_invalid[3]; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400); +exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); +exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(exit_invalid[0]); +JUMPHERE(exit_invalid[1]); +JUMPHERE(exit_invalid[2]); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfpeakcharback_invalid(compiler_common *common) +{ +/* Peak a character back. Does not modify STR_PTR. 
*/ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_jump *exit_invalid[3]; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000); /* Values of 0xe000 and above are returned in TMP1 unchanged. */ +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); +exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00); +exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800); +exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + +JUMPHERE(jump); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(exit_invalid[0]); +JUMPHERE(exit_invalid[1]); +JUMPHERE(exit_invalid[2]); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */ + +/* UCD_BLOCK_SIZE must be 128 (see the assert below): the mask is UCD_BLOCK_SIZE - 1 and the shift is log2(UCD_BLOCK_SIZE). */ +#define UCD_BLOCK_MASK 127 +#define UCD_BLOCK_SHIFT 7 + +static void do_getucd(compiler_common *common) +{ +/* Search the UCD record for the character comes in TMP1. +Returns chartype in TMP1 and UCD offset in TMP2. 
*/ +DEFINE_COMPILER; +#if PCRE2_CODE_UNIT_WIDTH == 32 +struct sljit_jump *jump; +#endif + +#if defined SLJIT_DEBUG && SLJIT_DEBUG +/* dummy_ucd_record: UNASSIGNED_UTF_CHAR must map to a record with the property values asserted here, because out-of-range 32-bit values are replaced by UNASSIGNED_UTF_CHAR below. */ +const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR); +SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther); +SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0); +#endif + +SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12); + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (!common->utf) + { + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR); + JUMPHERE(jump); + } +#endif + +OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); +OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); /* TMP2 = 16-bit ucd_stage1 entry for the block of the character. */ +OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); +OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); /* TMP2 = 16-bit ucd_stage2 entry. In this function TMP1 is left holding the stage 2 index; no chartype is loaded. */ +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_getucdtype(compiler_common *common) +{ +/* Search the UCD record for the character comes in TMP1. +Returns chartype in TMP1 and UCD offset in TMP2. 
*/ +DEFINE_COMPILER; +#if PCRE2_CODE_UNIT_WIDTH == 32 +struct sljit_jump *jump; +#endif + +#if defined SLJIT_DEBUG && SLJIT_DEBUG +/* dummy_ucd_record: UNASSIGNED_UTF_CHAR must map to a record with the property values asserted here, because out-of-range 32-bit values are replaced by UNASSIGNED_UTF_CHAR below. */ +const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR); +SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther); +SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0); +#endif + +SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12); /* The block shift and mask constants and the multiply-by-12 at the end of this function depend on these sizes. */ + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (!common->utf) + { + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR); + JUMPHERE(jump); + } +#endif + +OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); +OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); /* TMP2 = 16-bit ucd_stage1 entry for the block of the character. */ +OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); +OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); /* TMP2 = 16-bit ucd_stage2 entry, used below as the record index. */ + +/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). 
*/ +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1); /* Address = base + 4 * index + ((4 * index) << 1) = base + 12 * index; TMP2 is left holding 4 * index. */ + +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +#endif /* SUPPORT_UNICODE */ + +static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common) +{ +DEFINE_COMPILER; /* This function emits the code that steps STR_PTR forward by one character and returns the label of that loop (mainloop). */ +struct sljit_label *mainloop; +struct sljit_label *newlinelabel = NULL; +struct sljit_jump *start; +struct sljit_jump *end = NULL; +struct sljit_jump *end2 = NULL; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *loop; +struct sljit_jump *jump; +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ +jump_list *newline = NULL; +sljit_u32 overall_options = common->re->overall_options; +BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0; +BOOL newlinecheck = FALSE; +BOOL readuchar = FALSE; + +if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0) + && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255)) + newlinecheck = TRUE; + +SLJIT_ASSERT(common->abort_label == NULL); + +if ((overall_options & PCRE2_FIRSTLINE) != 0) + { + /* Search for the end of the first line. 
*/ + SLJIT_ASSERT(common->match_end_ptr != 0); + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); /* STR_PTR is saved in TMP3 and restored after the search. */ + + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + mainloop = LABEL(); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); + JUMPHERE(end); + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } + else + { + end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + mainloop = LABEL(); + /* Continual stores do not cause data dependency. */ + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0); + read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE); + check_newlinechar(common, common->nltype, &newline, TRUE); + CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop); + JUMPHERE(end); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0); + set_jumps(newline, LABEL()); + } + + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + } +else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0) + { + /* Check whether offset limit is set and valid. 
*/ + SLJIT_ASSERT(common->match_end_ptr != 0); + + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit)); + } + else + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit)); + + OP1(SLJIT_MOV, TMP2, 0, STR_END, 0); + end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET); /* With an unset limit, STR_END (already in TMP2) is stored as the match end. */ + if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */ + if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0); /* Clamp begin + offset_limit to STR_END. */ + OP1(SLJIT_MOV, TMP2, 0, STR_END, 0); + JUMPHERE(end2); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); + add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0)); + JUMPHERE(end); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0); + } + +start = JUMP(SLJIT_JUMP); + +if (newlinecheck) + { + newlinelabel = LABEL(); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */ + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + end2 = JUMP(SLJIT_JUMP); + } + +mainloop = LABEL(); + +/* Increasing the STR_PTR here requires one less jump in the most 
common case. */ +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && !common->invalid_utf) readuchar = TRUE; +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ +if (newlinecheck) readuchar = TRUE; + +if (readuchar) + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + +if (newlinecheck) + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->invalid_utf) + { + /* Skip continuation code units (0x80-0xbf), stopping at STR_END. */ + loop = LABEL(); + jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80); + CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPHERE(jump); + } +else if (common->utf) + { + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); /* Loads the utf8_table4 entry for the lead byte; it is added to STR_PTR below. */ + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + JUMPHERE(jump); + } +#elif PCRE2_CODE_UNIT_WIDTH == 16 +if (common->invalid_utf) + { + /* Skip continuation code units. 
*/ + loop = LABEL(); + jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00); + CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPHERE(jump); + } +else if (common->utf) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400); + SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR); + } + else + { + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + } + } +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */ +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ +JUMPHERE(start); + +if (newlinecheck) + { + JUMPHERE(end); + JUMPHERE(end2); + } + +return mainloop; +} + + +static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last) +{ /* Records chr in the character set of one prefix position, ignoring duplicates. A count of 255 marks the set as overflowed; it is set once MAX_DIFF_CHARS distinct characters are already stored and a new one arrives. last_count is increased for every character stored with last set. */ +sljit_u32 i, count = chars->count; + +if (count == 255) + return; + +if (count == 0) + { + chars->count = 1; + chars->chars[0] = chr; + + if (last) + chars->last_count = 1; + return; + } + +for (i = 0; i < count; i++) + if (chars->chars[i] == chr) + return; + +if (count >= MAX_DIFF_CHARS) + { + chars->count = 255; + return; + } + +chars->chars[count] = chr; +chars->count = count + 1; + +if (last) + chars->last_count++; +} + +static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count) +{ +/* Recursive function, which scans prefix literals. 
*/ +BOOL last, any, class, caseless; +int len, repeat, len_save, consumed = 0; +sljit_u32 chr; /* Any unicode character. */ +sljit_u8 *bytes, *bytes_end, byte; +PCRE2_SPTR alternative, cc_save, oc; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +PCRE2_UCHAR othercase[4]; +#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 +PCRE2_UCHAR othercase[2]; +#else +PCRE2_UCHAR othercase[1]; +#endif + +repeat = 1; +while (TRUE) + { + if (*rec_count == 0) /* rec_count is shared through the pointer with all recursive calls and bounds the total number of loop iterations. */ + return 0; + (*rec_count)--; + + last = TRUE; + any = FALSE; + class = FALSE; + caseless = FALSE; + + switch (*cc) + { + case OP_CHARI: + caseless = TRUE; + /* Fall through */ + case OP_CHAR: + last = FALSE; + cc++; + break; + + case OP_SOD: + case OP_SOM: + case OP_SET_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + /* Zero width assertions. They are skipped without consuming a prefix position. */ + cc++; + continue; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + cc = bracketend(cc); + continue; + + case OP_PLUSI: + case OP_MINPLUSI: + case OP_POSPLUSI: + caseless = TRUE; + /* Fall through */ + case OP_PLUS: + case OP_MINPLUS: + case OP_POSPLUS: + cc++; + break; + + case OP_EXACTI: + caseless = TRUE; + /* Fall through */ + case OP_EXACT: + repeat = GET2(cc, 1); + last = FALSE; + cc += 1 + IMM2_SIZE; + break; + + case OP_QUERYI: + case OP_MINQUERYI: + case OP_POSQUERYI: + caseless = TRUE; + /* Fall through */ + case OP_QUERY: + case OP_MINQUERY: + case OP_POSQUERY: + len = 1; + cc++; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); +#endif + max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count); + if (max_chars == 0) + return consumed; + last = FALSE; + break; + + case OP_KET: + cc += 1 + LINK_SIZE; + continue; + + case OP_ALT: + cc += 
GET(cc, 1); + continue; + + case OP_ONCE: + case OP_BRA: + case OP_BRAPOS: + case OP_CBRA: + case OP_CBRAPOS: + alternative = cc + GET(cc, 1); /* Every further alternative of the group is scanned recursively here; the first alternative is scanned by continuing this loop. */ + while (*alternative == OP_ALT) + { + max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count); + if (max_chars == 0) + return consumed; + alternative += GET(alternative, 1); + } + + if (*cc == OP_CBRA || *cc == OP_CBRAPOS) + cc += IMM2_SIZE; + cc += 1 + LINK_SIZE; + continue; + + case OP_CLASS: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE)) + return consumed; +#endif + class = TRUE; + break; + + case OP_NCLASS: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) return consumed; +#endif + class = TRUE; + break; + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) return consumed; +#endif + any = TRUE; + cc += GET(cc, 1); + break; +#endif + + case OP_DIGIT: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE)) + return consumed; +#endif + any = TRUE; + cc++; + break; + + case OP_WHITESPACE: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE)) + return consumed; +#endif + any = TRUE; + cc++; + break; + + case OP_WORDCHAR: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE)) + return consumed; +#endif + any = TRUE; + cc++; + break; + + case OP_NOT: + case OP_NOTI: + cc++; + /* Fall through. 
*/ + case OP_NOT_DIGIT: + case OP_NOT_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_ANY: + case OP_ALLANY: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) return consumed; +#endif + any = TRUE; + cc++; + break; + +#ifdef SUPPORT_UNICODE + case OP_NOTPROP: + case OP_PROP: +#if PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) return consumed; +#endif + any = TRUE; + cc += 1 + 2; + break; +#endif + + case OP_TYPEEXACT: + repeat = GET2(cc, 1); + cc += 1 + IMM2_SIZE; + continue; + + case OP_NOTEXACT: + case OP_NOTEXACTI: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) return consumed; +#endif + any = TRUE; + repeat = GET2(cc, 1); + cc += 1 + IMM2_SIZE + 1; + break; + + default: + return consumed; + } + + if (any) /* The item is not enumerated: each of the repeat positions it covers is marked with count 255. */ + { + do + { + chars->count = 255; + + consumed++; + if (--max_chars == 0) + return consumed; + chars++; + } + while (--repeat > 0); + + repeat = 1; + continue; + } + + if (class) + { + bytes = (sljit_u8*) (cc + 1); + cc += 1 + 32 / sizeof(PCRE2_UCHAR); + + switch (*cc) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPOSSTAR: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSQUERY: + max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count); + if (max_chars == 0) + return consumed; + break; + + default: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRPOSPLUS: + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + repeat = GET2(cc, 1); + if (repeat <= 0) + return consumed; + break; + } + + do + { + if (bytes[31] & 0x80) /* When bit 255 of the class bitmap is set the position is marked with count 255 instead of enumerating the bitmap. */ + chars->count = 255; + else if (chars->count != 255) + { + bytes_end = bytes + 32; + chr = 0; + do + { + byte = *bytes++; + SLJIT_ASSERT((chr & 0x7) == 0); + if (byte == 0) + chr += 8; + else + { + do + { + if ((byte & 0x1) != 0) + add_prefix_char(chr, chars, TRUE); + byte >>= 1; + chr++; + } + while (byte != 0); + chr = (chr + 7) & (sljit_u32)(~7); + } + } + while (chars->count != 255 && bytes < bytes_end); + bytes = bytes_end - 32; + } + 
+ + consumed++; + if (--max_chars == 0) + return consumed; + chars++; + } + while (--repeat > 0); + + switch (*cc) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPOSSTAR: + return consumed; + + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSQUERY: + cc++; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE)) + return consumed; + cc += 1 + 2 * IMM2_SIZE; + break; + } + + repeat = 1; + continue; + } + + len = 1; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); +#endif + + if (caseless && char_has_othercase(common, cc)) + { +#ifdef SUPPORT_UNICODE + if (common->utf) + { + GETCHAR(chr, cc); + if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len) /* Scanning stops unless the other case encodes to the same number of code units. */ + return consumed; + } + else +#endif + { + chr = *cc; +#ifdef SUPPORT_UNICODE + if (common->ucp && chr > 127) + { + chr = UCD_OTHERCASE(chr); + othercase[0] = (chr == (PCRE2_UCHAR)chr) ? chr : *cc; + } + else +#endif + othercase[0] = TABLE_GET(chr, common->fcc, chr); + } + } + else + { + caseless = FALSE; + othercase[0] = 0; /* Stops compiler warning - PH */ + } + + len_save = len; + cc_save = cc; + while (TRUE) + { + oc = othercase; + do + { + len--; + consumed++; + + chr = *cc; + add_prefix_char(*cc, chars, len == 0); + + if (caseless) + add_prefix_char(*oc, chars, len == 0); + + if (--max_chars == 0) + return consumed; + chars++; + cc++; + oc++; + } + while (len > 0); + + if (--repeat == 0) + break; + + len = len_save; + cc = cc_save; + } + + repeat = 1; + if (last) + return consumed; + } +} + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label) +{ /* Emits a jump to label when reg holds a code unit that cannot start a character: 0x80-0xbf in 8-bit mode, 0xdc00-0xdfff in 16-bit mode. reg is overwritten by the masking. */ +#if PCRE2_CODE_UNIT_WIDTH == 8 +OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0); +CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label); +#elif PCRE2_CODE_UNIT_WIDTH == 16 +OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00); 
+CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label); +#else +#error "Unknown code width" +#endif +} +#endif + +#include "pcre2_jit_simd_inc.h" + +#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD + +static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max) +{ /* Selects two prefix positions j < i, at most max_fast_forward_char_pair_offset() apart, both with last_count above 2 and with no character in common among their first two characters, preferring the largest last_count sum, and emits the SIMD pair search for them. Returns FALSE when no such pair exists. */ + sljit_s32 i, j, max_i = 0, max_j = 0; + sljit_u32 max_pri = 0; + sljit_s32 max_offset = max_fast_forward_char_pair_offset(); + PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri; + + for (i = max - 1; i >= 1; i--) + { + if (chars[i].last_count > 2) + { + a1 = chars[i].chars[0]; + a2 = chars[i].chars[1]; + a_pri = chars[i].last_count; + + j = i - max_offset; + if (j < 0) + j = 0; + + while (j < i) + { + b_pri = chars[j].last_count; + if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri) + { + b1 = chars[j].chars[0]; + b2 = chars[j].chars[1]; + + if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2) + { + max_pri = a_pri + b_pri; + max_i = i; + max_j = j; + } + } + j++; + } + } + } + +if (max_pri == 0) + return FALSE; + +fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]); +return TRUE; +} + +#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */ + +static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) +{ +DEFINE_COMPILER; /* This function emits a search loop that advances STR_PTR until the code unit at the given offset equals char1 or char2. */ +struct sljit_label *start; +struct sljit_jump *match; +struct sljit_jump *partial_quit; +PCRE2_UCHAR mask; +BOOL has_match_end = (common->match_end_ptr != 0); + +SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0); + +if (has_match_end) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + +if (offset > 0) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); + +if (has_match_end) + { + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1)); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0); 
+ SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END); + } + +#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD + +if (JIT_HAS_FAST_FORWARD_CHAR_SIMD) + { + fast_forward_char_simd(common, char1, char2, offset); + + if (offset > 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); + + if (has_match_end) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + return; + } + +#endif +/* Scalar search loop: read one code unit, step past it, and jump back to 'start' unless it is char1 or char2. */ +start = LABEL(); + +partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +/* When the two characters differ in exactly one bit, a single OR plus one compare covers both. */ +if (char1 == char2) + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start); +else + { + mask = char1 ^ char2; + if (is_powerof2(mask)) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start); + } + else + { + match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start); + JUMPHERE(match); + } + } + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset > 0) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1))); + jumpto_if_not_utf_char_start(compiler, TMP1, start); + } +#endif +/* Undo both the initial offset and the post-increment of the loop. */ +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1)); + +if (common->mode != PCRE2_JIT_COMPLETE) + JUMPHERE(partial_quit); + +if (has_match_end) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +} +/* Emits a fast-forward search built from the per-position character sets that scan_prefix() collects for up to MAX_N_CHARS positions. Strategies, in order: the SIMD pair search; a 256-entry byte-indexed skip table over the longest run (at least 4) of positions with known character sets, optionally combined with one extra character check; or a plain fast_forward_first_char2() search. Returns FALSE when no search could be generated, TRUE otherwise. */ +static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_label *start; +struct sljit_jump *match; +fast_forward_char_data chars[MAX_N_CHARS]; +sljit_s32 offset; +PCRE2_UCHAR mask; +PCRE2_UCHAR *char_set, *char_set_end; +int i, max, from; +int range_right = -1, range_len; +sljit_u8 *update_table = NULL; +BOOL in_range; +sljit_u32 rec_count; + +for (i = 0; i < MAX_N_CHARS; i++) + { + 
chars[i].count = 0; + chars[i].last_count = 0; + } + +rec_count = 10000; +max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count); + +if (max < 1) + return FALSE; + +/* Convert last_count to priority: a single character gets 7 or 5, two characters differing in one bit get 6 or 4, two other characters get 3 or 2, more characters get 1, and an unknown position (count 0, stored as 255) gets 0. The higher value of each pair is used when last_count equals count. */ +for (i = 0; i < max; i++) + { + SLJIT_ASSERT(chars[i].last_count <= chars[i].count); + + switch (chars[i].count) + { + case 0: + chars[i].count = 255; + chars[i].last_count = 0; + break; + + case 1: + chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5; + /* Simplifies algorithms later. */ + chars[i].chars[1] = chars[i].chars[0]; + break; + + case 2: + SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]); + + if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1])) + chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4; + else + chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2; + break; + + default: + chars[i].last_count = (chars[i].count == 255) ? 0 : 1; + break; + } + } + +#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD +if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max)) + return TRUE; +#endif +/* Find the longest run of consecutive positions with a known character set (count < 255). Only runs of at least 4 positions qualify; range_right is the last index of the chosen run and range_len its length. */ +in_range = FALSE; +/* Prevent compiler "uninitialized" warning */ +from = 0; +range_len = 4 /* minimum length */ - 1; +for (i = 0; i <= max; i++) + { + if (in_range && (i - from) > range_len && (chars[i - 1].count < 255)) + { + range_len = i - from; + range_right = i - 1; + } + + if (i < max && chars[i].count < 255) + { + SLJIT_ASSERT(chars[i].count > 0); + if (!in_range) + { + in_range = TRUE; + from = i; + } + } + else + in_range = FALSE; + } +/* Build the skip table: update_table[b] is the smallest distance i (scaled by IN_UCHARS) such that a character with low byte b is allowed at position range_right - i, or the full run length when b occurs nowhere in the run. */ +if (range_right >= 0) + { + update_table = (sljit_u8 *)allocate_read_only_data(common, 256); + if (update_table == NULL) + return TRUE;/* NOTE(review): TRUE is returned although nothing was emitted; presumably allocate_read_only_data() records the failure for the caller - confirm. */ + memset(update_table, IN_UCHARS(range_len), 256); + + for (i = 0; i < range_len; i++) + { + SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255); + + char_set = chars[range_right - i].chars; + char_set_end = char_set + chars[range_right - i].count; + do + { + if (update_table[(*char_set) 
& 0xff] > IN_UCHARS(i)) + update_table[(*char_set) & 0xff] = IN_UCHARS(i); + char_set++; + } + while (char_set < char_set_end); + } + } + +offset = -1; +/* Scan forward: choose the position (other than range_right) with the highest priority of at least 2 for an additional character check. The first such position wins ties. */ +for (i = 0; i < max; i++) + { + if (range_right == i) + continue; + + if (offset == -1) + { + if (chars[i].last_count >= 2) + offset = i; + } + else if (chars[offset].last_count < chars[i].last_count) + offset = i; + } + +SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2)); + +if (range_right < 0) + { + if (offset < 0) + return FALSE; + /* Works whether count is 1 or 2: chars[1] duplicates chars[0] when count is 1. */ + fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset); + return TRUE; + } + +SLJIT_ASSERT(range_right != offset); + +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); + add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS)); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0); + SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END); + } +else + { + OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); + add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS)); + } + +SLJIT_ASSERT(range_right >= 0); + +if (!HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table); +/* Main loop: load the low byte of the code unit at range_right, look up its skip distance in update_table, advance STR_PTR by it, and repeat until the distance is zero. */ +start = LABEL(); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); + +#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right)); +#else +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1); +#endif + +if (!HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0); +else + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table); + 
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start); +/* Verify the additional character at 'offset'; any mismatch restarts the main loop. STR_PTR is incremented here and decremented again below. */ +if (offset >= 0) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + if (chars[offset].count == 1) + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start); + else + { + mask = chars[offset].chars[0] ^ chars[offset].chars[1]; + if (is_powerof2(mask)) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start); + } + else + { + match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start); + JUMPHERE(match); + } + } + } + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset != 0) + { + if (offset < 0) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } + else + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + + jumpto_if_not_utf_char_start(compiler, TMP1, start); + + if (offset < 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } +#endif + +if (offset >= 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +/* Restore the original STR_END: from TMP3 when it was saved, otherwise by adding back the IN_UCHARS(max) subtracted above. */ +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +else + OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); +return TRUE; +} +/* Emits the first-code-unit search for re->first_codeunit. For PCRE2_FIRSTCASELESS patterns the other case is taken from the common->fcc table, or from UCD_OTHERCASE() for code units above 127 when utf or ucp is set. */ +static SLJIT_INLINE void fast_forward_first_char(compiler_common *common) +{ +PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit); +PCRE2_UCHAR oc; + +oc = first_char; +if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0) + { + oc = TABLE_GET(first_char, common->fcc, first_char); +#if defined SUPPORT_UNICODE + if (first_char > 127 && (common->utf || common->ucp)) + oc = UCD_OTHERCASE(first_char); +#endif + } + +fast_forward_first_char2(common, 
first_char, oc, 0); +} +/* Emits the newline fast-forward for the newline convention in common->nltype / common->newline. The search is skipped entirely (jump 'firstchar') when STR_PTR is not beyond jit_arguments.str. With common->match_end_ptr set, STR_END is replaced by the stored match end for the duration and restored from TMP3 on exit. The loop label of the generic path is published in common->ff_newline_shortcut. */ +static SLJIT_INLINE void fast_forward_newline(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_label *loop; +struct sljit_jump *lastchar = NULL; +struct sljit_jump *firstchar; +struct sljit_jump *quit = NULL; +struct sljit_jump *foundcr = NULL; +struct sljit_jump *notfoundnl; +jump_list *newline = NULL; +/* Search only up to the stored match end; the original STR_END is kept in TMP3. */ +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + } +/* Fixed two-code-unit newline: bits 8-15 of common->newline hold the first unit and bits 0-7 the second. */ +if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { +#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD + if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE) + { + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + } + firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); +/* Step back one code unit, and a second one unless that already reached jit_arguments.begin. */ + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + + fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + } + else +#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */ + { + lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, 
SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + } + firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); +/* Step back one code unit when STR_PTR is at least two units past jit_arguments.begin. */ + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +/* Scalar loop: advance one unit at a time until the two units just before STR_PTR form the newline, or STR_END is reached. */ + loop = LABEL(); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop); + + JUMPHERE(quit); + JUMPHERE(lastchar); + } + + JUMPHERE(firstchar); + + if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + return; + } + +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + } +else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); + +/* Example: match /^/ to \r\n from offset 1. 
*/ +firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + +if (common->nltype == NLTYPE_ANY) + move_back(common, NULL, FALSE); +else + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +loop = LABEL(); +common->ff_newline_shortcut = loop; + +#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD +if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF)) + { + if (common->nltype == NLTYPE_ANYCRLF) + { + fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0); + if (common->mode != PCRE2_JIT_COMPLETE) + lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + } + else + { + fast_forward_char_simd(common, common->newline, common->newline, 0); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + if (common->mode != PCRE2_JIT_COMPLETE) + { + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); + SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); + } + } + } +else +#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */ + { + read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE); + lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) + foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + check_newlinechar(common, common->nltype, &newline, FALSE); + set_jumps(newline, loop); + } +/* After a CR, also step over a directly following CHAR_NL when one is present before STR_END. Every other newline skips this part through 'quit'. */ +if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) + { + if (quit == NULL) + { + quit = JUMP(SLJIT_JUMP); + JUMPHERE(foundcr); + } + + notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + 
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + JUMPHERE(notfoundnl); + JUMPHERE(quit); + } + +if (lastchar) + JUMPHERE(lastchar); +JUMPHERE(firstchar); + +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +} +/* Forward declaration: fast_forward_start_bits() below uses optimize_class() before its definition. */ +static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks); +/* Emits a loop that advances STR_PTR until the code unit it points to is accepted by re->start_bitmap. In PCRE2_JIT_COMPLETE mode, reaching STR_END jumps to common->failed_match; otherwise the loop is left through partial_quit. When common->match_end_ptr is set, the original STR_END is kept in RETURN_ADDR and restored on exit. */ +static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common) +{ +DEFINE_COMPILER; +const sljit_u8 *start_bits = common->re->start_bitmap; +struct sljit_label *start; +struct sljit_jump *partial_quit; +#if PCRE2_CODE_UNIT_WIDTH != 8 +struct sljit_jump *found = NULL; +#endif +jump_list *matches = NULL; + +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0); + SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END); + } + +start = LABEL(); + +partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +/* Try the compact class test first; otherwise fall back to a bit test on start_bits (byte index TMP1 >> 3, bit index TMP1 & 7). */ +if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches)) + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + if ((start_bits[31] & 0x80) != 0) + found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255); + else + CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start); +#elif defined SUPPORT_UNICODE + if (common->utf && is_char7_bitset(start_bits, FALSE)) + CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start); +#endif + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), 
(sljit_sw)start_bits); + if (!HAS_VIRTUAL_REGISTERS) + { + OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0); + } + else + { + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); + } + JUMPTO(SLJIT_ZERO, start); + } +else + set_jumps(matches, start); + +#if PCRE2_CODE_UNIT_WIDTH != 8 +if (found != NULL) + JUMPHERE(found); +#endif +/* Undo the post-increment so STR_PTR points at the accepted code unit. */ +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +if (common->mode != PCRE2_JIT_COMPLETE) + JUMPHERE(partial_quit); + +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0); +} +/* Emits a forward search for req_char (or its other case when 'caseless' is set), starting at STR_PTR, or one code unit later when has_firstchar is set. The search is skipped when more than IN_UCHARS(REQ_CU_MAX) * 100 bytes remain before STR_END (jump 'toolong') or when STR_PTR is still below the position cached in the common->req_char_ptr stack slot (jump 'already_found'). On a hit the found position is written back to that slot. Returns the list of jumps taken when the character does not occur before STR_END. */ +static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar) +{ +DEFINE_COMPILER; +struct sljit_label *loop; +struct sljit_jump *toolong; +struct sljit_jump *already_found; +struct sljit_jump *found; +struct sljit_jump *found_oc = NULL; +jump_list *not_found = NULL; +sljit_u32 oc, bit; + +SLJIT_ASSERT(common->req_char_ptr != 0); +OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr); +toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0); +already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0); + +if (has_firstchar) + OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +else + OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0); +/* oc is the other case of req_char, or req_char itself for caseful matching. */ +oc = req_char; +if (caseless) + { + oc = TABLE_GET(req_char, common->fcc, req_char); +#if defined SUPPORT_UNICODE + if (req_char > 127 && (common->utf || common->ucp)) + oc = UCD_OTHERCASE(req_char); +#endif + } + +#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD +if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD) + { + not_found = fast_requested_char_simd(common, req_char, oc); + } +else +#endif + { + loop = LABEL(); + add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0); + + if (req_char == oc) + 
found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); + else + { + bit = req_char ^ oc; + if (is_powerof2(bit)) + { + OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit); + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit); + } + else + { + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); + found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc); + } + } + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPTO(SLJIT_JUMP, loop); + + JUMPHERE(found); + if (found_oc) + JUMPHERE(found_oc); + } +/* Cache the position of the hit so that later searches starting before it can be skipped. */ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0); + +JUMPHERE(already_found); +JUMPHERE(toolong); +return not_found; +} +/* Emits the fast-call helper that pops saved values from the stack back into the local area. Each entry starts with a header word at STACK_TOP[-1]: a positive header is a local offset whose two words are restored from the next two stack slots (three slots popped); a negative header is the negated local offset of a single word (two slots popped); a zero header ends the loop and returns to RETURN_ADDR. */ +static void do_revertframes(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_label *mainloop; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); +GET_LOCAL_BASE(TMP1, 0, 0); + +/* Drop frames until we reach STACK_TOP. */ +mainloop = LABEL(); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw)); +OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0); +jump = JUMP(SLJIT_SIG_LESS_EQUAL); + +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw)); + } +else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0); + GET_LOCAL_BASE(TMP1, 0, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0); + } +JUMPTO(SLJIT_JUMP, mainloop); + +JUMPHERE(jump); +sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | 
SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z); +jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */); +/* End of reverting values. */ +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +/* Negative header: TMP2 is negated to get the local offset of a single saved word. */ +JUMPHERE(jump); +OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw)); + } +else + { + OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0); + } +JUMPTO(SLJIT_JUMP, mainloop); +} +/* Bit masks indexed by the ucp_* values (presumably Unicode general category numbers). UCPCAT_RANGE(start, end) sets every bit from start through end inclusive. */ +#ifdef SUPPORT_UNICODE +#define UCPCAT(bit) (1 << (bit)) +#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2)) +#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3)) +#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1)) +#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu) +#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No) +#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1) +#endif +/* Emits the word boundary helper (fast call; the return address is kept in LOCALS0). TMP3 receives the word flag of the character before STR_PTR (0 when STR_PTR is not beyond jit_arguments.begin) and TMP2 the word flag of the character at STR_PTR (0 on the jumps that check_str_end() adds). On the normal return TMP2 holds TMP2 XOR TMP3 and the zero flag reflects it. When SUPPORT_UNICODE is defined, 'ucp' selects the category-mask test instead of the ctypes table. */ +static void check_wordboundary(compiler_common *common, BOOL ucp) +{ +DEFINE_COMPILER; +struct sljit_jump *skipread; +jump_list *skipread_list = NULL; +#ifdef SUPPORT_UNICODE +struct sljit_label *valid_utf; +jump_list *invalid_utf1 = NULL; +#endif /* SUPPORT_UNICODE */ +jump_list *invalid_utf2 = NULL; +#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE +struct sljit_jump *jump; +#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */ + +SLJIT_UNUSED_ARG(ucp); +SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16); + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); +/* Get type of the previous char, and put it to TMP3. 
*/ +OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); +OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); +skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + +#ifdef SUPPORT_UNICODE +if (common->invalid_utf) + { + peek_char_back(common, READ_CHAR_MAX, &invalid_utf1); + + if (common->mode != PCRE2_JIT_COMPLETE) + { + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); + OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); + move_back(common, NULL, TRUE); + check_start_used_ptr(common); + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); + } + } +else +#endif /* SUPPORT_UNICODE */ + { + if (common->mode == PCRE2_JIT_COMPLETE) + peek_char_back(common, READ_CHAR_MAX, NULL); + else + { + move_back(common, NULL, TRUE); + check_start_used_ptr(common); + read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR); + } + } + +/* Testing char type. With ucp, the value returned in TMP1 by the getucdtype helper is turned into a one-bit mask and tested against the Mn, Pc, UCPCAT_L and UCPCAT_N bits. Otherwise bit 4 (ctype_word) of the ctypes table entry is used, and characters above 255 keep the zero already in TMP3. */ +#ifdef SUPPORT_UNICODE +if (ucp) + { + add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N); + OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO); + } +else +#endif /* SUPPORT_UNICODE */ + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); +#elif defined SUPPORT_UNICODE + /* Here TMP3 has already been zeroed. 
*/ + jump = NULL; + if (common->utf) + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */); + OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1); +#if PCRE2_CODE_UNIT_WIDTH != 8 + JUMPHERE(jump); +#elif defined SUPPORT_UNICODE + if (jump != NULL) + JUMPHERE(jump); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + } +JUMPHERE(skipread); +/* Classify the current character into TMP2. It is preset to 0 for the jumps that check_str_end() adds to skipread_list. */ +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); +check_str_end(common, &skipread_list); +peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2); + +/* Testing char type. This duplicates the test above, writing the result to TMP2 instead of TMP3. */ +#ifdef SUPPORT_UNICODE + +valid_utf = LABEL(); + +if (ucp) + { + add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); + } +else +#endif /* SUPPORT_UNICODE */ + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + /* TMP2 may be destroyed by peek_char. 
*/ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); +#elif defined SUPPORT_UNICODE + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); + jump = NULL; + if (common->utf) + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); +#endif + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes); + OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); +#if PCRE2_CODE_UNIT_WIDTH != 8 + JUMPHERE(jump); +#elif defined SUPPORT_UNICODE + if (jump != NULL) + JUMPHERE(jump); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + } +set_jumps(skipread_list, LABEL()); +/* Normal return: TMP2 becomes current XOR previous, so the zero flag is set when both sides have the same word flag (no boundary). */ +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0); +OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); +/* Out-of-line targets of the jump lists filled by peek_char_back() and peek_char(). invalid_utf1: re-read the current character and continue at valid_utf unless it is INVALID_UTF_CHAR, in which case return with TMP2 = -1. invalid_utf2: return with TMP2 = TMP3. Neither return path performs the XOR above. */ +#ifdef SUPPORT_UNICODE +if (common->invalid_utf) + { + set_jumps(invalid_utf1, LABEL()); + + peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1); + OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); + + set_jumps(invalid_utf2, LABEL()); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP2, 0, TMP3, 0); + OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); + } +#endif /* SUPPORT_UNICODE */ +} +/* Tries to express the 256-bit class bitmap 'bits' as at most four boundaries (positions where membership flips) and emits a short compare sequence on TMP1 for it. Jumps for rejected characters are added to 'backtracks'; 'invert' flips the sense of the test. Returns FALSE, without emitting any code, when more boundaries would be needed. */ +static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) +{ +/* May destroy TMP1. */ +DEFINE_COMPILER; +int ranges[MAX_CLASS_RANGE_SIZE]; +sljit_u8 bit, cbit, all; +int i, byte, length = 0; + +bit = bits[0] & 0x1; +/* All bits will be zero or one (since bit is zero or one). 
*/ +all = (sljit_u8)-bit; +/* Record every position where the bit value flips. A whole byte equal to 'all' (eight bits with the current value) is skipped in one step. */ +for (i = 0; i < 256; ) + { + byte = i >> 3; + if ((i & 0x7) == 0 && bits[byte] == all) + i += 8; + else + { + cbit = (bits[byte] >> (i & 0x7)) & 0x1; + if (cbit != bit) + { + if (length >= MAX_CLASS_RANGE_SIZE) + return FALSE; + ranges[length] = i; + length++; + bit = cbit; + all = (sljit_u8)-cbit; /* sign extend bit into byte */ + } + i++; + } + } +/* Add a final boundary at 256 when the last bit disagrees with nclass (presumably nclass means that characters above 255 belong to the class - confirm with the callers). */ +if (((bit == 0) && nclass) || ((bit == 1) && !nclass)) + { + if (length >= MAX_CLASS_RANGE_SIZE) + return FALSE; + ranges[length] = 256; + length++; + } +/* The switch below only handles up to four boundaries. */ +if (length < 0 || length > 4) + return FALSE; + +bit = bits[0] & 0x1; +if (invert) bit ^= 0x1; + +/* No character is accepted. */ +if (length == 0 && bit == 0) + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + +switch(length) + { + case 0: + /* When bit != 0, all characters are accepted. */ + return TRUE; + + case 1: + add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); + return TRUE; + + case 2: + if (ranges[0] + 1 != ranges[1]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); + add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); + } + else + add_jump(compiler, backtracks, CMP(bit != 0 ? 
SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); + return TRUE; + + case 3: + if (bit != 0) + { + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); + if (ranges[0] + 1 != ranges[1]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); + return TRUE; + } + + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0])); + if (ranges[1] + 1 != ranges[2]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1])); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1])); + return TRUE; +/* Four boundaries describe two ranges. When both have the same length and their start values differ in a single bit, that bit is OR-ed into TMP1 so one range test covers both. */ + case 4: + if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2]) + && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2] + && (ranges[1] & (ranges[2] - ranges[0])) == 0 + && is_powerof2(ranges[2] - ranges[0])) + { + SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]); + if (ranges[2] + 1 != ranges[3]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); + add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); + } + else + add_jump(compiler, backtracks, CMP(bit != 0 ? 
SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); + return TRUE; + } +/* General case: 'i' remembers how much has already been subtracted from TMP1 so the second test can compensate. */ + if (bit != 0) + { + i = 0; + if (ranges[0] + 1 != ranges[1]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); + i = ranges[0]; + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); + + if (ranges[2] + 1 != ranges[3]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i)); + return TRUE; + } + + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0])); + if (ranges[1] + 1 != ranges[2]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1])); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); + return TRUE; + + default: + SLJIT_UNREACHABLE(); + return FALSE; + } +} +/* Emits a class test that compares TMP1 against a short list of characters: the set bits of 'bits', or the clear bits when nclass is set. TMP2 collects the result through conditional selects and a single jump on it is added to 'backtracks'. Two characters that differ only in bit 0x20 share one list entry. Returns FALSE, without emitting code, when SLJIT_HAS_CMOV is unavailable, when the list would exceed MAX_CLASS_CHARS_SIZE entries, or when it is empty. */ +static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) +{ +/* May destroy TMP1 and TMP2. 
*/ +DEFINE_COMPILER; +uint16_t char_list[MAX_CLASS_CHARS_SIZE]; +uint8_t byte; +sljit_s32 type; +int i, j, k, len, c; + +if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV)) + return FALSE; + +len = 0; + +for (i = 0; i < 32; i++) + { + byte = bits[i]; + + if (nclass) + byte = (sljit_u8)~byte; + + j = 0; + while (byte != 0) + { + if (byte & 0x1) + { + c = i * 8 + j; + + k = len; +/* If c - 0x20 is already listed, merge c into that entry: 0x100 flags the entry as a pair and 0x20 is set in its low byte. */ + if ((c & 0x20) != 0) + { + for (k = 0; k < len; k++) + if (char_list[k] == c - 0x20) + { + char_list[k] |= 0x120; + break; + } + } + + if (k == len) + { + if (len >= MAX_CLASS_CHARS_SIZE) + return FALSE; + + char_list[len++] = (uint16_t) c; + } + } + + byte >>= 1; + j++; + } + } + +if (len == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */ + +i = 0; +j = 0; +/* TMP2 accumulates the result and ends up non-zero on a hit. NOTE(review): character 0 is special-cased with OP_FLAGS, presumably because SELECT copies TMP1 itself into TMP2 on a hit and that would be 0 here - confirm against the SELECT macro. */ +if (char_list[0] == 0) + { + i++; + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO); + } +else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); +/* First pass: single characters. Paired entries are only counted in j. */ +while (i < len) + { + if ((char_list[i] & 0x100) != 0) + j++; + else + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]); + SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2); + } + i++; + } +/* Second pass: paired entries are compared after forcing bit 0x20 on in TMP1. */ +if (j != 0) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20); + + for (i = 0; i < len; i++) + if ((char_list[i] & 0x100) != 0) + { + j--; + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff); + SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2); + } + } + +if (invert) + nclass = !nclass; + +type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL; +add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0)); +return TRUE; +} +/* Emits a compact test of TMP1 against the class bitmap 'bits': optimize_class_ranges() is tried first, then optimize_class_chars(). Returns FALSE when neither could generate the test. */ +static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) +{ +/* May destroy TMP1. 
*/ +if (optimize_class_ranges(common, bits, nclass, invert, backtracks)) + return TRUE; +return optimize_class_chars(common, bits, nclass, invert, backtracks); +} + +static void check_anynewline(compiler_common *common) +{ +/* Check whether TMP1 contains a newline character. TMP1 is modified, TMP2 destroyed. */ +DEFINE_COMPILER; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utf) + { +#endif + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); +#if PCRE2_CODE_UNIT_WIDTH == 8 + } +#endif +#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */ +OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void check_hspace(compiler_common *common) +{ +/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed; TMP1 is also modified when the wide-character tests are emitted. 
*/ +DEFINE_COMPILER; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20); +OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0); +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utf) + { +#endif + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); +#if PCRE2_CODE_UNIT_WIDTH == 8 + } +#endif +#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */ +OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); + +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void check_vspace(compiler_common *common) +{ +/* Check whether TMP1 contains a vertical space character. TMP1 is modified, TMP2 destroyed. 
*/ +DEFINE_COMPILER; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utf) + { +#endif + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); +#if PCRE2_CODE_UNIT_WIDTH == 8 + } +#endif +#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */ +OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); + +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_casefulcmp(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_label *label; +int char1_reg; +int char2_reg; + +if (HAS_VIRTUAL_REGISTERS) + { + char1_reg = STR_END; + char2_reg = STACK_TOP; + } +else + { + char1_reg = TMP3; + char2_reg = RETURN_ADDR; + } + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +if (char1_reg == STR_END) + { + OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0); + OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0); + } + +if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) + { + label = LABEL(); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPTO(SLJIT_NOT_ZERO, label); + + JUMPHERE(jump); + 
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + } +else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + label = LABEL(); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPTO(SLJIT_NOT_ZERO, label); + + JUMPHERE(jump); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } +else + { + label = LABEL(); + OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0); + OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPTO(SLJIT_NOT_ZERO, label); + + JUMPHERE(jump); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + } + +if (char1_reg == STR_END) + { + OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0); + OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0); + } + +OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); +} + +static void do_caselesscmp(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_label *label; +int char1_reg = STR_END; +int char2_reg; +int lcc_table; +int opt_type = 0; + +if (HAS_VIRTUAL_REGISTERS) + { + char2_reg = STACK_TOP; + lcc_table = STACK_LIMIT; + } +else + { + char2_reg = RETURN_ADDR; + lcc_table = TMP3; + } + +if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | 
SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) + opt_type = 1; +else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) + opt_type = 2; + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0); + +if (char2_reg == STACK_TOP) + { + OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0); + OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0); + } + +OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc); + +if (opt_type == 1) + { + label = LABEL(); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + } +else if (opt_type == 2) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + label = LABEL(); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + } +else + { + label = LABEL(); + OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0); + OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + } + +#if PCRE2_CODE_UNIT_WIDTH != 8 +jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255); +#endif +OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0); +#if PCRE2_CODE_UNIT_WIDTH != 8 +JUMPHERE(jump); +jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255); +#endif +OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0); +#if PCRE2_CODE_UNIT_WIDTH != 8 +JUMPHERE(jump); +#endif + +if (opt_type == 0) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 
IN_UCHARS(1)); + +jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); +OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); +JUMPTO(SLJIT_NOT_ZERO, label); + +JUMPHERE(jump); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + +if (opt_type == 2) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +if (char2_reg == STACK_TOP) + { + OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0); + OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0); + } + +OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); +OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); +} + +static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc, + compare_context *context, jump_list **backtracks) +{ +DEFINE_COMPILER; +unsigned int othercasebit = 0; +PCRE2_SPTR othercasechar = NULL; +#ifdef SUPPORT_UNICODE +int utflength; +#endif + +if (caseless && char_has_othercase(common, cc)) + { + othercasebit = char_get_othercase_bit(common, cc); + SLJIT_ASSERT(othercasebit); + /* Extracting bit difference info. */ +#if PCRE2_CODE_UNIT_WIDTH == 8 + othercasechar = cc + (othercasebit >> 8); + othercasebit &= 0xff; +#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + /* Note that this code only handles characters in the BMP. If there + ever are characters outside the BMP whose othercase differs in only one + bit from itself (there currently are none), this code will need to be + revised for PCRE2_CODE_UNIT_WIDTH == 32. 
*/ + othercasechar = cc + (othercasebit >> 9); + if ((othercasebit & 0x100) != 0) + othercasebit = (othercasebit & 0xff) << 8; + else + othercasebit &= 0xff; +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ + } + +if (context->sourcereg == -1) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + if (context->length >= 4) + OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + else if (context->length >= 2) + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + else +#endif + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + if (context->length >= 4) + OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + else +#endif + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#elif PCRE2_CODE_UNIT_WIDTH == 32 + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ + context->sourcereg = TMP2; + } + +#ifdef SUPPORT_UNICODE +utflength = 1; +if (common->utf && HAS_EXTRALEN(*cc)) + utflength += GET_EXTRALEN(*cc); + +do + { +#endif + + context->length -= IN_UCHARS(1); +#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) + + /* Unaligned read is supported. 
*/ + if (othercasebit != 0 && othercasechar == cc) + { + context->c.asuchars[context->ucharptr] = *cc | othercasebit; + context->oc.asuchars[context->ucharptr] = othercasebit; + } + else + { + context->c.asuchars[context->ucharptr] = *cc; + context->oc.asuchars[context->ucharptr] = 0; + } + context->ucharptr++; + +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1)) +#else + if (context->ucharptr >= 2 || context->length == 0) +#endif + { + if (context->length >= 4) + OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + else if (context->length >= 2) + OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#if PCRE2_CODE_UNIT_WIDTH == 8 + else if (context->length >= 1) + OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; + + switch(context->ucharptr) + { + case 4 / sizeof(PCRE2_UCHAR): + if (context->oc.asint != 0) + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); + break; + + case 2 / sizeof(PCRE2_UCHAR): + if (context->oc.asushort != 0) + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); + break; + +#if PCRE2_CODE_UNIT_WIDTH == 8 + case 1: + if (context->oc.asbyte != 0) + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); + break; +#endif + + default: + SLJIT_UNREACHABLE(); + 
break; + } + context->ucharptr = 0; + } + +#else + + /* Unaligned read is unsupported or in 32 bit mode. */ + if (context->length >= 1) + OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + + context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; + + if (othercasebit != 0 && othercasechar == cc) + { + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); + +#endif + + cc++; +#ifdef SUPPORT_UNICODE + utflength--; + } +while (utflength > 0); +#endif + +return cc; +} + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + +#define SET_CHAR_OFFSET(value) \ + if ((value) != charoffset) \ + { \ + if ((value) < charoffset) \ + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \ + else \ + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \ + } \ + charoffset = (value); + +static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr); + +#ifdef SUPPORT_UNICODE +#define XCLASS_SAVE_CHAR 0x001 +#define XCLASS_CHAR_SAVED 0x002 +#define XCLASS_HAS_TYPE 0x004 +#define XCLASS_HAS_SCRIPT 0x008 +#define XCLASS_HAS_SCRIPT_EXTENSION 0x010 +#define XCLASS_HAS_BOOL 0x020 +#define XCLASS_HAS_BIDICL 0x040 +#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL) +#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080 +#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100 +#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200 +#endif /* SUPPORT_UNICODE */ + +static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks) +{ +DEFINE_COMPILER; +jump_list 
*found = NULL; +jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks; +sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX; +struct sljit_jump *jump = NULL; +PCRE2_SPTR ccbegin; +int compares, invertcmp, numberofcmps; +#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) +BOOL utf = common->utf; +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */ + +#ifdef SUPPORT_UNICODE +sljit_u32 unicode_status = 0; +sljit_u32 category_list = 0; +sljit_u32 items; +int typereg = TMP1; +const sljit_u32 *other_cases; +#endif /* SUPPORT_UNICODE */ + +/* Scanning the necessary info. */ +cc++; +ccbegin = cc; +compares = 0; + +if (cc[-1] & XCL_MAP) + { + min = 0; + cc += 32 / sizeof(PCRE2_UCHAR); + } + +while (*cc != XCL_END) + { + compares++; + + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + if (c > max) max = c; + if (c < min) min = c; +#ifdef SUPPORT_UNICODE + unicode_status |= XCLASS_SAVE_CHAR; +#endif /* SUPPORT_UNICODE */ + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + if (c < min) min = c; + GETCHARINCTEST(c, cc); + if (c > max) max = c; +#ifdef SUPPORT_UNICODE + unicode_status |= XCLASS_SAVE_CHAR; +#endif /* SUPPORT_UNICODE */ + } +#ifdef SUPPORT_UNICODE + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + + if (*cc == PT_CLIST && cc[-1] == XCL_PROP) + { + other_cases = PRIV(ucd_caseless_sets) + cc[1]; + while (*other_cases != NOTACHAR) + { + if (*other_cases > max) max = *other_cases; + if (*other_cases < min) min = *other_cases; + other_cases++; + } + } + else + { + max = READ_CHAR_MAX; + min = 0; + } + + items = 0; + + switch(*cc) + { + case PT_ANY: + /* Any either accepts everything or ignored. 
*/ + if (cc[-1] == XCL_PROP) + items = UCPCAT_ALL; + else + compares--; + break; + + case PT_LAMP: + items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt); + break; + + case PT_GC: + items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]); + break; + + case PT_PC: + items = UCPCAT(cc[1]); + break; + + case PT_WORD: + items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N; + break; + + case PT_ALNUM: + items = UCPCAT_L | UCPCAT_N; + break; + + case PT_SCX: + unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION; + if (cc[-1] == XCL_NOTPROP) + { + unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP; + break; + } + compares++; + /* Fall through */ + + case PT_SC: + unicode_status |= XCLASS_HAS_SCRIPT; + break; + + case PT_SPACE: + case PT_PXSPACE: + case PT_PXGRAPH: + case PT_PXPRINT: + case PT_PXPUNCT: + unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE; + break; + + case PT_CLIST: + case PT_UCNC: + case PT_PXXDIGIT: + unicode_status |= XCLASS_SAVE_CHAR; + break; + + case PT_BOOL: + unicode_status |= XCLASS_HAS_BOOL; + break; + + case PT_BIDICL: + unicode_status |= XCLASS_HAS_BIDICL; + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + + if (items > 0) + { + if (cc[-1] == XCL_NOTPROP) + items ^= UCPCAT_ALL; + category_list |= items; + unicode_status |= XCLASS_HAS_TYPE; + compares--; + } + + cc += 2; + } +#endif /* SUPPORT_UNICODE */ + } + +#ifdef SUPPORT_UNICODE +if (category_list == UCPCAT_ALL) + { + /* All characters are accepted, same as dotall. */ + compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); + if (list == backtracks) + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + return; + } + +if (compares == 0 && category_list == 0) + { + /* No characters are accepted, same as (*F) or dotall. 
*/ + compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); + if (list != backtracks) + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + return; + } +#else /* !SUPPORT_UNICODE */ +SLJIT_ASSERT(compares > 0); +#endif /* SUPPORT_UNICODE */ + +/* We are not necessary in utf mode even in 8 bit mode. */ +cc = ccbegin; +if ((cc[-1] & XCL_NOT) != 0) + read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR); +else + { +#ifdef SUPPORT_UNICODE + read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0); +#else /* !SUPPORT_UNICODE */ + read_char(common, min, max, NULL, 0); +#endif /* SUPPORT_UNICODE */ + } + +if ((cc[-1] & XCL_HASPROP) == 0) + { + if ((cc[-1] & XCL_MAP) != 0) + { + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); + if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found)) + { + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); + add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO)); + } + + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump); + + cc += 32 / sizeof(PCRE2_UCHAR); + } + else + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min); + add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? 
backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min)); + } + } +else if ((cc[-1] & XCL_MAP) != 0) + { + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); +#ifdef SUPPORT_UNICODE + unicode_status |= XCLASS_CHAR_SAVED; +#endif /* SUPPORT_UNICODE */ + if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list)) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 + jump = NULL; + if (common->utf) +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); + + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); + add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO)); + +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf) +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + JUMPHERE(jump); + } + + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + cc += 32 / sizeof(PCRE2_UCHAR); + } + +#ifdef SUPPORT_UNICODE +if (unicode_status & XCLASS_NEEDS_UCD) + { + if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR) + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); + +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (!common->utf) + { + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR); + JUMPHERE(jump); + } +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ + + OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); + OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); + OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); + OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); 
+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + + ccbegin = cc; + + if (category_list != 0) + compares++; + + if (unicode_status & XCLASS_HAS_BIDICL) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT); + + while (*cc != XCL_END) + { + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + if (*cc == PT_BIDICL) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; + jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]); + add_jump(compiler, compares > 0 ? list : backtracks, jump); + } + cc += 2; + } + } + + cc = ccbegin; + } + + if (unicode_status & XCLASS_HAS_BOOL) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); + + while (*cc != XCL_END) + { + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + if (*cc == PT_BOOL) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; + + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f))); + add_jump(compiler, compares > 0 ? 
list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + } + cc += 2; + } + } + + cc = ccbegin; + } + + if (unicode_status & XCLASS_HAS_SCRIPT) + { + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); + + while (*cc != XCL_END) + { + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + switch (*cc) + { + case PT_SCX: + if (cc[-1] == XCL_NOTPROP) + break; + /* Fall through */ + + case PT_SC: + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; + + add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1])); + } + cc += 2; + } + } + + cc = ccbegin; + } + + if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); + + if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP) + { + if (unicode_status & XCLASS_HAS_TYPE) + { + if (unicode_status & XCLASS_SAVE_CHAR) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0); + unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0; + } + else + { + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0); + unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR; + } + } + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); + } + + while (*cc != XCL_END) + { + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + 
cc++; + if (*cc == PT_SCX) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + + jump = NULL; + if (cc[-1] == XCL_NOTPROP) + { + jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]); + if (invertcmp) + { + add_jump(compiler, backtracks, jump); + jump = NULL; + } + invertcmp ^= 0x1; + } + + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f))); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + + if (jump != NULL) + JUMPHERE(jump); + } + cc += 2; + } + } + + if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR) + OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0); + cc = ccbegin; + } + + if (unicode_status & XCLASS_SAVE_CHAR) + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + + if (unicode_status & XCLASS_HAS_TYPE) + { + if (unicode_status & XCLASS_SAVE_CHAR) + typereg = RETURN_ADDR; + + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0); + + if (category_list > 0) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + } + } + } +#endif /* SUPPORT_UNICODE */ + +/* Generating code. */ +charoffset = 0; +numberofcmps = 0; + +while (*cc != XCL_END) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + jump = NULL; + + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + + if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP_FLAGS(numberofcmps == 0 ? 
SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + numberofcmps++; + } + else if (numberofcmps > 0) + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + numberofcmps = 0; + } + else + { + jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + numberofcmps = 0; + } + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + SET_CHAR_OFFSET(c); + GETCHARINCTEST(c, cc); + + if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) + { + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + numberofcmps++; + } + else if (numberofcmps > 0) + { + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + numberofcmps = 0; + } + else + { + jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + numberofcmps = 0; + } + } +#ifdef SUPPORT_UNICODE + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + if (*cc == XCL_NOTPROP) + invertcmp ^= 0x1; + cc++; + switch(*cc) + { + case PT_ANY: + case PT_LAMP: + case PT_GC: + case PT_PC: + case PT_SC: + case PT_SCX: + case PT_BOOL: + case PT_BIDICL: + case PT_WORD: + case PT_ALNUM: + compares++; + /* Already handled. 
*/ + break; + + case PT_SPACE: + case PT_PXSPACE: + SET_CHAR_OFFSET(9); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_CLIST: + other_cases = PRIV(ucd_caseless_sets) + cc[1]; + + /* At least three characters are required. + Otherwise this case would be handled by the normal code path. */ + SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR); + SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]); + + /* Optimizing character pairs, if their difference is power of 2. 
*/ + if (is_powerof2(other_cases[1] ^ other_cases[0])) + { + if (charoffset == 0) + OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); + else + { + OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); + OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); + } + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + other_cases += 2; + } + /* Otherwise try the [1]/[2] pair. The OR mask must be the single bit in which that pair differs, on both the charoffset == 0 and charoffset != 0 paths. */ + else if (is_powerof2(other_cases[2] ^ other_cases[1])) + { + if (charoffset == 0) + OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]); + else + { + OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); + OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]); + } + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); + OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); + + other_cases += 3; + } + else + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + } + + while (*other_cases != NOTACHAR) + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); + OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? 
SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); + } + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_UCNC: + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + SET_CHAR_OFFSET(0xa0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + SET_CHAR_OFFSET(0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_PXGRAPH: + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); + + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf)); + jump = JUMP(SLJIT_ZERO); + + c = charoffset; + /* In case of ucp_Cf, we overwrite the result. */ + SET_CHAR_OFFSET(0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + /* Restore charoffset. 
*/ + SET_CHAR_OFFSET(c); + + JUMPHERE(jump); + jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); + break; + + case PT_PXPRINT: + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); + + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf)); + jump = JUMP(SLJIT_ZERO); + + c = charoffset; + /* In case of ucp_Cf, we overwrite the result. */ + SET_CHAR_OFFSET(0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + /* Restore charoffset. */ + SET_CHAR_OFFSET(c); + + JUMPHERE(jump); + jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); + break; + + case PT_PXPUNCT: + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); + + SET_CHAR_OFFSET(0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f); + OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL); + + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_PXXDIGIT: + SET_CHAR_OFFSET(CHAR_A); + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(CHAR_0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff10); + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10); + + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + 
SET_CHAR_OFFSET(0xff21); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff41); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff10); + + JUMPHERE(jump); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + cc += 2; + } +#endif /* SUPPORT_UNICODE */ + + if (jump != NULL) + add_jump(compiler, compares > 0 ? list : backtracks, jump); + } + +SLJIT_ASSERT(compares == 0); +if (found != NULL) + set_jumps(found, LABEL()); +} + +#undef SET_TYPE_OFFSET +#undef SET_CHAR_OFFSET + +#endif +/* Emits the matching-path code for the assertion opcode given in type (one case per opcode in the switch below). Jumps taken when the assertion does not hold are added to the backtracks list. Every case returns cc unchanged. */ +static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks) +{ +DEFINE_COMPILER; +struct sljit_jump *jump[4]; + +switch(type) + { + case OP_SOD: + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } + else + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); + return cc; + + case OP_SOM: + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + } + else + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); + return cc; + + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + add_jump(compiler, (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? 
&common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL)); +#ifdef SUPPORT_UNICODE + if (common->invalid_utf) + { + add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + return cc; + } +#endif /* SUPPORT_UNICODE */ + sljit_set_current_flags(compiler, SLJIT_SET_Z); + add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO)); + return cc; + + case OP_EODN: + /* Requires rather complex checks. */ + jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); + else + { + jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); + } + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else if (common->nltype == NLTYPE_FIXED) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); + add_jump(compiler, 
backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); + } + else + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0); + jump[2] = JUMP(SLJIT_GREATER); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */); + /* Equal. */ + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); +/* Target of jump[1]: the code unit at STR_PTR is not CHAR_CR. */ + JUMPHERE(jump[1]); + if (common->nltype == NLTYPE_ANYCRLF) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); + } + else + { + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); + sljit_set_current_flags(compiler, SLJIT_SET_Z); + add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + } + JUMPHERE(jump[2]); + JUMPHERE(jump[3]); + } + JUMPHERE(jump[0]); + if (common->mode != PCRE2_JIT_COMPLETE) + check_partial(common, TRUE); + return cc; + + case OP_EOD: + add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + if (common->mode != PCRE2_JIT_COMPLETE) + check_partial(common, TRUE); + return cc; + + case OP_DOLL: + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + } + else + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, 
options), SLJIT_IMM, PCRE2_NOTEOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); +/* endonly clear: reuse the OP_EODN code; endonly set: only STR_PTR >= STR_END passes. */ + if (!common->endonly) + compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks); + else + { + add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + check_partial(common, FALSE); + } + return cc; + + case OP_DOLLM: + jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + } + else + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); + check_partial(common, FALSE); + jump[0] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[1]); +/* Reached when STR_PTR < STR_END; the code below tests what is at STR_PTR against the newline setting. */ + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0)); + else + { + jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0); + /* STR_PTR = STR_END - IN_UCHARS(1) */ + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); + } + + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else + { + peek_char(common, common->nlmax, TMP3, 0, NULL); + check_newlinechar(common, common->nltype, backtracks, FALSE); + } + JUMPHERE(jump[0]); + return cc; + + case OP_CIRC: + if (HAS_VIRTUAL_REGISTERS) + { + 
OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); + } + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); + } + return cc; + + case OP_CIRCM: + /* TMP2 might be used by peek_char_back. */ + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + } + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); + jump[0] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[1]); +/* Reached when STR_PTR > jit_arguments.begin; the code below tests what precedes STR_PTR against the newline setting. */ + if (!common->alt_circumflex) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + add_jump(compiler, 
backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else + { + peek_char_back(common, common->nlmax, backtracks); + check_newlinechar(common, common->nltype, backtracks, FALSE); + } + JUMPHERE(jump[0]); + return cc; + } +SLJIT_UNREACHABLE(); +return cc; +} + +#ifdef SUPPORT_UNICODE + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +/* The code in this function copies the logic of the interpreter function that +is defined in the pcre2_extuni.c source. If that code is updated, this +function, and those below it, must be kept in step (note by PH, June 2024). */ +/* Helper called from the code emitted for OP_EXTUNI. args supplies the subject bounds (begin and end); cc is the start position. Returns endcc, the position after the last character accepted by the loop. */ +static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc) +{ +PCRE2_SPTR start_subject = args->begin; +PCRE2_SPTR end_subject = args->end; +int lgb, rgb, ricount; +PCRE2_SPTR prevcc, endcc, bptr; +BOOL first = TRUE; +BOOL was_ep_ZWJ = FALSE; +uint32_t c; + +prevcc = cc; +endcc = NULL; +do + { + GETCHARINC(c, cc); + rgb = UCD_GRAPHBREAK(c); + + if (first) + { + lgb = rgb; + endcc = cc; + first = FALSE; + continue; + } +/* Stop if bit rgb is clear in the ucp_gbtable entry for lgb. */ + if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) + break; + + /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was + preceded by Extended Pictographic. */ + + if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) + break; + + /* Not breaking between Regional Indicators is allowed only if there + are an even number of preceding RIs. 
*/ + + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) + { + ricount = 0; + bptr = prevcc; + + /* bptr is pointing to the left-hand character */ + while (bptr > start_subject) + { + bptr--; + BACKCHAR(bptr); + GETCHAR(c, bptr); + + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) + break; + + ricount++; + } + + if ((ricount & 1) != 0) break; /* Grapheme break required */ + } + + /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in + between; see next statement). */ + + was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); + + /* If Extend follows Extended_Pictographic, do not update lgb; this allows + any number of them before a following ZWJ. */ + + if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) + lgb = rgb; + + prevcc = endcc; + endcc = cc; + } +while (cc < end_subject); + +return endcc; +} + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +/* The code in this function copies the logic of the interpreter function that +is defined in the pcre2_extuni.c source. If that code is updated, this +function, and the one below it, must be kept in step (note by PH, June 2024). */ +/* Variant selected by the OP_EXTUNI code when common->invalid_utf is set. Characters are read with GETCHARINC_INVALID and GETCHARBACK_INVALID, which receive `break` as their last argument. endcc is initialised to NULL and first assigned after the first character has been read. NOTE(review): presumably a malformed first character therefore yields a NULL return - confirm against the macro definitions. */ +static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc) +{ +PCRE2_SPTR start_subject = args->begin; +PCRE2_SPTR end_subject = args->end; +int lgb, rgb, ricount; +PCRE2_SPTR prevcc, endcc, bptr; +BOOL first = TRUE; +BOOL was_ep_ZWJ = FALSE; +uint32_t c; + +prevcc = cc; +endcc = NULL; +do + { + GETCHARINC_INVALID(c, cc, end_subject, break); + rgb = UCD_GRAPHBREAK(c); + + if (first) + { + lgb = rgb; + endcc = cc; + first = FALSE; + continue; + } + + if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) + break; + + /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was + preceded by Extended Pictographic. 
*/ + + if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) + break; + + /* Not breaking between Regional Indicators is allowed only if there + are an even number of preceding RIs. */ + + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) + { + ricount = 0; + bptr = prevcc; + + /* bptr is pointing to the left-hand character */ + while (bptr > start_subject) + { + GETCHARBACK_INVALID(c, bptr, start_subject, break); + + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) + break; + + ricount++; + } + + if ((ricount & 1) != 0) + break; /* Grapheme break required */ + } + + /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in + between; see next statement). */ + + was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); + + /* If Extend follows Extended_Pictographic, do not update lgb; this allows + any number of them before a following ZWJ. */ + + if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) + lgb = rgb; + + prevcc = endcc; + endcc = cc; + } +while (cc < end_subject); +/* May still be NULL here: the OP_EXTUNI code compares the return value with 0 when invalid_utf is set. */ +return endcc; +} + +/* The code in this function copies the logic of the interpreter function that +is defined in the pcre2_extuni.c source. If that code is updated, this +function must be kept in step (note by PH, June 2024). 
*/ +/* Variant that reads one code unit per character (c = *cc). When PCRE2_CODE_UNIT_WIDTH is 32, a value of 0x110000 or above ends the scan. Returns the position after the last code unit accepted. */ +static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc) +{ +PCRE2_SPTR start_subject = args->begin; +PCRE2_SPTR end_subject = args->end; +int lgb, rgb, ricount; +PCRE2_SPTR bptr; +uint32_t c; +BOOL was_ep_ZWJ = FALSE; + +/* Patch by PH */ +/* GETCHARINC(c, cc); */ +c = *cc++; + +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (c >= 0x110000) + return cc; +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ +lgb = UCD_GRAPHBREAK(c); + +while (cc < end_subject) + { + c = *cc; +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c >= 0x110000) + break; +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ + rgb = UCD_GRAPHBREAK(c); + + if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) + break; + + /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was + preceded by Extended Pictographic. */ + + if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ) + break; + + /* Not breaking between Regional Indicators is allowed only if there + are an even number of preceding RIs. */ + + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) + { + ricount = 0; + bptr = cc - 1; + + /* bptr is pointing to the left-hand character */ + while (bptr > start_subject) + { + bptr--; + c = *bptr; +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c >= 0x110000) + break; +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ + + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; + + ricount++; + } + + if ((ricount & 1) != 0) + break; /* Grapheme break required */ + } + + /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in + between; see next statement). */ + + was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ); + + /* If Extend follows Extended_Pictographic, do not update lgb; this allows + any number of them before a following ZWJ. 
*/ + + if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) + lgb = rgb; + + cc++; + } + +return cc; +} + +#endif /* SUPPORT_UNICODE */ +/* Emits the matching-path code for one single-character item. type is the opcode and cc points at what follows it (see the call in compile_charn_matchingpath). If check_str_ptr is set, a detect_partial_match() or end-of-subject check is emitted before the character is read. Failure jumps are added to the backtracks list. Returns cc advanced past any operand data of the item. */ +static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr) +{ +DEFINE_COMPILER; +int length; +unsigned int c, oc, bit; +compare_context context; +struct sljit_jump *jump[3]; +jump_list *end_list; +#ifdef SUPPORT_UNICODE +PCRE2_UCHAR propdata[5]; +#endif /* SUPPORT_UNICODE */ + +switch(type) + { + case OP_NOT_DIGIT: + case OP_DIGIT: + /* Digits are usually 0-9, so it is worth optimizing them. */ + if (check_str_ptr) + detect_partial_match(common, backtracks); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE)) + read_char7_type(common, backtracks, type == OP_NOT_DIGIT); + else +#endif + read_char8_type(common, backtracks, type == OP_NOT_DIGIT); + /* Flip the starting bit in the negative case. */ + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit); + add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO)); + return cc; + + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + if (check_str_ptr) + detect_partial_match(common, backtracks); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE)) + read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE); + else +#endif + read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space); + add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? 
SLJIT_ZERO : SLJIT_NOT_ZERO)); + return cc; + + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + if (check_str_ptr) + detect_partial_match(common, backtracks); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE)) + read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR); + else +#endif + read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word); + add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO)); + return cc; + + case OP_ANY: + if (check_str_ptr) + detect_partial_match(common, backtracks); + read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR); + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + end_list = NULL; + if (common->mode != PCRE2_JIT_PARTIAL_HARD) + add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + else + check_str_end(common, &end_list); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); + set_jumps(end_list, LABEL()); + JUMPHERE(jump[0]); + } + else + check_newlinechar(common, common->nltype, backtracks, TRUE); + return cc; + + case OP_ALLANY: + if (check_str_ptr) + detect_partial_match(common, backtracks); +#ifdef SUPPORT_UNICODE + if (common->utf && common->invalid_utf) + { + read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR); + return cc; + } +#endif /* SUPPORT_UNICODE */ + + skip_valid_char(common); + return cc; + + case OP_ANYBYTE: + if (check_str_ptr) + detect_partial_match(common, backtracks); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + return cc; + +#ifdef SUPPORT_UNICODE + case OP_NOTPROP: + case OP_PROP: + propdata[0] = 
XCL_HASPROP; + propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP; + propdata[2] = cc[0]; + propdata[3] = cc[1]; + propdata[4] = XCL_END; + if (check_str_ptr) + detect_partial_match(common, backtracks); + compile_xclass_matchingpath(common, propdata, backtracks); + return cc + 2; +#endif + + case OP_ANYNL: + if (check_str_ptr) + detect_partial_match(common, backtracks); + read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0); + jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + /* We don't need to handle soft partial matching case. */ + end_list = NULL; + if (common->mode != PCRE2_JIT_PARTIAL_HARD) + add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + else + check_str_end(common, &end_list); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + jump[2] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[0]); + check_newlinechar(common, common->bsr_nltype, backtracks, FALSE); + set_jumps(end_list, LABEL()); + JUMPHERE(jump[1]); + JUMPHERE(jump[2]); + return cc; + + case OP_NOT_HSPACE: + case OP_HSPACE: + if (check_str_ptr) + detect_partial_match(common, backtracks); + + if (type == OP_NOT_HSPACE) + read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR); + else + read_char(common, 0x9, 0x3000, NULL, 0); + + add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL)); + sljit_set_current_flags(compiler, SLJIT_SET_Z); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? 
SLJIT_NOT_ZERO : SLJIT_ZERO)); + return cc; + + case OP_NOT_VSPACE: + case OP_VSPACE: + if (check_str_ptr) + detect_partial_match(common, backtracks); + + if (type == OP_NOT_VSPACE) + read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR); + else + read_char(common, 0xa, 0x2029, NULL, 0); + + add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL)); + sljit_set_current_flags(compiler, SLJIT_SET_Z); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); + return cc; + +#ifdef SUPPORT_UNICODE + case OP_EXTUNI: + if (check_str_ptr) + detect_partial_match(common, backtracks); + + SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); + OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); + if (common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); +#else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); + if (common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); +#endif + + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); + + if (common->mode == PCRE2_JIT_PARTIAL_HARD) + { + jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0); + /* Since we successfully read a char above, partial matching must occur. 
*/ + check_partial(common, TRUE); + JUMPHERE(jump[0]); + } + return cc; +#endif + + case OP_CHAR: + case OP_CHARI: + length = 1; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc); +#endif + + if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE) + detect_partial_match(common, backtracks); + + if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0) + { + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); + if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); + + context.length = IN_UCHARS(length); + context.sourcereg = -1; +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + context.ucharptr = 0; +#endif + return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks); + } + +#ifdef SUPPORT_UNICODE + if (common->utf) + { + GETCHAR(c, cc); + } + else +#endif + c = *cc; + + SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc)); + + if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + oc = char_othercase(common, c); + read_char(common, c < oc ? c : oc, c > oc ? 
c : oc, NULL, 0); + + SLJIT_ASSERT(!is_powerof2(c ^ oc)); + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) + { + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc); + SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + } + else + { + jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); + JUMPHERE(jump[0]); + } + return cc + length; + + case OP_NOT: + case OP_NOTI: + if (check_str_ptr) + detect_partial_match(common, backtracks); + + length = 1; +#ifdef SUPPORT_UNICODE + if (common->utf) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 + c = *cc; + if (c < 128 && !common->invalid_utf) + { + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + if (type == OP_NOT || !char_has_othercase(common, cc)) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + else + { + /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */ + OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); + } + /* Skip the variable-length character. */ + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + JUMPHERE(jump[0]); + return cc + 1; + } + else +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + { + GETCHARLEN(c, cc, length); + } + } + else +#endif /* SUPPORT_UNICODE */ + c = *cc; + + if (type == OP_NOT || !char_has_othercase(common, cc)) + { + read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + } + else + { + oc = char_othercase(common, c); + read_char(common, c < oc ? c : oc, c > oc ? 
c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR); + bit = c ^ oc; + if (is_powerof2(bit)) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); + } + else + { + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); + } + } + return cc + length; + + case OP_CLASS: + case OP_NCLASS: + if (check_str_ptr) + detect_partial_match(common, backtracks); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255; + if (type == OP_NCLASS) + read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR); + else + read_char(common, 0, bit, NULL, 0); +#else + if (type == OP_NCLASS) + read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR); + else + read_char(common, 0, 255, NULL, 0); +#endif + + if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks)) + return cc + 32 / sizeof(PCRE2_UCHAR); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + jump[0] = NULL; + if (common->utf) + { + jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit); + if (type == OP_CLASS) + { + add_jump(compiler, backtracks, jump[0]); + jump[0] = NULL; + } + } +#elif PCRE2_CODE_UNIT_WIDTH != 8 + jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); + if (type == OP_CLASS) + { + add_jump(compiler, backtracks, jump[0]); + jump[0] = NULL; + } +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */ +/* Bitmap lookup: byte (TMP1 >> 3) of the table at cc, bit (TMP1 & 7); backtrack if the bit is clear. */ + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); + add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + if (jump[0] != 
NULL) + JUMPHERE(jump[0]); +#endif + return cc + 32 / sizeof(PCRE2_UCHAR); + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + case OP_XCLASS: + if (check_str_ptr) + detect_partial_match(common, backtracks); + compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks); + return cc + GET(cc, 0) - 1; +#endif + } +SLJIT_UNREACHABLE(); +return cc; +} +/* Here cc points at the opcode itself (the operand is at cc + 1); ccend limits how far the scan for consecutive OP_CHAR / OP_CHARI items may go. */ +static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks) +{ +/* This function consumes at least one input character. */ +/* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */ +DEFINE_COMPILER; +PCRE2_SPTR ccbegin = cc; +compare_context context; +int size; + +context.length = 0; +do + { + if (cc >= ccend) + break; + + if (*cc == OP_CHAR) + { + size = 1; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[1])) + size += GET_EXTRALEN(cc[1]); +#endif + } + else if (*cc == OP_CHARI) + { + size = 1; +#ifdef SUPPORT_UNICODE + if (common->utf) + { + if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) + size = 0; + else if (HAS_EXTRALEN(cc[1])) + size += GET_EXTRALEN(cc[1]); + } + else +#endif + if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) + size = 0; + } + else + size = 0; + + cc += 1 + size; + context.length += IN_UCHARS(size); + } +while (size > 0 && context.length <= 128); +/* Restart from the first item to emit the code. */ +cc = ccbegin; +if (context.length > 0) + { + /* We have a fixed-length byte sequence. 
*/ + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length); /* advance STR_PTR over the whole run first */ + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); /* fail if that position lies beyond STR_END */ + + context.sourcereg = -1; +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + context.ucharptr = 0; +#endif + do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0); /* one call per item until context.length reaches 0 */ + return cc; + } + +/* A non-fixed length character will be checked if length == 0. */ +return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE); +} + +/* Forward definitions. */ +static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *); +static void compile_backtrackingpath(compiler_common *, struct backtrack_common *); + /* PUSH_BACKTRACK: allocates a zero-filled backtrack record of size bytes with sljit_alloc_memory(), stores ccstart in it and links it in front of parent->top. The expansion site must have compiler, parent and backtrack in scope. If the compiler is in an error state after the allocation, the enclosing function returns error. */ +#define PUSH_BACKTRACK(size, ccstart, error) \ + do \ + { \ + backtrack = sljit_alloc_memory(compiler, (size)); \ + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ + return error; \ + memset(backtrack, 0, size); \ + backtrack->prev = parent->top; \ + backtrack->cc = (ccstart); \ + parent->top = backtrack; \ + } \ + while (0) + /* PUSH_BACKTRACK_NOVALUE: same steps as PUSH_BACKTRACK, for enclosing functions that return void. */ +#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \ + do \ + { \ + backtrack = sljit_alloc_memory(compiler, (size)); \ + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ + return; \ + memset(backtrack, 0, size); \ + backtrack->prev = parent->top; \ + backtrack->cc = (ccstart); \ + parent->top = backtrack; \ + } \ + while (0) + /* BACKTRACK_AS: casts the local backtrack pointer to a more specific backtrack record type. */ +#define BACKTRACK_AS(type) ((type *)backtrack) + /* compile_dnref_search: for a duplicate-name reference (OP_DNREF / OP_DNREFI) emits code that walks the name-table entries listed by the opcode and stops at the first group whose OVECTOR start slot differs from OVECTOR(1). GET_LOCAL_BASE is applied to TMP2 for each examined group, so TMP2 refers to the selected group afterwards (or to the last entry if none differed). When backtracks is non-NULL and unset_backref is off, the last entry being equal to OVECTOR(1) jumps to backtracks. */ +static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks) +{ +/* The OVECTOR offset goes to TMP2. 
*/ +DEFINE_COMPILER; +int count = GET2(cc, 1 + IMM2_SIZE); /* number of name-table entries to examine */ +PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size; /* first of those entries */ +unsigned int offset; +jump_list *found = NULL; + +SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); /* TMP1 = OVECTOR(1), the value every group start is compared with */ + +count--; /* the last entry is handled after the loop */ +while (count-- > 0) + { + offset = GET2(slot, 0) << 1; /* group number to OVECTOR pair index */ + GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); + add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0)); + slot += common->name_entry_size; + } + +offset = GET2(slot, 0) << 1; +GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); +if (backtracks != NULL && !common->unset_backref) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0)); + +set_jumps(found, LABEL()); +} + /* compile_ref_matchingpath: emits code that matches the subject at STR_PTR against the text captured by a back reference. For OP_REF / OP_REFI the capture pair is read from OVECTOR(offset); for the duplicate-name opcodes TMP2 must already address the pair (the callers visible here run compile_dnref_search first). withchecks enables the unset-group test and the empty-capture test; with emptyfail an empty capture jumps to backtracks instead of succeeding. In UTF mode OP_REFI is compared character by character using the UCD other-case data; all other cases call the casefulcmp or caselesscmp helper. The caseful helper is selected for both caseful opcodes, OP_REF and OP_DNREF. */ +static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail) +{ +DEFINE_COMPILER; +BOOL ref = (*cc == OP_REF || *cc == OP_REFI); +int offset = 0; +struct sljit_jump *jump = NULL; +struct sljit_jump *partial; +struct sljit_jump *nopartial; +#if defined SUPPORT_UNICODE +struct sljit_label *loop; +struct sljit_label *caseless_loop; +jump_list *no_match = NULL; +int source_reg = COUNT_MATCH; +int source_end_reg = ARGUMENTS; +int char1_reg = STACK_LIMIT; +#endif /* SUPPORT_UNICODE */ + +if (ref) + { + offset = GET2(cc, 1) << 1; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + /* OVECTOR(1) contains the "string begin - 1" constant. 
*/ + if (withchecks && !common->unset_backref) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + } +else + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); /* duplicate-name form: TMP2 addresses the capture pair, load its start */ + +#if defined SUPPORT_UNICODE +if (common->utf && *cc == OP_REFI) + { + SLJIT_ASSERT(common->iref_ptr != 0); + /* NOTE(review): OP_DNREFI never reaches this branch because the test is for OP_REFI only, so in UTF mode it is compared by the caselesscmp helper further down. Widening the test would also require iref_ptr to be reserved for OP_DNREFI, which cannot be confirmed from this part of the file. */ + if (ref) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + + if (withchecks && emptyfail) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0)); + /* Save the three registers borrowed for the loop in the locals at iref_ptr. */ + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0); + + OP1(SLJIT_MOV, source_reg, 0, TMP1, 0); + OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0); + + loop = LABEL(); + jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0); /* whole capture consumed: success exit */ + partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); /* subject exhausted first */ + + /* Read original character. It must be a valid UTF character. */ + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0); + + read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF); + + OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0); + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0); + + /* Read second character. 
*/ + read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR); + + CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop); /* identical code points: continue with the next character */ + + OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); + + add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); + /* TMP2 = TMP2 * 12 (4x plus 8x), then the ucd_records base is added; presumably 12 is sizeof(ucd_record). */ + OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records)); + + OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case)); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset)); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); /* subject character plus its other_case value */ + CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop); + + add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); /* caseset 0: nothing further to try */ + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets)); + + caseless_loop = LABEL(); /* walk the caseless set: equal entry means matched, smaller entry means keep going, otherwise fall into no_match */ + OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t)); + OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0); + JUMPTO(SLJIT_EQUAL, loop); + JUMPTO(SLJIT_LESS, caseless_loop); + + set_jumps(no_match, LABEL()); + if (common->mode == PCRE2_JIT_COMPLETE) + JUMPHERE(partial); + /* Failure exit: restore the borrowed registers, then backtrack. */ + OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr); + OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + + if (common->mode != PCRE2_JIT_COMPLETE) + { + JUMPHERE(partial); + OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr); + OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2); + + check_partial(common, 
FALSE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + } + + JUMPHERE(jump); /* success exit of the UTF caseless loop: restore the borrowed registers */ + OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr); + OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2); + return; + } +else +#endif /* SUPPORT_UNICODE */ + { + if (ref) + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0); + else + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0); + + if (withchecks) + jump = JUMP(SLJIT_ZERO); /* capture length is zero */ + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0); + if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, backtracks, partial); + /* OP_DNREF is the caseful duplicate-name opcode, so it must use the caseful helper exactly like OP_REF; only OP_REFI and OP_DNREFI compare caselessly. */ + add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + + if (common->mode != PCRE2_JIT_COMPLETE) + { + nopartial = JUMP(SLJIT_JUMP); + JUMPHERE(partial); + /* TMP2 -= STR_PTR - STR_END, i.e. TMP2 becomes the length still available in the subject */ + OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0); + partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); + add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? 
&common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + JUMPHERE(partial); + check_partial(common, FALSE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(nopartial); + } + } + +if (jump != NULL) + { + if (emptyfail) + add_jump(compiler, backtracks, jump); + else + JUMPHERE(jump); + } +} + /* compile_ref_iterator_matchingpath: emits the matching path of a repeated back reference, i.e. a reference opcode followed by an OP_CR* repeat item. Pushes a ref_iterator_backtrack onto parent, decodes min and max from the repeat item (max == 0 is treated as no upper limit by the code below) and generates either the greedy loop or the minimizing form. Returns the pattern pointer after the repeat item, or NULL if PUSH_BACKTRACK bails out. */ +static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +BOOL ref = (*cc == OP_REF || *cc == OP_REFI); +backtrack_common *backtrack; +PCRE2_UCHAR type; +int offset = 0; +struct sljit_label *label; +struct sljit_jump *zerolength; +struct sljit_jump *jump = NULL; +PCRE2_SPTR ccbegin = cc; +int min = 0, max = 0; +BOOL minimize; + +PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL); + +if (ref) + offset = GET2(cc, 1) << 1; +else + cc += IMM2_SIZE; /* the duplicate-name opcodes carry one more IMM2 operand */ +type = cc[1 + IMM2_SIZE]; + +SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even); +minimize = (type & 0x1) != 0; /* odd repeat opcodes are the OP_CRMIN* variants, relying on the assert above */ +switch(type) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + min = 0; + max = 0; + cc += 1 + IMM2_SIZE + 1; + break; + case OP_CRPLUS: + case OP_CRMINPLUS: + min = 1; + max = 0; + cc += 1 + IMM2_SIZE + 1; + break; + case OP_CRQUERY: + case OP_CRMINQUERY: + min = 0; + max = 1; + cc += 1 + IMM2_SIZE + 1; + break; + case OP_CRRANGE: + case OP_CRMINRANGE: + min = GET2(cc, 1 + IMM2_SIZE + 1); + max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE); + cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE; + break; + default: + SLJIT_UNREACHABLE(); + break; + } + +if (!minimize) + { + if (min == 0) + { + allocate_stack(common, 2); + if (ref) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); + /* Temporary release of STR_PTR. 
*/ + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + /* Handles both invalid and empty cases. Since the minimum repeat + is zero, the invalid case is basically the same as an empty case. */ + if (ref) + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + else + { + compile_dnref_search(common, ccbegin, NULL); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); /* keep the capture-pair address in the POSSESSIVE1 local; it is reloaded at the loop head */ + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + /* Restore if not zero length. */ + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + } + else + { + allocate_stack(common, 1); + if (ref) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + + if (ref) + { + if (!common->unset_backref) + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); /* start equals OVECTOR(1): group is unset, at least one repeat is required, so fail */ + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + } + else + { + compile_dnref_search(common, ccbegin, &backtrack->own_backtracks); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + } + + if (min > 1 || max > 1) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0); /* POSSESSIVE0 holds the iteration counter, needed only for explicit bounds */ + + label = LABEL(); /* head of the greedy loop */ + if (!ref) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); + compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE); + + if (min > 1 || max > 1) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); + if (min > 1) + CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label); /* below the minimum: iterate again without pushing a position */ + if (max > 1) + { + jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, 
SLJIT_IMM, max); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); /* push STR_PTR after this iteration, then try another one */ + JUMPTO(SLJIT_JUMP, label); + JUMPHERE(jump); + } + } + + if (max == 0) + { + /* Includes min > 1 case as well. */ + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + JUMPTO(SLJIT_JUMP, label); + } + + JUMPHERE(zerolength); + BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL(); + + count_match(common); + return cc; + } + /* Minimizing repeat. STACK(0) starts as 0 and later receives STR_PTR, STACK(1) is the iteration counter (not initialised for OP_CRMINSTAR), and STACK(2) keeps the capture-pair address for the duplicate-name opcodes. */ +allocate_stack(common, ref ? 2 : 3); +if (ref) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); +if (type != OP_CRMINSTAR) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); + +if (min == 0) + { + /* Handles both invalid and empty cases. Since the minimum repeat + is zero, the invalid case is basically the same as an empty case. */ + if (ref) + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + else + { + compile_dnref_search(common, ccbegin, NULL); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + /* Length is non-zero, we can match real repeats. 
*/ + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + jump = JUMP(SLJIT_JUMP); /* min is zero: skip the first repetition attempt */ + } +else + { + if (ref) + { + if (!common->unset_backref) + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + } + else + { + compile_dnref_search(common, ccbegin, &backtrack->own_backtracks); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + } + +BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL(); /* start of one repetition attempt; also the target of the min-count loop below */ +if (max > 0) + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); /* counter already at max */ + +if (!ref) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); +compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + +if (min > 1) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath); + } +else if (max > 0) + OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); + +if (jump != NULL) + JUMPHERE(jump); +JUMPHERE(zerolength); + +count_match(common); +return cc; +} + /* compile_recurse_matchingpath: emits the matching path for an item that calls the group starting at common->start plus GET(cc, 1). If get_framesize() reports no_stack for that group, its body is compiled inline. Otherwise the recurse_entry for that start offset is looked up in common->entries (and appended when missing) and a fast call to its entry is emitted, followed by a jump to the backtrack list when TMP1 is 0 after the call. Returns the pattern pointer after the item, or NULL when an allocation fails. */ +static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +recurse_entry *entry = common->entries; +recurse_entry *prev = NULL; +sljit_sw start = GET(cc, 1); /* offset of the called group, relative to common->start */ +PCRE2_SPTR start_cc; +BOOL needs_control_head; + +PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL); + +/* Inlining simple patterns. 
*/ +if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack) + { + start_cc = common->start + start; + compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack); + BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE; + return cc + 1 + LINK_SIZE; + } + +while (entry != NULL) /* look for an existing entry with this start offset; prev ends up as the list tail when none exists */ + { + if (entry->start == start) + break; + prev = entry; + entry = entry->next; + } + +if (entry == NULL) + { + entry = sljit_alloc_memory(compiler, sizeof(recurse_entry)); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return NULL; + entry->next = NULL; + entry->entry_label = NULL; + entry->backtrack_label = NULL; + entry->entry_calls = NULL; + entry->backtrack_calls = NULL; + entry->start = start; + + if (prev != NULL) + prev->next = entry; + else + common->entries = entry; + } + +BACKTRACK_AS(recurse_backtrack)->entry = entry; + +if (entry->entry_label == NULL) + add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL)); /* no entry label yet: queue the call on entry_calls */ +else + JUMPTO(SLJIT_FAST_CALL, entry->entry_label); +/* Leave if the match has failed. */ +add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0)); +BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL(); +return cc + 1 + LINK_SIZE; +} + /* do_callout_jit: runtime helper called from JIT-generated code to run the user callout. The callout_block arrives partly filled in: capture_top holds the number of ovector pairs to convert and offset_vector temporarily carries the current subject pointer. The function completes the block, converts the pointer pairs in jit_ovector into offsets stored directly behind the block, and returns the result of arguments->callout, or 0 when no callout function is set. */ +static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) +{ +PCRE2_SPTR begin; +PCRE2_SIZE *ovector; +sljit_u32 oveccount, capture_top; + +if (arguments->callout == NULL) + return 0; + +SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size); + +begin = arguments->begin; +ovector = (PCRE2_SIZE*)(callout_block + 1); /* the offset array lives immediately after the block */ +oveccount = callout_block->capture_top; /* pair count supplied by the caller */ + +SLJIT_ASSERT(oveccount >= 1); + +callout_block->version = 2; +callout_block->callout_flags = 0; + +/* Offsets in subject. 
*/ +callout_block->subject_length = arguments->end - arguments->begin; +callout_block->start_match = jit_ovector[0] - begin; +callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin; /* offset_vector still holds the subject pointer stored by the generated code */ +callout_block->subject = begin; + +/* Convert and copy the JIT offset vector to the ovector array. */ +callout_block->capture_top = 1; +callout_block->offset_vector = ovector; + +ovector[0] = PCRE2_UNSET; /* pair 0 is reported as unset */ +ovector[1] = PCRE2_UNSET; +ovector += 2; +jit_ovector += 2; +capture_top = 1; + +/* Convert pointers to sizes. */ +while (--oveccount != 0) + { + capture_top++; + + ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin); + ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin); + + if (ovector[0] != PCRE2_UNSET) + callout_block->capture_top = capture_top; /* ends up as one more than the index of the highest set pair */ + + ovector += 2; + jit_ovector += 2; + } + +return (arguments->callout)(callout_block, arguments->callout_data); +} + /* CALLOUT_ARG_OFFSET: byte offset of a field inside pcre2_callout_block. */ +#define CALLOUT_ARG_OFFSET(arg) \ + SLJIT_OFFSETOF(pcre2_callout_block, arg) + /* compile_callout_matchingpath: emits code for a callout item (OP_CALLOUT, or the string form handled by the else branches). Reserves stack space for a pcre2_callout_block followed by one offset pair per group including group 0, stores the fields known at compile time, calls do_callout_jit() and releases the space. The 32-bit result is then tested: a positive value jumps to the backtrack list, any other non-zero value goes to the abort path, and zero continues. Returns the pattern pointer after the item, or NULL when PUSH_BACKTRACK bails out. */ +static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +sljit_s32 mov_opcode; +unsigned int callout_length = (*cc == OP_CALLOUT) + ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE); +sljit_sw value1; +sljit_sw value2; +sljit_sw value3; +sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw); /* bytes needed for the offset pairs */ + +PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); + +callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw); /* block plus pairs, rounded up to whole sljit_sw slots */ + +allocate_stack(common, callout_arg_size); + +SLJIT_ASSERT(common->capture_last_ptr != 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); +OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +value1 = (*cc == OP_CALLOUT) ? 
cc[1 + 2 * LINK_SIZE] : 0; +OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1); +OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0); +OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1); + +/* These pointer-sized fields temporarily store internal variables. */ +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0); + +if (common->mark_ptr != 0) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr)); +mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV; /* store width that matches sizeof(PCRE2_SIZE) */ +OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1)); +OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE)); + +if (*cc == OP_CALLOUT) + { + value1 = 0; + value2 = 0; + value3 = 0; + } +else + { /* string form: string address, string length and string offset are derived from the item layout */ + value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1); + value2 = (callout_length - (1 + 4*LINK_SIZE + 2)); + value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE)); + } + +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1); +OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2); +OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0); + +SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); + +/* Needed to save important temporary registers. 
*/ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); +/* SLJIT_R0 = arguments */ +OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0); /* second argument: the callout block built on the stack */ +GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); /* third argument: the JIT offset vector */ +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit)); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +free_stack(common, callout_arg_size); + +/* Check return value. */ +OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER)); /* positive result: backtrack */ +if (common->abort_label == NULL) + add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */); +else + JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label); +return cc + callout_length; +} + /* NOTE(review): no definition of CALLOUT_ARG_SIZE is visible in this part of the file; the first #undef below may be a leftover. */ +#undef CALLOUT_ARG_SIZE +#undef CALLOUT_ARG_OFFSET + /* compile_reverse_matchingpath: emits the step-back code for OP_REVERSE and OP_VREVERSE. OP_REVERSE moves STR_PTR back by the fixed count lmin. OP_VREVERSE (lmin below lmax) pushes a vreverse_backtrack, moves back lmin, stores that position in STACK(3), then moves back up to lmax minus lmin further without passing arguments->begin and stores the result in STACK(2). Reaching the subject start during the mandatory part jumps to the failure list (the parent list for OP_REVERSE, the new record for OP_VREVERSE). Returns the pattern pointer after the item, or NULL when PUSH_BACKTRACK bails out. */ +static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack = NULL; +jump_list **reverse_failed; +unsigned int lmin, lmax; +#ifdef SUPPORT_UNICODE +struct sljit_jump *jump; +struct sljit_label *label; +#endif + +SLJIT_ASSERT(parent->top == NULL); + +if (*cc == OP_REVERSE) + { + reverse_failed = &parent->own_backtracks; + lmin = GET2(cc, 1); + lmax = lmin; /* fixed length: no variable part */ + cc += 1 + IMM2_SIZE; + + SLJIT_ASSERT(lmin > 0); + } +else + { + SLJIT_ASSERT(*cc == OP_VREVERSE); + PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL); + + reverse_failed = &backtrack->own_backtracks; + lmin = GET2(cc, 1); + lmax = GET2(cc, 1 + IMM2_SIZE); + cc += 1 + 2 * IMM2_SIZE; + + SLJIT_ASSERT(lmin < lmax); + } + +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } +else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); /* either way TMP2 = arguments->begin, the lower bound for stepping back */ + +#ifdef SUPPORT_UNICODE +if (common->utf) + { + if (lmin > 0) + { + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 
lmin); + label = LABEL(); /* mandatory part: TMP3 counts down lmin calls to move_back(), failing if STR_PTR is already at or before the subject start */ + add_jump(compiler, reverse_failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0)); + move_back(common, reverse_failed, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + + if (lmin < lmax) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); /* position after the mandatory step back */ + + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmax - lmin); + label = LABEL(); /* optional part: reaching the subject start just leaves the loop */ + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + move_back(common, reverse_failed, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + + JUMPHERE(jump); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0); /* furthest position reached */ + } + } +else +#endif + { + if (lmin > 0) + { + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmin)); + add_jump(compiler, reverse_failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0)); + } + + if (lmin < lmax) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); + + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmax - lmin)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0); + SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR); /* presumably clamps STR_PTR to the subject start held in TMP2 - TODO confirm SELECT operand order */ + + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0); + } + } + +check_start_used_ptr(common); + +if (lmin < lmax) + BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL(); + +return cc; +} + /* assert_needs_str_ptr_saving: scans pattern items starting at cc. Returns FALSE when OP_KET is reached after nothing but word-boundary, circumflex, dollar, callout and OP_ALT items; returns TRUE as soon as any other opcode is seen. compile_assert_matchingpath uses a FALSE result for a plain OP_BRA assertion to drop the stack slot that would hold STR_PTR. */ +static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc) +{ +while (TRUE) + { + switch (*cc) + { + case OP_CALLOUT_STR: + cc += GET(cc, 1 + 2*LINK_SIZE); /* variable-length item: its length is stored inside the item */ + break; + + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + case OP_CALLOUT: + case OP_ALT: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + cc += PRIV(OP_lengths)[*cc]; /* fixed-length items: skip using the opcode length table */ + break; + + case OP_KET: + return FALSE; + + default: + return TRUE; + } + } +} + +static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL 
conditional) +{ +DEFINE_COMPILER; +int framesize; +int extrasize; +BOOL local_quit_available = FALSE; +BOOL needs_control_head; +BOOL end_block_size = 0; +BOOL has_vreverse; +int private_data_ptr; +backtrack_common altbacktrack; +PCRE2_SPTR ccbegin; +PCRE2_UCHAR opcode; +PCRE2_UCHAR bra = OP_BRA; +jump_list *tmp = NULL; +jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks; +jump_list **found; +/* Saving previous accept variables. */ +BOOL save_local_quit_available = common->local_quit_available; +BOOL save_in_positive_assertion = common->in_positive_assertion; +then_trap_backtrack *save_then_trap = common->then_trap; +struct sljit_label *save_quit_label = common->quit_label; +struct sljit_label *save_accept_label = common->accept_label; +jump_list *save_quit = common->quit; +jump_list *save_positive_assertion_quit = common->positive_assertion_quit; +jump_list *save_accept = common->accept; +struct sljit_jump *jump; +struct sljit_jump *brajump = NULL; + +/* Assert captures then. */ +common->then_trap = NULL; + +if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) + { + SLJIT_ASSERT(!conditional); + bra = *cc; + cc++; + } + +private_data_ptr = PRIVATE_DATA(cc); +SLJIT_ASSERT(private_data_ptr != 0); +framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head); +backtrack->framesize = framesize; +backtrack->private_data_ptr = private_data_ptr; +opcode = *cc; +SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT); +found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target; +ccbegin = cc; +cc += GET(cc, 1); + +if (bra == OP_BRAMINZERO) + { + /* This is a braminzero backtrack path. 
*/ + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + } + +if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin)) + end_block_size = 3; + +if (framesize < 0) + { + extrasize = 1; + if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE)) + extrasize = 0; + + extrasize += end_block_size; + + if (needs_control_head) + extrasize++; + + if (framesize == no_frame) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); + + if (extrasize > 0) + allocate_stack(common, extrasize); + + if (needs_control_head) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + + if (extrasize > 0) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + + if (needs_control_head) + { + SLJIT_ASSERT(extrasize == end_block_size + 2); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0); + } + } +else + { + extrasize = (needs_control_head ? 
3 : 2) + end_block_size; + + OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0); + allocate_stack(common, framesize + extrasize); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + + if (needs_control_head) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); + } + else + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0); + + init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize); + } + +if (end_block_size > 0) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0); + } + +memset(&altbacktrack, 0, sizeof(backtrack_common)); +if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)) + { + /* Control verbs cannot escape from these asserts. 
*/ + local_quit_available = TRUE; + common->local_quit_available = TRUE; + common->quit_label = NULL; + common->quit = NULL; + } + +common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK); +common->positive_assertion_quit = NULL; + +while (1) + { + common->accept_label = NULL; + common->accept = NULL; + altbacktrack.top = NULL; + altbacktrack.own_backtracks = NULL; + + if (*ccbegin == OP_ALT && extrasize > 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + + altbacktrack.cc = ccbegin; + ccbegin += 1 + LINK_SIZE; + + has_vreverse = (*ccbegin == OP_VREVERSE); + if (*ccbegin == OP_REVERSE || has_vreverse) + ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack); + + compile_matchingpath(common, ccbegin, cc, &altbacktrack); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + { + if (local_quit_available) + { + common->local_quit_available = save_local_quit_available; + common->quit_label = save_quit_label; + common->quit = save_quit; + } + common->in_positive_assertion = save_in_positive_assertion; + common->then_trap = save_then_trap; + common->accept_label = save_accept_label; + common->positive_assertion_quit = save_positive_assertion_quit; + common->accept = save_accept; + return NULL; + } + + if (has_vreverse) + { + SLJIT_ASSERT(altbacktrack.top != NULL); + add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + } + + common->accept_label = LABEL(); + if (common->accept != NULL) + set_jumps(common->accept, common->accept_label); + + /* Reset stack. 
*/ + if (framesize < 0) + { + if (framesize == no_frame) + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + else if (extrasize > 0) + free_stack(common, extrasize); + + if (end_block_size > 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1)); + + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1)); + } + else + { + if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional) + { + /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); + + if (end_block_size > 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2)); + + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1)); + } + else + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + + if (end_block_size > 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1)); + + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2)); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw)); + } + } + + if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) + { + /* We know that STR_PTR was stored on the top of the stack. */ + if (conditional) + { + if (extrasize > 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 
2 : 1))); + } + else if (bra == OP_BRAZERO) + { + if (framesize < 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize)); + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1)); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + } + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else if (framesize >= 0) + { + /* For OP_BRA and OP_BRAMINZERO. */ + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1)); + } + } + add_jump(compiler, found, JUMP(SLJIT_JUMP)); + + compile_backtrackingpath(common, altbacktrack.top); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + { + if (local_quit_available) + { + common->local_quit_available = save_local_quit_available; + common->quit_label = save_quit_label; + common->quit = save_quit; + } + common->in_positive_assertion = save_in_positive_assertion; + common->then_trap = save_then_trap; + common->accept_label = save_accept_label; + common->positive_assertion_quit = save_positive_assertion_quit; + common->accept = save_accept; + return NULL; + } + set_jumps(altbacktrack.own_backtracks, LABEL()); + + if (*cc != OP_ALT) + break; + + ccbegin = cc; + cc += GET(cc, 1); + } + +if (local_quit_available) + { + SLJIT_ASSERT(common->positive_assertion_quit == NULL); + /* Makes the check less complicated below. */ + common->positive_assertion_quit = common->quit; + } + +/* None of them matched. 
*/ +if (common->positive_assertion_quit != NULL) + { + jump = JUMP(SLJIT_JUMP); + set_jumps(common->positive_assertion_quit, LABEL()); + SLJIT_ASSERT(framesize != no_stack); + if (framesize < 0) + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw)); + else + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw)); + } + JUMPHERE(jump); + } + +if (end_block_size > 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + +if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1)); + +if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) + { + /* Assert is failed. */ + if ((conditional && extrasize > 0) || bra == OP_BRAZERO) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + + if (framesize < 0) + { + /* The topmost item should be 0. */ + if (bra == OP_BRAZERO) + { + if (extrasize >= 2) + free_stack(common, extrasize - 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else if (extrasize > 0) + free_stack(common, extrasize); + } + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1)); + /* The topmost item should be 0. */ + if (bra == OP_BRAZERO) + { + free_stack(common, framesize + extrasize - 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else + free_stack(common, framesize + extrasize); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + } + jump = JUMP(SLJIT_JUMP); + if (bra != OP_BRAZERO) + add_jump(compiler, target, jump); + + /* Assert is successful. */ + set_jumps(tmp, LABEL()); + if (framesize < 0) + { + /* We know that STR_PTR was stored on the top of the stack. 
*/ + if (extrasize > 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize)); + + /* Keep the STR_PTR on the top of the stack. */ + if (bra == OP_BRAZERO) + { + /* This allocation is always successful. */ + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + if (extrasize >= 2) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + } + else if (bra == OP_BRAMINZERO) + { + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + } + else + { + if (bra == OP_BRA) + { + /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1)); + } + else + { + /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw)); + + if (extrasize == 2 + end_block_size) + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + if (bra == OP_BRAMINZERO) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else + { + SLJIT_ASSERT(extrasize == 3 + end_block_size); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? 
STR_PTR : SLJIT_IMM, 0); + } + } + } + + if (bra == OP_BRAZERO) + { + backtrack->matchingpath = LABEL(); + SET_LABEL(jump, backtrack->matchingpath); + } + else if (bra == OP_BRAMINZERO) + { + JUMPTO(SLJIT_JUMP, backtrack->matchingpath); + JUMPHERE(brajump); + if (framesize >= 0) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + } + set_jumps(backtrack->common.own_backtracks, LABEL()); + } + } +else + { + /* AssertNot is successful. */ + if (framesize < 0) + { + if (extrasize > 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + + if (bra != OP_BRA) + { + if (extrasize >= 2) + free_stack(common, extrasize - 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else if (extrasize > 0) + free_stack(common, extrasize); + } + else + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1)); + /* The topmost item should be 0. 
*/ + if (bra != OP_BRA) + { + free_stack(common, framesize + extrasize - 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else + free_stack(common, framesize + extrasize); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + } + + if (bra == OP_BRAZERO) + backtrack->matchingpath = LABEL(); + else if (bra == OP_BRAMINZERO) + { + JUMPTO(SLJIT_JUMP, backtrack->matchingpath); + JUMPHERE(brajump); + } + + if (bra != OP_BRA) + { + SLJIT_ASSERT(found == &backtrack->common.own_backtracks); + set_jumps(backtrack->common.own_backtracks, LABEL()); + backtrack->common.own_backtracks = NULL; + } + } + +if (local_quit_available) + { + common->local_quit_available = save_local_quit_available; + common->quit_label = save_quit_label; + common->quit = save_quit; + } +common->in_positive_assertion = save_in_positive_assertion; +common->then_trap = save_then_trap; +common->accept_label = save_accept_label; +common->positive_assertion_quit = save_positive_assertion_quit; +common->accept = save_accept; +return cc + 1 + LINK_SIZE; +} +/* Emitted for OP_ONCE after the group body (see the OP_ONCE case in compile_bracket_matchingpath). Resets STACK_TOP: it is reloaded from private_data_ptr when framesize == no_frame, released with free_stack() for other negative framesize values (when there is anything to release), or computed as the value stored at private_data_ptr minus (framesize plus stacksize) words when framesize >= 0. If needs_control_head is set, the saved value is read from the stack into TMP1 and written back to common->control_head_ptr at the end. */ +static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head) +{ +DEFINE_COMPILER; +int stacksize; + +if (framesize < 0) + { + if (framesize == no_frame) + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + else + { + stacksize = needs_control_head ? 1 : 0; + if (ket != OP_KET || has_alternatives) + stacksize++; + + if (stacksize > 0) + free_stack(common, stacksize); + } + + if (needs_control_head) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1)); + + /* TMP2, which is set here, is used by the OP_KETRMAX code below. */ + if (ket == OP_KETRMAX) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1)); + else if (ket == OP_KETRMIN) + { + /* Move the STR_PTR to the private_data_ptr. 
*/ + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1)); + } + } +else + { + stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1; + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw)); + if (needs_control_head) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1)); + + if (ket == OP_KETRMAX) + { + /* TMP2, which is set here, is used by the OP_KETRMAX code below. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + } + } +if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0); +} +/* Emits code that saves capture state about to be overwritten into stack slots starting at STACK(stacksize), and returns the index of the next unused slot. If common->capture_last_ptr is non-zero, its old value is saved and replaced by offset >> 1. If optimized_cbracket[offset >> 1] is 0, the old OVECTOR(offset) pair is saved and then set to the value stored at private_data_ptr (first entry) and STR_PTR (second entry). There is no allocate_stack() call here, so the caller is expected to have reserved the slots. */ +static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr) +{ +DEFINE_COMPILER; + +if (common->capture_last_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); + stacksize++; + } +if (common->optimized_cbracket[offset >> 1] == 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + stacksize += 2; + } +return stacksize; +} +/* Native helper whose address is passed to sljit_emit_icall() in match_script_run_common(): returns endptr when PRIV(script_run)(ptr, endptr, FALSE) is true, otherwise NULL. */ +static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr) +{ + if (PRIV(script_run)(ptr, endptr, FALSE)) + return endptr; + return NULL; +} + +#ifdef SUPPORT_UNICODE +/* Same as do_script_run(), but passes TRUE as the last argument; match_script_run_common() selects it when common->utf is set. */ +static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr) +{ + if (PRIV(script_run)(ptr, endptr, TRUE)) + return endptr; 
+ return NULL; +} + +#endif /* SUPPORT_UNICODE */ +/* Emits a call to do_script_run() (do_script_run_utf() when SUPPORT_UNICODE is defined and common->utf is set). TMP1 is loaded from private_data_ptr, where compile_bracket_matchingpath stored the STR_PTR at the start of the group, and STR_PTR is passed unchanged; the assert checks that they are SLJIT_R0 and SLJIT_R1 (presumably the registers holding the two call arguments - confirm against sljit). The returned pointer becomes the new STR_PTR; a zero return jumps to parent->top->simple_backtracks when parent->top is not NULL, otherwise to parent->own_backtracks. */ +static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent) +{ +DEFINE_COMPILER; + +SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); +#ifdef SUPPORT_UNICODE +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run)); +#else +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run)); +#endif + +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); +add_jump(compiler, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); +} + +/* + Handling bracketed expressions is probably the most complex part. + + Stack layout naming characters: + S - Push the current STR_PTR + 0 - Push a 0 (NULL) + A - Push the current STR_PTR. Needed for restoring the STR_PTR + before the next alternative. Not pushed if there are no alternatives. + M - Any values pushed by the current alternative. Can be empty, or anything. + C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack. + L - Push the previous local (pointed to by localptr) to the stack + () - optional values stored on the stack + ()* - optional, can be stored multiple times + + The following list shows the regular expression templates, their PCRE byte codes + and stack layout supported by pcre-sljit. + + (?:) OP_BRA | OP_KET A M + () OP_CBRA | OP_KET C M + (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )* + OP_SBRA | OP_KETRMAX 0 L M S ( L M S )* + (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )* + OP_SBRA | OP_KETRMIN 0 L M S ( L M S )* + ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )* + OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )* + ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )* + OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )* + (?:)? 
OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 ) + (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 ) + ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 ) + ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 ) + (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )* + OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )* + (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )* + OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )* + ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )* + OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )* + ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )* + OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )* + + + Stack layout naming characters: + A - Push the alternative index (starting from 0) on the stack. + Not pushed if there are no alternatives. + M - Any values pushed by the current alternative. Can be empty, or anything. + + The next list shows the possible content of a bracket: + (|) OP_*BRA | OP_ALT ... M A + (?()|) OP_*COND | OP_ALT M A + (?>|) OP_ONCE | OP_ALT ... 
[stack trace] M A + Or nothing, if trace is unnecessary +*/ +/* Generates the matching path of the bracketed group at cc, which may be preceded by OP_BRAZERO or OP_BRAMINZERO. Returns the position after the closing KET plus repeat_length, or NULL when the sljit compiler reports an error. */ +static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +PCRE2_UCHAR opcode; +int private_data_ptr = 0; +int offset = 0; +int i, stacksize; +int repeat_ptr = 0, repeat_length = 0; +int repeat_type = 0, repeat_count = 0; +PCRE2_SPTR ccbegin; +PCRE2_SPTR matchingpath; +PCRE2_SPTR slot; +PCRE2_UCHAR bra = OP_BRA; +PCRE2_UCHAR ket; +assert_backtrack *assert; +BOOL has_alternatives; +BOOL needs_control_head = FALSE; +BOOL has_vreverse = FALSE; +struct sljit_jump *jump; +struct sljit_jump *skip; +struct sljit_label *rmax_label = NULL; +struct sljit_jump *braminzero = NULL; + +PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL); + +if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) + { + bra = *cc; + cc++; + opcode = *cc; /* NOTE(review): redundant store; opcode is assigned again right after this block. */ + } + +opcode = *cc; +ccbegin = cc; +matchingpath = bracketend(cc) - 1 - LINK_SIZE; +ket = *matchingpath; +if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0) + { /* Counted repeat: the four private data words are the counter location, the length added to the returned position, the repeat type and the initial count. */ + repeat_ptr = PRIVATE_DATA(matchingpath); + repeat_length = PRIVATE_DATA(matchingpath + 1); + repeat_type = PRIVATE_DATA(matchingpath + 2); + repeat_count = PRIVATE_DATA(matchingpath + 3); + SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0); + if (repeat_type == OP_UPTO) + ket = OP_KETRMAX; + if (repeat_type == OP_MINUPTO) + ket = OP_KETRMIN; + } + +matchingpath = ccbegin + 1 + LINK_SIZE; +SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN); +SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX))); +cc += GET(cc, 1); + +has_alternatives = *cc == OP_ALT; +if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND)) + { + SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3, + compile_time_checks_must_be_grouped_together); + has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= 
OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE; + } + +if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) + opcode = OP_SCOND; /* A repeated OP_COND is handled like OP_SCOND. */ + +if (opcode == OP_CBRA || opcode == OP_SCBRA) + { + /* Capturing brackets have pre-allocated space. */ + offset = GET2(ccbegin, 1 + LINK_SIZE); + if (common->optimized_cbracket[offset] == 0) + { + private_data_ptr = OVECTOR_PRIV(offset); + offset <<= 1; + } + else + { + offset <<= 1; + private_data_ptr = OVECTOR(offset); + } + BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; + matchingpath += IMM2_SIZE; + } +else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) + { + /* Other brackets simply allocate the next entry. */ + private_data_ptr = PRIVATE_DATA(ccbegin); + SLJIT_ASSERT(private_data_ptr != 0); + BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; + if (opcode == OP_ONCE) + BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head); + } + +/* Instructions before the first alternative. The slots are counted first, then filled in the same order. */ +stacksize = 0; +if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) + stacksize++; +if (bra == OP_BRAZERO) + stacksize++; + +if (stacksize > 0) + allocate_stack(common, stacksize); + +stacksize = 0; +if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); + stacksize++; + } + +if (bra == OP_BRAZERO) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + +if (bra == OP_BRAMINZERO) + { + /* This is a backtrack path! 
(Since the try-path of OP_BRAMINZERO matches the empty string) */ + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + if (ket != OP_KETRMIN) + { + free_stack(common, 1); + braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + } + else if (opcode == OP_ONCE || opcode >= OP_SBRA) + { + jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + /* Nothing stored during the first run. */ + skip = JUMP(SLJIT_JUMP); + JUMPHERE(jump); + /* Checking zero-length iteration. */ + if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) + { + /* When we come from outside, private_data_ptr contains the previous STR_PTR. */ + braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + } + else + { + /* Except when the whole stack frame must be saved. */ + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2)); + } + JUMPHERE(skip); + } + else + { + jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + JUMPHERE(jump); + } + } + +if (repeat_type != 0) + { /* Counted repeat: initialise the counter stored at repeat_ptr. */ + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count); + if (repeat_type == OP_EXACT) + rmax_label = LABEL(); + } + +if (ket == OP_KETRMIN) + BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL(); + +if (ket == OP_KETRMAX) + { + rmax_label = LABEL(); + if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0) + BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label; + } + +/* Handling capturing brackets and alternatives. 
*/ +if (opcode == OP_ONCE) + { + stacksize = 0; + if (needs_control_head) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); /* Stored on the stack below. */ + stacksize++; + } + + if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) + { + /* Neither capturing brackets nor recursions are found in the block. */ + if (ket == OP_KETRMIN) + { + stacksize += 2; + if (!needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + } + else + { + if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); + if (ket == OP_KETRMAX || has_alternatives) + stacksize++; + } + + if (stacksize > 0) + allocate_stack(common, stacksize); + + stacksize = 0; + if (needs_control_head) + { + stacksize++; + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + } + + if (ket == OP_KETRMIN) + { + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame) + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0); + } + else if (ket == OP_KETRMAX || has_alternatives) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + } + else + { + if (ket != OP_KET || has_alternatives) + stacksize++; + + stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1; + allocate_stack(common, stacksize); + + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); + + stacksize = needs_control_head ? 
1 : 0; + if (ket != OP_KET || has_alternatives) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); + stacksize++; + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); + } + else + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); + } + init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1); + } + } +else if (opcode == OP_CBRA || opcode == OP_SCBRA) + { + /* Saving the previous values. */ + if (common->optimized_cbracket[offset >> 1] != 0) + { + SLJIT_ASSERT(private_data_ptr == OVECTOR(offset)); + allocate_stack(common, 2); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + } + } +else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1)) + { /* Both private data words are saved and then set to STR_PTR and STR_END; STR_END is then limited to the current position. NOTE(review): four slots are allocated but only STACK(0) and STACK(1) are written in this branch; the other two are presumably used by compile_reverse_matchingpath() - confirm. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + allocate_stack(common, 4); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0); + + has_vreverse = (*matchingpath == OP_VREVERSE); + if 
(*matchingpath == OP_REVERSE || has_vreverse) + matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack); + } +else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) + { + /* Saving the previous value. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + + if (*matchingpath == OP_REVERSE) + matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack); + } +else if (has_alternatives) + { + /* Pushing the starting string pointer. */ + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + } + +/* Generating code for the first alternative. */ +if (opcode == OP_COND || opcode == OP_SCOND) + { + if (*matchingpath == OP_CREF) + { /* The condition fails when the first OVECTOR entry of the referenced group equals OVECTOR(1). */ + SLJIT_ASSERT(has_alternatives); + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), + CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + matchingpath += 1 + IMM2_SIZE; + } + else if (*matchingpath == OP_DNCREF) + { + SLJIT_ASSERT(has_alternatives); + /* STR_PTR is used as scratch below; it is saved in TMP3 and restored after the loop. The condition fails when every listed group equals OVECTOR(1). */ + i = GET2(matchingpath, 1 + IMM2_SIZE); + slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); + slot += common->name_entry_size; + i--; + while (i-- > 0) + { + OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); + OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0); + slot += common->name_entry_size; + } + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO)); 
+ matchingpath += 1 + 2 * IMM2_SIZE; + } + else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) + { + /* Decided here, at JIT-compile time: stacksize is reused below as the boolean result, so no condfailed list is needed. */ + BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL; + SLJIT_ASSERT(!has_alternatives); + + if (*matchingpath == OP_TRUE) + { + stacksize = 1; + matchingpath++; + } + else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL) + stacksize = 0; + else if (*matchingpath == OP_RREF) + { + stacksize = GET2(matchingpath, 1); + if (common->currententry == NULL) + stacksize = 0; + else if (stacksize == RREF_ANY) + stacksize = 1; + else if (common->currententry->start == 0) + stacksize = stacksize == 0; + else + stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); + + if (stacksize != 0) + matchingpath += 1 + IMM2_SIZE; + } + else + { /* Remaining opcode of the range, which the compile-time assert above identifies as OP_DNRREF. */ + if (common->currententry == NULL || common->currententry->start == 0) + stacksize = 0; + else + { + stacksize = GET2(matchingpath, 1 + IMM2_SIZE); + slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; + i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); + while (stacksize > 0) + { + if ((int)GET2(slot, 0) == i) + break; + slot += common->name_entry_size; + stacksize--; + } + } + + if (stacksize != 0) + matchingpath += 1 + 2 * IMM2_SIZE; + } + + /* The stacksize == 0 is a common "else" case. */ + if (stacksize == 0) + { + if (*cc == OP_ALT) + { + matchingpath = cc + 1 + LINK_SIZE; + cc += GET(cc, 1); + } + else + matchingpath = cc; + } + } + else + { + SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT); + /* Similar code to the PUSH_BACKTRACK macro. 
*/ + assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack)); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return NULL; + memset(assert, 0, sizeof(assert_backtrack)); + assert->common.cc = matchingpath; + BACKTRACK_AS(bracket_backtrack)->u.assert = assert; + matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE); + } + } +/* Compile the selected body: the byte code from matchingpath to cc. */ +compile_matchingpath(common, matchingpath, cc, backtrack); +if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return NULL; + +switch (opcode) + { + case OP_ASSERTBACK_NA: + if (has_vreverse) + { + SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1)); + add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + } + + if (PRIVATE_DATA(ccbegin + 1)) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + break; + case OP_ASSERT_NA: /* Restore the STR_PTR that was stored in private_data_ptr before the body. */ + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + break; + case OP_ONCE: + match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); + break; + case OP_SCRIPT_RUN: + match_script_run_common(common, private_data_ptr, backtrack); + break; + } + +stacksize = 0; +if (repeat_type == OP_MINUPTO) + { + /* We need to preserve the counter. TMP2 will be used below. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); + stacksize++; + } +if (ket != OP_KET || bra != OP_BRA) + stacksize++; +if (offset != 0) + { /* Slots written by match_capture_common() below. */ + if (common->capture_last_ptr != 0) + stacksize++; + if (common->optimized_cbracket[offset >> 1] == 0) + stacksize += 2; + } +if (has_alternatives && opcode != OP_ONCE) + stacksize++; + +if (stacksize > 0) + allocate_stack(common, stacksize); + +stacksize = 0; +if (repeat_type == OP_MINUPTO) + { + /* TMP2 was set above. 
*/ + OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1); + stacksize++; + } + +if (ket != OP_KET || bra != OP_BRA) + { + if (ket != OP_KET) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + else + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); + stacksize++; + } + +if (offset != 0) + stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); + +/* Skip and count the other alternatives. */ +i = 1; +while (*cc == OP_ALT) + { + cc += GET(cc, 1); + i++; + } + +if (has_alternatives) + { + if (opcode != OP_ONCE) + { /* The slot receives 0 here, or an address emitted by sljit_emit_mov_addr() when i is greater than 3. */ + if (i <= 3) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); + else + BACKTRACK_AS(bracket_backtrack)->u.matching_mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); + } + if (ket != OP_KETRMAX) + BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); + } + +/* Must be after the matchingpath label. */ +if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0) + { + SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); + } + +if (ket == OP_KETRMAX) + { + if (repeat_type != 0) + { + if (has_alternatives) + BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, rmax_label); + /* Drop STR_PTR for greedy plus quantifier. */ + if (opcode != OP_ONCE) + free_stack(common, 1); + } + else if (opcode < OP_BRA || opcode >= OP_SBRA) + { + if (has_alternatives) + BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); + + /* Checking zero-length iteration. */ + if (opcode != OP_ONCE) + { + /* This case includes opcodes such as OP_SCRIPT_RUN. 
*/ + CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label); + /* Drop STR_PTR for greedy plus quantifier. */ + if (bra != OP_BRAZERO) + free_stack(common, 1); + } + else + /* TMP2 must contain the starting STR_PTR (match_once_common() loads TMP2 for OP_KETRMAX). */ + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label); + } + else + JUMPTO(SLJIT_JUMP, rmax_label); + BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL(); + } + +if (repeat_type == OP_EXACT) + { /* Loop back to rmax_label until the counter at repeat_ptr reaches zero. */ + count_match(common); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, rmax_label); + } +else if (repeat_type == OP_UPTO) + { + /* We need to preserve the counter. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + } + +if (bra == OP_BRAZERO) + BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL(); + +if (bra == OP_BRAMINZERO) + { + /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */ + JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath); + if (braminzero != NULL) + { + JUMPHERE(braminzero); + /* We need to release the end pointer to perform the + backtrack for the zero-length iteration. When + framesize is < 0, OP_ONCE will do the release itself. */ + if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw)); + } + else if (ket == OP_KETRMIN && opcode != OP_ONCE) + free_stack(common, 1); + } + /* Continue to the normal backtrack. 
*/ + } + +if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT)) + count_match(common); + +cc += 1 + LINK_SIZE; + +if (opcode == OP_ONCE) + { + int data; + int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize; + + SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2); + /* We temporarily encode the needs_control_head in the lowest bit. + The real value should be short enough for this operation to work + without triggering Undefined Behaviour. NOTE(review): the assert above accepts values down to SHRT_MIN, but the shift keeps only the low 16 bits, so values below SHRT_MIN/2 would not survive the encoding - presumably negative framesize values are small sentinels such as no_frame; confirm. */ + data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 1 : 0)); + BACKTRACK_AS(bracket_backtrack)->u.framesize = data; + } +return cc + repeat_length; +} + +static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +PCRE2_UCHAR opcode; +int private_data_ptr; +int cbraprivptr = 0; +BOOL needs_control_head; +int framesize; +int stacksize; +int offset = 0; +BOOL zero = FALSE; +PCRE2_SPTR ccbegin = NULL; +int stack; /* Also contains the offset of control head. */ +struct sljit_label *loop = NULL; +struct jump_list *emptymatch = NULL; + +PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL); +if (*cc == OP_BRAPOSZERO) + { + zero = TRUE; + cc++; + } + +opcode = *cc; +private_data_ptr = PRIVATE_DATA(cc); +SLJIT_ASSERT(private_data_ptr != 0); +BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr; +switch(opcode) + { + case OP_BRAPOS: + case OP_SBRAPOS: + ccbegin = cc + 1 + LINK_SIZE; + break; + + case OP_CBRAPOS: + case OP_SCBRAPOS: + offset = GET2(cc, 1 + LINK_SIZE); + /* This case cannot be optimized in the same way as + normal capturing brackets. 
*/ + SLJIT_ASSERT(common->optimized_cbracket[offset] == 0); + cbraprivptr = OVECTOR_PRIV(offset); + offset <<= 1; + ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE; + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + +framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head); +BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize; +if (framesize < 0) + { + if (offset != 0) + { + stacksize = 2; + if (common->capture_last_ptr != 0) + stacksize++; + } + else + stacksize = 1; + + if (needs_control_head) + stacksize++; + if (!zero) + stacksize++; + + BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; + allocate_stack(common, stacksize); + if (framesize == no_frame) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); + + stack = 0; + if (offset != 0) + { + stack = 2; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + if (common->capture_last_ptr != 0) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); + stack = 3; + } + } + else + { + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + stack = 1; + } + + if (needs_control_head) + stack++; + if (!zero) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1); + if (needs_control_head) + { + stack--; + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0); + } + } +else + { + stacksize = framesize + 1; + if (!zero) + stacksize++; + if (needs_control_head) + stacksize++; + if (offset == 0) + stacksize++; + 
BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; + + allocate_stack(common, stacksize); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); + + stack = 0; + if (!zero) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1); + stack = 1; + } + if (needs_control_head) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0); + stack++; + } + if (offset == 0) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0); + stack++; + } + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0); + init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize); + stack -= 1 + (offset == 0); + } + +if (offset != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); + +loop = LABEL(); +while (*cc != OP_KETRPOS) + { + backtrack->top = NULL; + backtrack->own_backtracks = NULL; + cc += GET(cc, 1); + + compile_matchingpath(common, ccbegin, cc, backtrack); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return NULL; + + if (framesize < 0) + { + if (framesize == no_frame) + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + + if (offset != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + } + else + { + if (opcode == OP_SBRAPOS) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + } + + /* Even if the match is empty, we need to reset the 
control head. */ + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); + + if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) + add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); + + if (!zero) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); + } + else + { + if (offset != 0) + { + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); + if (opcode == OP_SBRAPOS) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2)); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0); + } + + /* Even if the match is empty, we need to reset the control head. 
*/ + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); + + if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) + add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); + + if (!zero) + { + if (framesize < 0) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); + else + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + } + + JUMPTO(SLJIT_JUMP, loop); + flush_stubs(common); + + compile_backtrackingpath(common, backtrack->top); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return NULL; + set_jumps(backtrack->own_backtracks, LABEL()); + + if (framesize < 0) + { + if (offset != 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); + else + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + } + else + { + if (offset != 0) + { + /* Last alternative. */ + if (*cc == OP_KETRPOS) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2)); + } + } + + if (*cc == OP_KETRPOS) + break; + ccbegin = cc + 1 + LINK_SIZE; + } + +/* We don't have to restore the control head in case of a failed match. */ + +backtrack->own_backtracks = NULL; +if (!zero) + { + if (framesize < 0) + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); + else /* TMP2 is set to [private_data_ptr] above. */ + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0)); + } + +/* None of them matched. 
*/ +set_jumps(emptymatch, LABEL()); +count_match(common); +return cc + 1 + LINK_SIZE; +} + +/* Decodes the repeated single item at cc. On return *opcode is rewritten into the OP_STAR..OP_POSUPTO family (or OP_EXACT); *type is OP_CHAR, OP_CHARI, OP_NOT, OP_NOTI, the class opcode, or the type byte read from the code for OP_TYPE* items; *exact is the mandatory repeat count (0 if none); *max is written only for the UPTO and range forms; *end is the position after the whole item. Returns the position following the decoded opcode (and its count, and the type byte for OP_TYPE* items). */ static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end) +{ +int class_len; + +*opcode = *cc; +*exact = 0; + +if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO) + { + cc++; + *type = OP_CHAR; + } +else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI) + { + cc++; + *type = OP_CHARI; + *opcode -= OP_STARI - OP_STAR; + } +else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO) + { + cc++; + *type = OP_NOT; + *opcode -= OP_NOTSTAR - OP_STAR; + } +else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI) + { + cc++; + *type = OP_NOTI; + *opcode -= OP_NOTSTARI - OP_STAR; + } +else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO) + { + cc++; + *opcode -= OP_TYPESTAR - OP_STAR; + *type = OP_END; + } +else + { + SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS); + *type = *opcode; + cc++; + class_len = (*type < OP_XCLASS) ? 
(int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0); + *opcode = cc[class_len - 1]; + + if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY) + { + *opcode -= OP_CRSTAR - OP_STAR; + *end = cc + class_len; + + if (*opcode == OP_PLUS || *opcode == OP_MINPLUS) + { + *exact = 1; + *opcode -= OP_PLUS - OP_STAR; + } + } + else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY) + { + *opcode -= OP_CRPOSSTAR - OP_POSSTAR; + *end = cc + class_len; + + if (*opcode == OP_POSPLUS) + { + *exact = 1; + *opcode = OP_POSSTAR; + } + } + else + { + SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE); + *max = GET2(cc, (class_len + IMM2_SIZE)); + *exact = GET2(cc, class_len); + + if (*max == 0) + { /* an upper bound of 0 is mapped to the star form of the opcode */ + if (*opcode == OP_CRPOSRANGE) + *opcode = OP_POSSTAR; + else + *opcode -= OP_CRRANGE - OP_STAR; + } + else + { + *max -= *exact; /* keep only the optional repeats beyond *exact */ + if (*max == 0) + *opcode = OP_EXACT; + else if (*max == 1) + { + if (*opcode == OP_CRPOSRANGE) + *opcode = OP_POSQUERY; + else + *opcode -= OP_CRRANGE - OP_QUERY; + } + else + { + if (*opcode == OP_CRPOSRANGE) + *opcode = OP_POSUPTO; + else + *opcode -= OP_CRRANGE - OP_UPTO; + } + } + *end = cc + class_len + 2 * IMM2_SIZE; + } + return cc; + } + +switch(*opcode) + { + case OP_EXACT: + *exact = GET2(cc, 0); + cc += IMM2_SIZE; + break; + + case OP_PLUS: + case OP_MINPLUS: + *exact = 1; + *opcode -= OP_PLUS - OP_STAR; + break; + + case OP_POSPLUS: + *exact = 1; + *opcode = OP_POSSTAR; + break; + + case OP_UPTO: + case OP_MINUPTO: + case OP_POSUPTO: + *max = GET2(cc, 0); + cc += IMM2_SIZE; + break; + } + +if (*type == OP_END) + { /* OP_TYPE* item: the real type is the next code unit */ + *type = *cc; + *end = next_opcode(common, cc); + cc++; + return cc; + } + +*end = cc + 1; +#ifdef SUPPORT_UNICODE +if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc); +#endif +return cc; +} + +static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +PCRE2_UCHAR opcode; 
+PCRE2_UCHAR type; +sljit_u32 max = 0, exact; +sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1); +sljit_s32 early_fail_type; +BOOL charpos_enabled; +PCRE2_UCHAR charpos_char; +unsigned int charpos_othercasebit; +PCRE2_SPTR end; +jump_list *no_match = NULL; +jump_list *no_char1_match = NULL; +struct sljit_jump *jump = NULL; +struct sljit_label *label; +int private_data_ptr = PRIVATE_DATA(cc); +int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); +int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; +int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw); +int tmp_base, tmp_offset; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +BOOL use_tmp; +#endif + +PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL); + +early_fail_type = (early_fail_ptr & 0x7); +early_fail_ptr >>= 3; /* PRIVATE_DATA(cc + 1) packs the early-fail type in its low 3 bits and the pointer in the remaining bits */ + +/* During recursion, these optimizations are disabled. */ +if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL) + { + early_fail_ptr = 0; + early_fail_type = type_skip; + } + +SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0 + || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr)); + +if (early_fail_type == type_fail) + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr)); + +cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); + +if (type != OP_EXTUNI) /* tmp_base/tmp_offset name the scratch location used below for counters and saved positions: TMP3, or the POSSESSIVE0 slot for OP_EXTUNI */ + { + tmp_base = TMP3; + tmp_offset = 0; + } +else + { + tmp_base = SLJIT_MEM1(SLJIT_SP); + tmp_offset = POSSESSIVE0; + } + +/* Handle fixed part first. 
*/ +if (exact > 1) + { + SLJIT_ASSERT(early_fail_ptr == 0); + + if (common->mode == PCRE2_JIT_COMPLETE +#ifdef SUPPORT_UNICODE + && !common->utf +#endif + && type != OP_ANYNL && type != OP_EXTUNI) + { + OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact)); + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0)); /* backtrack at once when STR_PTR + IN_UCHARS(exact) is past STR_END */ + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE); /* NOTE(review): the up-front length check presumably is what allows FALSE as the last argument here - confirm against compile_char1_matchingpath */ + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + else + { + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + } +else if (exact == 1) + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); + +if (early_fail_type == type_fail_range) + { + /* Range end first, followed by range start. 
*/ + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw)); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0); + OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0); + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0)); + + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0); + } + +switch(opcode) + { + case OP_STAR: + case OP_UPTO: + SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR); + + if (type == OP_ANYNL || type == OP_EXTUNI) + { + SLJIT_ASSERT(private_data_ptr == 0); + SLJIT_ASSERT(early_fail_ptr == 0); + + allocate_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); + + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max); + + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); + if (opcode == OP_UPTO) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + jump = JUMP(SLJIT_ZERO); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); + } + + /* We cannot use TMP3 because of allocate_stack. 
*/ + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + JUMPTO(SLJIT_JUMP, label); + if (jump != NULL) + JUMPHERE(jump); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + } +#ifdef SUPPORT_UNICODE + else if (type == OP_ALLANY && !common->invalid_utf) +#else + else if (type == OP_ALLANY) +#endif + { + if (opcode == OP_STAR) + { + if (private_data_ptr == 0) + allocate_stack(common, 2); + + OP1(SLJIT_MOV, base, offset0, STR_END, 0); + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + + OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); + process_partial_match(common); + + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + } +#ifdef SUPPORT_UNICODE + else if (!common->utf) +#else + else +#endif + { + if (private_data_ptr == 0) + allocate_stack(common, 2); + + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max)); + + if (common->mode == PCRE2_JIT_COMPLETE) + { + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); + SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); + } + else + { + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0); + process_partial_match(common); + JUMPHERE(jump); + } + + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + } + } + + charpos_enabled = FALSE; /* charpos: when a non-literal item is followed by OP_CHAR/OP_CHARI, that character is recorded and consumed below ('end += 2') and the search loops compare against it */ + charpos_char = 0; + charpos_othercasebit = 0; + + if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI)) + { +#ifdef SUPPORT_UNICODE + charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]); +#else + charpos_enabled = TRUE; +#endif + if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1)) + { + charpos_othercasebit = char_get_othercase_bit(common, 
end + 1); + if (charpos_othercasebit == 0) + charpos_enabled = FALSE; + } + + if (charpos_enabled) + { + charpos_char = end[1]; + /* Consume the OP_CHAR opcode. */ + end += 2; +#if PCRE2_CODE_UNIT_WIDTH == 8 + SLJIT_ASSERT((charpos_othercasebit >> 8) == 0); +#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + SLJIT_ASSERT((charpos_othercasebit >> 9) == 0); + if ((charpos_othercasebit & 0x100) != 0) + charpos_othercasebit = (charpos_othercasebit & 0xff) << 8; +#endif + if (charpos_othercasebit != 0) + charpos_char |= charpos_othercasebit; + + BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE; + BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char; + BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit; + } + } + + if (charpos_enabled) + { + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1); + + /* Search the first instance of charpos_char. */ + jump = JUMP(SLJIT_JUMP); + label = LABEL(); + if (opcode == OP_UPTO) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_ZERO)); + } + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + JUMPHERE(jump); + + detect_partial_match(common, &backtrack->own_backtracks); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (charpos_othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); + + if (private_data_ptr == 0) + allocate_stack(common, 2); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + + if (opcode == OP_UPTO) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &no_match, 
JUMP(SLJIT_ZERO)); + } + + /* Search the last instance of charpos_char. */ + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_match, FALSE); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + detect_partial_match(common, &no_match); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (charpos_othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); + + if (opcode == OP_STAR) + { + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, label); + } + else + { + jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPHERE(jump); + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + + set_jumps(no_match, LABEL()); + OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1)); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + } + else + { + if (private_data_ptr == 0) + allocate_stack(common, 2); + + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR); + SLJIT_ASSERT(!use_tmp || tmp_base == TMP3); + + if (common->utf) + OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0); +#endif + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + + detect_partial_match(common, &no_match); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 
0 : offset0, STR_PTR, 0); +#endif + + if (opcode == OP_UPTO) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + } + + detect_partial_match_to(common, label); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + set_jumps(no_char1_match, LABEL()); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + { + set_jumps(no_match, LABEL()); + if (use_tmp) + { + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + OP1(SLJIT_MOV, base, offset0, TMP3, 0); + } + else + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + } + else +#endif + { + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + } + + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + } + + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + + case OP_MINSTAR: + if (private_data_ptr == 0) + allocate_stack(common, 1); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + break; + + case OP_MINUPTO: + SLJIT_ASSERT(early_fail_ptr == 0); + if (private_data_ptr == 0) + allocate_stack(common, 2); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + + case OP_QUERY: + case OP_MINQUERY: + SLJIT_ASSERT(early_fail_ptr == 0); + if (private_data_ptr == 0) + allocate_stack(common, 1); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + if (opcode == OP_QUERY) + compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + + case OP_EXACT: /* nothing to emit here: the 'exact' repetitions were already emitted before the switch */ + break; + + case OP_POSSTAR: 
+#if defined SUPPORT_UNICODE + if (type == OP_ALLANY && !common->invalid_utf) +#else + if (type == OP_ALLANY) +#endif + { + OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); + process_partial_match(common); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0); + break; + } + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (type == OP_EXTUNI || common->utf) + { + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + detect_partial_match(common, &no_match); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_match, FALSE); + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + detect_partial_match_to(common, label); + + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); + if (early_fail_ptr != 0) + { + if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0); + else + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + } + break; + } +#endif + + detect_partial_match(common, &no_match); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); + detect_partial_match_to(common, label); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + set_jumps(no_char1_match, LABEL()); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_match, LABEL()); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + break; + + case OP_POSUPTO: + SLJIT_ASSERT(early_fail_ptr == 0); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + + detect_partial_match(common, &no_match); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_match, FALSE); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); + OP2(SLJIT_SUB | SLJIT_SET_Z, 
tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + detect_partial_match_to(common, label); + + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); + break; + } +#endif + + if (type == OP_ALLANY) + { + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max)); + + if (common->mode == PCRE2_JIT_COMPLETE) + { + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); + SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); + } + else + { + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0); + process_partial_match(common); + JUMPHERE(jump); + } + break; + } + + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + + detect_partial_match(common, &no_match); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + detect_partial_match_to(common, label); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + set_jumps(no_char1_match, LABEL()); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_match, LABEL()); + break; + + case OP_POSQUERY: + SLJIT_ASSERT(early_fail_ptr == 0); + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + compile_char1_matchingpath(common, type, cc, &no_match, TRUE); + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + +count_match(common); +return end; +} + +/* Emits code for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT. OP_FAIL is an unconditional jump added to the backtrack list. The other opcodes jump to the accept label (or are queued on common->accept while no label exists); when the pattern might match empty, the PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART run-time options are tested first. Always returns cc + 1. */ static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; + +PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); + +if (*cc == OP_FAIL) + { + add_jump(compiler, &backtrack->own_backtracks, 
JUMP(SLJIT_JUMP)); + return cc + 1; + } + +if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0) /* end-anchored pattern: an accept that is not at STR_END is queued on restart_match */ + add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + +if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty) + { + /* No need to check notempty conditions. */ + if (common->accept_label == NULL) + add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP)); + else + JUMPTO(SLJIT_JUMP, common->accept_label); + return cc + 1; + } + +if (common->accept_label == NULL) /* a match whose end differs from OVECTOR(0) is not empty, so it is accepted without looking at the options */ + add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0))); +else + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label); + +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); + } +else + OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options)); + +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); +add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO)); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); +if (common->accept_label == NULL) + add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO)); +else + JUMPTO(SLJIT_ZERO, common->accept_label); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? 
TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); +if (common->accept_label == NULL) + add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0)); +else + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label); +add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP)); +return cc + 1; +} + +/* Emits the ovector update for the group numbered GET2(cc, 1): STR_PTR is stored as the second entry of the pair and, unless the bracket is marked optimized, the start saved in OVECTOR_PRIV is copied to the first entry. Emits nothing when common->currententry is set. Returns the position after the item. */ static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc) +{ +DEFINE_COMPILER; +int offset = GET2(cc, 1); +BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0; + +/* Data will be discarded anyway... */ +if (common->currententry != NULL) + return cc + 1 + IMM2_SIZE; + +if (!optimized_cbracket) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset)); +offset <<= 1; +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); +if (!optimized_cbracket) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); +return cc + 1 + IMM2_SIZE; +} + +/* Emits the matching path of a control verb: registers a backtrack_common entry, saves STR_PTR on the stack for OP_SKIP, and for OP_COMMIT_ARG, OP_PRUNE_ARG and OP_THEN_ARG stores the address cc + 2 both in the mark_ptr stack slot and in jit_arguments.mark_ptr. Returns the position after the verb (and its argument, if any). */ static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +PCRE2_UCHAR opcode = *cc; +PCRE2_SPTR ccend = cc + 1; + +if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || + opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG) + ccend += 2 + cc[1]; /* skip the argument: cc[1] is used as its length, plus two more code units */ + +PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); + +if (opcode == OP_SKIP) + { + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + return ccend; + } + +if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG) + { + if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? 
TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); + } + +return ccend; +} + +static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP }; /* used as the 'cc' of then-trap backtrack entries, see compile_then_trap_matchingpath */ + +/* Pushes a then-trap record for the code range cc..ccend and makes it the current common->then_trap. The record takes 3 stack words (the previous control head, type_then_trap and the start offset relative to common->start), preceded by a frame when get_framesize() returns a non-negative size; control_head_ptr is updated to point at the record. */ static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +BOOL needs_control_head; +int size; + +PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc); +common->then_trap = BACKTRACK_AS(then_trap_backtrack); +BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode; +BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start); +BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head); + +size = BACKTRACK_AS(then_trap_backtrack)->framesize; +size = 3 + (size < 0 ? 0 : size); /* three record words plus the frame, if there is one */ + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); /* TMP2 keeps the previous control head until it is stored in the record */ +allocate_stack(common, size); +if (size > 3) + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw)); +else + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0); + +size = BACKTRACK_AS(then_trap_backtrack)->framesize; +if (size >= 0) + init_frame(common, cc, ccend, size - 1, 0); +} + +static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +BOOL has_then_trap = FALSE; +then_trap_backtrack *save_then_trap = NULL; + +SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS)); + +if (common->has_then && common->then_offsets[cc - common->start] != 0) + { + SLJIT_ASSERT(*ccend != OP_END && 
common->control_head_ptr != 0); + has_then_trap = TRUE; + save_then_trap = common->then_trap; + /* Tail item on backtrack. */ + compile_then_trap_matchingpath(common, cc, ccend, parent); + } + +while (cc < ccend) + { + switch(*cc) + { + case OP_SOD: + case OP_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_DOLL: + case OP_DOLLM: + case OP_CIRC: + case OP_CIRCM: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); + break; + + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ALLANY: + case OP_ANYBYTE: + case OP_NOTPROP: + case OP_PROP: + case OP_ANYNL: + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: + case OP_EXTUNI: + case OP_NOT: + case OP_NOTI: + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); + break; + + case OP_SET_SOM: + PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + cc++; + break; + + case OP_CHAR: + case OP_CHARI: + if (common->mode == PCRE2_JIT_COMPLETE) + cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); + else + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? 
&parent->top->simple_backtracks : &parent->own_backtracks, TRUE); + break; + + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_QUERY: + case OP_MINQUERY: + case OP_UPTO: + case OP_MINUPTO: + case OP_EXACT: + case OP_POSSTAR: + case OP_POSPLUS: + case OP_POSQUERY: + case OP_POSUPTO: + case OP_STARI: + case OP_MINSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_EXACTI: + case OP_POSSTARI: + case OP_POSPLUSI: + case OP_POSQUERYI: + case OP_POSUPTOI: + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTEXACT: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + case OP_NOTPOSQUERY: + case OP_NOTPOSUPTO: + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTEXACTI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERYI: + case OP_NOTPOSUPTOI: + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEEXACT: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + case OP_TYPEPOSUPTO: + cc = compile_iterator_matchingpath(common, cc, parent); + break; + + case OP_CLASS: + case OP_NCLASS: + if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE) + cc = compile_iterator_matchingpath(common, cc, parent); + else + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? 
&parent->top->simple_backtracks : &parent->own_backtracks, TRUE); + break; + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + case OP_XCLASS: + if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) + cc = compile_iterator_matchingpath(common, cc, parent); + else + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); + break; +#endif + + case OP_REF: + case OP_REFI: + if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE) + cc = compile_ref_iterator_matchingpath(common, cc, parent); + else + { + compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE); + cc += 1 + IMM2_SIZE; + } + break; + + case OP_DNREF: + case OP_DNREFI: + if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE) + cc = compile_ref_iterator_matchingpath(common, cc, parent); + else + { + compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); + compile_ref_matchingpath(common, cc, parent->top != NULL ? 
&parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE); + cc += 1 + 2 * IMM2_SIZE; + } + break; + + case OP_RECURSE: + cc = compile_recurse_matchingpath(common, cc, parent); + break; + + case OP_CALLOUT: + case OP_CALLOUT_STR: + cc = compile_callout_matchingpath(common, cc, parent); + break; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); + cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); + break; + + case OP_BRAMINZERO: + PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc); + cc = bracketend(cc + 1); + if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN) + { + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + } + else + { + allocate_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0); + } + BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL(); + count_match(common); + break; + + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_BRA: + case OP_CBRA: + case OP_COND: + case OP_SBRA: + case OP_SCBRA: + case OP_SCOND: + cc = compile_bracket_matchingpath(common, cc, parent); + break; + + case OP_BRAZERO: + if (cc[1] > OP_ASSERTBACK_NOT) + cc = compile_bracket_matchingpath(common, cc, parent); + else + { + PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); + cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); + } + break; + + case OP_BRAPOS: + case OP_CBRAPOS: + case OP_SBRAPOS: + case OP_SCBRAPOS: + case OP_BRAPOSZERO: + cc = compile_bracketpos_matchingpath(common, cc, parent); + break; + + case OP_MARK: + PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); + SLJIT_ASSERT(common->mark_ptr != 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); + allocate_stack(common, common->has_skip_arg ? 
5 : 1); + if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); + if (common->has_skip_arg) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); + } + cc += 1 + 2 + cc[1]; + break; + + case OP_PRUNE: + case OP_PRUNE_ARG: + case OP_SKIP: + case OP_SKIP_ARG: + case OP_THEN: + case OP_THEN_ARG: + case OP_COMMIT: + case OP_COMMIT_ARG: + cc = compile_control_verb_matchingpath(common, cc, parent); + break; + + case OP_FAIL: + case OP_ACCEPT: + case OP_ASSERT_ACCEPT: + cc = compile_fail_accept_matchingpath(common, cc, parent); + break; + + case OP_CLOSE: + cc = compile_close_matchingpath(common, cc); + break; + + case OP_SKIPZERO: + cc = bracketend(cc + 1); + break; + + default: + SLJIT_UNREACHABLE(); + return; + } + if (cc == NULL) + return; + } + +if (has_then_trap) + { + /* Head item on backtrack. 
*/ + PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc); + BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode; + BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap; + common->then_trap = save_then_trap; + } +SLJIT_ASSERT(cc == ccend); +} + +#undef PUSH_BACKTRACK +#undef PUSH_BACKTRACK_NOVALUE +#undef BACKTRACK_AS +/* Compiles the backtracking path of the given backtrack list and returns from the calling function as soon as the sljit compiler reports an error. It relies on the local names common and compiler being in scope. */ +#define COMPILE_BACKTRACKINGPATH(current) \ + do \ + { \ + compile_backtrackingpath(common, (current)); \ + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ + return; \ + } \ + while (0) +/* Reinterprets the local variable current as a pointer to the given backtrack structure type. */ +#define CURRENT_AS(type) ((type *)current) +/* Emits the backtracking path of a character iterator. The opcode, character type and repeat limits are decoded by get_iterator_parameters(). The iterator state is read from the backtrack stack when PRIVATE_DATA(cc) is zero, otherwise from the private data slots addressed through SLJIT_SP. */ +static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +PCRE2_SPTR cc = current->cc; +PCRE2_UCHAR opcode; +PCRE2_UCHAR type; +sljit_u32 max = 0, exact; +struct sljit_label *label = NULL; +struct sljit_jump *jump = NULL; +jump_list *jumplist = NULL; +PCRE2_SPTR end; +int private_data_ptr = PRIVATE_DATA(cc); +int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); +int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; +int offset1 = (private_data_ptr == 0) ? 
STACK(1) : private_data_ptr + SSIZE_OF(sw); + +cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); + +switch(opcode) + { + case OP_STAR: + case OP_UPTO: + if (type == OP_ANYNL || type == OP_EXTUNI) + { + SLJIT_ASSERT(private_data_ptr == 0); + set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); + } + else + { + if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled) + { + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, TMP2, 0, base, offset1); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + label = LABEL(); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath); + move_back(common, NULL, TRUE); + CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label); + } + else + { + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1); + move_back(common, NULL, TRUE); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + } + JUMPHERE(jump); + if (private_data_ptr == 0) + free_stack(common, 2); + } + break; + + case OP_MINSTAR: + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + set_jumps(jumplist, LABEL()); + 
if (private_data_ptr == 0) + free_stack(common, 1); + break; + + case OP_MINUPTO: + OP1(SLJIT_MOV, TMP1, 0, base, offset1); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO)); +/* The counter kept at offset1 has not reached zero: store it back and try to match one more character. */ + OP1(SLJIT_MOV, base, offset1, TMP1, 0); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + + set_jumps(jumplist, LABEL()); + if (private_data_ptr == 0) + free_stack(common, 2); + break; + + case OP_QUERY: + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); + jump = JUMP(SLJIT_JUMP); + set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + JUMPHERE(jump); + if (private_data_ptr == 0) + free_stack(common, 1); + break; + + case OP_MINQUERY: + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); + jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + set_jumps(jumplist, LABEL()); + JUMPHERE(jump); + if (private_data_ptr == 0) + free_stack(common, 1); + break; + + case OP_EXACT: + case OP_POSSTAR: + case OP_POSQUERY: + case OP_POSUPTO: + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + +set_jumps(current->own_backtracks, LABEL()); +} +/* Emits the backtracking path of a repeated back reference (OP_REF, OP_REFI, OP_DNREF or OP_DNREFI followed by a repeat opcode). The repeat opcode is read from just after the reference operands. In both cases STR_PTR is reloaded from the top stack slot and the matching path is resumed when it is non-zero; the two cases differ in where own_backtracks is bound and in how many stack slots are released. */ +static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +PCRE2_SPTR cc = current->cc; +BOOL ref = (*cc == OP_REF || *cc == OP_REFI); 
+PCRE2_UCHAR type; + +type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE]; + +if ((type & 0x1) == 0) + { + /* Maximize case. */ + set_jumps(current->own_backtracks, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath); + return; + } + +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); +CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath); +set_jumps(current->own_backtracks, LABEL()); +free_stack(common, ref ? 2 : 3); +} +/* Emits the backtracking path of a recursion. When the recursed pattern was not inlined, a fast call is made to the backtrack label of the recurse entry (or queued in entry->backtrack_calls while that label does not exist yet) and the matching path is resumed if TMP1 is non-zero afterwards. When it was inlined, the backtracking path of current->top is compiled instead. own_backtracks is bound at the end in both cases. */ +static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +recurse_entry *entry; + +if (!CURRENT_AS(recurse_backtrack)->inlined_pattern) + { + entry = CURRENT_AS(recurse_backtrack)->entry; + if (entry->backtrack_label == NULL) + add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL)); + else + JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath); + } +else + compile_backtrackingpath(common, current->top); + +set_jumps(current->own_backtracks, LABEL()); +} +/* Emits the backtracking path of an assertion, optionally preceded by OP_BRAZERO (OP_BRAMINZERO is never expected here). With OP_BRAZERO the saved STR_PTR is reloaded from the top stack slot first. When the assertion has no frame (framesize is negative) only own_backtracks is bound, plus the OP_BRAZERO retry; otherwise the frame of a positive assertion is reverted before own_backtracks is bound. */ +static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +PCRE2_SPTR cc = current->cc; +PCRE2_UCHAR bra = OP_BRA; +struct sljit_jump *brajump = NULL; + +SLJIT_ASSERT(*cc != OP_BRAMINZERO); +if (*cc == OP_BRAZERO) + { + bra = *cc; + cc++; + } + +if (bra == OP_BRAZERO) + { + SLJIT_ASSERT(current->own_backtracks == NULL); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + } + +if (CURRENT_AS(assert_backtrack)->framesize < 0) + { + set_jumps(current->own_backtracks, LABEL()); + + if (bra == OP_BRAZERO) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, 
CURRENT_AS(assert_backtrack)->matchingpath); + free_stack(common, 1); + } + return; + } + +if (bra == OP_BRAZERO) + { + if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); + free_stack(common, 1); + return; + } + free_stack(common, 1); + brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + } + +if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0); + + set_jumps(current->own_backtracks, LABEL()); + } +else + set_jumps(current->own_backtracks, LABEL()); + +if (bra == OP_BRAZERO) + { + /* We know there is enough place on the stack. 
*/ + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath); + JUMPHERE(brajump); + } +} +/* Emits the backtracking path of a bracket group, optionally preceded by OP_BRAZERO or OP_BRAMINZERO. The closing opcode of the group is read from just before bracketend(cc); when it is OP_KET with non-zero PRIVATE_DATA, the repeat pointer, type and count are loaded from the private data and OP_UPTO / OP_MINUPTO repeats are then handled like OP_KETRMAX / OP_KETRMIN. After the saved state is restored, the backtracking path of the group content is compiled and every remaining alternative gets its matching path and backtracking path compiled in turn. */ +static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +int opcode, stacksize, alt_count, alt_max; +int offset = 0; +int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr; +int repeat_ptr = 0, repeat_type = 0, repeat_count = 0; +PCRE2_SPTR cc = current->cc; +PCRE2_SPTR ccbegin; +PCRE2_SPTR ccprev; +PCRE2_UCHAR bra = OP_BRA; +PCRE2_UCHAR ket; +assert_backtrack *assert; +BOOL has_alternatives; +BOOL needs_control_head = FALSE; +BOOL has_vreverse; +struct sljit_jump *brazero = NULL; +struct sljit_jump *next_alt = NULL; +struct sljit_jump *once = NULL; +struct sljit_jump *cond = NULL; +struct sljit_label *rmin_label = NULL; +struct sljit_label *exact_label = NULL; +struct sljit_jump *mov_addr = NULL; + +if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) + { + bra = *cc; + cc++; + } + +opcode = *cc; +ccbegin = bracketend(cc) - 1 - LINK_SIZE; +ket = *ccbegin; +if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0) + { + repeat_ptr = PRIVATE_DATA(ccbegin); + repeat_type = PRIVATE_DATA(ccbegin + 2); + repeat_count = PRIVATE_DATA(ccbegin + 3); + SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0); + if (repeat_type == OP_UPTO) + ket = OP_KETRMAX; + if (repeat_type == OP_MINUPTO) + ket = OP_KETRMIN; + } +ccbegin = cc; +cc += GET(cc, 1); +has_alternatives = *cc == OP_ALT; +if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) + has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL; +if (opcode == OP_CBRA || opcode == OP_SCBRA) + offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1; +if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || 
*cc == OP_KETRMIN)) + opcode = OP_SCOND; + +alt_max = has_alternatives ? no_alternatives(ccbegin) : 0; + +/* Decoding the needs_control_head in framesize. */ +if (opcode == OP_ONCE) + { + needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0; + CURRENT_AS(bracket_backtrack)->u.framesize >>= 1; + } + +if (ket != OP_KET && repeat_type != 0) + { + /* TMP1 is used in OP_KETRMIN below. */ + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + if (repeat_type == OP_UPTO) + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1); + else + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0); + } + +if (ket == OP_KETRMAX) + { + if (bra == OP_BRAZERO) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0); + } + } +else if (ket == OP_KETRMIN) + { + if (bra != OP_BRAMINZERO) + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + if (repeat_type != 0) + { + /* TMP1 was set a few lines above. */ + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + /* Drop STR_PTR for non-greedy plus quantifier. */ + if (opcode != OP_ONCE) + free_stack(common, 1); + } + else if (opcode >= OP_SBRA || opcode == OP_ONCE) + { + /* Checking zero-length iteration. */ + if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0) + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + } + /* Drop STR_PTR for non-greedy plus quantifier. 
*/ + if (opcode != OP_ONCE) + free_stack(common, 1); + } + else + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + } + rmin_label = LABEL(); + if (repeat_type != 0) + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); + } +else if (bra == OP_BRAZERO) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); + } +else if (repeat_type == OP_EXACT) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); + exact_label = LABEL(); + } + +if (offset != 0) + { + if (common->capture_last_ptr != 0) + { + SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); + free_stack(common, 3); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0); + } + else if (common->optimized_cbracket[offset >> 1] == 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + free_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); + } + } + +if (SLJIT_UNLIKELY(opcode == OP_ONCE)) + { + if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw)); + } + once = JUMP(SLJIT_JUMP); + } +else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) + { 
+ if (has_alternatives) + { + /* Always exactly one alternative. */ + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + + alt_max = 2; + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); + } + } +else if (has_alternatives) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + + if (alt_max > 3) + { + sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0); + + SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr); + sljit_set_label(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr, LABEL()); + sljit_emit_op0(compiler, SLJIT_ENDBR); + } + else + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); + } + +COMPILE_BACKTRACKINGPATH(current->top); +if (current->own_backtracks) + set_jumps(current->own_backtracks, LABEL()); + +if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) + { + /* Conditional block always has at most one alternative. */ + if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) + { + SLJIT_ASSERT(has_alternatives); + assert = CURRENT_AS(bracket_backtrack)->u.assert; + if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK)) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0); + } + cond = JUMP(SLJIT_JUMP); + set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL()); + } + else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL) + { + SLJIT_ASSERT(has_alternatives); + cond = JUMP(SLJIT_JUMP); + set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL()); + } + else + SLJIT_ASSERT(!has_alternatives); + } + 
+if (has_alternatives) + { + alt_count = 1; + do + { + current->top = NULL; + current->own_backtracks = NULL; + current->simple_backtracks = NULL; + /* Conditional blocks always have an additional alternative, even if it is empty. */ + if (*cc == OP_ALT) + { + ccprev = cc + 1 + LINK_SIZE; + cc += GET(cc, 1); + + has_vreverse = FALSE; + if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA) + { + SLJIT_ASSERT(private_data_ptr != 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + + has_vreverse = (*ccprev == OP_VREVERSE); + if (*ccprev == OP_REVERSE || has_vreverse) + ccprev = compile_reverse_matchingpath(common, ccprev, current); + } + else if (opcode != OP_COND && opcode != OP_SCOND) + { + if (opcode != OP_ONCE) + { + if (private_data_ptr != 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + else + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + } + else + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0)); + } +/* Compile the matching path of the alternative that starts at ccprev and ends at cc. */ + compile_matchingpath(common, ccprev, cc, current); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return; + + switch (opcode) + { + case OP_ASSERTBACK_NA: + if (has_vreverse) + { + SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1)); + add_jump(compiler, &current->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + } + + if (PRIVATE_DATA(ccbegin + 1)) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + break; + case OP_ASSERT_NA: + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + break; + case OP_SCRIPT_RUN: + match_script_run_common(common, private_data_ptr, current); + break; + } + } + + /* Instructions after the current alternative is successfully matched. */ + /* There is a similar code in compile_bracket_matchingpath. 
*/ + if (opcode == OP_ONCE) + match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); + + stacksize = 0; + if (repeat_type == OP_MINUPTO) + { + /* We need to preserve the counter. TMP2 will be used below. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); + stacksize++; + } + if (ket != OP_KET || bra != OP_BRA) + stacksize++; + if (offset != 0) + { + if (common->capture_last_ptr != 0) + stacksize++; + if (common->optimized_cbracket[offset >> 1] == 0) + stacksize += 2; + } + if (opcode != OP_ONCE) + stacksize++; + + if (stacksize > 0) + allocate_stack(common, stacksize); + + stacksize = 0; + if (repeat_type == OP_MINUPTO) + { + /* TMP2 was set above. */ + OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1); + stacksize++; + } + + if (ket != OP_KET || bra != OP_BRA) + { + if (ket != OP_KET) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + else + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); + stacksize++; + } + + if (offset != 0) + stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); + + if (opcode != OP_ONCE) + { + if (alt_max <= 3) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count); + else + mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); + } + + if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0) + { + /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. 
*/ + SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); + } + + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath); + + if (opcode != OP_ONCE) + { + if (alt_max <= 3) + { + JUMPHERE(next_alt); + alt_count++; + if (alt_count < alt_max) + { + SLJIT_ASSERT(alt_count == 2 && alt_max == 3); + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1); + } + } + else + { + sljit_set_label(mov_addr, LABEL()); + sljit_emit_op0(compiler, SLJIT_ENDBR); + } + } + + COMPILE_BACKTRACKINGPATH(current->top); + if (current->own_backtracks) + set_jumps(current->own_backtracks, LABEL()); + SLJIT_ASSERT(!current->simple_backtracks); + } + while (*cc == OP_ALT); + + if (cond != NULL) + { + SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND); + assert = CURRENT_AS(bracket_backtrack)->u.assert; + if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0); + } + JUMPHERE(cond); + } + + /* Free the STR_PTR. */ + if (private_data_ptr == 0) + free_stack(common, 1); + } + +if (offset != 0) + { + /* Using both tmp register is better for instruction scheduling. 
*/ + if (common->optimized_cbracket[offset >> 1] != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + free_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); + } + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + } + } +else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1)) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0); + free_stack(common, 4); + } +else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + } +else if (opcode == OP_ONCE) + { + cc = ccbegin + GET(ccbegin, 1); + stacksize = needs_control_head ? 1 : 0; + + if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) + { + /* Reset head and drop saved frame. */ + stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1); + } + else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN)) + { + /* The STR_PTR must be released. 
*/ + stacksize++; + } + + if (stacksize > 0) + free_stack(common, stacksize); + + JUMPHERE(once); + /* Restore previous private_data_ptr */ + if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1)); + else if (ket == OP_KETRMIN) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + /* See the comment below. */ + free_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + } + } + +if (repeat_type == OP_EXACT) + { + OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0); + CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label); + } +else if (ket == OP_KETRMAX) + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + if (bra != OP_BRAZERO) + free_stack(common, 1); + + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + if (bra == OP_BRAZERO) + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath); + JUMPHERE(brazero); + free_stack(common, 1); + } + } +else if (ket == OP_KETRMIN) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + + /* OP_ONCE removes everything in case of a backtrack, so we don't + need to explicitly release the STR_PTR. The extra release would + affect badly the free_stack(2) above. */ + if (opcode != OP_ONCE) + free_stack(common, 1); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label); + if (opcode == OP_ONCE) + free_stack(common, bra == OP_BRAMINZERO ? 
2 : 1); + else if (bra == OP_BRAMINZERO) + free_stack(common, 1); + } +else if (bra == OP_BRAZERO) + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath); + JUMPHERE(brazero); + } +} +/* Emits the backtracking path of a possessive bracket group (the BRAPOS opcode family, optionally preceded by OP_BRAPOSZERO). Nothing is retried. Without a frame (framesize is negative) the capture pair of a capturing group, and capture_last_ptr when it is in use, are restored from the stack and stacksize slots are released. With a frame, STACK_TOP is reloaded from the private data slot, the frame is reverted and the previous private data value is restored. */ +static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +int offset; +struct sljit_jump *jump; +PCRE2_SPTR cc; + +/* No retry on backtrack, just drop everything. */ +if (CURRENT_AS(bracketpos_backtrack)->framesize < 0) + { + cc = current->cc; + + if (*cc == OP_BRAPOSZERO) + cc++; + + if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS) + { + offset = (GET2(cc, 1 + LINK_SIZE)) << 1; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0); + } + set_jumps(current->own_backtracks, LABEL()); + free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); + return; + } + +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr); +add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); +OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw)); + +if (current->own_backtracks) + { + jump = JUMP(SLJIT_JUMP); + set_jumps(current->own_backtracks, LABEL()); + /* Drop the stack frame. 
*/ + free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); + JUMPHERE(jump); + } +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1)); +} + +static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +assert_backtrack backtrack; + +current->top = NULL; +current->own_backtracks = NULL; +current->simple_backtracks = NULL; +if (current->cc[1] > OP_ASSERTBACK_NOT) + { + /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */ + compile_bracket_matchingpath(common, current->cc, current); + compile_bracket_backtrackingpath(common, current->top); + } +else + { + memset(&backtrack, 0, sizeof(backtrack)); + backtrack.common.cc = current->cc; + backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath; + /* Manual call of compile_assert_matchingpath. */ + compile_assert_matchingpath(common, current->cc, &backtrack, FALSE); + } +SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks); +} + +static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +PCRE2_UCHAR opcode = *current->cc; +struct sljit_label *loop; +struct sljit_jump *jump; + +if (opcode == OP_THEN || opcode == OP_THEN_ARG) + { + if (common->then_trap != NULL) + { + SLJIT_ASSERT(common->control_head_ptr != 0); + + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start); + jump = JUMP(SLJIT_JUMP); + + loop = LABEL(); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + JUMPHERE(jump); + CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop); + CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop); + 
add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP)); + return; + } + else if (!common->local_quit_available && common->in_positive_assertion) + { + add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP)); + return; + } + } + +if (common->local_quit_available) + { + /* Abort match with a fail. */ + if (common->quit_label == NULL) + add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP)); + else + JUMPTO(SLJIT_JUMP, common->quit_label); + return; + } + +if (opcode == OP_SKIP_ARG) + { + SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark)); + + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0); + add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0)); + return; + } + +if (opcode == OP_SKIP) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); +else + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0); +add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP)); +} + +static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_label *label; + +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); +jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3)); +skip_valid_char(common); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0); +JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath); + +label = LABEL(); +sljit_set_label(jump, label); +set_jumps(current->own_backtracks, label); +} + +static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; +int size; + +if 
(CURRENT_AS(then_trap_backtrack)->then_trap) + { + common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap; + return; + } + +size = CURRENT_AS(then_trap_backtrack)->framesize; +size = 3 + (size < 0 ? 0 : size); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3)); +free_stack(common, size); +jump = JUMP(SLJIT_JUMP); + +set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL()); +/* STACK_TOP is set by THEN. */ +if (CURRENT_AS(then_trap_backtrack)->framesize >= 0) + { + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw)); + } +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); +free_stack(common, 3); + +JUMPHERE(jump); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0); +} + +static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +then_trap_backtrack *save_then_trap = common->then_trap; + +while (current) + { + if (current->simple_backtracks != NULL) + set_jumps(current->simple_backtracks, LABEL()); + switch(*current->cc) + { + case OP_SET_SOM: + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0); + break; + + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_QUERY: + case OP_MINQUERY: + case OP_UPTO: + case OP_MINUPTO: + case OP_EXACT: + case OP_POSSTAR: + case OP_POSPLUS: + case OP_POSQUERY: + case OP_POSUPTO: + case OP_STARI: + case OP_MINSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_EXACTI: + case OP_POSSTARI: + case OP_POSPLUSI: + case OP_POSQUERYI: + case OP_POSUPTOI: + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTUPTO: + 
case OP_NOTMINUPTO: + case OP_NOTEXACT: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + case OP_NOTPOSQUERY: + case OP_NOTPOSUPTO: + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTEXACTI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERYI: + case OP_NOTPOSUPTOI: + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEEXACT: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + case OP_TYPEPOSUPTO: + case OP_CLASS: + case OP_NCLASS: +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: +#endif + compile_iterator_backtrackingpath(common, current); + break; + + case OP_REF: + case OP_REFI: + case OP_DNREF: + case OP_DNREFI: + compile_ref_iterator_backtrackingpath(common, current); + break; + + case OP_RECURSE: + compile_recurse_backtrackingpath(common, current); + break; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + compile_assert_backtrackingpath(common, current); + break; + + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_BRA: + case OP_CBRA: + case OP_COND: + case OP_SBRA: + case OP_SCBRA: + case OP_SCOND: + compile_bracket_backtrackingpath(common, current); + break; + + case OP_BRAZERO: + if (current->cc[1] > OP_ASSERTBACK_NOT) + compile_bracket_backtrackingpath(common, current); + else + compile_assert_backtrackingpath(common, current); + break; + + case OP_BRAPOS: + case OP_CBRAPOS: + case OP_SBRAPOS: + case OP_SCBRAPOS: + case OP_BRAPOSZERO: + compile_bracketpos_backtrackingpath(common, current); + break; + + case OP_BRAMINZERO: + compile_braminzero_backtrackingpath(common, current); + break; + + case OP_MARK: + OP1(SLJIT_MOV, TMP1, 0, 
SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0)); + if (common->has_skip_arg) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, common->has_skip_arg ? 5 : 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0); + if (common->has_skip_arg) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0); + break; + + case OP_THEN: + case OP_THEN_ARG: + case OP_PRUNE: + case OP_PRUNE_ARG: + case OP_SKIP: + case OP_SKIP_ARG: + compile_control_verb_backtrackingpath(common, current); + break; + + case OP_COMMIT: + case OP_COMMIT_ARG: + if (!common->local_quit_available) + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); + if (common->quit_label == NULL) + add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP)); + else + JUMPTO(SLJIT_JUMP, common->quit_label); + break; + + case OP_CALLOUT: + case OP_CALLOUT_STR: + case OP_FAIL: + case OP_ACCEPT: + case OP_ASSERT_ACCEPT: + set_jumps(current->own_backtracks, LABEL()); + break; + + case OP_VREVERSE: + compile_vreverse_backtrackingpath(common, current); + break; + + case OP_THEN_TRAP: + /* A virtual opcode for then traps. */ + compile_then_trap_backtrackingpath(common, current); + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + current = current->prev; + } +common->then_trap = save_then_trap; +} + +static SLJIT_INLINE void compile_recurse(compiler_common *common) +{ +DEFINE_COMPILER; +PCRE2_SPTR cc = common->start + common->currententry->start; +PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 
0 : IMM2_SIZE); +PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE); +uint32_t recurse_flags = 0; +int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags); +int alt_count, alt_max, local_size; +backtrack_common altbacktrack; +jump_list *match = NULL; +struct sljit_jump *next_alt = NULL; +struct sljit_jump *accept_exit = NULL; +struct sljit_label *quit; +struct sljit_jump *mov_addr = NULL; + +/* Recurse captures then. */ +common->then_trap = NULL; + +SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS); + +alt_max = no_alternatives(cc); +alt_count = 0; + +/* Matching path. */ +SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0); +common->currententry->entry_label = LABEL(); +set_jumps(common->currententry->entry_calls, common->currententry->entry_label); + +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0); +count_match(common); + +local_size = (alt_max > 1) ? 2 : 1; + +/* (Reversed) stack layout: + [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */ + +allocate_stack(common, private_data_size + local_size); +/* Save return address. */ +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0); + +copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags); + +/* This variable is saved and restored all time when we enter or exit from a recursive context. 
*/ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0); + +if (recurse_flags & recurse_flag_control_head_found) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); + +if (alt_max > 1) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + +memset(&altbacktrack, 0, sizeof(backtrack_common)); +common->quit_label = NULL; +common->accept_label = NULL; +common->quit = NULL; +common->accept = NULL; +altbacktrack.cc = ccbegin; +cc += GET(cc, 1); +while (1) + { + altbacktrack.top = NULL; + altbacktrack.own_backtracks = NULL; + + if (altbacktrack.cc != ccbegin) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + + compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return; + + allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); + + if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) + { + if (alt_max > 3) + mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(1)); + else + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count); + } + + add_jump(compiler, &match, JUMP(SLJIT_JUMP)); + + if (alt_count == 0) + { + /* Backtracking path entry. */ + SLJIT_ASSERT(common->currententry->backtrack_label == NULL); + common->currententry->backtrack_label = LABEL(); + set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label); + + sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0); + + if (recurse_flags & recurse_flag_accept_found) + accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1); + + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + /* Save return address. 
*/ + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0); + + copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags); + + if (alt_max > 1) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + free_stack(common, 2); + + if (alt_max > 3) + { + sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0); + sljit_set_label(mov_addr, LABEL()); + sljit_emit_op0(compiler, SLJIT_ENDBR); + } + else + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); + } + else + free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1); + } + else if (alt_max > 3) + { + sljit_set_label(mov_addr, LABEL()); + sljit_emit_op0(compiler, SLJIT_ENDBR); + } + else + { + JUMPHERE(next_alt); + if (alt_count + 1 < alt_max) + { + SLJIT_ASSERT(alt_count == 1 && alt_max == 3); + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1); + } + } + + alt_count++; + + compile_backtrackingpath(common, altbacktrack.top); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return; + set_jumps(altbacktrack.own_backtracks, LABEL()); + + if (*cc != OP_ALT) + break; + + altbacktrack.cc = cc + 1 + LINK_SIZE; + cc += GET(cc, 1); + } + +/* No alternative is matched. 
*/ + +quit = LABEL(); + +copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1)); +free_stack(common, private_data_size + local_size); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +OP_SRC(SLJIT_FAST_RETURN, TMP2, 0); + +if (common->quit != NULL) + { + SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found); + + set_jumps(common->quit, LABEL()); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); + copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags); + JUMPTO(SLJIT_JUMP, quit); + } + +if (recurse_flags & recurse_flag_accept_found) + { + JUMPHERE(accept_exit); + free_stack(common, 2); + + /* Save return address. */ + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0); + + copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags); + + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1)); + free_stack(common, private_data_size + local_size); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); + OP_SRC(SLJIT_FAST_RETURN, TMP2, 0); + } + +if (common->accept != NULL) + { + SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found); + + set_jumps(common->accept, LABEL()); + + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); + OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0); + + allocate_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1); + } + +set_jumps(match, LABEL()); + +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + +copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1)); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); 
+OP_SRC(SLJIT_FAST_RETURN, TMP2, 0); +} + +#undef COMPILE_BACKTRACKINGPATH +#undef CURRENT_AS + +#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \ + (PCRE2_JIT_INVALID_UTF) + +static int jit_compile(pcre2_code *code, sljit_u32 mode) +{ +pcre2_real_code *re = (pcre2_real_code *)code; +struct sljit_compiler *compiler; +backtrack_common rootbacktrack; +compiler_common common_data; +compiler_common *common = &common_data; +const sljit_u8 *tables = re->tables; +void *allocator_data = &re->memctl; +int private_data_size; +PCRE2_SPTR ccend; +executable_functions *functions; +void *executable_func; +sljit_uw executable_size; +sljit_uw total_length; +struct sljit_label *mainloop_label = NULL; +struct sljit_label *continue_match_label; +struct sljit_label *empty_match_found_label = NULL; +struct sljit_label *empty_match_backtrack_label = NULL; +struct sljit_label *reset_match_label; +struct sljit_label *quit_label; +struct sljit_jump *jump; +struct sljit_jump *minlength_check_failed = NULL; +struct sljit_jump *empty_match = NULL; +struct sljit_jump *end_anchor_failed = NULL; +jump_list *reqcu_not_found = NULL; + +SLJIT_ASSERT(tables); + +#if HAS_VIRTUAL_REGISTERS == 1 +SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0); +#elif HAS_VIRTUAL_REGISTERS == 0 +SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0); +#else +#error "Invalid value for HAS_VIRTUAL_REGISTERS" +#endif + +memset(&rootbacktrack, 0, sizeof(backtrack_common)); +memset(common, 0, sizeof(compiler_common)); +common->re = re; +common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)); +rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size; + +#ifdef SUPPORT_UNICODE +common->invalid_utf = 
(mode & PCRE2_JIT_INVALID_UTF) != 0; +#endif /* SUPPORT_UNICODE */ +mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS; + +common->start = rootbacktrack.cc; +common->read_only_data_head = NULL; +common->fcc = tables + fcc_offset; +common->lcc = (sljit_sw)(tables + lcc_offset); +common->mode = mode; +common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY); +common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY); +common->nltype = NLTYPE_FIXED; +switch(re->newline_convention) + { + case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break; + case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break; + case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break; + case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break; + case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; + case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break; + default: return PCRE2_ERROR_INTERNAL; + } +common->nlmax = READ_CHAR_MAX; +common->nlmin = 0; +if (re->bsr_convention == PCRE2_BSR_UNICODE) + common->bsr_nltype = NLTYPE_ANY; +else if (re->bsr_convention == PCRE2_BSR_ANYCRLF) + common->bsr_nltype = NLTYPE_ANYCRLF; +else + { +#ifdef BSR_ANYCRLF + common->bsr_nltype = NLTYPE_ANYCRLF; +#else + common->bsr_nltype = NLTYPE_ANY; +#endif + } +common->bsr_nlmax = READ_CHAR_MAX; +common->bsr_nlmin = 0; +common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0; +common->ctypes = (sljit_sw)(tables + ctypes_offset); +common->name_count = re->name_count; +common->name_entry_size = re->name_entry_size; +common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0; +common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0; +#ifdef SUPPORT_UNICODE +/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. 
*/ +common->utf = (re->overall_options & PCRE2_UTF) != 0; +common->ucp = (re->overall_options & PCRE2_UCP) != 0; +if (common->utf) + { + if (common->nltype == NLTYPE_ANY) + common->nlmax = 0x2029; + else if (common->nltype == NLTYPE_ANYCRLF) + common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; + else + { + /* We only care about the first newline character. */ + common->nlmax = common->newline & 0xff; + } + + if (common->nltype == NLTYPE_FIXED) + common->nlmin = common->newline & 0xff; + else + common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; + + if (common->bsr_nltype == NLTYPE_ANY) + common->bsr_nlmax = 0x2029; + else + common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; + common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; + } +else + common->invalid_utf = FALSE; +#endif /* SUPPORT_UNICODE */ +ccend = bracketend(common->start); + +/* Calculate the local space size on the stack. */ +common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw); +common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data); +if (!common->optimized_cbracket) + return PCRE2_ERROR_NOMEMORY; +#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1 +memset(common->optimized_cbracket, 0, re->top_bracket + 1); +#else +memset(common->optimized_cbracket, 1, re->top_bracket + 1); +#endif + +SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET); +#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2 +common->capture_last_ptr = common->ovector_start; +common->ovector_start += sizeof(sljit_sw); +#endif +if (!check_opcode_types(common, common->start, ccend)) + { + SLJIT_FREE(common->optimized_cbracket, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + +/* Checking flags and updating ovector_start. 
*/ +if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) + { + common->req_char_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } +if (mode != PCRE2_JIT_COMPLETE) + { + common->start_used_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + if (mode == PCRE2_JIT_PARTIAL_SOFT) + { + common->hit_start = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + } +if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0) + { + common->match_end_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } +#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD +common->control_head_ptr = 1; +#endif +if (common->control_head_ptr != 0) + { + common->control_head_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } +if (common->has_set_som) + { + /* Saving the real start pointer is necessary. */ + common->start_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + +/* Aligning ovector to even number of sljit words. */ +if ((common->ovector_start & sizeof(sljit_sw)) != 0) + common->ovector_start += sizeof(sljit_sw); + +if (common->start_ptr == 0) + common->start_ptr = OVECTOR(0); + +/* Capturing brackets cannot be optimized if callouts are allowed. */ +if (common->capture_last_ptr != 0) + memset(common->optimized_cbracket, 0, re->top_bracket + 1); + +SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); +common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); + +total_length = ccend - common->start; +common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 
1 : 0)), allocator_data); +if (!common->private_data_ptrs) + { + SLJIT_FREE(common->optimized_cbracket, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } +memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32)); + +private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); + +if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back) + detect_early_fail(common, common->start, &private_data_size, 0, 0); + +set_private_data_ptrs(common, &private_data_size, ccend); + +SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr); + +if (private_data_size > 65536) + { + SLJIT_FREE(common->private_data_ptrs, allocator_data); + SLJIT_FREE(common->optimized_cbracket, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + +if (common->has_then) + { + common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length); + memset(common->then_offsets, 0, total_length); + set_then_offsets(common, common->start, NULL); + } + +compiler = sljit_create_compiler(allocator_data); +if (!compiler) + { + SLJIT_FREE(common->optimized_cbracket, allocator_data); + SLJIT_FREE(common->private_data_ptrs, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } +common->compiler = compiler; + +/* Main pcre2_jit_exec entry. */ +SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0); +sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, private_data_size); + +/* Register init. 
*/ +reset_ovector(common, (re->top_bracket + 1) * 2); +if (common->req_char_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0); + +OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); +OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end)); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); +OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match)); +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end)); +OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start)); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0); + +if (common->early_fail_start_ptr < common->early_fail_end_ptr) + reset_early_fail(common); + +if (mode == PCRE2_JIT_PARTIAL_SOFT) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); +if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0); +if (common->control_head_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); + +/* Main part of the matching */ +if ((re->overall_options & PCRE2_ANCHORED) == 0) + { + mainloop_label = mainloop_entry(common); + continue_match_label = LABEL(); + /* Forward search if possible. 
*/ + if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) + { + if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common)) + ; + else if ((re->flags & PCRE2_FIRSTSET) != 0) + fast_forward_first_char(common); + else if ((re->flags & PCRE2_STARTLINE) != 0) + fast_forward_newline(common); + else if ((re->flags & PCRE2_FIRSTMAPSET) != 0) + fast_forward_start_bits(common); + } + } +else + continue_match_label = LABEL(); + +if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) + { + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength)); + minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0); + } +if (common->req_char_ptr != 0) + reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0); + +/* Store the current STR_PTR in OVECTOR(0). */ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); +/* Copy the limit of allowed recursions. */ +OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH); +if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0); +if (common->fast_forward_bc_ptr != NULL) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0); + +if (common->start_ptr != OVECTOR(0)) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0); + +/* Copy the beginning of the string. 
*/ +if (mode == PCRE2_JIT_PARTIAL_SOFT) + { + jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); + JUMPHERE(jump); + } +else if (mode == PCRE2_JIT_PARTIAL_HARD) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); + +compile_matchingpath(common, common->start, ccend, &rootbacktrack); +if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + { + sljit_free_compiler(compiler); + SLJIT_FREE(common->optimized_cbracket, allocator_data); + SLJIT_FREE(common->private_data_ptrs, allocator_data); + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + +if ((re->overall_options & PCRE2_ENDANCHORED) != 0) + end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0); + +if (common->might_be_empty) + { + empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); + empty_match_found_label = LABEL(); + } + +common->accept_label = LABEL(); +if (common->accept != NULL) + set_jumps(common->accept, common->accept_label); + +/* This means we have a match. Update the ovector. 
*/ +copy_ovector(common, re->top_bracket + 1); +common->quit_label = common->abort_label = LABEL(); +if (common->quit != NULL) + set_jumps(common->quit, common->quit_label); +if (common->abort != NULL) + set_jumps(common->abort, common->abort_label); +if (minlength_check_failed != NULL) + SET_LABEL(minlength_check_failed, common->abort_label); + +sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); +sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + +if (common->failed_match != NULL) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + set_jumps(common->failed_match, LABEL()); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); + JUMPTO(SLJIT_JUMP, common->abort_label); + } + +if ((re->overall_options & PCRE2_ENDANCHORED) != 0) + JUMPHERE(end_anchor_failed); + +if (mode != PCRE2_JIT_COMPLETE) + { + common->partialmatchlabel = LABEL(); + set_jumps(common->partialmatch, common->partialmatchlabel); + return_with_partial_match(common, common->quit_label); + } + +if (common->might_be_empty) + empty_match_backtrack_label = LABEL(); +compile_backtrackingpath(common, rootbacktrack.top); +if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + { + sljit_free_compiler(compiler); + SLJIT_FREE(common->optimized_cbracket, allocator_data); + SLJIT_FREE(common->private_data_ptrs, allocator_data); + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + +SLJIT_ASSERT(rootbacktrack.prev == NULL); +reset_match_label = LABEL(); + +if (mode == PCRE2_JIT_PARTIAL_SOFT) + { + /* Update hit_start only in the first time. */ + jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0); + JUMPHERE(jump); + } + +/* Check we have remaining characters. 
*/ +if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + } + +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), + (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr); + +if ((re->overall_options & PCRE2_ANCHORED) == 0) + { + if (common->ff_newline_shortcut != NULL) + { + /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */ + if ((re->overall_options & PCRE2_FIRSTLINE) == 0) + { + if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, TMP1, 0); + CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut); + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + } + else + CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut); + } + } + else + CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label); + } + +/* No more remaining characters. 
*/ +if (reqcu_not_found != NULL) + set_jumps(reqcu_not_found, LABEL()); + +if (mode == PCRE2_JIT_PARTIAL_SOFT) + CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel); + +OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); +JUMPTO(SLJIT_JUMP, common->quit_label); + +flush_stubs(common); + +if (common->might_be_empty) + { + JUMPHERE(empty_match); + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); + JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); + JUMPTO(SLJIT_ZERO, empty_match_found_label); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label); + JUMPTO(SLJIT_JUMP, empty_match_backtrack_label); + } + +common->fast_forward_bc_ptr = NULL; +common->early_fail_start_ptr = 0; +common->early_fail_end_ptr = 0; +common->currententry = common->entries; +common->local_quit_available = TRUE; +quit_label = common->quit_label; +if (common->currententry != NULL) + { + /* A free bit for each private data. */ + common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3; + SLJIT_ASSERT(common->recurse_bitset_size > 0); + common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);; + + if (common->recurse_bitset != NULL) + { + do + { + /* Might add new entries. */ + compile_recurse(common); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + break; + flush_stubs(common); + common->currententry = common->currententry->next; + } + while (common->currententry != NULL); + + SLJIT_FREE(common->recurse_bitset, allocator_data); + } + + if (common->currententry != NULL) + { + /* The common->recurse_bitset has been freed. 
*/ + SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL); + + sljit_free_compiler(compiler); + SLJIT_FREE(common->optimized_cbracket, allocator_data); + SLJIT_FREE(common->private_data_ptrs, allocator_data); + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + } +common->local_quit_available = FALSE; +common->quit_label = quit_label; + +/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */ +/* This is a (really) rare case. */ +set_jumps(common->stackalloc, LABEL()); +/* RETURN_ADDR is not a saved register. */ +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); + +SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); + +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0); +OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); +OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE); +OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack)); +OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0); + +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize)); + +jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0); +OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); +OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); + +/* Allocation failed. */ +JUMPHERE(jump); +/* We break the return address cache here, but this is a really rare case. */ +OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT); +JUMPTO(SLJIT_JUMP, common->quit_label); + +/* Call limit reached. 
*/ +set_jumps(common->calllimit, LABEL()); +OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT); +JUMPTO(SLJIT_JUMP, common->quit_label); + +if (common->revertframes != NULL) + { + set_jumps(common->revertframes, LABEL()); + do_revertframes(common); + } +if (common->wordboundary != NULL) + { + set_jumps(common->wordboundary, LABEL()); + check_wordboundary(common, FALSE); + } +if (common->ucp_wordboundary != NULL) + { + set_jumps(common->ucp_wordboundary, LABEL()); + check_wordboundary(common, TRUE); + } +if (common->anynewline != NULL) + { + set_jumps(common->anynewline, LABEL()); + check_anynewline(common); + } +if (common->hspace != NULL) + { + set_jumps(common->hspace, LABEL()); + check_hspace(common); + } +if (common->vspace != NULL) + { + set_jumps(common->vspace, LABEL()); + check_vspace(common); + } +if (common->casefulcmp != NULL) + { + set_jumps(common->casefulcmp, LABEL()); + do_casefulcmp(common); + } +if (common->caselesscmp != NULL) + { + set_jumps(common->caselesscmp, LABEL()); + do_caselesscmp(common); + } +if (common->reset_match != NULL || common->restart_match != NULL) + { + if (common->restart_match != NULL) + { + set_jumps(common->restart_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); + } + + set_jumps(common->reset_match, LABEL()); + do_reset_match(common, (re->top_bracket + 1) * 2); + /* The value of restart_match is in TMP1. 
*/ + CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label); + OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); + JUMPTO(SLJIT_JUMP, reset_match_label); + } +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utfreadchar != NULL) + { + set_jumps(common->utfreadchar, LABEL()); + do_utfreadchar(common); + } +if (common->utfreadtype8 != NULL) + { + set_jumps(common->utfreadtype8, LABEL()); + do_utfreadtype8(common); + } +if (common->utfpeakcharback != NULL) + { + set_jumps(common->utfpeakcharback, LABEL()); + do_utfpeakcharback(common); + } +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ +#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 +if (common->utfreadchar_invalid != NULL) + { + set_jumps(common->utfreadchar_invalid, LABEL()); + do_utfreadchar_invalid(common); + } +if (common->utfreadnewline_invalid != NULL) + { + set_jumps(common->utfreadnewline_invalid, LABEL()); + do_utfreadnewline_invalid(common); + } +if (common->utfmoveback_invalid) + { + set_jumps(common->utfmoveback_invalid, LABEL()); + do_utfmoveback_invalid(common); + } +if (common->utfpeakcharback_invalid) + { + set_jumps(common->utfpeakcharback_invalid, LABEL()); + do_utfpeakcharback_invalid(common); + } +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */ +if (common->getucd != NULL) + { + set_jumps(common->getucd, LABEL()); + do_getucd(common); + } +if (common->getucdtype != NULL) + { + set_jumps(common->getucdtype, LABEL()); + do_getucdtype(common); + } +#endif /* SUPPORT_UNICODE */ + +SLJIT_FREE(common->optimized_cbracket, allocator_data); +SLJIT_FREE(common->private_data_ptrs, allocator_data); + +executable_func = sljit_generate_code(compiler, 0, NULL); +executable_size = sljit_get_generated_code_size(compiler); +sljit_free_compiler(compiler); + +if (executable_func == NULL) + { + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + +/* Reuse the function descriptor if possible. 
*/ +if (re->executable_jit != NULL) + functions = (executable_functions *)re->executable_jit; +else + { + functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data); + if (functions == NULL) + { + /* This case is highly unlikely since we just recently + freed a lot of memory. Not impossible though. */ + sljit_free_code(executable_func, NULL); + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + memset(functions, 0, sizeof(executable_functions)); + functions->top_bracket = re->top_bracket + 1; + functions->limit_match = re->limit_match; + re->executable_jit = functions; + } + +/* Turn mode into an index. */ +if (mode == PCRE2_JIT_COMPLETE) + mode = 0; +else + mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2; + +SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES); +functions->executable_funcs[mode] = executable_func; +functions->read_only_data_heads[mode] = common->read_only_data_head; +functions->executable_sizes[mode] = executable_size; +return 0; +} + +#endif + +/************************************************* +* JIT compile a Regular Expression * +*************************************************/ + +/* This function used JIT to convert a previously-compiled pattern into machine +code. 
+
+Arguments:
+  code         a compiled pattern
+  options      JIT option bits
+
+Returns:       0: success or (*NOJIT) was used
+              <0: an error code
+
+Each requested mode (complete, partial-soft, partial-hard) is compiled
+separately, and only if no code exists yet for that mode.
+*/
+
+#define PUBLIC_JIT_COMPILE_OPTIONS \
+  (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_jit_compile(pcre2_code *code, uint32_t options)
+{
+pcre2_real_code *re = (pcre2_real_code *)code;
+#ifdef SUPPORT_JIT
+/* Option bit for each slot of executable_funcs[], in slot order. */
+static const uint32_t mode_option[JIT_NUMBER_OF_COMPILE_MODES] =
+  { PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_SOFT, PCRE2_JIT_PARTIAL_HARD };
+static int executable_allocator_is_working = -1;
+const uint32_t all_modes =
+  PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD;
+executable_functions *functions;
+int slot;
+#endif
+
+if (code == NULL)
+  return PCRE2_ERROR_NULL;
+
+if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
+  return PCRE2_ERROR_JIT_BADOPTION;
+
+/* Support for invalid UTF was first introduced in JIT, with the option
+PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
+compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
+preferred feature, with the earlier option deprecated. However, for backward
+compatibility, if the earlier option is set, it forces the new option so that
+if JIT matching falls back to the interpreter, there is still support for
+invalid UTF. However, if this function has already been successfully called
+without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
+non-invalid-supporting JIT code was compiled), give an error.
+
+If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
+actions are needed:
+
+  1. Remove the definition from pcre2.h.in and from the list in
+     PUBLIC_JIT_COMPILE_OPTIONS above.
+
+  2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
+
+  3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
+
+  4. Delete the following short block of code. The setting of "re" and
+     "functions" can be moved into the JIT-only block below, but if that is
+     done, (void)re and (void)functions will be needed in the non-JIT case, to
+     avoid compiler warnings.
+*/
+
+#ifdef SUPPORT_JIT
+functions = (executable_functions *)re->executable_jit;
+#endif
+
+if ((options & PCRE2_JIT_INVALID_UTF) != 0 &&
+    (re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
+  {
+#ifdef SUPPORT_JIT
+  /* JIT code already exists that was built without invalid UTF support. */
+  if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
+#endif
+  re->overall_options |= PCRE2_MATCH_INVALID_UTF;
+  }
+
+/* The above tests are run with and without JIT support. This means that
+PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
+interpreter support) even in the absence of JIT. But now, if there is no JIT
+support, give an error return. */
+
+#ifdef SUPPORT_JIT
+
+/* There is JIT support. Do the necessary. */
+
+if ((re->flags & PCRE2_NOJIT) != 0) return 0;
+
+if (executable_allocator_is_working == -1)
+  {
+  /* Probe the executable allocator once with a small allocation. The probe
+  might run multiple times in multi-threaded environments, but the result
+  should not be affected by it. */
+  void *probe = SLJIT_MALLOC_EXEC(32, NULL);
+  if (probe == NULL)
+    executable_allocator_is_working = 0;
+  else
+    {
+    SLJIT_FREE_EXEC(((sljit_u8*)(probe)) + SLJIT_EXEC_OFFSET(probe), NULL);
+    executable_allocator_is_working = 1;
+    }
+  }
+
+if (executable_allocator_is_working == 0)
+  return PCRE2_ERROR_NOMEMORY;
+
+if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
+  options |= PCRE2_JIT_INVALID_UTF;
+
+/* Compile each requested mode that has no code yet. jit_compile() is given
+the caller's options with the other two mode bits removed. "functions" is the
+descriptor as it was on entry; it is deliberately not refreshed in the loop. */
+
+for (slot = 0; slot < JIT_NUMBER_OF_COMPILE_MODES; slot++)
+  {
+  int result;
+
+  if ((options & mode_option[slot]) == 0)
+    continue;
+  if (functions != NULL && functions->executable_funcs[slot] != NULL)
+    continue;
+
+  result = jit_compile(code, options & ~(all_modes & ~mode_option[slot]));
+  if (result != 0)
+    return result;
+  }
+
+return 0;
+
+#else  /* SUPPORT_JIT */
+
+return PCRE2_ERROR_JIT_BADOPTION;
+
+#endif /* SUPPORT_JIT */
+}
+
+/* JIT compiler uses an all-in-one approach. This improves security,
+   since the code generator functions are not exported.
*/ + +#define INCLUDED_FROM_PCRE2_JIT_COMPILE + +#include "pcre2_jit_match.c" +#include "pcre2_jit_misc.c" + +/* End of pcre2_jit_compile.c */ diff --git a/src/pcre2_jit_match.c b/src/pcre2_jit_match.c new file mode 100644 index 0000000..ae5903e --- /dev/null +++ b/src/pcre2_jit_match.c @@ -0,0 +1,200 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2023 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
+#error This file must be included from pcre2_jit_compile.c.
+#endif
+
+/* The MemorySanitizer interface header declares __msan_unpoison(), which
+pcre2_jit_match() calls on the ovector when built with -fsanitize=memory. */
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#include <sanitizer/msan_interface.h>
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
+
+#ifdef SUPPORT_JIT
+
+/* Run JIT-compiled code using a temporary stack that lives in this function's
+own frame. pcre2_jit_match() uses this when it has no JIT stack to pass.
+
+Arguments:
+  arguments        the prepared argument block; its "stack" field is
+                     overwritten to point at the local stack descriptor
+  executable_func  the compiled matching function to call
+
+Returns:           whatever executable_func returns
+
+The descriptor starts with top == end and start == min_start == the base of
+the MACHINE_STACK_SIZE-byte buffer. It is only valid during this call.
+NOTE(review): SLJIT_NOINLINE presumably keeps the buffer out of the caller's
+frame - confirm. */
+
+static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func)
+{
+sljit_u8 local_space[MACHINE_STACK_SIZE];
+struct sljit_stack local_stack;
+
+local_stack.min_start = local_space;
+local_stack.start = local_space;
+local_stack.end = local_space + MACHINE_STACK_SIZE;
+local_stack.top = local_space + MACHINE_STACK_SIZE;
+arguments->stack = &local_stack;
+return executable_func(arguments);
+}
+
+#endif
+
+
+/*************************************************
+*           Do a JIT pattern match               *
+*************************************************/
+
+/* This function runs a JIT pattern match.
+
+Arguments:
+  code          points to the compiled expression
+  subject       points to the subject string
+  length        length of subject string (may contain binary zeros)
+  start_offset  where to start in the subject string
+  options       option bits
+  match_data    points to a match_data block
+  mcontext      points to a match context
+
+Returns:       > 0 => success; value is the number of ovector pairs filled
+               = 0 => success, but ovector is not big enough
+                -1 => failed to match (PCRE2_ERROR_NOMATCH)
+              < -1 => some kind of unexpected problem
+
+The arguments are not validated here: code and match_data are dereferenced
+directly. PCRE2_ERROR_JIT_BADOPTION is returned when there is no JIT code for
+the requested matching mode, or when JIT support is not compiled in.
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
+  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
+  pcre2_match_context *mcontext)
+{
+#ifdef SUPPORT_JIT
+
+pcre2_real_code *re = (pcre2_real_code *)code;
+executable_functions *functions = (executable_functions *)re->executable_jit;
+pcre2_jit_stack *jit_stack;
+uint32_t oveccount = match_data->oveccount;
+union {
+   void *executable_func;
+   jit_function call_executable_func;
+} convert_executable_func;
+jit_arguments arguments;
+int rc;
+int index;
+
+/* Select the compiled variant: 0 = complete, 1 = partial soft, 2 = partial
+hard. Hard takes precedence if both partial options are given. */
+
+if ((options & PCRE2_PARTIAL_HARD) != 0)
+  index = 2;
+else
+  index = ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
+
+if (functions == NULL || functions->executable_funcs[index] == NULL)
+  return PCRE2_ERROR_JIT_BADOPTION;
+
+/* Sanity checks should be handled by pcre2_match. */
+arguments.begin = subject;
+arguments.str = subject + start_offset;
+arguments.end = subject + length;
+arguments.startchar_ptr = subject;
+arguments.mark_ptr = NULL;
+arguments.match_data = match_data;
+arguments.options = options;
+
+/* The callout, limits and JIT stack come from the match context when there is
+one, otherwise defaults are used. The match limit is the smaller of the
+context (or default) limit and the limit stored in the pattern. */
+
+if (mcontext == NULL)
+  {
+  jit_stack = NULL;
+  arguments.callout = NULL;
+  arguments.callout_data = NULL;
+  arguments.offset_limit = PCRE2_UNSET;
+  arguments.limit_match = (MATCH_LIMIT < re->limit_match)?
+    MATCH_LIMIT : re->limit_match;
+  }
+else
+  {
+  arguments.callout = mcontext->callout;
+  arguments.callout_data = mcontext->callout_data;
+  arguments.offset_limit = mcontext->offset_limit;
+  arguments.limit_match = (mcontext->match_limit < re->limit_match)?
+    mcontext->match_limit : re->limit_match;
+  /* Without a callback, the callback data itself is the JIT stack. */
+  jit_stack = (mcontext->jit_callback != NULL)?
+    mcontext->jit_callback(mcontext->jit_callback_data) :
+    (pcre2_jit_stack *)mcontext->jit_callback_data;
+  }
+
+/* Never report more pairs than the JIT code was compiled for. */
+
+if (oveccount > functions->top_bracket)
+  oveccount = functions->top_bracket;
+arguments.oveccount = oveccount << 1;
+
+/* The union converts the stored data pointer into a function pointer. */
+
+convert_executable_func.executable_func = functions->executable_funcs[index];
+if (jit_stack == NULL)
+  rc = jit_machine_stack_exec(&arguments, convert_executable_func.call_executable_func);
+else
+  {
+  arguments.stack = (struct sljit_stack *)(jit_stack->stack);
+  rc = convert_executable_func.call_executable_func(&arguments);
+  }
+
+/* More pairs matched than fit in the ovector: report 0. */
+
+if (rc > (int)oveccount)
+  rc = 0;
+
+match_data->rc = rc;
+match_data->code = re;
+if (rc >= 0 || rc == PCRE2_ERROR_PARTIAL)
+  match_data->subject = subject;
+else
+  match_data->subject = NULL;
+match_data->subject_length = length;
+match_data->startchar = arguments.startchar_ptr - subject;
+match_data->leftchar = 0;
+match_data->rightchar = 0;
+match_data->mark = arguments.mark_ptr;
+match_data->matchedby = PCRE2_MATCHEDBY_JIT;
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+if (rc > 0)
+  __msan_unpoison(match_data->ovector, 2 * rc * sizeof(match_data->ovector[0]));
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
+
+return rc;
+
+#else  /* SUPPORT_JIT */
+
+(void)code;
+(void)subject;
+(void)length;
+(void)start_offset;
+(void)options;
+(void)match_data;
+(void)mcontext;
+return PCRE2_ERROR_JIT_BADOPTION;
+
+#endif  /* SUPPORT_JIT */
+}
+
+/* End of pcre2_jit_match.c */
diff --git a/src/pcre2_jit_misc.c b/src/pcre2_jit_misc.c
new file mode 100644
index 0000000..c3abc0b
--- /dev/null
+++ b/src/pcre2_jit_misc.c
@@ -0,0 +1,234 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+ + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE +#error This file must be included from pcre2_jit_compile.c. 
+#endif
+
+
+
+/*************************************************
+*           Free JIT read-only data              *
+*************************************************/
+
+/* Free a chain of read-only data blocks. The first pointer-sized word of each
+block holds the address of the next block; NULL ends the chain. */
+
+void
+PRIV(jit_free_rodata)(void *current, void *allocator_data)
+{
+#ifdef SUPPORT_JIT
+void *following;
+
+SLJIT_UNUSED_ARG(allocator_data);
+
+for (; current != NULL; current = following)
+  {
+  /* Read the link before the block is released. */
+  following = *(void**)current;
+  SLJIT_FREE(current, allocator_data);
+  }
+
+#else  /* SUPPORT_JIT */
+(void)current;
+(void)allocator_data;
+#endif /* SUPPORT_JIT */
+}
+
+/*************************************************
+*           Free JIT compiled code               *
+*************************************************/
+
+/* Release the machine code and read-only data of every compile mode, then the
+function descriptor itself. executable_jit must not be NULL. */
+
+void
+PRIV(jit_free)(void *executable_jit, pcre2_memctl *memctl)
+{
+#ifdef SUPPORT_JIT
+
+executable_functions *functions = (executable_functions *)executable_jit;
+void *allocator_data = memctl;
+int mode = 0;
+
+while (mode < JIT_NUMBER_OF_COMPILE_MODES)
+  {
+  void *machine_code = functions->executable_funcs[mode];
+  if (machine_code != NULL)
+    sljit_free_code(machine_code, NULL);
+  PRIV(jit_free_rodata)(functions->read_only_data_heads[mode], allocator_data);
+  mode++;
+  }
+
+SLJIT_FREE(functions, allocator_data);
+
+#else  /* SUPPORT_JIT */
+(void)executable_jit;
+(void)memctl;
+#endif /* SUPPORT_JIT */
+}
+
+
+/*************************************************
+*            Free unused JIT memory              *
+*************************************************/
+
+/* Ask the sljit executable allocator, when it is in use, to release unused
+memory. The general context argument is not used. */
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
+{
+#ifdef SUPPORT_JIT
+SLJIT_UNUSED_ARG(gcontext);
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+sljit_free_unused_memory_exec();
+#endif  /* SLJIT_EXECUTABLE_ALLOCATOR */
+#else  /* SUPPORT_JIT */
+(void)gcontext;     /* Suppress warning */
+#endif  /* SUPPORT_JIT */
+}
+
+
+
+/*************************************************
+*            Allocate a JIT stack                *
+*************************************************/
+
+/* Create a JIT stack. Returns NULL without JIT support, when either size is
+zero, when maxsize is too large to be rounded up, or when allocation fails. A
+startsize larger than maxsize is reduced to maxsize. Both sizes are rounded up
+using a mask built from STACK_GROWTH_RATE. */
+
+PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
+pcre2_jit_stack_create(size_t startsize, size_t maxsize,
+  pcre2_general_context *gcontext)
+{
+#ifdef SUPPORT_JIT
+
+pcre2_jit_stack *jit_stack;
+
+if (startsize == 0 || maxsize == 0)
+  return NULL;
+if (maxsize > SIZE_MAX - STACK_GROWTH_RATE)
+  return NULL;
+
+if (startsize > maxsize)
+  startsize = maxsize;
+startsize = (startsize + STACK_GROWTH_RATE - 1) & (size_t)(~(STACK_GROWTH_RATE - 1));
+maxsize = (maxsize + STACK_GROWTH_RATE - 1) & (size_t)(~(STACK_GROWTH_RATE - 1));
+
+jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext);
+if (jit_stack == NULL) return NULL;
+
+jit_stack->stack = sljit_allocate_stack(startsize, maxsize, &jit_stack->memctl);
+if (jit_stack->stack != NULL)
+  return jit_stack;
+
+/* The stack could not be allocated: give the control block back. */
+jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
+return NULL;
+
+#else  /* SUPPORT_JIT */
+
+(void)gcontext;
+(void)startsize;
+(void)maxsize;
+return NULL;
+
+#endif
+}
+
+
+/*************************************************
+*         Assign a JIT stack to a pattern        *
+*************************************************/
+
+/* Store the JIT stack callback and its data in a match context. Does nothing
+when mcontext is NULL. */
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_jit_stack_assign(pcre2_match_context *mcontext, pcre2_jit_callback callback,
+  void *callback_data)
+{
+#ifdef SUPPORT_JIT
+
+if (mcontext != NULL)
+  {
+  mcontext->jit_callback = callback;
+  mcontext->jit_callback_data = callback_data;
+  }
+
+#else  /* SUPPORT_JIT */
+(void)mcontext;
+(void)callback;
+(void)callback_data;
+#endif  /* SUPPORT_JIT */
+}
+
+
+/*************************************************
+*               Free a JIT stack                 *
+*************************************************/
+
+/* Release a JIT stack and its control block. A NULL argument is ignored. */
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_jit_stack_free(pcre2_jit_stack *jit_stack)
+{
+#ifdef SUPPORT_JIT
+if (jit_stack == NULL)
+  return;
+sljit_free_stack((struct sljit_stack *)(jit_stack->stack), &jit_stack->memctl);
+jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
+#else  /* SUPPORT_JIT */
+(void)jit_stack;
+#endif  /* SUPPORT_JIT */
+}
+
+
+/*************************************************
+*               Get target CPU type              *
+*************************************************/
+
+/* Return the sljit platform name, or a fixed message without JIT support. */
+
+const char*
+PRIV(jit_get_target)(void)
+{
+#ifdef SUPPORT_JIT
+return sljit_get_platform_name();
+#else  /* SUPPORT_JIT */
+return "JIT is not supported";
+#endif  /* SUPPORT_JIT */
+}
+
+
+/*************************************************
+*              Get size of JIT code              *
+*************************************************/
+
+/* Return the summed code size of all compile modes (0 without JIT support).
+executable_jit must not be NULL. */
+
+size_t
+PRIV(jit_get_size)(void *executable_jit)
+{
+#ifdef SUPPORT_JIT
+executable_functions *functions = (executable_functions *)executable_jit;
+sljit_uw total = 0;
+int mode;
+
+SLJIT_COMPILE_ASSERT(JIT_NUMBER_OF_COMPILE_MODES == 3, number_of_compile_modes_changed);
+for (mode = 0; mode < JIT_NUMBER_OF_COMPILE_MODES; mode++)
+  total += functions->executable_sizes[mode];
+return total;
+#else  /* SUPPORT_JIT */
+(void)executable_jit;
+return 0;
+#endif
+}
+
+/* End of pcre2_jit_misc.c */
diff --git a/src/pcre2_jit_neon_inc.h b/src/pcre2_jit_neon_inc.h
new file mode 100644
index 0000000..4a718b6
--- /dev/null
+++ b/src/pcre2_jit_neon_inc.h
@@ -0,0 +1,354 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+            This module by Zoltan Herczeg and Sebastian Pop
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+-----------------------------------------------------------------------------
+*/
+
+/* This header is a function template. The including file selects one variant
+by defining FFCS, FFCS_2, FFCS_MASK, FFCPS_0, FFCPS_1 or FFCPS_DEFAULT (plus
+FF_UTF for the UTF flavour); the chain below turns that choice into the name of
+the function to generate. If none of them is defined, FF_FUN stays undefined. */
+
+# if defined(FFCS)
+#  if defined(FF_UTF)
+#   define FF_FUN ffcs_utf
+#  else
+#   define FF_FUN ffcs
+#  endif
+
+# elif defined(FFCS_2)
+#  if defined(FF_UTF)
+#   define FF_FUN ffcs_2_utf
+#  else
+#   define FF_FUN ffcs_2
+#  endif
+
+# elif defined(FFCS_MASK)
+#  if defined(FF_UTF)
+#   define FF_FUN ffcs_mask_utf
+#  else
+#   define FF_FUN ffcs_mask
+#  endif
+
+# elif defined(FFCPS_0)
+#  if defined (FF_UTF)
+#   define FF_FUN ffcps_0_utf
+#  else
+#   define FF_FUN ffcps_0
+#  endif
+
+# elif defined (FFCPS_1)
+#  if defined (FF_UTF)
+#   define FF_FUN ffcps_1_utf
+#  else
+#   define FF_FUN ffcps_1
+#  endif
+
+# elif defined (FFCPS_DEFAULT)
+#  if defined (FF_UTF)
+#   define FF_FUN ffcps_default_utf
+#  else
+#   define FF_FUN ffcps_default
+#  endif
+# endif
+
+/* Fast-forward search, 16 bytes at a time.
+
+Arguments:
+  str_end   end of the region to search
+  str_ptr   in/out: the current position; it is advanced during the search
+  offs1     used by the FFCPS variants and by the FF_UTF continuation check
+  offs2     used by the FFCPS variants (together with offs1)
+  chars     the character(s) to look for, unpacked through the int_char union
+
+Returns:    the position of the match (also left in *str_ptr), or NULL if
+            nothing is found before str_end
+
+The first load is from *str_ptr rounded down to a 16-byte boundary, so bytes in
+front of the starting position are read; the loop body can likewise read past
+str_end before the bound is re-checked at "match". NOTE(review): this is
+presumably why address sanitizing is switched off for the function - confirm.
+The pointer/integer casts use uint64_t, i.e. they assume 64-bit pointers. */
+
+#if (defined(__GNUC__) && __SANITIZE_ADDRESS__) \
+    || (defined(__clang__) \
+	&& ((__clang_major__ == 3 && __clang_minor__ >= 3) || (__clang_major__ > 3)))
+__attribute__((no_sanitize_address))
+#endif
+static sljit_u8* SLJIT_FUNC FF_FUN(sljit_u8 *str_end, sljit_u8 **str_ptr, sljit_uw offs1, sljit_uw offs2, sljit_uw chars)
+#undef FF_FUN
+{
+quad_word qw;
+int_char ic;
+
+/* Not every variant uses the offsets. */
+SLJIT_UNUSED_ARG(offs1);
+SLJIT_UNUSED_ARG(offs2);
+
+ic.x = chars;
+
+/* Single-character variants: broadcast the wanted character (and the second
+character or the mask) into vectors. */
+
+#if defined(FFCS)
+sljit_u8 c1 = ic.c.c1;
+vect_t vc1 = VDUPQ(c1);
+
+#elif defined(FFCS_2)
+sljit_u8 c1 = ic.c.c1;
+vect_t vc1 = VDUPQ(c1);
+sljit_u8 c2 = ic.c.c2;
+vect_t vc2 = VDUPQ(c2);
+
+#elif defined(FFCS_MASK)
+sljit_u8 c1 = ic.c.c1;
+vect_t vc1 = VDUPQ(c1);
+sljit_u8 mask = ic.c.c2;
+vect_t vmask = VDUPQ(mask);
+#endif
+
+/* Character-pair variants: each of the two positions has its own comparison,
+chosen from the relationship between its two candidate characters. */
+
+#if defined(FFCPS)
+compare_type compare1_type = compare_match1;
+compare_type compare2_type = compare_match1;
+vect_t cmp1a, cmp1b, cmp2a, cmp2b;
+const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
+PCRE2_UCHAR char1a = ic.c.c1;
+PCRE2_UCHAR char2a = ic.c.c3;
+
+# ifdef FFCPS_CHAR1A2A
+cmp1a = VDUPQ(char1a);
+cmp2a = VDUPQ(char2a);
+cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
+cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
+# else
+PCRE2_UCHAR char1b = ic.c.c2;
+PCRE2_UCHAR char2b = ic.c.c4;
+if (char1a == char1b)
+  {
+  cmp1a = VDUPQ(char1a);
+  cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
+  }
+else
+  {
+  /* If the two candidates differ in one bit only, a single masked compare
+  (compare_match1i) is used; otherwise both are compared (compare_match2). */
+  sljit_u32 bit1 = char1a ^ char1b;
+  if (is_powerof2(bit1))
+    {
+    compare1_type = compare_match1i;
+    cmp1a = VDUPQ(char1a | bit1);
+    cmp1b = VDUPQ(bit1);
+    }
+  else
+    {
+    compare1_type = compare_match2;
+    cmp1a = VDUPQ(char1a);
+    cmp1b = VDUPQ(char1b);
+    }
+  }
+
+if (char2a == char2b)
+  {
+  cmp2a = VDUPQ(char2a);
+  cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
+  }
+else
+  {
+  sljit_u32 bit2 = char2a ^ char2b;
+  if (is_powerof2(bit2))
+    {
+    compare2_type = compare_match1i;
+    cmp2a = VDUPQ(char2a | bit2);
+    cmp2b = VDUPQ(bit2);
+    }
+  else
+    {
+    compare2_type = compare_match2;
+    cmp2a = VDUPQ(char2a);
+    cmp2b = VDUPQ(char2b);
+    }
+  }
+# endif
+
+/* The pair search runs with *str_ptr moved forward by offs1 code units; this
+is undone just before a successful return. */
+*str_ptr += IN_UCHARS(offs1);
+#endif
+
+#if PCRE2_CODE_UNIT_WIDTH != 8
+vect_t char_mask = VDUPQ(0xff);
+#endif
+
+/* The UTF variants come back here after rejecting a hit at "match" below. */
+#if defined(FF_UTF)
+restart:;
+#endif
+
+#if defined(FFCPS)
+if (*str_ptr >= str_end)
+  return NULL;
+sljit_u8 *p1 = *str_ptr - diff;
+#endif
+/* Remember how far *str_ptr is into its 16-byte block, then round it down to
+the start of that block for the first load. */
+sljit_s32 align_offset = ((uint64_t)*str_ptr & 0xf);
+*str_ptr = (sljit_u8 *) ((uint64_t)*str_ptr & ~0xf);
+vect_t data = VLD1Q(*str_ptr);
+#if PCRE2_CODE_UNIT_WIDTH != 8
+data = VANDQ(data, char_mask);
+#endif
+
+#if defined(FFCS)
+vect_t eq = VCEQQ(data, vc1);
+
+#elif defined(FFCS_2)
+vect_t eq1 = VCEQQ(data, vc1);
+vect_t eq2 = VCEQQ(data, vc2);
+vect_t eq = VORRQ(eq1, eq2);
+
+#elif defined(FFCS_MASK)
+vect_t eq = VORRQ(data, vmask);
+eq = VCEQQ(eq, vc1);
+
+#elif defined(FFCPS)
+# if defined(FFCPS_DIFF1)
+vect_t prev_data = data;
+# endif
+
+/* Data for the second character of the pair: loaded from diff bytes earlier
+when p1 (the unaligned position minus diff) lies before the aligned block,
+otherwise derived from the block already loaded by shifting lanes. */
+vect_t data2;
+if (p1 < *str_ptr)
+  {
+  data2 = VLD1Q(*str_ptr - diff);
+#if PCRE2_CODE_UNIT_WIDTH != 8
+  data2 = VANDQ(data2, char_mask);
+#endif
+  }
+else
+  data2 = shift_left_n_lanes(data, offs1 - offs2);
+
+if (compare1_type == compare_match1)
+  data = VCEQQ(data, cmp1a);
+else
+  data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
+
+if (compare2_type == compare_match1)
+  data2 = VCEQQ(data2, cmp2a);
+else
+  data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
+
+vect_t eq = VANDQ(data, data2);
+#endif
+
+/* The comparison result is stored to memory and examined as two 64-bit words.
+The trailing-zero count divided by 8 is used as the byte offset of the first
+hit within a word. */
+VST1Q(qw.mem, eq);
+/* Ignore matches before the first STR_PTR. */
+if (align_offset < 8)
+  {
+  qw.dw[0] >>= align_offset * 8;
+  if (qw.dw[0])
+    {
+    *str_ptr += align_offset + __builtin_ctzll(qw.dw[0]) / 8;
+    goto match;
+    }
+  if (qw.dw[1])
+    {
+    *str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
+    goto match;
+    }
+  }
+else
+  {
+  qw.dw[1] >>= (align_offset - 8) * 8;
+  if (qw.dw[1])
+    {
+    *str_ptr += align_offset + __builtin_ctzll(qw.dw[1]) / 8;
+    goto match;
+    }
+  }
+*str_ptr += 16;
+
+/* Main loop over whole 16-byte blocks. Note that the "match" label is inside
+the loop body and is also the target of the gotos above. */
+while (*str_ptr < str_end)
+  {
+  vect_t orig_data = VLD1Q(*str_ptr);
+#if PCRE2_CODE_UNIT_WIDTH != 8
+  orig_data = VANDQ(orig_data, char_mask);
+#endif
+  data = orig_data;
+
+#if defined(FFCS)
+  eq = VCEQQ(data, vc1);
+
+#elif defined(FFCS_2)
+  eq1 = VCEQQ(data, vc1);
+  eq2 = VCEQQ(data, vc2);
+  eq = VORRQ(eq1, eq2);
+
+#elif defined(FFCS_MASK)
+  eq = VORRQ(data, vmask);
+  eq = VCEQQ(eq, vc1);
+#endif
+
+#if defined(FFCPS)
+# if defined (FFCPS_DIFF1)
+  /* Build the second-character data from the previous and current blocks
+  instead of doing another load. */
+  data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1);
+# else
+  data2 = VLD1Q(*str_ptr - diff);
+#  if PCRE2_CODE_UNIT_WIDTH != 8
+  data2 = VANDQ(data2, char_mask);
+#  endif
+# endif
+
+# ifdef FFCPS_CHAR1A2A
+  data = VCEQQ(data, cmp1a);
+  data2 = VCEQQ(data2, cmp2a);
+# else
+  if (compare1_type == compare_match1)
+    data = VCEQQ(data, cmp1a);
+  else
+    data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
+  if (compare2_type == compare_match1)
+    data2 = VCEQQ(data2, cmp2a);
+  else
+    data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
+# endif
+
+  eq = VANDQ(data, data2);
+#endif
+
+  VST1Q(qw.mem, eq);
+  if (qw.dw[0])
+    *str_ptr += __builtin_ctzll(qw.dw[0]) / 8;
+  else if (qw.dw[1])
+    *str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
+  else {
+    /* Nothing in this block: move on to the next one. */
+    *str_ptr += 16;
+#if defined (FFCPS_DIFF1)
+    prev_data = orig_data;
+#endif
+    continue;
+  }
+
+/* A candidate position is in *str_ptr; it may lie at or beyond str_end
+because whole blocks are examined. */
+match:;
+  if (*str_ptr >= str_end)
+    /* Failed match. */
+    return NULL;
+
+#if defined(FF_UTF)
+  if (utf_continue((PCRE2_SPTR)*str_ptr - offs1))
+    {
+    /* Not a match. */
+    *str_ptr += IN_UCHARS(1);
+    goto restart;
+    }
+#endif
+
+  /* Match. */
+#if defined (FFCPS)
+  *str_ptr -= IN_UCHARS(offs1);
+#endif
+  return *str_ptr;
+  }
+
+/* Failed match. */
+return NULL;
+}
diff --git a/src/pcre2_jit_simd_inc.h b/src/pcre2_jit_simd_inc.h
new file mode 100644
index 0000000..502977f
--- /dev/null
+++ b/src/pcre2_jit_simd_inc.h
@@ -0,0 +1,2355 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+                    This module by Zoltan Herczeg
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+ + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#if !(defined SUPPORT_VALGRIND) + +#if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + || (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64)) + +typedef enum { + vector_compare_match1, + vector_compare_match1i, + vector_compare_match2, +} vector_compare_type; + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void) +{ +#if PCRE2_CODE_UNIT_WIDTH == 8 +/* The AVX2 code path is currently disabled. */ +/* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 31 : 15; */ +return 15; +#elif PCRE2_CODE_UNIT_WIDTH == 16 +/* The AVX2 code path is currently disabled. */ +/* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 15 : 7; */ +return 7; +#elif PCRE2_CODE_UNIT_WIDTH == 32 +/* The AVX2 code path is currently disabled. */ +/* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 
7 : 3; */ +return 3; +#else +#error "Unsupported unit width" +#endif +} +#else /* !SLJIT_CONFIG_X86 */ +static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void) +{ +#if PCRE2_CODE_UNIT_WIDTH == 8 +return 15; +#elif PCRE2_CODE_UNIT_WIDTH == 16 +return 7; +#elif PCRE2_CODE_UNIT_WIDTH == 32 +return 3; +#else +#error "Unsupported unit width" +#endif +} +#endif /* SLJIT_CONFIG_X86 */ + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg) +{ +#if PCRE2_CODE_UNIT_WIDTH == 8 +OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0); +return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80); +#elif PCRE2_CODE_UNIT_WIDTH == 16 +OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00); +return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00); +#else +#error "Unknown code width" +#endif +} +#endif + +#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X */ + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + +static sljit_s32 character_to_int32(PCRE2_UCHAR chr) +{ +sljit_u32 value = chr; +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define SIMD_COMPARE_TYPE_INDEX 0 +return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value); +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#define SIMD_COMPARE_TYPE_INDEX 1 +return (sljit_s32)((value << 16) | value); +#elif PCRE2_CODE_UNIT_WIDTH == 32 +#define SIMD_COMPARE_TYPE_INDEX 2 +return (sljit_s32)(value); +#else +#error "Unsupported unit width" +#endif +} + +static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type, + sljit_s32 reg_type, int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) +{ +sljit_u8 instruction[4]; + +if (reg_type == SLJIT_SIMD_REG_128) + { + instruction[0] = 0x66; + instruction[1] = 0x0f; + } +else + { + /* Two byte VEX prefix. 
*/ + instruction[0] = 0xc5; + instruction[1] = 0xfd; + } + +SLJIT_ASSERT(step >= 0 && step <= 3); + +if (compare_type != vector_compare_match2) + { + if (step == 0) + { + if (compare_type == vector_compare_match1i) + { + /* POR xmm1, xmm2/m128 */ + if (reg_type == SLJIT_SIMD_REG_256) + instruction[1] ^= (dst_ind << 3); + + /* Prefix is filled. */ + instruction[2] = 0xeb; + instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + return; + } + + if (step != 2) + return; + + /* PCMPEQB/W/D xmm1, xmm2/m128 */ + if (reg_type == SLJIT_SIMD_REG_256) + instruction[1] ^= (dst_ind << 3); + + /* Prefix is filled. */ + instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX; + instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; + sljit_emit_op_custom(compiler, instruction, 4); + return; + } + +if (reg_type == SLJIT_SIMD_REG_256) + { + if (step == 2) + return; + + if (step == 0) + { + step = 2; + instruction[1] ^= (dst_ind << 3); + } + } + +switch (step) + { + case 0: + SLJIT_ASSERT(reg_type == SLJIT_SIMD_REG_128); + + /* MOVDQA xmm1, xmm2/m128 */ + /* Prefix is filled. */ + instruction[2] = 0x6f; + instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind; + sljit_emit_op_custom(compiler, instruction, 4); + return; + + case 1: + /* PCMPEQB/W/D xmm1, xmm2/m128 */ + if (reg_type == SLJIT_SIMD_REG_256) + instruction[1] ^= (dst_ind << 3); + + /* Prefix is filled. */ + instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX; + instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; + sljit_emit_op_custom(compiler, instruction, 4); + return; + + case 2: + /* PCMPEQB/W/D xmm1, xmm2/m128 */ + /* Prefix is filled. */ + instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX; + instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind; + sljit_emit_op_custom(compiler, instruction, 4); + return; + + case 3: + /* POR xmm1, xmm2/m128 */ + if (reg_type == SLJIT_SIMD_REG_256) + instruction[1] ^= (dst_ind << 3); + + /* Prefix is filled. 
*/ + instruction[2] = 0xeb; + instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind; + sljit_emit_op_custom(compiler, instruction, 4); + return; + } +} + +#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD)) + +static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) +{ +DEFINE_COMPILER; +sljit_u8 instruction[8]; +/* The AVX2 code path is currently disabled. */ +/* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */ +sljit_s32 reg_type = SLJIT_SIMD_REG_128; +sljit_s32 value; +struct sljit_label *start; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *restart; +#endif +struct sljit_jump *quit; +struct sljit_jump *partial_quit[2]; +vector_compare_type compare_type = vector_compare_match1; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); +sljit_s32 data_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0); +sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1); +sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR2); +sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3); +sljit_u32 bit = 0; +int i; + +SLJIT_UNUSED_ARG(offset); + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = vector_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = vector_compare_match2; + } + } + +partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit[0]); + +/* First part (unaligned start) */ +value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO; +sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); + +if (char1 != char2) + sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR2, 0, SLJIT_IMM, 
character_to_int32(bit != 0 ? bit : char2)); + +OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); + +sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR1, SLJIT_FR1, 0); + +if (char1 != char2) + sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR2, SLJIT_FR2, 0); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif + +value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf; +OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value); + +value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128; +sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0); + +for (i = 0; i < 4; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); + +quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +/* Second part (aligned) */ +start = LABEL(); + +value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16; +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value); + +partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit[1]); + +value = (reg_type == SLJIT_SIMD_REG_256) ? 
SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128; +sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0); +for (i = 0; i < 4; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0); +CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); + +JUMPHERE(quit); + +SLJIT_ASSERT(tmp1_reg_ind < 8); +/* BSF r32, r/m32 */ +instruction[0] = 0x0f; +instruction[1] = 0xbc; +instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; +sljit_emit_op_custom(compiler, instruction, 3); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + +if (common->mode != PCRE2_JIT_COMPLETE) + { + JUMPHERE(partial_quit[0]); + JUMPHERE(partial_quit[1]); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); + SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); + } +else + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset > 0) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); + + quit = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, restart); + + JUMPHERE(quit); + } +#endif +} + +#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD)) + +static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) +{ +DEFINE_COMPILER; +sljit_u8 instruction[8]; +/* The AVX2 code path is currently disabled. */ +/* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 
SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */ +sljit_s32 reg_type = SLJIT_SIMD_REG_128; +sljit_s32 value; +struct sljit_label *start; +struct sljit_jump *quit; +jump_list *not_found = NULL; +vector_compare_type compare_type = vector_compare_match1; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); +sljit_s32 data_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0); +sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1); +sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR2); +sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3); +sljit_u32 bit = 0; +int i; + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = vector_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = vector_compare_match2; + } + } + +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); +OP1(SLJIT_MOV, TMP2, 0, TMP1, 0); +OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + +/* First part (unaligned start) */ + +value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO; +sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); + +if (char1 != char2) + sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); + +OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); + +sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR1, SLJIT_FR1, 0); + +if (char1 != char2) + sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR2, SLJIT_FR2, 0); + +value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf; +OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value); + +value = (reg_type == SLJIT_SIMD_REG_256) ? 
SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128; +sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0); + +for (i = 0; i < 4; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); + +quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +/* Second part (aligned) */ +start = LABEL(); + +value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16; +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value); + +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128; +sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0); + +for (i = 0; i < 4; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0); +CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); + +JUMPHERE(quit); + +SLJIT_ASSERT(tmp1_reg_ind < 8); +/* BSF r32, r/m32 */ +instruction[0] = 0x0f; +instruction[1] = 0xbc; +instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; +sljit_emit_op_custom(compiler, instruction, 3); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0); +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); +return not_found; +} + +#ifndef _WIN64 + +#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD)) + +static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, + PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, 
PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) +{ +DEFINE_COMPILER; +sljit_u8 instruction[8]; +/* The AVX2 code path is currently disabled. */ +/* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */ +sljit_s32 reg_type = SLJIT_SIMD_REG_128; +sljit_s32 value; +vector_compare_type compare1_type = vector_compare_match1; +vector_compare_type compare2_type = vector_compare_match1; +sljit_u32 bit1 = 0; +sljit_u32 bit2 = 0; +sljit_u32 diff = IN_UCHARS(offs1 - offs2); +sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); +sljit_s32 data1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0); +sljit_s32 data2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1); +sljit_s32 cmp1a_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR2); +sljit_s32 cmp2a_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3); +sljit_s32 cmp1b_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR4); +sljit_s32 cmp2b_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR5); +sljit_s32 tmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR6); +sljit_s32 tmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_TMP_FR0); +struct sljit_label *start; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *restart; +#endif +struct sljit_jump *jump[2]; +int i; + +SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2 && offs2 >= 0); +SLJIT_ASSERT(diff <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset())); + +/* Initialize. 
*/ +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); + + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0); + SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END); + } + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +if (char1a == char1b) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); +else + { + bit1 = char1a ^ char1b; + if (is_powerof2(bit1)) + { + compare1_type = vector_compare_match1i; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1)); + } + else + { + compare1_type = vector_compare_match2; + bit1 = 0; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b)); + } + } + +value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO; +sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR2, 0, TMP1, 0); + +if (char1a != char1b) + sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR4, 0, TMP2, 0); + +if (char2a == char2b) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); +else + { + bit2 = char2a ^ char2b; + if (is_powerof2(bit2)) + { + compare2_type = vector_compare_match1i; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2)); + } + else + { + compare2_type = vector_compare_match2; + bit2 = 0; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b)); + } + } + +sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR3, 0, TMP1, 0); + +if (char2a != char2b) + sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR5, 0, TMP2, 0); + 
+sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR2, SLJIT_FR2, 0); +if (char1a != char1b) + sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR4, SLJIT_FR4, 0); + +sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR3, SLJIT_FR3, 0); +if (char2a != char2b) + sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR5, SLJIT_FR5, 0); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff); +OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); +value = (reg_type == SLJIT_SIMD_REG_256) ? ~0x1f : ~0xf; +OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value); + +value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128; +sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0); + +jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0); + +sljit_emit_simd_mov(compiler, reg_type, SLJIT_FR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff); +jump[1] = JUMP(SLJIT_JUMP); + +JUMPHERE(jump[0]); + +if (reg_type == SLJIT_SIMD_REG_256) + { + if (diff != 16) + { + /* PSLLDQ ymm1, ymm2, imm8 */ + instruction[0] = 0xc5; + instruction[1] = (sljit_u8)(0xf9 ^ (data2_ind << 3)); + instruction[2] = 0x73; + instruction[3] = 0xc0 | (7 << 3) | data1_ind; + instruction[4] = diff & 0xf; + sljit_emit_op_custom(compiler, instruction, 5); + } + + instruction[0] = 0xc4; + instruction[1] = 0xe3; + if (diff < 16) + { + /* VINSERTI128 xmm1, xmm2, xmm3/m128 */ + /* instruction[0] = 0xc4; */ + /* instruction[1] = 0xe3; */ + instruction[2] = (sljit_u8)(0x7d ^ (data2_ind << 3)); + instruction[3] = 0x38; + SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR) <= 7); + instruction[4] = 0x40 | (data2_ind << 3) | sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR); + instruction[5] = (sljit_u8)(16 - diff); + instruction[6] = 1; + 
sljit_emit_op_custom(compiler, instruction, 7); + } + else + { + /* VPERM2I128 xmm1, xmm2, xmm3/m128 */ + /* instruction[0] = 0xc4; */ + /* instruction[1] = 0xe3; */ + value = (diff == 16) ? data1_ind : data2_ind; + instruction[2] = (sljit_u8)(0x7d ^ (value << 3)); + instruction[3] = 0x46; + instruction[4] = 0xc0 | (data2_ind << 3) | value; + instruction[5] = 0x08; + sljit_emit_op_custom(compiler, instruction, 6); + } + } +else + { + /* MOVDQA xmm1, xmm2/m128 */ + instruction[0] = 0x66; + instruction[1] = 0x0f; + instruction[2] = 0x6f; + instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind; + sljit_emit_op_custom(compiler, instruction, 4); + + /* PSLLDQ xmm1, imm8 */ + /* instruction[0] = 0x66; */ + /* instruction[1] = 0x0f; */ + instruction[2] = 0x73; + instruction[3] = 0xc0 | (7 << 3) | data2_ind; + instruction[4] = diff; + sljit_emit_op_custom(compiler, instruction, 5); + } + +JUMPHERE(jump[1]); + +value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf; +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value); + +for (i = 0; i < 4; i++) + { + fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); + fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); + } + +sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR1); +sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0); + +/* Ignore matches before the first STR_PTR. */ +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); + +jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +/* Main loop. */ +start = LABEL(); + +value = (reg_type == SLJIT_SIMD_REG_256) ? 
32 : 16; +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128; +sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0); +sljit_emit_simd_mov(compiler, reg_type, SLJIT_FR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff); + +for (i = 0; i < 4; i++) + { + fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind); + fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind); + } + +sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR1); +sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0); + +CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); + +JUMPHERE(jump[0]); + +SLJIT_ASSERT(tmp1_reg_ind < 8); +/* BSF r32, r/m32 */ +instruction[0] = 0x0f; +instruction[1] = 0xbc; +instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; +sljit_emit_op_custom(compiler, instruction, 3); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); + + jump[0] = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart); + + add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP)); + + JUMPHERE(jump[0]); + } +#endif + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); + +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +} + +#endif /* !_WIN64 */ + +#undef SIMD_COMPARE_TYPE_INDEX + +#endif 
/* SLJIT_CONFIG_X86 */ + +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__)) + +#include + +typedef union { + unsigned int x; + struct { unsigned char c1, c2, c3, c4; } c; +} int_char; + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +static SLJIT_INLINE int utf_continue(PCRE2_SPTR s) +{ +#if PCRE2_CODE_UNIT_WIDTH == 8 +return (*s & 0xc0) == 0x80; +#elif PCRE2_CODE_UNIT_WIDTH == 16 +return (*s & 0xfc00) == 0xdc00; +#else +#error "Unknown code width" +#endif +} +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +# define VECTOR_FACTOR 16 +# define vect_t uint8x16_t +# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X)) +# define VCEQQ vceqq_u8 +# define VORRQ vorrq_u8 +# define VST1Q vst1q_u8 +# define VDUPQ vdupq_n_u8 +# define VEXTQ vextq_u8 +# define VANDQ vandq_u8 +typedef union { + uint8_t mem[16]; + uint64_t dw[2]; +} quad_word; +#elif PCRE2_CODE_UNIT_WIDTH == 16 +# define VECTOR_FACTOR 8 +# define vect_t uint16x8_t +# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X)) +# define VCEQQ vceqq_u16 +# define VORRQ vorrq_u16 +# define VST1Q vst1q_u16 +# define VDUPQ vdupq_n_u16 +# define VEXTQ vextq_u16 +# define VANDQ vandq_u16 +typedef union { + uint16_t mem[8]; + uint64_t dw[2]; +} quad_word; +#else +# define VECTOR_FACTOR 4 +# define vect_t uint32x4_t +# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X)) +# define VCEQQ vceqq_u32 +# define VORRQ vorrq_u32 +# define VST1Q vst1q_u32 +# define VDUPQ vdupq_n_u32 +# define VEXTQ vextq_u32 +# define VANDQ vandq_u32 +typedef union { + uint32_t mem[4]; + uint64_t dw[2]; +} quad_word; +#endif + +#define FFCS +#include "pcre2_jit_neon_inc.h" +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +# define FF_UTF +# include "pcre2_jit_neon_inc.h" +# undef FF_UTF +#endif +#undef FFCS + +#define FFCS_2 +#include "pcre2_jit_neon_inc.h" +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +# define FF_UTF +# include 
"pcre2_jit_neon_inc.h" +# undef FF_UTF +#endif +#undef FFCS_2 + +#define FFCS_MASK +#include "pcre2_jit_neon_inc.h" +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +# define FF_UTF +# include "pcre2_jit_neon_inc.h" +# undef FF_UTF +#endif +#undef FFCS_MASK + +#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1 + +static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) +{ +DEFINE_COMPILER; +int_char ic; +struct sljit_jump *partial_quit, *quit; +/* Save temporary registers. */ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0); + +/* Prepare function arguments */ +OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0); +GET_LOCAL_BASE(SLJIT_R1, 0, LOCALS0); +OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset); + +if (char1 == char2) + { + ic.c.c1 = char1; + ic.c.c2 = char2; + OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf && offset > 0) + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_utf)); + else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs)); +#else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs)); +#endif + } +else + { + PCRE2_UCHAR mask = char1 ^ char2; + if (is_powerof2(mask)) + { + ic.c.c1 = char1 | mask; + ic.c.c2 = mask; + OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf && offset > 0) + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask_utf)); + else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask)); +#else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask)); 
+#endif + } + else + { + ic.c.c1 = char1; + ic.c.c2 = char2; + OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf && offset > 0) + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2_utf)); + else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2)); +#else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2)); +#endif + } + } +/* Restore registers. */ +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); + +/* Check return value. */ +partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit); + +/* Fast forward STR_PTR to the result of memchr. */ +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); +if (common->mode != PCRE2_JIT_COMPLETE) + { + quit = CMP(SLJIT_NOT_ZERO, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); + JUMPHERE(partial_quit); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); + SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); + JUMPHERE(quit); + } +} + +typedef enum { + compare_match1, + compare_match1i, + compare_match2, +} compare_type; + +static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2) +{ +if (ctype == compare_match2) + { + vect_t tmp = dst; + dst = VCEQQ(dst, cmp1); + tmp = VCEQQ(tmp, cmp2); + dst = VORRQ(dst, tmp); + return dst; + } + +if (ctype == compare_match1i) + dst = VORRQ(dst, cmp2); +dst = VCEQQ(dst, cmp1); +return dst; +} + +static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) +{ +#if PCRE2_CODE_UNIT_WIDTH == 8 +return 15; +#elif PCRE2_CODE_UNIT_WIDTH == 16 +return 7; +#elif PCRE2_CODE_UNIT_WIDTH == 32 +return 3; +#else +#error "Unsupported unit width" 
+#endif +} + +/* ARM doesn't have a shift left across lanes. */ +static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n) +{ +vect_t zero = VDUPQ(0); +SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR); +/* VEXTQ takes an immediate as last argument. */ +#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X); +switch (n) + { + C(1); C(2); C(3); +#if PCRE2_CODE_UNIT_WIDTH != 32 + C(4); C(5); C(6); C(7); +# if PCRE2_CODE_UNIT_WIDTH != 16 + C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15); +# endif +#endif + default: + /* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't + happen. The return is still here for compilers to not warn. */ + return a; + } +} + +#define FFCPS +#define FFCPS_DIFF1 +#define FFCPS_CHAR1A2A + +#define FFCPS_0 +#include "pcre2_jit_neon_inc.h" +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +# define FF_UTF +# include "pcre2_jit_neon_inc.h" +# undef FF_UTF +#endif +#undef FFCPS_0 + +#undef FFCPS_CHAR1A2A + +#define FFCPS_1 +#include "pcre2_jit_neon_inc.h" +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +# define FF_UTF +# include "pcre2_jit_neon_inc.h" +# undef FF_UTF +#endif +#undef FFCPS_1 + +#undef FFCPS_DIFF1 + +#define FFCPS_DEFAULT +#include "pcre2_jit_neon_inc.h" +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +# define FF_UTF +# include "pcre2_jit_neon_inc.h" +# undef FF_UTF +#endif +#undef FFCPS + +#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1 + +static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, + PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) +{ +DEFINE_COMPILER; +sljit_u32 diff = IN_UCHARS(offs1 - offs2); +struct sljit_jump *partial_quit; +int_char ic; +SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); +SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset())); +SLJIT_ASSERT(compiler->scratches == 5); + +/* Save temporary register STR_PTR. 
*/ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); + +/* Prepare arguments for the function call. */ +if (common->match_end_ptr == 0) + OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0); +else + { + OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); + + OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, SLJIT_R0, 0); + SELECT(SLJIT_LESS, SLJIT_R0, STR_END, 0, SLJIT_R0); + } + +GET_LOCAL_BASE(SLJIT_R1, 0, LOCALS0); +OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1); +OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2); +ic.c.c1 = char1a; +ic.c.c2 = char1b; +ic.c.c3 = char2a; +ic.c.c4 = char2b; +OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, ic.x); + +if (diff == 1) { + if (char1a == char1b && char2a == char2b) { +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0_utf)); + else +#endif + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0)); + } else { +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1_utf)); + else +#endif + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1)); + } +} else { +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default_utf)); + else +#endif + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default)); +} + +/* Restore STR_PTR register. */ +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + +/* Check return value. 
*/ +partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +add_jump(compiler, &common->failed_match, partial_quit); + +/* Fast forward STR_PTR to the result of memchr. */ +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); + +JUMPHERE(partial_quit); +} + +#endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */ + +#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define VECTOR_ELEMENT_SIZE 0 +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#define VECTOR_ELEMENT_SIZE 1 +#elif PCRE2_CODE_UNIT_WIDTH == 32 +#define VECTOR_ELEMENT_SIZE 2 +#else +#error "Unsupported unit width" +#endif + +static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg, + sljit_s32 base_reg, sljit_s32 index_reg) +{ +sljit_u16 instruction[3]; + +instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg); +instruction[1] = (sljit_u16)(base_reg << 12); +instruction[2] = (sljit_u16)((0x8 << 8) | (vlbb ? 0x07 : 0x06)); + +sljit_emit_op_custom(compiler, instruction, 6); +} + +#if PCRE2_CODE_UNIT_WIDTH == 32 + +static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg, + PCRE2_UCHAR chr, sljit_s32 tmp_general_reg) +{ +sljit_u16 instruction[3]; + +SLJIT_ASSERT(step >= 0 && step <= 1); + +if (chr < 0x7fff) + { + if (step == 1) + return; + + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4)); + instruction[1] = (sljit_u16)chr; + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } + +if (step == 0) + { + OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr); + + /* VLVG */ + instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(SLJIT_GP_REGISTER, tmp_general_reg)); + instruction[1] = 0; + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } + +/* VREP */ 
+instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg); +instruction[1] = 0; +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d); +sljit_emit_op_custom(compiler, instruction, 6); +} + +#endif + +static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type, + int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) +{ +sljit_u16 instruction[3]; + +SLJIT_ASSERT(step >= 0 && step <= 2); + +if (step == 1) + { + /* VCEQ */ + instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(cmp1_ind << 12); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } + +if (compare_type != vector_compare_match2) + { + if (step == 0 && compare_type == vector_compare_match1i) + { + /* VO */ + instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(cmp2_ind << 12); + instruction[2] = (sljit_u16)((0xe << 8) | 0x6a); + sljit_emit_op_custom(compiler, instruction, 6); + } + return; + } + +switch (step) + { + case 0: + /* VCEQ */ + instruction[0] = (sljit_u16)(0xe700 | (tmp_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(cmp2_ind << 12); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8); + sljit_emit_op_custom(compiler, instruction, 6); + return; + + case 2: + /* VO */ + instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(tmp_ind << 12); + instruction[2] = (sljit_u16)((0xe << 8) | 0x6a); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } +} + +#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1 + +static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) +{ +DEFINE_COMPILER; +sljit_u16 instruction[3]; +struct sljit_label *start; +#if defined SUPPORT_UNICODE && 
PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *restart; +#endif +struct sljit_jump *quit; +struct sljit_jump *partial_quit[2]; +vector_compare_type compare_type = vector_compare_match1; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR); +sljit_s32 data_ind = 0; +sljit_s32 tmp_ind = 1; +sljit_s32 cmp1_ind = 2; +sljit_s32 cmp2_ind = 3; +sljit_s32 zero_ind = 4; +sljit_u32 bit = 0; +int i; + +SLJIT_UNUSED_ARG(offset); + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = vector_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = vector_compare_match2; + } + } + +partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit[0]); + +/* First part (unaligned start) */ + +OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4)); +instruction[1] = (sljit_u16)(char1 | bit); +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +if (char1 != char2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4)); + instruction[1] = (sljit_u16)(bit != 0 ? bit : char2); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +#else /* PCRE2_CODE_UNIT_WIDTH == 32 */ + +for (int i = 0; i < 2; i++) + { + replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1); + + if (char1 != char2) + replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? 
bit : char2, TMP1); + } + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +if (compare_type == vector_compare_match2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); + instruction[1] = 0; + instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); + sljit_emit_op_custom(compiler, instruction, 6); + } + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif + +load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16); + +/* Second part (aligned) */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); + +partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + 
add_jump(compiler, &common->failed_match, partial_quit[1]); + +load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); +JUMPTO(SLJIT_OVERFLOW, start); + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + +JUMPHERE(quit); + +if (common->mode != PCRE2_JIT_COMPLETE) + { + JUMPHERE(partial_quit[0]); + JUMPHERE(partial_quit[1]); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); + SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); + } +else + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset > 0) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); + + quit = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 
0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16); + JUMPTO(SLJIT_JUMP, restart); + + JUMPHERE(quit); + } +#endif +} + +#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1 + +static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) +{ +DEFINE_COMPILER; +sljit_u16 instruction[3]; +struct sljit_label *start; +struct sljit_jump *quit; +jump_list *not_found = NULL; +vector_compare_type compare_type = vector_compare_match1; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); +sljit_s32 tmp3_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP3); +sljit_s32 data_ind = 0; +sljit_s32 tmp_ind = 1; +sljit_s32 cmp1_ind = 2; +sljit_s32 cmp2_ind = 3; +sljit_s32 zero_ind = 4; +sljit_u32 bit = 0; +int i; + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = vector_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = vector_compare_match2; + } + } + +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +/* First part (unaligned start) */ + +OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4)); +instruction[1] = (sljit_u16)(char1 | bit); +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +if (char1 != char2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4)); + instruction[1] = (sljit_u16)(bit != 0 ? 
bit : char2); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +#else /* PCRE2_CODE_UNIT_WIDTH == 32 */ + +for (int i = 0; i < 2; i++) + { + replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3); + + if (char1 != char2) + replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3); + } + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +if (compare_type == vector_compare_match2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); + instruction[1] = 0; + instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); + sljit_emit_op_custom(compiler, instruction, 6); + } + +load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); +quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0); + 
+OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16); + +/* Second part (aligned) */ +start = LABEL(); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16); + +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); +JUMPTO(SLJIT_OVERFLOW, start); + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); + +JUMPHERE(quit); +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +return not_found; +} + +#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1 + +static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, + PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) +{ +DEFINE_COMPILER; +sljit_u16 instruction[3]; +struct sljit_label *start; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 
32 +struct sljit_label *restart; +#endif +struct sljit_jump *quit; +struct sljit_jump *jump[2]; +vector_compare_type compare1_type = vector_compare_match1; +vector_compare_type compare2_type = vector_compare_match1; +sljit_u32 bit1 = 0; +sljit_u32 bit2 = 0; +sljit_s32 diff = IN_UCHARS(offs2 - offs1); +sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); +sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR); +sljit_s32 data1_ind = 0; +sljit_s32 data2_ind = 1; +sljit_s32 tmp1_ind = 2; +sljit_s32 tmp2_ind = 3; +sljit_s32 cmp1a_ind = 4; +sljit_s32 cmp1b_ind = 5; +sljit_s32 cmp2a_ind = 6; +sljit_s32 cmp2b_ind = 7; +sljit_s32 zero_ind = 8; +int i; + +SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); +SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset())); +SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0); + +if (char1a != char1b) + { + bit1 = char1a ^ char1b; + compare1_type = vector_compare_match1i; + + if (!is_powerof2(bit1)) + { + bit1 = 0; + compare1_type = vector_compare_match2; + } + } + +if (char2a != char2b) + { + bit2 = char2a ^ char2b; + compare2_type = vector_compare_match1i; + + if (!is_powerof2(bit2)) + { + bit2 = 0; + compare2_type = vector_compare_match2; + } + } + +/* Initialize. 
*/ +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); + + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0); + SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END); + } + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); +OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4)); +instruction[1] = (sljit_u16)(char1a | bit1); +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +if (char1a != char1b) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4)); + instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4)); +instruction[1] = (sljit_u16)(char2a | bit2); +/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ +sljit_emit_op_custom(compiler, instruction, 6); + +if (char2a != char2b) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4)); + instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +#else /* PCRE2_CODE_UNIT_WIDTH == 32 */ + +for (int i = 0; i < 2; i++) + { + replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1); + + if (char1a != char1b) + replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? 
bit1 : char1b, TMP1); + + replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1); + + if (char2a != char2b) + replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1); + } + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); +instruction[1] = 0; +instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif + +jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0); +load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0); +jump[1] = JUMP(SLJIT_JUMP); +JUMPHERE(jump[0]); +load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0); +JUMPHERE(jump[1]); + +load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16); + +for (i = 0; i < 3; i++) + { + fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); + fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); + } + +/* VN */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)(data2_ind << 12); +instruction[2] = (sljit_u16)((0xe << 8) | 0x68); +sljit_emit_op_custom(compiler, instruction, 6); + +/* VFENE */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); +instruction[2] = (sljit_u16)((0xe << 8) | 0x81); +sljit_emit_op_custom(compiler, instruction, 6); + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + 
+OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff); + +/* Main loop. */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0); +load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind); + +for (i = 0; i < 3; i++) + { + fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); + fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); + } + +/* VN */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)(data2_ind << 12); +instruction[2] = (sljit_u16)((0xe << 8) | 0x68); +sljit_emit_op_custom(compiler, instruction, 6); + +/* VFENE */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); +instruction[2] = (sljit_u16)((0xe << 8) | 0x81); +sljit_emit_op_custom(compiler, instruction, 6); + +sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); +JUMPTO(SLJIT_OVERFLOW, start); + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +JUMPHERE(quit); + +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); + + quit = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, 
&common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + /* TMP1 contains diff. */ + OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15); + OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); + JUMPTO(SLJIT_JUMP, restart); + + JUMPHERE(quit); + } +#endif + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); + +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +} + +#endif /* SLJIT_CONFIG_S390X */ + +#if (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64) + +#ifdef __linux__ +/* Using getauxval(AT_HWCAP) under Linux for detecting whether LSX is available */ +#include +#define LOONGARCH_HWCAP_LSX (1 << 4) +#define HAS_LSX_SUPPORT ((getauxval(AT_HWCAP) & LOONGARCH_HWCAP_LSX) != 0) +#else +#define HAS_LSX_SUPPORT 0 +#endif + +typedef sljit_ins sljit_u32; + +#define SI12_IMM_MASK 0x003ffc00 +#define UI5_IMM_MASK 0x00007c00 +#define UI2_IMM_MASK 0x00000c00 + +#define VD(vd) ((sljit_ins)vd << 0) +#define VJ(vj) ((sljit_ins)vj << 5) +#define VK(vk) ((sljit_ins)vk << 10) +#define RD_V(rd) ((sljit_ins)rd << 0) +#define RJ_V(rj) ((sljit_ins)rj << 5) + +#define IMM_SI12(imm) (((sljit_ins)(imm) << 10) & SI12_IMM_MASK) +#define IMM_UI5(imm) (((sljit_ins)(imm) << 10) & UI5_IMM_MASK) +#define IMM_UI2(imm) (((sljit_ins)(imm) << 10) & UI2_IMM_MASK) + +// LSX OPCODES: +#define VLD 0x2c000000 +#define VOR_V 0x71268000 +#define VAND_V 0x71260000 +#define VBSLL_V 0x728e0000 +#define VMSKLTZ_B 0x729c4000 +#define VPICKVE2GR_WU 0x72f3e000 + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define VREPLGR2VR 0x729f0000 +#define VSEQ 0x70000000 +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#define VREPLGR2VR 0x729f0400 +#define VSEQ 0x70008000 +#else +#define VREPLGR2VR 0x729f0800 +#define VSEQ 0x70010000 +#endif + +static void fast_forward_char_pair_lsx_compare(struct sljit_compiler *compiler, vector_compare_type compare_type, + sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) +{ +if (compare_type != 
vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + { + /* VOR.V vd, vj, vk */ + push_inst(compiler, VOR_V | VD(dst_ind) | VJ(cmp2_ind) | VK(dst_ind)); + } + + /* VSEQ.B/H/W vd, vj, vk */ + push_inst(compiler, VSEQ | VD(dst_ind) | VJ(dst_ind) | VK(cmp1_ind)); + return; + } + +/* VBSLL.V vd, vj, ui5 */ +push_inst(compiler, VBSLL_V | VD(tmp_ind) | VJ(dst_ind) | IMM_UI5(0)); + +/* VSEQ.B/H/W vd, vj, vk */ +push_inst(compiler, VSEQ | VD(dst_ind) | VJ(dst_ind) | VK(cmp1_ind)); + +/* VSEQ.B/H/W vd, vj, vk */ +push_inst(compiler, VSEQ | VD(tmp_ind) | VJ(tmp_ind) | VK(cmp2_ind)); + +/* VOR vd, vj, vk */ +push_inst(compiler, VOR_V | VD(dst_ind) | VJ(tmp_ind) | VK(dst_ind)); +return; +} + +#define JIT_HAS_FAST_FORWARD_CHAR_SIMD HAS_LSX_SUPPORT + +static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) +{ +DEFINE_COMPILER; +struct sljit_label *start; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *restart; +#endif +struct sljit_jump *quit; +struct sljit_jump *partial_quit[2]; +vector_compare_type compare_type = vector_compare_match1; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR); +sljit_s32 data_ind = 0; +sljit_s32 tmp_ind = 1; +sljit_s32 cmp1_ind = 2; +sljit_s32 cmp2_ind = 3; +sljit_u32 bit = 0; + +SLJIT_UNUSED_ARG(offset); + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = vector_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = vector_compare_match2; + } + } + +partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit[0]); + +/* First part (unaligned start) */ + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | bit); + +/* VREPLGR2VR.B/H/W vd, rj */ +push_inst(compiler, VREPLGR2VR | VD(cmp1_ind) | 
RJ_V(tmp1_reg_ind)); + +if (char1 != char2) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, bit != 0 ? bit : char2); + + /* VREPLGR2VR.B/H/W vd, rj */ + push_inst(compiler, VREPLGR2VR | VD(cmp2_ind) | RJ_V(tmp1_reg_ind)); + } + +OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif +/* TMP2 = offset of STR_PTR inside its 16 byte block; STR_PTR is rounded down to the block start. */ +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +/* VLD vd, rj, si12 */ +push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0)); +fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +/* VMSKLTZ.B vd, vj */ +push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind)); + +/* VPICKVE2GR.WU rd, vj, ui2 */ +push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0)); +/* Ignore matches before the first STR_PTR. */ +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); + +quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +/* Second part (aligned): one 16 byte block per iteration. */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); + +partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit[1]); + +/* VLD vd, rj, si12 */ +push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0)); +fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +/* VMSKLTZ.B vd, vj */ +push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind)); + +/* VPICKVE2GR.WU rd, vj, ui2 */ +push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0)); + +CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); + +JUMPHERE(quit); + +/* CTZ.W rd, rj (TMP1 = index of the lowest set bit of the match mask) */ +push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind)); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + +if (common->mode 
!= PCRE2_JIT_COMPLETE) + { + JUMPHERE(partial_quit[0]); + JUMPHERE(partial_quit[1]); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); + SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); + } +else + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset > 0) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); + + quit = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, restart); + + JUMPHERE(quit); + } +#endif +} + +#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD HAS_LSX_SUPPORT +/* Emits code that searches for char1 or char2 from the address held in TMP1 up to STR_END. Returns the list of jumps taken when nothing is found. STR_PTR is used as the scan pointer; it is saved in TMP3 and restored before the emitted code falls through. */ +static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) +{ +DEFINE_COMPILER; +struct sljit_label *start; +struct sljit_jump *quit; +jump_list *not_found = NULL; +vector_compare_type compare_type = vector_compare_match1; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR); +sljit_s32 data_ind = 0; +sljit_s32 tmp_ind = 1; +sljit_s32 cmp1_ind = 2; +sljit_s32 cmp2_ind = 3; +sljit_u32 bit = 0; + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = vector_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = vector_compare_match2; + } + } + +add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); +OP1(SLJIT_MOV, TMP2, 0, TMP1, 0); +OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + +/* First part (unaligned start) */ + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | bit); + +/* VREPLGR2VR vd, rj */ +push_inst(compiler, VREPLGR2VR | VD(cmp1_ind) | RJ_V(tmp1_reg_ind)); + +if (char1 != 
char2) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, bit != 0 ? bit : char2); + /* VREPLGR2VR vd, rj */ + push_inst(compiler, VREPLGR2VR | VD(cmp2_ind) | RJ_V(tmp1_reg_ind)); + } + +OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +/* VLD vd, rj, si12 */ +push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0)); +fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +/* VMSKLTZ.B vd, vj */ +push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind)); + +/* VPICKVE2GR.WU rd, vj, ui2 */ +push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0)); +/* Ignore matches before the start address. */ +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); + +quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +/* Second part (aligned) */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); + +add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +/* VLD vd, rj, si12 */ +push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0)); +fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +/* VMSKLTZ.B vd, vj */ +push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind)); + +/* VPICKVE2GR.WU rd, vj, ui2 */ +push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0)); + +CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); + +JUMPHERE(quit); + +/* CTZ.W rd, rj */ +push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind)); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0); +add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); +/* Restore the STR_PTR saved in TMP3 on entry. */ +OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); +return not_found; +} + +#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD HAS_LSX_SUPPORT + +static void 
fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, + PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) +{ +DEFINE_COMPILER; +vector_compare_type compare1_type = vector_compare_match1; +vector_compare_type compare2_type = vector_compare_match1; +sljit_u32 bit1 = 0; +sljit_u32 bit2 = 0; +sljit_u32 diff = IN_UCHARS(offs1 - offs2); +sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); +sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR); +sljit_s32 data1_ind = 0; +sljit_s32 data2_ind = 1; +sljit_s32 tmp1_ind = 2; +sljit_s32 tmp2_ind = 3; +sljit_s32 cmp1a_ind = 4; +sljit_s32 cmp1b_ind = 5; +sljit_s32 cmp2a_ind = 6; +sljit_s32 cmp2b_ind = 7; +struct sljit_label *start; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *restart; +#endif +struct sljit_jump *jump[2]; +/* Emits code that advances STR_PTR to the next position where the code unit at offs1 is char1a or char1b and the code unit at offs2 is char2a or char2b. STR_PTR is biased by offs1 while scanning; data1 is loaded at STR_PTR and data2 at STR_PTR - diff. Reaching STR_END jumps to failed_match. */ +SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); +SLJIT_ASSERT(diff <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset())); + +/* Initialize. 
*/ +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0); + SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END); + } + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +if (char1a == char1b) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a); +else + { + bit1 = char1a ^ char1b; + if (is_powerof2(bit1)) + { + compare1_type = vector_compare_match1i; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a | bit1); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit1); + } + else + { + compare1_type = vector_compare_match2; + bit1 = 0; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char1b); + } + } + +/* VREPLGR2VR vd, rj */ +push_inst(compiler, VREPLGR2VR | VD(cmp1a_ind) | RJ_V(tmp1_reg_ind)); + +if (char1a != char1b) + { + /* VREPLGR2VR vd, rj */ + push_inst(compiler, VREPLGR2VR | VD(cmp1b_ind) | RJ_V(tmp2_reg_ind)); + } + +if (char2a == char2b) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a); +else + { + bit2 = char2a ^ char2b; + if (is_powerof2(bit2)) + { + compare2_type = vector_compare_match1i; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a | bit2); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit2); + } + else + { + compare2_type = vector_compare_match2; + bit2 = 0; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char2b); + } + } + +/* VREPLGR2VR vd, rj */ +push_inst(compiler, VREPLGR2VR | VD(cmp2a_ind) | RJ_V(tmp1_reg_ind)); + +if (char2a != char2b) + { + /* VREPLGR2VR vd, rj */ + push_inst(compiler, VREPLGR2VR | VD(cmp2b_ind) | RJ_V(tmp2_reg_ind)); + } + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff); 
+OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +/* VLD vd, rj, si12 */ +push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0)); + +jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0); + +/* VLD vd, rj, si12 */ +push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff)); +jump[1] = JUMP(SLJIT_JUMP); + +JUMPHERE(jump[0]); + +/* VBSLL.V vd, vj, ui5 */ +push_inst(compiler, VBSLL_V | VD(data2_ind) | VJ(data1_ind) | IMM_UI5(diff)); + +JUMPHERE(jump[1]); + +fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); +fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); + +/* VAND.V vd, vj, vk (a candidate needs both characters to match, as in the main loop) */ +push_inst(compiler, VAND_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind)); + +/* VMSKLTZ.B vd, vj */ +push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind)); + +/* VPICKVE2GR.WU rd, vj, ui2 */ +push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0)); + +/* Ignore matches before the first STR_PTR. */ +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); + +jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +/* Main loop. 
*/ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +/* VLD vd, rj, si12 */ +push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0)); +push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff)); + +fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind); +fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind); + +/* VAND.V vd, vj, vk */ +push_inst(compiler, VAND_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind)); + +/* VMSKLTZ.B vd, vj */ +push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind)); + +/* VPICKVE2GR.WU rd, vj, ui2 */ +push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0)); + +CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); + +JUMPHERE(jump[0]); + +/* CTZ.W rd, rj */ +push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind)); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); + + jump[0] = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart); + + add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP)); + + JUMPHERE(jump[0]); + } +#endif + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); +/* Restore the STR_END saved in TMP3 during initialization. */ +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +} + +#endif /* SLJIT_CONFIG_LOONGARCH_64 */ + +#endif /* !SUPPORT_VALGRIND */ diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c new file mode 100644 index 0000000..6d95bb9 --- /dev/null +++ 
b/src/pcre2_jit_test.c @@ -0,0 +1,2528 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> + +#define PCRE2_CODE_UNIT_WIDTH 0 +#include "pcre2.h" + +/* + Letter characters: + \xe6\x92\xad = 0x64ad = 25773 (kanji) + Non-letter characters: + \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark) + \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888 + \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character) + \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character) + Newlines: + \xc2\x85 = 0x85 = 133 (NExt Line = NEL) + \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator) + Othercase pairs: + \xc3\xa9 = 0xe9 = 233 (e') + \xc3\x89 = 0xc9 = 201 (E') + \xc3\xa1 = 0xe1 = 225 (a') + \xc3\x81 = 0xc1 = 193 (A') + \x53 = 0x53 = S + \x73 = 0x73 = s + \xc5\xbf = 0x17f = 383 (long S) + \xc8\xba = 0x23a = 570 + \xe2\xb1\xa5 = 0x2c65 = 11365 + \xe1\xbd\xb8 = 0x1f78 = 8056 + \xe1\xbf\xb8 = 0x1ff8 = 8184 + \xf0\x90\x90\x80 = 0x10400 = 66560 + \xf0\x90\x90\xa8 = 0x10428 = 66600 + \xc7\x84 = 0x1c4 = 452 + \xc7\x85 = 0x1c5 = 453 + \xc7\x86 = 0x1c6 = 454 + Caseless sets: + ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586} + ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1 + ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a} + + Mark property: + \xcc\x8d = 0x30d = 781 + Special: + \xc2\x80 = 0x80 = 128 (lowest 2 byte character) + \xdf\xbf = 0x7ff = 2047 (highest 2 byte character) + \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character) + 
\xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character) + \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character) + \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character) +*/ + +static int regression_tests(void); +static int invalid_utf8_regression_tests(void); +static int invalid_utf16_regression_tests(void); +static int invalid_utf32_regression_tests(void); +/* Queries PCRE2_CONFIG_JIT through the first enabled code unit width; returns 1 if JIT is not available, otherwise the bitwise OR of the four regression suite results. */ +int main(void) +{ + int jit = 0; +#if defined SUPPORT_PCRE2_8 + pcre2_config_8(PCRE2_CONFIG_JIT, &jit); +#elif defined SUPPORT_PCRE2_16 + pcre2_config_16(PCRE2_CONFIG_JIT, &jit); +#elif defined SUPPORT_PCRE2_32 + pcre2_config_32(PCRE2_CONFIG_JIT, &jit); +#endif + if (!jit) { + printf("JIT must be enabled to run pcre2_jit_test\n"); + return 1; + } + return regression_tests() + | invalid_utf8_regression_tests() + | invalid_utf16_regression_tests() + | invalid_utf32_regression_tests(); +} + +/* --------------------------------------------------------------------------------------- */ + +#if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32) +#error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined +#endif + +#define MU (PCRE2_MULTILINE | PCRE2_UTF) +#define MUP (PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP) +#define CMU (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF) +#define CMUP (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP) +#define M (PCRE2_MULTILINE) +#define MP (PCRE2_MULTILINE | PCRE2_UCP) +#define U (PCRE2_UTF) +#define CM (PCRE2_CASELESS | PCRE2_MULTILINE) + +#define BSR(x) ((x) << 16) +#define A PCRE2_NEWLINE_ANYCRLF + +#define GET_NEWLINE(x) ((x) & 0xffff) +#define GET_BSR(x) ((x) >> 16) + +#define OFFSET_MASK 0x00ffff +#define F_NO8 0x010000 +#define F_NO16 0x020000 +#define F_NO32 0x020000 /* NOTE(review): same value as F_NO16 - confirm this is intended */ +#define F_NOMATCH 0x040000 +#define F_DIFF 0x080000 +#define F_FORCECONV 0x100000 +#define F_PROPERTY 0x200000 + +struct regression_test_case { + uint32_t compile_options; + int newline; + int match_options; + int 
start_offset; + const char *pattern; + const char *input; +}; + +static struct regression_test_case regression_test_cases[] = { + /* Constant strings. */ + { MU, A, 0, 0, "AbC", "AbAbC" }, + { MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" }, + { CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" }, + { M, A, 0, 0, "[^a]", "aAbB" }, + { CM, A, 0, 0, "[^m]", "mMnN" }, + { M, A, 0, 0, "a[^b][^#]", "abacd" }, + { CM, A, 0, 0, "A[^B][^E]", "abacd" }, + { CMU, A, 0, 0, "[^x][^#]", "XxBll" }, + { MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" }, + { CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" }, + { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" }, + { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" }, + { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" }, + { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" }, + { MU, A, 0, 0, "[axd]", "sAXd" }, + { CMU, A, 0, 0, "[axd]", "sAXd" }, + { CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" }, + { MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" }, + { MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" }, + { CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." }, + { MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." 
}, + { MU, A, 0, 0, "[^a]", "\xc2\x80[]" }, + { CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" }, + { CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" }, + { PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" }, + { PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" }, + { PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" }, +#ifndef NEVER_BACKSLASH_C + { M, A, 0, 0, "\\Ca", "cda" }, + { CM, A, 0, 0, "\\Ca", "CDA" }, + { M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" }, + { CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" }, +#endif /* !NEVER_BACKSLASH_C */ + { CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, + { CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, + { CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, + { CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" }, + { M, A, 0, 0, "[3-57-9]", "5" }, + { PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890", + "12345678901234567890123456789012345678901234567890123456789012345678901234567890" }, + { 0, A, 0, 0, "..a.......b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" }, + { 0, A, 0, 0, "..a.....b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" }, + + /* Assertions. */ + { MU, A, 0, 0, "\\b[^A]", "A_B#" }, + { M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" }, + { MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" }, + { MP, A, 0, 0, "\\B", "_\xa1" }, + { MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," }, + { MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" 
}, + { MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" }, + { MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" }, + { MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" }, + { MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" }, + { CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" }, + { M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" }, + { M, A, 0, 1 | F_NOMATCH, "^", "\n" }, + { 0, 0, 0, 0, "^ab", "ab" }, + { 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" }, + { M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" }, + { MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" }, + { M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" }, + { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" }, + { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" }, + { 0, 0, 0, 0, "ab$", "ab" }, + { 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" }, + { PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" }, + { M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" }, + { M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" }, + { MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" }, + { MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" }, + { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" }, + { M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" }, + { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" }, + { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" }, + { U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" }, + { M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" }, + { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" }, + { U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" }, + { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" }, + { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" }, + { U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" }, + { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" }, + { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" }, + { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" 
}, + { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" }, + { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" }, + { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" }, + { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" }, + { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" }, + { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" }, + { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" }, + { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" }, + { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" }, + { M, A, 0, 0, "\\Aa", "aaa" }, + { M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" }, + { M, A, 0, 1, "\\Ga", "aaa" }, + { M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" }, + { M, A, 0, 0, "a\\z", "aaa" }, + { M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" }, + + /* Brackets and alternatives. */ + { MU, A, 0, 0, "(ab|bb|cd)", "bacde" }, + { MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" }, + { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" }, + { CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" }, + { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" }, + { MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" }, + { MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" }, + { MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" }, + { MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" }, + { MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" }, + { U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" }, + { U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" }, + { CM, A, 0, 0, "ab|cd", "CD" }, + { CM, A, 0, 0, "a1277|a1377|bX487", "bx487" }, + { CM, A, 0, 0, "a1277|a1377|bx487", "bX487" }, + { 0, A, 0, 0, "(a|)b*+a", "a" }, + { 0, A, 0, 0 | F_NOMATCH, 
"(.|.|.|.|.)(|.|.|.|.)(.||.|.|.)(.|.||.|.)(.|.|.||.)(.|.|.|.|)(A|.|.|.|.)(.|A|.|.|.)(.|.|A|.|.)(.|.|.|A|.)(.|.|.|.|A)(B|.|.|.|.)(.|B|.|.|.)(.|.|B|.|.)(.|.|.|B|.)(.|.|.|.|B)xa", "1234567890123456ax" }, + + /* Greedy and non-greedy ? operators. */ + { MU, A, 0, 0, "(?:a)?a", "laab" }, + { CMU, A, 0, 0, "(A)?A", "llaab" }, + { MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */ + { MU, A, 0, 0, "(a)?a", "manm" }, + { CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" }, + { MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" }, + { MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" }, + + /* Greedy and non-greedy + operators */ + { MU, A, 0, 0, "(aa)+aa", "aaaaaaa" }, + { MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" }, + { MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" }, + { MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" }, + { MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" }, + { MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" }, + { MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" }, + { MU, A, 0, 0, "(aa|bb){8,1000}", "abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" }, + + /* Greedy and non-greedy * operators */ + { CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" }, + { MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" }, + { MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" }, + { CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" }, + { MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" }, + { MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" }, + { M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" }, + { M, A, 0, 0, "((?:a|)*){0}a", "a" }, + + /* Combining ? 
+ * operators */ + { MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" }, + { MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" }, + { MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" }, + { MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" }, + { MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" }, + + /* Single character iterators. */ + { MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" }, + { MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" }, + { MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" }, + { MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" }, + { MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" }, + { MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" }, + { MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" }, + { MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" }, + { MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" }, + { MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" }, + { MU, A, 0, 0, "(a?+[^b])+", "babaacacb" }, + { MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" }, + { CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" }, + { CMU, A, 0, 0, "[c-f]+k", "DemmFke" }, + { MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" }, + { MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" }, + { CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" }, + { CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" }, + { CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" }, + { CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" }, + { MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" }, + { CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" }, + { MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" }, + { MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" }, + { MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" }, + { MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" }, + { CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", 
"aaaaddd" }, + { CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" }, + { CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" }, + { CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" }, + { MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" }, + { MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" }, + { MU, A, 0, 0, "\\d+123", "987654321,01234" }, + { MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" }, + { MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" }, + { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."}, + { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."}, + { MU, A, 0, 0, ".[ab]*.", "xx" }, + { MU, A, 0, 0, ".[ab]*a", "xxa" }, + { MU, A, 0, 0, ".[ab]?.", "xx" }, + { MU, A, 0, 0, "_[ab]+_*a", "_aa" }, + { MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" }, + { MU, A, 0, 0, "(?P\\d+)m|M", "4M" }, + { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\n?.+#", "\n,\n,#" }, + { 0, A, 0, 0, "<(\\w+)[\\s\\w]+id>", "
" }, + + /* Bracket repeats with limit. */ + { MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" }, + { MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" }, + { MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" }, + { MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" }, + { MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" }, + { MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" }, + { MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" }, + { MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" }, + { MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" }, + + /* Basic character sets. */ + { MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " }, + { MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" }, + { MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" }, + { MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" }, + { MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" }, + { MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" }, + { MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" }, + { MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" }, + { MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" }, + { MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" }, + { MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" }, + { MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" }, + { CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" }, + { CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" }, + { MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" }, + { MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" }, + { MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" }, + { MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", 
"\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" }, + { MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" }, + { MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" }, + { MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" }, + { MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" }, + { CMU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "^[\\x{100}-\\x{17f}]", " " }, + { M, A, 0, 0 | F_NOMATCH, "[^\\S\\W]{6}", "abcdefghijk" }, + + /* Unicode properties. */ + { MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" }, + { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" }, + { MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" }, + { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" }, + { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" }, + { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" }, + { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" }, + { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" }, + { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" }, + { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" }, + { MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" }, + { MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" }, + { CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" }, + { MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" }, + { MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" }, + { MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" }, + { CMUP, A, 0, 0, 
"[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" }, + { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" }, + { MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" }, + { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" }, + { MUP, 0, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Hangul}\\p{Z}]", " " }, + { MUP, 0, 0, 0, "[\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" }, + { MUP, 0, 0, 0, "[\\x{a92e}\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" }, + { CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" }, + { MUP, 0, 0, 0 | F_NOMATCH, "[^[:print:]\\x{f6f6}]", "\xef\x9b\xb6" }, + { MUP, 0, 0, 0, "[[:xdigit:]\\x{6500}]#", "\xe6\x94\x80#" }, + { MUP, 0, 0, 0 | F_PROPERTY, "[\\pC\\PC]#", "A#" }, + + /* Possible empty brackets. */ + { MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" }, + { MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" }, + { MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" }, + { MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" }, + { MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" }, + { MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" }, + { MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" }, + { MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" }, + { MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" }, + { MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" }, + + /* Start offset. */ + { MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" }, + { MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" }, + { MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" }, + { MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" }, + + /* Newline. */ + { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." }, + { M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." 
}, + { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." }, + { MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" }, + { MU, A, 0, 1, "^", "\r\n" }, + { M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" }, + { M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" }, + + /* Any character except newline or any newline. */ + { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" }, + { U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" }, + { 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" }, + { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" }, + { U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" }, + { U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" }, + { 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" }, + { U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" }, + { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" }, + { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" }, + { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" }, + { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" }, + { U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" }, + { MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" }, + { MU, A, 0, 0, "\\R+", "ab\r\n\r" }, + { MU, A, 0, 0, "\\R*", "ab\r\n\r" }, + { MU, A, 0, 0, "\\R*", "\r\n\r" }, + { MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" }, + { MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" }, + { MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" }, + { MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" }, + { MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" }, + { MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" }, + { MU, A, 0, 0, "\\R*\\R\\R", "\n\r" }, + { MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" }, + { MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" }, + + /* Atomic groups (no fallback from "next" direction). 
*/ + { MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" }, + { MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" }, + { MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op", + "bababcdedefgheijijklmlmnop" }, + { MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" }, + { MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" }, + { MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" }, + { MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" }, + { MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" }, + { MU, A, 0, 0, "(?>x|)*$", "aaa" }, + { MU, A, 0, 0, "(?>(x)|)*$", "aaa" }, + { MU, A, 0, 0, "(?>x|())*$", "aaa" }, + { MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" }, + { MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" }, + { MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" }, + { MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" }, + { MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" }, + { MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" }, + { MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" }, + { MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" }, + { MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" }, + { MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" }, + { MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" }, + { MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" }, + { MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" }, + { MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" }, + { CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" }, + { MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" }, + { MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" }, + { MU, A, 0, 0 | F_PROPERTY, "\\X", 
"\xcc\x8d\xcc\x8d" }, + { MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" }, + { MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" }, + { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" }, + { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" }, + { MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" }, + { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" }, + { MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" }, + { MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" }, + { MU, A, 0, 0 | F_NOMATCH, "(?>a*|)a", "aaa" }, + + /* Possessive quantifiers. */ + { MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" }, + { MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" }, + { MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" }, + { MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" }, + { MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" }, + { MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" }, + { MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" }, + { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" }, + { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" }, + { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" }, + { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" }, + { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" }, + { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" }, + { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" }, + { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" }, + { MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" }, + { MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" }, + { MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" }, + { MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" }, + { MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" }, + { MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" }, + { MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" }, + { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" }, + { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" }, + { MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" }, + { MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" }, + { MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" }, + { MU, A, 0, 0, 
"((b*))++m", "bxbbxbbbxbbm" }, + { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" }, + { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" }, + { MU, A, 0, 0, "(A)*+$", "ABC" }, + { MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" }, + { MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" }, + { MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" }, + { MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" }, + { MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" }, + + /* Back references. */ + { MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" }, + { CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" }, + { CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" }, + { MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" }, + { MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" }, + { MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" }, + { MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" }, + { MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" }, + { MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" }, + { CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" }, + { MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" }, + { CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" }, + { MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" }, + { CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" }, + { MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" }, + { MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" }, + { M, A, 0, 0, 
"(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" }, + { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." }, + { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." }, + { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" }, + { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" }, + { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." }, + { CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" }, + { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k{1,3}(?aa)(?bb)", "aabb" }, + { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k{1,3}(?aa)(?bb)", "aabb" }, + { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k*(?aa)(?bb)", "aabb" }, + { MU | PCRE2_DUPNAMES, A, 0, 0, "(?aa)(?bb)\\k{0,3}aaaaaa", "aabbaaaaaa" }, + { MU | PCRE2_DUPNAMES, A, 0, 0, "(?aa)(?bb)\\k{2,5}bb", "aabbaaaabb" }, + { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?aa)|(?bb))\\k{0,3}m", "aaaaaaaabbbbaabbbbm" }, + { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k{1,3}?(?aa)(?bb)", "aabb" }, + { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k{1,3}?(?aa)(?bb)", "aabb" }, + { MU | PCRE2_DUPNAMES, A, 0, 0, "\\k*?(?aa)(?bb)", "aabb" }, + { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?aa)|(?bb))\\k{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" }, + { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?aa)|(?bb))\\k*?m", "aaaaaabbbbbbaabbbbbbbbbbm" }, + { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?aa)|(?bb))\\k{2,3}?", "aaaabbbbaaaabbbbbbbbbb" }, + { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{0,3}M", "aaaaaaaabbbbaabbbbm" }, + { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{1,3}M", "aaaaaaaabbbbaabbbbm" }, + { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" }, + { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?AA)|(?BB))\\k{2,3}?", "aaaabbbbaaaabbbbbbbbbb" }, + { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" }, + { 
MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" }, + + /* Assertions. */ + { MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" }, + { MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" }, + { MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" }, + { MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" }, + { MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" }, + { M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" }, + { M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" }, + { MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" }, + { MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" }, + { MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" }, + { MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" }, + { MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" }, + { MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" }, + { MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" }, + { MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" }, + { MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" }, + { MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" }, + { MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" }, + { MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" }, + { MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" }, + { MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" }, + { MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" }, + { MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" }, + { MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" }, + { MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" }, + { MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" }, + { MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" }, + { MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" }, + { MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" }, + { MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" }, + { MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", 
"cab" }, + { MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" }, + { MU, A, 0, 0, "a(?=)b", "ab" }, + { MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" }, + { MU, A, 0, 0, "(?(?a)?(?Pb)?(?(Name)c|d)*l", "bc ddd abccabccl" }, + { MU, A, 0, 0, "(?Pa)?(?Pb)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" }, + { MU, A, 0, 0, "(?Pa)?(?Pb)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" }, + { MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" }, + { MU, A, 0, 0, "(?(?!)a|b)", "ab" }, + { MU, A, 0, 0, "(?(?!)a)", "ab" }, + { MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" }, + + /* Set start of match. */ + { MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" }, + { MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" }, + { MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" }, + { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" }, + { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" }, + + /* First line. */ + { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" }, + { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" }, + { MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" }, + { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" }, + { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" }, + { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" }, + { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" }, + { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" }, + { MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" }, + { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" }, + { M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" }, + { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" }, + { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" }, + { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" }, + { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" }, + { MU | 
PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" }, + { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" }, + { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" }, + { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" }, + { MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" }, + { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" }, + { MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" }, + + /* Recurse. */ + { MU, A, 0, 0, "(a)(?1)", "aa" }, + { MU, A, 0, 0, "((a))(?1)", "aa" }, + { MU, A, 0, 0, "(b|a)(?1)", "aa" }, + { MU, A, 0, 0, "(b|(a))(?1)", "aa" }, + { MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" }, + { MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" }, + { MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" }, + { MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" }, + { MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" }, + { MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" }, + { MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" }, + { MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" }, + { MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" }, + { MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" }, + { MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" }, + { MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" }, + { MU, A, 0, 0, "b|<(?R)*>", "<" }, + { MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" }, + { MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" }, + { MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" }, + { MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" }, + { MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" }, + { MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" }, + { MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" }, + { MU, A, 0, 0, "(?Pa(?(R&Name)a|b))(?1)", "aab abb abaa" }, + { MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" }, + { 
MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" }, + { MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" }, + { MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" }, + { MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" }, + { MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" }, + { MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" }, + + /* 16 bit specific tests. */ + { CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" }, + { CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" }, + { CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" }, + { CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" }, + { CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" }, + { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" }, + { CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" }, + { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" }, + { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" }, + { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" }, + { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" }, + { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" }, + { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" }, + { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" }, + { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" }, + { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", 
"\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" }, + { M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" }, + { M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" }, + { CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" }, + { CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" }, + { CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" }, + { CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" }, + { CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" }, + { CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" }, + { CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" }, + { M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" }, + { 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" }, + { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" }, + { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" }, + { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" }, + { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" }, + + /* Partial matching. 
*/ + { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" }, + { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" }, + { MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" }, + { MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" }, + { MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" }, + { MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" }, + { MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" }, + { MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" }, + + /* (*MARK) verb. */ + { MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" }, + { MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" }, + { MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" }, + { MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" }, + { MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" }, + { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" }, + { MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" }, + { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" }, + { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" }, + { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" }, + { MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" }, + { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" }, + { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" }, + { MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" }, + { MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" }, + + /* (*COMMIT) verb. */ + { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" }, + { MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" }, + { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" }, + { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" }, + { MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" }, + { MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" }, + + /* (*PRUNE) verb. 
*/ + { MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" }, + { MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" }, + { MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" }, + { MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" }, + { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" }, + { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" }, + { MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" }, + { MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" }, + { MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" }, + { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" }, + { MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" }, + { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" }, + { MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" }, + { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" }, + { MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" }, + { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" }, + { MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" }, + { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" }, + { MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" }, + { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" }, + { MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" }, + { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" }, + { MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" }, + { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" }, + { MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" }, + { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" }, + { MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" }, + { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" }, + { MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" }, + { MU, A, 0, 0 | F_NOMATCH, 
"(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" }, + + /* (*SKIP) verb. */ + { MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" }, + { MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," }, + { MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," }, + { MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" }, + + /* (*THEN) verb. */ + { MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" }, + { MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" }, + { MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" }, + { MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" }, + { MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" }, + { MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" }, + { MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" }, + { MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" }, + { MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" }, + { MU, A, 0, 0, "(?=(*THEN: ))* ", " " }, + { MU, A, 0, 0, "a(*THEN)(?R) |", "a" }, + { MU, A, 0, 0 | F_NOMATCH, "(?\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " }, + { MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " }, + { MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" }, + + /* Deep recursion: Stack limit reached. 
*/
+	{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
+	{ M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
+	{ M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
+	{ M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
+	{ M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
+
+	{ 0, 0, 0, 0, NULL, NULL }
+};
+
+#ifdef SUPPORT_PCRE2_8
+/* JIT stack callback (8-bit): the opaque argument is the stack itself. */
+static pcre2_jit_stack_8 *callback8(void *arg)
+{
+	pcre2_jit_stack_8 *jit_stack = (pcre2_jit_stack_8 *)arg;
+
+	return jit_stack;
+}
+
+/* File-scope 8-bit JIT stack; created lazily by getstack8(). */
+static pcre2_jit_stack_8 *stack8;
+
+/* Return the 8-bit JIT stack, creating it on the first call. */
+static pcre2_jit_stack_8 *getstack8(void)
+{
+	if (stack8 == NULL)
+		stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
+
+	return stack8;
+}
+
+/*
+ * With a match context: assign the 8-bit JIT stack to it via callback8.
+ * With NULL: free the stack (if one exists) and forget it.
+ */
+static void setstack8(pcre2_match_context_8 *mcontext)
+{
+	if (mcontext) {
+		pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
+		return;
+	}
+
+	if (stack8)
+		pcre2_jit_stack_free_8(stack8);
+	stack8 = NULL;
+}
+#endif /* SUPPORT_PCRE2_8 */
+
+#ifdef SUPPORT_PCRE2_16
+/* JIT stack callback (16-bit): the opaque argument is the stack itself. */
+static pcre2_jit_stack_16 *callback16(void *arg)
+{
+	pcre2_jit_stack_16 *jit_stack = (pcre2_jit_stack_16 *)arg;
+
+	return jit_stack;
+}
+
+/* File-scope 16-bit JIT stack; created lazily by getstack16(). */
+static pcre2_jit_stack_16 *stack16;
+
+/* Return the 16-bit JIT stack, creating it on the first call. */
+static pcre2_jit_stack_16 *getstack16(void)
+{
+	if (stack16 == NULL)
+		stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
+
+	return stack16;
+}
+
+/*
+ * With a match context: assign the 16-bit JIT stack to it via callback16.
+ * With NULL: free the stack (if one exists) and forget it.
+ */
+static void setstack16(pcre2_match_context_16 *mcontext)
+{
+	if (mcontext) {
+		pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
+		return;
+	}
+
+	if (stack16)
+		pcre2_jit_stack_free_16(stack16);
+	stack16 = NULL;
+}
+#endif /* SUPPORT_PCRE2_16 */
+
+#ifdef SUPPORT_PCRE2_32
+/* JIT stack callback (32-bit): the opaque argument is the stack itself. */
+static pcre2_jit_stack_32 *callback32(void *arg)
+{
+	pcre2_jit_stack_32 *jit_stack = (pcre2_jit_stack_32 *)arg;
+
+	return jit_stack;
+}
+
+/* File-scope 32-bit JIT stack; created lazily by getstack32(). */
+static pcre2_jit_stack_32 *stack32;
+
+/* Return the 32-bit JIT stack, creating it on the first call. */
+static pcre2_jit_stack_32 *getstack32(void)
+{
+	if (stack32 == NULL)
+		stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
+
+	return stack32;
+}
+
+static
void setstack32(pcre2_match_context_32 *mcontext) +{ + if (!mcontext) { + if (stack32) + pcre2_jit_stack_free_32(stack32); + stack32 = NULL; + return; + } + + pcre2_jit_stack_assign_32(mcontext, callback32, getstack32()); +} +#endif /* SUPPORT_PCRE2_32 */ + +#ifdef SUPPORT_PCRE2_16 + +static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length) +{ + PCRE2_SPTR8 iptr = input; + PCRE2_UCHAR16 *optr = output; + unsigned int c; + + if (max_length == 0) + return 0; + + while (*iptr && max_length > 1) { + c = 0; + if (offsetmap) + *offsetmap++ = (int)(iptr - (unsigned char*)input); + + if (*iptr < 0xc0) + c = *iptr++; + else if (!(*iptr & 0x20)) { + c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f); + iptr += 2; + } else if (!(*iptr & 0x10)) { + c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f); + iptr += 3; + } else if (!(*iptr & 0x08)) { + c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f); + iptr += 4; + } + + if (c < 65536) { + *optr++ = c; + max_length--; + } else if (max_length <= 2) { + *optr = '\0'; + return (int)(optr - output); + } else { + c -= 0x10000; + *optr++ = 0xd800 | ((c >> 10) & 0x3ff); + *optr++ = 0xdc00 | (c & 0x3ff); + max_length -= 2; + if (offsetmap) + offsetmap++; + } + } + if (offsetmap) + *offsetmap = (int)(iptr - (unsigned char*)input); + *optr = '\0'; + return (int)(optr - output); +} + +static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length) +{ + PCRE2_SPTR8 iptr = input; + PCRE2_UCHAR16 *optr = output; + + if (max_length == 0) + return 0; + + while (*iptr && max_length > 1) { + *optr++ = *iptr++; + max_length--; + } + *optr = '\0'; + return (int)(optr - output); +} + +#define REGTEST_MAX_LENGTH16 4096 +static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16]; +static int regtest_offsetmap16[REGTEST_MAX_LENGTH16]; + +#endif /* SUPPORT_PCRE2_16 */ + +#ifdef SUPPORT_PCRE2_32 + +static int 
convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length) +{ + PCRE2_SPTR8 iptr = input; + PCRE2_UCHAR32 *optr = output; + unsigned int c; + + if (max_length == 0) + return 0; + + while (*iptr && max_length > 1) { + c = 0; + if (offsetmap) + *offsetmap++ = (int)(iptr - (unsigned char*)input); + + if (*iptr < 0xc0) + c = *iptr++; + else if (!(*iptr & 0x20)) { + c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f); + iptr += 2; + } else if (!(*iptr & 0x10)) { + c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f); + iptr += 3; + } else if (!(*iptr & 0x08)) { + c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f); + iptr += 4; + } + + *optr++ = c; + max_length--; + } + if (offsetmap) + *offsetmap = (int)(iptr - (unsigned char*)input); + *optr = 0; + return (int)(optr - output); +} + +static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length) +{ + PCRE2_SPTR8 iptr = input; + PCRE2_UCHAR32 *optr = output; + + if (max_length == 0) + return 0; + + while (*iptr && max_length > 1) { + *optr++ = *iptr++; + max_length--; + } + *optr = '\0'; + return (int)(optr - output); +} + +#define REGTEST_MAX_LENGTH32 4096 +static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32]; +static int regtest_offsetmap32[REGTEST_MAX_LENGTH32]; + +#endif /* SUPPORT_PCRE2_32 */ + +static int check_ascii(const char *input) +{ + const unsigned char *ptr = (unsigned char *)input; + while (*ptr) { + if (*ptr > 127) + return 0; + ptr++; + } + return 1; +} + +#define OVECTOR_SIZE 15 + +static int regression_tests(void) +{ + struct regression_test_case *current = regression_test_cases; + int error; + PCRE2_SIZE err_offs; + int is_successful; + int is_ascii; + int total = 0; + int successful = 0; + int successful_row = 0; + int counter = 0; + int jit_compile_mode; + int utf = 0; + uint32_t disabled_options = 0; + int i; +#ifdef SUPPORT_PCRE2_8 + pcre2_code_8 *re8; + 
pcre2_compile_context_8 *ccontext8; + pcre2_match_data_8 *mdata8_1; + pcre2_match_data_8 *mdata8_2; + pcre2_match_context_8 *mcontext8; + PCRE2_SIZE *ovector8_1 = NULL; + PCRE2_SIZE *ovector8_2 = NULL; + int return_value8[2]; +#endif +#ifdef SUPPORT_PCRE2_16 + pcre2_code_16 *re16; + pcre2_compile_context_16 *ccontext16; + pcre2_match_data_16 *mdata16_1; + pcre2_match_data_16 *mdata16_2; + pcre2_match_context_16 *mcontext16; + PCRE2_SIZE *ovector16_1 = NULL; + PCRE2_SIZE *ovector16_2 = NULL; + int return_value16[2]; + int length16; +#endif +#ifdef SUPPORT_PCRE2_32 + pcre2_code_32 *re32; + pcre2_compile_context_32 *ccontext32; + pcre2_match_data_32 *mdata32_1; + pcre2_match_data_32 *mdata32_2; + pcre2_match_context_32 *mcontext32; + PCRE2_SIZE *ovector32_1 = NULL; + PCRE2_SIZE *ovector32_2 = NULL; + int return_value32[2]; + int length32; +#endif + +#if defined SUPPORT_PCRE2_8 + PCRE2_UCHAR8 cpu_info[128]; +#elif defined SUPPORT_PCRE2_16 + PCRE2_UCHAR16 cpu_info[128]; +#elif defined SUPPORT_PCRE2_32 + PCRE2_UCHAR32 cpu_info[128]; +#endif +#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2) + int return_value; +#endif + + /* This test compares the behaviour of interpreter and JIT. Although disabling + utf or ucp may make tests fail, if the pcre2_match result is the SAME, it is + still considered successful from pcre2_jit_test point of view. 
*/ + +#if defined SUPPORT_PCRE2_8 + pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info); +#elif defined SUPPORT_PCRE2_16 + pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info); +#elif defined SUPPORT_PCRE2_32 + pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info); +#endif + + printf("Running JIT regression tests\n"); + printf(" target CPU of SLJIT compiler: "); + for (i = 0; cpu_info[i]; i++) + printf("%c", (char)(cpu_info[i])); + printf("\n"); + +#if defined SUPPORT_PCRE2_8 + pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf); +#elif defined SUPPORT_PCRE2_16 + pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf); +#elif defined SUPPORT_PCRE2_32 + pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf); +#endif + + if (!utf) + disabled_options |= PCRE2_UTF; +#ifdef SUPPORT_PCRE2_8 + printf(" in 8 bit mode with UTF-8 %s:\n", utf ? "enabled" : "disabled"); +#endif +#ifdef SUPPORT_PCRE2_16 + printf(" in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled"); +#endif +#ifdef SUPPORT_PCRE2_32 + printf(" in 32 bit mode with UTF-32 %s:\n", utf ? 
"enabled" : "disabled"); +#endif + + while (current->pattern) { + /* printf("\nPattern: %s :\n", current->pattern); */ + total++; + is_ascii = 0; + if (!(current->start_offset & F_PROPERTY)) + is_ascii = check_ascii(current->pattern) && check_ascii(current->input); + + if (current->match_options & PCRE2_PARTIAL_SOFT) + jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT; + else if (current->match_options & PCRE2_PARTIAL_HARD) + jit_compile_mode = PCRE2_JIT_PARTIAL_HARD; + else + jit_compile_mode = PCRE2_JIT_COMPLETE; + error = 0; +#ifdef SUPPORT_PCRE2_8 + re8 = NULL; + ccontext8 = pcre2_compile_context_create_8(NULL); + if (ccontext8) { + if (GET_NEWLINE(current->newline)) + pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline)); + if (GET_BSR(current->newline)) + pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline)); + + if (!(current->start_offset & F_NO8)) { + re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED, + current->compile_options & ~disabled_options, + &error, &err_offs, ccontext8); + + if (!re8 && (utf || is_ascii)) + printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error); + } + pcre2_compile_context_free_8(ccontext8); + } + else + printf("\n8 bit: Cannot allocate compile context\n"); +#endif +#ifdef SUPPORT_PCRE2_16 + if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV)) + convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16); + else + copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16); + + re16 = NULL; + ccontext16 = pcre2_compile_context_create_16(NULL); + if (ccontext16) { + if (GET_NEWLINE(current->newline)) + pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline)); + if (GET_BSR(current->newline)) + pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline)); + + if (!(current->start_offset & F_NO16)) { + re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED, + 
current->compile_options & ~disabled_options, + &error, &err_offs, ccontext16); + + if (!re16 && (utf || is_ascii)) + printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error); + } + pcre2_compile_context_free_16(ccontext16); + } + else + printf("\n16 bit: Cannot allocate compile context\n"); +#endif +#ifdef SUPPORT_PCRE2_32 + if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV)) + convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32); + else + copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32); + + re32 = NULL; + ccontext32 = pcre2_compile_context_create_32(NULL); + if (ccontext32) { + if (GET_NEWLINE(current->newline)) + pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline)); + if (GET_BSR(current->newline)) + pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline)); + + if (!(current->start_offset & F_NO32)) { + re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED, + current->compile_options & ~disabled_options, + &error, &err_offs, ccontext32); + + if (!re32 && (utf || is_ascii)) + printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error); + } + pcre2_compile_context_free_32(ccontext32); + } + else + printf("\n32 bit: Cannot allocate compile context\n"); +#endif + + counter++; + if ((counter & 0x3) != 0) { +#ifdef SUPPORT_PCRE2_8 + setstack8(NULL); +#endif +#ifdef SUPPORT_PCRE2_16 + setstack16(NULL); +#endif +#ifdef SUPPORT_PCRE2_32 + setstack32(NULL); +#endif + } + +#ifdef SUPPORT_PCRE2_8 + return_value8[0] = -1000; + return_value8[1] = -1000; + mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL); + mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL); + mcontext8 = pcre2_match_context_create_8(NULL); + if (!mdata8_1 || !mdata8_2 || !mcontext8) { + printf("\n8 bit: Cannot allocate match data\n"); + pcre2_match_data_free_8(mdata8_1); + pcre2_match_data_free_8(mdata8_2); + 
pcre2_match_context_free_8(mcontext8); + pcre2_code_free_8(re8); + re8 = NULL; + } else { + ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1); + ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2); + for (i = 0; i < OVECTOR_SIZE * 2; ++i) + ovector8_1[i] = (PCRE2_SIZE)(-2); + for (i = 0; i < OVECTOR_SIZE * 2; ++i) + ovector8_2[i] = (PCRE2_SIZE)(-2); + pcre2_set_match_limit_8(mcontext8, 10000000); + } + if (re8) { + return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input), + current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8); + + if (pcre2_jit_compile_8(re8, jit_compile_mode)) { + printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern); + } else if ((counter & 0x1) != 0) { + setstack8(mcontext8); + return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input), + current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8); + } else { + pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8()); + return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input), + current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8); + } + } +#endif + +#ifdef SUPPORT_PCRE2_16 + return_value16[0] = -1000; + return_value16[1] = -1000; + mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL); + mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL); + mcontext16 = pcre2_match_context_create_16(NULL); + if (!mdata16_1 || !mdata16_2 || !mcontext16) { + printf("\n16 bit: Cannot allocate match data\n"); + pcre2_match_data_free_16(mdata16_1); + pcre2_match_data_free_16(mdata16_2); + pcre2_match_context_free_16(mcontext16); + pcre2_code_free_16(re16); + re16 = NULL; + } else { + ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1); + ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2); + for (i = 0; i < OVECTOR_SIZE * 2; ++i) + ovector16_1[i] = (PCRE2_SIZE)(-2); + for (i = 0; i < OVECTOR_SIZE * 2; 
++i) + ovector16_2[i] = (PCRE2_SIZE)(-2); + pcre2_set_match_limit_16(mcontext16, 10000000); + } + if (re16) { + if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV)) + length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16); + else + length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16); + + return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16, + current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16); + + if (pcre2_jit_compile_16(re16, jit_compile_mode)) { + printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern); + } else if ((counter & 0x1) != 0) { + setstack16(mcontext16); + return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16, + current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16); + } else { + pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16()); + return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16, + current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16); + } + } +#endif + +#ifdef SUPPORT_PCRE2_32 + return_value32[0] = -1000; + return_value32[1] = -1000; + mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL); + mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL); + mcontext32 = pcre2_match_context_create_32(NULL); + if (!mdata32_1 || !mdata32_2 || !mcontext32) { + printf("\n32 bit: Cannot allocate match data\n"); + pcre2_match_data_free_32(mdata32_1); + pcre2_match_data_free_32(mdata32_2); + pcre2_match_context_free_32(mcontext32); + pcre2_code_free_32(re32); + re32 = NULL; + } else { + ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1); + ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2); + for (i = 0; i < OVECTOR_SIZE * 2; ++i) + ovector32_1[i] = (PCRE2_SIZE)(-2); + for (i = 0; i < OVECTOR_SIZE * 2; ++i) + ovector32_2[i] = (PCRE2_SIZE)(-2); + 
pcre2_set_match_limit_32(mcontext32, 10000000); + } + if (re32) { + if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV)) + length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32); + else + length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32); + + return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32, + current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32); + + if (pcre2_jit_compile_32(re32, jit_compile_mode)) { + printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern); + } else if ((counter & 0x1) != 0) { + setstack32(mcontext32); + return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32, + current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32); + } else { + pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32()); + return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32, + current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32); + } + } +#endif + + /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s", + return_value8[0], return_value16[0], return_value32[0], + (int)ovector8_1[0], (int)ovector8_1[1], + (int)ovector16_1[0], (int)ovector16_1[1], + (int)ovector32_1[0], (int)ovector32_1[1], + (current->compile_options & PCRE2_CASELESS) ? "C" : ""); */ + + /* If F_DIFF is set, just run the test, but do not compare the results. + Segfaults can still be captured. */ + + is_successful = 1; + if (!(current->start_offset & F_DIFF)) { +#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2) + if (!(current->start_offset & F_FORCECONV)) { + + /* All results must be the same. 
*/ +#ifdef SUPPORT_PCRE2_8 + if ((return_value = return_value8[0]) != return_value8[1]) { + printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n", + return_value8[0], return_value8[1], total, current->pattern, current->input); + is_successful = 0; + } else +#endif +#ifdef SUPPORT_PCRE2_16 + if ((return_value = return_value16[0]) != return_value16[1]) { + printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n", + return_value16[0], return_value16[1], total, current->pattern, current->input); + is_successful = 0; + } else +#endif +#ifdef SUPPORT_PCRE2_32 + if ((return_value = return_value32[0]) != return_value32[1]) { + printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n", + return_value32[0], return_value32[1], total, current->pattern, current->input); + is_successful = 0; + } else +#endif +#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16 + if (return_value8[0] != return_value16[0]) { + printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n", + return_value8[0], return_value16[0], + total, current->pattern, current->input); + is_successful = 0; + } else +#endif +#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32 + if (return_value8[0] != return_value32[0]) { + printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n", + return_value8[0], return_value32[0], + total, current->pattern, current->input); + is_successful = 0; + } else +#endif +#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32 + if (return_value16[0] != return_value32[0]) { + printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n", + return_value16[0], return_value32[0], + total, current->pattern, current->input); + is_successful = 0; + } else +#endif + if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) { + if (return_value == PCRE2_ERROR_PARTIAL) { + return_value = 2; + } else { + return_value *= 2; + } +#ifdef SUPPORT_PCRE2_8 + return_value8[0] = 
return_value; +#endif +#ifdef SUPPORT_PCRE2_16 + return_value16[0] = return_value; +#endif +#ifdef SUPPORT_PCRE2_32 + return_value32[0] = return_value; +#endif + /* Transform back the results. */ + if (current->compile_options & PCRE2_UTF) { +#ifdef SUPPORT_PCRE2_16 + for (i = 0; i < return_value; ++i) { + if (ovector16_1[i] != PCRE2_UNSET) + ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]]; + if (ovector16_2[i] != PCRE2_UNSET) + ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]]; + } +#endif +#ifdef SUPPORT_PCRE2_32 + for (i = 0; i < return_value; ++i) { + if (ovector32_1[i] != PCRE2_UNSET) + ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]]; + if (ovector32_2[i] != PCRE2_UNSET) + ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]]; + } +#endif + } + + for (i = 0; i < return_value; ++i) { +#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16 + if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) { + printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n", + i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i], + total, current->pattern, current->input); + is_successful = 0; + } +#endif +#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32 + if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) { + printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n", + i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i], + total, current->pattern, current->input); + is_successful = 0; + } +#endif +#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32 + if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) { + printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n", + i, (int)ovector16_1[i], 
(int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i], + total, current->pattern, current->input); + is_successful = 0; + } +#endif + } + } + } else +#endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */ + { +#ifdef SUPPORT_PCRE2_8 + if (return_value8[0] != return_value8[1]) { + printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", + return_value8[0], return_value8[1], total, current->pattern, current->input); + is_successful = 0; + } else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) { + if (return_value8[0] == PCRE2_ERROR_PARTIAL) + return_value8[0] = 2; + else + return_value8[0] *= 2; + + for (i = 0; i < return_value8[0]; ++i) + if (ovector8_1[i] != ovector8_2[i]) { + printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", + i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input); + is_successful = 0; + } + } +#endif + +#ifdef SUPPORT_PCRE2_16 + if (return_value16[0] != return_value16[1]) { + printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", + return_value16[0], return_value16[1], total, current->pattern, current->input); + is_successful = 0; + } else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) { + if (return_value16[0] == PCRE2_ERROR_PARTIAL) + return_value16[0] = 2; + else + return_value16[0] *= 2; + + for (i = 0; i < return_value16[0]; ++i) + if (ovector16_1[i] != ovector16_2[i]) { + printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", + i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input); + is_successful = 0; + } + } +#endif + +#ifdef SUPPORT_PCRE2_32 + if (return_value32[0] != return_value32[1]) { + printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", + return_value32[0], return_value32[1], total, current->pattern, current->input); + is_successful = 0; + } else if (return_value32[0] >= 0 || return_value32[0] == 
PCRE2_ERROR_PARTIAL) { + if (return_value32[0] == PCRE2_ERROR_PARTIAL) + return_value32[0] = 2; + else + return_value32[0] *= 2; + + for (i = 0; i < return_value32[0]; ++i) + if (ovector32_1[i] != ovector32_2[i]) { + printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", + i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input); + is_successful = 0; + } + } +#endif + } + } + + if (is_successful) { +#ifdef SUPPORT_PCRE2_8 + if (!(current->start_offset & F_NO8) && (utf || is_ascii)) { + if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) { + printf("8 bit: Test should match: [%d] '%s' @ '%s'\n", + total, current->pattern, current->input); + is_successful = 0; + } + + if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) { + printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n", + total, current->pattern, current->input); + is_successful = 0; + } + } +#endif +#ifdef SUPPORT_PCRE2_16 + if (!(current->start_offset & F_NO16) && (utf || is_ascii)) { + if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) { + printf("16 bit: Test should match: [%d] '%s' @ '%s'\n", + total, current->pattern, current->input); + is_successful = 0; + } + + if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) { + printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n", + total, current->pattern, current->input); + is_successful = 0; + } + } +#endif +#ifdef SUPPORT_PCRE2_32 + if (!(current->start_offset & F_NO32) && (utf || is_ascii)) { + if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) { + printf("32 bit: Test should match: [%d] '%s' @ '%s'\n", + total, current->pattern, current->input); + is_successful = 0; + } + + if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) { + printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n", + total, current->pattern, current->input); + is_successful = 0; + } + } +#endif + } + + if (is_successful) { +#ifdef 
SUPPORT_PCRE2_8 + if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) { + printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n", + total, current->pattern, current->input); + is_successful = 0; + } +#endif +#ifdef SUPPORT_PCRE2_16 + if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) { + printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n", + total, current->pattern, current->input); + is_successful = 0; + } +#endif +#ifdef SUPPORT_PCRE2_32 + if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) { + printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n", + total, current->pattern, current->input); + is_successful = 0; + } +#endif + } + +#ifdef SUPPORT_PCRE2_8 + pcre2_code_free_8(re8); + pcre2_match_data_free_8(mdata8_1); + pcre2_match_data_free_8(mdata8_2); + pcre2_match_context_free_8(mcontext8); +#endif +#ifdef SUPPORT_PCRE2_16 + pcre2_code_free_16(re16); + pcre2_match_data_free_16(mdata16_1); + pcre2_match_data_free_16(mdata16_2); + pcre2_match_context_free_16(mcontext16); +#endif +#ifdef SUPPORT_PCRE2_32 + pcre2_code_free_32(re32); + pcre2_match_data_free_32(mdata32_1); + pcre2_match_data_free_32(mdata32_2); + pcre2_match_context_free_32(mcontext32); +#endif + + if (is_successful) { + successful++; + successful_row++; + printf("."); + if (successful_row >= 60) { + successful_row = 0; + printf("\n"); + } + } else + successful_row = 0; + + fflush(stdout); + current++; + } +#ifdef SUPPORT_PCRE2_8 + setstack8(NULL); +#endif +#ifdef SUPPORT_PCRE2_16 + setstack16(NULL); +#endif +#ifdef SUPPORT_PCRE2_32 + setstack32(NULL); +#endif + + if (total == successful) { + printf("\nAll JIT regression tests are successfully passed.\n"); + return 0; + } else { + printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful); + return 1; + } +} + +#if defined 
SUPPORT_UNICODE + +static int check_invalid_utf_result(int pattern_index, const char *type, int result, + int match_start, int match_end, PCRE2_SIZE *ovector) +{ + if (match_start < 0) { + if (result != -1) { + printf("Pattern[%d] %s result is not -1.\n", pattern_index, type); + return 1; + } + return 0; + } + + if (result <= 0) { + printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result); + return 1; + } + + if (ovector[0] != (PCRE2_SIZE)match_start) { + printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n", + pattern_index, type, (int)ovector[0], match_start); + return 1; + } + + if (ovector[1] != (PCRE2_SIZE)match_end) { + printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n", + pattern_index, type, (int)ovector[1], match_end); + return 1; + } + + return 0; +} + +#endif /* SUPPORT_UNICODE */ + +#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8 + +#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED) +#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF) +#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF) + +struct invalid_utf8_regression_test_case { + uint32_t compile_options; + int jit_compile_options; + int start_offset; + int skip_left; + int skip_right; + int match_start; + int match_end; + const char *pattern[2]; + const char *input; +}; + +static const char invalid_utf8_newline_cr; + +static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = { + { UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" }, + { UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" }, + { UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" }, + { UDA, CI, 0, 0, 
0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" }, + { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" }, + { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" }, + { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" }, + { UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" }, + { UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" }, + { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" }, + { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" }, + { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" }, + { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" }, + { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" }, + { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" }, + { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" }, + { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" }, + { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" }, + { UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" }, + + { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" }, + { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, 
"\xf4\xa0\x80\x80\xf4\xa0\x80\x80" }, + { UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" }, + { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" }, + { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" }, + { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" }, + { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" }, + { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" }, + { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" }, + { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" }, + { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" }, + { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" }, + { UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" }, + { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" }, + { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" }, + { UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" }, + { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" }, + { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" }, + { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" }, + { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" }, + { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" }, + { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" }, + { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" }, + { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" }, + { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" }, + + { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" }, + { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" }, + { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, 
"\xe0\x9f\xbf\xe0\x9f\xbf" }, + { UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" }, + { UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" }, + { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" }, + { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" }, + { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" }, + + { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" }, + { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" }, + { UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" }, + { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" }, + { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" }, + { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" }, + { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" }, + + { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" }, + { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" }, + { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" }, + { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" }, + + { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" }, + { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" }, + { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" }, + { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" }, + { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" }, + { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" }, + { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" }, + { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" }, + { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" }, + + { UDA, CPI, 0, 0, 0, 0, 1, { 
"\\X", NULL }, "A" }, + { UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" }, + { UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" }, + { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" }, + { UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" }, + { UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" }, + { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" }, + { UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" }, + { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" }, + + { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" }, + { UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" }, + { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" }, + { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" }, + + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"}, + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"}, + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"}, + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"}, + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"}, + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"}, + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"}, + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"}, + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"}, + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"}, + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"}, + + { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"}, + { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"}, 
+ { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"}, + { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"}, + { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"}, + { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"}, + + { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" }, + { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" }, + { PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" }, + { PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" }, + + { PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" }, + { PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "[\\D]", NULL }, "\xe0\xab\xaa@" }, + { PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "\\D+", NULL }, "n\xc3\xb1" }, + { PCRE2_UTF, CI, 0, 0, 0, 0, 5, { "\\W+", NULL }, "@\xf0\x9d\x84\x9e" }, + + /* These two are not invalid UTF tests, but this infrastructure fits better for them. 
*/ + { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" }, + { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" }, + + { PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" }, + + { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL } +}; + +#undef UDA +#undef CI +#undef CPI + +static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current, + int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata) +{ + pcre2_code_8 *code; + int result, errorcode; + PCRE2_SIZE length, erroroffset; + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata); + + if (current->pattern[i] == NULL) + return 1; + + code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED, + current->compile_options, &errorcode, &erroroffset, ccontext); + + if (!code) { + printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset); + return 0; + } + + if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) { + printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index); + pcre2_code_free_8(code); + return 0; + } + + length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right); + + if (current->jit_compile_options & PCRE2_JIT_COMPLETE) { + result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left), + length, current->start_offset - current->skip_left, 0, mdata, NULL); + + if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) { + pcre2_code_free_8(code); + return 0; + } + } + + if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) { + result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left), + length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL); + + if (check_invalid_utf_result(pattern_index, "partial 
match", result, current->match_start, current->match_end, ovector)) { + pcre2_code_free_8(code); + return 0; + } + } + + pcre2_code_free_8(code); + return 1; +} + +static int invalid_utf8_regression_tests(void) +{ + const struct invalid_utf8_regression_test_case *current; + pcre2_compile_context_8 *ccontext; + pcre2_match_data_8 *mdata; + int total = 0, successful = 0; + int result; + + printf("\nRunning invalid-utf8 JIT regression tests\n"); + + ccontext = pcre2_compile_context_create_8(NULL); + pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY); + mdata = pcre2_match_data_create_8(4, NULL); + + for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) { + /* printf("\nPattern: %s :\n", current->pattern); */ + total++; + + result = 1; + if (current->pattern[1] != &invalid_utf8_newline_cr) + { + if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata)) + result = 0; + if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata)) + result = 0; + } else { + pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_CR); + if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata)) + result = 0; + pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY); + } + + if (result) { + successful++; + } + + printf("."); + if ((total % 60) == 0) + printf("\n"); + } + + if ((total % 60) != 0) + printf("\n"); + + pcre2_match_data_free_8(mdata); + pcre2_compile_context_free_8(ccontext); + + if (total == successful) { + printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n"); + return 0; + } else { + printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful); + return 1; + } +} + +#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */ + +static int invalid_utf8_regression_tests(void) +{ + return 0; +} + +#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */ + +#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16 + +#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED) +#define CI 
(PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF) +#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF) + +struct invalid_utf16_regression_test_case { + uint32_t compile_options; + int jit_compile_options; + int start_offset; + int skip_left; + int skip_right; + int match_start; + int match_end; + const PCRE2_UCHAR16 *pattern[2]; + const PCRE2_UCHAR16 *input; +}; + +static PCRE2_UCHAR16 allany16[] = { '.', 0 }; +static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 }; +static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 }; +static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 }; +static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 }; +static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 }; +static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 }; +static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 }; +static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 }; +static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 }; +static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 }; +static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 }; +static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 }; +static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 }; +static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 }; +static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 }; +static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 }; +static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 }; +static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 }; +static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 }; + +static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = { + { UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 }, + { 
UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 }, + { UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 }, + { UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 }, + { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 }, + { UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 }, + { UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 }, + { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 }, + { UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 }, + { UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 }, + + { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 }, + { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 }, + { UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 }, + { UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 }, + { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 }, + { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 }, + { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 }, + { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 }, + { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 }, + { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 }, + + { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 }, + { UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 }, + { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 }, + { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 }, + + { UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 }, + { UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 }, + { UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 }, + { UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 }, + { UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 }, + { UDA, CPI, 1, 
0, 0, -1, -1, { grapheme16, NULL }, test16_7 }, + + { UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 }, + { UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 }, + { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 }, + + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 }, + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 }, + + { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 }, + { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 }, + { PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 }, + { PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 }, + + { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL } +}; + +#undef UDA +#undef CI +#undef CPI + +static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current, + int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata) +{ + pcre2_code_16 *code; + int result, errorcode; + PCRE2_SIZE length, erroroffset; + const PCRE2_UCHAR16 *input; + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata); + + if (current->pattern[i] == NULL) + return 1; + + code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED, + current->compile_options, &errorcode, &erroroffset, ccontext); + + if (!code) { + printf("Pattern[%d:0] cannot be compiled. 
Error offset: %d\n", pattern_index, (int)erroroffset); + return 0; + } + + if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) { + printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index); + pcre2_code_free_16(code); + return 0; + } + + input = current->input; + length = 0; + + while (*input++ != 0) + length++; + + length -= current->skip_left + current->skip_right; + + if (current->jit_compile_options & PCRE2_JIT_COMPLETE) { + result = pcre2_jit_match_16(code, (current->input + current->skip_left), + length, current->start_offset - current->skip_left, 0, mdata, NULL); + + if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) { + pcre2_code_free_16(code); + return 0; + } + } + + if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) { + result = pcre2_jit_match_16(code, (current->input + current->skip_left), + length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL); + + if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) { + pcre2_code_free_16(code); + return 0; + } + } + + pcre2_code_free_16(code); + return 1; +} + +static int invalid_utf16_regression_tests(void) +{ + const struct invalid_utf16_regression_test_case *current; + pcre2_compile_context_16 *ccontext; + pcre2_match_data_16 *mdata; + int total = 0, successful = 0; + int result; + + printf("\nRunning invalid-utf16 JIT regression tests\n"); + + ccontext = pcre2_compile_context_create_16(NULL); + pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY); + mdata = pcre2_match_data_create_16(4, NULL); + + for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) { + /* printf("\nPattern: %s :\n", current->pattern); */ + total++; + + result = 1; + if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata)) + result = 0; + if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, 
mdata)) + result = 0; + + if (result) { + successful++; + } + + printf("."); + if ((total % 60) == 0) + printf("\n"); + } + + if ((total % 60) != 0) + printf("\n"); + + pcre2_match_data_free_16(mdata); + pcre2_compile_context_free_16(ccontext); + + if (total == successful) { + printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n"); + return 0; + } else { + printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful); + return 1; + } +} + +#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */ + +static int invalid_utf16_regression_tests(void) +{ + return 0; +} + +#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */ + +#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32 + +#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED) +#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF) +#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF) + +struct invalid_utf32_regression_test_case { + uint32_t compile_options; + int jit_compile_options; + int start_offset; + int skip_left; + int skip_right; + int match_start; + int match_end; + const PCRE2_UCHAR32 *pattern[2]; + const PCRE2_UCHAR32 *input; +}; + +static PCRE2_UCHAR32 allany32[] = { '.', 0 }; +static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 }; +static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 }; +static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 }; +static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 }; +static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 }; +static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 }; +static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 }; +static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 }; +static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 }; +static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 }; +static PCRE2_UCHAR32 test32_5[] = { ' ', 
0x2028, '#', 0 }; +static PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 }; + +static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = { + { UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 }, + { UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 }, + { UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 }, + { UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 }, + { UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 }, + { UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 }, + + { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 }, + { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 }, + { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 }, + { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 }, + { UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 }, + + { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 }, + { UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 }, + + { UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 }, + { UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 }, + { UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 }, + { UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 }, + { UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 }, + { UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 }, + + { UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 }, + { UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 }, + { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 }, + { UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 }, + { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 }, + + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 }, + { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { 
afternl32, NULL }, test32_6 }, + + { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL } +}; + +#undef UDA +#undef CI +#undef CPI + +static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current, + int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata) +{ + pcre2_code_32 *code; + int result, errorcode; + PCRE2_SIZE length, erroroffset; + const PCRE2_UCHAR32 *input; + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata); + + if (current->pattern[i] == NULL) + return 1; + + code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED, + current->compile_options, &errorcode, &erroroffset, ccontext); + + if (!code) { + printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset); + return 0; + } + + if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) { + printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index); + pcre2_code_free_32(code); + return 0; + } + + input = current->input; + length = 0; + + while (*input++ != 0) + length++; + + length -= current->skip_left + current->skip_right; + + if (current->jit_compile_options & PCRE2_JIT_COMPLETE) { + result = pcre2_jit_match_32(code, (current->input + current->skip_left), + length, current->start_offset - current->skip_left, 0, mdata, NULL); + + if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) { + pcre2_code_free_32(code); + return 0; + } + } + + if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) { + result = pcre2_jit_match_32(code, (current->input + current->skip_left), + length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL); + + if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) { + pcre2_code_free_32(code); + return 0; + } + } + + pcre2_code_free_32(code); + return 1; +} + +static int 
invalid_utf32_regression_tests(void) +{ + const struct invalid_utf32_regression_test_case *current; + pcre2_compile_context_32 *ccontext; + pcre2_match_data_32 *mdata; + int total = 0, successful = 0; + int result; + + printf("\nRunning invalid-utf32 JIT regression tests\n"); + + ccontext = pcre2_compile_context_create_32(NULL); + pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY); + mdata = pcre2_match_data_create_32(4, NULL); + + for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) { + /* printf("\nPattern: %s :\n", current->pattern); */ + total++; + + result = 1; + if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata)) + result = 0; + if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata)) + result = 0; + + if (result) { + successful++; + } + + printf("."); + if ((total % 60) == 0) + printf("\n"); + } + + if ((total % 60) != 0) + printf("\n"); + + pcre2_match_data_free_32(mdata); + pcre2_compile_context_free_32(ccontext); + + if (total == successful) { + printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n"); + return 0; + } else { + printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful); + return 1; + } +} + +#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */ + +static int invalid_utf32_regression_tests(void) +{ + return 0; +} + +#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */ + +/* End of pcre2_jit_test.c */ diff --git a/src/pcre2_maketables.c b/src/pcre2_maketables.c new file mode 100644 index 0000000..ac8b63b --- /dev/null +++ b/src/pcre2_maketables.c @@ -0,0 +1,165 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2020 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre2_maketables(), which builds +character tables for PCRE2 in the current locale. The file is compiled on its +own as part of the PCRE2 library. 
It is also included in the compilation of +pcre2_dftables.c as a freestanding program, in which case the macro +PCRE2_DFTABLES is defined. */ + +#ifndef PCRE2_DFTABLES /* Compiling the library */ +# ifdef HAVE_CONFIG_H +# include "config.h" +# endif +# include "pcre2_internal.h" +#endif + +/************************************************* +* Create PCRE2 character tables * +*************************************************/ + +/* This function builds a set of character tables for use by PCRE2 and returns +a pointer to them. They are build using the ctype functions, and consequently +their contents will depend upon the current locale setting. When compiled as +part of the library, the store is obtained via a general context malloc, if +supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables +freestanding auxiliary program) malloc() is used, and the function has a +different name so as not to clash with the prototype in pcre2.h. + +Arguments: none when PCRE2_DFTABLES is defined + else a PCRE2 general context or NULL +Returns: pointer to the contiguous block of data + else NULL if memory allocation failed +*/ + +#ifdef PCRE2_DFTABLES /* Included in freestanding pcre2_dftables program */ +static const uint8_t *maketables(void) +{ +uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH); + +#else /* Not PCRE2_DFTABLES, that is, compiling the library */ +PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION +pcre2_maketables(pcre2_general_context *gcontext) +{ +uint8_t *yield = (uint8_t *)((gcontext != NULL)? + gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) : + malloc(TABLES_LENGTH)); +#endif /* PCRE2_DFTABLES */ + +int i; +uint8_t *p; + +if (yield == NULL) return NULL; +p = yield; + +/* First comes the lower casing table */ + +for (i = 0; i < 256; i++) *p++ = tolower(i); + +/* Next the case-flipping table */ + +for (i = 0; i < 256; i++) + { + int c = islower(i)? toupper(i) : tolower(i); + *p++ = (c < 256)? 
c : i; + } + +/* Then the character class tables. Don't try to be clever and save effort on +exclusive ones - in some locales things may be different. + +Note that the table for "space" includes everything "isspace" gives, including +VT in the default locale. This makes it work for the POSIX class [:space:]. +From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl +space, because Perl added VT at release 5.18. + +Note also that it is possible for a character to be alnum or alpha without +being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the +fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must +test for alnum specially. */ + +memset(p, 0, cbit_length); +for (i = 0; i < 256; i++) + { + if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7); + if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7); + if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7); + if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7); + if (i == '_') p[cbit_word + i/8] |= 1u << (i&7); + if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7); + if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7); + if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7); + if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7); + if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7); + if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7); + } +p += cbit_length; + +/* Finally, the character type table. In this, we used to exclude VT from the +white space chars, because Perl didn't recognize it as such for \s and for +comments within regexes. However, Perl changed at release 5.18, so PCRE1 +changed at release 8.34 and it's always been this way for PCRE2. 
*/ + +for (i = 0; i < 256; i++) + { + int x = 0; + if (isspace(i)) x += ctype_space; + if (isalpha(i)) x += ctype_letter; + if (islower(i)) x += ctype_lcletter; + if (isdigit(i)) x += ctype_digit; + if (isalnum(i) || i == '_') x += ctype_word; + *p++ = x; + } + +return yield; +} + +#ifndef PCRE2_DFTABLES /* Compiling the library */ +PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables) +{ + if (gcontext) + gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data); + else + free((void *)tables); +} +#endif + +/* End of pcre2_maketables.c */ diff --git a/src/pcre2_match.c b/src/pcre2_match.c new file mode 100644 index 0000000..6c422c2 --- /dev/null +++ b/src/pcre2_match.c @@ -0,0 +1,7777 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2015-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
/* These defines enable debugging code */

/* #define DEBUG_FRAMES_DISPLAY */
/* #define DEBUG_SHOW_OPS */
/* #define DEBUG_SHOW_RMATCH */

/* The header name of this directive was missing (a bare "#include"), which
cannot compile once DEBUG_FRAMES_DISPLAY is switched on.
NOTE(review): <stdarg.h> is restored on the assumption that the frame display
debug code is variadic - confirm against the code that uses it. */

#ifdef DEBUG_FRAMES_DISPLAY
#include <stdarg.h>
#endif

#ifdef DEBUG_SHOW_OPS
static const char *OP_names[] = { OP_NAME_LIST };
#endif

/* These defines identify the name of the block containing "static"
information, and fields within it. */

#define NLBLOCK mb              /* Block containing newline information */
#define PSSTART start_subject   /* Field containing processed string start */
#define PSEND   end_subject     /* Field containing processed string end */

#define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */

/* Masks for identifying the public options that are permitted at match time. */

#define PUBLIC_MATCH_OPTIONS \
  (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
   PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT| \
   PCRE2_DISABLE_RECURSELOOP_CHECK)

#define PUBLIC_JIT_MATCH_OPTIONS \
   (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
    PCRE2_COPY_MATCHED_SUBJECT)

/* Non-error returns from and within the match() function. Error returns are
externally defined PCRE2_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Special internal returns used in the match() function. Make them
sufficiently negative to avoid the external error codes. */

#define MATCH_ACCEPT       (-999)
#define MATCH_KETRPOS      (-998)
/* The next 5 must be kept together and in sequence so that a test that checks
for any one of them can use a range. */
#define MATCH_COMMIT       (-997)
#define MATCH_PRUNE        (-996)
#define MATCH_SKIP         (-995)
#define MATCH_SKIP_ARG     (-994)
#define MATCH_THEN         (-993)
#define MATCH_BACKTRACK_MAX MATCH_THEN
#define MATCH_BACKTRACK_MIN MATCH_COMMIT

/* Group frame type values. Zero means the frame is not a group frame. The
lower 16 bits are used for data (e.g. the capture number). Group frames are
used for most groups so that information about the start is easily available at
the end without having to scan back through intermediate frames (backtrack
points). */

#define GF_CAPTURE     0x00010000u
#define GF_NOCAPTURE   0x00020000u
#define GF_CONDASSERT  0x00030000u
#define GF_RECURSE     0x00040000u

/* Masks for the identity and data parts of the group frame type. */

#define GF_IDMASK(a)   ((a) & 0xffff0000u)
#define GF_DATAMASK(a) ((a) & 0x0000ffffu)

/* Repetition types */

enum { REPTYPE_MIN, REPTYPE_MAX, REPTYPE_POS };

/* Min and max values for the common repeats; a maximum of UINT32_MAX =>
infinity. */

static const uint32_t rep_min[] = {
  0, 0,       /* * and *? */
  1, 1,       /* + and +? */
  0, 0,       /* ? and ?? */
  0, 0,       /* dummy placefillers for OP_CR[MIN]RANGE */
  0, 1, 0 };  /* OP_CRPOS{STAR, PLUS, QUERY} */

static const uint32_t rep_max[] = {
  UINT32_MAX, UINT32_MAX,      /* * and *? */
  UINT32_MAX, UINT32_MAX,      /* + and +? */
  1, 1,                        /* ? and ?? */
  0, 0,                        /* dummy placefillers for OP_CR[MIN]RANGE */
  UINT32_MAX, UINT32_MAX, 1 }; /* OP_CRPOS{STAR, PLUS, QUERY} */

/* Repetition types - must include OP_CRPOSRANGE (not needed above) */

static const uint32_t rep_typ[] = {
  REPTYPE_MAX, REPTYPE_MIN,    /* * and *? */
  REPTYPE_MAX, REPTYPE_MIN,    /* + and +? */
  REPTYPE_MAX, REPTYPE_MIN,    /* ? and ?? */
  REPTYPE_MAX, REPTYPE_MIN,    /* OP_CRRANGE and OP_CRMINRANGE */
  REPTYPE_POS, REPTYPE_POS,    /* OP_CRPOSSTAR, OP_CRPOSPLUS */
  REPTYPE_POS, REPTYPE_POS };  /* OP_CRPOSQUERY, OP_CRPOSRANGE */

/* Numbers for RMATCH calls at backtracking points. When these lists are
changed, the code at RETURN_SWITCH below must be updated in sync. */

enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37 };

#ifdef SUPPORT_WIDE_CHARS
enum { RM100=100, RM101 };
#endif

#ifdef SUPPORT_UNICODE
enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
       RM208,     RM209, RM210, RM211, RM212, RM213, RM214, RM215,
       RM216,     RM217, RM218, RM219, RM220, RM221, RM222, RM223,
       RM224,     RM225 };
#endif
Occasional references to fields in +other frames are written out explicitly. There are also some fields in the +current frame whose names start with "temp" that are used for short-term, +localised backtracking memory. These are #defined with Lxxx names at the point +of use and undefined afterwards. */ + +#define Fback_frame F->back_frame +#define Fcapture_last F->capture_last +#define Fcurrent_recurse F->current_recurse +#define Fecode F->ecode +#define Feptr F->eptr +#define Fgroup_frame_type F->group_frame_type +#define Flast_group_offset F->last_group_offset +#define Flength F->length +#define Fmark F->mark +#define Frdepth F->rdepth +#define Fstart_match F->start_match +#define Foffset_top F->offset_top +#define Foccu F->occu +#define Fop F->op +#define Fovector F->ovector +#define Freturn_id F->return_id + + +#ifdef DEBUG_FRAMES_DISPLAY +/************************************************* +* Display current frames and contents * +*************************************************/ + +/* This debugging function displays the current set of frames and their +contents. It is not called automatically from anywhere, the intention being +that calls can be inserted where necessary when debugging frame-related +problems. It is compiled only when DEBUG_FRAMES_DISPLAY is defined. NOTE(review): the %lu conversions receive pointer differences and PCRE2_SIZE values; confirm these match unsigned long on the target platform. + +Arguments: + f the file to write to + F the current top frame + P a previous frame of interest + frame_size the frame size + mb points to the match block + match_data points to the match data block + s identification text (a printf-style format; its arguments follow) + +Returns: nothing +*/ + +static void +display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size, + match_block *mb, pcre2_match_data *match_data, const char *s, ...) 
+{ +uint32_t i; +heapframe *Q; +va_list ap; +va_start(ap, s); + +fprintf(f, "FRAMES "); +vfprintf(f, s, ap); +va_end(ap); + +if (P != NULL) fprintf(f, " P=%lu", + ((char *)P - (char *)(match_data->heapframes))/frame_size); +fprintf(f, "\n"); + +for (i = 0, Q = match_data->heapframes; + Q <= F; + i++, Q = (heapframe *)((char *)Q + frame_size)) + { + fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d", + i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode), + Q->back_frame, Q->return_id); + + if (Q->last_group_offset == PCRE2_UNSET) + fprintf(f, " lgoffset=unset\n"); + else + fprintf(f, " lgoffset=%lu\n", Q->last_group_offset/frame_size); + } +} + +#endif + + + +/************************************************* +* Process a callout * +*************************************************/ + +/* This function is called for all callouts, whether "standalone" or at the +start of a conditional group. Fecode will be pointing to either OP_CALLOUT or +OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized +with fixed values. + +Arguments: + F points to the current backtracking frame + mb points to the match block + lengthptr where to return the length of the callout item (always set, even if there is no callout function) + +Returns: the return from the callout + or 0 if no callout function exists +*/ + +static int +do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr) +{ +int rc; +PCRE2_SIZE save0, save1; +PCRE2_SIZE *callout_ovector; +pcre2_callout_block *cb; + +*lengthptr = (*Fecode == OP_CALLOUT)? + PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE); + +if (mb->callout == NULL) return 0; /* No callout function provided */ + +/* The original matching code (pre 10.30) worked directly with the ovector +passed by the user, and this was passed to callouts. Now that the working +ovector is in the backtracking frame, it no longer needs to reserve space for +the overall match offsets (which would waste space in the frame). 
For backward +compatibility, however, we pass capture_top and offset_vector to the callout as +if for the extended ovector, and we ensure that the first two slots are unset +by preserving and restoring their current contents. Picky compilers complain if +references such as Fovector[-2] are used directly, so we set up a separate +pointer. */ + +callout_ovector = (PCRE2_SIZE *)(Fovector) - 2; + +/* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields +are set externally. The first 3 never change; the last is updated for each +bumpalong. */ + +cb = mb->cb; +cb->capture_top = (uint32_t)Foffset_top/2 + 1; +cb->capture_last = Fcapture_last; +cb->offset_vector = callout_ovector; +cb->mark = mb->nomatch_mark; +cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject); +cb->pattern_position = GET(Fecode, 1); +cb->next_item_length = GET(Fecode, 1 + LINK_SIZE); + +if (*Fecode == OP_CALLOUT) /* Numerical callout */ + { + cb->callout_number = Fecode[1 + 2*LINK_SIZE]; + cb->callout_string_offset = 0; + cb->callout_string = NULL; + cb->callout_string_length = 0; + } +else /* String callout */ + { + cb->callout_number = 0; + cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE); + cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1; + cb->callout_string_length = + *lengthptr - (1 + 4*LINK_SIZE) - 2; + } + +save0 = callout_ovector[0]; +save1 = callout_ovector[1]; +callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET; +rc = mb->callout(cb, mb->callout_data); +callout_ovector[0] = save0; +callout_ovector[1] = save1; +cb->callout_flags = 0; +return rc; +} + + + +/************************************************* +* Match a back-reference * +*************************************************/ + +/* This function is called only when it is known that the offset lies within +the offsets that have so far been used in the match. Note that in caseless +UTF-8 mode, the number of subject bytes matched may be different to the number +of reference bytes. 
(In theory this could also happen in UTF-16 mode, but it +seems unlikely.) + +Arguments: + offset index into the offset vector + caseless TRUE if caseless + F the current backtracking frame pointer + mb points to match block + lengthptr pointer for returning the length matched + +Returns: = 0 successful match; number of code units matched is set in *lengthptr + < 0 no match + > 0 partial match +*/ + +static int +match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb, + PCRE2_SIZE *lengthptr) +{ +PCRE2_SPTR p; +PCRE2_SIZE length; +PCRE2_SPTR eptr; +PCRE2_SPTR eptr_start; + +/* Deal with an unset group. The default is no match, but there is an option to +match an empty string. */ + +if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET) + { + if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0) + { + *lengthptr = 0; + return 0; /* Match */ + } + else return -1; /* No match */ + } + +/* Separate the caseless and UTF cases for speed. */ + +eptr = eptr_start = Feptr; +p = mb->start_subject + Fovector[offset]; +length = Fovector[offset+1] - Fovector[offset]; + +if (caseless) + { +#if defined SUPPORT_UNICODE + BOOL utf = (mb->poptions & PCRE2_UTF) != 0; + + if (utf || (mb->poptions & PCRE2_UCP) != 0) + { + PCRE2_SPTR endptr = p + length; + + /* Match characters up to the end of the reference. NOTE: the number of + code units matched may differ, because in UTF-8 there are some characters + whose upper and lower case codes have different numbers of bytes. For + example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3 + bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a + sequence of two of the latter. It is important, therefore, to check the + length along the reference, not along the subject (earlier code did this + wrong). UCP without UTF uses Unicode properties but without UTF encoding. 
*/ + + while (p < endptr) + { + uint32_t c, d; + const ucd_record *ur; + if (eptr >= mb->end_subject) return 1; /* Partial match */ + + if (utf) + { + GETCHARINC(c, eptr); + GETCHARINC(d, p); + } + else + { + c = *eptr++; + d = *p++; + } + + ur = GET_UCD(d); + if (c != d && c != (uint32_t)((int)d + ur->other_case)) + { + const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset; /* NOTE(review): the scan below presumably relies on each set being in ascending order and ending with a value above any character - confirm against the table generator */ + for (;;) + { + if (c < *pp) return -1; /* No match */ + if (c == *pp++) break; + } + } + } + } + else +#endif + + /* Not in UTF or UCP mode */ + { + for (; length > 0; length--) + { + uint32_t cc, cp; + if (eptr >= mb->end_subject) return 1; /* Partial match */ + cc = UCHAR21TEST(eptr); + cp = UCHAR21TEST(p); + if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc)) + return -1; /* No match */ + p++; + eptr++; + } + } + } + +/* In the caseful case, we can just compare the code units, whether or not we +are in UTF and/or UCP mode. When partial matching, we have to do this unit by +unit. 
*/ + +else + { + if (mb->partial != 0) + { + for (; length > 0; length--) + { + if (eptr >= mb->end_subject) return 1; /* Partial match */ + if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1; /* No match */ + } + } + + /* Not partial matching */ + + else + { + if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */ + if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1; /* No match */ + eptr += length; + } + } + +*lengthptr = eptr - eptr_start; +return 0; /* Match */ +} + + + +/****************************************************************************** +******************************************************************************* + "Recursion" in the match() function + +The original match() function was highly recursive, but this proved to be the +source of a number of problems over the years, mostly because of the relatively +small system stacks that are commonly found. 
As new features were added to +patterns, various kludges were invented to reduce the amount of stack used, +making the code hard to understand in places. + +A version did exist that used individual frames on the heap instead of calling +match() recursively, but this ran substantially slower. The current version is +a refactoring that uses a vector of frames to remember backtracking points. +This runs no slower, and possibly even a bit faster than the original recursive +implementation. + +At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50 +frames) was allocated on the system stack. If this was not big enough, the heap +was used for a larger vector. However, it turns out that there are environments +where taking as little as 20KiB from the system stack is an embarrassment. +After another refactoring, the heap is used exclusively, but a pointer to the +frames vector and its size are cached in the match_data block, so that there is +no new memory allocation if the same match_data block is used for multiple +matches (unless the frames vector has to be extended). +******************************************************************************* +******************************************************************************/ + + + + +/************************************************* +* Macros for the match() function * +*************************************************/ + +/* These macros pack up tests that are used for partial matching several times +in the code. The second one is used when we already know we are past the end of +the subject. We set the "hit end" flag if the pointer is at the end of the +subject and either (a) the pointer is past the earliest inspected character +(i.e. something has been matched, even if not part of the actual matched +string), or (b) the pattern contains a lookbehind. These are the conditions for +which adding more characters may allow the current match to continue. 
+ +For hard partial matching, we immediately return a partial match. Otherwise, +carrying on means that a complete match on the current subject will be sought. +A partial match is returned only if no complete match can be found. */ + +#define CHECK_PARTIAL()\ + if (Feptr >= mb->end_subject) \ + { \ + SCHECK_PARTIAL(); \ + } + +#define SCHECK_PARTIAL()\ + if (mb->partial != 0 && \ + (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \ + { \ + mb->hitend = TRUE; \ + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \ + } + + +/* These macros are used to implement backtracking. They simulate a recursive +call to the match() function by means of a local vector of frames which +remember the backtracking points. */ + +#define RMATCH(ra,rb)\ + {\ + start_ecode = ra;\ + Freturn_id = rb;\ + goto MATCH_RECURSE;\ + L_##rb:;\ + } + +#define RRETURN(ra)\ + {\ + rrc = ra;\ + goto RETURN_SWITCH;\ + } + + + +/************************************************* +* Match from current position * +*************************************************/ + +/* This function is called to run one match attempt at a single starting point +in the subject. + +Performance note: It might be tempting to extract commonly used fields from the +mb structure (e.g. end_subject) into individual variables to improve +performance. Tests using gcc on a SPARC disproved this; in the first case, it +made performance worse. + +Arguments: + start_eptr starting character in subject + start_ecode starting position in compiled code + top_bracket number of capturing parentheses in the pattern + frame_size size of each backtracking frame + match_data pointer to the match_data block + mb pointer to "static" variables block + +Returns: MATCH_MATCH if matched ) these values are >= 0 + MATCH_NOMATCH if failed to match ) + negative MATCH_xxx value for PRUNE, SKIP, etc + negative PCRE2_ERROR_xxx value if aborted by an error condition + (e.g. 
stopped by repeated call or depth limit) +*/ + +static int +match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, + PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb) +{ +/* Frame-handling variables */ + +heapframe *F; /* Current frame pointer */ +heapframe *N = NULL; /* Temporary frame pointers */ +heapframe *P = NULL; + +heapframe *frames_top; /* End of frames vector */ +heapframe *assert_accept_frame = NULL; /* For passing back a frame with captures */ +PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */ + +/* Local variables that do not need to be preserved over calls to RRMATCH(). */ + +PCRE2_SPTR branch_end = NULL; +PCRE2_SPTR branch_start; +PCRE2_SPTR bracode; /* Temp pointer to start of group */ +PCRE2_SIZE offset; /* Used for group offsets */ +PCRE2_SIZE length; /* Used for various length calculations */ + +int rrc; /* Return from functions & backtracking "recursions" */ +#ifdef SUPPORT_UNICODE +int proptype; /* Type of character property */ +#endif + +uint32_t i; /* Used for local loops */ +uint32_t fc; /* Character values */ +uint32_t number; /* Used for group and other numbers */ +uint32_t reptype = 0; /* Type of repetition (0 to avoid compiler warning) */ +uint32_t group_frame_type; /* Specifies type for new group frames */ + +BOOL condition; /* Used in conditional groups */ +BOOL cur_is_word; /* Used in "word" tests */ +BOOL prev_is_word; /* Used in "word" tests */ + +/* UTF and UCP flags */ + +#ifdef SUPPORT_UNICODE +BOOL utf = (mb->poptions & PCRE2_UTF) != 0; +BOOL ucp = (mb->poptions & PCRE2_UCP) != 0; +#else +BOOL utf = FALSE; /* Required for convenience even when no Unicode support */ +#endif + +/* This is the length of the last part of a backtracking frame that must be +copied when a new frame is created. */ + +frame_copy_size = frame_size - offsetof(heapframe, eptr); + +/* Set up the first frame and the end of the frames vector. 
*/ + +F = match_data->heapframes; +frames_top = (heapframe *)((char *)F + match_data->heapframes_size); + +Frdepth = 0; /* "Recursion" depth */ +Fcapture_last = 0; /* Number of most recent capture */ +Fcurrent_recurse = RECURSE_UNSET; /* Not pattern recursing. */ +Fstart_match = Feptr = start_eptr; /* Current data pointer and start match */ +Fmark = NULL; /* Most recent mark */ +Foffset_top = 0; /* End of captures within the frame */ +Flast_group_offset = PCRE2_UNSET; /* Saved frame of most recent group */ +group_frame_type = 0; /* Not a start of group frame */ +goto NEW_FRAME; /* Start processing with this frame */ + +/* Come back here when we want to create a new frame for remembering a +backtracking point. */ + +MATCH_RECURSE: + +/* Set up a new backtracking frame. If the vector is full, get a new one, +doubling the size, but constrained by the heap limit (which is in KiB). */ + +N = (heapframe *)((char *)F + frame_size); +if ((heapframe *)((char *)N + frame_size) >= frames_top) + { + heapframe *new; + PCRE2_SIZE newsize; + PCRE2_SIZE usedsize = (char *)N - (char *)(match_data->heapframes); + + if (match_data->heapframes_size >= PCRE2_SIZE_MAX / 2) + { + if (match_data->heapframes_size == PCRE2_SIZE_MAX - 1) + return PCRE2_ERROR_NOMEMORY; + newsize = PCRE2_SIZE_MAX - 1; + } + else + newsize = match_data->heapframes_size * 2; + + if (newsize / 1024 >= mb->heap_limit) + { + PCRE2_SIZE old_size = match_data->heapframes_size / 1024; + if (mb->heap_limit <= old_size) + return PCRE2_ERROR_HEAPLIMIT; + else + { + PCRE2_SIZE max_delta = 1024 * (mb->heap_limit - old_size); + int over_bytes = match_data->heapframes_size % 1024; + if (over_bytes) max_delta -= (1024 - over_bytes); + newsize = match_data->heapframes_size + max_delta; + } + } + + /* With a heap limit set, the permitted additional size may not be enough for + another frame, so do a final check. 
*/ + + if (newsize - usedsize < frame_size) return PCRE2_ERROR_HEAPLIMIT; + new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data); + if (new == NULL) return PCRE2_ERROR_NOMEMORY; + memcpy(new, match_data->heapframes, usedsize); + + N = (heapframe *)((char *)new + usedsize); + F = (heapframe *)((char *)N - frame_size); + + match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data); + match_data->heapframes = new; + match_data->heapframes_size = newsize; + frames_top = (heapframe *)((char *)new + newsize); + } + +#ifdef DEBUG_SHOW_RMATCH +fprintf(stderr, "++ RMATCH %d frame=%d", Freturn_id, Frdepth + 1); +if (group_frame_type != 0) + { + fprintf(stderr, " type=%x ", group_frame_type); + switch (GF_IDMASK(group_frame_type)) + { + case GF_CAPTURE: + fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type)); + break; + + case GF_NOCAPTURE: + fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type)); + break; + + case GF_CONDASSERT: + fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type)); + break; + + case GF_RECURSE: + fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type)); + break; + + default: + fprintf(stderr, "*** unknown ***"); + break; + } + } +fprintf(stderr, "\n"); +#endif + +/* Copy those fields that must be copied into the new frame, increase the +"recursion" depth (i.e. the new frame's index) and then make the new frame +current. */ + +memcpy((char *)N + offsetof(heapframe, eptr), + (char *)F + offsetof(heapframe, eptr), + frame_copy_size); + +N->rdepth = Frdepth + 1; +F = N; + +/* Carry on processing with a new frame. */ + +NEW_FRAME: +Fgroup_frame_type = group_frame_type; +Fecode = start_ecode; /* Starting code pointer */ +Fback_frame = frame_size; /* Default is go back one frame */ + +/* If this is a special type of group frame, remember its offset for quick +access at the end of the group. If this is a recursion, set a new current +recursion value. 
*/ + +if (group_frame_type != 0) + { + Flast_group_offset = (char *)F - (char *)match_data->heapframes; + if (GF_IDMASK(group_frame_type) == GF_RECURSE) + Fcurrent_recurse = GF_DATAMASK(group_frame_type); + group_frame_type = 0; + } + + +/* ========================================================================= */ +/* This is the main processing loop. First check that we haven't recorded too +many backtracks (search tree is too large), or that we haven't exceeded the +recursive depth limit (used too many backtracking frames). If not, process the +opcodes. */ + +if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT; +if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT; + +#ifdef DEBUG_SHOW_OPS +fprintf(stderr, "\n++ New frame: type=0x%x subject offset %ld\n", + GF_IDMASK(Fgroup_frame_type), Feptr - mb->start_subject); +#endif + +for (;;) + { +#ifdef DEBUG_SHOW_OPS +fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, + OP_names[*Fecode]); +#endif + + Fop = (uint8_t)(*Fecode); /* Cast needed for 16-bit and 32-bit modes */ + switch(Fop) + { + /* ===================================================================== */ + /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close + any currently open capturing brackets. Unlike reaching the end of a group, + where we know the starting frame is at the top of the chained frames, in + this case we have to search back for the relevant frame in case other types + of group that use chained frames have intervened. Multiple OP_CLOSEs always + come innermost first, which matches the chain order. We can ignore this in + a recursion, because captures are not passed out of recursions. 
*/ + + case OP_CLOSE: + if (Fcurrent_recurse == RECURSE_UNSET) + { + number = GET2(Fecode, 1); + offset = Flast_group_offset; + for(;;) + { + if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; + N = (heapframe *)((char *)match_data->heapframes + offset); + P = (heapframe *)((char *)N - frame_size); + if (N->group_frame_type == (GF_CAPTURE | number)) break; + offset = P->last_group_offset; + } + offset = (number << 1) - 2; + Fcapture_last = number; + Fovector[offset] = P->eptr - mb->start_subject; + Fovector[offset+1] = Feptr - mb->start_subject; + if (offset >= Foffset_top) Foffset_top = offset + 2; + } + Fecode += PRIV(OP_lengths)[*Fecode]; + break; + + + /* ===================================================================== */ + /* Real or forced end of the pattern, assertion, or recursion. In an + assertion ACCEPT, update the last used pointer and remember the current + frame so that the captures and mark can be fished out of it. */ + + case OP_ASSERT_ACCEPT: + if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; + assert_accept_frame = F; + RRETURN(MATCH_ACCEPT); + + /* For ACCEPT within a recursion, we have to find the most recent + recursion. If not in a recursion, fall through to code that is common with + OP_END. */ + + case OP_ACCEPT: + if (Fcurrent_recurse != RECURSE_UNSET) + { +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Accept within recursion\n"); +#endif + offset = Flast_group_offset; + for(;;) + { + if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; + N = (heapframe *)((char *)match_data->heapframes + offset); + P = (heapframe *)((char *)N - frame_size); + if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break; + offset = P->last_group_offset; + } + + /* N is now the frame of the recursion; the previous frame is at the + OP_RECURSE position. Go back there, copying the current subject position + and mark, and the start_match position (\K might have changed it), and + then move on past the OP_RECURSE. 
*/ + + P->eptr = Feptr; + P->mark = Fmark; + P->start_match = Fstart_match; + F = P; + Fecode += 1 + LINK_SIZE; + continue; + } + /* Fall through */ + + /* OP_END itself can never be reached within a recursion because that is + picked up when the OP_KET that always precedes OP_END is reached. */ + + case OP_END: + + /* Fail for an empty string match if either PCRE2_NOTEMPTY is set, or if + PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the + subject. In both cases, backtracking will then try other alternatives, if + any. */ + + if (Feptr == Fstart_match && + ((mb->moptions & PCRE2_NOTEMPTY) != 0 || + ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 && + Fstart_match == mb->start_subject + mb->start_offset))) + { +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Backtrack because empty string\n"); +#endif + RRETURN(MATCH_NOMATCH); + } + + /* Fail if PCRE2_ENDANCHORED is set and the end of the match is not + the end of the subject. After (*ACCEPT) we fail the entire match (at this + position) but backtrack if we've reached the end of the pattern. This + applies whether or not we are in a recursion. */ + + if (Feptr < mb->end_subject && + ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0) + { + if (Fop == OP_END) + { +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Backtrack because not at end (endanchored set)\n"); +#endif + RRETURN(MATCH_NOMATCH); + } + +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Failed ACCEPT not at end (endanchnored set)\n"); +#endif + return MATCH_NOMATCH; /* (*ACCEPT) */ + } + + /* We have a successful match of the whole pattern. Record the result and + then do a direct return from the function. If there is space in the offset + vector, set any pairs that follow the highest-numbered captured string but + are less than the number of capturing groups in the pattern to PCRE2_UNSET. + It is documented that this happens. "Gaps" are set to PCRE2_UNSET + dynamically. It is only those at the end that need setting here. 
*/ + + mb->end_match_ptr = Feptr; /* Record where we ended */ + mb->end_offset_top = Foffset_top; /* and how many extracts were taken */ + mb->mark = Fmark; /* and the last success mark */ + if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; + + match_data->ovector[0] = Fstart_match - mb->start_subject; + match_data->ovector[1] = Feptr - mb->start_subject; + + /* Set i to the smaller of the sizes of the external and frame ovectors. */ + + i = 2 * ((top_bracket + 1 > match_data->oveccount)? + match_data->oveccount : top_bracket + 1); + memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE)); + while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET; + return MATCH_MATCH; /* Note: NOT RRETURN */ + + + /*===================================================================== */ + /* Match any single character type except newline; have to take care with + CRLF newlines and partial matching. */ + + case OP_ANY: + if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH); + if (mb->partial != 0 && + Feptr == mb->end_subject - 1 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + UCHAR21TEST(Feptr) == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + /* Fall through */ + + /* Match any single character whatsoever. */ + + case OP_ALLANY: + if (Feptr >= mb->end_subject) /* DO NOT merge the Feptr++ here; it must */ + { /* not be updated before SCHECK_PARTIAL. */ + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + Feptr++; +#ifdef SUPPORT_UNICODE + if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); +#endif + Fecode++; + break; + + + /* ===================================================================== */ + /* Match a single code unit, even in UTF mode. This opcode really does + match any code unit, even newline. (It really should be called ANYCODEUNIT, + of course - the byte name is from pre-16 bit days.) 
*/ + + case OP_ANYBYTE: + if (Feptr >= mb->end_subject) /* DO NOT merge the Feptr++ here; it must */ + { /* not be updated before SCHECK_PARTIAL. */ + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + Feptr++; + Fecode++; + break; + + + /* ===================================================================== */ + /* Match a single character, casefully */ + + case OP_CHAR: +#ifdef SUPPORT_UNICODE + if (utf) + { + Flength = 1; + Fecode++; + GETCHARLEN(fc, Fecode, Flength); + if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr)) + { + CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ + RRETURN(MATCH_NOMATCH); + } + for (; Flength > 0; Flength--) + { + if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH); + } + } + else +#endif + + /* Not UTF mode */ + { + if (mb->end_subject - Feptr < 1) + { + SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ + RRETURN(MATCH_NOMATCH); + } + if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH); + Fecode += 2; + } + break; + + + /* ===================================================================== */ + /* Match a single character, caselessly. If we are at the end of the + subject, give up immediately. We get here only when the pattern character + has at most one other case. Characters with more than two cases are coded + as OP_PROP with the pseudo-property PT_CLIST. */ + + case OP_CHARI: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + +#ifdef SUPPORT_UNICODE + if (utf) + { + Flength = 1; + Fecode++; + GETCHARLEN(fc, Fecode, Flength); + + /* If the pattern character's value is < 128, we know that its other case + (if any) is also < 128 (and therefore only one code unit long in all + code-unit widths), so we can use the fast lookup table. We checked above + that there is at least one character left in the subject. 
*/ + + if (fc < 128) + { + uint32_t cc = UCHAR21(Feptr); + if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH); + Fecode++; + Feptr++; + } + + /* Otherwise we must pick up the subject character and use Unicode + property support to test its other case. Note that we cannot use the + value of "Flength" to check for sufficient bytes left, because the other + case of the character may have more or fewer code units. */ + + else + { + uint32_t dc; + GETCHARINC(dc, Feptr); + Fecode += Flength; + if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH); + } + } + + /* If UCP is set without UTF we must do the same as above, but with one + character per code unit. */ + + else if (ucp) + { + uint32_t cc = UCHAR21(Feptr); + fc = Fecode[1]; + if (fc < 128) + { + if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH); + } + else + { + if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH); + } + Feptr++; + Fecode += 2; + } + + else +#endif /* SUPPORT_UNICODE */ + + /* Not UTF or UCP mode; use the table for characters < 256. */ + { + if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1]) + != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH); + Feptr++; + Fecode += 2; + } + break; + + + /* ===================================================================== */ + /* Match not a single character. */ + + case OP_NOT: + case OP_NOTI: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t ch; + Fecode++; + GETCHARINC(ch, Fecode); + GETCHARINC(fc, Feptr); + if (ch == fc) + { + RRETURN(MATCH_NOMATCH); /* Caseful match */ + } + else if (Fop == OP_NOTI) /* If caseless */ + { + if (ch > 127) + ch = UCD_OTHERCASE(ch); + else + ch = (mb->fcc)[ch]; + if (ch == fc) RRETURN(MATCH_NOMATCH); + } + } + + /* UCP without UTF is as above, but with one character per code unit. 
*/ + + else if (ucp) + { + uint32_t ch; + fc = UCHAR21INC(Feptr); + ch = Fecode[1]; + Fecode += 2; + + if (ch == fc) + { + RRETURN(MATCH_NOMATCH); /* Caseful match */ + } + else if (Fop == OP_NOTI) /* If caseless */ + { + if (ch > 127) + ch = UCD_OTHERCASE(ch); + else + ch = (mb->fcc)[ch]; + if (ch == fc) RRETURN(MATCH_NOMATCH); + } + } + + else +#endif /* SUPPORT_UNICODE */ + + /* Neither UTF nor UCP is set */ + + { + uint32_t ch = Fecode[1]; + fc = UCHAR21INC(Feptr); + if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc)) + RRETURN(MATCH_NOMATCH); + Fecode += 2; + } + break; + + + /* ===================================================================== */ + /* Match a single character repeatedly. */ + +#define Loclength F->temp_size +#define Lstart_eptr F->temp_sptr[0] +#define Lcharptr F->temp_sptr[1] +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] +#define Lc F->temp_32[2] +#define Loc F->temp_32[3] + + case OP_EXACT: + case OP_EXACTI: + Lmin = Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATCHAR; + + case OP_POSUPTO: + case OP_POSUPTOI: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATCHAR; + + case OP_UPTO: + case OP_UPTOI: + reptype = REPTYPE_MAX; + Lmin = 0; + Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATCHAR; + + case OP_MINUPTO: + case OP_MINUPTOI: + reptype = REPTYPE_MIN; + Lmin = 0; + Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATCHAR; + + case OP_POSSTAR: + case OP_POSSTARI: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = UINT32_MAX; + Fecode++; + goto REPEATCHAR; + + case OP_POSPLUS: + case OP_POSPLUSI: + reptype = REPTYPE_POS; + Lmin = 1; + Lmax = UINT32_MAX; + Fecode++; + goto REPEATCHAR; + + case OP_POSQUERY: + case OP_POSQUERYI: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = 1; + Fecode++; + goto REPEATCHAR; + + case OP_STAR: + case OP_STARI: + case OP_MINSTAR: + case OP_MINSTARI: + case OP_PLUS: + case OP_PLUSI: + case 
OP_MINPLUS: + case OP_MINPLUSI: + case OP_QUERY: + case OP_QUERYI: + case OP_MINQUERY: + case OP_MINQUERYI: + fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI); + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + + /* Common code for all repeated single-character matches. We first check + for the minimum number of characters. If the minimum equals the maximum, we + are done. Otherwise, if minimizing, check the rest of the pattern for a + match; if there isn't one, advance up to the maximum, one character at a + time. + + If maximizing, advance up to the maximum number of matching characters, + until Feptr is past the end of the maximum run. If possessive, we are + then done (no backing up). Otherwise, match at this position; anything + other than no match is immediately returned. For nomatch, back up one + character, unless we are matching \R and the last thing matched was + \r\n, in which case, back up two code units until we reach the first + optional character position. + + The various UTF/non-UTF and caseful/caseless cases are handled separately, + for speed. */ + + REPEATCHAR: +#ifdef SUPPORT_UNICODE + if (utf) + { + Flength = 1; + Lcharptr = Fecode; + GETCHARLEN(fc, Fecode, Flength); + Fecode += Flength; + + /* Handle multi-code-unit character matching, caseful and caseless. 
*/ + + if (Flength > 1) + { + uint32_t othercase; + + if (Fop >= OP_STARI && /* Caseless */ + (othercase = UCD_OTHERCASE(fc)) != fc) + Loclength = PRIV(ord2utf)(othercase, Foccu); + else Loclength = 0; + + for (i = 1; i <= Lmin; i++) + { + if (Feptr <= mb->end_subject - Flength && + memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength; + else if (Loclength > 0 && + Feptr <= mb->end_subject - Loclength && + memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0) + Feptr += Loclength; + else + { + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + } + + if (Lmin == Lmax) continue; + + if (reptype == REPTYPE_MIN) + { + for (;;) + { + RMATCH(Fecode, RM202); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr <= mb->end_subject - Flength && + memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength; + else if (Loclength > 0 && + Feptr <= mb->end_subject - Loclength && + memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0) + Feptr += Loclength; + else + { + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + } + /* Control never gets here */ + } + + else /* Maximize */ + { + Lstart_eptr = Feptr; + for (i = Lmin; i < Lmax; i++) + { + if (Feptr <= mb->end_subject - Flength && + memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) + Feptr += Flength; + else if (Loclength > 0 && + Feptr <= mb->end_subject - Loclength && + memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0) + Feptr += Loclength; + else + { + CHECK_PARTIAL(); + break; + } + } + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. */ + + if (reptype != REPTYPE_POS) for(;;) + { + if (Feptr <= Lstart_eptr) break; + RMATCH(Fecode, RM203); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + BACKCHAR(Feptr); + } + } + break; /* End of repeated wide character handling */ + } + + /* Length of UTF character is 1. 
Put it into the preserved variable and + fall through to the non-UTF code. */ + + Lc = fc; + } + else +#endif /* SUPPORT_UNICODE */ + + /* When not in UTF mode, load a single-code-unit character. Then proceed as + above, using Unicode casing if either UTF or UCP is set. */ + + Lc = *Fecode++; + + /* Caseless comparison */ + + if (Fop >= OP_STARI) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 +#ifdef SUPPORT_UNICODE + if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc); + else +#endif /* SUPPORT_UNICODE */ + /* Lc will be < 128 in UTF-8 mode. */ + Loc = mb->fcc[Lc]; +#else /* 16-bit & 32-bit */ +#ifdef SUPPORT_UNICODE + if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc); + else +#endif /* SUPPORT_UNICODE */ + Loc = TABLE_GET(Lc, mb->fcc, Lc); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + + for (i = 1; i <= Lmin; i++) + { + uint32_t cc; /* Faster than PCRE2_UCHAR */ + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21TEST(Feptr); + if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH); + Feptr++; + } + if (Lmin == Lmax) continue; + + if (reptype == REPTYPE_MIN) + { + for (;;) + { + uint32_t cc; /* Faster than PCRE2_UCHAR */ + RMATCH(Fecode, RM25); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21TEST(Feptr); + if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH); + Feptr++; + } + /* Control never gets here */ + } + + else /* Maximize */ + { + Lstart_eptr = Feptr; + for (i = Lmin; i < Lmax; i++) + { + uint32_t cc; /* Faster than PCRE2_UCHAR */ + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + cc = UCHAR21TEST(Feptr); + if (Lc != cc && Loc != cc) break; + Feptr++; + } + if (reptype != REPTYPE_POS) for (;;) + { + if (Feptr == Lstart_eptr) break; + RMATCH(Fecode, RM26); + Feptr--; + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + } + } + + /* Caseful comparisons (includes all 
multi-byte characters) */ + + else + { + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH); + } + + if (Lmin == Lmax) continue; + + if (reptype == REPTYPE_MIN) + { + for (;;) + { + RMATCH(Fecode, RM27); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + } + else /* Maximize */ + { + Lstart_eptr = Feptr; + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + + if (Lc != UCHAR21TEST(Feptr)) break; + Feptr++; + } + + if (reptype != REPTYPE_POS) for (;;) + { + if (Feptr <= Lstart_eptr) break; + RMATCH(Fecode, RM28); + Feptr--; + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + } + } + break; + +#undef Loclength +#undef Lstart_eptr +#undef Lcharptr +#undef Lmin +#undef Lmax +#undef Lc +#undef Loc + + + /* ===================================================================== */ + /* Match a negated single one-byte character repeatedly. This is almost a + repeat of the code for a repeated single character, but I haven't found a + nice way of commoning these up that doesn't require a test of the + positive/negative option for each character match. Maybe that wouldn't add + very much to the time taken, but character matching *is* what this is all + about... 
*/ + +#define Lstart_eptr F->temp_sptr[0] +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] +#define Lc F->temp_32[2] +#define Loc F->temp_32[3] + + case OP_NOTEXACT: + case OP_NOTEXACTI: + Lmin = Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATNOTCHAR; + + case OP_NOTUPTO: + case OP_NOTUPTOI: + Lmin = 0; + Lmax = GET2(Fecode, 1); + reptype = REPTYPE_MAX; + Fecode += 1 + IMM2_SIZE; + goto REPEATNOTCHAR; + + case OP_NOTMINUPTO: + case OP_NOTMINUPTOI: + Lmin = 0; + Lmax = GET2(Fecode, 1); + reptype = REPTYPE_MIN; + Fecode += 1 + IMM2_SIZE; + goto REPEATNOTCHAR; + + case OP_NOTPOSSTAR: + case OP_NOTPOSSTARI: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = UINT32_MAX; + Fecode++; + goto REPEATNOTCHAR; + + case OP_NOTPOSPLUS: + case OP_NOTPOSPLUSI: + reptype = REPTYPE_POS; + Lmin = 1; + Lmax = UINT32_MAX; + Fecode++; + goto REPEATNOTCHAR; + + case OP_NOTPOSQUERY: + case OP_NOTPOSQUERYI: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = 1; + Fecode++; + goto REPEATNOTCHAR; + + case OP_NOTPOSUPTO: + case OP_NOTPOSUPTOI: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATNOTCHAR; + + case OP_NOTSTAR: + case OP_NOTSTARI: + case OP_NOTMINSTAR: + case OP_NOTMINSTARI: + case OP_NOTPLUS: + case OP_NOTPLUSI: + case OP_NOTMINPLUS: + case OP_NOTMINPLUSI: + case OP_NOTQUERY: + case OP_NOTQUERYI: + case OP_NOTMINQUERY: + case OP_NOTMINQUERYI: + fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR); + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + + /* Common code for all repeated single-character non-matches. */ + + REPEATNOTCHAR: + GETCHARINCTEST(Lc, Fecode); + + /* The code is duplicated for the caseless and caseful cases, for speed, + since matching characters is likely to be quite common. First, ensure the + minimum number of matches are present. If Lmin = Lmax, we are done. 
+ Otherwise, if minimizing, keep trying the rest of the expression and + advancing one matching character if failing, up to the maximum. + Alternatively, if maximizing, find the maximum number of characters and + work backwards. */ + + if (Fop >= OP_NOTSTARI) /* Caseless */ + { +#ifdef SUPPORT_UNICODE + if ((utf || ucp) && Lc > 127) + Loc = UCD_OTHERCASE(Lc); + else +#endif /* SUPPORT_UNICODE */ + + Loc = TABLE_GET(Lc, mb->fcc, Lc); /* Other case from table */ + +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t d; + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(d, Feptr); + if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH); + } + } + else +#endif /* SUPPORT_UNICODE */ + + /* Not UTF mode */ + { + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH); + Feptr++; + } + } + + if (Lmin == Lmax) continue; /* Finished for exact count */ + + if (reptype == REPTYPE_MIN) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t d; + for (;;) + { + RMATCH(Fecode, RM204); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(d, Feptr); + if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH); + } + } + else +#endif /*SUPPORT_UNICODE */ + + /* Not UTF mode */ + { + for (;;) + { + RMATCH(Fecode, RM29); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH); + Feptr++; + } + } + /* Control never gets here */ + } + + /* Maximize case */ + + else + { + Lstart_eptr = Feptr; + +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t d; + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if 
(Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(d, Feptr, len); + if (Lc == d || Loc == d) break; + Feptr += len; + } + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. */ + + if (reptype != REPTYPE_POS) for(;;) + { + if (Feptr <= Lstart_eptr) break; + RMATCH(Fecode, RM205); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + BACKCHAR(Feptr); + } + } + else +#endif /* SUPPORT_UNICODE */ + + /* Not UTF mode */ + { + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (Lc == *Feptr || Loc == *Feptr) break; + Feptr++; + } + if (reptype != REPTYPE_POS) for (;;) + { + if (Feptr == Lstart_eptr) break; + RMATCH(Fecode, RM30); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + } + } + } + } + + /* Caseful comparisons */ + + else + { +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t d; + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(d, Feptr); + if (Lc == d) RRETURN(MATCH_NOMATCH); + } + } + else +#endif + /* Not UTF mode */ + { + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH); + } + } + + if (Lmin == Lmax) continue; + + if (reptype == REPTYPE_MIN) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t d; + for (;;) + { + RMATCH(Fecode, RM206); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(d, Feptr); + if (Lc == d) RRETURN(MATCH_NOMATCH); + } + } + else +#endif + /* Not UTF mode */ + { + for (;;) + { + RMATCH(Fecode, RM31); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + 
SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH); + } + } + /* Control never gets here */ + } + + /* Maximize case */ + + else + { + Lstart_eptr = Feptr; + +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t d; + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(d, Feptr, len); + if (Lc == d) break; + Feptr += len; + } + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. */ + + if (reptype != REPTYPE_POS) for(;;) + { + if (Feptr <= Lstart_eptr) break; + RMATCH(Fecode, RM207); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + BACKCHAR(Feptr); + } + } + else +#endif + /* Not UTF mode */ + { + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (Lc == *Feptr) break; + Feptr++; + } + if (reptype != REPTYPE_POS) for (;;) + { + if (Feptr == Lstart_eptr) break; + RMATCH(Fecode, RM32); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + } + } + } + } + break; + +#undef Lstart_eptr +#undef Lmin +#undef Lmax +#undef Lc +#undef Loc + + + /* ===================================================================== */ + /* Match a bit-mapped character class, possibly repeatedly. These opcodes + are used when all the characters in the class have values in the range + 0-255, and either the matching is caseful, or the characters are in the + range 0-127 when UTF processing is enabled. The only difference between + OP_CLASS and OP_NCLASS occurs when a data character outside the range is + encountered. 
*/ + +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] +#define Lstart_eptr F->temp_sptr[0] +#define Lbyte_map_address F->temp_sptr[1] +#define Lbyte_map ((unsigned char *)Lbyte_map_address) + + case OP_NCLASS: + case OP_CLASS: + { + Lbyte_map_address = Fecode + 1; /* Save for matching */ + Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */ + + /* Look past the end of the item to see if there is repeat information + following. Then obey similar code to character type repeats. */ + + switch (*Fecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + fc = *Fecode++ - OP_CRSTAR; + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + Lmin = GET2(Fecode, 1); + Lmax = GET2(Fecode, 1 + IMM2_SIZE); + if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */ + reptype = rep_typ[*Fecode - OP_CRSTAR]; + Fecode += 1 + 2 * IMM2_SIZE; + break; + + default: /* No repeat follows */ + Lmin = Lmax = 1; + break; + } + + /* First, ensure the minimum number of matches are present. */ + +#ifdef SUPPORT_UNICODE + if (utf) + { + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + if (fc > 255) + { + if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH); + } + else + if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH); + } + } + else +#endif + /* Not UTF mode */ + { + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + fc = *Feptr++; +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (fc > 255) + { + if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH); + } + else +#endif + if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH); + } + } + + /* If Lmax == Lmin we are done. 
Continue with main loop. */ + + if (Lmin == Lmax) continue; + + /* If minimizing, keep testing the rest of the expression and advancing + the pointer while it matches the class. */ + + if (reptype == REPTYPE_MIN) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + for (;;) + { + RMATCH(Fecode, RM200); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + if (fc > 255) + { + if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH); + } + else + if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH); + } + } + else +#endif + /* Not UTF mode */ + { + for (;;) + { + RMATCH(Fecode, RM23); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + fc = *Feptr++; +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (fc > 255) + { + if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH); + } + else +#endif + if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH); + } + } + /* Control never gets here */ + } + + /* If maximizing, find the longest possible run, then work backwards. */ + + else + { + Lstart_eptr = Feptr; + +#ifdef SUPPORT_UNICODE + if (utf) + { + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc > 255) + { + if (Fop == OP_CLASS) break; + } + else + if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break; + Feptr += len; + } + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. 
*/ + + for (;;) + { + RMATCH(Fecode, RM201); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */ + BACKCHAR(Feptr); + } + } + else +#endif + /* Not UTF mode */ + { + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + fc = *Feptr; +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (fc > 255) + { + if (Fop == OP_CLASS) break; + } + else +#endif + if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break; + Feptr++; + } + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + while (Feptr >= Lstart_eptr) + { + RMATCH(Fecode, RM24); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + } + } + + RRETURN(MATCH_NOMATCH); + } + } + /* Control never gets here */ + +#undef Lbyte_map_address +#undef Lbyte_map +#undef Lstart_eptr +#undef Lmin +#undef Lmax + + + /* ===================================================================== */ + /* Match an extended character class. In the 8-bit library, this opcode is + encountered only when UTF-8 mode is supported. In the 16-bit and + 32-bit libraries, codepoints greater than 255 may be encountered even when + UTF is not supported. 
*/ + +#define Lstart_eptr F->temp_sptr[0] +#define Lxclass_data F->temp_sptr[1] +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] + +#ifdef SUPPORT_WIDE_CHARS + case OP_XCLASS: + { + Lxclass_data = Fecode + 1 + LINK_SIZE; /* Save for matching */ + Fecode += GET(Fecode, 1); /* Advance past the item */ + + switch (*Fecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + fc = *Fecode++ - OP_CRSTAR; + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + Lmin = GET2(Fecode, 1); + Lmax = GET2(Fecode, 1 + IMM2_SIZE); + if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */ + reptype = rep_typ[*Fecode - OP_CRSTAR]; + Fecode += 1 + 2 * IMM2_SIZE; + break; + + default: /* No repeat follows */ + Lmin = Lmax = 1; + break; + } + + /* First, ensure the minimum number of matches are present. */ + + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH); + } + + /* If Lmax == Lmin we can just continue with the main loop. */ + + if (Lmin == Lmax) continue; + + /* If minimizing, keep testing the rest of the expression and advancing + the pointer while it matches the class. */ + + if (reptype == REPTYPE_MIN) + { + for (;;) + { + RMATCH(Fecode, RM100); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + } + + /* If maximizing, find the longest possible run, then work backwards. 
*/ + + else + { + Lstart_eptr = Feptr; + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } +#ifdef SUPPORT_UNICODE + GETCHARLENTEST(fc, Feptr, len); +#else + fc = *Feptr; +#endif + if (!PRIV(xclass)(fc, Lxclass_data, utf)) break; + Feptr += len; + } + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. */ + + for(;;) + { + RMATCH(Fecode, RM101); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */ +#ifdef SUPPORT_UNICODE + if (utf) BACKCHAR(Feptr); +#endif + } + RRETURN(MATCH_NOMATCH); + } + + /* Control never gets here */ + } +#endif /* SUPPORT_WIDE_CHARS: end of XCLASS */ + +#undef Lstart_eptr +#undef Lxclass_data +#undef Lmin +#undef Lmax + + + /* ===================================================================== */ + /* Match various character types when PCRE2_UCP is not set. These opcodes + are not generated when PCRE2_UCP is set - instead appropriate property + tests are compiled. 
*/ + + case OP_NOT_DIGIT: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_DIGIT: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_NOT_WHITESPACE: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_WHITESPACE: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_NOT_WORDCHAR: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_WORDCHAR: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_ANYNL: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + default: RRETURN(MATCH_NOMATCH); + + case CHAR_CR: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + } + else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++; + break; + + case CHAR_LF: + break; + + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#ifndef EBCDIC + case 0x2028: + case 0x2029: +#endif /* Not EBCDIC */ + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) 
RRETURN(MATCH_NOMATCH); + break; + } + Fecode++; + break; + + case OP_NOT_HSPACE: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */ + default: break; + } + Fecode++; + break; + + case OP_HSPACE: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + HSPACE_CASES: break; /* Byte and multibyte cases */ + default: RRETURN(MATCH_NOMATCH); + } + Fecode++; + break; + + case OP_NOT_VSPACE: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + VSPACE_CASES: RRETURN(MATCH_NOMATCH); + default: break; + } + Fecode++; + break; + + case OP_VSPACE: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + VSPACE_CASES: break; + default: RRETURN(MATCH_NOMATCH); + } + Fecode++; + break; + + +#ifdef SUPPORT_UNICODE + + /* ===================================================================== */ + /* Check the next character by Unicode property. We will get here only + if the support is in the binary; otherwise a compile-time error occurs. 
*/ + + case OP_PROP: + case OP_NOTPROP: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + { + const uint32_t *cp; + uint32_t chartype; + const ucd_record *prop = GET_UCD(fc); + BOOL notmatch = Fop == OP_NOTPROP; + + switch(Fecode[1]) + { + case PT_ANY: + if (notmatch) RRETURN(MATCH_NOMATCH); + break; + + case PT_LAMP: + chartype = prop->chartype; + if ((chartype == ucp_Lu || + chartype == ucp_Ll || + chartype == ucp_Lt) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_GC: + if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_PC: + if ((Fecode[2] == prop->chartype) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_SC: + if ((Fecode[2] == prop->script) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_SCX: + { + BOOL ok = (Fecode[2] == prop->script || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0); + if (ok == notmatch) RRETURN(MATCH_NOMATCH); + } + break; + + /* These are specials */ + + case PT_ALNUM: + chartype = prop->chartype; + if ((PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. 
*/ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + switch(fc) + { + HSPACE_CASES: + VSPACE_CASES: + if (notmatch) RRETURN(MATCH_NOMATCH); + break; + + default: + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + } + break; + + case PT_WORD: + chartype = prop->chartype; + if ((PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || + chartype == ucp_Mn || + chartype == ucp_Pc) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (notmatch) break;; + RRETURN(MATCH_NOMATCH); + } +#endif + cp = PRIV(ucd_caseless_sets) + Fecode[2]; + for (;;) + { + if (fc < *cp) + { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } } + if (fc == *cp++) + { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; } + } + break; + + case PT_UCNC: + if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || + fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || + fc >= 0xe000) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_BIDICL: + if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_BOOL: + { + BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Fecode[2]) != 0; + if (ok == notmatch) RRETURN(MATCH_NOMATCH); + } + break; + + /* This should never occur */ + + default: + return PCRE2_ERROR_INTERNAL; + } + + Fecode += 3; + } + break; + + + /* ===================================================================== */ + /* Match an extended Unicode sequence. We will get here only if the support + is in the binary; otherwise a compile-time error occurs. 
*/ + + case OP_EXTUNI: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + else + { + GETCHARINCTEST(fc, Feptr); + Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf, + NULL); + } + CHECK_PARTIAL(); + Fecode++; + break; + +#endif /* SUPPORT_UNICODE */ + + + /* ===================================================================== */ + /* Match a single character type repeatedly. Note that the property type + does not need to be in a stack frame as it is not used within an RMATCH() + loop. */ + +#define Lstart_eptr F->temp_sptr[0] +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] +#define Lctype F->temp_32[2] +#define Lpropvalue F->temp_32[3] + + case OP_TYPEEXACT: + Lmin = Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATTYPE; + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + Lmin = 0; + Lmax = GET2(Fecode, 1); + reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX; + Fecode += 1 + IMM2_SIZE; + goto REPEATTYPE; + + case OP_TYPEPOSSTAR: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = UINT32_MAX; + Fecode++; + goto REPEATTYPE; + + case OP_TYPEPOSPLUS: + reptype = REPTYPE_POS; + Lmin = 1; + Lmax = UINT32_MAX; + Fecode++; + goto REPEATTYPE; + + case OP_TYPEPOSQUERY: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = 1; + Fecode++; + goto REPEATTYPE; + + case OP_TYPEPOSUPTO: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATTYPE; + + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + fc = *Fecode++ - OP_TYPESTAR; + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + + /* Common code for all repeated character type matches. 
*/ + + REPEATTYPE: + Lctype = *Fecode++; /* Code for the character type */ + +#ifdef SUPPORT_UNICODE + if (Lctype == OP_PROP || Lctype == OP_NOTPROP) + { + proptype = *Fecode++; + Lpropvalue = *Fecode++; + } + else proptype = -1; +#endif + + /* First, ensure the minimum number of matches are present. Use inline + code for maximizing the speed, and do the type test once at the start + (i.e. keep it out of the loops). As there are no calls to RMATCH in the + loops, we can use an ordinary variable for "notmatch". The code for UTF + mode is separated out for tidiness, except for Unicode property tests. */ + + if (Lmin > 0) + { +#ifdef SUPPORT_UNICODE + if (proptype >= 0) /* Property tests in all modes */ + { + BOOL notmatch = Lctype == OP_NOTPROP; + switch(proptype) + { + case PT_ANY: + if (notmatch) RRETURN(MATCH_NOMATCH); + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + } + break; + + case PT_LAMP: + for (i = 1; i <= Lmin; i++) + { + int chartype; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + chartype = UCD_CHARTYPE(fc); + if ((chartype == ucp_Lu || + chartype == ucp_Ll || + chartype == ucp_Lt) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_GC: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_PC: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_SC: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + 
GETCHARINCTEST(fc, Feptr); + if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_SCX: + for (i = 1; i <= Lmin; i++) + { + BOOL ok; + const ucd_record *prop; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = (prop->script == Lpropvalue || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0); + if (ok == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_ALNUM: + for (i = 1; i <= Lmin; i++) + { + int category; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + category = UCD_CATEGORY(fc); + if ((category == ucp_L || category == ucp_N) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. 
*/ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + HSPACE_CASES: + VSPACE_CASES: + if (notmatch) RRETURN(MATCH_NOMATCH); + break; + + default: + if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + } + } + break; + + case PT_WORD: + for (i = 1; i <= Lmin; i++) + { + int chartype, category; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + chartype = UCD_CHARTYPE(fc); + category = PRIV(ucp_gentype)[chartype]; + if ((category == ucp_L || category == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_CLIST: + for (i = 1; i <= Lmin; i++) + { + const uint32_t *cp; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (notmatch) continue; + RRETURN(MATCH_NOMATCH); + } +#endif + cp = PRIV(ucd_caseless_sets) + Lpropvalue; + for (;;) + { + if (fc < *cp) + { + if (notmatch) break; + RRETURN(MATCH_NOMATCH); + } + if (fc == *cp++) + { + if (notmatch) RRETURN(MATCH_NOMATCH); + break; + } + } + } + break; + + case PT_UCNC: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || + fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || + fc >= 0xe000) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_BIDICL: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) + RRETURN(MATCH_NOMATCH); 
+ } + break; + + case PT_BOOL: + for (i = 1; i <= Lmin; i++) + { + BOOL ok; + const ucd_record *prop; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Lpropvalue) != 0; + if (ok == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + /* This should not occur */ + + default: + return PCRE2_ERROR_INTERNAL; + } + } + + /* Match extended Unicode sequences. We will get here only if the + support is in the binary; otherwise a compile-time error occurs. */ + + else if (Lctype == OP_EXTUNI) + { + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + else + { + GETCHARINCTEST(fc, Feptr); + Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, + mb->end_subject, utf, NULL); + } + CHECK_PARTIAL(); + } + } + else +#endif /* SUPPORT_UNICODE */ + +/* Handle all other cases in UTF mode */ + +#ifdef SUPPORT_UNICODE + if (utf) switch(Lctype) + { + case OP_ANY: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH); + if (mb->partial != 0 && + Feptr + 1 >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + UCHAR21(Feptr) == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + Feptr++; + ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + } + break; + + case OP_ALLANY: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + Feptr++; + ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + } + break; + + case OP_ANYBYTE: + if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH); + Feptr += Lmin; + break; + + case OP_ANYNL: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + 
RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + switch(fc) + { + default: RRETURN(MATCH_NOMATCH); + + case CHAR_CR: + if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++; + break; + + case CHAR_LF: + break; + + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#ifndef EBCDIC + case 0x2028: + case 0x2029: +#endif /* Not EBCDIC */ + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH); + break; + } + } + break; + + case OP_NOT_HSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + switch(fc) + { + HSPACE_CASES: RRETURN(MATCH_NOMATCH); + default: break; + } + } + break; + + case OP_HSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + switch(fc) + { + HSPACE_CASES: break; + default: RRETURN(MATCH_NOMATCH); + } + } + break; + + case OP_NOT_VSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + switch(fc) + { + VSPACE_CASES: RRETURN(MATCH_NOMATCH); + default: break; + } + } + break; + + case OP_VSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + switch(fc) + { + VSPACE_CASES: break; + default: RRETURN(MATCH_NOMATCH); + } + } + break; + + case OP_NOT_DIGIT: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + } + break; + + case OP_DIGIT: + for (i = 1; i <= Lmin; i++) + { + uint32_t cc; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21(Feptr); + if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0) + 
RRETURN(MATCH_NOMATCH); + Feptr++; + /* No need to skip more code units - we know it has only one. */ + } + break; + + case OP_NOT_WHITESPACE: + for (i = 1; i <= Lmin; i++) + { + uint32_t cc; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21(Feptr); + if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + } + break; + + case OP_WHITESPACE: + for (i = 1; i <= Lmin; i++) + { + uint32_t cc; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21(Feptr); + if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + /* No need to skip more code units - we know it has only one. */ + } + break; + + case OP_NOT_WORDCHAR: + for (i = 1; i <= Lmin; i++) + { + uint32_t cc; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21(Feptr); + if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + } + break; + + case OP_WORDCHAR: + for (i = 1; i <= Lmin; i++) + { + uint32_t cc; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21(Feptr); + if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + /* No need to skip more code units - we know it has only one. */ + } + break; + + default: + return PCRE2_ERROR_INTERNAL; + } /* End switch(Lctype) */ + + else +#endif /* SUPPORT_UNICODE */ + + /* Code for the non-UTF case for minimum matching of operators other + than OP_PROP and OP_NOTPROP. 
*/ + + switch(Lctype) + { + case OP_ANY: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH); + if (mb->partial != 0 && + Feptr + 1 >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + *Feptr == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + Feptr++; + } + break; + + case OP_ALLANY: + if (Feptr > mb->end_subject - Lmin) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + Feptr += Lmin; + break; + + /* This OP_ANYBYTE case will never be reached because \C gets turned + into OP_ALLANY in non-UTF mode. Cut out the code so that coverage + reports don't complain about it's never being used. */ + +/* case OP_ANYBYTE: +* if (Feptr > mb->end_subject - Lmin) +* { +* SCHECK_PARTIAL(); +* RRETURN(MATCH_NOMATCH); +* } +* Feptr += Lmin; +* break; +*/ + case OP_ANYNL: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + switch(*Feptr++) + { + default: RRETURN(MATCH_NOMATCH); + + case CHAR_CR: + if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++; + break; + + case CHAR_LF: + break; + + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#if PCRE2_CODE_UNIT_WIDTH != 8 + case 0x2028: + case 0x2029: +#endif + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH); + break; + } + } + break; + + case OP_NOT_HSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + switch(*Feptr++) + { + default: break; + HSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + HSPACE_MULTIBYTE_CASES: +#endif + RRETURN(MATCH_NOMATCH); + } + } + break; + + case OP_HSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + switch(*Feptr++) + { + default: RRETURN(MATCH_NOMATCH); + 
HSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + HSPACE_MULTIBYTE_CASES: +#endif + break; + } + } + break; + + case OP_NOT_VSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + switch(*Feptr++) + { + VSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + VSPACE_MULTIBYTE_CASES: +#endif + RRETURN(MATCH_NOMATCH); + default: break; + } + } + break; + + case OP_VSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + switch(*Feptr++) + { + default: RRETURN(MATCH_NOMATCH); + VSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + VSPACE_MULTIBYTE_CASES: +#endif + break; + } + } + break; + + case OP_NOT_DIGIT: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + } + break; + + case OP_DIGIT: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + } + break; + + case OP_NOT_WHITESPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + } + break; + + case OP_WHITESPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + } + break; + + case OP_NOT_WORDCHAR: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0) + 
RRETURN(MATCH_NOMATCH); + Feptr++; + } + break; + + case OP_WORDCHAR: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + } + break; + + default: + return PCRE2_ERROR_INTERNAL; + } + } + + /* If Lmin = Lmax we are done. Continue with the main loop. */ + + if (Lmin == Lmax) continue; + + /* If minimizing, we have to test the rest of the pattern before each + subsequent match. This means we cannot use a local "notmatch" variable as + in the other cases. As all 4 temporary 32-bit values in the frame are + already in use, just test the type each time. */ + + if (reptype == REPTYPE_MIN) + { +#ifdef SUPPORT_UNICODE + if (proptype >= 0) + { + switch(proptype) + { + case PT_ANY: + for (;;) + { + RMATCH(Fecode, RM208); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + case PT_LAMP: + for (;;) + { + int chartype; + RMATCH(Fecode, RM209); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + chartype = UCD_CHARTYPE(fc); + if ((chartype == ucp_Lu || + chartype == ucp_Ll || + chartype == ucp_Lt) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + case PT_GC: + for (;;) + { + RMATCH(Fecode, RM210); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + 
RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + case PT_PC: + for (;;) + { + RMATCH(Fecode, RM211); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + case PT_SC: + for (;;) + { + RMATCH(Fecode, RM212); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + case PT_SCX: + for (;;) + { + BOOL ok; + const ucd_record *prop; + RMATCH(Fecode, RM225); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = (prop->script == Lpropvalue + || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0); + if (ok == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + case PT_ALNUM: + for (;;) + { + int category; + RMATCH(Fecode, RM213); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + category = UCD_CATEGORY(fc); + if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. 
*/ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + for (;;) + { + RMATCH(Fecode, RM214); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + HSPACE_CASES: + VSPACE_CASES: + if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + break; + + default: + if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + break; + } + } + /* Control never gets here */ + + case PT_WORD: + for (;;) + { + int chartype, category; + RMATCH(Fecode, RM215); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + chartype = UCD_CHARTYPE(fc); + category = PRIV(ucp_gentype)[chartype]; + if ((category == ucp_L || + category == ucp_N || + chartype == ucp_Mn || + chartype == ucp_Pc) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + case PT_CLIST: + for (;;) + { + const uint32_t *cp; + RMATCH(Fecode, RM216); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (Lctype == OP_NOTPROP) continue; + RRETURN(MATCH_NOMATCH); + } +#endif + cp = PRIV(ucd_caseless_sets) + Lpropvalue; + for (;;) + { + if (fc < *cp) + { + if (Lctype == OP_NOTPROP) break; + RRETURN(MATCH_NOMATCH); + } + if (fc == *cp++) + { + if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + break; + } + } + } + /* Control never gets here */ + + case PT_UCNC: + for (;;) + { + RMATCH(Fecode, RM217); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= 
mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || + fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || + fc >= 0xe000) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + case PT_BIDICL: + for (;;) + { + RMATCH(Fecode, RM224); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + case PT_BOOL: + for (;;) + { + BOOL ok; + const ucd_record *prop; + RMATCH(Fecode, RM223); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Lpropvalue) != 0; + if (ok == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + /* This should never occur */ + default: + return PCRE2_ERROR_INTERNAL; + } + } + + /* Match extended Unicode sequences. We will get here only if the + support is in the binary; otherwise a compile-time error occurs. */ + + else if (Lctype == OP_EXTUNI) + { + for (;;) + { + RMATCH(Fecode, RM218); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + else + { + GETCHARINCTEST(fc, Feptr); + Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, + utf, NULL); + } + CHECK_PARTIAL(); + } + } + else +#endif /* SUPPORT_UNICODE */ + + /* UTF mode for non-property testing character types. 
*/ + +#ifdef SUPPORT_UNICODE + if (utf) + { + for (;;) + { + RMATCH(Fecode, RM219); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH); + GETCHARINC(fc, Feptr); + switch(Lctype) + { + case OP_ANY: /* This is the non-NL case */ + if (mb->partial != 0 && /* Take care with CRLF partial */ + Feptr >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + fc == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + break; + + case OP_ALLANY: + case OP_ANYBYTE: + break; + + case OP_ANYNL: + switch(fc) + { + default: RRETURN(MATCH_NOMATCH); + + case CHAR_CR: + if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++; + break; + + case CHAR_LF: + break; + + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#ifndef EBCDIC + case 0x2028: + case 0x2029: +#endif /* Not EBCDIC */ + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) + RRETURN(MATCH_NOMATCH); + break; + } + break; + + case OP_NOT_HSPACE: + switch(fc) + { + HSPACE_CASES: RRETURN(MATCH_NOMATCH); + default: break; + } + break; + + case OP_HSPACE: + switch(fc) + { + HSPACE_CASES: break; + default: RRETURN(MATCH_NOMATCH); + } + break; + + case OP_NOT_VSPACE: + switch(fc) + { + VSPACE_CASES: RRETURN(MATCH_NOMATCH); + default: break; + } + break; + + case OP_VSPACE: + switch(fc) + { + VSPACE_CASES: break; + default: RRETURN(MATCH_NOMATCH); + } + break; + + case OP_NOT_DIGIT: + if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_DIGIT: + if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WHITESPACE: + if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_WHITESPACE: + if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 
0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WORDCHAR: + if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_WORDCHAR: + if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + break; + + default: + return PCRE2_ERROR_INTERNAL; + } + } + } + else +#endif /* SUPPORT_UNICODE */ + + /* Not UTF mode */ + { + for (;;) + { + RMATCH(Fecode, RM33); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) + RRETURN(MATCH_NOMATCH); + fc = *Feptr++; + switch(Lctype) + { + case OP_ANY: /* This is the non-NL case */ + if (mb->partial != 0 && /* Take care with CRLF partial */ + Feptr >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + fc == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + break; + + case OP_ALLANY: + case OP_ANYBYTE: + break; + + case OP_ANYNL: + switch(fc) + { + default: RRETURN(MATCH_NOMATCH); + + case CHAR_CR: + if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++; + break; + + case CHAR_LF: + break; + + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#if PCRE2_CODE_UNIT_WIDTH != 8 + case 0x2028: + case 0x2029: +#endif + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) + RRETURN(MATCH_NOMATCH); + break; + } + break; + + case OP_NOT_HSPACE: + switch(fc) + { + default: break; + HSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + HSPACE_MULTIBYTE_CASES: +#endif + RRETURN(MATCH_NOMATCH); + } + break; + + case OP_HSPACE: + switch(fc) + { + default: RRETURN(MATCH_NOMATCH); + HSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + HSPACE_MULTIBYTE_CASES: +#endif + break; + } + break; + + case OP_NOT_VSPACE: + switch(fc) + { + default: break; + VSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + VSPACE_MULTIBYTE_CASES: +#endif + 
RRETURN(MATCH_NOMATCH); + } + break; + + case OP_VSPACE: + switch(fc) + { + default: RRETURN(MATCH_NOMATCH); + VSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + VSPACE_MULTIBYTE_CASES: +#endif + break; + } + break; + + case OP_NOT_DIGIT: + if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_DIGIT: + if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WHITESPACE: + if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_WHITESPACE: + if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WORDCHAR: + if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_WORDCHAR: + if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + break; + + default: + return PCRE2_ERROR_INTERNAL; + } + } + } + /* Control never gets here */ + } + + /* If maximizing, it is worth using inline code for speed, doing the type + test once at the start (i.e. keep it out of the loops). Once again, + "notmatch" can be an ordinary local variable because the loops do not call + RMATCH. 
*/ + + else + { + Lstart_eptr = Feptr; /* Remember where we started */ + +#ifdef SUPPORT_UNICODE + if (proptype >= 0) + { + BOOL notmatch = Lctype == OP_NOTPROP; + switch(proptype) + { + case PT_ANY: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if (notmatch) break; + Feptr+= len; + } + break; + + case PT_LAMP: + for (i = Lmin; i < Lmax; i++) + { + int chartype; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + chartype = UCD_CHARTYPE(fc); + if ((chartype == ucp_Lu || + chartype == ucp_Ll || + chartype == ucp_Lt) == notmatch) + break; + Feptr+= len; + } + break; + + case PT_GC: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break; + Feptr+= len; + } + break; + + case PT_PC: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break; + Feptr+= len; + } + break; + + case PT_SC: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break; + Feptr+= len; + } + break; + + case PT_SCX: + for (i = Lmin; i < Lmax; i++) + { + BOOL ok; + const ucd_record *prop; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + prop = GET_UCD(fc); + ok = (prop->script == Lpropvalue || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0); + if (ok == notmatch) break; + Feptr+= len; + } + break; + + case PT_ALNUM: + for (i = Lmin; i < Lmax; i++) + { + int category; + int len 
= 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + category = UCD_CATEGORY(fc); + if ((category == ucp_L || category == ucp_N) == notmatch) + break; + Feptr+= len; + } + break; + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. */ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + switch(fc) + { + HSPACE_CASES: + VSPACE_CASES: + if (notmatch) goto ENDLOOP99; /* Break the loop */ + break; + + default: + if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch) + goto ENDLOOP99; /* Break the loop */ + break; + } + Feptr+= len; + } + ENDLOOP99: + break; + + case PT_WORD: + for (i = Lmin; i < Lmax; i++) + { + int chartype, category; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + chartype = UCD_CHARTYPE(fc); + category = PRIV(ucp_gentype)[chartype]; + if ((category == ucp_L || + category == ucp_N || + chartype == ucp_Mn || + chartype == ucp_Pc) == notmatch) + break; + Feptr+= len; + } + break; + + case PT_CLIST: + for (i = Lmin; i < Lmax; i++) + { + const uint32_t *cp; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (!notmatch) goto GOT_MAX; + } + else +#endif + { + cp = PRIV(ucd_caseless_sets) + Lpropvalue; + for (;;) + { + if (fc < *cp) + { if (notmatch) break; else goto GOT_MAX; } + if (fc == *cp++) + { if (notmatch) goto GOT_MAX; else break; } + } + } + + Feptr += len; + } + GOT_MAX: + break; + + case PT_UCNC: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + 
SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || + fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || + fc >= 0xe000) == notmatch) + break; + Feptr += len; + } + break; + + case PT_BIDICL: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break; + Feptr+= len; + } + break; + + case PT_BOOL: + for (i = Lmin; i < Lmax; i++) + { + BOOL ok; + const ucd_record *prop; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + prop = GET_UCD(fc); + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Lpropvalue) != 0; + if (ok == notmatch) break; + Feptr+= len; + } + break; + + default: + return PCRE2_ERROR_INTERNAL; + } + + /* Feptr is now past the end of the maximum run */ + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. */ + + for(;;) + { + if (Feptr <= Lstart_eptr) break; + RMATCH(Fecode, RM222); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + if (utf) BACKCHAR(Feptr); + } + } + + /* Match extended Unicode grapheme clusters. We will get here only if the + support is in the binary; otherwise a compile-time error occurs. 
*/ + + else if (Lctype == OP_EXTUNI) + { + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + else + { + GETCHARINCTEST(fc, Feptr); + Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, + utf, NULL); + } + CHECK_PARTIAL(); + } + + /* Feptr is now past the end of the maximum run */ + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start + of the run while backtracking because the use of \C in UTF mode can + cause BACKCHAR to move back past Lstart_eptr. This is just palliative; + the use of \C in UTF mode is fraught with danger. */ + + for(;;) + { + int lgb, rgb; + PCRE2_SPTR fptr; + + if (Feptr <= Lstart_eptr) break; /* At start of char run */ + RMATCH(Fecode, RM220); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + + /* Backtracking over an extended grapheme cluster involves inspecting + the previous two characters (if present) to see if a break is + permitted between them. 
*/ + + Feptr--; + if (!utf) fc = *Feptr; else + { + BACKCHAR(Feptr); + GETCHAR(fc, Feptr); + } + rgb = UCD_GRAPHBREAK(fc); + + for (;;) + { + if (Feptr <= Lstart_eptr) break; /* At start of char run */ + fptr = Feptr - 1; + if (!utf) fc = *fptr; else + { + BACKCHAR(fptr); + GETCHAR(fc, fptr); + } + lgb = UCD_GRAPHBREAK(fc); + if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break; + Feptr = fptr; + rgb = lgb; + } + } + } + + else +#endif /* SUPPORT_UNICODE */ + +#ifdef SUPPORT_UNICODE + if (utf) + { + switch(Lctype) + { + case OP_ANY: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (IS_NEWLINE(Feptr)) break; + if (mb->partial != 0 && /* Take care with CRLF partial */ + Feptr + 1 >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + UCHAR21(Feptr) == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + Feptr++; + ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + } + break; + + case OP_ALLANY: + if (Lmax < UINT32_MAX) + { + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + Feptr++; + ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + } + } + else + { + Feptr = mb->end_subject; /* Unlimited UTF-8 repeat */ + SCHECK_PARTIAL(); + } + break; + + /* The "byte" (i.e. 
"code unit") case is the same as non-UTF */ + + case OP_ANYBYTE: + fc = Lmax - Lmin; + if (fc > (uint32_t)(mb->end_subject - Feptr)) + { + Feptr = mb->end_subject; + SCHECK_PARTIAL(); + } + else Feptr += fc; + break; + + case OP_ANYNL: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc == CHAR_CR) + { + if (++Feptr >= mb->end_subject) break; + if (UCHAR21(Feptr) == CHAR_LF) Feptr++; + } + else + { + if (fc != CHAR_LF && + (mb->bsr_convention == PCRE2_BSR_ANYCRLF || + (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL +#ifndef EBCDIC + && fc != 0x2028 && fc != 0x2029 +#endif /* Not EBCDIC */ + ))) + break; + Feptr += len; + } + } + break; + + case OP_NOT_HSPACE: + case OP_HSPACE: + for (i = Lmin; i < Lmax; i++) + { + BOOL gotspace; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + switch(fc) + { + HSPACE_CASES: gotspace = TRUE; break; + default: gotspace = FALSE; break; + } + if (gotspace == (Lctype == OP_NOT_HSPACE)) break; + Feptr += len; + } + break; + + case OP_NOT_VSPACE: + case OP_VSPACE: + for (i = Lmin; i < Lmax; i++) + { + BOOL gotspace; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + switch(fc) + { + VSPACE_CASES: gotspace = TRUE; break; + default: gotspace = FALSE; break; + } + if (gotspace == (Lctype == OP_NOT_VSPACE)) break; + Feptr += len; + } + break; + + case OP_NOT_DIGIT: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break; + Feptr+= len; + } + break; + + case OP_DIGIT: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc >= 256 ||(mb->ctypes[fc] & 
ctype_digit) == 0) break; + Feptr+= len; + } + break; + + case OP_NOT_WHITESPACE: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break; + Feptr+= len; + } + break; + + case OP_WHITESPACE: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break; + Feptr+= len; + } + break; + + case OP_NOT_WORDCHAR: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break; + Feptr+= len; + } + break; + + case OP_WORDCHAR: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break; + Feptr+= len; + } + break; + + default: + return PCRE2_ERROR_INTERNAL; + } + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go + too far. 
*/ + + for(;;) + { + if (Feptr <= Lstart_eptr) break; + RMATCH(Fecode, RM221); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + BACKCHAR(Feptr); + if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && + UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR) + Feptr--; + } + } + else +#endif /* SUPPORT_UNICODE */ + + /* Not UTF mode */ + { + switch(Lctype) + { + case OP_ANY: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (IS_NEWLINE(Feptr)) break; + if (mb->partial != 0 && /* Take care with CRLF partial */ + Feptr + 1 >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + *Feptr == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + Feptr++; + } + break; + + case OP_ALLANY: + case OP_ANYBYTE: + fc = Lmax - Lmin; + if (fc > (uint32_t)(mb->end_subject - Feptr)) + { + Feptr = mb->end_subject; + SCHECK_PARTIAL(); + } + else Feptr += fc; + break; + + case OP_ANYNL: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + fc = *Feptr; + if (fc == CHAR_CR) + { + if (++Feptr >= mb->end_subject) break; + if (*Feptr == CHAR_LF) Feptr++; + } + else + { + if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF || + (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL +#if PCRE2_CODE_UNIT_WIDTH != 8 + && fc != 0x2028 && fc != 0x2029 +#endif + ))) break; + Feptr++; + } + } + break; + + case OP_NOT_HSPACE: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + switch(*Feptr) + { + default: Feptr++; break; + HSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + HSPACE_MULTIBYTE_CASES: +#endif + goto ENDLOOP00; + } + } + ENDLOOP00: + break; + + case OP_HSPACE: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + switch(*Feptr) + { + default: goto ENDLOOP01; + HSPACE_BYTE_CASES: +#if 
PCRE2_CODE_UNIT_WIDTH != 8 + HSPACE_MULTIBYTE_CASES: +#endif + Feptr++; break; + } + } + ENDLOOP01: + break; + + case OP_NOT_VSPACE: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + switch(*Feptr) + { + default: Feptr++; break; + VSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + VSPACE_MULTIBYTE_CASES: +#endif + goto ENDLOOP02; + } + } + ENDLOOP02: + break; + + case OP_VSPACE: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + switch(*Feptr) + { + default: goto ENDLOOP03; + VSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + VSPACE_MULTIBYTE_CASES: +#endif + Feptr++; break; + } + } + ENDLOOP03: + break; + + case OP_NOT_DIGIT: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0) + break; + Feptr++; + } + break; + + case OP_DIGIT: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0) + break; + Feptr++; + } + break; + + case OP_NOT_WHITESPACE: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0) + break; + Feptr++; + } + break; + + case OP_WHITESPACE: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0) + break; + Feptr++; + } + break; + + case OP_NOT_WORDCHAR: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0) + break; + Feptr++; + } + break; + + case OP_WORDCHAR: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + 
if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0) + break; + Feptr++; + } + break; + + default: + return PCRE2_ERROR_INTERNAL; + } + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + for (;;) + { + if (Feptr == Lstart_eptr) break; + RMATCH(Fecode, RM34); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF && + Feptr[-1] == CHAR_CR) Feptr--; + } + } + } + break; /* End of repeat character type processing */ + +#undef Lstart_eptr +#undef Lmin +#undef Lmax +#undef Lctype +#undef Lpropvalue + + + /* ===================================================================== */ + /* Match a back reference, possibly repeatedly. Look past the end of the + item to see if there is repeat information following. The OP_REF and + OP_REFI opcodes are used for a reference to a numbered group or to a + non-duplicated named group. For a duplicated named group, OP_DNREF and + OP_DNREFI are used. In this case we must scan the list of groups to which + the name refers, and use the first one that is set. */ + +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] +#define Lcaseless F->temp_32[2] +#define Lstart F->temp_sptr[0] +#define Loffset F->temp_size + + case OP_DNREF: + case OP_DNREFI: + Lcaseless = (Fop == OP_DNREFI); + { + int count = GET2(Fecode, 1+IMM2_SIZE); + PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size; + Fecode += 1 + 2*IMM2_SIZE; + + while (count-- > 0) + { + Loffset = (GET2(slot, 0) << 1) - 2; + if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break; + slot += mb->name_entry_size; + } + } + goto REF_REPEAT; + + case OP_REF: + case OP_REFI: + Lcaseless = (Fop == OP_REFI); + Loffset = (GET2(Fecode, 1) << 1) - 2; + Fecode += 1 + IMM2_SIZE; + + /* Set up for repetition, or handle the non-repeated case. The maximum and + minimum must be in the heap frame, but as they are short-term values, we + use temporary fields. 
*/ + + REF_REPEAT: + switch (*Fecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + fc = *Fecode++ - OP_CRSTAR; + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + Lmin = GET2(Fecode, 1); + Lmax = GET2(Fecode, 1 + IMM2_SIZE); + reptype = rep_typ[*Fecode - OP_CRSTAR]; + if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */ + Fecode += 1 + 2 * IMM2_SIZE; + break; + + default: /* No repeat follows */ + { + rrc = match_ref(Loffset, Lcaseless, F, mb, &length); + if (rrc != 0) + { + if (rrc > 0) Feptr = mb->end_subject; /* Partial match */ + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + } + Feptr += length; + continue; /* With the main loop */ + } + + /* Handle repeated back references. If a set group has length zero, just + continue with the main loop, because it matches however many times. For an + unset reference, if the minimum is zero, we can also just continue. We can + also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset + group behave as a zero-length group. For any other unset cases, carrying + on will result in NOMATCH. */ + + if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) + { + if (Fovector[Loffset] == Fovector[Loffset + 1]) continue; + } + else /* Group is not set */ + { + if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0) + continue; + } + + /* First, ensure the minimum number of matches are present. */ + + for (i = 1; i <= Lmin; i++) + { + PCRE2_SIZE slength; + rrc = match_ref(Loffset, Lcaseless, F, mb, &slength); + if (rrc != 0) + { + if (rrc > 0) Feptr = mb->end_subject; /* Partial match */ + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + Feptr += slength; + } + + /* If min = max, we are done. They are not both allowed to be zero. */ + + if (Lmin == Lmax) continue; + + /* If minimizing, keep trying and advancing the pointer. 
*/ + + if (reptype == REPTYPE_MIN) + { + for (;;) + { + PCRE2_SIZE slength; + RMATCH(Fecode, RM20); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + rrc = match_ref(Loffset, Lcaseless, F, mb, &slength); + if (rrc != 0) + { + if (rrc > 0) Feptr = mb->end_subject; /* Partial match */ + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + Feptr += slength; + } + /* Control never gets here */ + } + + /* If maximizing, find the longest string and work backwards, as long as + the matched lengths for each iteration are the same. */ + + else + { + BOOL samelengths = TRUE; + Lstart = Feptr; /* Starting position */ + Flength = Fovector[Loffset+1] - Fovector[Loffset]; + + for (i = Lmin; i < Lmax; i++) + { + PCRE2_SIZE slength; + rrc = match_ref(Loffset, Lcaseless, F, mb, &slength); + if (rrc != 0) + { + /* Can't use CHECK_PARTIAL because we don't want to update Feptr in + the soft partial matching case. */ + + if (rrc > 0 && mb->partial != 0 && + mb->end_subject > mb->start_used_ptr) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + break; + } + + if (slength != Flength) samelengths = FALSE; + Feptr += slength; + } + + /* If the length matched for each repetition is the same as the length of + the captured group, we can easily work backwards. This is the normal + case. However, in caseless UTF-8 mode there are pairs of case-equivalent + characters whose lengths (in terms of code units) differ. However, this + is very rare, so we handle it by re-matching fewer and fewer times. */ + + if (samelengths) + { + while (Feptr >= Lstart) + { + RMATCH(Fecode, RM21); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr -= Flength; + } + } + + /* The rare case of non-matching lengths. Re-scan the repetition for each + iteration. We know that match_ref() will succeed every time. 
*/ + + else + { + Lmax = i; + for (;;) + { + RMATCH(Fecode, RM22); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Feptr == Lstart) break; /* Failed after minimal repetition */ + Feptr = Lstart; + Lmax--; + for (i = Lmin; i < Lmax; i++) + { + PCRE2_SIZE slength; + (void)match_ref(Loffset, Lcaseless, F, mb, &slength); + Feptr += slength; + } + } + } + + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + +#undef Lcaseless +#undef Lmin +#undef Lmax +#undef Lstart +#undef Loffset + + + +/* ========================================================================= */ +/* Opcodes for the start of various parenthesized items */ +/* ========================================================================= */ + + /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the + (*THEN) is within the current branch by comparing the address of OP_THEN + that is passed back with the end of the branch. If (*THEN) is within the + current branch, and the branch is one of two or more alternatives (it + either starts or ends with OP_ALT), we have reached the limit of THEN's + action, so convert the return code to NOMATCH, which will cause normal + backtracking to happen from now on. Otherwise, THEN is passed back to an + outer alternative. This implements Perl's treatment of parenthesized + groups, where a group not containing | does not affect the current + alternative, that is, (X) is NOT the same as (X|(*F)). */ + + + /* ===================================================================== */ + /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive + bracket group, indicating that it may occur zero times. It may repeat + infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in + the pattern. Brackets with fixed upper repeat limits are compiled as a + number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO. + Possessive groups with possible zero repeats are preceded by BRAPOSZERO. 
*/ + +#define Lnext_ecode F->temp_sptr[0] + + case OP_BRAZERO: + Lnext_ecode = Fecode + 1; + RMATCH(Lnext_ecode, RM9); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT); + Fecode = Lnext_ecode + 1 + LINK_SIZE; + break; + + case OP_BRAMINZERO: + Lnext_ecode = Fecode + 1; + do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT); + RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Fecode++; + break; + +#undef Lnext_ecode + + case OP_SKIPZERO: + Fecode++; + do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT); + Fecode += 1 + LINK_SIZE; + break; + + + /* ===================================================================== */ + /* Handle possessive brackets with an unlimited repeat. The end of these + brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without + going further in the pattern. */ + +#define Lframe_type F->temp_32[0] +#define Lmatched_once F->temp_32[1] +#define Lzero_allowed F->temp_32[2] +#define Lstart_eptr F->temp_sptr[0] +#define Lstart_group F->temp_sptr[1] + + case OP_BRAPOSZERO: + Lzero_allowed = TRUE; /* Zero repeat is allowed */ + Fecode += 1; + if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS) + goto POSSESSIVE_CAPTURE; + goto POSSESSIVE_NON_CAPTURE; + + case OP_BRAPOS: + case OP_SBRAPOS: + Lzero_allowed = FALSE; /* Zero repeat not allowed */ + + POSSESSIVE_NON_CAPTURE: + Lframe_type = GF_NOCAPTURE; /* Remembered frame type */ + goto POSSESSIVE_GROUP; + + case OP_CBRAPOS: + case OP_SCBRAPOS: + Lzero_allowed = FALSE; /* Zero repeat not allowed */ + + POSSESSIVE_CAPTURE: + number = GET2(Fecode, 1+LINK_SIZE); + Lframe_type = GF_CAPTURE | number; /* Remembered frame type */ + + POSSESSIVE_GROUP: + Lmatched_once = FALSE; /* Never matched */ + Lstart_group = Fecode; /* Start of this group */ + + for (;;) + { + Lstart_eptr = Feptr; /* Position at group start */ + group_frame_type = Lframe_type; + RMATCH(Fecode + 
PRIV(OP_lengths)[*Fecode], RM8); + if (rrc == MATCH_KETRPOS) + { + Lmatched_once = TRUE; /* Matched at least once */ + if (Feptr == Lstart_eptr) /* Empty match; skip to end */ + { + do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT); + break; + } + + Fecode = Lstart_group; + continue; + } + + /* See comment above about handling THEN. */ + + if (rrc == MATCH_THEN) + { + PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1); + if (mb->verb_ecode_ptr < next_ecode && + (*Fecode == OP_ALT || *next_ecode == OP_ALT)) + rrc = MATCH_NOMATCH; + } + + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Fecode += GET(Fecode, 1); + if (*Fecode != OP_ALT) break; + } + + /* Success if matched something or zero repeat allowed */ + + if (Lmatched_once || Lzero_allowed) + { + Fecode += 1 + LINK_SIZE; + break; + } + + RRETURN(MATCH_NOMATCH); + +#undef Lmatched_once +#undef Lzero_allowed +#undef Lframe_type +#undef Lstart_eptr +#undef Lstart_group + + + /* ===================================================================== */ + /* Handle non-capturing brackets that cannot match an empty string. When we + get to the final alternative within the brackets, as long as there are no + THEN's in the pattern, we can optimize by not recording a new backtracking + point. (Ideally we should test for a THEN within this group, but we don't + have that information.) Don't do this if we are at the very top level, + however, because that would make handling assertions and once-only brackets + messier when there is nothing to go back to. */ + +#define Lframe_type F->temp_32[0] /* Set for all that use GROUPLOOP */ +#define Lnext_branch F->temp_sptr[0] /* Used only in OP_BRA handling */ + + case OP_BRA: + if (mb->hasthen || Frdepth == 0) + { + Lframe_type = 0; + goto GROUPLOOP; + } + + for (;;) + { + Lnext_branch = Fecode + GET(Fecode, 1); + if (*Lnext_branch != OP_ALT) break; + + /* This is never the final branch. 
We do not need to test for MATCH_THEN + here because this code is not used when there is a THEN in the pattern. */ + + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Fecode = Lnext_branch; + } + + /* Hit the start of the final branch. Continue at this level. */ + + Fecode += PRIV(OP_lengths)[*Fecode]; + break; + +#undef Lnext_branch + + + /* ===================================================================== */ + /* Handle a capturing bracket, other than those that are possessive with an + unlimited repeat. */ + + case OP_CBRA: + case OP_SCBRA: + Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE); + goto GROUPLOOP; + + + /* ===================================================================== */ + /* Atomic groups and non-capturing brackets that can match an empty string + must record a backtracking point and also set up a chained frame. */ + + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_SBRA: + Lframe_type = GF_NOCAPTURE | Fop; + + GROUPLOOP: + for (;;) + { + group_frame_type = Lframe_type; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2); + if (rrc == MATCH_THEN) + { + PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1); + if (mb->verb_ecode_ptr < next_ecode && + (*Fecode == OP_ALT || *next_ecode == OP_ALT)) + rrc = MATCH_NOMATCH; + } + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Fecode += GET(Fecode, 1); + if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH); + } + /* Control never reaches here. */ + +#undef Lframe_type + + + /* ===================================================================== */ + /* Pattern recursion either matches the current regex, or some + subexpression. The offset data is the offset to the starting bracket from + the start of the whole pattern. This is so that it works from duplicated + subpatterns. For a whole-pattern recursion, we have to infer the number + zero. 
*/ + +#define Lframe_type F->temp_32[0] +#define Lstart_branch F->temp_sptr[0] + + case OP_RECURSE: + bracode = mb->start_code + GET(Fecode, 1); + number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE); + + /* If we are already in a pattern recursion, check for repeating the same + one without changing the subject pointer or the last referenced character + in the subject. This should catch convoluted mutual recursions; some + simple cases are caught at compile time. However, there are rare cases when + this check needs to be turned off. In this case, actual recursion loops + will be caught by the match or heap limits. */ + + if (Fcurrent_recurse != RECURSE_UNSET) + { + offset = Flast_group_offset; + while (offset != PCRE2_UNSET) + { + N = (heapframe *)((char *)match_data->heapframes + offset); + P = (heapframe *)((char *)N - frame_size); + if (N->group_frame_type == (GF_RECURSE | number)) + { + if (Feptr == P->eptr && mb->last_used_ptr == P->recurse_last_used && + (mb->moptions & PCRE2_DISABLE_RECURSELOOP_CHECK) == 0) + return PCRE2_ERROR_RECURSELOOP; + break; + } + offset = P->last_group_offset; + } + } + + /* Remember the current last referenced character and then run the + recursion branch by branch. */ + + F->recurse_last_used = mb->last_used_ptr; + Lstart_branch = bracode; + Lframe_type = GF_RECURSE | number; + + for (;;) + { + PCRE2_SPTR next_ecode; + + group_frame_type = Lframe_type; + RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11); + next_ecode = Lstart_branch + GET(Lstart_branch,1); + + /* Handle backtracking verbs, which are defined in a range that can + easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to + escape beyond a recursion; they cause a NOMATCH for the entire recursion. + + When one of these verbs triggers, the current recursion group number is + recorded. If it matches the recursion we are processing, the verb + happened within the recursion and we must deal with it. 
Otherwise it must + have happened after the recursion completed, and so has to be passed + back. See comment above about handling THEN. */ + + if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX && + mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE)) + { + if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode && + (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT)) + rrc = MATCH_NOMATCH; + else RRETURN(MATCH_NOMATCH); + } + + /* Note that carrying on after (*ACCEPT) in a recursion is handled in the + OP_ACCEPT code. Nothing needs to be done here. */ + + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Lstart_branch = next_ecode; + if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH); + } + /* Control never reaches here. */ + +#undef Lframe_type +#undef Lstart_branch + + + /* ===================================================================== */ + /* Positive assertions are like other groups except that PCRE doesn't allow + the effect of (*THEN) to escape beyond an assertion; it is therefore + treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its + captures and mark retained. Any other return is an error. 
*/ + +#define Lframe_type F->temp_32[0] + + case OP_ASSERT: + case OP_ASSERTBACK: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + Lframe_type = GF_NOCAPTURE | Fop; + for (;;) + { + group_frame_type = Lframe_type; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3); + if (rrc == MATCH_ACCEPT) + { + memcpy(Fovector, + (char *)assert_accept_frame + offsetof(heapframe, ovector), + assert_accept_frame->offset_top * sizeof(PCRE2_SIZE)); + Foffset_top = assert_accept_frame->offset_top; + Fmark = assert_accept_frame->mark; + break; + } + if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); + Fecode += GET(Fecode, 1); + if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH); + } + + do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT); + Fecode += 1 + LINK_SIZE; + break; + +#undef Lframe_type + + + /* ===================================================================== */ + /* Handle negative assertions. Loop for each non-matching branch as for + positive assertions. */ + +#define Lframe_type F->temp_32[0] + + case OP_ASSERT_NOT: + case OP_ASSERTBACK_NOT: + Lframe_type = GF_NOCAPTURE | Fop; + + for (;;) + { + group_frame_type = Lframe_type; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4); + switch(rrc) + { + case MATCH_ACCEPT: /* Assertion matched, therefore it fails. */ + case MATCH_MATCH: + RRETURN (MATCH_NOMATCH); + + case MATCH_NOMATCH: /* Branch failed, try next if present. */ + case MATCH_THEN: + Fecode += GET(Fecode, 1); + if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED; + break; + + case MATCH_COMMIT: /* Assertion forced to fail, therefore continue. */ + case MATCH_SKIP: + case MATCH_PRUNE: + do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT); + goto ASSERT_NOT_FAILED; + + default: /* Pass back any other return */ + RRETURN(rrc); + } + } + + /* None of the branches have matched or there was a backtrack to (*COMMIT), + (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a + negative assertion, so carry on. 
*/ + + ASSERT_NOT_FAILED: + Fecode += 1 + LINK_SIZE; + break; + +#undef Lframe_type + + + /* ===================================================================== */ + /* The callout item calls an external function, if one is provided, passing + details of the match so far. This is mainly for debugging, though the + function is able to force a failure. */ + + case OP_CALLOUT: + case OP_CALLOUT_STR: + rrc = do_callout(F, mb, &length); + if (rrc > 0) RRETURN(MATCH_NOMATCH); + if (rrc < 0) RRETURN(rrc); + Fecode += length; + break; + + + /* ===================================================================== */ + /* Conditional group: compilation checked that there are no more than two + branches. If the condition is false, skipping the first branch takes us + past the end of the item if there is only one branch, but that's exactly + what we want. */ + + case OP_COND: + case OP_SCOND: + + /* The variable Flength will be added to Fecode when the condition is + false, to get to the second branch. Setting it to the offset to the ALT or + KET, then incrementing Fecode achieves this effect. However, if the second + branch is non-existent, we must point to the KET so that the end of the + group is correctly processed. We now have Fecode pointing to the condition + or callout. */ + + Flength = GET(Fecode, 1); /* Offset to the second branch */ + if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE; + Fecode += 1 + LINK_SIZE; /* From this opcode */ + + /* Because of the way auto-callout works during compile, a callout item is + inserted between OP_COND and an assertion condition. Such a callout can + also be inserted manually. */ + + if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR) + { + rrc = do_callout(F, mb, &length); + if (rrc > 0) RRETURN(MATCH_NOMATCH); + if (rrc < 0) RRETURN(rrc); + + /* Advance Fecode past the callout, so it now points to the condition. We + must adjust Flength so that the value of Fecode+Flength is unchanged. 
*/ + + Fecode += length; + Flength -= length; + } + + /* Test the various possible conditions */ + + condition = FALSE; + switch(*Fecode) + { + case OP_RREF: /* Group recursion test */ + if (Fcurrent_recurse != RECURSE_UNSET) + { + number = GET2(Fecode, 1); + condition = (number == RREF_ANY || number == Fcurrent_recurse); + } + break; + + case OP_DNRREF: /* Duplicate named group recursion test */ + if (Fcurrent_recurse != RECURSE_UNSET) + { + int count = GET2(Fecode, 1 + IMM2_SIZE); + PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size; + while (count-- > 0) + { + number = GET2(slot, 0); + condition = number == Fcurrent_recurse; + if (condition) break; + slot += mb->name_entry_size; + } + } + break; + + case OP_CREF: /* Numbered group used test */ + offset = (GET2(Fecode, 1) << 1) - 2; /* Doubled ref number */ + condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET; + break; + + case OP_DNCREF: /* Duplicate named group used test */ + { + int count = GET2(Fecode, 1 + IMM2_SIZE); + PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size; + while (count-- > 0) + { + offset = (GET2(slot, 0) << 1) - 2; + condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET; + if (condition) break; + slot += mb->name_entry_size; + } + } + break; + + case OP_FALSE: + case OP_FAIL: /* The assertion (?!) becomes OP_FAIL */ + break; + + case OP_TRUE: + condition = TRUE; + break; + + /* The condition is an assertion. Run code similar to the assertion code + above. 
*/ + +#define Lpositive F->temp_32[0] +#define Lstart_branch F->temp_sptr[0] + + default: + Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK); + Lstart_branch = Fecode; + + for (;;) + { + group_frame_type = GF_CONDASSERT | *Fecode; + RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5); + + switch(rrc) + { + case MATCH_ACCEPT: /* Save captures */ + memcpy(Fovector, + (char *)assert_accept_frame + offsetof(heapframe, ovector), + assert_accept_frame->offset_top * sizeof(PCRE2_SIZE)); + Foffset_top = assert_accept_frame->offset_top; + + /* Fall through */ + /* In the case of a match, the captures have already been put into + the current frame. */ + + case MATCH_MATCH: + condition = Lpositive; /* TRUE for positive assertion */ + break; + + /* PCRE doesn't allow the effect of (*THEN) to escape beyond an + assertion; it is therefore always treated as NOMATCH. */ + + case MATCH_NOMATCH: + case MATCH_THEN: + Lstart_branch += GET(Lstart_branch, 1); + if (*Lstart_branch == OP_ALT) continue; /* Try next branch */ + condition = !Lpositive; /* TRUE for negative assertion */ + break; + + /* These force no match without checking other branches. */ + + case MATCH_COMMIT: + case MATCH_SKIP: + case MATCH_PRUNE: + condition = !Lpositive; + break; + + default: + RRETURN(rrc); + } + break; /* Out of the branch loop */ + } + + /* If the condition is true, find the end of the assertion so that + advancing past it gets us to the start of the first branch. */ + + if (condition) + { + do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT); + } + break; /* End of assertion condition */ + } + +#undef Lpositive +#undef Lstart_branch + + /* Choose branch according to the condition. */ + + Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength; + + /* If the opcode is OP_SCOND it means we are at a repeated conditional + group that might match an empty string. We must therefore descend a level + so that the start is remembered for checking. 
For OP_COND we can just + continue at this level. */ + + if (Fop == OP_SCOND) + { + group_frame_type = GF_NOCAPTURE | Fop; + RMATCH(Fecode, RM35); + RRETURN(rrc); + } + break; + + + +/* ========================================================================= */ +/* End of start of parenthesis opcodes */ +/* ========================================================================= */ + + + /* ===================================================================== */ + /* Move the subject pointer back by one fixed amount. This occurs at the + start of each branch that has a fixed length in a lookbehind assertion. If + we are too close to the start to move back, fail. When working with UTF-8 + we move back a number of characters, not bytes. */ + + case OP_REVERSE: + number = GET2(Fecode, 1); +#ifdef SUPPORT_UNICODE + if (utf) + { + while (number-- > 0) + { + if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH); + Feptr--; + BACKCHAR(Feptr); + } + } + else +#endif + + /* No UTF support, or not in UTF mode: count is code unit count */ + + { + if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH); + Feptr -= number; + } + + /* Save the earliest consulted character, then skip to next opcode */ + + if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr; + Fecode += 1 + IMM2_SIZE; + break; + + + /* ===================================================================== */ + /* Move the subject pointer back by a variable amount. This occurs at the + start of each branch of a lookbehind assertion when the branch has a + variable, but limited, length. A loop is needed to try matching the branch + after moving back different numbers of characters. If we are too close to + the start to move back even the minimum amount, fail. When working with + UTF-8 we move back a number of characters, not bytes. 
*/ + +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] +#define Leptr F->temp_sptr[0] + + case OP_VREVERSE: + Lmin = GET2(Fecode, 1); + Lmax = GET2(Fecode, 1 + IMM2_SIZE); + Leptr = Feptr; + + /* Move back by the maximum branch length and then work forwards. This + ensures that items such as \d{3,5} get the maximum length, which is + relevant for captures, and makes for Perl compatibility. */ + +#ifdef SUPPORT_UNICODE + if (utf) + { + for (i = 0; i < Lmax; i++) + { + if (Feptr == mb->start_subject) + { + if (i < Lmin) RRETURN(MATCH_NOMATCH); + Lmax = i; + break; + } + Feptr--; + BACKCHAR(Feptr); + } + } + else +#endif + + /* No UTF support or not in UTF mode */ + + { + ptrdiff_t diff = Feptr - mb->start_subject; + uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? (int)diff : 0); + if (Lmin > available) RRETURN(MATCH_NOMATCH); + if (Lmax > available) Lmax = available; + Feptr -= Lmax; + } + + /* Now try matching, moving forward one character on failure, until we + reach the minimum back length. */ + + for (;;) + { + RMATCH(Fecode + 1 + 2 * IMM2_SIZE, RM37); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmax-- <= Lmin) RRETURN(MATCH_NOMATCH); + Feptr++; +#ifdef SUPPORT_UNICODE + if (utf) { FORWARDCHARTEST(Feptr, mb->end_subject); } +#endif + } + /* Control never reaches here */ + +#undef Lmin +#undef Lmax +#undef Leptr + + /* ===================================================================== */ + /* An alternation is the end of a branch; scan along to find the end of the + bracketed group. */ + + case OP_ALT: + branch_end = Fecode; + do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT); + break; + + + /* ===================================================================== */ + /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the + starting frame was added to the chained frames in order to remember the + starting subject position for the group.
(Not true for OP_BRA when it's a + whole pattern recursion, but that is handled separately below.)*/ + + case OP_KET: + case OP_KETRMIN: + case OP_KETRMAX: + case OP_KETRPOS: + + bracode = Fecode - GET(Fecode, 1); + + if (branch_end == NULL) branch_end = Fecode; + branch_start = bracode; + while (branch_start + GET(branch_start, 1) != branch_end) + branch_start += GET(branch_start, 1); + branch_end = NULL; + + /* Point N to the frame at the start of the most recent group, and P to its + predecessor. Remember the subject pointer at the start of the group. */ + + if (*bracode != OP_BRA && *bracode != OP_COND) + { + N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset); + P = (heapframe *)((char *)N - frame_size); + Flast_group_offset = P->last_group_offset; + +#ifdef DEBUG_SHOW_RMATCH + fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n", + N->rdepth, N->group_frame_type, + (char *)P->eptr - (char *)mb->start_subject); +#endif + + /* If we are at the end of an assertion that is a condition, return a + match, discarding any intermediate backtracking points. Copy back the + mark setting and the captures into the frame before N so that they are + set on return. Doing this for all assertions, both positive and negative, + seems to match what Perl does. */ + + if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT) + { + memcpy((char *)P + offsetof(heapframe, ovector), Fovector, + Foffset_top * sizeof(PCRE2_SIZE)); + P->offset_top = Foffset_top; + P->mark = Fmark; + Fback_frame = (char *)F - (char *)P; + RRETURN(MATCH_MATCH); + } + } + else P = NULL; /* Indicates starting frame not recorded */ + + /* The group was not a conditional assertion. */ + + switch (*bracode) + { + /* Whole pattern recursion is handled as a recursion into group 0, but + the entire pattern is wrapped in OP_BRA/OP_KET rather than a capturing + group - a design mistake: it should perhaps have been capture group 0. 
+ Anyway, that means the end of such recursion must be handled here. It is + detected by checking for an immediately following OP_END when we are + recursing in group 0. If this is not the end of a whole-pattern + recursion, there is nothing to be done. */ + + case OP_BRA: + if (Fcurrent_recurse != 0 || Fecode[1+LINK_SIZE] != OP_END) break; + + /* It is the end of whole-pattern recursion. */ + + offset = Flast_group_offset; + if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; + N = (heapframe *)((char *)match_data->heapframes + offset); + P = (heapframe *)((char *)N - frame_size); + Flast_group_offset = P->last_group_offset; + + /* Reinstate the previous set of captures and then carry on after the + recursion call. */ + + memcpy((char *)F + offsetof(heapframe, ovector), P->ovector, + Foffset_top * sizeof(PCRE2_SIZE)); + Foffset_top = P->offset_top; + Fcapture_last = P->capture_last; + Fcurrent_recurse = P->current_recurse; + Fecode = P->ecode + 1 + LINK_SIZE; + continue; /* With next opcode */ + + case OP_COND: /* No need to do anything for these */ + case OP_SCOND: + break; + + /* Non-atomic positive assertions are like OP_BRA, except that the + subject pointer must be put back to where it was at the start of the + assertion. For a variable lookbehind, check its end point. */ + + case OP_ASSERTBACK_NA: + if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr) + RRETURN(MATCH_NOMATCH); + /* Fall through */ + + case OP_ASSERT_NA: + if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; + Feptr = P->eptr; + break; + + /* Atomic positive assertions are like OP_ONCE, except that in addition + the subject pointer must be put back to where it was at the start of the + assertion. For a variable lookbehind, check its end point. 
*/ + + case OP_ASSERTBACK: + if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr) + RRETURN(MATCH_NOMATCH); + /* Fall through */ + + case OP_ASSERT: + if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; + Feptr = P->eptr; + /* Fall through */ + + /* For an atomic group, discard internal backtracking points. We must + also ensure that any remaining branches within the top-level of the group + are not tried. Do this by adjusting the code pointer within the backtrack + frame so that it points to the final branch. */ + + case OP_ONCE: + Fback_frame = ((char *)F - (char *)P); + for (;;) + { + uint32_t y = GET(P->ecode,1); + if ((P->ecode)[y] != OP_ALT) break; + P->ecode += y; + } + break; + + /* A matching negative assertion returns MATCH, which is turned into + NOMATCH at the assertion level. For a variable lookbehind, check its end + point. */ + + case OP_ASSERTBACK_NOT: + if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr) + RRETURN(MATCH_NOMATCH); + /* Fall through */ + + case OP_ASSERT_NOT: + RRETURN(MATCH_MATCH); + + /* At the end of a script run, apply the script-checking rules. This code + will never be exercised if Unicode support is not compiled, because in + that environment script runs cause an error at compile time. */ + + case OP_SCRIPT_RUN: + if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH); + break; + + /* Whole-pattern recursion is coded as a recurse into group 0, and is + handled with OP_BRA above. Other recursion is handled here. */ + + case OP_CBRA: + case OP_CBRAPOS: + case OP_SCBRA: + case OP_SCBRAPOS: + number = GET2(bracode, 1+LINK_SIZE); + + /* Handle a recursively called group. We reinstate the previous set of + captures and then carry on after the recursion call.
*/ + + if (Fcurrent_recurse == number) + { + P = (heapframe *)((char *)N - frame_size); + memcpy((char *)F + offsetof(heapframe, ovector), P->ovector, + Foffset_top * sizeof(PCRE2_SIZE)); + Foffset_top = P->offset_top; + Fcapture_last = P->capture_last; + Fcurrent_recurse = P->current_recurse; + Fecode = P->ecode + 1 + LINK_SIZE; + continue; /* With next opcode */ + } + + /* Deal with actual capturing. */ + + offset = (number << 1) - 2; + Fcapture_last = number; + Fovector[offset] = P->eptr - mb->start_subject; + Fovector[offset+1] = Feptr - mb->start_subject; + if (offset >= Foffset_top) Foffset_top = offset + 2; + break; + } /* End actions relating to the starting opcode */ + + /* OP_KETRPOS is a possessive repeating ket. Remember the current position, + and return the MATCH_KETRPOS. This makes it possible to do the repeats one + at a time from the outer level. This must precede the empty string test - + in this case that test is done at the outer level. */ + + if (*Fecode == OP_KETRPOS) + { + memcpy((char *)P + offsetof(heapframe, eptr), + (char *)F + offsetof(heapframe, eptr), + frame_copy_size); + RRETURN(MATCH_KETRPOS); + } + + /* Handle the different kinds of closing brackets. A non-repeating ket + needs no special action, just continuing at this level. This also happens + for the repeating kets if the group matched no characters, in order to + forcibly break infinite loops. Otherwise, the repeating kets try the rest + of the pattern or restart from the preceding bracket, in the appropriate + order. 
*/ + + if (Fop != OP_KET && (P == NULL || Feptr != P->eptr)) + { + if (Fop == OP_KETRMIN) + { + RMATCH(Fecode + 1 + LINK_SIZE, RM6); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Fecode -= GET(Fecode, 1); + break; /* End of ket processing */ + } + + /* Repeat the maximum number of times (KETRMAX) */ + + RMATCH(bracode, RM7); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + + /* Carry on at this level for a non-repeating ket, or after matching an + empty string, or after repeating for a maximum number of times. */ + + Fecode += 1 + LINK_SIZE; + break; + + + /* ===================================================================== */ + /* Start and end of line assertions, not multiline mode. */ + + case OP_CIRC: /* Start of line, unless PCRE2_NOTBOL is set. */ + if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_SOD: /* Unconditional start of subject */ + if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + /* When PCRE2_NOTEOL is unset, assert before the subject end, or a + terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */ + + case OP_DOLL: + if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH); + if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS; + + /* Fall through */ + /* Unconditional end of subject assertion (\z). 
*/ + + case OP_EOD: + if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH); + if (mb->partial != 0) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + Fecode++; + break; + + /* End of subject or ending \n assertion (\Z) */ + + case OP_EODN: + ASSERT_NL_OR_EOS: + if (Feptr < mb->end_subject && + (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen)) + { + if (mb->partial != 0 && + Feptr + 1 >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + UCHAR21TEST(Feptr) == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + RRETURN(MATCH_NOMATCH); + } + + /* Either at end of string or \n before end. */ + + if (mb->partial != 0) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + Fecode++; + break; + + + /* ===================================================================== */ + /* Start and end of line assertions, multiline mode. */ + + /* Start of subject unless notbol, or after any newline except for one at + the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */ + + case OP_CIRCM: + if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject) + RRETURN(MATCH_NOMATCH); + if (Feptr != mb->start_subject && + ((Feptr == mb->end_subject && + (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) || + !WAS_NEWLINE(Feptr))) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + /* Assert before any newline, or before end of subject unless noteol is + set. 
*/ + + case OP_DOLLM: + if (Feptr < mb->end_subject) + { + if (!IS_NEWLINE(Feptr)) + { + if (mb->partial != 0 && + Feptr + 1 >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + UCHAR21TEST(Feptr) == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + RRETURN(MATCH_NOMATCH); + } + } + else + { + if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH); + SCHECK_PARTIAL(); + } + Fecode++; + break; + + + /* ===================================================================== */ + /* Start of match assertion */ + + case OP_SOM: + if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + + /* ===================================================================== */ + /* Reset the start of match point */ + + case OP_SET_SOM: + Fstart_match = Feptr; + Fecode++; + break; + + + /* ===================================================================== */ + /* Word boundary assertions. Find out if the previous and current + characters are "word" characters. It takes a bit more work in UTF mode. + Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is + not set. When it is set, use Unicode properties if available, even when not + in UTF mode. Remember the earliest and latest consulted characters. 
*/ + + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + if (Feptr == mb->check_subject) prev_is_word = FALSE; else + { + PCRE2_SPTR lastptr = Feptr - 1; +#ifdef SUPPORT_UNICODE + if (utf) + { + BACKCHAR(lastptr); + GETCHAR(fc, lastptr); + } + else +#endif /* SUPPORT_UNICODE */ + fc = *lastptr; + if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr; +#ifdef SUPPORT_UNICODE + if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY) + { + int chartype = UCD_CHARTYPE(fc); + int category = PRIV(ucp_gentype)[chartype]; + prev_is_word = (category == ucp_L || category == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc); + } + else +#endif /* SUPPORT_UNICODE */ + prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0; + } + + /* Get status of next character */ + + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + cur_is_word = FALSE; + } + else + { + PCRE2_SPTR nextptr = Feptr + 1; +#ifdef SUPPORT_UNICODE + if (utf) + { + FORWARDCHARTEST(nextptr, mb->end_subject); + GETCHAR(fc, Feptr); + } + else +#endif /* SUPPORT_UNICODE */ + fc = *Feptr; + if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr; +#ifdef SUPPORT_UNICODE + if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY) + { + int chartype = UCD_CHARTYPE(fc); + int category = PRIV(ucp_gentype)[chartype]; + cur_is_word = (category == ucp_L || category == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc); + } + else +#endif /* SUPPORT_UNICODE */ + cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0; + } + + /* Now see if the situation is what we want */ + + if ((*Fecode++ == OP_WORD_BOUNDARY || Fop == OP_UCP_WORD_BOUNDARY)? + cur_is_word == prev_is_word : cur_is_word != prev_is_word) + RRETURN(MATCH_NOMATCH); + break; + + + /* ===================================================================== */ + /* Backtracking (*VERB)s, with and without arguments. 
Note that if the + pattern is successfully matched, we do not come back from RMATCH. */ + + case OP_MARK: + Fmark = mb->nomatch_mark = Fecode + 2; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12); + + /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an + argument, and we must check whether that argument matches this MARK's + argument. It is passed back in mb->verb_skip_ptr. If it does match, we + return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject + position that corresponds to this mark. Otherwise, pass back the return + code unaltered. */ + + if (rrc == MATCH_SKIP_ARG && + PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0) + { + mb->verb_skip_ptr = Feptr; /* Pass back current position */ + RRETURN(MATCH_SKIP); + } + RRETURN(rrc); + + case OP_FAIL: + RRETURN(MATCH_NOMATCH); + + /* Record the current recursing group number in mb->verb_current_recurse + when a backtracking return such as MATCH_COMMIT is given. This enables the + recurse processing to catch verbs from within the recursion. 
*/ + + case OP_COMMIT: + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_COMMIT); + + case OP_COMMIT_ARG: + Fmark = mb->nomatch_mark = Fecode + 2; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_COMMIT); + + case OP_PRUNE: + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_PRUNE); + + case OP_PRUNE_ARG: + Fmark = mb->nomatch_mark = Fecode + 2; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_PRUNE); + + case OP_SKIP: + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_skip_ptr = Feptr; /* Pass back current position */ + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_SKIP); + + /* Note that, for Perl compatibility, SKIP with an argument does NOT set + nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was + not a matching mark, we have to re-run the match, ignoring the SKIP_ARG + that failed and any that precede it (either they also failed, or were not + triggered). To do this, we maintain a count of executed SKIP_ARGs. If a + SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg + set to the count of the one that failed. */ + + case OP_SKIP_ARG: + mb->skip_arg_count++; + if (mb->skip_arg_count <= mb->ignore_skip_arg) + { + Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1]; + break; + } + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + + /* Pass back the current skip name and return the special MATCH_SKIP_ARG + return code. 
This will either be caught by a matching MARK, or get to the + top, where it causes a rematch with mb->ignore_skip_arg set to the value of + mb->skip_arg_count. */ + + mb->verb_skip_ptr = Fecode + 2; + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_SKIP_ARG); + + /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that + the branch in which it occurs can be determined. */ + + case OP_THEN: + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_ecode_ptr = Fecode; + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_THEN); + + case OP_THEN_ARG: + Fmark = mb->nomatch_mark = Fecode + 2; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_ecode_ptr = Fecode; + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_THEN); + + + /* ===================================================================== */ + /* There's been some horrible disaster. Arrival here can only mean there is + something seriously wrong in the code above or the OP_xxx definitions. */ + + default: + return PCRE2_ERROR_INTERNAL; + } + + /* Do not insert any code in here without much thought; it is assumed + that "continue" in the code above comes out to here to repeat the main + loop. */ + + } /* End of main loop */ +/* Control never reaches here */ + + +/* ========================================================================= */ +/* The RRETURN() macro jumps here. The number that is saved in Freturn_id +indicates which label we actually want to return to. The value in Frdepth is +the index number of the frame in the vector. The return value has been placed +in rrc. 
*/ + +#define LBL(val) case val: goto L_RM##val; + +RETURN_SWITCH: +if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; +if (Frdepth == 0) return rrc; /* Exit from the top level */ +F = (heapframe *)((char *)F - Fback_frame); /* Backtrack */ +mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */ + +#ifdef DEBUG_SHOW_RMATCH +fprintf(stderr, "++ RETURN %d to RM%d\n", rrc, Freturn_id); +#endif + +switch (Freturn_id) + { + LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8) + LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16) + LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24) + LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32) + LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) + +#ifdef SUPPORT_WIDE_CHARS + LBL(100) LBL(101) +#endif + +#ifdef SUPPORT_UNICODE + LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206) + LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213) + LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220) + LBL(221) LBL(222) LBL(223) LBL(224) LBL(225) +#endif + + default: + return PCRE2_ERROR_INTERNAL; + } +#undef LBL +} + + +/************************************************* +* Match a Regular Expression * +*************************************************/ + +/* This function applies a compiled pattern to a subject string and picks out +portions of the string if it matches. Two elements in the vector are set for +each substring: the offsets to the start and end of the substring. 
+ +Arguments: + code points to the compiled expression + subject points to the subject string + length length of subject string (may contain binary zeros) + start_offset where to start in the subject string + options option bits + match_data points to a match_data block + mcontext points to a PCRE2 match context + +Returns: > 0 => success; value is the number of ovector pairs filled + = 0 => success, but ovector is not big enough + = -1 => failed to match (PCRE2_ERROR_NOMATCH) + = -2 => partial match (PCRE2_ERROR_PARTIAL) + < -2 => some kind of unexpected problem +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, + PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext) +{ +int rc; +int was_zero_terminated = 0; +const uint8_t *start_bits = NULL; +const pcre2_real_code *re = (const pcre2_real_code *)code; + +BOOL anchored; +BOOL firstline; +BOOL has_first_cu = FALSE; +BOOL has_req_cu = FALSE; +BOOL startline; + +#if PCRE2_CODE_UNIT_WIDTH == 8 +PCRE2_SPTR memchr_found_first_cu; +PCRE2_SPTR memchr_found_first_cu2; +#endif + +PCRE2_UCHAR first_cu = 0; +PCRE2_UCHAR first_cu2 = 0; +PCRE2_UCHAR req_cu = 0; +PCRE2_UCHAR req_cu2 = 0; + +PCRE2_SPTR bumpalong_limit; +PCRE2_SPTR end_subject; +PCRE2_SPTR true_end_subject; +PCRE2_SPTR start_match; +PCRE2_SPTR req_cu_ptr; +PCRE2_SPTR start_partial; +PCRE2_SPTR match_partial; + +#ifdef SUPPORT_JIT +BOOL use_jit; +#endif + +/* This flag is needed even when Unicode is not supported for convenience +(it is used by the IS_NEWLINE macro).
*/ + +BOOL utf = FALSE; + +#ifdef SUPPORT_UNICODE +BOOL ucp = FALSE; +BOOL allow_invalid; +uint32_t fragment_options = 0; +#ifdef SUPPORT_JIT +BOOL jit_checked_utf = FALSE; +#endif +#endif /* SUPPORT_UNICODE */ + +PCRE2_SIZE frame_size; +PCRE2_SIZE heapframes_size; + +/* We need to have mb as a pointer to a match block, because the IS_NEWLINE +macro is used below, and it expects NLBLOCK to be defined as a pointer. */ + +pcre2_callout_block cb; +match_block actual_match_block; +match_block *mb = &actual_match_block; + +/* Recognize NULL, length 0 as an empty string. */ + +if (subject == NULL && length == 0) subject = (PCRE2_SPTR)""; + +/* Plausibility checks */ + +if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; +if (code == NULL || subject == NULL || match_data == NULL) + return PCRE2_ERROR_NULL; + +start_match = subject + start_offset; +req_cu_ptr = start_match - 1; +if (length == PCRE2_ZERO_TERMINATED) + { + length = PRIV(strlen)(subject); + was_zero_terminated = 1; + } +true_end_subject = end_subject = subject + length; + +if (start_offset > length) return PCRE2_ERROR_BADOFFSET; + +/* Check that the first field in the block is the magic number. */ + +if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; + +/* Check the code unit width. */ + +if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) + return PCRE2_ERROR_BADMODE; + +/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the +options variable for this function. Users of PCRE2 who are not calling the +function directly would like to have a way of setting these flags, in the same +way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with +constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and +(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which we now +transfer to the options for this function. The bits are guaranteed to be +adjacent, but do not have the same values. 
This bit of Boolean trickery assumes +that the match-time bits are not more significant than the flag bits. If by +accident this is not the case, a compile-time division by zero error will +occur. */ + +#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET) +#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART) +options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1))); +#undef FF +#undef OO + +/* If the pattern was successfully studied with JIT support, we will run the +JIT executable instead of the rest of this function. Most options must be set +at compile time for the JIT code to be usable. */ + +#ifdef SUPPORT_JIT +use_jit = (re->executable_jit != NULL && + (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0); +#endif + +/* Initialize UTF/UCP parameters. */ + +#ifdef SUPPORT_UNICODE +utf = (re->overall_options & PCRE2_UTF) != 0; +allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0; +ucp = (re->overall_options & PCRE2_UCP) != 0; +#endif /* SUPPORT_UNICODE */ + +/* Convert the partial matching flags into an integer. */ + +mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 : + ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0; + +/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same +time. */ + +if (mb->partial != 0 && + ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0) + return PCRE2_ERROR_BADOPTION; + +/* It is an error to set an offset limit without setting the flag at compile +time. */ + +if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET && + (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0) + return PCRE2_ERROR_BADOFFSETLIMIT; + +/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT, +free the memory that was obtained. Set the field to NULL for no match cases. 
*/ + +if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0) + { + match_data->memctl.free((void *)match_data->subject, + match_data->memctl.memory_data); + match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT; + } +match_data->subject = NULL; + +/* Zero the error offset in case the first code unit is invalid UTF. */ + +match_data->startchar = 0; + + +/* ============================= JIT matching ============================== */ + +/* Prepare for JIT matching. Check a UTF string for validity unless no check is +requested or invalid UTF can be handled. We check only the portion of the +subject that might be inspected during matching - from the offset minus the +maximum lookbehind to the given length. This saves time when a small part of a +large subject is being matched by the use of a starting offset. Note that the +maximum lookbehind is a number of characters, not code units. */ + +#ifdef SUPPORT_JIT +if (use_jit) + { +#ifdef SUPPORT_UNICODE + if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid) + { +#if PCRE2_CODE_UNIT_WIDTH != 32 + unsigned int i; +#endif + + /* For 8-bit and 16-bit UTF, check that the first code unit is a valid + character start. */ + +#if PCRE2_CODE_UNIT_WIDTH != 32 + if (start_match < end_subject && NOT_FIRSTCU(*start_match)) + { + if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET; +#if PCRE2_CODE_UNIT_WIDTH == 8 + return PCRE2_ERROR_UTF8_ERR20; /* Isolated 0x80 byte */ +#else + return PCRE2_ERROR_UTF16_ERR3; /* Isolated low surrogate */ +#endif + } +#endif /* WIDTH != 32 */ + + /* Move back by the maximum lookbehind, just in case it happens at the very + start of matching.
*/ + +#if PCRE2_CODE_UNIT_WIDTH != 32 + for (i = re->max_lookbehind; i > 0 && start_match > subject; i--) + { + start_match--; + while (start_match > subject && +#if PCRE2_CODE_UNIT_WIDTH == 8 + (*start_match & 0xc0) == 0x80) +#else /* 16-bit */ + (*start_match & 0xfc00) == 0xdc00) +#endif + start_match--; + } +#else /* PCRE2_CODE_UNIT_WIDTH != 32 */ + + /* In the 32-bit library, one code unit equals one character. However, + we cannot just subtract the lookbehind and then compare pointers, because + a very large lookbehind could create an invalid pointer. */ + + if (start_offset >= re->max_lookbehind) + start_match -= re->max_lookbehind; + else + start_match = subject; +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + + /* Validate the relevant portion of the subject. Adjust the offset of an + invalid code point to be an absolute offset in the whole string. */ + + match_data->rc = PRIV(valid_utf)(start_match, + length - (start_match - subject), &(match_data->startchar)); + if (match_data->rc != 0) + { + match_data->startchar += start_match - subject; + return match_data->rc; + } + jit_checked_utf = TRUE; + } +#endif /* SUPPORT_UNICODE */ + + /* If JIT returns BADOPTION, which means that the selected complete or + partial matching mode was not compiled, fall through to the interpreter. 
*/ + + rc = pcre2_jit_match(code, subject, length, start_offset, options, + match_data, mcontext); + if (rc != PCRE2_ERROR_JIT_BADOPTION) + { + match_data->subject_length = length; + if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0) + { + length = CU2BYTES(length + was_zero_terminated); + match_data->subject = match_data->memctl.malloc(length, + match_data->memctl.memory_data); + if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY; + memcpy((void *)match_data->subject, subject, length); + match_data->flags |= PCRE2_MD_COPIED_SUBJECT; + } + return rc; + } + } +#endif /* SUPPORT_JIT */ + +/* ========================= End of JIT matching ========================== */ + + +/* Proceed with non-JIT matching. The default is to allow lookbehinds to the +start of the subject. A UTF check when there is a non-zero offset may change +this. */ + +mb->check_subject = subject; + +/* If a UTF subject string was not checked for validity in the JIT code above, +check it here, and handle support for invalid UTF strings. The check above +happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset. +If we get here in those circumstances, it means the subject string is valid, +but for some reason JIT matching was not successful. There is no need to check +the subject again. + +We check only the portion of the subject that might be inspected during +matching - from the offset minus the maximum lookbehind to the given length. +This saves time when a small part of a large subject is being matched by the +use of a starting offset. Note that the maximum lookbehind is a number of +characters, not code units. + +Note also that support for invalid UTF forces a check, overriding the setting +of PCRE2_NO_UTF_CHECK.
*/ + +#ifdef SUPPORT_UNICODE +if (utf && +#ifdef SUPPORT_JIT + !jit_checked_utf && +#endif + ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid)) + { +#if PCRE2_CODE_UNIT_WIDTH != 32 + BOOL skipped_bad_start = FALSE; +#endif + + /* For 8-bit and 16-bit UTF, check that the first code unit is a valid + character start. If we are handling invalid UTF, just skip over such code + units. Otherwise, give an appropriate error. */ + +#if PCRE2_CODE_UNIT_WIDTH != 32 + if (allow_invalid) + { + while (start_match < end_subject && NOT_FIRSTCU(*start_match)) + { + start_match++; + skipped_bad_start = TRUE; + } + } + else if (start_match < end_subject && NOT_FIRSTCU(*start_match)) + { + if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET; +#if PCRE2_CODE_UNIT_WIDTH == 8 + return PCRE2_ERROR_UTF8_ERR20; /* Isolated 0x80 byte */ +#else + return PCRE2_ERROR_UTF16_ERR3; /* Isolated low surrogate */ +#endif + } +#endif /* WIDTH != 32 */ + + /* The mb->check_subject field points to the start of UTF checking; + lookbehinds can go back no further than this. */ + + mb->check_subject = start_match; + + /* Move back by the maximum lookbehind, just in case it happens at the very + start of matching, but don't do this if we skipped bad 8-bit or 16-bit code + units above. */ + +#if PCRE2_CODE_UNIT_WIDTH != 32 + if (!skipped_bad_start) + { + unsigned int i; + for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--) + { + mb->check_subject--; + while (mb->check_subject > subject && +#if PCRE2_CODE_UNIT_WIDTH == 8 + (*mb->check_subject & 0xc0) == 0x80) +#else /* 16-bit */ + (*mb->check_subject & 0xfc00) == 0xdc00) +#endif + mb->check_subject--; + } + } +#else /* PCRE2_CODE_UNIT_WIDTH != 32 */ + + /* In the 32-bit library, one code unit equals one character. However, + we cannot just subtract the lookbehind and then compare pointers, because + a very large lookbehind could create an invalid pointer. 
*/ + + if (start_offset >= re->max_lookbehind) + mb->check_subject -= re->max_lookbehind; + else + mb->check_subject = subject; +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + + /* Validate the relevant portion of the subject. There's a loop in case we + encounter bad UTF in the characters preceding start_match which we are + scanning because of a lookbehind. */ + + for (;;) + { + match_data->rc = PRIV(valid_utf)(mb->check_subject, + length - (mb->check_subject - subject), &(match_data->startchar)); + + if (match_data->rc == 0) break; /* Valid UTF string */ + + /* Invalid UTF string. Adjust the offset to be an absolute offset in the + whole string. If we are handling invalid UTF strings, set end_subject to + stop before the bad code unit, and set the options to "not end of line". + Otherwise return the error. */ + + match_data->startchar += mb->check_subject - subject; + if (!allow_invalid || match_data->rc > 0) return match_data->rc; + end_subject = subject + match_data->startchar; + + /* If the end precedes start_match, it means there is invalid UTF in the + extra code units we reversed over because of a lookbehind. Advance past the + first bad code unit, and then skip invalid character starting code units in + 8-bit and 16-bit modes, and try again with the original end point. */ + + if (end_subject < start_match) + { + mb->check_subject = end_subject + 1; +#if PCRE2_CODE_UNIT_WIDTH != 32 + while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject)) + mb->check_subject++; +#endif + end_subject = true_end_subject; + } + + /* Otherwise, set the not end of line option, and do the match. */ + + else + { + fragment_options = PCRE2_NOTEOL; + break; + } + } + } +#endif /* SUPPORT_UNICODE */ + +/* A NULL match context means "use a default context", but we take the memory +control functions from the pattern. 
*/ + +if (mcontext == NULL) + { + mcontext = (pcre2_match_context *)(&PRIV(default_match_context)); + mb->memctl = re->memctl; + } +else mb->memctl = mcontext->memctl; + +anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0; +firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0; +startline = (re->flags & PCRE2_STARTLINE) != 0; +bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)? + true_end_subject : subject + mcontext->offset_limit; + +/* Initialize and set up the fixed fields in the callout block, with a pointer +in the match block. */ + +mb->cb = &cb; +cb.version = 2; +cb.subject = subject; +cb.subject_length = (PCRE2_SIZE)(end_subject - subject); +cb.callout_flags = 0; + +/* Fill in the remaining fields in the match block, except for moptions, which +gets set later. */ + +mb->callout = mcontext->callout; +mb->callout_data = mcontext->callout_data; + +mb->start_subject = subject; +mb->start_offset = start_offset; +mb->end_subject = end_subject; +mb->true_end_subject = true_end_subject; +mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0; +mb->allowemptypartial = (re->max_lookbehind > 0) || + (re->flags & PCRE2_MATCH_EMPTY) != 0; +mb->poptions = re->overall_options; /* Pattern options */ +mb->ignore_skip_arg = 0; +mb->mark = mb->nomatch_mark = NULL; /* In case never set */ + +/* The name table is needed for finding all the numbers associated with a +given name, for condition testing. The code follows the name table. */ + +mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)); +mb->name_count = re->name_count; +mb->name_entry_size = re->name_entry_size; +mb->start_code = mb->name_table + re->name_count * re->name_entry_size; + +/* Process the \R and newline settings. 
*/ + +mb->bsr_convention = re->bsr_convention; +mb->nltype = NLTYPE_FIXED; +switch(re->newline_convention) + { + case PCRE2_NEWLINE_CR: + mb->nllen = 1; + mb->nl[0] = CHAR_CR; + break; + + case PCRE2_NEWLINE_LF: + mb->nllen = 1; + mb->nl[0] = CHAR_NL; + break; + + case PCRE2_NEWLINE_NUL: + mb->nllen = 1; + mb->nl[0] = CHAR_NUL; + break; + + case PCRE2_NEWLINE_CRLF: + mb->nllen = 2; + mb->nl[0] = CHAR_CR; + mb->nl[1] = CHAR_NL; + break; + + case PCRE2_NEWLINE_ANY: + mb->nltype = NLTYPE_ANY; + break; + + case PCRE2_NEWLINE_ANYCRLF: + mb->nltype = NLTYPE_ANYCRLF; + break; + + default: return PCRE2_ERROR_INTERNAL; + } + +/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE +vector at the end, whose size depends on the number of capturing parentheses in +the pattern. It is not used at all if there are no capturing parentheses. + + frame_size is the total size of each frame + match_data->heapframes is the pointer to the frames vector + match_data->heapframes_size is the allocated size of the vector + +We must pad the frame_size for alignment to ensure subsequent frames are as +aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE +array, that does not guarantee it is suitably aligned for pointers, as some +architectures have pointers that are larger than a size_t. */ + +frame_size = (offsetof(heapframe, ovector) + + re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) & + ~(HEAPFRAME_ALIGNMENT - 1); + +/* Limits set in the pattern override the match context only if they are +smaller. */ + +mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)? + mcontext->heap_limit : re->limit_heap); + +mb->match_limit = (mcontext->match_limit < re->limit_match)? + mcontext->match_limit : re->limit_match; + +mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)? + mcontext->depth_limit : re->limit_depth; + +/* If a pattern has very many capturing parentheses, the frame size may be very +large. 
Set the initial frame vector size to ensure that there are at least 10 +available frames, but enforce a minimum of START_FRAMES_SIZE. If this is +greater than the heap limit, get as large a vector as possible. */ + +heapframes_size = frame_size * 10; +if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE; +if (heapframes_size / 1024 > mb->heap_limit) + { + PCRE2_SIZE max_size = 1024 * mb->heap_limit; + if (max_size < frame_size) return PCRE2_ERROR_HEAPLIMIT; + heapframes_size = max_size; + } + +/* If an existing frame vector in the match_data block is large enough, we can +use it. Otherwise, free any pre-existing vector and get a new one. */ + +if (match_data->heapframes_size < heapframes_size) + { + match_data->memctl.free(match_data->heapframes, + match_data->memctl.memory_data); + match_data->heapframes = match_data->memctl.malloc(heapframes_size, + match_data->memctl.memory_data); + if (match_data->heapframes == NULL) + { + match_data->heapframes_size = 0; + return PCRE2_ERROR_NOMEMORY; + } + match_data->heapframes_size = heapframes_size; + } + +/* Write to the ovector within the first frame to mark every capture unset and +to avoid uninitialized memory read errors when it is copied to a new frame. */ + +memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff, + frame_size - offsetof(heapframe, ovector)); + +/* Pointers to the individual character tables */ + +mb->lcc = re->tables + lcc_offset; +mb->fcc = re->tables + fcc_offset; +mb->ctypes = re->tables + ctypes_offset; + +/* Set up the first code unit to match, if available. If there's no first code +unit there may be a bitmap of possible first characters. 
*/ + +if ((re->flags & PCRE2_FIRSTSET) != 0) + { + has_first_cu = TRUE; + first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit); + if ((re->flags & PCRE2_FIRSTCASELESS) != 0) + { + first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu); +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu); +#else + if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu); +#endif +#endif /* SUPPORT_UNICODE */ + } + } +else + if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0) + start_bits = re->start_bitmap; + +/* There may also be a "last known required character" set. */ + +if ((re->flags & PCRE2_LASTSET) != 0) + { + has_req_cu = TRUE; + req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit); + if ((re->flags & PCRE2_LASTCASELESS) != 0) + { + req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu); +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu); +#else + if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu); +#endif +#endif /* SUPPORT_UNICODE */ + } + } + + +/* ==========================================================================*/ + +/* Loop for handling unanchored repeated matching attempts; for anchored regexs +the loop runs just once. */ + +#ifdef SUPPORT_UNICODE +FRAGMENT_RESTART: +#endif + +start_partial = match_partial = NULL; +mb->hitend = FALSE; + +#if PCRE2_CODE_UNIT_WIDTH == 8 +memchr_found_first_cu = NULL; +memchr_found_first_cu2 = NULL; +#endif + +for(;;) + { + PCRE2_SPTR new_start_match; + + /* ----------------- Start of match optimizations ---------------- */ + + /* There are some optimizations that avoid running the match if a known + starting point is not found, or if a known later code unit is not present. + However, there is an option (settable at compile time) that disables these, + for testing and for ensuring that all callouts do actually occur. 
*/ + + if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) + { + /* If firstline is TRUE, the start of the match is constrained to the first + line of a multiline string. That is, the match must be before or at the + first newline following the start of matching. Temporarily adjust + end_subject so that we stop the scans for a first code unit at a newline. + If the match fails at the newline, later code breaks the loop. */ + + if (firstline) + { + PCRE2_SPTR t = start_match; +#ifdef SUPPORT_UNICODE + if (utf) + { + while (t < end_subject && !IS_NEWLINE(t)) + { + t++; + ACROSSCHAR(t < end_subject, t, t++); + } + } + else +#endif + while (t < end_subject && !IS_NEWLINE(t)) t++; + end_subject = t; + } + + /* Anchored: check the first code unit if one is recorded. This may seem + pointless but it can help in detecting a no match case without scanning for + the required code unit. */ + + if (anchored) + { + if (has_first_cu || start_bits != NULL) + { + BOOL ok = start_match < end_subject; + if (ok) + { + PCRE2_UCHAR c = UCHAR21TEST(start_match); + ok = has_first_cu && (c == first_cu || c == first_cu2); + if (!ok && start_bits != NULL) + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (c > 255) c = 255; +#endif + ok = (start_bits[c/8] & (1u << (c&7))) != 0; + } + } + if (!ok) + { + rc = MATCH_NOMATCH; + break; + } + } + } + + /* Not anchored. Advance to a unique first code unit if there is one. */ + + else + { + if (has_first_cu) + { + if (first_cu != first_cu2) /* Caseless */ + { + /* In 16-bit and 32-bit modes we have to do our own search, so can + look for both cases at once. */ + +#if PCRE2_CODE_UNIT_WIDTH != 8 + PCRE2_UCHAR smc; + while (start_match < end_subject && + (smc = UCHAR21TEST(start_match)) != first_cu && + smc != first_cu2) + start_match++; +#else + /* In 8-bit mode, the use of memchr() gives a big speed up, even + though we have to call it twice in order to find the earliest + occurrence of the code unit in either of its cases.
Caching is used + to remember the positions of previously found code units. This can + make a huge difference when the strings are very long and only one + case is actually present. */ + + PCRE2_SPTR pp1 = NULL; + PCRE2_SPTR pp2 = NULL; + PCRE2_SIZE searchlength = end_subject - start_match; + + /* If we haven't got a previously found position for first_cu, or if + the current starting position is later, we need to do a search. If + the code unit is not found, set it to the end. */ + + if (memchr_found_first_cu == NULL || + start_match > memchr_found_first_cu) + { + pp1 = memchr(start_match, first_cu, searchlength); + memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1; + } + + /* If the start is before a previously found position, use the + previous position, or NULL if a previous search failed. */ + + else pp1 = (memchr_found_first_cu == end_subject)? NULL : + memchr_found_first_cu; + + /* Do the same thing for the other case. */ + + if (memchr_found_first_cu2 == NULL || + start_match > memchr_found_first_cu2) + { + pp2 = memchr(start_match, first_cu2, searchlength); + memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2; + } + + else pp2 = (memchr_found_first_cu2 == end_subject)? NULL : + memchr_found_first_cu2; + + /* Set the start to the end of the subject if neither case was found. + Otherwise, use the earlier found point. */ + + if (pp1 == NULL) + start_match = (pp2 == NULL)? end_subject : pp2; + else + start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2; + +#endif /* 8-bit handling */ + } + + /* The caseful case is much simpler. 
*/ + + else + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + while (start_match < end_subject && UCHAR21TEST(start_match) != + first_cu) + start_match++; +#else + start_match = memchr(start_match, first_cu, end_subject - start_match); + if (start_match == NULL) start_match = end_subject; +#endif + } + + /* If we can't find the required first code unit, having reached the + true end of the subject, break the bumpalong loop, to force a match + failure, except when doing partial matching, when we let the next cycle + run at the end of the subject. To see why, consider the pattern + /(?<=abc)def/, which partially matches "abc", even though the string + does not contain the starting character "d". If we have not reached the + true end of the subject (PCRE2_FIRSTLINE caused end_subject to be + temporarily modified) we also let the cycle run, because the matching + string is legitimately allowed to start with the first code unit of a + newline. */ + + if (mb->partial == 0 && start_match >= mb->end_subject) + { + rc = MATCH_NOMATCH; + break; + } + } + + /* If there's no first code unit, advance to just after a linebreak for a + multiline match if required. */ + + else if (startline) + { + if (start_match > mb->start_subject + start_offset) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + while (start_match < end_subject && !WAS_NEWLINE(start_match)) + { + start_match++; + ACROSSCHAR(start_match < end_subject, start_match, start_match++); + } + } + else +#endif + while (start_match < end_subject && !WAS_NEWLINE(start_match)) + start_match++; + + /* If we have just passed a CR and the newline option is ANY or + ANYCRLF, and we are now at a LF, advance the match position by one + more code unit. 
*/ + + if (start_match[-1] == CHAR_CR && + (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) && + start_match < end_subject && + UCHAR21TEST(start_match) == CHAR_NL) + start_match++; + } + } + + /* If there's no first code unit or a requirement for a multiline line + start, advance to a non-unique first code unit if any have been + identified. The bitmap contains only 256 bits. When code units are 16 or + 32 bits wide, all code units greater than 254 set the 255 bit. */ + + else if (start_bits != NULL) + { + while (start_match < end_subject) + { + uint32_t c = UCHAR21TEST(start_match); +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (c > 255) c = 255; +#endif + if ((start_bits[c/8] & (1u << (c&7))) != 0) break; + start_match++; + } + + /* See comment above in first_cu checking about the next few lines. */ + + if (mb->partial == 0 && start_match >= mb->end_subject) + { + rc = MATCH_NOMATCH; + break; + } + } + } /* End first code unit handling */ + + /* Restore fudged end_subject */ + + end_subject = mb->end_subject; + + /* The following two optimizations must be disabled for partial matching. */ + + if (mb->partial == 0) + { + PCRE2_SPTR p; + + /* The minimum matching length is a lower bound; no string of that length + may actually match the pattern. Although the value is, strictly, in + characters, we treat it as code units to avoid spending too much time in + this optimization. */ + + if (end_subject - start_match < re->minlength) + { + rc = MATCH_NOMATCH; + break; + } + + /* If req_cu is set, we know that that code unit must appear in the + subject for the (non-partial) match to succeed. If the first code unit is + set, req_cu must be later in the subject; otherwise the test starts at + the match point. This optimization can save a huge amount of backtracking + in patterns with nested unlimited repeats that aren't going to match. + Writing separate code for caseful/caseless versions makes it go faster, + as does using an autoincrement and backing off on a match. 
As in the case + of the first code unit, using memchr() in the 8-bit library gives a big + speed up. Unlike the first_cu check above, we do not need to call + memchr() twice in the caseless case because we only need to check for the + presence of the character in either case, not find the first occurrence. + + The search can be skipped if the code unit was found later than the + current starting point in a previous iteration of the bumpalong loop. + + HOWEVER: when the subject string is very, very long, searching to its end + can take a long time, and give bad performance on quite ordinary + anchored patterns. This showed up when somebody was matching something + like /^\d+C/ on a 32-megabyte string... so we don't do this when the + string is sufficiently long, but it's worth searching a lot more for + unanchored patterns. */ + + p = start_match + (has_first_cu? 1:0); + if (has_req_cu && p > req_cu_ptr) + { + PCRE2_SIZE check_length = end_subject - start_match; + + if (check_length < REQ_CU_MAX || + (!anchored && check_length < REQ_CU_MAX * 1000)) + { + if (req_cu != req_cu2) /* Caseless */ + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + while (p < end_subject) + { + uint32_t pp = UCHAR21INCTEST(p); + if (pp == req_cu || pp == req_cu2) { p--; break; } + } +#else /* 8-bit code units */ + PCRE2_SPTR pp = p; + p = memchr(pp, req_cu, end_subject - pp); + if (p == NULL) + { + p = memchr(pp, req_cu2, end_subject - pp); + if (p == NULL) p = end_subject; + } +#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */ + } + + /* The caseful case */ + + else + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + while (p < end_subject) + { + if (UCHAR21INCTEST(p) == req_cu) { p--; break; } + } + +#else /* 8-bit code units */ + p = memchr(p, req_cu, end_subject - p); + if (p == NULL) p = end_subject; +#endif + } + + /* If we can't find the required code unit, break the bumpalong loop, + forcing a match failure. 
*/ + + if (p >= end_subject) + { + rc = MATCH_NOMATCH; + break; + } + + /* If we have found the required code unit, save the point where we + found it, so that we don't search again next time round the bumpalong + loop if the start hasn't yet passed this code unit. */ + + req_cu_ptr = p; + } + } + } + } + + /* ------------ End of start of match optimizations ------------ */ + + /* Give no match if we have passed the bumpalong limit. */ + + if (start_match > bumpalong_limit) + { + rc = MATCH_NOMATCH; + break; + } + + /* OK, we can now run the match. If "hitend" is set afterwards, remember the + first starting point for which a partial match was found. */ + + cb.start_match = (PCRE2_SIZE)(start_match - subject); + cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH; + + mb->start_used_ptr = start_match; + mb->last_used_ptr = start_match; +#ifdef SUPPORT_UNICODE + mb->moptions = options | fragment_options; +#else + mb->moptions = options; +#endif + mb->match_call_count = 0; + mb->end_offset_top = 0; + mb->skip_arg_count = 0; + +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Calling match()\n"); +#endif + + rc = match(start_match, mb->start_code, re->top_bracket, frame_size, + match_data, mb); + +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ match() returned %d\n\n", rc); +#endif + + if (mb->hitend && start_partial == NULL) + { + start_partial = mb->start_used_ptr; + match_partial = start_match; + } + + switch(rc) + { + /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched + the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP + entirely. The only way we can do that is to re-do the match at the same + point, with a flag to force SKIP with an argument to be ignored. Just + treating this case as NOMATCH does not work because it does not check other + alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. 
*/ + + case MATCH_SKIP_ARG: + new_start_match = start_match; + mb->ignore_skip_arg = mb->skip_arg_count; + break; + + /* SKIP passes back the next starting point explicitly, but if it is no + greater than the match we have just done, treat it as NOMATCH. */ + + case MATCH_SKIP: + if (mb->verb_skip_ptr > start_match) + { + new_start_match = mb->verb_skip_ptr; + break; + } + /* Fall through */ + + /* NOMATCH and PRUNE advance by one character. THEN at this level acts + exactly like PRUNE. Unset ignore SKIP-with-argument. */ + + case MATCH_NOMATCH: + case MATCH_PRUNE: + case MATCH_THEN: + mb->ignore_skip_arg = 0; + new_start_match = start_match + 1; +#ifdef SUPPORT_UNICODE + if (utf) + ACROSSCHAR(new_start_match < end_subject, new_start_match, + new_start_match++); +#endif + break; + + /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */ + + case MATCH_COMMIT: + rc = MATCH_NOMATCH; + goto ENDLOOP; + + /* Any other return is either a match, or some kind of error. */ + + default: + goto ENDLOOP; + } + + /* Control reaches here for the various types of "no match at this point" + result. Reset the code to MATCH_NOMATCH for subsequent checking. */ + + rc = MATCH_NOMATCH; + + /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first + newline in the subject (though it may continue over the newline). Therefore, + if we have just failed to match, starting at a newline, do not continue. */ + + if (firstline && IS_NEWLINE(start_match)) break; + + /* Advance to new matching position */ + + start_match = new_start_match; + + /* Break the loop if the pattern is anchored or if we have passed the end of + the subject. */ + + if (anchored || start_match > end_subject) break; + + /* If we have just passed a CR and we are now at a LF, and the pattern does + not contain any explicit matches for \r or \n, and the newline option is CRLF + or ANY or ANYCRLF, advance the match position by one more code unit. 
In + normal matching start_match will aways be greater than the first position at + this stage, but a failed *SKIP can cause a return at the same point, which is + why the first test exists. */ + + if (start_match > subject + start_offset && + start_match[-1] == CHAR_CR && + start_match < end_subject && + *start_match == CHAR_NL && + (re->flags & PCRE2_HASCRORLF) == 0 && + (mb->nltype == NLTYPE_ANY || + mb->nltype == NLTYPE_ANYCRLF || + mb->nllen == 2)) + start_match++; + + mb->mark = NULL; /* Reset for start of next match attempt */ + } /* End of for(;;) "bumpalong" loop */ + +/* ==========================================================================*/ + +/* When we reach here, one of the following stopping conditions is true: + +(1) The match succeeded, either completely, or partially; + +(2) The pattern is anchored or the match was failed after (*COMMIT); + +(3) We are past the end of the subject or the bumpalong limit; + +(4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because + this option requests that a match occur at or before the first newline in + the subject. + +(5) Some kind of error occurred. + +*/ + +ENDLOOP: + +/* If end_subject != true_end_subject, it means we are handling invalid UTF, +and have just processed a non-terminal fragment. If this resulted in no match +or a partial match we must carry on to the next fragment (a partial match is +returned to the caller only at the very end of the subject). A loop is used to +avoid trying to match against empty fragments; if the pattern can match an +empty string it would have done so already. */ + +#ifdef SUPPORT_UNICODE +if (utf && end_subject != true_end_subject && + (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL)) + { + for (;;) + { + /* Advance past the first bad code unit, and then skip invalid character + starting code units in 8-bit and 16-bit modes. 
*/ + + start_match = end_subject + 1; + +#if PCRE2_CODE_UNIT_WIDTH != 32 + while (start_match < true_end_subject && NOT_FIRSTCU(*start_match)) + start_match++; +#endif + + /* If we have hit the end of the subject, there isn't another non-empty + fragment, so give up. */ + + if (start_match >= true_end_subject) + { + rc = MATCH_NOMATCH; /* In case it was partial */ + match_partial = NULL; + break; + } + + /* Check the rest of the subject */ + + mb->check_subject = start_match; + rc = PRIV(valid_utf)(start_match, length - (start_match - subject), + &(match_data->startchar)); + + /* The rest of the subject is valid UTF. */ + + if (rc == 0) + { + mb->end_subject = end_subject = true_end_subject; + fragment_options = PCRE2_NOTBOL; + goto FRAGMENT_RESTART; + } + + /* A subsequent UTF error has been found; if the next fragment is + non-empty, set up to process it. Otherwise, let the loop advance. */ + + else if (rc < 0) + { + mb->end_subject = end_subject = start_match + match_data->startchar; + if (end_subject > start_match) + { + fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL; + goto FRAGMENT_RESTART; + } + } + } + } +#endif /* SUPPORT_UNICODE */ + +/* Fill in fields that are always returned in the match data. */ + +match_data->code = re; +match_data->mark = mb->mark; +match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER; + +/* Handle a fully successful match. Set the return code to the number of +captured strings, or 0 if there were too many to fit into the ovector, and then +set the remaining returned values before returning. Make a copy of the subject +string if requested. */ + +if (rc == MATCH_MATCH) + { + match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)? + 0 : (int)mb->end_offset_top/2 + 1; + match_data->subject_length = length; + match_data->startchar = start_match - subject; + match_data->leftchar = mb->start_used_ptr - subject; + match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)? 
+ mb->last_used_ptr : mb->end_match_ptr) - subject; + if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0) + { + length = CU2BYTES(length + was_zero_terminated); + match_data->subject = match_data->memctl.malloc(length, + match_data->memctl.memory_data); + if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY; + memcpy((void *)match_data->subject, subject, length); + match_data->flags |= PCRE2_MD_COPIED_SUBJECT; + } + else match_data->subject = subject; + + return match_data->rc; + } + +/* Control gets here if there has been a partial match, an error, or if the +overall match attempt has failed at all permitted starting positions. Any mark +data is in the nomatch_mark field. */ + +match_data->mark = mb->nomatch_mark; + +/* For anything other than nomatch or partial match, just return the code. */ + +if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc; + +/* Handle a partial match. If a "soft" partial match was requested, searching +for a complete match will have continued, and the value of rc at this point +will be MATCH_NOMATCH. For a "hard" partial match, it will already be +PCRE2_ERROR_PARTIAL. */ + +else if (match_partial != NULL) + { + match_data->subject = subject; + match_data->subject_length = length; + match_data->ovector[0] = match_partial - subject; + match_data->ovector[1] = end_subject - subject; + match_data->startchar = match_partial - subject; + match_data->leftchar = start_partial - subject; + match_data->rightchar = end_subject - subject; + match_data->rc = PCRE2_ERROR_PARTIAL; + } + +/* Else this is the classic nomatch case. */ + +else match_data->rc = PCRE2_ERROR_NOMATCH; + +return match_data->rc; +} + +/* These #undefs are here to enable unity builds with CMake. 
*/ + +#undef NLBLOCK /* Block containing newline information */ +#undef PSSTART /* Field containing processed string start */ +#undef PSEND /* Field containing processed string end */ + +/* End of pcre2_match.c */ diff --git a/src/pcre2_match_data.c b/src/pcre2_match_data.c new file mode 100644 index 0000000..757dab9 --- /dev/null +++ b/src/pcre2_match_data.c @@ -0,0 +1,185 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + + + +/************************************************* +* Create a match data block given ovector size * +*************************************************/ + +/* A minimum of 1 is imposed on the number of ovector pairs. A maximum is also +imposed because the oveccount field in a match data block is uint16_t. */ + +PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION +pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext) +{ +pcre2_match_data *yield; +if (oveccount < 1) oveccount = 1; +if (oveccount > UINT16_MAX) oveccount = UINT16_MAX; +yield = PRIV(memctl_malloc)( + offsetof(pcre2_match_data, ovector) + 2*oveccount*sizeof(PCRE2_SIZE), + (pcre2_memctl *)gcontext); +if (yield == NULL) return NULL; +yield->oveccount = oveccount; +yield->flags = 0; +yield->heapframes = NULL; +yield->heapframes_size = 0; +return yield; +} + + + +/************************************************* +* Create a match data block using pattern data * +*************************************************/ + +/* If no context is supplied, use the memory allocator from the code. 
*/ + +PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION +pcre2_match_data_create_from_pattern(const pcre2_code *code, + pcre2_general_context *gcontext) +{ +if (gcontext == NULL) gcontext = (pcre2_general_context *)code; +return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1, + gcontext); +} + + + +/************************************************* +* Free a match data block * +*************************************************/ + +PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_match_data_free(pcre2_match_data *match_data) +{ +if (match_data != NULL) + { + if (match_data->heapframes != NULL) + match_data->memctl.free(match_data->heapframes, + match_data->memctl.memory_data); + if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0) + match_data->memctl.free((void *)match_data->subject, + match_data->memctl.memory_data); + match_data->memctl.free(match_data, match_data->memctl.memory_data); + } +} + + + +/************************************************* +* Get last mark in match * +*************************************************/ + +PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION +pcre2_get_mark(pcre2_match_data *match_data) +{ +return match_data->mark; +} + + + +/************************************************* +* Get pointer to ovector * +*************************************************/ + +PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION +pcre2_get_ovector_pointer(pcre2_match_data *match_data) +{ +return match_data->ovector; +} + + + +/************************************************* +* Get number of ovector slots * +*************************************************/ + +PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION +pcre2_get_ovector_count(pcre2_match_data *match_data) +{ +return match_data->oveccount; +} + + + +/************************************************* +* Get starting code unit in match * +*************************************************/ + +PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION 
+pcre2_get_startchar(pcre2_match_data *match_data) +{ +return match_data->startchar; +} + + + +/************************************************* +* Get size of match data block * +*************************************************/ + +PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION +pcre2_get_match_data_size(pcre2_match_data *match_data) +{ +return offsetof(pcre2_match_data, ovector) + + 2 * (match_data->oveccount) * sizeof(PCRE2_SIZE); +} + + + +/************************************************* +* Get heapframes size * +*************************************************/ + +PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION +pcre2_get_match_data_heapframes_size(pcre2_match_data *match_data) +{ +return match_data->heapframes_size; +} + +/* End of pcre2_match_data.c */ diff --git a/src/pcre2_newline.c b/src/pcre2_newline.c new file mode 100644 index 0000000..6e9366d --- /dev/null +++ b/src/pcre2_newline.c @@ -0,0 +1,243 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains internal functions for testing newlines when more than +one kind of newline is to be recognized. When a newline is found, its length is +returned. In principle, we could implement several newline "types", each +referring to a different set of newline characters. At present, PCRE2 supports +only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF, +and NLTYPE_ANY. The full list of Unicode newline characters is taken from +http://unicode.org/unicode/reports/tr18/. */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + + + +/************************************************* +* Check for newline at given position * +*************************************************/ + +/* This function is called only via the IS_NEWLINE macro, which does so only +when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. 
The case of a fixed +newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit +pointed to by ptr is less than the end of the string. + +Arguments: + ptr pointer to possible newline + type the newline type + endptr pointer to the end of the string + lenptr where to return the length + utf TRUE if in utf mode + +Returns: TRUE or FALSE +*/ + +BOOL +PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr, + uint32_t *lenptr, BOOL utf) +{ +uint32_t c; + +#ifdef SUPPORT_UNICODE +if (utf) { GETCHAR(c, ptr); } else c = *ptr; +#else +(void)utf; +c = *ptr; +#endif /* SUPPORT_UNICODE */ + +if (type == NLTYPE_ANYCRLF) switch(c) + { + case CHAR_LF: + *lenptr = 1; + return TRUE; + + case CHAR_CR: + *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; + return TRUE; + + default: + return FALSE; + } + +/* NLTYPE_ANY */ + +else switch(c) + { +#ifdef EBCDIC + case CHAR_NEL: +#endif + case CHAR_LF: + case CHAR_VT: + case CHAR_FF: + *lenptr = 1; + return TRUE; + + case CHAR_CR: + *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; + return TRUE; + +#ifndef EBCDIC +#if PCRE2_CODE_UNIT_WIDTH == 8 + case CHAR_NEL: + *lenptr = utf? 2 : 1; + return TRUE; + + case 0x2028: /* LS */ + case 0x2029: /* PS */ + *lenptr = 3; + return TRUE; + +#else /* 16-bit or 32-bit code units */ + case CHAR_NEL: + case 0x2028: /* LS */ + case 0x2029: /* PS */ + *lenptr = 1; + return TRUE; +#endif +#endif /* Not EBCDIC */ + + default: + return FALSE; + } +} + + + +/************************************************* +* Check for newline at previous position * +*************************************************/ + +/* This function is called only via the WAS_NEWLINE macro, which does so only +when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed +newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial +value of ptr is greater than the start of the string that is being processed. 
+ +Arguments: + ptr pointer to possible newline + type the newline type + startptr pointer to the start of the string + lenptr where to return the length + utf TRUE if in utf mode + +Returns: TRUE or FALSE +*/ + +BOOL +PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr, + uint32_t *lenptr, BOOL utf) +{ +uint32_t c; +ptr--; + +#ifdef SUPPORT_UNICODE +if (utf) + { + BACKCHAR(ptr); + GETCHAR(c, ptr); + } +else c = *ptr; +#else +(void)utf; +c = *ptr; +#endif /* SUPPORT_UNICODE */ + +if (type == NLTYPE_ANYCRLF) switch(c) + { + case CHAR_LF: + *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; + return TRUE; + + case CHAR_CR: + *lenptr = 1; + return TRUE; + + default: + return FALSE; + } + +/* NLTYPE_ANY */ + +else switch(c) + { + case CHAR_LF: + *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; + return TRUE; + +#ifdef EBCDIC + case CHAR_NEL: +#endif + case CHAR_VT: + case CHAR_FF: + case CHAR_CR: + *lenptr = 1; + return TRUE; + +#ifndef EBCDIC +#if PCRE2_CODE_UNIT_WIDTH == 8 + case CHAR_NEL: + *lenptr = utf? 2 : 1; + return TRUE; + + case 0x2028: /* LS */ + case 0x2029: /* PS */ + *lenptr = 3; + return TRUE; + +#else /* 16-bit or 32-bit code units */ + case CHAR_NEL: + case 0x2028: /* LS */ + case 0x2029: /* PS */ + *lenptr = 1; + return TRUE; +#endif +#endif /* Not EBCDIC */ + + default: + return FALSE; + } +} + +/* End of pcre2_newline.c */ diff --git a/src/pcre2_ord2utf.c b/src/pcre2_ord2utf.c new file mode 100644 index 0000000..1403730 --- /dev/null +++ b/src/pcre2_ord2utf.c @@ -0,0 +1,120 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This file contains a function that converts a Unicode character code point +into a UTF string. The behaviour is different for each code unit width. 
*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + + +/* If SUPPORT_UNICODE is not defined, this function will never be called. +Supply a dummy function because some compilers do not like empty source +modules. */ + +#ifndef SUPPORT_UNICODE +unsigned int +PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer) +{ +(void)(cvalue); +(void)(buffer); +return 0; +} +#else /* SUPPORT_UNICODE */ + + +/************************************************* +* Convert code point to UTF * +*************************************************/ + +/* +Arguments: + cvalue the character value + buffer pointer to buffer for result + +Returns: number of code units placed in the buffer +*/ + +unsigned int +PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer) +{ +/* Convert to UTF-8 */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +int i, j; +for (i = 0; i < PRIV(utf8_table1_size); i++) + if ((int)cvalue <= PRIV(utf8_table1)[i]) break; +buffer += i; +for (j = i; j > 0; j--) + { + *buffer-- = 0x80 | (cvalue & 0x3f); + cvalue >>= 6; + } +*buffer = PRIV(utf8_table2)[i] | cvalue; +return i + 1; + +/* Convert to UTF-16 */ + +#elif PCRE2_CODE_UNIT_WIDTH == 16 +if (cvalue <= 0xffff) + { + *buffer = (PCRE2_UCHAR)cvalue; + return 1; + } +cvalue -= 0x10000; +*buffer++ = 0xd800 | (cvalue >> 10); +*buffer = 0xdc00 | (cvalue & 0x3ff); +return 2; + +/* Convert to UTF-32 */ + +#else +*buffer = (PCRE2_UCHAR)cvalue; +return 1; +#endif +} +#endif /* SUPPORT_UNICODE */ + +/* End of pcre2_ord2utf.c */ diff --git a/src/pcre2_pattern_info.c b/src/pcre2_pattern_info.c new file mode 100644 index 0000000..a29f5ef --- /dev/null +++ b/src/pcre2_pattern_info.c @@ -0,0 +1,432 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+----------------------------------------------------------------------------- +*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + + +/************************************************* +* Return info about compiled pattern * +*************************************************/ + +/* +Arguments: + code points to compiled code + what what information is required + where where to put the information; if NULL, return length + +Returns: 0 when data returned + > 0 when length requested + < 0 on error or unset value +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where) +{ +const pcre2_real_code *re = (pcre2_real_code *)code; + +if (where == NULL) /* Requests field length */ + { + switch(what) + { + case PCRE2_INFO_ALLOPTIONS: + case PCRE2_INFO_ARGOPTIONS: + case PCRE2_INFO_BACKREFMAX: + case PCRE2_INFO_BSR: + case PCRE2_INFO_CAPTURECOUNT: + case PCRE2_INFO_DEPTHLIMIT: + case PCRE2_INFO_EXTRAOPTIONS: + case PCRE2_INFO_FIRSTCODETYPE: + case PCRE2_INFO_FIRSTCODEUNIT: + case PCRE2_INFO_HASBACKSLASHC: + case PCRE2_INFO_HASCRORLF: + case PCRE2_INFO_HEAPLIMIT: + case PCRE2_INFO_JCHANGED: + case PCRE2_INFO_LASTCODETYPE: + case PCRE2_INFO_LASTCODEUNIT: + case PCRE2_INFO_MATCHEMPTY: + case PCRE2_INFO_MATCHLIMIT: + case PCRE2_INFO_MAXLOOKBEHIND: + case PCRE2_INFO_MINLENGTH: + case PCRE2_INFO_NAMEENTRYSIZE: + case PCRE2_INFO_NAMECOUNT: + case PCRE2_INFO_NEWLINE: + return sizeof(uint32_t); + + case PCRE2_INFO_FIRSTBITMAP: + return sizeof(const uint8_t *); + + case PCRE2_INFO_JITSIZE: + case PCRE2_INFO_SIZE: + case PCRE2_INFO_FRAMESIZE: + return sizeof(size_t); + + case PCRE2_INFO_NAMETABLE: + return sizeof(PCRE2_SPTR); + } + } + +if (re == NULL) return PCRE2_ERROR_NULL; + +/* Check that the first field in the block is the magic number. If it is not, +return with PCRE2_ERROR_BADMAGIC. 
*/ + +if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; + +/* Check that this pattern was compiled in the correct bit mode */ + +if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE; + +switch(what) + { + case PCRE2_INFO_ALLOPTIONS: + *((uint32_t *)where) = re->overall_options; + break; + + case PCRE2_INFO_ARGOPTIONS: + *((uint32_t *)where) = re->compile_options; + break; + + case PCRE2_INFO_BACKREFMAX: + *((uint32_t *)where) = re->top_backref; + break; + + case PCRE2_INFO_BSR: + *((uint32_t *)where) = re->bsr_convention; + break; + + case PCRE2_INFO_CAPTURECOUNT: + *((uint32_t *)where) = re->top_bracket; + break; + + case PCRE2_INFO_DEPTHLIMIT: + *((uint32_t *)where) = re->limit_depth; + if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET; + break; + + case PCRE2_INFO_EXTRAOPTIONS: + *((uint32_t *)where) = re->extra_options; + break; + + case PCRE2_INFO_FIRSTCODETYPE: + *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 : + ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0; + break; + + case PCRE2_INFO_FIRSTCODEUNIT: + *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? + re->first_codeunit : 0; + break; + + case PCRE2_INFO_FIRSTBITMAP: + *((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)? 
+ &(re->start_bitmap[0]) : NULL; + break; + + case PCRE2_INFO_FRAMESIZE: + *((size_t *)where) = offsetof(heapframe, ovector) + + re->top_bracket * 2 * sizeof(PCRE2_SIZE); + break; + + case PCRE2_INFO_HASBACKSLASHC: + *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0; + break; + + case PCRE2_INFO_HASCRORLF: + *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0; + break; + + case PCRE2_INFO_HEAPLIMIT: + *((uint32_t *)where) = re->limit_heap; + if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET; + break; + + case PCRE2_INFO_JCHANGED: + *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0; + break; + + case PCRE2_INFO_JITSIZE: +#ifdef SUPPORT_JIT + *((size_t *)where) = (re->executable_jit != NULL)? + PRIV(jit_get_size)(re->executable_jit) : 0; +#else + *((size_t *)where) = 0; +#endif + break; + + case PCRE2_INFO_LASTCODETYPE: + *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0; + break; + + case PCRE2_INFO_LASTCODEUNIT: + *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 
+ re->last_codeunit : 0; + break; + + case PCRE2_INFO_MATCHEMPTY: + *((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0; + break; + + case PCRE2_INFO_MATCHLIMIT: + *((uint32_t *)where) = re->limit_match; + if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET; + break; + + case PCRE2_INFO_MAXLOOKBEHIND: + *((uint32_t *)where) = re->max_lookbehind; + break; + + case PCRE2_INFO_MINLENGTH: + *((uint32_t *)where) = re->minlength; + break; + + case PCRE2_INFO_NAMEENTRYSIZE: + *((uint32_t *)where) = re->name_entry_size; + break; + + case PCRE2_INFO_NAMECOUNT: + *((uint32_t *)where) = re->name_count; + break; + + case PCRE2_INFO_NAMETABLE: + *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code)); + break; + + case PCRE2_INFO_NEWLINE: + *((uint32_t *)where) = re->newline_convention; + break; + + case PCRE2_INFO_SIZE: + *((size_t *)where) = re->blocksize; + break; + + default: return PCRE2_ERROR_BADOPTION; + } + +return 0; +} + + + +/************************************************* +* Callout enumerator * +*************************************************/ + +/* +Arguments: + code points to compiled code + callback function called for each callout block + callout_data user data passed to the callback + +Returns: 0 when successfully completed + < 0 on local error + != 0 for callback error +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data) +{ +pcre2_real_code *re = (pcre2_real_code *)code; +pcre2_callout_enumerate_block cb; +PCRE2_SPTR cc; +#ifdef SUPPORT_UNICODE +BOOL utf; +#endif + +if (re == NULL) return PCRE2_ERROR_NULL; + +#ifdef SUPPORT_UNICODE +utf = (re->overall_options & PCRE2_UTF) != 0; +#endif + +/* Check that the first field in the block is the magic number. If it is not, +return with PCRE2_ERROR_BADMAGIC. 
*/ + +if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; + +/* Check that this pattern was compiled in the correct bit mode */ + +if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE; + +cb.version = 0; +cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)) + + re->name_count * re->name_entry_size; + +while (TRUE) + { + int rc; + switch (*cc) + { + case OP_END: + return 0; + + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_QUERY: + case OP_MINQUERY: + case OP_UPTO: + case OP_MINUPTO: + case OP_EXACT: + case OP_POSSTAR: + case OP_POSPLUS: + case OP_POSQUERY: + case OP_POSUPTO: + case OP_STARI: + case OP_MINSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_EXACTI: + case OP_POSSTARI: + case OP_POSPLUSI: + case OP_POSQUERYI: + case OP_POSUPTOI: + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTEXACT: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + case OP_NOTPOSQUERY: + case OP_NOTPOSUPTO: + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTEXACTI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERYI: + case OP_NOTPOSUPTOI: + cc += PRIV(OP_lengths)[*cc]; +#ifdef SUPPORT_UNICODE + if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEEXACT: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + case OP_TYPEPOSUPTO: + cc += PRIV(OP_lengths)[*cc]; +#ifdef 
SUPPORT_UNICODE + if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2; +#endif + break; + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: + cc += GET(cc, 1); + break; +#endif + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + cc += PRIV(OP_lengths)[*cc] + cc[1]; + break; + + case OP_CALLOUT: + cb.pattern_position = GET(cc, 1); + cb.next_item_length = GET(cc, 1 + LINK_SIZE); + cb.callout_number = cc[1 + 2*LINK_SIZE]; + cb.callout_string_offset = 0; + cb.callout_string_length = 0; + cb.callout_string = NULL; + rc = callback(&cb, callout_data); + if (rc != 0) return rc; + cc += PRIV(OP_lengths)[*cc]; + break; + + case OP_CALLOUT_STR: + cb.pattern_position = GET(cc, 1); + cb.next_item_length = GET(cc, 1 + LINK_SIZE); + cb.callout_number = 0; + cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE); + cb.callout_string_length = + GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2; + cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1; + rc = callback(&cb, callout_data); + if (rc != 0) return rc; + cc += GET(cc, 1 + 2*LINK_SIZE); + break; + + default: + cc += PRIV(OP_lengths)[*cc]; + break; + } + } +} + +/* End of pcre2_pattern_info.c */ diff --git a/src/pcre2_printint.c b/src/pcre2_printint.c new file mode 100644 index 0000000..870e283 --- /dev/null +++ b/src/pcre2_printint.c @@ -0,0 +1,886 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2023 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains a PCRE private debugging function for printing out the +internal form of a compiled regular expression, along with some supporting +local functions. 
This source file is #included in pcre2test.c at each supported +code unit width, with PCRE2_SUFFIX set appropriately, just like the functions +that comprise the library. It can also optionally be included in +pcre2_compile.c for detailed debugging in error situations. */ + + +/* Tables of operator names. The same 8-bit table is used for all code unit +widths, so it must be defined only once. The list itself is defined in +pcre2_internal.h, which is #included by pcre2test before this file. */ + +#ifndef OP_LISTS_DEFINED +static const char *OP_names[] = { OP_NAME_LIST }; +#define OP_LISTS_DEFINED +#endif + +/* The functions and tables herein must all have mode-dependent names. */ + +#define OP_lengths PCRE2_SUFFIX(OP_lengths_) +#define get_ucpname PCRE2_SUFFIX(get_ucpname_) +#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_) +#define print_char PCRE2_SUFFIX(print_char_) +#define print_custring PCRE2_SUFFIX(print_custring_) +#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_) +#define print_prop PCRE2_SUFFIX(print_prop_) + +/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that +the definition is next to the definition of the opcodes in pcre2_internal.h. +The contents of the table are, however, mode-dependent. */ + +static const uint8_t OP_lengths[] = { OP_LENGTHS }; + + + +/************************************************* +* Print one character from a string * +*************************************************/ + +/* In UTF mode the character may occupy more than one code unit. + +Arguments: + f file to write to + ptr pointer to first code unit of the character + utf TRUE if string is UTF (will be FALSE if UTF is not supported) + +Returns: number of additional code units used +*/ + +static unsigned int +print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf) +{ +uint32_t c = *ptr; +BOOL one_code_unit = !utf; + +/* If UTF is supported and requested, check for a valid single code unit. 
*/ + +#ifdef SUPPORT_UNICODE +if (utf) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 + one_code_unit = c < 0x80; +#elif PCRE2_CODE_UNIT_WIDTH == 16 + one_code_unit = (c & 0xfc00) != 0xd800; +#else + one_code_unit = (c & 0xfffff800u) != 0xd800u; +#endif /* CODE_UNIT_WIDTH */ + } +#endif /* SUPPORT_UNICODE */ + +/* Handle a valid one-code-unit character at any width. */ + +if (one_code_unit) + { + if (PRINTABLE(c)) fprintf(f, "%c", (char)c); + else if (c < 0x80) fprintf(f, "\\x%02x", c); + else fprintf(f, "\\x{%02x}", c); + return 0; + } + +/* Code for invalid UTF code units and multi-unit UTF characters is different +for each width. If UTF is not supported, control should never get here, but we +need a return statement to keep the compiler happy. */ + +#ifndef SUPPORT_UNICODE +return 0; +#else + +/* Malformed UTF-8 should occur only if the sanity check has been turned off. +Rather than swallow random bytes, just stop if we hit a bad one. Print it with +\X instead of \x as an indication. */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +if ((c & 0xc0) != 0xc0) + { + fprintf(f, "\\X{%x}", c); /* Invalid starting byte */ + return 0; + } +else + { + int i; + int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */ + int s = 6*a; + c = (c & PRIV(utf8_table3)[a]) << s; + for (i = 1; i <= a; i++) + { + if ((ptr[i] & 0xc0) != 0x80) + { + fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */ + return i - 1; + } + s -= 6; + c |= (ptr[i] & 0x3f) << s; + } + fprintf(f, "\\x{%x}", c); + return a; +} +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + +/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one. +Print it with \X instead of \x as an indication. 
*/ + +#if PCRE2_CODE_UNIT_WIDTH == 16 +if ((ptr[1] & 0xfc00) != 0xdc00) + { + fprintf(f, "\\X{%x}", c); + return 0; + } +c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000; +fprintf(f, "\\x{%x}", c); +return 1; +#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */ + +/* For UTF-32 we get here only for a malformed code unit, which should only +occur if the sanity check has been turned off. Print it with \X instead of \x +as an indication. */ + +#if PCRE2_CODE_UNIT_WIDTH == 32 +fprintf(f, "\\X{%x}", c); +return 0; +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ +#endif /* SUPPORT_UNICODE */ +} + + + +/************************************************* +* Print string as a list of code units * +*************************************************/ + +/* These take no account of UTF as they always print each individual code unit. +The string is zero-terminated for print_custring(); the length is given for +print_custring_bylen(). + +Arguments: + f file to write to + ptr point to the string + len length in code units for print_custring_bylen() + +Returns: nothing +*/ + +static void +print_custring(FILE *f, PCRE2_SPTR ptr) +{ +while (*ptr != '\0') + { + uint32_t c = *ptr++; + if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c); + } +} +/* The length is a size_t rather than a PCRE2_UCHAR so that a caller passing a count wider than one code unit does not have it silently truncated. */ +static void +print_custring_bylen(FILE *f, PCRE2_SPTR ptr, size_t len) +{ +for (; len > 0; len--) + { + uint32_t c = *ptr++; + if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c); + } +} + + + +/************************************************* +* Find Unicode property name * +*************************************************/ + +/* When there is no UTF/UCP support, the table of names does not exist. This +function should not be called in such configurations, because a pattern that +tries to use Unicode properties won't compile. Rather than put lots of #ifdefs +into the main code, however, we just put one into this function. 
+ +Now that the table contains both full names and their abbreviations, we do some +fiddling to try to get the full name, which is either the longer of two found +names, or a 3-character script name. The string "??" is returned when no table entry matches, and always when there is no Unicode support. */ + +static const char * +get_ucpname(unsigned int ptype, unsigned int pvalue) +{ +#ifdef SUPPORT_UNICODE +int count = 0; +const char *yield = "??"; +size_t len = 0; +unsigned int ptypex = (ptype == PT_SC)? PT_SCX : ptype; + +for (int i = PRIV(utt_size) - 1; i >= 0; i--) + { + const ucp_type_table *u = PRIV(utt) + i; + + if ((ptype == u->type || ptypex == u->type) && pvalue == u->value) + { + const char *s = PRIV(utt_names) + u->name_offset; + size_t sl = strlen(s); + + if (sl == 3 && (u->type == PT_SC || u->type == PT_SCX)) + { + yield = s; + break; + } + + if (sl > len) + { + yield = s; + len = sl; + } + + if (++count >= 2) break; + } + } + +return yield; + +#else /* No UTF support */ +(void)ptype; +(void)pvalue; +return "??"; +#endif /* SUPPORT_UNICODE */ +} + + + +/************************************************* +* Print Unicode property value * +*************************************************/ + +/* "Normal" properties can be printed from tables. The PT_CLIST property is a +pseudo-property that contains a pointer to a list of case-equivalent +characters. + +Arguments: + f file to write to + code pointer in the compiled code + before text to print before + after text to print after + +Returns: nothing +*/ + +static void +print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after) +{ +if (code[1] != PT_CLIST) + { + const char *sc = (code[1] == PT_SC)? "script:" : ""; + const char *s = get_ucpname(code[1], code[2]); + fprintf(f, "%s%s %s%c%s%s", before, OP_names[*code], sc, toupper(s[0]), s+1, after); + } +else + { + const uint32_t *p = PRIV(ucd_caseless_sets) + code[2]; + fprintf (f, "%s%sclist", before, (*code == OP_PROP)? 
"" : "not "); + while (*p < NOTACHAR) fprintf(f, " %04x", *p++); + fprintf(f, "%s", after); + } +} + + + +/************************************************* +* Print compiled pattern * +*************************************************/ + +/* The print_lengths flag controls whether offsets and lengths of items are +printed. Lengths can be turned off from pcre2test so that automatic tests on +bytecode can be written that do not depend on the value of LINK_SIZE. The compiled code is walked one item per output line until OP_END is reached. + +Arguments: + re a compiled pattern + f the file to write to + print_lengths show various lengths + +Returns: nothing +*/ + +static void +pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths) +{ +PCRE2_SPTR codestart, nametable, code; +uint32_t nesize = re->name_entry_size; +BOOL utf = (re->overall_options & PCRE2_UTF) != 0; + +nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)); +code = codestart = nametable + re->name_count * re->name_entry_size; + +for(;;) + { + PCRE2_SPTR ccode; + uint32_t c; + int i; + const char *flag = " "; + unsigned int extra = 0; + + if (print_lengths) + fprintf(f, "%3d ", (int)(code - codestart)); + else + fprintf(f, " "); + + switch(*code) + { +/* ========================================================================== */ + /* These cases are never obeyed. This is a fudge that causes a compile- + time error if the vectors OP_names or OP_lengths, which are indexed + by opcode, are not the correct length. It seems to be the only way to do + such a check at compile time, as the sizeof() operator does not work in + the C preprocessor. 
*/ + + case OP_TABLE_LENGTH: + case OP_TABLE_LENGTH + + ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) && + (sizeof(OP_lengths) == OP_TABLE_LENGTH)): + return; +/* ========================================================================== */ + + case OP_END: + fprintf(f, " %s\n", OP_names[*code]); + fprintf(f, "------------------------------------------------------------------\n"); + return; + + case OP_CHAR: + fprintf(f, " "); + do + { + code++; + code += 1 + print_char(f, code, utf); + } + while (*code == OP_CHAR); + fprintf(f, "\n"); + continue; + + case OP_CHARI: + fprintf(f, " /i "); + do + { + code++; + code += 1 + print_char(f, code, utf); + } + while (*code == OP_CHARI); + fprintf(f, "\n"); + continue; + + case OP_CBRA: + case OP_CBRAPOS: + case OP_SCBRA: + case OP_SCBRAPOS: + if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); + else fprintf(f, " "); + fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE)); + break; + + case OP_BRA: + case OP_BRAPOS: + case OP_SBRA: + case OP_SBRAPOS: + case OP_KETRMAX: + case OP_KETRMIN: + case OP_KETRPOS: + case OP_ALT: + case OP_KET: + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_COND: + case OP_SCOND: + if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); + else fprintf(f, " "); + fprintf(f, "%s", OP_names[*code]); + break; + + case OP_REVERSE: + if (print_lengths) fprintf(f, "%3d ", GET2(code, 1)); + else fprintf(f, " "); + fprintf(f, "%s", OP_names[*code]); + break; + + case OP_VREVERSE: + if (print_lengths) fprintf(f, "%3d %d ", GET2(code, 1), + GET2(code, 1 + IMM2_SIZE)); + else fprintf(f, " "); + fprintf(f, "%s", OP_names[*code]); + break; + + case OP_CLOSE: + fprintf(f, " %s %d", OP_names[*code], GET2(code, 1)); + break; + + case OP_CREF: + fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]); + break; + + case OP_DNCREF: + { + PCRE2_SPTR entry = nametable + 
(GET2(code, 1) * nesize) + IMM2_SIZE; + fprintf(f, " %s Cond ref <", flag); + print_custring(f, entry); + fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); + } + break; + + case OP_RREF: + c = GET2(code, 1); + if (c == RREF_ANY) + fprintf(f, " Cond recurse any"); + else + fprintf(f, " Cond recurse %d", c); + break; + + case OP_DNRREF: + { + PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE; + fprintf(f, " %s Cond recurse <", flag); + print_custring(f, entry); + fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); + } + break; + + case OP_FALSE: + fprintf(f, " Cond false"); + break; + + case OP_TRUE: + fprintf(f, " Cond true"); + break; + + case OP_STARI: + case OP_MINSTARI: + case OP_POSSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_POSPLUSI: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_POSQUERYI: + flag = "/i"; + /* Fall through */ + case OP_STAR: + case OP_MINSTAR: + case OP_POSSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_POSPLUS: + case OP_QUERY: + case OP_MINQUERY: + case OP_POSQUERY: + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPOSSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEPOSPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSQUERY: + fprintf(f, " %s ", flag); + + if (*code >= OP_TYPESTAR) + { + if (code[1] == OP_PROP || code[1] == OP_NOTPROP) + { + print_prop(f, code + 1, "", " "); + extra = 2; + } + else fprintf(f, "%s", OP_names[code[1]]); + } + else extra = print_char(f, code+1, utf); + fprintf(f, "%s", OP_names[*code]); + break; + + case OP_EXACTI: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_POSUPTOI: + flag = "/i"; + /* Fall through */ + case OP_EXACT: + case OP_UPTO: + case OP_MINUPTO: + case OP_POSUPTO: + fprintf(f, " %s ", flag); + extra = print_char(f, code + 1 + IMM2_SIZE, utf); + fprintf(f, "{"); + if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,"); + fprintf(f, "%d}", GET2(code,1)); + if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?"); + else if 
(*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+"); + break; + + case OP_TYPEEXACT: + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEPOSUPTO: + if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) + { + print_prop(f, code + IMM2_SIZE + 1, " ", " "); + extra = 2; + } + else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]); + fprintf(f, "{"); + if (*code != OP_TYPEEXACT) fprintf(f, "0,"); + fprintf(f, "%d}", GET2(code,1)); + if (*code == OP_TYPEMINUPTO) fprintf(f, "?"); + else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+"); + break; + + case OP_NOTI: + flag = "/i"; + /* Fall through */ + case OP_NOT: + fprintf(f, " %s [^", flag); + extra = print_char(f, code + 1, utf); + fprintf(f, "]"); + break; + + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPOSSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTPOSPLUSI: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTPOSQUERYI: + flag = "/i"; + /* Fall through */ + + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPOSSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTPOSPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTPOSQUERY: + fprintf(f, " %s [^", flag); + extra = print_char(f, code + 1, utf); + fprintf(f, "]%s", OP_names[*code]); + break; + + case OP_NOTEXACTI: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTPOSUPTOI: + flag = "/i"; + /* Fall through */ + + case OP_NOTEXACT: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTPOSUPTO: + fprintf(f, " %s [^", flag); + extra = print_char(f, code + 1 + IMM2_SIZE, utf); + fprintf(f, "]{"); + if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,"); + fprintf(f, "%d}", GET2(code,1)); + if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?"); + else + if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+"); + break; + + case OP_RECURSE: + if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); + else fprintf(f, " "); + fprintf(f, "%s", 
OP_names[*code]); + break; + + case OP_REFI: + flag = "/i"; + /* Fall through */ + case OP_REF: + fprintf(f, " %s \\%d", flag, GET2(code,1)); + ccode = code + OP_lengths[*code]; + goto CLASS_REF_REPEAT; + + case OP_DNREFI: + flag = "/i"; + /* Fall through */ + case OP_DNREF: + { + PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE; + fprintf(f, " %s \\k<", flag); + print_custring(f, entry); + fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); + } + ccode = code + OP_lengths[*code]; + goto CLASS_REF_REPEAT; + + case OP_CALLOUT: + fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE], + GET(code, 1), GET(code, 1 + LINK_SIZE)); + break; + + case OP_CALLOUT_STR: + c = code[1 + 4*LINK_SIZE]; + fprintf(f, " %s %c", OP_names[*code], c); + extra = GET(code, 1 + 2*LINK_SIZE); + print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE); + for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) + if (c == PRIV(callout_start_delims)[i]) + { + c = PRIV(callout_end_delims)[i]; + break; + } + fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1), + GET(code, 1 + LINK_SIZE)); + break; + + case OP_PROP: + case OP_NOTPROP: + print_prop(f, code, " ", ""); + break; + + /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm + in having this code always here, and it makes it less messy without all + those #ifdefs. */ + + case OP_CLASS: + case OP_NCLASS: + case OP_XCLASS: + { + BOOL printmap, invertmap; + + fprintf(f, " ["); + + /* Negative XCLASS has an inverted map whereas the original opcodes have + already done the inversion. 
*/ + + invertmap = FALSE; + if (*code == OP_XCLASS) + { + extra = GET(code, 1); + ccode = code + LINK_SIZE + 1; + printmap = (*ccode & XCL_MAP) != 0; + if ((*ccode & XCL_NOT) != 0) + { + invertmap = (*ccode & XCL_HASPROP) == 0; + fprintf(f, "^"); + } + ccode++; + } + else /* CLASS or NCLASS */ + { + printmap = TRUE; + ccode = code + 1; + } + + /* Print a bit map */ + + if (printmap) + { + uint8_t inverted_map[32]; + uint8_t *map = (uint8_t *)ccode; + + if (invertmap) + { + /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ + for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i]; + map = inverted_map; + } + + for (i = 0; i < 256; i++) + { + if ((map[i/8] & (1u << (i&7))) != 0) + { + int j; + for (j = i+1; j < 256; j++) + if ((map[j/8] & (1u << (j&7))) == 0) break; + if (i == '-' || i == ']') fprintf(f, "\\"); + if (PRINTABLE(i)) fprintf(f, "%c", i); + else fprintf(f, "\\x%02x", i); + if (--j > i) + { + if (j != i + 1) fprintf(f, "-"); + if (j == '-' || j == ']') fprintf(f, "\\"); + if (PRINTABLE(j)) fprintf(f, "%c", j); + else fprintf(f, "\\x%02x", j); + } + i = j; + } + } + ccode += 32 / sizeof(PCRE2_UCHAR); + } + } + + /* For an XCLASS there is always some additional data */ + + if (*code == OP_XCLASS) + { + PCRE2_UCHAR ch; + while ((ch = *ccode++) != XCL_END) + { + const char *notch = ""; + + switch(ch) + { + case XCL_NOTPROP: + notch = "^"; + /* Fall through */ + + case XCL_PROP: + { + unsigned int ptype = *ccode++; + unsigned int pvalue = *ccode++; + const char *s; + + switch(ptype) + { + case PT_PXGRAPH: + fprintf(f, "[:%sgraph:]", notch); + break; + + case PT_PXPRINT: + fprintf(f, "[:%sprint:]", notch); + break; + + case PT_PXPUNCT: + fprintf(f, "[:%spunct:]", notch); + break; + + case PT_PXXDIGIT: + fprintf(f, "[:%sxdigit:]", notch); + break; + + default: + s = get_ucpname(ptype, pvalue); + fprintf(f, "\\%c{%c%s}", ((notch[0] == '^')? 
'P':'p'), + toupper(s[0]), s+1); + break; + } + } + break; + + default: + ccode += 1 + print_char(f, ccode, utf); + if (ch == XCL_RANGE) + { + fprintf(f, "-"); + ccode += 1 + print_char(f, ccode, utf); + } + break; + } + } + } + + /* Indicate a non-UTF class which was created by negation */ + + fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : ""); + + /* Handle repeats after a class or a back reference */ + + CLASS_REF_REPEAT: + switch(*ccode) + { + unsigned int min, max; + + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + fprintf(f, "%s", OP_names[*ccode]); + extra += OP_lengths[*ccode]; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + min = GET2(ccode,1); + max = GET2(ccode,1 + IMM2_SIZE); + if (max == 0) fprintf(f, "{%u,}", min); + else fprintf(f, "{%u,%u}", min, max); + if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); + else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+"); + extra += OP_lengths[*ccode]; + break; + + /* Do nothing if it's not a repeat; this code stops picky compilers + warning about the lack of a default code path. */ + + default: + break; + } + break; + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + fprintf(f, " %s ", OP_names[*code]); + print_custring_bylen(f, code + 2, code[1]); + extra += code[1]; + break; + + case OP_THEN: + fprintf(f, " %s", OP_names[*code]); + break; + + case OP_CIRCM: + case OP_DOLLM: + flag = "/m"; + /* Fall through */ + + /* Anything else is just an item with no data, but possibly a flag. 
*/ + + default: + fprintf(f, " %s %s", flag, OP_names[*code]); + break; + } + + code += OP_lengths[*code] + extra; /* step over the fixed item length plus any variable-length data counted in extra */ + fprintf(f, "\n"); + } +} + +/* End of pcre2_printint.c */ diff --git a/src/pcre2_script_run.c b/src/pcre2_script_run.c new file mode 100644 index 0000000..4926fa6 --- /dev/null +++ b/src/pcre2_script_run.c @@ -0,0 +1,344 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* This module contains the function for checking a script run. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + + +/************************************************* +* Check script run * +*************************************************/ + +/* A script run is conceptually a sequence of characters all in the same +Unicode script. However, it isn't quite that simple. There are special rules +for scripts that are commonly used together, and also special rules for digits. +This function implements the appropriate checks, which is possible only when +PCRE2 is compiled with Unicode support. The function returns TRUE if there is +no Unicode support; however, it should never be called in that circumstance +because an error is given by pcre2_compile() if a script run is called for in a +version of PCRE2 compiled without Unicode support. + +Arguments: + ptr point to the first character + endptr point after the last character + utf TRUE if in UTF mode + +Returns: TRUE if this is a valid script run +*/ + +/* These are states in the checking process. 
*/ + +enum { SCRIPT_UNSET, /* Requirement as yet unknown */ + SCRIPT_MAP, /* Bitmap contains acceptable scripts */ + SCRIPT_HANPENDING, /* Have had only Han characters */ + SCRIPT_HANHIRAKATA, /* Expect Han, Hiragana, or Katakana */ + SCRIPT_HANBOPOMOFO, /* Expect Han or Bopomofo */ + SCRIPT_HANHANGUL /* Expect Han or Hangul */ + }; + +#define UCD_MAPSIZE (ucp_Unknown/32 + 1) +#define FULL_MAPSIZE (ucp_Script_Count/32 + 1) + +BOOL +PRIV(script_run)(PCRE2_SPTR ptr, PCRE2_SPTR endptr, BOOL utf) +{ +#ifdef SUPPORT_UNICODE +uint32_t require_state = SCRIPT_UNSET; +uint32_t require_map[FULL_MAPSIZE]; +uint32_t map[FULL_MAPSIZE]; +uint32_t require_digitset = 0; +uint32_t c; + +#if PCRE2_CODE_UNIT_WIDTH == 32 +(void)utf; /* Avoid compiler warning */ +#endif + +/* Any string containing fewer than 2 characters is a valid script run. */ + +if (ptr >= endptr) return TRUE; +GETCHARINCTEST(c, ptr); +if (ptr >= endptr) return TRUE; + +/* Initialize the require map. This is a full-size bitmap that has a bit for +every script, as opposed to the maps in ucd_script_sets, which only have bits +for scripts less than ucp_Unknown - those that appear in script extension +lists. */ + +for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] = 0; + +/* Scan strings of two or more characters, checking the Unicode characteristics +of each code point. There is special code for scripts that can be combined with +characters from the Han Chinese script. This may be used in conjunction with +four other scripts in these combinations: + +. Han with Hiragana and Katakana is allowed (for Japanese). +. Han with Bopomofo is allowed (for Taiwanese Mandarin). +. Han with Hangul is allowed (for Korean). + +If the first significant character's script is one of the four, the required +script type is immediately known. However, if the first significant +character's script is Han, we have to keep checking for a non-Han character. +Hence the SCRIPT_HANPENDING state. 
*/ + +for (;;) + { + const ucd_record *ucd = GET_UCD(c); + uint32_t script = ucd->script; + + /* If the script is Unknown, the string is not a valid script run. Such + characters can only form script runs of length one (see test above). */ + + if (script == ucp_Unknown) return FALSE; + + /* A character without any script extensions whose script is Inherited or + Common is always accepted with any script. If there are extensions, the + following processing happens for all scripts. */ + + if (UCD_SCRIPTX_PROP(ucd) != 0 || (script != ucp_Inherited && script != ucp_Common)) + { + BOOL OK; + + /* Set up a full-sized map for this character that can include bits for all + scripts. Copy the scriptx map for this character (which covers those + scripts that appear in script extension lists), set the remaining values to + zero, and then, except for Common or Inherited, add this script's bit to + the map. */ + + memcpy(map, PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(ucd), UCD_MAPSIZE * sizeof(uint32_t)); + memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t)); + if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script); + + /* Handle the different checking states */ + + switch(require_state) + { + /* First significant character - it might follow Common or Inherited + characters that do not have any script extensions. */ + + case SCRIPT_UNSET: + switch(script) + { + case ucp_Han: + require_state = SCRIPT_HANPENDING; + break; + + case ucp_Hiragana: + case ucp_Katakana: + require_state = SCRIPT_HANHIRAKATA; + break; + + case ucp_Bopomofo: + require_state = SCRIPT_HANBOPOMOFO; + break; + + case ucp_Hangul: + require_state = SCRIPT_HANHANGUL; + break; + + default: + memcpy(require_map, map, FULL_MAPSIZE * sizeof(uint32_t)); + require_state = SCRIPT_MAP; + break; + } + break; + + /* The first significant character was Han. 
An inspection of the Unicode + 11.0.0 files shows that there are the following types of Script Extension + list that involve the Han, Bopomofo, Hiragana, Katakana, and Hangul + scripts: + + . Bopomofo + Han + . Han + Hiragana + Katakana + . Hiragana + Katakana + . Bopomofo + Hangul + Han + Hiragana + Katakana + + The following code tries to make sense of this. */ + +#define FOUND_BOPOMOFO 1 +#define FOUND_HIRAGANA 2 +#define FOUND_KATAKANA 4 +#define FOUND_HANGUL 8 + + case SCRIPT_HANPENDING: + if (script != ucp_Han) /* Another Han does nothing */ + { + uint32_t chspecial = 0; + + if (MAPBIT(map, ucp_Bopomofo) != 0) chspecial |= FOUND_BOPOMOFO; + if (MAPBIT(map, ucp_Hiragana) != 0) chspecial |= FOUND_HIRAGANA; + if (MAPBIT(map, ucp_Katakana) != 0) chspecial |= FOUND_KATAKANA; + if (MAPBIT(map, ucp_Hangul) != 0) chspecial |= FOUND_HANGUL; + + if (chspecial == 0) return FALSE; /* Not allowed with Han */ + + if (chspecial == FOUND_BOPOMOFO) + require_state = SCRIPT_HANBOPOMOFO; + else if (chspecial == (FOUND_HIRAGANA|FOUND_KATAKANA)) + require_state = SCRIPT_HANHIRAKATA; + + /* Otherwise this character must be allowed with all of them, so remain + in the pending state. NOTE(review): a map holding only Hangul, or only one of Hiragana/Katakana, also stays pending here; confirm that is intended. */ + } + break; + + /* Previously encountered one of the "with Han" scripts. Check that + this character is appropriate. */ + + case SCRIPT_HANHIRAKATA: + if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hiragana) + + MAPBIT(map, ucp_Katakana) == 0) return FALSE; + break; + + case SCRIPT_HANBOPOMOFO: + if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Bopomofo) == 0) return FALSE; + break; + + case SCRIPT_HANHANGUL: + if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hangul) == 0) return FALSE; + break; + + /* Previously encountered one or more characters that are allowed with a + list of scripts. 
*/ + + case SCRIPT_MAP: + OK = FALSE; + + for (int i = 0; i < FULL_MAPSIZE; i++) + { + if ((require_map[i] & map[i]) != 0) + { + OK = TRUE; + break; + } + } + + if (!OK) return FALSE; + + /* The rest of the string must be in this script, but we have to + allow for the Han complications. */ + + switch(script) + { + case ucp_Han: + require_state = SCRIPT_HANPENDING; + break; + + case ucp_Hiragana: + case ucp_Katakana: + require_state = SCRIPT_HANHIRAKATA; + break; + + case ucp_Bopomofo: + require_state = SCRIPT_HANBOPOMOFO; + break; + + case ucp_Hangul: + require_state = SCRIPT_HANHANGUL; + break; + + /* Compute the intersection of the required list of scripts and the + allowed scripts for this character. */ + + default: + for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] &= map[i]; + break; + } + + break; + } + } /* End checking character's script and extensions. */ + + /* The character is in an acceptable script. We must now ensure that all + decimal digits in the string come from the same set. Some scripts (e.g. + Common, Arabic) have more than one set of decimal digits. This code does + not allow mixing sets, even within the same script. The vector called + PRIV(ucd_digit_sets)[] contains, in its first element, the number of + following elements, and then, in ascending order, the code points of the + '9' characters in every set of 10 digits. Each set is identified by the + offset in the vector of its '9' character. An initial check of the first + value picks up ASCII digits quickly. Otherwise, a binary chop is used. */ + + if (ucd->chartype == ucp_Nd) + { + uint32_t digitset; + + if (c <= PRIV(ucd_digit_sets)[1]) digitset = 1; else + { + int mid; + int bot = 1; + int top = PRIV(ucd_digit_sets)[0]; + for (;;) + { + if (top <= bot + 1) /* <= rather than == is paranoia */ + { + digitset = top; + break; + } + mid = (top + bot) / 2; + if (c <= PRIV(ucd_digit_sets)[mid]) top = mid; else bot = mid; + } + } + + /* A required value of 0 means "unset". 
*/ + + if (require_digitset == 0) require_digitset = digitset; + else if (digitset != require_digitset) return FALSE; + } /* End digit handling */ + + /* If we haven't yet got to the end, pick up the next character. */ + + if (ptr >= endptr) return TRUE; + GETCHARINCTEST(c, ptr); + } /* End checking loop */ + +#else /* NOT SUPPORT_UNICODE */ +(void)ptr; +(void)endptr; +(void)utf; +return TRUE; +#endif /* SUPPORT_UNICODE */ +} + +/* End of pcre2_script_run.c */ diff --git a/src/pcre2_serialize.c b/src/pcre2_serialize.c new file mode 100644 index 0000000..ba17a26 --- /dev/null +++ b/src/pcre2_serialize.c @@ -0,0 +1,286 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2020 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* This module contains functions for serializing and deserializing +a sequence of compiled codes. */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#include "pcre2_internal.h" + +/* Magic number to provide a small check against being handed junk. */ + +#define SERIALIZED_DATA_MAGIC 0x50523253u + +/* Deserialization is limited to the current PCRE version and +character width. 
*/
+
+#define SERIALIZED_DATA_VERSION \
+  ((PCRE2_MAJOR) | ((PCRE2_MINOR) << 16))
+
+#define SERIALIZED_DATA_CONFIG \
+  (sizeof(PCRE2_UCHAR) | ((sizeof(void*)) << 8) | ((sizeof(PCRE2_SIZE)) << 16))
+
+
+
+/*************************************************
+*           Serialize compiled patterns          *
+*************************************************/
+
+/* Pack a list of compiled patterns into one newly allocated byte stream. The
+stream consists of a pcre2_serialized_data header, one copy of the character
+tables, and then a copy of each compiled code block in order. A copy of the
+memory controller is stored in front of the returned pointer so that
+pcre2_serialize_free() can release the block later.
+
+Arguments:
+  codes             vector of compiled patterns; all must share one set of tables
+  number_of_codes   number of entries in codes
+  serialized_bytes  where to return a pointer to the byte stream
+  serialized_size   where to return the size of the byte stream (this does not
+                      include the hidden memory controller)
+  gcontext          general context for memory allocation, or NULL
+
+Returns:            number_of_codes on success, otherwise
+                    PCRE2_ERROR_NULL         a required pointer is NULL
+                    PCRE2_ERROR_BADDATA      number_of_codes is not positive
+                    PCRE2_ERROR_BADMAGIC     an entry is not a compiled pattern
+                    PCRE2_ERROR_MIXEDTABLES  the patterns use different tables
+                    PCRE2_ERROR_NOMEMORY     the stream could not be allocated
+*/
+
+PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
+pcre2_serialize_encode(const pcre2_code **codes, int32_t number_of_codes,
+   uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size,
+   pcre2_general_context *gcontext)
+{
+uint8_t *bytes;
+uint8_t *dst_bytes;
+int32_t i;
+PCRE2_SIZE total_size;
+const pcre2_real_code *re;
+const uint8_t *tables;
+pcre2_serialized_data *data;
+
+const pcre2_memctl *memctl = (gcontext != NULL) ?
+  &gcontext->memctl : &PRIV(default_compile_context).memctl;
+
+if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
+  return PCRE2_ERROR_NULL;
+
+if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
+
+/* Compute total size. */
+total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH;
+tables = NULL;
+
+for (i = 0; i < number_of_codes; i++)
+  {
+  if (codes[i] == NULL) return PCRE2_ERROR_NULL;
+  re = (const pcre2_real_code *)(codes[i]);
+  if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
+  if (tables == NULL)
+    tables = re->tables;
+  else if (tables != re->tables)
+    return PCRE2_ERROR_MIXEDTABLES;
+  total_size += re->blocksize;
+  }
+
+/* Initialize the byte stream. */
+bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data);
+if (bytes == NULL) return PCRE2_ERROR_NOMEMORY;
+
+/* The controller is stored as a hidden parameter. */
+memcpy(bytes, memctl, sizeof(pcre2_memctl));
+bytes += sizeof(pcre2_memctl);
+
+data = (pcre2_serialized_data *)bytes;
+data->magic = SERIALIZED_DATA_MAGIC;
+data->version = SERIALIZED_DATA_VERSION;
+data->config = SERIALIZED_DATA_CONFIG;
+data->number_of_codes = number_of_codes;
+
+/* Copy all compiled code data. */
+dst_bytes = bytes + sizeof(pcre2_serialized_data);
+memcpy(dst_bytes, tables, TABLES_LENGTH);
+dst_bytes += TABLES_LENGTH;
+
+for (i = 0; i < number_of_codes; i++)
+  {
+  re = (const pcre2_real_code *)(codes[i]);
+  (void)memcpy(dst_bytes, (char *)re, re->blocksize);
+
+  /* Certain fields in the compiled code block are re-set during
+  deserialization. In order to ensure that the serialized data stream is always
+  the same for the same pattern, set them to zero here. We can't assume the
+  copy of the pattern is correctly aligned for accessing the fields as part of
+  a structure. Note the use of sizeof(void *) in the second of these, to
+  specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a
+  pointer to uint8_t), gcc gives a warning because the first argument is also a
+  pointer to uint8_t. Casting the first argument to (void *) can stop this, but
+  it didn't stop Coverity giving the same complaint. */
+
+  (void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0,
+    sizeof(pcre2_memctl));
+  (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0,
+    sizeof(void *));
+  (void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
+    sizeof(void *));
+
+  dst_bytes += re->blocksize;
+  }
+
+*serialized_bytes = bytes;
+*serialized_size = total_size;
+return number_of_codes;
+}
+
+
+/*************************************************
+*          Deserialize compiled patterns         *
+*************************************************/
+
+/* Undo a partly completed decode. Every failure exit from the decoding loop
+goes through here so that an error never leaves allocated blocks behind, and
+never leaves pointers in the caller's vector that the caller cannot know about.
+
+Arguments:
+  codes     the caller's vector of decoded patterns
+  count     number of leading entries of codes that have been filled in
+  tables    the decoded character tables
+  memctl    the memory controller that was used for the allocations
+
+Returns:    nothing
+*/
+
+static void
+free_partial_decode(pcre2_code **codes, int32_t count, uint8_t *tables,
+  const pcre2_memctl *memctl)
+{
+int32_t j;
+memctl->free(tables, memctl->memory_data);
+for (j = 0; j < count; j++)
+  {
+  memctl->free(codes[j], memctl->memory_data);
+  codes[j] = NULL;
+  }
+}
+
+
+/* Rebuild compiled patterns from a byte stream made by
+pcre2_serialize_encode(). One new copy of the character tables is shared by all
+the decoded patterns. If the stream holds fewer patterns than were asked for,
+only those that exist are decoded. On any error, everything allocated by this
+call is freed and the entries of codes that had been filled in are set to NULL.
+The length of the stream is not known here, so a truncated stream cannot be
+detected.
+
+Arguments:
+  codes             vector to receive the decoded patterns
+  number_of_codes   maximum number of patterns to decode
+  bytes             the serialized byte stream
+  gcontext          general context for memory allocation, or NULL
+
+Returns:            the number of patterns decoded, otherwise
+                    PCRE2_ERROR_NULL               bytes or codes is NULL
+                    PCRE2_ERROR_BADDATA            number_of_codes is not positive
+                    PCRE2_ERROR_BADSERIALIZEDDATA  the stream is malformed
+                    PCRE2_ERROR_BADMAGIC           the stream lacks the magic number
+                    PCRE2_ERROR_BADMODE            made by another PCRE2 version or
+                                                     with other data sizes
+                    PCRE2_ERROR_NOMEMORY           an allocation failed
+*/
+
+PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
+pcre2_serialize_decode(pcre2_code **codes, int32_t number_of_codes,
+   const uint8_t *bytes, pcre2_general_context *gcontext)
+{
+const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
+const pcre2_memctl *memctl = (gcontext != NULL) ?
+  &gcontext->memctl : &PRIV(default_compile_context).memctl;
+
+const uint8_t *src_bytes;
+pcre2_real_code *dst_re;
+uint8_t *tables;
+int32_t i;
+
+/* Sanity checks. */
+
+if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL;
+if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
+if (data->number_of_codes <= 0) return PCRE2_ERROR_BADSERIALIZEDDATA;
+if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
+if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
+if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
+
+if (number_of_codes > data->number_of_codes)
+  number_of_codes = data->number_of_codes;
+
+src_bytes = bytes + sizeof(pcre2_serialized_data);
+
+/* Decode tables. The reference count for the tables is stored immediately
+following them. */
+
+tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data);
+if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
+
+memcpy(tables, src_bytes, TABLES_LENGTH);
+*(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes;
+src_bytes += TABLES_LENGTH;
+
+/* Decode the byte stream. We must not try to read the size from the compiled
+code block in the stream, because it might be unaligned, which causes errors on
+hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type
+of the blocksize field is given its own name to ensure that it is the same here
+as in the block. */
+
+for (i = 0; i < number_of_codes; i++)
+  {
+  CODE_BLOCKSIZE_TYPE blocksize;
+  memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize),
+    sizeof(CODE_BLOCKSIZE_TYPE));
+  if (blocksize <= sizeof(pcre2_real_code))
+    {
+    free_partial_decode(codes, i, tables, memctl);
+    return PCRE2_ERROR_BADSERIALIZEDDATA;
+    }
+
+  /* The allocator provided by gcontext replaces the original one. */
+
+  dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize,
+    (pcre2_memctl *)gcontext);
+  if (dst_re == NULL)
+    {
+    free_partial_decode(codes, i, tables, memctl);
+    return PCRE2_ERROR_NOMEMORY;
+    }
+
+  /* The new allocator must be preserved. */
+
+  memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl),
+    src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl));
+  if (dst_re->magic_number != MAGIC_NUMBER ||
+      dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 ||
+      dst_re->name_count > MAX_NAME_COUNT)
+    {
+    /* The block just allocated is not in codes yet, so free it separately. */
+    memctl->free(dst_re, memctl->memory_data);
+    free_partial_decode(codes, i, tables, memctl);
+    return PCRE2_ERROR_BADSERIALIZEDDATA;
+    }
+
+  /* At the moment only one table is supported. */
+
+  dst_re->tables = tables;
+  dst_re->executable_jit = NULL;
+  dst_re->flags |= PCRE2_DEREF_TABLES;
+
+  codes[i] = dst_re;
+  src_bytes += blocksize;
+  }
+
+return number_of_codes;
+}
+
+
+/*************************************************
+*    Get the number of serialized patterns       *
+*************************************************/
+
+/* Read the pattern count from the header of a serialized byte stream, after
+applying the same header checks as pcre2_serialize_decode().
+
+Argument:   the serialized byte stream
+Returns:    the number of patterns in the stream, otherwise
+            PCRE2_ERROR_NULL      bytes is NULL
+            PCRE2_ERROR_BADMAGIC  the stream lacks the magic number
+            PCRE2_ERROR_BADMODE   made by another PCRE2 version or with other
+                                    data sizes
+*/
+
+PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
+pcre2_serialize_get_number_of_codes(const uint8_t *bytes)
+{
+const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
+
+if (data == NULL) return PCRE2_ERROR_NULL;
+if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
+if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
+if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
+
+return data->number_of_codes;
+}
+
+
+/*************************************************
+*            Free the allocated stream           *
+*************************************************/
+
+/* Release a byte stream. The memory controller that was copied in front of the
+stream by pcre2_serialize_encode() is used to free the whole block, so the
+argument must be a pointer that was returned by that function (or NULL, which
+is ignored).
+
+Argument:   the serialized byte stream, or NULL
+Returns:    nothing
+*/
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_serialize_free(uint8_t *bytes)
+{
+if (bytes != NULL)
+  {
+  pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl));
+  memctl->free(memctl, memctl->memory_data);
+  }
+}
+
+/* End of pcre2_serialize.c */
diff --git a/src/pcre2_string_utils.c b/src/pcre2_string_utils.c
new file mode 100644
index 0000000..ebfa943
--- /dev/null
+++ b/src/pcre2_string_utils.c
@@ -0,0 +1,237 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of
functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2018-2021 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains internal functions for comparing and finding the length
+of strings. These are used instead of strcmp() etc because the standard
+functions work only on 8-bit data. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+/*************************************************
+*    Emulated memmove() for systems without it   *
+*************************************************/
+
+/* This fallback is compiled only when neither VPCOMPAT nor HAVE_MEMMOVE is
+defined. bcopy() does the work if it is available. Otherwise the bytes are
+moved one at a time, as there are some non-Unix environments that lack both
+memmove() and bcopy().
+
+Arguments:
+  d         destination
+  s         source
+  n         number of bytes to move
+
+Returns:    d
+*/
+
+#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
+void *
+PRIV(memmove)(void *d, const void *s, size_t n)
+{
+#ifdef HAVE_BCOPY
+bcopy(s, d, n);
+return d;
+#else
+unsigned char *to = (unsigned char *)d;
+const unsigned char *from = (const unsigned char *)s;
+
+if (to > from)
+  {
+  /* The destination starts above the source, so work downwards from the last
+  byte; an overlapping tail is then read before it is overwritten. */
+  size_t k = n;
+  while (k-- > 0) to[k] = from[k];
+  }
+else
+  {
+  size_t k;
+  for (k = 0; k < n; k++) to[k] = from[k];
+  }
+return d;
+#endif   /* not HAVE_BCOPY */
+}
+#endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
+
+
+/*************************************************
+*    Compare two zero-terminated PCRE2 strings   *
+*************************************************/
+
+/* The strings are compared code unit by code unit up to and including the
+terminating zero.
+
+Arguments:
+  str1        first string
+  str2        second string
+
+Returns:      0 if the strings are the same
+              1 if the first differing code unit is greater in str1
+             -1 if it is greater in str2
+*/
+
+int
+PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2)
+{
+for (;;)
+  {
+  PCRE2_UCHAR a = *str1++;
+  PCRE2_UCHAR b = *str2++;
+  if (a != b) return (a > b)? 1 : -1;
+  if (a == 0) return 0;    /* Both strings ended together */
+  }
+}
+
+
+/*************************************************
+*  Compare zero-terminated PCRE2 & 8-bit strings *
+*************************************************/
+
+/* The second string is almost always a literal, which is why it is typed as
+const char *. Each of its characters is converted to a PCRE2 code unit before
+being compared.
+
+Arguments:
+  str1        first string
+  str2        second string
+
+Returns:      0, 1, or -1
+*/
+
+int
+PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
+{
+for (;;)
+  {
+  PCRE2_UCHAR a = *str1++;
+  PCRE2_UCHAR b = *str2++;
+  if (a != b) return (a > b)? 1 : -1;
+  if (a == 0) return 0;    /* Both strings ended together */
+  }
+}
+
+
+/*************************************************
+*    Compare two PCRE2 strings, given a length   *
+*************************************************/
+
+/* Exactly len code units are compared unless a difference is found first; a
+zero code unit does not stop the comparison.
+
+Arguments:
+  str1        first string
+  str2        second string
+  len         the length
+
+Returns:      0, 1, or -1
+*/
+
+int
+PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
+{
+while (len-- > 0)
+  {
+  PCRE2_UCHAR a = *str1++;
+  PCRE2_UCHAR b = *str2++;
+  if (a != b) return (a > b)? 1 : -1;
+  }
+return 0;
+}
+
+
+/*************************************************
+* Compare PCRE2 string to 8-bit string by length *
+*************************************************/
+
+/* The second string is almost always a literal, which is why it is typed as
+const char *. As in the function above, a zero code unit does not stop the
+comparison.
+
+Arguments:
+  str1        first string
+  str2        second string
+  len         the length
+
+Returns:      0, 1, or -1
+*/
+
+int
+PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
+{
+while (len-- > 0)
+  {
+  PCRE2_UCHAR a = *str1++;
+  PCRE2_UCHAR b = *str2++;
+  if (a != b) return (a > b)? 1 : -1;
+  }
+return 0;
+}
+
+
+/*************************************************
+*        Find the length of a PCRE2 string       *
+*************************************************/
+
+/*
+Argument:    the string
+Returns:     the number of code units before the terminating zero
+*/
+
+PCRE2_SIZE
+PRIV(strlen)(PCRE2_SPTR str)
+{
+PCRE2_SPTR end = str;
+while (*end != 0) end++;
+return (PCRE2_SIZE)(end - str);
+}
+
+
+/*************************************************
+* Copy 8-bit 0-terminated string to PCRE2 string *
+*************************************************/
+
+/* The receiving buffer must have room for every character of the 8-bit string
+and for a terminating zero; no check is made here.
+
+Arguments:
+  str1     buffer to receive the string
+  str2     8-bit string to be copied
+
+Returns:   the number of code units used (excluding trailing zero)
+*/
+
+PCRE2_SIZE
+PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
+{
+PCRE2_SIZE used = 0;
+for (; str2[used] != 0; used++) str1[used] = str2[used];
+str1[used] = 0;
+return used;
+}
+
+/* End of pcre2_string_utils.c */
diff --git a/src/pcre2_study.c b/src/pcre2_study.c
new file mode 100644
index 0000000..792e696
--- /dev/null
+++ b/src/pcre2_study.c
@@ -0,0 +1,1915 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2023 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* This module contains functions for scanning a compiled pattern and +collecting data (e.g. minimum matching length). 
*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + +/* The maximum remembered capturing brackets minimum. */ + +#define MAX_CACHE_BACKREF 128 + +/* Set a bit in the starting code unit bit map. */ + +#define SET_BIT(c) re->start_bitmap[(c)/8] |= (1u << ((c)&7)) + +/* Returns from set_start_bits() */ + +enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN, SSB_TOODEEP }; + + +/************************************************* +* Find the minimum subject length for a group * +*************************************************/ + +/* Scan a parenthesized group and compute the minimum length of subject that +is needed to match it. This is a lower bound; it does not mean there is a +string of that length that matches. In UTF mode, the result is in characters +rather than code units. The field in a compiled pattern for storing the minimum +length is 16-bits long (on the grounds that anything longer than that is +pathological), so we give up when we reach that amount. This also means that +integer overflow for really crazy patterns cannot happen. + +Backreference minimum lengths are cached to speed up multiple references. This +function is called only when the highest back reference in the pattern is less +than or equal to MAX_CACHE_BACKREF, which is one less than the size of the +caching vector. The zeroth element contains the number of the highest set +value. + +Arguments: + re compiled pattern block + code pointer to start of group (the bracket) + startcode pointer to start of the whole pattern's code + utf UTF flag + recurses chain of recurse_check to catch mutual recursion + countptr pointer to call count (to catch over complexity) + backref_cache vector for caching back references. + +This function is no longer called when the pattern contains (*ACCEPT); however, +the old code for returning -1 is retained, just in case. 
+
+Returns:   the minimum length
+           -1 \C in UTF-8 mode
+              or (*ACCEPT)
+              or pattern too complicated
+           -2 internal error (missing capturing bracket)
+           -3 internal error (opcode not listed)
+*/
+
+static int
+find_minlength(const pcre2_real_code *re, PCRE2_SPTR code,
+  PCRE2_SPTR startcode, BOOL utf, recurse_check *recurses, int *countptr,
+  int *backref_cache)
+{
+int length = -1;
+int branchlength = 0;
+int prev_cap_recno = -1;
+int prev_cap_d = 0;
+int prev_recurse_recno = -1;
+int prev_recurse_d = 0;
+uint32_t once_fudge = 0;
+BOOL had_recurse = FALSE;
+BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
+PCRE2_SPTR nextbranch = code + GET(code, 1);
+PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
+recurse_check this_recurse;
+
+/* If this is a "could be empty" group, its minimum length is 0. */
+
+if (*code >= OP_SBRA && *code <= OP_SCOND) return 0;
+
+/* Skip over capturing bracket number */
+
+if (*code == OP_CBRA || *code == OP_CBRAPOS) cc += IMM2_SIZE;
+
+/* A large and/or complex regex can take too long to process. The counter
+behind countptr is handed on to every nested call, so the limit applies to the
+scan as a whole; once it has been exceeded we give up and return -1. */
+
+if ((*countptr)++ > 1000) return -1;
+
+/* Scan along the opcodes for this branch. If we get to the end of the branch,
+check the length against that of the other branches. If the accumulated length
+passes 16-bits, reset to that value and skip the rest of the branch. */
+
+for (;;)
+  {
+  int d, min, recno;
+  PCRE2_UCHAR op, *cs, *ce;
+
+  if (branchlength >= UINT16_MAX)
+    {
+    branchlength = UINT16_MAX;
+    cc = (PCRE2_UCHAR *)nextbranch;
+    }
+
+  op = *cc;
+  switch (op)
+    {
+    case OP_COND:
+    case OP_SCOND:
+
+    /* If there is only one branch in a condition, the implied branch has zero
+    length, so we don't add anything. This covers the DEFINE "condition"
+    automatically. If there are two branches we can treat it the same as any
+    other non-capturing subpattern. */
+
+    cs = cc + GET(cc, 1);
+    if (*cs != OP_ALT)
+      {
+      cc = cs + 1 + LINK_SIZE;
+      break;
+      }
+    goto PROCESS_NON_CAPTURE;
+
+    case OP_BRA:
+    /* There's a special case of OP_BRA, when it is wrapped round a repeated
+    OP_RECURSE. We'd like to process the latter at this level so that
+    remembering the value works for repeated cases. So we do nothing, but
+    set a fudge value to skip over the OP_KET after the recurse. */
+
+    if (cc[1+LINK_SIZE] == OP_RECURSE && cc[2*(1+LINK_SIZE)] == OP_KET)
+      {
+      once_fudge = 1 + LINK_SIZE;
+      cc += 1 + LINK_SIZE;
+      break;
+      }
+    /* Fall through */
+
+    case OP_ONCE:
+    case OP_SCRIPT_RUN:
+    case OP_SBRA:
+    case OP_BRAPOS:
+    case OP_SBRAPOS:
+    PROCESS_NON_CAPTURE:
+    d = find_minlength(re, cc, startcode, utf, recurses, countptr,
+      backref_cache);
+    if (d < 0) return d;
+    branchlength += d;
+    do cc += GET(cc, 1); while (*cc == OP_ALT);
+    cc += 1 + LINK_SIZE;
+    break;
+
+    /* To save time for repeated capturing subpatterns, we remember the
+    length of the previous one. Unfortunately we can't do the same for
+    the unnumbered ones above. Nor can we do this if (?| is present in the
+    pattern because captures with the same number are not then identical. */
+
+    case OP_CBRA:
+    case OP_SCBRA:
+    case OP_CBRAPOS:
+    case OP_SCBRAPOS:
+    recno = (int)GET2(cc, 1+LINK_SIZE);
+    if (dupcapused || recno != prev_cap_recno)
+      {
+      prev_cap_recno = recno;
+      prev_cap_d = find_minlength(re, cc, startcode, utf, recurses, countptr,
+        backref_cache);
+      if (prev_cap_d < 0) return prev_cap_d;
+      }
+    branchlength += prev_cap_d;
+    do cc += GET(cc, 1); while (*cc == OP_ALT);
+    cc += 1 + LINK_SIZE;
+    break;
+
+    /* ACCEPT makes things far too complicated; we have to give up. In fact,
+    from 10.34 onwards, if a pattern contains (*ACCEPT), this function is not
+    used. However, leave the code in place, just in case. */
+
+    case OP_ACCEPT:
+    case OP_ASSERT_ACCEPT:
+    return -1;
+
+    /* Reached end of a branch; if it's a ket it is the end of a nested
+    call. If it's ALT it is an alternation in a nested call. If it is END it's
+    the end of the outer call. All can be handled by the same code. If the
+    length of any branch is zero, there is no need to scan any subsequent
+    branches. A branch in which a recursion was seen supplies the length only
+    if no earlier branch has supplied one. */
+
+    case OP_ALT:
+    case OP_KET:
+    case OP_KETRMAX:
+    case OP_KETRMIN:
+    case OP_KETRPOS:
+    case OP_END:
+    if (length < 0 || (!had_recurse && branchlength < length))
+      length = branchlength;
+    if (op != OP_ALT || length == 0) return length;
+    nextbranch = cc + GET(cc, 1);
+    cc += 1 + LINK_SIZE;
+    branchlength = 0;
+    had_recurse = FALSE;
+    break;
+
+    /* Skip over assertive subpatterns */
+
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    case OP_ASSERT_NA:
+    case OP_ASSERTBACK_NA:
+    do cc += GET(cc, 1); while (*cc == OP_ALT);
+    /* Fall through */
+
+    /* Skip over things that don't match chars */
+
+    case OP_REVERSE:
+    case OP_VREVERSE:
+    case OP_CREF:
+    case OP_DNCREF:
+    case OP_RREF:
+    case OP_DNRREF:
+    case OP_FALSE:
+    case OP_TRUE:
+    case OP_CALLOUT:
+    case OP_SOD:
+    case OP_SOM:
+    case OP_EOD:
+    case OP_EODN:
+    case OP_CIRC:
+    case OP_CIRCM:
+    case OP_DOLL:
+    case OP_DOLLM:
+    case OP_NOT_WORD_BOUNDARY:
+    case OP_WORD_BOUNDARY:
+    case OP_NOT_UCP_WORD_BOUNDARY:
+    case OP_UCP_WORD_BOUNDARY:
+    cc += PRIV(OP_lengths)[*cc];
+    break;
+
+    case OP_CALLOUT_STR:
+    cc += GET(cc, 1 + 2*LINK_SIZE);
+    break;
+
+    /* Skip over a subpattern that has a {0} or {0,x} quantifier */
+
+    case OP_BRAZERO:
+    case OP_BRAMINZERO:
+    case OP_BRAPOSZERO:
+    case OP_SKIPZERO:
+    cc += PRIV(OP_lengths)[*cc];
+    do cc += GET(cc, 1); while (*cc == OP_ALT);
+    cc += 1 + LINK_SIZE;
+    break;
+
+    /* Handle literal characters and + repetitions */
+
+    case OP_CHAR:
+    case OP_CHARI:
+    case OP_NOT:
+    case OP_NOTI:
+    case OP_PLUS:
+    case OP_PLUSI:
+    case OP_MINPLUS:
+    case OP_MINPLUSI:
+    case OP_POSPLUS:
+    case OP_POSPLUSI:
+    case OP_NOTPLUS:
+    case OP_NOTPLUSI:
+    case OP_NOTMINPLUS:
+    case OP_NOTMINPLUSI:
+    case OP_NOTPOSPLUS:
+    case OP_NOTPOSPLUSI:
+    branchlength++;
+    cc += 2;
+#ifdef SUPPORT_UNICODE
+    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+    break;
+
+    case OP_TYPEPLUS:
+    case OP_TYPEMINPLUS:
+    case OP_TYPEPOSPLUS:
+    branchlength++;
+    cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2;
+    break;
+
+    /* Handle exact repetitions. The count is already in characters, but we
+    may need to skip over a multibyte character in UTF mode. */
+
+    case OP_EXACT:
+    case OP_EXACTI:
+    case OP_NOTEXACT:
+    case OP_NOTEXACTI:
+    branchlength += GET2(cc,1);
+    cc += 2 + IMM2_SIZE;
+#ifdef SUPPORT_UNICODE
+    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+    break;
+
+    case OP_TYPEEXACT:
+    branchlength += GET2(cc,1);
+    cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP
+      || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
+    break;
+
+    /* Handle single-char non-literal matchers */
+
+    case OP_PROP:
+    case OP_NOTPROP:
+    cc += 2;
+    /* Fall through */
+
+    case OP_NOT_DIGIT:
+    case OP_DIGIT:
+    case OP_NOT_WHITESPACE:
+    case OP_WHITESPACE:
+    case OP_NOT_WORDCHAR:
+    case OP_WORDCHAR:
+    case OP_ANY:
+    case OP_ALLANY:
+    case OP_EXTUNI:
+    case OP_HSPACE:
+    case OP_NOT_HSPACE:
+    case OP_VSPACE:
+    case OP_NOT_VSPACE:
+    branchlength++;
+    cc++;
+    break;
+
+    /* "Any newline" might match two characters, but it also might match just
+    one. */
+
+    case OP_ANYNL:
+    branchlength += 1;
+    cc++;
+    break;
+
+    /* The single-byte matcher means we can't proceed in UTF mode. (In
+    non-UTF mode \C will actually be turned into OP_ALLANY, so won't ever
+    appear, but leave the code, just in case.) */
+
+    case OP_ANYBYTE:
+#ifdef SUPPORT_UNICODE
+    if (utf) return -1;
+#endif
+    branchlength++;
+    cc++;
+    break;
+
+    /* For repeated character types, we have to test for \p and \P, which have
+    an extra two bytes of parameters. */
+
+    case OP_TYPESTAR:
+    case OP_TYPEMINSTAR:
+    case OP_TYPEQUERY:
+    case OP_TYPEMINQUERY:
+    case OP_TYPEPOSSTAR:
+    case OP_TYPEPOSQUERY:
+    if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
+    cc += PRIV(OP_lengths)[op];
+    break;
+
+    case OP_TYPEUPTO:
+    case OP_TYPEMINUPTO:
+    case OP_TYPEPOSUPTO:
+    if (cc[1 + IMM2_SIZE] == OP_PROP
+      || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
+    cc += PRIV(OP_lengths)[op];
+    break;
+
+    /* Check a class for variable quantification */
+
+    case OP_CLASS:
+    case OP_NCLASS:
+#ifdef SUPPORT_WIDE_CHARS
+    case OP_XCLASS:
+    /* The original code caused an unsigned overflow in 64 bit systems,
+    so now we use a conditional statement. */
+    if (op == OP_XCLASS)
+      cc += GET(cc, 1);
+    else
+      cc += PRIV(OP_lengths)[OP_CLASS];
+#else
+    cc += PRIV(OP_lengths)[OP_CLASS];
+#endif
+
+    switch (*cc)
+      {
+      case OP_CRPLUS:
+      case OP_CRMINPLUS:
+      case OP_CRPOSPLUS:
+      branchlength++;
+      /* Fall through */
+
+      case OP_CRSTAR:
+      case OP_CRMINSTAR:
+      case OP_CRQUERY:
+      case OP_CRMINQUERY:
+      case OP_CRPOSSTAR:
+      case OP_CRPOSQUERY:
+      cc++;
+      break;
+
+      case OP_CRRANGE:
+      case OP_CRMINRANGE:
+      case OP_CRPOSRANGE:
+      branchlength += GET2(cc,1);
+      cc += 1 + 2 * IMM2_SIZE;
+      break;
+
+      default:
+      branchlength++;
+      break;
+      }
+    break;
+
+    /* Backreferences and subroutine calls (OP_RECURSE) are treated in the same
+    way: we find the minimum length for the subpattern. A recursion
+    (backreference or subroutine) causes a flag to be set that causes the
+    length of this branch to be ignored. The logic is that a recursion can only
+    make sense if there is another alternative that stops the recursing. That
+    will provide the minimum length (when no recursion happens).
+
+    If PCRE2_MATCH_UNSET_BACKREF is set, a backreference to an unset bracket
+    matches an empty string (by default it causes a matching failure), so in
+    that case we must set the minimum length to zero.
+
+    For backreferences, if duplicate numbers are present in the pattern we check
+    for a reference to a duplicate. If it is, we don't know which version will
+    be referenced, so we have to set the minimum length to zero. */
+
+    /* Duplicate named pattern back reference. */
+
+    case OP_DNREF:
+    case OP_DNREFI:
+    if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
+      {
+      int count = GET2(cc, 1+IMM2_SIZE);
+      PCRE2_UCHAR *slot =
+        (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
+          GET2(cc, 1) * re->name_entry_size;
+
+      d = INT_MAX;
+
+      /* Scan all groups with the same name; find the shortest. */
+
+      while (count-- > 0)
+        {
+        int dd, i;
+        recno = GET2(slot, 0);
+
+        if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
+          dd = backref_cache[recno];
+        else
+          {
+          ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
+          if (cs == NULL) return -2;
+          do ce += GET(ce, 1); while (*ce == OP_ALT);
+
+          dd = 0;
+          if (!dupcapused ||
+              (PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
+            {
+            if (cc > cs && cc < ce)    /* Simple recursion */
+              {
+              had_recurse = TRUE;
+              }
+            else
+              {
+              recurse_check *r = recurses;
+              for (r = recurses; r != NULL; r = r->prev)
+                if (r->group == cs) break;
+              if (r != NULL)           /* Mutual recursion */
+                {
+                had_recurse = TRUE;
+                }
+              else
+                {
+                this_recurse.prev = recurses;  /* No recursion */
+                this_recurse.group = cs;
+                dd = find_minlength(re, cs, startcode, utf, &this_recurse,
+                  countptr, backref_cache);
+                if (dd < 0) return dd;
+                }
+              }
+            }
+
+          backref_cache[recno] = dd;
+          for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
+          backref_cache[0] = recno;
+          }
+
+        if (dd < d) d = dd;
+        if (d <= 0) break;    /* No point looking at any more */
+        slot += re->name_entry_size;
+        }
+      }
+    else d = 0;
+    cc += 1 + 2*IMM2_SIZE;
+    goto REPEAT_BACK_REFERENCE;
+
+    /* Single back reference by number. References by name are converted to by
+    number when there is no duplication. The result is remembered in
+    backref_cache: element 0 is set to the number just stored, and any slots
+    skipped over on the way up to it are set to -1, which the lookup above
+    treats as "not yet known". */
+
+    case OP_REF:
+    case OP_REFI:
+    recno = GET2(cc, 1);
+    if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
+      d = backref_cache[recno];
+    else
+      {
+      int i;
+      d = 0;
+
+      if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
+        {
+        ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
+        if (cs == NULL) return -2;
+        do ce += GET(ce, 1); while (*ce == OP_ALT);
+
+        if (!dupcapused ||
+            (PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
+          {
+          if (cc > cs && cc < ce)    /* Simple recursion */
+            {
+            had_recurse = TRUE;
+            }
+          else
+            {
+            recurse_check *r = recurses;
+            for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+            if (r != NULL)           /* Mutual recursion */
+              {
+              had_recurse = TRUE;
+              }
+            else                     /* No recursion */
+              {
+              this_recurse.prev = recurses;
+              this_recurse.group = cs;
+              d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr,
+                backref_cache);
+              if (d < 0) return d;
+              }
+            }
+          }
+        }
+
+      backref_cache[recno] = d;
+      for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
+      backref_cache[0] = recno;
+      }
+
+    cc += 1 + IMM2_SIZE;
+
+    /* Handle repeated back references */
+
+    REPEAT_BACK_REFERENCE:
+    switch (*cc)
+      {
+      case OP_CRSTAR:
+      case OP_CRMINSTAR:
+      case OP_CRQUERY:
+      case OP_CRMINQUERY:
+      case OP_CRPOSSTAR:
+      case OP_CRPOSQUERY:
+      min = 0;
+      cc++;
+      break;
+
+      case OP_CRPLUS:
+      case OP_CRMINPLUS:
+      case OP_CRPOSPLUS:
+      min = 1;
+      cc++;
+      break;
+
+      case OP_CRRANGE:
+      case OP_CRMINRANGE:
+      case OP_CRPOSRANGE:
+      min = GET2(cc, 1);
+      cc += 1 + 2 * IMM2_SIZE;
+      break;
+
+      default:
+      min = 1;
+      break;
+      }
+
+     /* Take care not to overflow: (1) min and d are ints, so check that their
+     product is not greater than INT_MAX. (2) branchlength is limited to
+     UINT16_MAX (checked at the top of the loop). */
+
+    if ((d > 0 && (INT_MAX/d) < min) || UINT16_MAX - branchlength < min*d)
+      branchlength = UINT16_MAX;
+    else branchlength += min * d;
+    break;
+
+    /* Recursion always refers to the first occurrence of a subpattern with a
+    given number. Therefore, we can always make use of caching, even when the
+    pattern contains multiple subpatterns with the same number. */
+
+    case OP_RECURSE:
+    cs = ce = (PCRE2_UCHAR *)startcode + GET(cc, 1);
+    recno = GET2(cs, 1+LINK_SIZE);
+    if (recno == prev_recurse_recno)
+      {
+      branchlength += prev_recurse_d;
+      }
+    else
+      {
+      do ce += GET(ce, 1); while (*ce == OP_ALT);
+      if (cc > cs && cc < ce)    /* Simple recursion */
+        had_recurse = TRUE;
+      else
+        {
+        recurse_check *r = recurses;
+        for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+        if (r != NULL)          /* Mutual recursion */
+          had_recurse = TRUE;
+        else
+          {
+          this_recurse.prev = recurses;
+          this_recurse.group = cs;
+          prev_recurse_d = find_minlength(re, cs, startcode, utf, &this_recurse,
+            countptr, backref_cache);
+          if (prev_recurse_d < 0) return prev_recurse_d;
+          prev_recurse_recno = recno;
+          branchlength += prev_recurse_d;
+          }
+        }
+      }
+    cc += 1 + LINK_SIZE + once_fudge;
+    once_fudge = 0;
+    break;
+
+    /* Anything else does not or need not match a character. We can get the
+    item's length from the table, but for those that can match zero occurrences
+    of a character, we must take special action for UTF-8 characters. As it
+    happens, the "NOT" versions of these opcodes are used at present only for
+    ASCII characters, so they could be omitted from this list. However, in
+    future that may change, so we include them here so as not to leave a
+    gotcha for a future maintainer. */
+
+    case OP_UPTO:
+    case OP_UPTOI:
+    case OP_NOTUPTO:
+    case OP_NOTUPTOI:
+    case OP_MINUPTO:
+    case OP_MINUPTOI:
+    case OP_NOTMINUPTO:
+    case OP_NOTMINUPTOI:
+    case OP_POSUPTO:
+    case OP_POSUPTOI:
+    case OP_NOTPOSUPTO:
+    case OP_NOTPOSUPTOI:
+
+    case OP_STAR:
+    case OP_STARI:
+    case OP_NOTSTAR:
+    case OP_NOTSTARI:
+    case OP_MINSTAR:
+    case OP_MINSTARI:
+    case OP_NOTMINSTAR:
+    case OP_NOTMINSTARI:
+    case OP_POSSTAR:
+    case OP_POSSTARI:
+    case OP_NOTPOSSTAR:
+    case OP_NOTPOSSTARI:
+
+    case OP_QUERY:
+    case OP_QUERYI:
+    case OP_NOTQUERY:
+    case OP_NOTQUERYI:
+    case OP_MINQUERY:
+    case OP_MINQUERYI:
+    case OP_NOTMINQUERY:
+    case OP_NOTMINQUERYI:
+    case OP_POSQUERY:
+    case OP_POSQUERYI:
+    case OP_NOTPOSQUERY:
+    case OP_NOTPOSQUERYI:
+
+    cc += PRIV(OP_lengths)[op];
+#ifdef SUPPORT_UNICODE
+    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+    break;
+
+    /* Skip these, but we need to add in the name length. */
+
+    case OP_MARK:
+    case OP_COMMIT_ARG:
+    case OP_PRUNE_ARG:
+    case OP_SKIP_ARG:
+    case OP_THEN_ARG:
+    cc += PRIV(OP_lengths)[op] + cc[1];
+    break;
+
+    /* The remaining opcodes are just skipped over. */
+
+    case OP_CLOSE:
+    case OP_COMMIT:
+    case OP_FAIL:
+    case OP_PRUNE:
+    case OP_SET_SOM:
+    case OP_SKIP:
+    case OP_THEN:
+    cc += PRIV(OP_lengths)[op];
+    break;
+
+    /* This should not occur: we list all opcodes explicitly so that when
+    new ones get added they are properly considered. */
+
+    default:
+    return -3;
+    }
+  }
+/* Control never gets here */
+}
+
+
+
+/*************************************************
+*      Set a bit and maybe its alternate case    *
+*************************************************/
+
+/* Given a character, set its first code unit's bit in the table, and also the
+corresponding bit for the other version of a letter if we are caseless.
+
+Arguments:
+  re            points to the regex block
+  p             points to the first code unit of the character
+  caseless      TRUE if caseless
+  utf           TRUE for UTF mode
+  ucp           TRUE for UCP mode
+
+Returns:        pointer after the character
+*/
+
+static PCRE2_SPTR
+set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf,
+  BOOL ucp)
+{
+uint32_t c = *p++;   /* First code unit */
+
+(void)utf;           /* Stop compiler warnings when UTF not supported */
+(void)ucp;
+
+/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
+0xff. Note that the else at the end of the conditionally compiled line pairs
+with the SET_BIT(c) statement that follows the #endif, so in the 8-bit library
+SET_BIT(c) is executed unconditionally. */
+
+#if PCRE2_CODE_UNIT_WIDTH != 8
+if (c > 0xff) SET_BIT(0xff); else
+#endif
+
+SET_BIT(c);
+
+/* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
+the end of the character, even when caseless. The macros update both c and p,
+so after this point c is used as the complete character value when looking up
+the other case, and p is the value that is returned. */
+
+#ifdef SUPPORT_UNICODE
+if (utf)
+  {
+#if PCRE2_CODE_UNIT_WIDTH == 8
+  if (c >= 0xc0) GETUTF8INC(c, p);
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+  if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p);
+#endif
+  }
+#endif  /* SUPPORT_UNICODE */
+
+/* If caseless, handle the other case of the character. With Unicode support,
+in UTF or UCP mode, the other case comes from UCD_OTHERCASE(); otherwise, for
+values that pass the MAX_255() test, it comes from the fcc part of the
+character tables. */
+
+if (caseless)
+  {
+#ifdef SUPPORT_UNICODE
+  if (utf || ucp)
+    {
+    c = UCD_OTHERCASE(c);
+#if PCRE2_CODE_UNIT_WIDTH == 8
+    if (utf)
+      {
+      PCRE2_UCHAR buff[6];   /* Only the first code unit is used below */
+      (void)PRIV(ord2utf)(c, buff);
+      SET_BIT(buff[0]);
+      }
+    else if (c < 256) SET_BIT(c);
+#else  /* 16-bit or 32-bit mode */
+    if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
+#endif
+    }
+
+  else
+#endif  /* SUPPORT_UNICODE */
+
+  /* Not UTF or UCP */
+
+  if (MAX_255(c)) SET_BIT(re->tables[fcc_offset + c]);
+  }
+
+return p;
+}
+
+
+
+/*************************************************
+*     Set bits for a positive character type     *
+*************************************************/
+
+/* This function sets starting bits for a character type. In UTF-8 mode, we can
+only do a direct setting for bytes less than 128, as otherwise there can be
+confusion with bytes in the middle of UTF-8 characters.
In a "traditional" +environment, the tables will only recognize ASCII characters anyway, but in at +least one Windows environment, some higher bytes bits were set in the tables. +So we deal with that case by considering the UTF-8 encoding. + +Arguments: + re the regex block + cbit type the type of character wanted + table_limit 32 for non-UTF-8; 16 for UTF-8 + +Returns: nothing +*/ + +static void +set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit) +{ +uint32_t c; +for (c = 0; c < table_limit; c++) + re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type]; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +if (table_limit == 32) return; +for (c = 128; c < 256; c++) + { + if ((re->tables[cbits_offset + c/8] & (1u << (c&7))) != 0) + { + PCRE2_UCHAR buff[6]; + (void)PRIV(ord2utf)(c, buff); + SET_BIT(buff[0]); + } + } +#endif /* UTF-8 */ +} + + +/************************************************* +* Set bits for a negative character type * +*************************************************/ + +/* This function sets starting bits for a negative character type such as \D. +In UTF-8 mode, we can only do a direct setting for bytes less than 128, as +otherwise there can be confusion with bytes in the middle of UTF-8 characters. +Unlike in the positive case, where we can set appropriate starting bits for +specific high-valued UTF-8 characters, in this case we have to set the bits for +all high-valued characters. The lowest is 0xc2, but we overkill by starting at +0xc0 (192) for simplicity. 
+ +Arguments: + re the regex block + cbit type the type of character wanted + table_limit 32 for non-UTF-8; 16 for UTF-8 + +Returns: nothing +*/ + +static void +set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit) +{ +uint32_t c; +for (c = 0; c < table_limit; c++) + re->start_bitmap[c] |= (uint8_t)(~(re->tables[c+cbits_offset+cbit_type])); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff; +#endif +} + + + +/************************************************* +* Create bitmap of starting code units * +*************************************************/ + +/* This function scans a compiled unanchored expression recursively and +attempts to build a bitmap of the set of possible starting code units whose +values are less than 256. In 16-bit and 32-bit mode, values above 255 all cause +the 255 bit to be set. When calling set[_not]_type_bits() in UTF-8 (sic) mode +we pass a value of 16 rather than 32 as the final argument. (See comments in +those functions for the reason.) + +The SSB_CONTINUE return is useful for parenthesized groups in patterns such as +(a*)b where the group provides some optional starting code units but scanning +must continue at the outer level to find at least one mandatory code unit. At +the outermost level, this function fails unless the result is SSB_DONE. + +We restrict recursion (for nested groups) to 1000 to avoid stack overflow +issues. 
+
+Arguments:
+  re           points to the compiled regex block
+  code         points to an expression
+  utf          TRUE if in UTF mode
+  ucp          TRUE if in UCP mode
+  depthptr     pointer to recurse depth
+
+Returns:       SSB_FAIL     => Failed to find any starting code units
+               SSB_DONE     => Found mandatory starting code units
+               SSB_CONTINUE => Found optional starting code units
+               SSB_UNKNOWN  => Hit an unrecognized opcode
+               SSB_TOODEEP  => Recursion is too deep
+*/
+
+static int
+set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf, BOOL ucp,
+  int *depthptr)
+{
+uint32_t c;
+int yield = SSB_DONE;
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+int table_limit = utf? 16:32;
+#else
+int table_limit = 32;
+#endif
+
+*depthptr += 1;
+if (*depthptr > 1000) return SSB_TOODEEP;   /* Nested too deeply; give up */
+
+do
+  {
+  BOOL try_next = TRUE;
+  PCRE2_SPTR tcode = code + 1 + LINK_SIZE;
+
+  if (*code == OP_CBRA || *code == OP_SCBRA ||
+      *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE;
+
+  while (try_next)    /* Loop for items in this branch */
+    {
+    int rc;
+    PCRE2_SPTR ncode;
+    uint8_t *classmap = NULL;
+#ifdef SUPPORT_WIDE_CHARS
+    PCRE2_UCHAR xclassflags;
+#endif
+
+    switch(*tcode)
+      {
+      /* If we reach something we don't understand, it means a new opcode has
+      been created that hasn't been added to this function. Hopefully this
+      problem will be discovered during testing. */
+
+      default:
+      return SSB_UNKNOWN;
+
+      /* Fail for a valid opcode that implies no starting bits. */
+
+      case OP_ACCEPT:
+      case OP_ASSERT_ACCEPT:
+      case OP_ALLANY:
+      case OP_ANY:
+      case OP_ANYBYTE:
+      case OP_CIRCM:
+      case OP_CLOSE:
+      case OP_COMMIT:
+      case OP_COMMIT_ARG:
+      case OP_COND:
+      case OP_CREF:
+      case OP_FALSE:
+      case OP_TRUE:
+      case OP_DNCREF:
+      case OP_DNREF:
+      case OP_DNREFI:
+      case OP_DNRREF:
+      case OP_DOLL:
+      case OP_DOLLM:
+      case OP_END:
+      case OP_EOD:
+      case OP_EODN:
+      case OP_EXTUNI:
+      case OP_FAIL:
+      case OP_MARK:
+      case OP_NOT:
+      case OP_NOTEXACT:
+      case OP_NOTEXACTI:
+      case OP_NOTI:
+      case OP_NOTMINPLUS:
+      case OP_NOTMINPLUSI:
+      case OP_NOTMINQUERY:
+      case OP_NOTMINQUERYI:
+      case OP_NOTMINSTAR:
+      case OP_NOTMINSTARI:
+      case OP_NOTMINUPTO:
+      case OP_NOTMINUPTOI:
+      case OP_NOTPLUS:
+      case OP_NOTPLUSI:
+      case OP_NOTPOSPLUS:
+      case OP_NOTPOSPLUSI:
+      case OP_NOTPOSQUERY:
+      case OP_NOTPOSQUERYI:
+      case OP_NOTPOSSTAR:
+      case OP_NOTPOSSTARI:
+      case OP_NOTPOSUPTO:
+      case OP_NOTPOSUPTOI:
+      case OP_NOTPROP:
+      case OP_NOTQUERY:
+      case OP_NOTQUERYI:
+      case OP_NOTSTAR:
+      case OP_NOTSTARI:
+      case OP_NOTUPTO:
+      case OP_NOTUPTOI:
+      case OP_NOT_HSPACE:
+      case OP_NOT_VSPACE:
+      case OP_PRUNE:
+      case OP_PRUNE_ARG:
+      case OP_RECURSE:
+      case OP_REF:
+      case OP_REFI:
+      case OP_REVERSE:
+      case OP_VREVERSE:
+      case OP_RREF:
+      case OP_SCOND:
+      case OP_SET_SOM:
+      case OP_SKIP:
+      case OP_SKIP_ARG:
+      case OP_SOD:
+      case OP_SOM:
+      case OP_THEN:
+      case OP_THEN_ARG:
+      return SSB_FAIL;
+
+      /* OP_CIRC happens only at the start of an anchored branch (multiline ^
+      uses OP_CIRCM). Skip over it. */
+
+      case OP_CIRC:
+      tcode += PRIV(OP_lengths)[OP_CIRC];
+      break;
+
+      /* A "real" property test implies no starting bits, but the fake property
+      PT_CLIST identifies a list of characters. These lists are short, as they
+      are used for characters with more than one "other case", so there is no
+      point in recognizing them for OP_NOTPROP. */
+
+      case OP_PROP:
+      if (tcode[1] != PT_CLIST) return SSB_FAIL;
+        {
+        const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2];
+        while ((c = *p++) < NOTACHAR)
+          {
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+          if (utf)
+            {
+            PCRE2_UCHAR buff[6];
+            (void)PRIV(ord2utf)(c, buff);
+            c = buff[0];
+            }
+#endif
+          if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
+          }
+        }
+      try_next = FALSE;
+      break;
+
+      /* We can ignore word boundary tests. */
+
+      case OP_WORD_BOUNDARY:
+      case OP_NOT_WORD_BOUNDARY:
+      case OP_UCP_WORD_BOUNDARY:
+      case OP_NOT_UCP_WORD_BOUNDARY:
+      tcode++;
+      break;
+
+      /* For a positive lookahead assertion, inspect what immediately follows,
+      ignoring intermediate assertions and callouts. If the next item is one
+      that sets a mandatory character, skip this assertion. Otherwise, treat it
+      the same as other bracket groups. */
+
+      case OP_ASSERT:
+      case OP_ASSERT_NA:
+      ncode = tcode + GET(tcode, 1);
+      while (*ncode == OP_ALT) ncode += GET(ncode, 1);
+      ncode += 1 + LINK_SIZE;
+
+      /* Skip irrelevant items */
+
+      for (BOOL done = FALSE; !done;)
+        {
+        switch (*ncode)
+          {
+          case OP_ASSERT:
+          case OP_ASSERT_NOT:
+          case OP_ASSERTBACK:
+          case OP_ASSERTBACK_NOT:
+          case OP_ASSERT_NA:
+          case OP_ASSERTBACK_NA:
+          ncode += GET(ncode, 1);
+          while (*ncode == OP_ALT) ncode += GET(ncode, 1);
+          ncode += 1 + LINK_SIZE;
+          break;
+
+          case OP_WORD_BOUNDARY:
+          case OP_NOT_WORD_BOUNDARY:
+          case OP_UCP_WORD_BOUNDARY:
+          case OP_NOT_UCP_WORD_BOUNDARY:
+          ncode++;
+          break;
+
+          case OP_CALLOUT:
+          ncode += PRIV(OP_lengths)[OP_CALLOUT];
+          break;
+
+          case OP_CALLOUT_STR:
+          ncode += GET(ncode, 1 + 2*LINK_SIZE);
+          break;
+
+          default:
+          done = TRUE;
+          break;
+          }
+        }
+
+      /* Now check the next significant item. If it is one of the listed
+      opcodes, scanning simply continues from that item, which skips the
+      assertion; otherwise control falls through to the bracket handling. */
+
+      switch(*ncode)
+        {
+        default:
+        break;
+
+        case OP_PROP:
+        if (ncode[1] != PT_CLIST) break;
+        /* Fall through */
+        case OP_ANYNL:
+        case OP_CHAR:
+        case OP_CHARI:
+        case OP_EXACT:
+        case OP_EXACTI:
+        case OP_HSPACE:
+        case OP_MINPLUS:
+        case OP_MINPLUSI:
+        case OP_PLUS:
+        case OP_PLUSI:
+        case OP_POSPLUS:
+        case OP_POSPLUSI:
+        case OP_VSPACE:
+        /* Note that these types will only be present in non-UCP mode. */
+        case OP_DIGIT:
+        case OP_NOT_DIGIT:
+        case OP_WORDCHAR:
+        case OP_NOT_WORDCHAR:
+        case OP_WHITESPACE:
+        case OP_NOT_WHITESPACE:
+        tcode = ncode;
+        continue;   /* With the following significant opcode */
+        }
+      /* Fall through */
+
+      /* For a group bracket or a positive assertion without an immediately
+      following mandatory setting, recurse to set bits from within the
+      subpattern. If it can't find anything, we have to give up. If it finds
+      some mandatory character(s), we are done for this branch. Otherwise,
+      carry on scanning after the subpattern. */
+
+      case OP_BRA:
+      case OP_SBRA:
+      case OP_CBRA:
+      case OP_SCBRA:
+      case OP_BRAPOS:
+      case OP_SBRAPOS:
+      case OP_CBRAPOS:
+      case OP_SCBRAPOS:
+      case OP_ONCE:
+      case OP_SCRIPT_RUN:
+      rc = set_start_bits(re, tcode, utf, ucp, depthptr);
+      if (rc == SSB_DONE)
+        {
+        try_next = FALSE;
+        }
+      else if (rc == SSB_CONTINUE)
+        {
+        do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
+        tcode += 1 + LINK_SIZE;
+        }
+      else return rc;   /* FAIL, UNKNOWN, or TOODEEP */
+      break;
+
+      /* If we hit ALT or KET, it means we haven't found anything mandatory in
+      this branch, though we might have found something optional. For ALT, we
+      continue with the next alternative, but we have to arrange that the final
+      result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
+      return SSB_CONTINUE: if this is the top level, that indicates failure,
+      but after a nested subpattern, it causes scanning to continue. */
+
+      case OP_ALT:
+      yield = SSB_CONTINUE;
+      try_next = FALSE;
+      break;
+
+      case OP_KET:
+      case OP_KETRMAX:
+      case OP_KETRMIN:
+      case OP_KETRPOS:
+      return SSB_CONTINUE;
+
+      /* Skip over callout */
+
+      case OP_CALLOUT:
+      tcode += PRIV(OP_lengths)[OP_CALLOUT];
+      break;
+
+      case OP_CALLOUT_STR:
+      tcode += GET(tcode, 1 + 2*LINK_SIZE);
+      break;
+
+      /* Skip over lookbehind and negative lookahead assertions */
+
+      case OP_ASSERT_NOT:
+      case OP_ASSERTBACK:
+      case OP_ASSERTBACK_NOT:
+      case OP_ASSERTBACK_NA:
+      do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
+      tcode += 1 + LINK_SIZE;
+      break;
+
+      /* BRAZERO does the bracket, but carries on. */
+
+      case OP_BRAZERO:
+      case OP_BRAMINZERO:
+      case OP_BRAPOSZERO:
+      rc = set_start_bits(re, ++tcode, utf, ucp, depthptr);
+      if (rc == SSB_FAIL || rc == SSB_UNKNOWN || rc == SSB_TOODEEP) return rc;
+      do tcode += GET(tcode,1); while (*tcode == OP_ALT);
+      tcode += 1 + LINK_SIZE;
+      break;
+
+      /* SKIPZERO skips the bracket. */
+
+      case OP_SKIPZERO:
+      tcode++;
+      do tcode += GET(tcode,1); while (*tcode == OP_ALT);
+      tcode += 1 + LINK_SIZE;
+      break;
+
+      /* Single-char * or ? sets the bit and tries the next item */
+
+      case OP_STAR:
+      case OP_MINSTAR:
+      case OP_POSSTAR:
+      case OP_QUERY:
+      case OP_MINQUERY:
+      case OP_POSQUERY:
+      tcode = set_table_bit(re, tcode + 1, FALSE, utf, ucp);
+      break;
+
+      case OP_STARI:
+      case OP_MINSTARI:
+      case OP_POSSTARI:
+      case OP_QUERYI:
+      case OP_MINQUERYI:
+      case OP_POSQUERYI:
+      tcode = set_table_bit(re, tcode + 1, TRUE, utf, ucp);
+      break;
+
+      /* Single-char upto sets the bit and tries the next */
+
+      case OP_UPTO:
+      case OP_MINUPTO:
+      case OP_POSUPTO:
+      tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, FALSE, utf, ucp);
+      break;
+
+      case OP_UPTOI:
+      case OP_MINUPTOI:
+      case OP_POSUPTOI:
+      tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, TRUE, utf, ucp);
+      break;
+
+      /* At least one single char sets the bit and stops */
+
+      case OP_EXACT:
+      tcode += IMM2_SIZE;
+      /* Fall through */
+      case OP_CHAR:
+      case OP_PLUS:
+      case OP_MINPLUS:
+      case OP_POSPLUS:
+      (void)set_table_bit(re, tcode + 1, FALSE, utf, ucp);
+      try_next = FALSE;
+      break;
+
+      case OP_EXACTI:
+      tcode += IMM2_SIZE;
+      /* Fall through */
+      case OP_CHARI:
+      case OP_PLUSI:
+      case OP_MINPLUSI:
+      case OP_POSPLUSI:
+      (void)set_table_bit(re, tcode + 1, TRUE, utf, ucp);
+      try_next = FALSE;
+      break;
+
+      /* Special spacing and line-terminating items. These recognize specific
+      lists of characters. The difference between VSPACE and ANYNL is that the
+      latter can match the two-character CRLF sequence, but that is not
+      relevant for finding the first character, so their code here is
+      identical. */
+
+      case OP_HSPACE:
+      SET_BIT(CHAR_HT);
+      SET_BIT(CHAR_SPACE);
+
+      /* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
+      the bits for 0xA0 and for code units >= 255, independently of UTF. */
+
+#if PCRE2_CODE_UNIT_WIDTH != 8
+      SET_BIT(0xA0);
+      SET_BIT(0xFF);
+#else
+      /* For the 8-bit library in UTF-8 mode, set the bits for the first code
+      units of horizontal space characters. */
+
+#ifdef SUPPORT_UNICODE
+      if (utf)
+        {
+        SET_BIT(0xC2);  /* For U+00A0 */
+        SET_BIT(0xE1);  /* For U+1680, U+180E */
+        SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
+        SET_BIT(0xE3);  /* For U+3000 */
+        }
+      else
+#endif
+      /* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
+      the code is EBCDIC. */
+        {
+#ifndef EBCDIC
+        SET_BIT(0xA0);
+#endif  /* Not EBCDIC */
+        }
+#endif  /* 8-bit support */
+
+      try_next = FALSE;
+      break;
+
+      case OP_ANYNL:
+      case OP_VSPACE:
+      SET_BIT(CHAR_LF);
+      SET_BIT(CHAR_VT);
+      SET_BIT(CHAR_FF);
+      SET_BIT(CHAR_CR);
+
+      /* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
+      the bits for NEL and for code units >= 255, independently of UTF. */
+
+#if PCRE2_CODE_UNIT_WIDTH != 8
+      SET_BIT(CHAR_NEL);
+      SET_BIT(0xFF);
+#else
+      /* For the 8-bit library in UTF-8 mode, set the bits for the first code
+      units of vertical space characters. */
+
+#ifdef SUPPORT_UNICODE
+      if (utf)
+        {
+        SET_BIT(0xC2);  /* For U+0085 (NEL) */
+        SET_BIT(0xE2);  /* For U+2028, U+2029 */
+        }
+      else
+#endif
+      /* For the 8-bit library not in UTF-8 mode, set the bit for NEL. */
+        {
+        SET_BIT(CHAR_NEL);
+        }
+#endif  /* 8-bit support */
+
+      try_next = FALSE;
+      break;
+
+      /* Single character types set the bits and stop. Note that if PCRE2_UCP
+      is set, we do not see these opcodes because \d etc are converted to
+      properties. Therefore, these apply in the case when only characters less
+      than 256 are recognized to match the types. */
+
+      case OP_NOT_DIGIT:
+      set_nottype_bits(re, cbit_digit, table_limit);
+      try_next = FALSE;
+      break;
+
+      case OP_DIGIT:
+      set_type_bits(re, cbit_digit, table_limit);
+      try_next = FALSE;
+      break;
+
+      case OP_NOT_WHITESPACE:
+      set_nottype_bits(re, cbit_space, table_limit);
+      try_next = FALSE;
+      break;
+
+      case OP_WHITESPACE:
+      set_type_bits(re, cbit_space, table_limit);
+      try_next = FALSE;
+      break;
+
+      case OP_NOT_WORDCHAR:
+      set_nottype_bits(re, cbit_word, table_limit);
+      try_next = FALSE;
+      break;
+
+      case OP_WORDCHAR:
+      set_type_bits(re, cbit_word, table_limit);
+      try_next = FALSE;
+      break;
+
+      /* One or more character type fudges the pointer and restarts, knowing
+      it will hit a single character type and stop there. */
+
+      case OP_TYPEPLUS:
+      case OP_TYPEMINPLUS:
+      case OP_TYPEPOSPLUS:
+      tcode++;
+      break;
+
+      case OP_TYPEEXACT:
+      tcode += 1 + IMM2_SIZE;
+      break;
+
+      /* Zero or more repeats of character types set the bits and then
+      try again. */
+
+      case OP_TYPEUPTO:
+      case OP_TYPEMINUPTO:
+      case OP_TYPEPOSUPTO:
+      tcode += IMM2_SIZE;  /* Fall through */
+
+      case OP_TYPESTAR:
+      case OP_TYPEMINSTAR:
+      case OP_TYPEPOSSTAR:
+      case OP_TYPEQUERY:
+      case OP_TYPEMINQUERY:
+      case OP_TYPEPOSQUERY:
+      switch(tcode[1])
+        {
+        default:
+        case OP_ANY:
+        case OP_ALLANY:
+        return SSB_FAIL;
+
+        case OP_HSPACE:
+        SET_BIT(CHAR_HT);
+        SET_BIT(CHAR_SPACE);
+
+        /* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
+        the bits for 0xA0 and for code units >= 255, independently of UTF. */
+
+#if PCRE2_CODE_UNIT_WIDTH != 8
+        SET_BIT(0xA0);
+        SET_BIT(0xFF);
+#else
+        /* For the 8-bit library in UTF-8 mode, set the bits for the first code
+        units of horizontal space characters. */
+
+#ifdef SUPPORT_UNICODE
+        if (utf)
+          {
+          SET_BIT(0xC2);  /* For U+00A0 */
+          SET_BIT(0xE1);  /* For U+1680, U+180E */
+          SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
+          SET_BIT(0xE3);  /* For U+3000 */
+          }
+        else
+#endif
+        /* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
+        the code is EBCDIC. */
+          {
+#ifndef EBCDIC
+          SET_BIT(0xA0);
+#endif  /* Not EBCDIC */
+          }
+#endif  /* 8-bit support */
+        break;
+
+        case OP_ANYNL:
+        case OP_VSPACE:
+        SET_BIT(CHAR_LF);
+        SET_BIT(CHAR_VT);
+        SET_BIT(CHAR_FF);
+        SET_BIT(CHAR_CR);
+
+        /* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
+        the bits for NEL and for code units >= 255, independently of UTF. */
+
+#if PCRE2_CODE_UNIT_WIDTH != 8
+        SET_BIT(CHAR_NEL);
+        SET_BIT(0xFF);
+#else
+        /* For the 8-bit library in UTF-8 mode, set the bits for the first code
+        units of vertical space characters. */
+
+#ifdef SUPPORT_UNICODE
+        if (utf)
+          {
+          SET_BIT(0xC2);  /* For U+0085 (NEL) */
+          SET_BIT(0xE2);  /* For U+2028, U+2029 */
+          }
+        else
+#endif
+        /* For the 8-bit library not in UTF-8 mode, set the bit for NEL. */
+          {
+          SET_BIT(CHAR_NEL);
+          }
+#endif  /* 8-bit support */
+        break;
+
+        case OP_NOT_DIGIT:
+        set_nottype_bits(re, cbit_digit, table_limit);
+        break;
+
+        case OP_DIGIT:
+        set_type_bits(re, cbit_digit, table_limit);
+        break;
+
+        case OP_NOT_WHITESPACE:
+        set_nottype_bits(re, cbit_space, table_limit);
+        break;
+
+        case OP_WHITESPACE:
+        set_type_bits(re, cbit_space, table_limit);
+        break;
+
+        case OP_NOT_WORDCHAR:
+        set_nottype_bits(re, cbit_word, table_limit);
+        break;
+
+        case OP_WORDCHAR:
+        set_type_bits(re, cbit_word, table_limit);
+        break;
+        }
+
+      tcode += 2;
+      break;
+
+      /* Extended class: if there are any property checks, or if this is a
+      negative XCLASS without a map, give up. If there are no property checks,
+      there must be wide characters on the XCLASS list, because otherwise an
+      XCLASS would not have been created. This means that code points >= 255
+      are potential starters. In the UTF-8 case we can scan them and set bits
+      for the relevant leading bytes. */
+
+#ifdef SUPPORT_WIDE_CHARS
+      case OP_XCLASS:
+      xclassflags = tcode[1 + LINK_SIZE];
+      if ((xclassflags & XCL_HASPROP) != 0 ||
+          (xclassflags & (XCL_MAP|XCL_NOT)) == XCL_NOT)
+        return SSB_FAIL;
+
+      /* We have a positive XCLASS or a negative one with a map (a negative
+      one without a map has just been rejected). Set up the map pointer if
+      there is one, and fall through. */
+
+      classmap = ((xclassflags & XCL_MAP) == 0)? NULL :
+        (uint8_t *)(tcode + 1 + LINK_SIZE + 1);
+
+      /* In UTF-8 mode, scan the character list and set bits for leading bytes,
+      then jump to handle the map. Only the first byte of each listed
+      character is used; the following bytes of the form 10xxxxxx are skipped.
+      This is done only for a positive class. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+      if (utf && (xclassflags & XCL_NOT) == 0)
+        {
+        PCRE2_UCHAR b, e;
+        PCRE2_SPTR p = tcode + 1 + LINK_SIZE + 1 + ((classmap == NULL)? 0:32);
+        tcode += GET(tcode, 1);
+
+        for (;;) switch (*p++)
+          {
+          case XCL_SINGLE:
+          b = *p++;
+          while ((*p & 0xc0) == 0x80) p++;
+          re->start_bitmap[b/8] |= (1u << (b&7));
+          break;
+
+          case XCL_RANGE:
+          b = *p++;
+          while ((*p & 0xc0) == 0x80) p++;
+          e = *p++;
+          while ((*p & 0xc0) == 0x80) p++;
+          for (; b <= e; b++)
+            re->start_bitmap[b/8] |= (1u << (b&7));
+          break;
+
+          case XCL_END:
+          goto HANDLE_CLASSMAP;
+
+          default:
+          return SSB_UNKNOWN;   /* Internal error, should not occur */
+          }
+        }
+#endif  /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
+#endif  /* SUPPORT_WIDE_CHARS */
+
+      /* It seems that the fall through comment must be outside the #ifdef if
+      it is to avoid the gcc compiler warning. */
+
+      /* Fall through */
+
+      /* Enter here for a negative non-XCLASS. In the 8-bit library, if we are
+      in UTF mode, any byte with a value >= 0xc4 is a potentially valid starter
+      because it starts a character with a value > 255. In 8-bit non-UTF mode,
+      there is no difference between CLASS and NCLASS. In all other wide
+      character modes, set the 0xFF bit to indicate code units >= 255. */
+
+      case OP_NCLASS:
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+      if (utf)
+        {
+        re->start_bitmap[24] |= 0xf0;            /* Bits for 0xc4 - 0xc7 */
+        memset(re->start_bitmap+25, 0xff, 7);    /* Bits for 0xc8 - 0xff */
+        }
+#elif PCRE2_CODE_UNIT_WIDTH != 8
+      SET_BIT(0xFF);                             /* For characters >= 255 */
+#endif
+      /* Fall through */
+
+      /* Enter here for a positive non-XCLASS. If we have fallen through from
+      an XCLASS, classmap will already be set; just advance the code pointer.
+      Otherwise, set up classmap for a non-XCLASS and advance past it. */
+
+      case OP_CLASS:
+      if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else
+        {
+        classmap = (uint8_t *)(++tcode);
+        tcode += 32 / sizeof(PCRE2_UCHAR);
+        }
+
+      /* When wide characters are supported, classmap may be NULL. In UTF-8
+      (sic) mode, the bits in a class bit map correspond to character values,
+      not to byte values. However, the bit map we are constructing is for byte
+      values. So we have to do a conversion for characters whose code point is
+      greater than 127. In fact, there are only two possible starting bytes for
+      characters in the range 128 - 255. */
+
+#if defined SUPPORT_WIDE_CHARS && PCRE2_CODE_UNIT_WIDTH == 8
+      HANDLE_CLASSMAP:
+#endif
+      if (classmap != NULL)
+        {
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+        if (utf)
+          {
+          for (c = 0; c < 16; c++) re->start_bitmap[c] |= classmap[c];
+          for (c = 128; c < 256; c++)
+            {
+            if ((classmap[c/8] & (1u << (c&7))) != 0)
+              {
+              int d = (c >> 6) | 0xc0;                 /* Set bit for this starter */
+              re->start_bitmap[d/8] |= (1u << (d&7));  /* and then skip on to the */
+              c = (c & 0xc0) + 0x40 - 1;               /* next relevant character. */
+              }
+            }
+          }
+        else
+#endif
+        /* In all modes except UTF-8, the two bit maps are compatible. */
+
+          {
+          for (c = 0; c < 32; c++) re->start_bitmap[c] |= classmap[c];
+          }
+        }
+
+      /* Act on what follows the class. For a zero minimum repeat, continue;
+      otherwise stop processing. */
+
+      switch (*tcode)
+        {
+        case OP_CRSTAR:
+        case OP_CRMINSTAR:
+        case OP_CRQUERY:
+        case OP_CRMINQUERY:
+        case OP_CRPOSSTAR:
+        case OP_CRPOSQUERY:
+        tcode++;
+        break;
+
+        case OP_CRRANGE:
+        case OP_CRMINRANGE:
+        case OP_CRPOSRANGE:
+        if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
+          else try_next = FALSE;
+        break;
+
+        default:
+        try_next = FALSE;
+        break;
+        }
+      break; /* End of class handling case */
+      }      /* End of switch for opcodes */
+    }        /* End of try_next loop */
+
+  code += GET(code, 1);   /* Advance to next branch */
+  }
+while (*code == OP_ALT);
+
+return yield;
+}
+
+
+
+/*************************************************
+*          Study a compiled expression           *
+*************************************************/
+
+/* This function is handed a compiled expression that it must study to produce
+information that will speed up the matching. It may set PCRE2_FIRSTMAPSET, or
+PCRE2_FIRSTSET (possibly with PCRE2_FIRSTCASELESS) and first_codeunit, in the
+regex block, and it may set the minlength field.
+
+Argument:
+  re       points to the compiled expression
+
+Returns:   0 normally; non-zero should never normally occur
+           1 unknown opcode in set_start_bits
+           2 missing capturing bracket
+           3 unknown opcode in find_minlength
+*/
+
+int
+PRIV(study)(pcre2_real_code *re)
+{
+int count = 0;
+PCRE2_UCHAR *code;
+BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
+BOOL ucp = (re->overall_options & PCRE2_UCP) != 0;
+
+/* Find start of compiled code. It is located after the pcre2_real_code block
+and a further name_entry_size * name_count code units. */
+
+code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
+  re->name_entry_size * re->name_count;
+
+/* For a pattern that has a first code unit, or a multiline pattern that
+matches only at "line start", there is no point in seeking a list of starting
+code units. */
+
+if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
+  {
+  int depth = 0;
+  int rc = set_start_bits(re, code, utf, ucp, &depth);
+  if (rc == SSB_UNKNOWN) return 1;
+
+  /* If a list of starting code units was set up, scan the list to see if only
+  one or two were listed. Having only one listed is rare because usually a
+  single starting code unit will have been recognized and PCRE2_FIRSTSET set.
+  If two are listed, see if they are caseless versions of the same character;
+  if so we can replace the list with a caseless first code unit. This gives
+  better performance and is plausibly worth doing for patterns such as [Ww]ord
+  or (word|WORD). */
+
+  if (rc == SSB_DONE)
+    {
+    int i;
+    int a = -1;
+    int b = -1;
+    uint8_t *p = re->start_bitmap;
+    uint32_t flags = PCRE2_FIRSTMAPSET;
+
+    for (i = 0; i < 256; p++, i += 8)
+      {
+      uint8_t x = *p;
+      if (x != 0)
+        {
+        int c;
+        uint8_t y = x & (~x + 1);   /* Least significant bit */
+        if (y != x) goto DONE;      /* More than one bit set */
+
+        /* In the 16-bit and 32-bit libraries, the bit for 0xff means "0xff and
+        all wide characters", so we cannot use it here. */
+
+#if PCRE2_CODE_UNIT_WIDTH != 8
+        if (i == 248 && x == 0x80) goto DONE;
+#endif
+
+        /* Compute the character value */
+
+        c = i;
+        switch (x)
+          {
+          case 1:   break;
+          case 2:   c += 1; break;  case 4:  c += 2; break;
+          case 8:   c += 3; break;  case 16: c += 4; break;
+          case 32:  c += 5; break;  case 64: c += 6; break;
+          case 128: c += 7; break;
+          }
+
+        /* c contains the code unit value, in the range 0-255. In 8-bit UTF
+        mode, only values < 128 can be used. In all the other cases, c is a
+        character value. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+        if (utf && c > 127) goto DONE;
+#endif
+        if (a < 0) a = c;   /* First one found, save in a */
+        else if (b < 0)     /* Second one found */
+          {
+          int d = TABLE_GET((unsigned int)c, re->tables + fcc_offset, c);
+
+#ifdef SUPPORT_UNICODE
+          if (utf || ucp)
+            {
+            if (UCD_CASESET(c) != 0) goto DONE;     /* Multiple case set */
+            if (c > 127) d = UCD_OTHERCASE(c);
+            }
+#endif  /* SUPPORT_UNICODE */
+
+          if (d != a) goto DONE;   /* Not the other case of a */
+          b = c;                   /* Save second in b */
+          }
+        else goto DONE;   /* More than two characters found */
+        }
+      }
+
+    /* Replace the start code unit bits with a first code unit, but only if it
+    is not the same as a required later code unit. This is because a search for
+    a required code unit starts after an explicit first code unit, but at a
+    code unit found from the bitmap. Patterns such as /a*a/ don't work
+    if both the start unit and required unit are the same. */
+
+    if (a >= 0 &&
+        (
+        (re->flags & PCRE2_LASTSET) == 0 ||
+          (
+          re->last_codeunit != (uint32_t)a &&
+          (b < 0 || re->last_codeunit != (uint32_t)b)
+          )
+        ))
+      {
+      re->first_codeunit = a;
+      flags = PCRE2_FIRSTSET;
+      if (b >= 0) flags |= PCRE2_FIRSTCASELESS;
+      }
+
+    DONE:
+    re->flags |= flags;
+    }
+  }
+
+/* Find the minimum length of subject string. If the pattern can match an empty
+string, the minimum length is already known. If the pattern contains (*ACCEPT)
+all bets are off, and we don't even try to find a minimum length. If there are
+more back references than the size of the vector we are going to cache them in,
+do nothing. A pattern that complicated will probably take a long time to
+analyze and may in any case turn out to be too complicated. Note that back
+reference minima are held as 16-bit numbers. */
+
+if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 &&
+     re->top_backref <= MAX_CACHE_BACKREF)
+  {
+  int min;
+  int backref_cache[MAX_CACHE_BACKREF+1];
+  backref_cache[0] = 0;    /* Highest one that is set */
+  min = find_minlength(re, code, code, utf, NULL, &count, backref_cache);
+  switch(min)
+    {
+    case -1:  /* \C in UTF mode or over-complex regex */
+    break;    /* Leave minlength unchanged (will be zero) */
+
+    case -2:
+    return 2; /* missing capturing bracket */
+
+    case -3:
+    return 3; /* unrecognized opcode */
+
+    default:
+    re->minlength = (min > UINT16_MAX)? UINT16_MAX : min;
+    break;
+    }
+  }
+
+return 0;
+}
+
+/* End of pcre2_study.c */
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
new file mode 100644
index 0000000..edbb78c
--- /dev/null
+++ b/src/pcre2_substitute.c
@@ -0,0 +1,1009 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + +#define PTR_STACK_SIZE 20 + +#define SUBSTITUTE_OPTIONS \ + (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \ + PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \ + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \ + PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY) + + + +/************************************************* +* Find end of substitute text * +*************************************************/ + +/* In extended mode, we recognize ${name:+set text:unset text} and similar +constructions. This requires the identification of unescaped : and } +characters. This function scans for such. It must deal with nested ${ +constructions. The pointer to the text is updated, either to the required end +character, or to where an error was detected. 
+ +Arguments: + code points to the compiled expression (for options) + ptrptr points to the pointer to the start of the text (updated) + ptrend end of the whole string + last TRUE if the last expected string (only } recognized) + +Returns: 0 on success + negative error code on failure +*/ + +static int +find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, + BOOL last) +{ +int rc = 0; +uint32_t nestlevel = 0; /* Number of ${ constructions currently open */ +BOOL literal = FALSE; /* TRUE while between \Q and \E */ +PCRE2_SPTR ptr = *ptrptr; + +for (; ptr < ptrend; ptr++) + { + if (literal) /* In literal mode only \E is acted on */ + { + if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E) + { + literal = FALSE; + ptr += 1; + } + } + + else if (*ptr == CHAR_RIGHT_CURLY_BRACKET) + { + if (nestlevel == 0) goto EXIT; /* Unnested } is a terminator; rc is still 0 */ + nestlevel--; + } + + else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT; /* Unnested : terminates only when last is FALSE */ + + else if (*ptr == CHAR_DOLLAR_SIGN) + { + if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET) + { + nestlevel++; + ptr += 1; + } + } + + else if (*ptr == CHAR_BACKSLASH) + { + int erc; + int errorcode; + uint32_t ch; + + if (ptr < ptrend - 1) switch (ptr[1]) /* Case-forcing escapes are not passed to check_escape() */ + { + case CHAR_L: + case CHAR_l: + case CHAR_U: + case CHAR_u: + ptr += 1; /* Onto the letter; the loop increment moves past it */ + continue; + } + + ptr += 1; /* Must point after \ */ + erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode, + code->overall_options, code->extra_options, FALSE, NULL); + ptr -= 1; /* Back to last code unit of escape */ + if (errorcode != 0) + { + rc = errorcode; + goto EXIT; + } + + switch(erc) + { + case 0: /* Data character */ + case ESC_E: /* Isolated \E is ignored */ + break; + + case ESC_Q: + literal = TRUE; + break; + + default: + rc = PCRE2_ERROR_BADREPESCAPE; + goto EXIT; + } + } + } + +rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */ + +EXIT: +*ptrptr = ptr; +return rc; +} + + + +/************************************************* +* Match and substitute * +*************************************************/ + +/* This function applies a compiled re to a subject string and creates a new
+string with substitutions. The first 7 arguments are the same as for +pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED. + +Arguments: + code points to the compiled expression + subject points to the subject string + length length of subject string (may contain binary zeros) + start_offset where to start in the subject string + options option bits + match_data points to a match_data block, or is NULL + context points a PCRE2 context + replacement points to the replacement string + rlength length of replacement string + buffer where to put the substituted string + blength points to length of buffer; updated to length of string + +Returns: >= 0 number of substitutions made + < 0 an error code + PCRE2_ERROR_BADREPLACEMENT means invalid use of $ +*/ + +/* This macro checks for space in the buffer before copying into it. On +overflow, either give an error immediately, or keep on, accumulating the +length. It uses the locals buffer, buff_offset, lengthleft, overflowed, extra_needed and suboptions of pcre2_substitute(), and its NOROOM label. */ + +#define CHECKMEMCPY(from,length) \ + { \ + if (!overflowed && lengthleft < length) \ + { \ + if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \ + overflowed = TRUE; \ + extra_needed = length - lengthleft; \ + } \ + else if (overflowed) \ + { \ + extra_needed += length; \ + } \ + else \ + { \ + memcpy(buffer + buff_offset, from, CU2BYTES(length)); \ + buff_offset += length; \ + lengthleft -= length; \ + } \ + } + +/* Here's the function */ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, + PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength, + PCRE2_UCHAR *buffer, PCRE2_SIZE *blength) +{ +int rc; +int subs; +int forcecase = 0; /* > 0 forces upper case, < 0 forces lower case, 0 leaves case alone */ +int forcecasereset = 0; /* Value given to forcecase after each character that is output */ +uint32_t ovector_count; +uint32_t goptions = 0; +uint32_t suboptions; +pcre2_match_data *internal_match_data = NULL; +BOOL escaped_literal = FALSE; +BOOL overflowed = FALSE; +BOOL use_existing_match; +BOOL
replacement_only; +#ifdef SUPPORT_UNICODE +BOOL utf = (code->overall_options & PCRE2_UTF) != 0; +BOOL ucp = (code->overall_options & PCRE2_UCP) != 0; +#endif +PCRE2_UCHAR temp[6]; +PCRE2_SPTR ptr; +PCRE2_SPTR repend; +PCRE2_SIZE extra_needed = 0; +PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength; +PCRE2_SIZE *ovector; +PCRE2_SIZE ovecsave[3]; /* Previous match: [0],[1] its offsets, [2] the start_offset it was run from */ +pcre2_substitute_callout_block scb; + +/* General initialization */ + +buff_offset = 0; +lengthleft = buff_length = *blength; +*blength = PCRE2_UNSET; +ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET; + +/* Partial matching is not valid. This must come after setting *blength to +PCRE2_UNSET, so as not to imply an offset in the replacement. */ + +if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0) + return PCRE2_ERROR_BADOPTION; + +/* Validate length and find the end of the replacement. A NULL replacement of +zero length is interpreted as an empty string. */ + +if (replacement == NULL) + { + if (rlength != 0) return PCRE2_ERROR_NULL; + replacement = (PCRE2_SPTR)""; + } + +if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement); +repend = replacement + rlength; + +/* Check for using a match that has already happened. Note that the subject +pointer in the match data may be NULL after a no-match. */ + +use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0); +replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0); + +/* If starting from an existing match, there must be an externally provided +match data block. We create an internal match_data block in two cases: (a) an +external one is not supplied (and we are not starting from an existing match); +(b) an existing match is to be used for the first substitution. In the latter +case, we copy the existing match into the internal block, except for any cached +heap frame size and pointer. This ensures that no changes are made to the +external match data block.
*/ + +if (match_data == NULL) + { + pcre2_general_context *gcontext; + if (use_existing_match) return PCRE2_ERROR_NULL; + gcontext = (mcontext == NULL)? + (pcre2_general_context *)code : + (pcre2_general_context *)mcontext; + match_data = internal_match_data = + pcre2_match_data_create_from_pattern(code, gcontext); + if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY; + } + +else if (use_existing_match) + { + pcre2_general_context *gcontext = (mcontext == NULL)? + (pcre2_general_context *)code : + (pcre2_general_context *)mcontext; + int pairs = (code->top_bracket + 1 < match_data->oveccount)? + code->top_bracket + 1 : match_data->oveccount; + internal_match_data = pcre2_match_data_create(match_data->oveccount, + gcontext); + if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY; + memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector) + + 2*pairs*sizeof(PCRE2_SIZE)); + internal_match_data->heapframes = NULL; + internal_match_data->heapframes_size = 0; + match_data = internal_match_data; + } + +/* Remember ovector details */ + +ovector = pcre2_get_ovector_pointer(match_data); +ovector_count = pcre2_get_ovector_count(match_data); + +/* Fixed things in the callout block */ + +scb.version = 0; +scb.input = subject; +scb.output = (PCRE2_SPTR)buffer; +scb.ovector = ovector; + +/* A NULL subject of zero length is treated as an empty string. NOTE(review): the error return below does not go through EXIT, so an internal_match_data block created above is not freed on that path. Also, scb.input was set from subject before this replacement, so it stays NULL for a NULL subject. */ + +if (subject == NULL) + { + if (length != 0) return PCRE2_ERROR_NULL; + subject = (PCRE2_SPTR)""; + } + +/* Find length of zero-terminated subject. A NULL subject has already been replaced by an empty string above. */ + +if (length == PCRE2_ZERO_TERMINATED) + length = subject? PRIV(strlen)(subject) : 0; + +/* Check UTF replacement string if necessary.
*/ + +#ifdef SUPPORT_UNICODE +if (utf && (options & PCRE2_NO_UTF_CHECK) == 0) + { + rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar)); + if (rc != 0) + { + match_data->leftchar = 0; + goto EXIT; + } + } +#endif /* SUPPORT_UNICODE */ + +/* Save the substitute options and remove them from the match options. */ + +suboptions = options & SUBSTITUTE_OPTIONS; +options &= ~SUBSTITUTE_OPTIONS; + +/* Error if the start match offset is greater than the length of the subject. */ + +if (start_offset > length) + { + match_data->leftchar = 0; + rc = PCRE2_ERROR_BADOFFSET; + goto EXIT; + } + +/* Copy up to the start offset, unless only the replacement is required. */ + +if (!replacement_only) CHECKMEMCPY(subject, start_offset); + +/* Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first +match is taken from the match_data that was passed in. */ + +subs = 0; +do + { + PCRE2_SPTR ptrstack[PTR_STACK_SIZE]; /* Saved (ptr, repend) pairs for nested replacement texts */ + uint32_t ptrstackptr = 0; + + if (use_existing_match) + { + rc = match_data->rc; + use_existing_match = FALSE; + } + else rc = pcre2_match(code, subject, length, start_offset, options|goptions, + match_data, mcontext); + +#ifdef SUPPORT_UNICODE + if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */ +#endif + + /* Any error other than no match returns the error code. No match when not + doing the special after-empty-match global rematch, or when at the end of the + subject, breaks the global loop. Otherwise, advance the starting point by one + character, copying it to the output, and try again. */ + + if (rc < 0) + { + PCRE2_SIZE save_start; + + if (rc != PCRE2_ERROR_NOMATCH) goto EXIT; + if (goptions == 0 || start_offset >= length) break; + + /* Advance by one code point. Then, if CRLF is a valid newline sequence and + we have advanced into the middle of it, advance one more code point. In + other words, do not start in the middle of CRLF, even if CR and LF on their + own are valid newlines.
*/ + + save_start = start_offset++; + if (subject[start_offset-1] == CHAR_CR && + code->newline_convention != PCRE2_NEWLINE_CR && + code->newline_convention != PCRE2_NEWLINE_LF && + start_offset < length && + subject[start_offset] == CHAR_LF) + start_offset++; + + /* Otherwise, in UTF mode, advance past any secondary code units (UTF-8 continuation bytes or UTF-16 low surrogates). */ + + else if ((code->overall_options & PCRE2_UTF) != 0) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 + while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80) + start_offset++; +#elif PCRE2_CODE_UNIT_WIDTH == 16 + while (start_offset < length && + (subject[start_offset] & 0xfc00) == 0xdc00) + start_offset++; +#endif + } + + /* Copy what we have advanced past (unless not required), reset the special + global options, and continue to the next match. */ + + fraglength = start_offset - save_start; + if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength); + goptions = 0; + continue; + } + + /* Handle a successful match. Matches that use \K to end before they start + or start before the current point in the subject are not supported. */ + + if (ovector[1] < ovector[0] || ovector[0] < start_offset) + { + rc = PCRE2_ERROR_BADSUBSPATTERN; + goto EXIT; + } + + /* Check for the same match as previous. This is legitimate after matching an + empty string that starts after the initial match offset. We have tried again + at the match point in case the pattern is one like /(?<=\G.)/ which can never + match at its starting point, so running the match achieves the bumpalong. If + we do get the same (null) match at the original match point, it isn't such a + pattern, so we now do the empty string magic. In all other cases, a repeat + match should never occur.
*/ + + if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1]) + { + if (ovector[0] == ovector[1] && ovecsave[2] != start_offset) + { + goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + ovecsave[2] = start_offset; + continue; /* Back to the top of the loop */ + } + rc = PCRE2_ERROR_INTERNAL_DUPMATCH; + goto EXIT; + } + + /* Count substitutions with a paranoid check for integer overflow; surely no + real call to this function would ever hit this! */ + + if (subs == INT_MAX) + { + rc = PCRE2_ERROR_TOOMANYREPLACE; + goto EXIT; + } + subs++; + + /* Copy the text leading up to the match (unless not required), and remember + where the insert begins and how many ovector pairs are set. */ + + if (rc == 0) rc = ovector_count; /* A zero match return is reported to the callout as ovector_count pairs */ + fraglength = ovector[0] - start_offset; + if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength); + scb.output_offsets[0] = buff_offset; + scb.oveccount = rc; + + /* Process the replacement string. If the entire replacement is literal, just + copy it with length check. */ + + ptr = replacement; + if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0) + { + CHECKMEMCPY(ptr, rlength); + } + + /* Within a non-literal replacement, which must be scanned character by + character, local literal mode can be set by \Q, but only in extended mode + when backslashes are being interpreted. In extended mode we must handle + nested substrings that are to be reprocessed. */ + + else for (;;) + { + uint32_t ch; + unsigned int chlen; + + /* If at the end of a nested substring, pop the stack. */ + + if (ptr >= repend) + { + if (ptrstackptr == 0) break; /* End of replacement string */ + repend = ptrstack[--ptrstackptr]; + ptr = ptrstack[--ptrstackptr]; + continue; + } + + /* Handle the next character */ + + if (escaped_literal) + { + if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E) + { + escaped_literal = FALSE; + ptr += 2; + continue; + } + goto LOADLITERAL; + } + + /* Not in literal mode.
*/ + + if (*ptr == CHAR_DOLLAR_SIGN) + { + int group, n; + uint32_t special = 0; + BOOL inparens; + BOOL star; + PCRE2_SIZE sublength; + PCRE2_SPTR text1_start = NULL; + PCRE2_SPTR text1_end = NULL; + PCRE2_SPTR text2_start = NULL; + PCRE2_SPTR text2_end = NULL; + PCRE2_UCHAR next; + PCRE2_UCHAR name[33]; /* Up to 32 code units plus a terminating zero */ + + if (++ptr >= repend) goto BAD; + if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL; /* $$ gives a literal $ */ + + group = -1; + n = 0; + inparens = FALSE; + star = FALSE; + + if (next == CHAR_LEFT_CURLY_BRACKET) + { + if (++ptr >= repend) goto BAD; + next = *ptr; + inparens = TRUE; + } + + if (next == CHAR_ASTERISK) + { + if (++ptr >= repend) goto BAD; + next = *ptr; + star = TRUE; + } + + if (!star && next >= CHAR_0 && next <= CHAR_9) + { + group = next - CHAR_0; + while (++ptr < repend) + { + next = *ptr; + if (next < CHAR_0 || next > CHAR_9) break; + group = group * 10 + next - CHAR_0; + + /* A check for a number greater than the highest captured group + is sufficient here; no need for a separate overflow check. If unknown + groups are to be treated as unset, just skip over any remaining + digits and carry on. NOTE(review): next is not reloaded after that skip, so it still holds a digit when it is later compared with CHAR_COLON - confirm this is intended. */ + + if (group > code->top_bracket) + { + if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) + { + while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9); + break; + } + else + { + rc = PCRE2_ERROR_NOSUBSTRING; + goto PTREXIT; + } + } + } + } + else + { + const uint8_t *ctypes = code->tables + ctypes_offset; + while (MAX_255(next) && (ctypes[next] & ctype_word) != 0) + { + name[n++] = next; + if (n > 32) goto BAD; + if (++ptr >= repend) break; + next = *ptr; + } + if (n == 0) goto BAD; + name[n] = 0; + } + + /* In extended mode we recognize ${name:+set text:unset text} and + ${name:-default text}.
*/ + + if (inparens) + { + if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 && + !star && ptr < repend - 2 && next == CHAR_COLON) + { + special = *(++ptr); + if (special != CHAR_PLUS && special != CHAR_MINUS) + { + rc = PCRE2_ERROR_BADSUBSTITUTION; + goto PTREXIT; + } + + text1_start = ++ptr; + rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS); + if (rc != 0) goto PTREXIT; + text1_end = ptr; + + if (special == CHAR_PLUS && *ptr == CHAR_COLON) + { + text2_start = ++ptr; + rc = find_text_end(code, &ptr, repend, TRUE); + if (rc != 0) goto PTREXIT; + text2_end = ptr; + } + } + + else + { + if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET) + { + rc = PCRE2_ERROR_REPMISSINGBRACE; + goto PTREXIT; + } + } + + ptr++; + } + + /* Have found a syntactically correct group number or name, or *name. + Only *MARK is currently recognized. */ + + if (star) + { + if (PRIV(strcmp_c8)(name, STRING_MARK) == 0) + { + PCRE2_SPTR mark = pcre2_get_mark(match_data); + if (mark != NULL) + { + PCRE2_SPTR mark_start = mark; + while (*mark != 0) mark++; + fraglength = mark - mark_start; + CHECKMEMCPY(mark_start, fraglength); + } + } + else goto BAD; + } + + /* Substitute the contents of a group. We don't use substring_copy + functions any more, in order to support case forcing. */ + + else + { + PCRE2_SPTR subptr, subptrend; + + /* Find a number for a named group. In case there are duplicate names, + search for the first one that is set. If the name is not found when + PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set, set the group number to a + non-existent group.
*/ + + if (group < 0) + { + PCRE2_SPTR first, last, entry; + rc = pcre2_substring_nametable_scan(code, name, &first, &last); + if (rc == PCRE2_ERROR_NOSUBSTRING && + (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) + { + group = code->top_bracket + 1; + } + else + { + if (rc < 0) goto PTREXIT; + for (entry = first; entry <= last; entry += rc) /* rc is used as the step between name table entries */ + { + uint32_t ng = GET2(entry, 0); + if (ng < ovector_count) + { + if (group < 0) group = ng; /* First in ovector */ + if (ovector[ng*2] != PCRE2_UNSET) + { + group = ng; /* First that is set */ + break; + } + } + } + + /* If group is still negative, it means we did not find a group + that is in the ovector. Just set the first group. */ + + if (group < 0) group = GET2(first, 0); + } + } + + /* We now have a group that is identified by number. Find the length of + the captured string. If a group in a non-special substitution is unset + when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */ + + rc = pcre2_substring_length_bynumber(match_data, group, &sublength); + if (rc < 0) + { + if (rc == PCRE2_ERROR_NOSUBSTRING && + (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) + { + rc = PCRE2_ERROR_UNSET; + } + if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */ + if (special == 0) /* Plain substitution */ + { + if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue; + goto PTREXIT; /* Else error */ + } + } + + /* If special is '+' we have a 'set' and possibly an 'unset' text, + both of which are reprocessed when used. If special is '-' we have a + default text for when the group is unset; it must be reprocessed.
*/ + + if (special != 0) + { + if (special == CHAR_MINUS) + { + if (rc == 0) goto LITERAL_SUBSTITUTE; + text2_start = text1_start; + text2_end = text1_end; + } + + if (ptrstackptr >= PTR_STACK_SIZE) goto BAD; /* Entries are pushed and popped in pairs, so ptrstackptr is even and this leaves room for both */ + ptrstack[ptrstackptr++] = ptr; + ptrstack[ptrstackptr++] = repend; + + if (rc == 0) + { + ptr = text1_start; + repend = text1_end; + } + else + { + ptr = text2_start; + repend = text2_end; + } + continue; + } + + /* Otherwise we have a literal substitution of a group's contents. */ + + LITERAL_SUBSTITUTE: + subptr = subject + ovector[group*2]; + subptrend = subject + ovector[group*2 + 1]; + + /* Substitute a literal string, possibly forcing alphabetic case. */ + + while (subptr < subptrend) + { + GETCHARINCTEST(ch, subptr); + if (forcecase != 0) + { +#ifdef SUPPORT_UNICODE + if (utf || ucp) + { + uint32_t type = UCD_CHARTYPE(ch); + if (PRIV(ucp_gentype)[type] == ucp_L && + type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) + ch = UCD_OTHERCASE(ch); + } + else +#endif + { + if (((code->tables + cbits_offset + + ((forcecase > 0)? cbit_upper:cbit_lower) + )[ch/8] & (1u << (ch%8))) == 0) + ch = (code->tables + fcc_offset)[ch]; + } + forcecase = forcecasereset; + } + +#ifdef SUPPORT_UNICODE + if (utf) chlen = PRIV(ord2utf)(ch, temp); else +#endif + { + temp[0] = ch; + chlen = 1; + } + CHECKMEMCPY(temp, chlen); + } + } + } + + /* Handle an escape sequence in extended mode. We can use check_escape() + to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but + the case-forcing escapes are not supported in pcre2_compile() so must be + recognized here.
*/ + + else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 && + *ptr == CHAR_BACKSLASH) + { + int errorcode; + + if (ptr < repend - 1) switch (ptr[1]) + { + case CHAR_L: /* Lower case until changed or reset */ + forcecase = forcecasereset = -1; + ptr += 2; + continue; + + case CHAR_l: /* Lower case for the next character only */ + forcecase = -1; + forcecasereset = 0; + ptr += 2; + continue; + + case CHAR_U: /* Upper case until changed or reset */ + forcecase = forcecasereset = 1; + ptr += 2; + continue; + + case CHAR_u: /* Upper case for the next character only */ + forcecase = 1; + forcecasereset = 0; + ptr += 2; + continue; + + default: + break; + } + + ptr++; /* Point after \ */ + rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode, + code->overall_options, code->extra_options, FALSE, NULL); + if (errorcode != 0) goto BADESCAPE; + + switch(rc) + { + case ESC_E: /* Outside literal mode, \E cancels case forcing */ + forcecase = forcecasereset = 0; + continue; + + case ESC_Q: + escaped_literal = TRUE; + continue; + + case 0: /* Data character */ + goto LITERAL; + + default: + goto BADESCAPE; + } + } + + /* Handle a literal code unit */ + + else + { + LOADLITERAL: + GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */ + + LITERAL: + if (forcecase != 0) + { +#ifdef SUPPORT_UNICODE + if (utf || ucp) + { + uint32_t type = UCD_CHARTYPE(ch); + if (PRIV(ucp_gentype)[type] == ucp_L && + type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) + ch = UCD_OTHERCASE(ch); + } + else +#endif + { + if (((code->tables + cbits_offset + + ((forcecase > 0)? cbit_upper:cbit_lower) + )[ch/8] & (1u << (ch%8))) == 0) + ch = (code->tables + fcc_offset)[ch]; + } + forcecase = forcecasereset; + } + +#ifdef SUPPORT_UNICODE + if (utf) chlen = PRIV(ord2utf)(ch, temp); else +#endif + { + temp[0] = ch; + chlen = 1; + } + CHECKMEMCPY(temp, chlen); + } /* End handling a literal code unit */ + } /* End of loop for scanning the replacement. */ + + /* The replacement has been copied to the output, or its size has been + remembered. Do the callout if there is one and we have done an actual + replacement.
*/ + + if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL) + { + scb.subscount = subs; + scb.output_offsets[1] = buff_offset; + rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data); + + /* A non-zero return means cancel this substitution. Instead, copy the + matched string fragment. */ + + if (rc != 0) + { + PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0]; + PCRE2_SIZE oldlength = ovector[1] - ovector[0]; + + buff_offset -= newlength; /* Roll the output back to where this replacement began */ + lengthleft += newlength; + if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength); + + /* A negative return means do not do any more. */ + + if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL); + } + } + + /* Save the details of this match. See above for how this data is used. If we + matched an empty string, do the magic for global matches. Update the start + offset to point to the rest of the subject string. If we re-used an existing + match for the first match, switch to the internal match data block. */ + + ovecsave[0] = ovector[0]; + ovecsave[1] = ovector[1]; + ovecsave[2] = start_offset; + + goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 : + PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART; + start_offset = ovector[1]; + } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */ + +/* Copy the rest of the subject unless not required, and terminate the output +with a binary zero. */ + +if (!replacement_only) + { + fraglength = length - start_offset; + CHECKMEMCPY(subject + start_offset, fraglength); + } + +temp[0] = 0; +CHECKMEMCPY(temp, 1); + +/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set, +and matching has carried on after a full buffer, in order to compute the length +needed. Otherwise, an overflow generates an immediate error return.
*/ + +if (overflowed) + { + rc = PCRE2_ERROR_NOMEMORY; + *blength = buff_length + extra_needed; + } + +/* After a successful execution, return the number of substitutions and set the +length of buffer used, excluding the trailing zero. */ + +else + { + rc = subs; + *blength = buff_offset - 1; + } + +EXIT: +if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data); + else match_data->rc = rc; +return rc; + +NOROOM: +rc = PCRE2_ERROR_NOMEMORY; +goto EXIT; + +BAD: +rc = PCRE2_ERROR_BADREPLACEMENT; +goto PTREXIT; + +BADESCAPE: +rc = PCRE2_ERROR_BADREPESCAPE; + +PTREXIT: /* Syntax errors report the current offset in the replacement through *blength */ +*blength = (PCRE2_SIZE)(ptr - replacement); +goto EXIT; +} + +/* End of pcre2_substitute.c */ diff --git a/src/pcre2_substring.c b/src/pcre2_substring.c new file mode 100644 index 0000000..14e919d --- /dev/null +++ b/src/pcre2_substring.c @@ -0,0 +1,550 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2023 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution.
+ + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + + + +/************************************************* +* Copy named captured string to given buffer * +*************************************************/ + +/* This function copies a single captured substring into a given buffer, +identifying it by name. If the regex permits duplicate names, the first +substring that is set is chosen. 
+ +Arguments: + match_data points to the match data + stringname the name of the required substring + buffer where to put the substring + sizeptr the size of the buffer, updated to the size of the substring + +Returns: if successful: zero + if not successful, a negative error code: + (1) an error from nametable_scan() + (2) an error from copy_bynumber() + (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector + (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname, + PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr) +{ +PCRE2_SPTR first, last, entry; +int failrc, entrysize; +if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER) /* Refuse match data whose matchedby field records the DFA interpreter */ + return PCRE2_ERROR_DFA_UFUNC; +entrysize = pcre2_substring_nametable_scan(match_data->code, stringname, + &first, &last); +if (entrysize < 0) return entrysize; /* Negative is an error code; otherwise it is the step between entries */ +failrc = PCRE2_ERROR_UNAVAILABLE; /* Returned if no candidate group number is below oveccount */ +for (entry = first; entry <= last; entry += entrysize) + { + uint32_t n = GET2(entry, 0); + if (n < match_data->oveccount) + { + if (match_data->ovector[n*2] != PCRE2_UNSET) + return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr); + failrc = PCRE2_ERROR_UNSET; /* In the ovector but unset; try the next entry */ + } + } +return failrc; +} + + + +/************************************************* +* Copy numbered captured string to given buffer * +*************************************************/ + +/* This function copies a single captured substring into a given buffer, +identifying it by number.
+ +Arguments: + match_data points to the match data + stringnumber the number of the required substring + buffer where to put the substring + sizeptr the size of the buffer, updated to the size of the substring + +Returns: if successful: 0 + if not successful, a negative error code: + PCRE2_ERROR_NOMEMORY: buffer too small + PCRE2_ERROR_NOSUBSTRING: no such substring + PCRE2_ERROR_UNAVAILABLE: ovector too small + PCRE2_ERROR_UNSET: substring is not set +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_substring_copy_bynumber(pcre2_match_data *match_data, + uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr) +{ +int rc; +PCRE2_SIZE size; +rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size); +if (rc < 0) return rc; +if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY; /* Room is needed for the terminating zero as well */ +memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2], + CU2BYTES(size)); +buffer[size] = 0; +*sizeptr = size; /* The stored length excludes the terminating zero */ +return 0; +} + + + +/************************************************* +* Extract named captured string * +*************************************************/ + +/* This function copies a single captured substring, identified by name, into +new memory. If the regex permits duplicate names, the first substring that is +set is chosen.
+ +Arguments: + match_data pointer to match_data + stringname the name of the required substring + stringptr where to put the pointer to the new memory + sizeptr where to put the length of the substring + +Returns: if successful: zero + if not successful, a negative value: + (1) an error from nametable_scan() + (2) an error from get_bynumber() + (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector + (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_substring_get_byname(pcre2_match_data *match_data, + PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr) +{ +PCRE2_SPTR first, last, entry; +int failrc, entrysize; +if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER) /* Refuse match data whose matchedby field records the DFA interpreter */ + return PCRE2_ERROR_DFA_UFUNC; +entrysize = pcre2_substring_nametable_scan(match_data->code, stringname, + &first, &last); +if (entrysize < 0) return entrysize; /* Negative is an error code; otherwise it is the step between entries */ +failrc = PCRE2_ERROR_UNAVAILABLE; /* Returned if no candidate group number is below oveccount */ +for (entry = first; entry <= last; entry += entrysize) + { + uint32_t n = GET2(entry, 0); + if (n < match_data->oveccount) + { + if (match_data->ovector[n*2] != PCRE2_UNSET) + return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr); + failrc = PCRE2_ERROR_UNSET; /* In the ovector but unset; try the next entry */ + } + } +return failrc; +} + + + +/************************************************* +* Extract captured string to new memory * +*************************************************/ + +/* This function copies a single captured substring into a piece of new +memory.
+ +Arguments: + match_data points to match data + stringnumber the number of the required substring + stringptr where to put a pointer to the new memory + sizeptr where to put the size of the substring + +Returns: if successful: 0 + if not successful, a negative error code: + PCRE2_ERROR_NOMEMORY: failed to get memory + PCRE2_ERROR_NOSUBSTRING: no such substring + PCRE2_ERROR_UNAVAILABLE: ovector too small + PCRE2_ERROR_UNSET: substring is not set +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_substring_get_bynumber(pcre2_match_data *match_data, + uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr) +{ +int rc; +PCRE2_SIZE size; +PCRE2_UCHAR *yield; +rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size); +if (rc < 0) return rc; +yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) + + (size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data); +if (yield == NULL) return PCRE2_ERROR_NOMEMORY; +yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl)); +memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2], + CU2BYTES(size)); +yield[size] = 0; +*stringptr = yield; +*sizeptr = size; +return 0; +} + + + +/************************************************* +* Free memory obtained by get_substring * +*************************************************/ + +/* +Argument: the result of a previous pcre2_substring_get_byxxx() +Returns: nothing +*/ + +PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_substring_free(PCRE2_UCHAR *string) +{ +if (string != NULL) + { + pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl)); + memctl->free(memctl, memctl->memory_data); + } +} + + + +/************************************************* +* Get length of a named substring * +*************************************************/ + +/* This function returns the length of a named captured substring. If the regex +permits duplicate names, the first substring that is set is chosen. 
+
+Arguments:
+  match_data      pointer to match data
+  stringname      the name of the required substring
+  sizeptr         where to put the length
+
+Returns:          0 if successful, else a negative error number
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substring_length_byname(pcre2_match_data *match_data,
+  PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
+{
+PCRE2_SPTR lo, hi, cursor;
+int step;
+int result = PCRE2_ERROR_UNAVAILABLE;
+
+/* Match data produced by the DFA interpreter is rejected outright. */
+
+if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
+  return PCRE2_ERROR_DFA_UFUNC;
+
+/* Find the run of name table entries carrying this name; the scan returns
+the size of one entry, or a negative error code. */
+
+step = pcre2_substring_nametable_scan(match_data->code, stringname, &lo, &hi);
+if (step < 0) return step;
+
+/* Measure the first group that is both inside the ovector and set. Seeing a
+group that is inside the ovector but unset changes the failure code from
+UNAVAILABLE to UNSET. */
+
+for (cursor = lo; cursor <= hi; cursor += step)
+  {
+  uint32_t group = GET2(cursor, 0);
+  if (group >= match_data->oveccount) continue;
+  if (match_data->ovector[2*group] == PCRE2_UNSET)
+    {
+    result = PCRE2_ERROR_UNSET;
+    continue;
+    }
+  return pcre2_substring_length_bynumber(match_data, group, sizeptr);
+  }
+
+return result;
+}
+
+
+
+/*************************************************
+*        Get length of a numbered substring      *
+*************************************************/
+
+/* This function returns the length of a captured substring. If the start is
+beyond the end (which can happen when \K is used in an assertion), it sets the
+length to zero.
+
+Arguments:
+  match_data      pointer to match data
+  stringnumber    the number of the required substring
+  sizeptr         where to put the length, if not NULL
+
+Returns:         if successful: 0
+                 if not successful, a negative error code:
+                   PCRE2_ERROR_NOSUBSTRING: no such substring
+                   PCRE2_ERROR_UNAVAILABLE: ovector is too small
+                   PCRE2_ERROR_UNSET: substring is not set
+                   PCRE2_ERROR_INVALIDOFFSET: internal error, should not occur
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substring_length_bynumber(pcre2_match_data *match_data,
+  uint32_t stringnumber, PCRE2_SIZE *sizeptr)
+{
+PCRE2_SIZE start, end;
+int rc = match_data->rc;
+
+/* After a partial match only substring 0 can be asked for; any other match
+failure code is returned unchanged. */
+
+if (rc == PCRE2_ERROR_PARTIAL)
+  {
+  if (stringnumber != 0) return PCRE2_ERROR_PARTIAL;
+  rc = 0;
+  }
+else if (rc < 0) return rc;
+
+if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
+  {
+  /* Matched using pcre2_dfa_match(): the group count of the pattern is not
+  consulted, only the ovector size and the number of results returned. */
+
+  if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
+  if (rc != 0 && stringnumber >= (uint32_t)rc) return PCRE2_ERROR_UNSET;
+  }
+else
+  {
+  if (stringnumber > match_data->code->top_bracket)
+    return PCRE2_ERROR_NOSUBSTRING;
+  if (stringnumber >= match_data->oveccount)
+    return PCRE2_ERROR_UNAVAILABLE;
+  if (match_data->ovector[2*stringnumber] == PCRE2_UNSET)
+    return PCRE2_ERROR_UNSET;
+  }
+
+start = match_data->ovector[2*stringnumber];
+end = match_data->ovector[2*stringnumber + 1];
+
+if (start > match_data->subject_length || end > match_data->subject_length)
+  return PCRE2_ERROR_INVALIDOFFSET;
+
+/* A start that lies beyond the end yields a length of zero. */
+
+if (sizeptr != NULL)
+  {
+  if (end >= start) *sizeptr = end - start; else *sizeptr = 0;
+  }
+
+return 0;
+}
+
+
+
+/*************************************************
+*   Extract all captured strings to new memory   *
+*************************************************/
+
+/* This function gets one chunk of memory and builds a list of pointers and all
+the captured substrings in it. A NULL pointer is put on the end of the list.
+The substrings are zero-terminated, but also, if the final argument is
+non-NULL, a list of lengths is also returned. This allows binary data to be
+handled.
+
+Arguments:
+  match_data     points to the match data
+  listptr        set to point to the list of pointers
+  lengthsptr     set to point to the list of lengths (may be NULL)
+
+Returns:         if successful: 0
+                 if not successful, a negative error code:
+                   PCRE2_ERROR_NOMEMORY: failed to get memory,
+                   or a match failure code
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
+  PCRE2_SIZE **lengthsptr)
+{
+int k, ngroups;
+PCRE2_SIZE total, len;
+PCRE2_SIZE *ovec = match_data->ovector;
+PCRE2_SIZE *lengths;
+PCRE2_UCHAR **slots;
+PCRE2_UCHAR *out;
+pcre2_memctl *block;
+
+ngroups = match_data->rc;
+if (ngroups < 0) return ngroups;                      /* Match failed */
+if (ngroups == 0) ngroups = match_data->oveccount;    /* Ovector too small */
+
+/* First pass: add up the space needed. The block holds the memctl header,
+one pointer per substring plus the final NULL, optionally one length per
+substring, and then each substring with its terminating zero. */
+
+total = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *);
+if (lengthsptr != NULL) total += sizeof(PCRE2_SIZE) * ngroups;
+
+for (k = 0; k < ngroups; k++)
+  {
+  PCRE2_SIZE from = ovec[2*k], to = ovec[2*k + 1];
+  total += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
+  if (to > from) total += CU2BYTES(to - from);
+  }
+
+block = PRIV(memctl_malloc)(total, (pcre2_memctl *)match_data);
+if (block == NULL) return PCRE2_ERROR_NOMEMORY;
+
+/* Carve the block up: the pointer list comes straight after the header, the
+lengths (if wanted) follow the pointer list, and the string data comes last. */
+
+slots = (PCRE2_UCHAR **)((char *)block + sizeof(pcre2_memctl));
+*listptr = slots;
+lengths = (PCRE2_SIZE *)((char *)slots + sizeof(PCRE2_UCHAR *) * (ngroups + 1));
+
+if (lengthsptr != NULL)
+  {
+  *lengthsptr = lengths;
+  out = (PCRE2_UCHAR *)((char *)lengths + sizeof(PCRE2_SIZE) * ngroups);
+  }
+else
+  {
+  out = (PCRE2_UCHAR *)lengths;
+  lengths = NULL;
+  }
+
+/* Second pass: copy the substrings and fill in the lists. */
+
+for (k = 0; k < ngroups; k++)
+  {
+  PCRE2_SIZE from = ovec[2*k], to = ovec[2*k + 1];
+  len = (to > from)? to - from : 0;
+
+  /* A zero length includes the case when the capture is unset. Avoid adding
+  PCRE2_UNSET to match_data->subject because it overflows, even though with
+  zero size calling memcpy() is harmless. */
+
+  if (len != 0) memcpy(out, match_data->subject + from, CU2BYTES(len));
+  *slots++ = out;
+  if (lengths != NULL) *lengths++ = len;
+  out += len;
+  *out++ = 0;
+  }
+
+*slots = NULL;
+return 0;
+}
+
+
+
+/*************************************************
+*   Free memory obtained by substring_list_get   *
+*************************************************/
+
+/*
+Argument:     the result of a previous pcre2_substring_list_get()
+Returns:      nothing
+*/
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_substring_list_free(PCRE2_UCHAR **list)
+{
+pcre2_memctl *memctl;
+
+if (list == NULL) return;
+
+/* The pcre2_memctl header sits immediately in front of the pointer list. */
+
+memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
+memctl->free(memctl, memctl->memory_data);
+}
+
+
+
+/*************************************************
+*     Find (multiple) entries for named string   *
+*************************************************/
+
+/* This function scans the nametable for a given name, using binary chop. It
+returns either two pointers to the entries in the table, or, if no pointers are
+given, the number of a unique group with the given name. If duplicate names are
+permitted, and the name is not unique, an error is generated.
+
+Arguments:
+  code        the compiled regex
+  stringname  the name whose entries required
+  firstptr    where to put the pointer to the first entry
+  lastptr     where to put the pointer to the last entry
+
+Returns:      PCRE2_ERROR_NOSUBSTRING if the name is not found
+              otherwise, if firstptr and lastptr are NULL:
+                a group number for a unique substring
+                else PCRE2_ERROR_NOUNIQUESUBSTRING
+              otherwise:
+                the length of each entry, having set firstptr and lastptr
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
+  PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
+{
+uint16_t low = 0;
+uint16_t high = code->name_count;
+uint16_t stride = code->name_entry_size;
+PCRE2_SPTR table = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code));
+
+/* Binary chop over the half-open range [low, high) of entry indexes. */
+
+while (low < high)
+  {
+  PCRE2_SPTR head, foot, tail;
+  uint16_t probe = (low + high) / 2;
+  PCRE2_SPTR hit = table + stride*probe;
+  int cmp = PRIV(strcmp)(stringname, hit + IMM2_SIZE);
+
+  if (cmp != 0)
+    {
+    if (cmp > 0) low = probe + 1; else high = probe;
+    continue;
+    }
+
+  /* Found one entry for the name. Widen outwards in both directions to take
+  in every neighbouring entry that carries the same name. */
+
+  tail = table + stride * (code->name_count - 1);
+  head = foot = hit;
+
+  while (head > table &&
+         PRIV(strcmp)(stringname, (head - stride + IMM2_SIZE)) == 0)
+    head -= stride;
+
+  while (foot < tail &&
+         PRIV(strcmp)(stringname, (foot + stride + IMM2_SIZE)) == 0)
+    foot += stride;
+
+  /* Without result pointers, only a unique name can be resolved to a group
+  number. */
+
+  if (firstptr == NULL)
+    return (head == foot)? (int)GET2(hit, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
+
+  *firstptr = head;
+  *lastptr = foot;
+  return stride;
+  }
+
+return PCRE2_ERROR_NOSUBSTRING;
+}
+
+
+/*************************************************
+*           Find number for named string         *
+*************************************************/
+
+/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
+when it is known that names are unique. If there are duplicate names, it is not
+defined which number is returned.
+ +Arguments: + code the compiled regex + stringname the name whose number is required + +Returns: the number of the named parenthesis, or a negative number + PCRE2_ERROR_NOSUBSTRING if not found + PCRE2_ERROR_NOUNIQUESUBSTRING if not unique +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_substring_number_from_name(const pcre2_code *code, + PCRE2_SPTR stringname) +{ +/* Passing NULL for both entry pointers makes the scan return a group number +directly: the number of a uniquely named group, or +PCRE2_ERROR_NOUNIQUESUBSTRING when more than one entry carries the name. */ +return pcre2_substring_nametable_scan(code, stringname, NULL, NULL); +} + +/* End of pcre2_substring.c */ diff --git a/src/pcre2_tables.c b/src/pcre2_tables.c new file mode 100644 index 0000000..097a1ac --- /dev/null +++ b/src/pcre2_tables.c @@ -0,0 +1,234 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* This module contains some fixed tables that are used by more than one of the +PCRE2 code modules. The tables are also #included by the pcre2test program, +which uses macros to change their names from _pcre2_xxx to xxxx, thereby +avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is +defined. */ + +#ifndef PCRE2_PCRE2TEST /* We're compiling the library */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "pcre2_internal.h" +#endif /* PCRE2_PCRE2TEST */ + +/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that +the definition is next to the definition of the opcodes in pcre2_internal.h. +This is mode-dependent, so it is skipped when this file is included by +pcre2test. */ + +#ifndef PCRE2_PCRE2TEST +const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS }; +#endif + +/* Tables of horizontal and vertical whitespace characters, suitable for +adding to classes. */ + +const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST }; +const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST }; + +/* These tables are the pairs of delimiters that are valid for callout string +arguments. 
For each starting delimiter there must be a matching ending +delimiter, which in fact is different only for bracket-like delimiters. */ + +const uint32_t PRIV(callout_start_delims)[] = { + CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK, + CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN, + CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 }; + +const uint32_t PRIV(callout_end_delims[]) = { + CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK, + CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN, + CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 }; + + +/************************************************* +* Tables for UTF-8 support * +*************************************************/ + +/* These tables are required by pcre2test in 16- or 32-bit mode, as well +as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for +handling wide characters. */ + +#if defined PCRE2_PCRE2TEST || \ + (defined SUPPORT_UNICODE && \ + defined PCRE2_CODE_UNIT_WIDTH && \ + PCRE2_CODE_UNIT_WIDTH == 8) + +/* These are the breakpoints for different numbers of bytes in a UTF-8 +character. */ + +const int PRIV(utf8_table1)[] = + { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; + +const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int); + +/* These are the indicator bits and the mask for the data bits to set in the +first byte of a character, indexed by the number of additional bytes. */ + +const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; +const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; + +/* Table of the number of extra bytes, indexed by the first byte masked with +0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. 
*/ + +const uint8_t PRIV(utf8_table4)[] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; + +#endif /* UTF-8 support needed */ + +/* Tables concerned with Unicode properties are relevant only when Unicode +support is enabled. See also the pcre2_ucptables.c file, which is generated by +a Python script from Unicode data files. */ + +#ifdef SUPPORT_UNICODE + +/* Table to translate from particular type value to the general value. */ + +const uint32_t PRIV(ucp_gentype)[] = { + ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */ + ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */ + ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */ + ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */ + ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */ + ucp_P, ucp_P, /* Ps, Po */ + ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */ + ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */ +}; + +/* This table encodes the rules for finding the end of an extended grapheme +cluster. Every code point has a grapheme break property which is one of the +ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions +10 and 11. The 2-dimensional table is indexed by the properties of two adjacent +code points. The left property selects a word from the table, and the right +property selects a bit from that word like this: + + PRIV(ucp_gbtable)[left-property] & (1u << right-property) + +The value is non-zero if a grapheme break is NOT permitted between the relevant +two code points. The breaking rules are as follows: + +1. Break at the start and end of text (pretty obviously). + +2. Do not break between a CR and LF; otherwise, break before and after + controls. + +3. Do not break Hangul syllable sequences, the rules for which are: + + L may be followed by L, V, LV or LVT + LV or V may be followed by V or T + LVT or T may be followed by T + +4. 
Do not break before extending characters or zero-width-joiner (ZWJ). + +The following rules are only for extended grapheme clusters (but that's what we +are implementing). + +5. Do not break before SpacingMarks. + +6. Do not break after Prepend characters. + +7. Do not break within emoji modifier sequences or emoji zwj sequences. That + is, do not break between characters with the Extended_Pictographic property + if a ZWJ intervenes. Extend characters are allowed between the characters; + this cannot be represented in this table, the code has to deal with it. + +8. Do not break within emoji flag sequences. That is, do not break between + regional indicator (RI) symbols if there are an odd number of RI characters + before the break point. This table encodes "join RI characters"; the code + has to deal with checking for previous adjoining RIs. + +9. Otherwise, break everywhere. +*/ + +#define ESZ (1< 0x10ffff is not permitted +PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted +PCRE2_ERROR_UTF8_ERR15 Overlong 2-byte sequence +PCRE2_ERROR_UTF8_ERR16 Overlong 3-byte sequence +PCRE2_ERROR_UTF8_ERR17 Overlong 4-byte sequence +PCRE2_ERROR_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur) +PCRE2_ERROR_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur) +PCRE2_ERROR_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character) +PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff +*/ + +for (p = string; length > 0; p++) + { + uint32_t ab, d; + + c = *p; + length--; + + if (c < 128) continue; /* ASCII character */ + + if (c < 0xc0) /* Isolated 10xx xxxx byte */ + { + *erroroffset = (PCRE2_SIZE)(p - string); + return PCRE2_ERROR_UTF8_ERR20; + } + + if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */ + { + *erroroffset = (PCRE2_SIZE)(p - string); + return PCRE2_ERROR_UTF8_ERR21; + } + + ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */ + if (length < ab) /* Missing bytes */ + { + *erroroffset = (PCRE2_SIZE)(p - 
string); + switch(ab - length) + { + case 1: return PCRE2_ERROR_UTF8_ERR1; + case 2: return PCRE2_ERROR_UTF8_ERR2; + case 3: return PCRE2_ERROR_UTF8_ERR3; + case 4: return PCRE2_ERROR_UTF8_ERR4; + case 5: return PCRE2_ERROR_UTF8_ERR5; + } + } + length -= ab; /* Length remaining */ + + /* Check top bits in the second byte */ + + if (((d = *(++p)) & 0xc0) != 0x80) + { + *erroroffset = (PCRE2_SIZE)(p - string) - 1; + return PCRE2_ERROR_UTF8_ERR6; + } + + /* For each length, check that the remaining bytes start with the 0x80 bit + set and not the 0x40 bit. Then check for an overlong sequence, and for the + excluded range 0xd800 to 0xdfff. */ + + switch (ab) + { + /* 2-byte character. No further bytes to check for 0x80. Check first byte + for for xx00 000x (overlong sequence). */ + + case 1: if ((c & 0x3e) == 0) + { + *erroroffset = (PCRE2_SIZE)(p - string) - 1; + return PCRE2_ERROR_UTF8_ERR15; + } + break; + + /* 3-byte character. Check third byte for 0x80. Then check first 2 bytes + for 1110 0000, xx0x xxxx (overlong sequence) or + 1110 1101, 1010 xxxx (0xd800 - 0xdfff) */ + + case 2: + if ((*(++p) & 0xc0) != 0x80) /* Third byte */ + { + *erroroffset = (PCRE2_SIZE)(p - string) - 2; + return PCRE2_ERROR_UTF8_ERR7; + } + if (c == 0xe0 && (d & 0x20) == 0) + { + *erroroffset = (PCRE2_SIZE)(p - string) - 2; + return PCRE2_ERROR_UTF8_ERR16; + } + if (c == 0xed && d >= 0xa0) + { + *erroroffset = (PCRE2_SIZE)(p - string) - 2; + return PCRE2_ERROR_UTF8_ERR14; + } + break; + + /* 4-byte character. Check 3rd and 4th bytes for 0x80. 
Then check first 2 + bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a + character greater than 0x0010ffff (f4 8f bf bf) */ + + case 3: + if ((*(++p) & 0xc0) != 0x80) /* Third byte */ + { + *erroroffset = (PCRE2_SIZE)(p - string) - 2; + return PCRE2_ERROR_UTF8_ERR7; + } + if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */ + { + *erroroffset = (PCRE2_SIZE)(p - string) - 3; + return PCRE2_ERROR_UTF8_ERR8; + } + if (c == 0xf0 && (d & 0x30) == 0) + { + *erroroffset = (PCRE2_SIZE)(p - string) - 3; + return PCRE2_ERROR_UTF8_ERR17; + } + if (c > 0xf4 || (c == 0xf4 && d > 0x8f)) + { + *erroroffset = (PCRE2_SIZE)(p - string) - 3; + return PCRE2_ERROR_UTF8_ERR13; + } + break; + + /* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be + rejected by the length test below. However, we do the appropriate tests + here so that overlong sequences get diagnosed, and also in case there is + ever an option for handling these larger code points. */ + + /* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for + 1111 1000, xx00 0xxx */ + + case 4: + if ((*(++p) & 0xc0) != 0x80) /* Third byte */ + { + *erroroffset = (PCRE2_SIZE)(p - string) - 2; + return PCRE2_ERROR_UTF8_ERR7; + } + if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */ + { + *erroroffset = (PCRE2_SIZE)(p - string) - 3; + return PCRE2_ERROR_UTF8_ERR8; + } + if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */ + { + *erroroffset = (PCRE2_SIZE)(p - string) - 4; + return PCRE2_ERROR_UTF8_ERR9; + } + if (c == 0xf8 && (d & 0x38) == 0) + { + *erroroffset = (PCRE2_SIZE)(p - string) - 4; + return PCRE2_ERROR_UTF8_ERR18; + } + break; + + /* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for + 1111 1100, xx00 00xx. 
*/ + + case 5: + if ((*(++p) & 0xc0) != 0x80) /* Third byte */ + { + *erroroffset = (PCRE2_SIZE)(p - string) - 2; + return PCRE2_ERROR_UTF8_ERR7; + } + if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */ + { + *erroroffset = (PCRE2_SIZE)(p - string) - 3; + return PCRE2_ERROR_UTF8_ERR8; + } + if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */ + { + *erroroffset = (PCRE2_SIZE)(p - string) - 4; + return PCRE2_ERROR_UTF8_ERR9; + } + if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */ + { + *erroroffset = (PCRE2_SIZE)(p - string) - 5; + return PCRE2_ERROR_UTF8_ERR10; + } + if (c == 0xfc && (d & 0x3c) == 0) + { + *erroroffset = (PCRE2_SIZE)(p - string) - 5; + return PCRE2_ERROR_UTF8_ERR19; + } + break; + } + + /* Character is valid under RFC 2279, but 4-byte and 5-byte characters are + excluded by RFC 3629. The pointer p is currently at the last byte of the + character. */ + + if (ab > 3) + { + *erroroffset = (PCRE2_SIZE)(p - string) - ab; + return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12; + } + } +return 0; + + +/* ----------------- Check a UTF-16 string ----------------- */ + +#elif PCRE2_CODE_UNIT_WIDTH == 16 + +/* There's not so much work, nor so many errors, for UTF-16. +PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string +PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate +PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate +*/ + +for (p = string; length > 0; p++) + { + c = *p; + length--; + + if ((c & 0xf800) != 0xd800) + { + /* Normal UTF-16 code point. Neither high nor low surrogate. */ + } + else if ((c & 0x0400) == 0) + { + /* High surrogate. Must be a followed by a low surrogate. */ + if (length == 0) + { + *erroroffset = (PCRE2_SIZE)(p - string); + return PCRE2_ERROR_UTF16_ERR1; + } + p++; + length--; + if ((*p & 0xfc00) != 0xdc00) + { + *erroroffset = (PCRE2_SIZE)(p - string) - 1; + return PCRE2_ERROR_UTF16_ERR2; + } + } + else + { + /* Isolated low surrogate. Always an error. 
*/ + *erroroffset = (PCRE2_SIZE)(p - string); + return PCRE2_ERROR_UTF16_ERR3; + } + } +return 0; + + + +/* ----------------- Check a UTF-32 string ----------------- */ + +#else + +/* There is very little to do for a UTF-32 string. +PCRE2_ERROR_UTF32_ERR1 Surrogate character +PCRE2_ERROR_UTF32_ERR2 Character > 0x10ffff +*/ + +for (p = string; length > 0; length--, p++) + { + c = *p; + if ((c & 0xfffff800u) != 0xd800u) + { + /* Normal UTF-32 code point. Neither high nor low surrogate. */ + if (c > 0x10ffffu) + { + *erroroffset = (PCRE2_SIZE)(p - string); + return PCRE2_ERROR_UTF32_ERR2; + } + } + else + { + /* A surrogate */ + *erroroffset = (PCRE2_SIZE)(p - string); + return PCRE2_ERROR_UTF32_ERR1; + } + } +return 0; +#endif /* CODE_UNIT_WIDTH */ +} +#endif /* SUPPORT_UNICODE */ + +/* End of pcre2_valid_utf.c */ diff --git a/src/pcre2_xclass.c b/src/pcre2_xclass.c new file mode 100644 index 0000000..5df25d2 --- /dev/null +++ b/src/pcre2_xclass.c @@ -0,0 +1,308 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2023 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* This module contains an internal function that is used to match an extended +class. It is used by pcre2_auto_possessify() and by both pcre2_match() and +pcre2_dfa_match(). */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#include "pcre2_internal.h" + +/************************************************* +* Match character against an XCLASS * +*************************************************/ + +/* This function is called to match a character against an extended class that +might contain codepoints above 255 and/or Unicode properties. 
+ +Arguments: + c the character + data points to the flag code unit of the XCLASS data + utf TRUE if in UTF mode + +Returns: TRUE if character matches, else FALSE +*/ + +BOOL +PRIV(xclass)(uint32_t c, PCRE2_SPTR data, BOOL utf) +{ +PCRE2_UCHAR t; +BOOL negated = (*data & XCL_NOT) != 0; + +#if PCRE2_CODE_UNIT_WIDTH == 8 +/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */ +utf = TRUE; +#endif + +/* Code points < 256 are matched against a bitmap, if one is present. If not, +we still carry on, because there may be ranges that start below 256 in the +additional data. */ + +if (c < 256) + { + if ((*data & XCL_HASPROP) == 0) + { + if ((*data & XCL_MAP) == 0) return negated; + return (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0; + } + if ((*data & XCL_MAP) != 0 && + (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0) + return !negated; /* char found */ + } + +/* First skip the bit map if present. Then match against the list of Unicode +properties or large chars or ranges that end with a large char. We won't ever +encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. 
*/ + +if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(PCRE2_UCHAR); + +/* Each list item starts with a type code unit; XCL_END terminates the list. */ +while ((t = *data++) != XCL_END) + { + uint32_t x, y; + if (t == XCL_SINGLE) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + GETCHARINC(x, data); /* macro generates multiple statements */ + } + else +#endif + x = *data++; + if (c == x) return !negated; + } + else if (t == XCL_RANGE) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + GETCHARINC(x, data); /* macro generates multiple statements */ + GETCHARINC(y, data); /* macro generates multiple statements */ + } + else +#endif + { + x = *data++; + y = *data++; + } + if (c >= x && c <= y) return !negated; + } + +#ifdef SUPPORT_UNICODE + else /* XCL_PROP & XCL_NOTPROP */ + { + int chartype; + const ucd_record *prop = GET_UCD(c); + BOOL isprop = t == XCL_PROP; /* FALSE for any other item type that reaches this branch */ + BOOL ok; + + switch(*data) + { + case PT_ANY: + if (isprop) return !negated; + break; + + case PT_LAMP: + chartype = prop->chartype; + if ((chartype == ucp_Lu || chartype == ucp_Ll || + chartype == ucp_Lt) == isprop) return !negated; + break; + + case PT_GC: + if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop) + return !negated; + break; + + case PT_PC: + if ((data[1] == prop->chartype) == isprop) return !negated; + break; + + case PT_SC: + if ((data[1] == prop->script) == isprop) return !negated; + break; + + case PT_SCX: + ok = (data[1] == prop->script || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0); + if (ok == isprop) return !negated; + break; + + case PT_ALNUM: + chartype = prop->chartype; + if ((PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N) == isprop) + return !negated; + break; + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. 
*/ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + switch(c) + { + HSPACE_CASES: + VSPACE_CASES: + if (isprop) return !negated; + break; + + default: + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop) + return !negated; + break; + } + break; + + case PT_WORD: + chartype = prop->chartype; + if ((PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc) == isprop) + return !negated; + break; + + case PT_UCNC: + if (c < 0xa0) + { + if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || + c == CHAR_GRAVE_ACCENT) == isprop) + return !negated; + } + else + { + if ((c < 0xd800 || c > 0xdfff) == isprop) + return !negated; + } + break; + + case PT_BIDICL: + if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop) + return !negated; + break; + + case PT_BOOL: + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), data[1]) != 0; + if (ok == isprop) return !negated; + break; + + /* The following three properties can occur only in an XCLASS, as there + is no \p or \P coding for them. */ + + /* Graphic character. Implement this as not Z (space or separator) and + not C (other), except for Cf (format) with a few exceptions. This seems + to be what Perl does. The exceptional characters are: + + U+061C Arabic Letter Mark + U+180E Mongolian Vowel Separator + U+2066 - U+2069 Various "isolate"s + */ + + case PT_PXGRAPH: + chartype = prop->chartype; + if ((PRIV(ucp_gentype)[chartype] != ucp_Z && + (PRIV(ucp_gentype)[chartype] != ucp_C || + (chartype == ucp_Cf && + c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069)) + )) == isprop) + return !negated; + break; + + /* Printable character: same as graphic, with the addition of Zs, i.e. + not Zl and not Zp, and U+180E. 
*/ + + case PT_PXPRINT: + chartype = prop->chartype; + if ((chartype != ucp_Zl && + chartype != ucp_Zp && + (PRIV(ucp_gentype)[chartype] != ucp_C || + (chartype == ucp_Cf && + c != 0x061c && (c < 0x2066 || c > 0x2069)) + )) == isprop) + return !negated; + break; + + /* Punctuation: all Unicode punctuation, plus ASCII characters that + Unicode treats as symbols rather than punctuation, for Perl + compatibility (these are $+<=>^`|~). */ + + case PT_PXPUNCT: + chartype = prop->chartype; + if ((PRIV(ucp_gentype)[chartype] == ucp_P || + (c < 128 && PRIV(ucp_gentype)[chartype] == ucp_S)) == isprop) + return !negated; + break; + + /* Perl has two sets of hex digits */ + + case PT_PXXDIGIT: + if (((c >= CHAR_0 && c <= CHAR_9) || + (c >= CHAR_A && c <= CHAR_F) || + (c >= CHAR_a && c <= CHAR_f) || + (c >= 0xff10 && c <= 0xff19) || /* Fullwidth digits */ + (c >= 0xff21 && c <= 0xff26) || /* Fullwidth letters */ + (c >= 0xff41 && c <= 0xff46)) == isprop) + return !negated; + break; + + /* This should never occur, but compilers may mutter if there is no + default. */ + + default: + return FALSE; + } + + /* Every property item is stepped over as two code units after its type code: the PT_ selector tested above and the unit after it, which some cases read as data[1]. */ + data += 2; + } +#else + (void)utf; /* Avoid compiler warning */ +#endif /* SUPPORT_UNICODE */ + } + +return negated; /* char did not match */ +} + +/* End of pcre2_xclass.c */ diff --git a/src/pcre2demo.c b/src/pcre2demo.c new file mode 100644 index 0000000..de2e584 --- /dev/null +++ b/src/pcre2demo.c @@ -0,0 +1,497 @@ +/************************************************* +* PCRE2 DEMONSTRATION PROGRAM * +*************************************************/ + +/* This is a demonstration program to illustrate a straightforward way of +using the PCRE2 regular expression library from a C program. See the +pcre2sample documentation for a short discussion ("man pcre2sample" if you have +the PCRE2 man pages installed). 
PCRE2 is a revised API for the library, and is +incompatible with the original PCRE API. 
+ +There are actually three libraries, each supporting a different code unit +width. This demonstration program uses the 8-bit library. The default is to +process each code unit as a separate character, but if the pattern begins with +"(*UTF)", both it and the subject are treated as UTF-8 strings, where +characters may occupy multiple code units. + +In Unix-like environments, if PCRE2 is installed in your standard system +libraries, you should be able to compile this program using this command: + +cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo + +If PCRE2 is not installed in a standard place, it is likely to be installed +with support for the pkg-config mechanism. If you have pkg-config, you can +compile this program using this command: + +cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo + +If you do not have pkg-config, you may have to use something like this: + +cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \ + -R/usr/local/lib -lpcre2-8 -o pcre2demo + +Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and +library files for PCRE2 are installed on your system. Only some operating +systems (Solaris is one) use the -R option. + +Building under Windows: + +If you want to statically link this program against a non-dll .a file, you must +define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment +the following line. */ + +/* #define PCRE2_STATIC */ + +/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h. +For a program that uses only one code unit width, setting it to 8, 16, or 32 +makes it possible to use generic function names such as pcre2_compile(). Note +that just changing 8 to 16 (for example) is not sufficient to convert this +program to process 16-bit characters. 
Even in a fully 16-bit environment, where +string-handling functions such as strcmp() and printf() work with 16-bit +characters, the code for handling the table of named substrings will still need +to be modified. */ + +#define PCRE2_CODE_UNIT_WIDTH 8 + +#include +#include +#include + + +/************************************************************************** +* Here is the program. The API includes the concept of "contexts" for * +* setting up unusual interface requirements for compiling and matching, * +* such as custom memory managers and non-standard newline definitions. * +* This program does not do any of this, so it makes no use of contexts, * +* always passing NULL where a context could be given. * +**************************************************************************/ + +int main(int argc, char **argv) +{ +pcre2_code *re; +PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */ +PCRE2_SPTR subject; /* the appropriate width (in this case, 8 bits). */ +PCRE2_SPTR name_table; + +int crlf_is_newline; +int errornumber; +int find_all; +int i; +int rc; +int utf8; + +uint32_t option_bits; +uint32_t namecount; +uint32_t name_entry_size; +uint32_t newline; + +PCRE2_SIZE erroroffset; +PCRE2_SIZE *ovector; +PCRE2_SIZE subject_length; + +pcre2_match_data *match_data; + + +/************************************************************************** +* First, sort out the command line. There is only one possible option at * +* the moment, "-g" to request repeated matching to find all occurrences, * +* like Perl's /g option. We set the variable find_all to a non-zero value * +* if the -g option is present. 
* +**************************************************************************/ + +find_all = 0; +for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-g") == 0) find_all = 1; + else if (argv[i][0] == '-') + { + printf("Unrecognised option %s\n", argv[i]); + return 1; + } + else break; + } + +/* After the options, we require exactly two arguments, which are the pattern, +and the subject string. */ + +if (argc - i != 2) + { + printf("Exactly two arguments required: a regex and a subject string\n"); + return 1; + } + +/* Pattern and subject are char arguments, so they can be straightforwardly +cast to PCRE2_SPTR because we are working in 8-bit code units. The subject +length is cast to PCRE2_SIZE for completeness, though PCRE2_SIZE is in fact +defined to be size_t. */ + +pattern = (PCRE2_SPTR)argv[i]; +subject = (PCRE2_SPTR)argv[i+1]; +subject_length = (PCRE2_SIZE)strlen((char *)subject); + + +/************************************************************************* +* Now we are going to compile the regular expression pattern, and handle * +* any errors that are detected. * +*************************************************************************/ + +re = pcre2_compile( + pattern, /* the pattern */ + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ + 0, /* default options */ + &errornumber, /* for error number */ + &erroroffset, /* for error offset */ + NULL); /* use default compile context */ + +/* Compilation failed: print the error message and exit. */ + +if (re == NULL) + { + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); + printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, + buffer); + return 1; + } + + +/************************************************************************* +* If the compilation succeeded, we call PCRE2 again, in order to do a * +* pattern match against the subject string. This does just ONE match. If * +* further matching is needed, it will be done below. 
Before running the * +* match we must set up a match_data block for holding the result. Using * +* pcre2_match_data_create_from_pattern() ensures that the block is * +* exactly the right size for the number of capturing parentheses in the * +* pattern. If you need to know the actual size of a match_data block as * +* a number of bytes, you can find it like this: * +* * +* PCRE2_SIZE match_data_size = pcre2_get_match_data_size(match_data); * +*************************************************************************/ + +match_data = pcre2_match_data_create_from_pattern(re, NULL); + +/* Now run the match. */ + +rc = pcre2_match( + re, /* the compiled pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + match_data, /* block for storing the result */ + NULL); /* use default match context */ + +/* Matching failed: handle error cases */ + +if (rc < 0) + { + switch(rc) + { + case PCRE2_ERROR_NOMATCH: printf("No match\n"); break; + /* + Handle other special cases if you like + */ + default: printf("Matching error %d\n", rc); break; + } + pcre2_match_data_free(match_data); /* Release memory used for the match */ + pcre2_code_free(re); /* data and the compiled pattern. */ + return 1; + } + +/* Match succeeded. Get a pointer to the output vector, where string offsets +are stored. */ + +ovector = pcre2_get_ovector_pointer(match_data); +printf("Match succeeded at offset %d\n", (int)ovector[0]); + + +/************************************************************************* +* We have found the first match within the subject string. If the output * +* vector wasn't big enough, say so. Then output any substrings that were * +* captured. * +*************************************************************************/ + +/* The output vector wasn't big enough. This should not happen, because we used +pcre2_match_data_create_from_pattern() above. 
*/ + +if (rc == 0) + printf("ovector was not big enough for all the captured substrings\n"); + +/* Since release 10.38 PCRE2 has locked out the use of \K in lookaround +assertions. However, there is an option to re-enable the old behaviour. If that +is set, it is possible to run patterns such as /(?=.\K)/ that use \K in an +assertion to set the start of a match later than its end. In this demonstration +program, we show how to detect this case, but it shouldn't arise because the +option is never set. */ + +if (ovector[0] > ovector[1]) + { + printf("\\K was used in an assertion to set the match start after its end.\n" + "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]), + (char *)(subject + ovector[1])); + printf("Run abandoned\n"); + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return 1; + } + +/* Show substrings stored in the output vector by number. Obviously, in a real +application you might want to do things other than print them. */ + +for (i = 0; i < rc; i++) + { + PCRE2_SPTR substring_start = subject + ovector[2*i]; + PCRE2_SIZE substring_length = ovector[2*i+1] - ovector[2*i]; + printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start); + } + + +/************************************************************************** +* That concludes the basic part of this demonstration program. We have * +* compiled a pattern, and performed a single match. The code that follows * +* shows first how to access named substrings, and then how to code for * +* repeated matches on the same subject. * +**************************************************************************/ + +/* See if there are any named substrings, and if so, show them by name. First +we have to extract the count of named parentheses from the pattern. 
*/ + +(void)pcre2_pattern_info( + re, /* the compiled pattern */ + PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */ + &namecount); /* where to put the answer */ + +if (namecount == 0) printf("No named substrings\n"); else + { + PCRE2_SPTR tabptr; + printf("Named substrings\n"); + + /* Before we can access the substrings, we must extract the table for + translating names to numbers, and the size of each entry in the table. */ + + (void)pcre2_pattern_info( + re, /* the compiled pattern */ + PCRE2_INFO_NAMETABLE, /* address of the table */ + &name_table); /* where to put the answer */ + + (void)pcre2_pattern_info( + re, /* the compiled pattern */ + PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */ + &name_entry_size); /* where to put the answer */ + + /* Now we can scan the table and, for each entry, print the number, the name, + and the substring itself. In the 8-bit library the number is held in two + bytes, most significant first. */ + + tabptr = name_table; + for (i = 0; i < namecount; i++) + { + int n = (tabptr[0] << 8) | tabptr[1]; + printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2, + (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]); + tabptr += name_entry_size; + } + } + + +/************************************************************************* +* If the "-g" option was given on the command line, we want to continue * +* to search for additional matches in the subject string, in a similar * +* way to the /g option in Perl. This turns out to be trickier than you * +* might think because of the possibility of matching an empty string. * +* What happens is as follows: * +* * +* If the previous match was NOT for an empty string, we can just start * +* the next match at the end of the previous one. * +* * +* If the previous match WAS for an empty string, we can't do that, as it * +* would lead to an infinite loop. 
Instead, a call of pcre2_match() is * +* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The * +* first of these tells PCRE2 that an empty string at the start of the * +* subject is not a valid match; other possibilities must be tried. The * +* second flag restricts PCRE2 to one match attempt at the initial string * +* position. If this match succeeds, an alternative to the empty string * +* match has been found, and we can print it and proceed round the loop, * +* advancing by the length of whatever was found. If this match does not * +* succeed, we still stay in the loop, advancing by just one character. * +* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be * +* more than one byte. * +* * +* However, there is a complication concerned with newlines. When the * +* newline convention is such that CRLF is a valid newline, we must * +* advance by two characters rather than one. The newline convention can * +* be set in the regex by (*CR), etc.; if not, we must find the default. * +*************************************************************************/ + +if (!find_all) /* Check for -g */ + { + pcre2_match_data_free(match_data); /* Release the memory that was used */ + pcre2_code_free(re); /* for the match data and the pattern. */ + return 0; /* Exit the program. */ + } + +/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline +sequence. First, find the options with which the regex was compiled and extract +the UTF state. */ + +(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits); +utf8 = (option_bits & PCRE2_UTF) != 0; + +/* Now find the newline convention and see whether CRLF is a valid newline +sequence. 
*/ + +(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline); +crlf_is_newline = newline == PCRE2_NEWLINE_ANY || + newline == PCRE2_NEWLINE_CRLF || + newline == PCRE2_NEWLINE_ANYCRLF; + +/* Loop for second and subsequent matches */ + +for (;;) + { + uint32_t options = 0; /* Normally no options */ + PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ + + /* If the previous match was for an empty string, we are finished if we are + at the end of the subject. Otherwise, arrange to run another match at the + same point to see if a non-empty match can be found. */ + + if (ovector[0] == ovector[1]) + { + if (ovector[0] == subject_length) break; + options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + } + + /* If the previous match was not an empty string, there is one tricky case to + consider. If a pattern contains \K within a lookbehind assertion at the + start, the end of the matched string can be at the offset where the match + started. Without special action, this leads to a loop that keeps on matching + the same substring. We must detect this case and arrange to move the start on + by one character. The pcre2_get_startchar() function returns the starting + offset that was passed to pcre2_match(). */ + + else + { + PCRE2_SIZE startchar = pcre2_get_startchar(match_data); + if (start_offset <= startchar) + { + if (startchar >= subject_length) break; /* Reached end of subject. */ + start_offset = startchar + 1; /* Advance by one character. */ + if (utf8) /* If UTF-8, it may be more */ + { /* than one code unit. 
*/ + for (; start_offset < subject_length; start_offset++) + if ((subject[start_offset] & 0xc0) != 0x80) break; + } + } + } + + /* Run the next matching operation */ + + rc = pcre2_match( + re, /* the compiled pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + start_offset, /* starting offset in the subject */ + options, /* options */ + match_data, /* block for storing the result */ + NULL); /* use default match context */ + + /* This time, a result of NOMATCH isn't an error. If the value in "options" + is zero, it just means we have found all possible matches, so the loop ends. + Otherwise, it means we have failed to find a non-empty-string match at a + point where there was a previous empty-string match. In this case, we do what + Perl does: advance the matching position by one character, and continue. We + do this by setting the "end of previous match" offset, because that is picked + up at the top of the loop as the point at which to start again. + + There are two complications: (a) When CRLF is a valid newline sequence, and + the current position is just before it, advance by an extra byte. (b) + Otherwise we must ensure that we skip an entire UTF character if we are in + UTF mode. */ + + if (rc == PCRE2_ERROR_NOMATCH) + { + if (options == 0) break; /* All matches found */ + ovector[1] = start_offset + 1; /* Advance one code unit */ + if (crlf_is_newline && /* If CRLF is a newline & */ + start_offset < subject_length - 1 && /* we are at CRLF, */ + subject[start_offset] == '\r' && + subject[start_offset + 1] == '\n') + ovector[1] += 1; /* Advance by one more. */ + else if (utf8) /* Otherwise, ensure we */ + { /* advance a whole UTF-8 */ + while (ovector[1] < subject_length) /* character. */ + { + if ((subject[ovector[1]] & 0xc0) != 0x80) break; + ovector[1] += 1; + } + } + continue; /* Go round the loop again */ + } + + /* Other matching errors are not recoverable. 
*/ + + if (rc < 0) + { + printf("Matching error %d\n", rc); + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return 1; + } + + /* Match succeeded */ + + printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]); + + /* The match succeeded, but the output vector wasn't big enough. This + should not happen. */ + + if (rc == 0) + printf("ovector was not big enough for all the captured substrings\n"); + + /* We must guard against patterns such as /(?=.\K)/ that use \K in an + assertion to set the start of a match later than its end. In this + demonstration program, we just detect this case and give up. */ + + if (ovector[0] > ovector[1]) + { + printf("\\K was used in an assertion to set the match start after its end.\n" + "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]), + (char *)(subject + ovector[1])); + printf("Run abandoned\n"); + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return 1; + } + + /* As before, show substrings stored in the output vector by number, and then + also any named substrings. 
*/ + + for (i = 0; i < rc; i++) + { + PCRE2_SPTR substring_start = subject + ovector[2*i]; + size_t substring_length = ovector[2*i+1] - ovector[2*i]; + printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start); + } + + if (namecount == 0) printf("No named substrings\n"); else + { + PCRE2_SPTR tabptr = name_table; + printf("Named substrings\n"); + for (i = 0; i < namecount; i++) + { + int n = (tabptr[0] << 8) | tabptr[1]; + printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2, + (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]); + tabptr += name_entry_size; + } + } + } /* End of loop to find second and subsequent matches */ + +printf("\n"); +pcre2_match_data_free(match_data); +pcre2_code_free(re); +return 0; +} + +/* End of pcre2demo.c */ diff --git a/src/pcre2grep.c b/src/pcre2grep.c new file mode 100644 index 0000000..bb96067 --- /dev/null +++ b/src/pcre2grep.c @@ -0,0 +1,4606 @@ +/************************************************* +* pcre2grep program * +*************************************************/ + +/* This is a grep program that uses the 8-bit PCRE regular expression library +via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows, +and native z/OS systems it can recurse into directories, and in z/OS it can +handle PDS files. + +Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an +additional header is required. That header is not included in the main PCRE2 +distribution because other apparatus is needed to compile pcre2grep for z/OS. +The header can be found in the special z/OS distribution, which is available +from www.zaconsultants.net or from www.cbttape.org. 
+ + Copyright (c) 1997-2023 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+----------------------------------------------------------------------------- +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include + +#include +#include + +#if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \ + && !defined WIN32 && !defined(__CYGWIN__) +#define WIN32 +#endif + +/* Some CMake's define it still */ +#if defined(__CYGWIN__) && defined(WIN32) +#undef WIN32 +#endif + +#ifdef __VMS +#include clidef +#include descrip +#include lib$routines +#endif + +#ifdef WIN32 +#include /* For _setmode() */ +#include /* For _O_BINARY */ +#endif + +#if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK) +#ifdef WIN32 +#include +#else +#include +#endif +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif + +#ifdef SUPPORT_LIBZ +#include +#endif + +#ifdef SUPPORT_LIBBZ2 +#include +#endif + +#define PCRE2_CODE_UNIT_WIDTH 8 +#include "pcre2.h" + +/* Older versions of MSVC lack snprintf(). This define allows for +warning/error-free compilation and testing with MSVC compilers back to at least +MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */ + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define snprintf _snprintf +#endif + +/* old VC and older compilers don't support %td or %zu, and even some that claim to +be C99 don't support it (hence DISABLE_PERCENT_ZT). 
/* Select the printf() conversion used for size_t values. "zu" is used unless
DISABLE_PERCENT_ZT is defined, the compiler is MSVC older than version 1800,
or a non-MSVC compiler does not announce C99 or later; in those cases "llu" is
used when _WIN64 is defined and "lu" otherwise. */

#if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
  (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L))
#ifdef _WIN64
#define SIZ_FORM "llu"
#else
#define SIZ_FORM "lu"
#endif
#else
#define SIZ_FORM "zu"
#endif

/* A local boolean type and its two values. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* Initial value of capture_max (see below). */

#define DEFAULT_CAPTURE_MAX 50

/* MAXPATLEN is the larger of BUFSIZ and 8192. */

#if BUFSIZ > 8192
#define MAXPATLEN BUFSIZ
#else
#define MAXPATLEN 8192
#endif

/* NOTE(review): presumably the sizes of file name and error message buffers;
the code that uses them is not in this part of the file - confirm. */

#define FNBUFSIZ 2048
#define ERRBUFSIZ 256

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* File reading styles */

enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };

/* Actions for the -d and -D options */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Binary file options */

enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };

/* Return values from decode_dollar_escape() */

enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };

/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf(). Note that the macro expands to a bare "if" statement, so
it must not be used as the unbraced body of an "if" that has an "else". */

#define FWRITE_IGNORE(a,b,c,d) if (fwrite(a,b,c,d)) {}

/* Under Windows, we have to set stdout to be binary, so that it does not
convert \r\n at the ends of output lines to \r\r\n. However, that means that
any messages written to stdout must have \r\n as their line terminator. This is
handled by using STDOUT_NL as the newline string. We also use a normal double
quote for the example, as single quotes aren't usually available. */

#ifdef WIN32
#define STDOUT_NL     "\r\n"
#define STDOUT_NL_LEN 2
#define QUOT          "\""
#else
#define STDOUT_NL      "\n"
#define STDOUT_NL_LEN  1
#define QUOT           "'"
#endif

/* This code is returned from decode_dollar_escape() when $n is encountered,
and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
point. */

#define STDOUT_NL_CODE 0x7fffffffu



/*************************************************
*               Global variables                 *
*************************************************/

/* String-valued settings. Those that appear in the option table further down
are set from the option named in the comment. */

static const char *colour_string = "1;31";
static const char *colour_option = NULL;     /* --color or --colour */
static const char *dee_option = NULL;        /* -d, --directories */
static const char *DEE_option = NULL;        /* -D, --devices */
static const char *locale = NULL;            /* --locale */
static const char *newline_arg = NULL;
static const char *group_separator = "--";   /* --group-separator */
static const char *om_separator = NULL;
static const char *stdin_name = "(standard input)";   /* --label */
static const char *output_text = NULL;

static char *main_buffer = NULL;

static const char *printname_nl = STDOUT_NL;    /* Changed to NULL for -Z */
static int printname_colon = ':';               /* Changed to 0 for -Z */
static int printname_hyphen = '-';              /* Changed to 0 for -Z */

/* Context line counts: -A sets after_context, -B sets before_context, and -C
sets both_context. */

static int after_context = 0;
static int before_context = 0;
static int binary_files = BIN_BINARY;   /* One of the BIN_xxx values */
static int both_context = 0;
static int endlinetype;

static int count_limit = -1;  /* Not long, so that it works with OP_NUMBER */
static unsigned long int counts_printed = 0;
static unsigned long int total_count = 0;

/* Buffer sizes. --buffer-size sets bufthird (the starting size) and
--max-buffer-size sets max_bufthird; bufsize starts as three times the default
for bufthird. */

static PCRE2_SIZE bufthird = PCRE2GREP_BUFSIZE;
static PCRE2_SIZE max_bufthird = PCRE2GREP_MAX_BUFSIZE;
static PCRE2_SIZE bufsize = 3*PCRE2GREP_BUFSIZE;

/* The default action for directories is to skip them under Windows and to
read them elsewhere. */

#ifdef WIN32
static int dee_action = dee_SKIP;
#else
static int dee_action = dee_READ;
#endif

static int DEE_action = DEE_READ;
static int error_count = 0;
static int filenames = FN_DEFAULT;    /* One of the FN_xxx values */

/* JIT use defaults to TRUE only when pcre2grep is built with JIT support. */

#ifdef SUPPORT_PCRE2GREP_JIT
static BOOL use_jit = TRUE;
#else
static BOOL use_jit = FALSE;
#endif

static const uint8_t *character_tables = NULL;

/* PCRE2 options and resource limits. The three limits are set by
--heap-limit (in kibibytes), --match-limit, and --depth-limit. */

static uint32_t pcre2_options = 0;
static uint32_t extra_options = 0;
static PCRE2_SIZE heap_limit = PCRE2_UNSET;
static uint32_t match_limit = 0;
static uint32_t depth_limit = 0;

/* PCRE2 contexts, match data blocks, and their offset vectors.
NOTE(review): match_data_toggle presumably selects between the two entries of
match_data_pair and offsets_pair - confirm where it is used. */

static pcre2_compile_context *compile_context;
static pcre2_match_context *match_context;
static pcre2_match_data *match_data, *match_data_pair[2];
static PCRE2_SIZE *offsets, *offsets_pair[2];
static int match_data_toggle;
static uint32_t offset_size;
static uint32_t capture_max = DEFAULT_CAPTURE_MAX;

/* Boolean settings and state flags, all initially FALSE. */

static BOOL all_matches = FALSE;
static BOOL case_restrict = FALSE;
static BOOL count_only = FALSE;
static BOOL do_colour = FALSE;
#ifdef WIN32
static BOOL do_ansi = FALSE;
#endif
static BOOL file_offsets = FALSE;
static BOOL hyphenpending = FALSE;
static BOOL invert = FALSE;
static BOOL line_buffered = FALSE;
static BOOL line_offsets = FALSE;
static BOOL multiline = FALSE;
static BOOL no_ucp = FALSE;
static BOOL number = FALSE;
static BOOL omit_zero_count = FALSE;
static BOOL resource_error = FALSE;
static BOOL quiet = FALSE;
static BOOL show_total_count = FALSE;
static BOOL silent = FALSE;
static BOOL utf = FALSE;
static BOOL posix_digit = FALSE;

static uint8_t utf8_buffer[8];


/* Structure for list of --only-matching capturing numbers. */

typedef struct omstr {
  struct omstr *next;
  int groupnum;
} omstr;

static omstr *only_matching = NULL;
static omstr *only_matching_last = NULL;
static int only_matching_count;

/* Structure for holding the two variables that describe a number chain. */

typedef struct omdatastr {
  omstr **anchor;
  omstr **lastptr;
} omdatastr;

static omdatastr only_matching_data = { &only_matching, &only_matching_last };

/* Structure for list of file names (for -f and --{in,ex}clude-from) */

typedef struct fnstr {
  struct fnstr *next;
  char *name;
} fnstr;

static fnstr *exclude_from = NULL;
static fnstr *exclude_from_last = NULL;
static fnstr *include_from = NULL;
static fnstr *include_from_last = NULL;

static fnstr *file_lists = NULL;
static fnstr *file_lists_last = NULL;
static fnstr *pattern_files = NULL;
static fnstr *pattern_files_last = NULL;

/* Structure for holding the two variables that describe a file name chain. */

typedef struct fndatastr {
  fnstr **anchor;
  fnstr **lastptr;
} fndatastr;

static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
static fndatastr include_from_data = { &include_from, &include_from_last };
static fndatastr file_lists_data = { &file_lists, &file_lists_last };
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };

/* Structure for pattern and its compiled form; used for matching patterns and
also for include/exclude patterns. */

typedef struct patstr {
  struct patstr *next;
  char *string;
  PCRE2_SIZE length;
  pcre2_code *compiled;
} patstr;

static patstr *patterns = NULL;
static patstr *patterns_last = NULL;
static patstr *include_patterns = NULL;
static patstr *include_patterns_last = NULL;
static patstr *exclude_patterns = NULL;
static patstr *exclude_patterns_last = NULL;
static patstr *include_dir_patterns = NULL;
static patstr *include_dir_patterns_last = NULL;
static patstr *exclude_dir_patterns = NULL;
static patstr *exclude_dir_patterns_last = NULL;

/* Structure holding the two variables that describe a pattern chain. A pointer
to such structures is used for each appropriate option. */

typedef struct patdatastr {
  patstr **anchor;
  patstr **lastptr;
} patdatastr;

static patdatastr match_patdata = { &patterns, &patterns_last };
static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };

/* Two parallel tables: entry k of incexlist is the anchor of the pattern chain
whose option name is entry k of incexname. */

static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
                                 &include_dir_patterns, &exclude_dir_patterns };

static const char *incexname[4] = { "--include", "--exclude",
                                    "--include-dir", "--exclude-dir" };

/* Structure for options and list of them */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
       OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };

typedef struct option_item {
  int type;               /* One of the OP_xxx values above */
  int one_char;           /* Option letter, or a negative N_xxx identifier */
  void *dataptr;          /* Variable that receives the value, or NULL */
  const char *long_name;  /* Long option name, with "=xxx" if it takes data */
  const char *help_text;  /* Description of the option */
} option_item;
*/ + +#define N_COLOUR (-1) +#define N_EXCLUDE (-2) +#define N_EXCLUDE_DIR (-3) +#define N_HELP (-4) +#define N_INCLUDE (-5) +#define N_INCLUDE_DIR (-6) +#define N_LABEL (-7) +#define N_LOCALE (-8) +#define N_NULL (-9) +#define N_LOFFSETS (-10) +#define N_FOFFSETS (-11) +#define N_LBUFFER (-12) +#define N_H_LIMIT (-13) +#define N_M_LIMIT (-14) +#define N_M_LIMIT_DEP (-15) +#define N_BUFSIZE (-16) +#define N_NOJIT (-17) +#define N_FILE_LIST (-18) +#define N_BINARY_FILES (-19) +#define N_EXCLUDE_FROM (-20) +#define N_INCLUDE_FROM (-21) +#define N_OM_SEPARATOR (-22) +#define N_MAX_BUFSIZE (-23) +#define N_OM_CAPTURE (-24) +#define N_ALLABSK (-25) +#define N_POSIX_DIGIT (-26) +#define N_GROUP_SEPARATOR (-27) +#define N_NO_GROUP_SEPARATOR (-28) + +static option_item optionlist[] = { + { OP_NODATA, N_NULL, NULL, "", "terminate options" }, + { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" }, + { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" }, + { OP_NODATA, 'a', NULL, "text", "treat binary files as text" }, + { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" }, + { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" }, + { OP_SIZE, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" }, + { OP_SIZE, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" }, + { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" }, + { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" }, + { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" }, + { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" }, + { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" }, + { OP_STRING, 'd', &dee_option, 
"directories=action", "how to handle directories" }, + { OP_NODATA, N_POSIX_DIGIT, NULL, "posix-digit", "\\d always matches [0-9], even in UTF/UCP mode" }, + { OP_NODATA, 'E', NULL, "case-restrict", "restrict case matching (no mix ASCII/non-ASCII)" }, + { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" }, + { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" }, + { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" }, + { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" }, + { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" }, + { OP_STRING, N_GROUP_SEPARATOR, &group_separator, "group-separator=text", "set separator between groups of lines" }, + { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" }, + { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" }, + { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" }, + { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" }, + { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" }, + { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, + { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, + { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" }, + { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" }, + { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, + { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" }, + { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" }, + { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" }, + { 
OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" }, + { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, + { OP_NUMBER, 'm', &count_limit, "max-count=number", "stop after matched lines" }, + { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" }, + { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, +#ifdef SUPPORT_PCRE2GREP_JIT + { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" }, +#else + { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" }, +#endif + { OP_NODATA, N_NO_GROUP_SEPARATOR, NULL, "no-group-separator", "suppress separators between groups of lines" }, + { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" }, + { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" }, + { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" }, + { OP_U32NUMBER, N_OM_CAPTURE, &capture_max, "om-capture=n", "set capture count for --only-matching" }, + { OP_NODATA, 'P', NULL, "no-ucp", "do not enable UCP mode with Unicode" }, + { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, + { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" }, + { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" }, + { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" }, + { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" }, + { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" }, + { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" 
}, + { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" }, + { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" }, + { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" }, + { OP_NODATA, 'u', NULL, "utf", "use UTF/Unicode" }, + { OP_NODATA, 'U', NULL, "utf-allow-invalid", "use UTF/Unicode, allow for invalid code units" }, + { OP_NODATA, 'V', NULL, "version", "print version information and exit" }, + { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" }, + { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" }, + { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" }, + { OP_NODATA, N_ALLABSK, NULL, "allow-lookaround-bsk", "allow \\K in lookarounds" }, + { OP_NODATA, 'Z', NULL, "null", "output 0 byte after file names" }, + { OP_NODATA, 0, NULL, NULL, NULL } +}; + +/* Table of names for newline types. Must be kept in step with the definitions +of PCRE2_NEWLINE_xx in pcre2.h. */ + +static const char *newlines[] = { + "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" }; + +/* UTF-8 tables */ + +const int utf8_table1[] = + { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; +const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int); + +const int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; +const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; + +const char utf8_table4[] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; + + +#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE) +/************************************************* +* Emulated memmove() for systems without it * +*************************************************/ + +/* This function can make use of bcopy() if it is available. 
Otherwise do it by +steam, as there are some non-Unix environments that lack both memmove() and +bcopy(). */ + +static void * +emulated_memmove(void *d, const void *s, size_t n) +{ +#ifdef HAVE_BCOPY +bcopy(s, d, n); +return d; +#else +size_t i; +unsigned char *dest = (unsigned char *)d; +const unsigned char *src = (const unsigned char *)s; +if (dest > src) + { + dest += n; + src += n; + for (i = 0; i < n; ++i) *(--dest) = *(--src); + return (void *)dest; + } +else + { + for (i = 0; i < n; ++i) *dest++ = *src++; + return (void *)(dest - n); + } +#endif /* not HAVE_BCOPY */ +} +#undef memmove +#define memmove(d,s,n) emulated_memmove(d,s,n) +#endif /* not VPCOMPAT && not HAVE_MEMMOVE */ + + + +/************************************************* +* Convert code point to UTF-8 * +*************************************************/ + +/* A static buffer is used. Returns the number of bytes. */ + +static int +ord2utf8(uint32_t value) +{ +int i, j; +uint8_t *utf8bytes = utf8_buffer; +for (i = 0; i < utf8_table1_size; i++) + if (value <= (uint32_t)utf8_table1[i]) break; +utf8bytes += i; +for (j = i; j > 0; j--) + { + *utf8bytes-- = 0x80 | (value & 0x3f); + value >>= 6; + } +*utf8bytes = utf8_table2[i] | value; +return i + 1; +} + + + +/************************************************* +* Case-independent string compare * +*************************************************/ + +static int +strcmpic(const char *str1, const char *str2) +{ +unsigned int c1, c2; +while (*str1 != '\0' || *str2 != '\0') + { + c1 = tolower(*str1++); + c2 = tolower(*str2++); + if (c1 != c2) return ((c1 > c2) << 1) - 1; + } +return 0; +} + + +/************************************************* +* Parse GREP_COLORS * +*************************************************/ + +/* Extract ms or mt from GREP_COLORS. 
+ +Argument: the string, possibly NULL +Returns: the value of ms or mt, or NULL if neither present +*/ + +static char * +parse_grep_colors(const char *gc) +{ +static char seq[16]; +char *col; +uint32_t len; +if (gc == NULL) return NULL; +col = strstr(gc, "ms="); +if (col == NULL) col = strstr(gc, "mt="); +if (col == NULL) return NULL; +len = 0; +col += 3; +while (*col != ':' && *col != 0 && len < sizeof(seq)-1) + seq[len++] = *col++; +seq[len] = 0; +return seq; +} + + +/************************************************* +* Exit from the program * +*************************************************/ + +/* If there has been a resource error, give a suitable message. + +Argument: the return code +Returns: does not return +*/ + +static void +pcre2grep_exit(int rc) +{ +/* VMS does exit codes differently: both exit(1) and exit(0) return with a +status of 1, which is not helpful. To help with this problem, define a symbol +(akin to an environment variable) called "PCRE2GREP_RC" and put the exit code +therein. */ + +#ifdef __VMS + char val_buf[4]; + $DESCRIPTOR(sym_nam, "PCRE2GREP_RC"); + $DESCRIPTOR(sym_val, val_buf); + sprintf(val_buf, "%d", rc); + sym_val.dsc$w_length = strlen(val_buf); + lib$set_symbol(&sym_nam, &sym_val); +#endif + +if (resource_error) + { + fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource " + "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT, + PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT); + fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n"); + } +exit(rc); +} + + +/************************************************* +* Add item to chain of patterns * +*************************************************/ + +/* Used to add an item onto a chain, or just return an unconnected item if the +"after" argument is NULL. 
+ +Arguments: + s pattern string to add + patlen length of pattern + after if not NULL points to item to insert after + +Returns: new pattern block or NULL on error +*/ + +static patstr * +add_pattern(char *s, PCRE2_SIZE patlen, patstr *after) +{ +patstr *p = (patstr *)malloc(sizeof(patstr)); + +/* LCOV_EXCL_START - These won't be hit in normal testing. */ + +if (p == NULL) + { + fprintf(stderr, "pcre2grep: malloc failed\n"); + pcre2grep_exit(2); + } +if (patlen > MAXPATLEN) + { + fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n", + MAXPATLEN); + free(p); + return NULL; + } + +/* LCOV_EXCL_STOP */ + +p->next = NULL; +p->string = s; +p->length = patlen; +p->compiled = NULL; + +if (after != NULL) + { + p->next = after->next; + after->next = p; + } +return p; +} + + +/************************************************* +* Free chain of patterns * +*************************************************/ + +/* Used for several chains of patterns. + +Argument: pointer to start of chain +Returns: nothing +*/ + +static void +free_pattern_chain(patstr *pc) +{ +while (pc != NULL) + { + patstr *p = pc; + pc = p->next; + if (p->compiled != NULL) pcre2_code_free(p->compiled); + free(p); + } +} + + +/************************************************* +* Free chain of file names * +*************************************************/ + +/* +Argument: pointer to start of chain +Returns: nothing +*/ + +static void +free_file_chain(fnstr *fn) +{ +while (fn != NULL) + { + fnstr *f = fn; + fn = f->next; + free(f); + } +} + + +/************************************************* +* OS-specific functions * +*************************************************/ + +/* These definitions are needed in all Windows environments, even those where +Unix-style directory scanning can be used (see below). 
*/ + +#ifdef WIN32 + +#ifndef STRICT +# define STRICT +#endif +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#include + +#define iswild(name) (strpbrk(name, "*?") != NULL) + +/* Convert ANSI BGR format to RGB used by Windows */ +#define BGR_RGB(x) (((x) & 1 ? 4 : 0) | ((x) & 2) | ((x) & 4 ? 1 : 0)) + +static HANDLE hstdout; +static CONSOLE_SCREEN_BUFFER_INFO csbi; +static WORD match_colour; + +static WORD +decode_ANSI_colour(const char *cs) +{ +WORD result = csbi.wAttributes; +while (*cs) + { + if (isdigit((unsigned char)(*cs))) + { + int code = atoi(cs); + if (code == 1) result |= 0x08; + else if (code == 4) result |= 0x8000; + else if (code == 5) result |= 0x80; + else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30); + else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F); + else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4); + else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0); + /* aixterm high intensity colour codes */ + else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08; + else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80; + + while (isdigit((unsigned char)(*cs))) cs++; + } + if (*cs) cs++; + } +return result; +} + + +static void +init_colour_output() +{ +if (do_colour) + { + hstdout = GetStdHandle(STD_OUTPUT_HANDLE); + /* This fails when redirected to con; try again if so. 
*/ + if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi) + { + HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL); + GetConsoleScreenBufferInfo(hcon, &csbi); + CloseHandle(hcon); + } + match_colour = decode_ANSI_colour(colour_string); + /* No valid colour found - turn off colouring */ + if (!match_colour) do_colour = FALSE; + } +} + +#endif /* WIN32 */ + + +/* The following sets of functions are defined so that they can be made system +specific. At present there are versions for Unix-style environments, Windows, +native z/OS, and "no support". */ + + +/************* Directory scanning Unix-style and z/OS ***********/ + +#if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS +#include +#include +#include + +#if defined NATIVE_ZOS +/************* Directory and PDS/E scanning for z/OS ***********/ +/************* z/OS looks mostly like Unix with USS ************/ +/* However, z/OS needs the #include statements in this header */ +#include "pcrzosfs.h" +/* That header is not included in the main PCRE distribution because + other apparatus is needed to compile pcre2grep for z/OS. The header + can be found in the special z/OS distribution, which is available + from www.zaconsultants.net or from www.cbttape.org. 
*/ +#endif + +typedef DIR directory_type; +#define FILESEP '/' + +static int +isdirectory(char *filename) +{ +struct stat statbuf; +if (stat(filename, &statbuf) < 0) + return 0; /* In the expectation that opening as a file will fail */ +return S_ISDIR(statbuf.st_mode); +} + +static directory_type * +opendirectory(char *filename) +{ +return opendir(filename); +} + +static char * +readdirectory(directory_type *dir) +{ +for (;;) + { + struct dirent *dent = readdir(dir); + if (dent == NULL) return NULL; + if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) + return dent->d_name; + } +/* Control never reaches here */ +} + +static void +closedirectory(directory_type *dir) +{ +closedir(dir); +} + + +/************* Test for regular file, Unix-style **********/ + +static int +isregfile(char *filename) +{ +struct stat statbuf; +if (stat(filename, &statbuf) < 0) + return 1; /* In the expectation that opening as a file will fail */ +return S_ISREG(statbuf.st_mode); +} + + +#if defined NATIVE_ZOS +/************* Test for a terminal in z/OS **********/ +/* isatty() does not work in a TSO environment, so always give FALSE.*/ + +static BOOL +is_stdout_tty(void) +{ +return FALSE; +} + +static BOOL +is_file_tty(FILE *f) +{ +return FALSE; +} + + +/************* Test for a terminal, Unix-style **********/ + +#else +static BOOL +is_stdout_tty(void) +{ +return isatty(fileno(stdout)); +} + +static BOOL +is_file_tty(FILE *f) +{ +return isatty(fileno(f)); +} +#endif + + +/************* Print optionally coloured match Unix-style and z/OS **********/ + +static void +print_match(const void *buf, int length) +{ +if (length == 0) return; +if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); +FWRITE_IGNORE(buf, 1, length, stdout); +if (do_colour) fprintf(stdout, "%c[0m", 0x1b); +} + +/* End of Unix-style or native z/OS environment functions. */ + + +/************* Directory scanning in Windows ***********/ + +/* I (Philip Hazel) have no means of testing this code. 
It was contributed by +Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES +when it did not exist. David Byron added a patch that moved the #include of + to before the INVALID_FILE_ATTRIBUTES definition rather than after. +*/ + +#elif defined WIN32 + +#ifndef INVALID_FILE_ATTRIBUTES +#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF +#endif + +typedef struct directory_type +{ +HANDLE handle; +BOOL first; +WIN32_FIND_DATA data; +} directory_type; + +#define FILESEP '/' + +int +isdirectory(char *filename) +{ +DWORD attr = GetFileAttributes(filename); +if (attr == INVALID_FILE_ATTRIBUTES) + return 0; +return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0; +} + +directory_type * +opendirectory(char *filename) +{ +size_t len; +char *pattern; +directory_type *dir; +DWORD err; +len = strlen(filename); +pattern = (char *)malloc(len + 3); +dir = (directory_type *)malloc(sizeof(*dir)); +if ((pattern == NULL) || (dir == NULL)) + { + fprintf(stderr, "pcre2grep: malloc failed\n"); + pcre2grep_exit(2); + } +memcpy(pattern, filename, len); +if (iswild(filename)) + pattern[len] = 0; +else + memcpy(&(pattern[len]), "\\*", 3); +dir->handle = FindFirstFile(pattern, &(dir->data)); +if (dir->handle != INVALID_HANDLE_VALUE) + { + free(pattern); + dir->first = TRUE; + return dir; + } +err = GetLastError(); +free(pattern); +free(dir); +errno = (err == ERROR_ACCESS_DENIED) ? 
EACCES : ENOENT; +return NULL; +} + +char * +readdirectory(directory_type *dir) +{ +for (;;) + { + if (!dir->first) + { + if (!FindNextFile(dir->handle, &(dir->data))) + return NULL; + } + else + { + dir->first = FALSE; + } + if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0) + return dir->data.cFileName; + } +#ifndef _MSC_VER +return NULL; /* Keep compiler happy; never executed */ +#endif +} + +void +closedirectory(directory_type *dir) +{ +FindClose(dir->handle); +free(dir); +} + + +/************* Test for regular file in Windows **********/ + +/* I don't know how to do this, or if it can be done; assume all paths are +regular if they are not directories. */ + +int isregfile(char *filename) +{ +return !isdirectory(filename); +} + + +/************* Test for a terminal in Windows **********/ + +static BOOL +is_stdout_tty(void) +{ +return _isatty(_fileno(stdout)); +} + +static BOOL +is_file_tty(FILE *f) +{ +return _isatty(_fileno(f)); +} + + +/************* Print optionally coloured match in Windows **********/ + +static void +print_match(const void *buf, int length) +{ +if (length == 0) return; +if (do_colour) + { + if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string); + else SetConsoleTextAttribute(hstdout, match_colour); + } +FWRITE_IGNORE(buf, 1, length, stdout); +if (do_colour) + { + if (do_ansi) fprintf(stdout, "%c[0m", 0x1b); + else SetConsoleTextAttribute(hstdout, csbi.wAttributes); + } +} + +/* End of Windows functions */ + + +/************* Directory scanning when we can't do it ***********/ + +/* The type is void, and apart from isdirectory(), the functions do nothing. 
*/ + +#else + +#define FILESEP 0 +typedef void directory_type; + +int isdirectory(char *filename) { return 0; } +directory_type * opendirectory(char *filename) { return (directory_type*)0;} +char *readdirectory(directory_type *dir) { return (char*)0;} +void closedirectory(directory_type *dir) {} + + +/************* Test for regular file when we can't do it **********/ + +/* Assume all files are regular. */ + +int isregfile(char *filename) { return 1; } + + +/************* Test for a terminal when we can't do it **********/ + +static BOOL +is_stdout_tty(void) +{ +return FALSE; +} + +static BOOL +is_file_tty(FILE *f) +{ +return FALSE; +} + + +/************* Print optionally coloured match when we can't do it **********/ + +static void +print_match(const void *buf, int length) +{ +if (length == 0) return; +FWRITE_IGNORE(buf, 1, length, stdout); +} + +#endif /* End of system-specific functions */ + + + +#ifndef HAVE_STRERROR +/************************************************* +* Provide strerror() for non-ANSI libraries * +*************************************************/ + +/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror() +in their libraries, but can provide the same facility by this simple +alternative function. 
*/ + +extern int sys_nerr; +extern char *sys_errlist[]; + +char * +strerror(int n) +{ +if (n < 0 || n >= sys_nerr) return "unknown error number"; +return sys_errlist[n]; +} +#endif /* HAVE_STRERROR */ + + + +/************************************************* +* Usage function * +*************************************************/ + +static int +usage(int rc) +{ +option_item *op; +fprintf(stderr, "Usage: pcre2grep [-"); +for (op = optionlist; op->one_char != 0; op++) + { + if (op->one_char > 0) fprintf(stderr, "%c", op->one_char); + } +fprintf(stderr, "] [long options] [pattern] [files]\n"); +fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long " + "options.\n"); +return rc; +} + + + +/************************************************* +* Help function * +*************************************************/ + +static void +help(void) +{ +option_item *op; + +printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL); +printf("Search for PATTERN in each FILE or standard input." STDOUT_NL); +printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL); + +#ifdef SUPPORT_PCRE2GREP_CALLOUT +#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK +printf("All callout scripts in patterns are supported." STDOUT_NL); +#else +printf("Non-fork callout scripts in patterns are supported." STDOUT_NL); +#endif +#else +printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL); +#endif + +printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL); + +#ifdef SUPPORT_LIBZ +printf("Files whose names end in .gz are read using zlib." STDOUT_NL); +#endif + +#ifdef SUPPORT_LIBBZ2 +printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL); +#endif + +#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 +printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL); +#else +printf("All files are read as plain files, without any interpretation." 
STDOUT_NL STDOUT_NL); +#endif + +printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL); +printf("Options:" STDOUT_NL); + +for (op = optionlist; op->one_char != 0; op++) + { + int n; + char s[4]; + + if (op->one_char > 0 && (op->long_name)[0] == 0) + n = 31 - printf(" -%c", op->one_char); + else + { + if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); + else strcpy(s, " "); + n = 31 - printf(" %s --%s", s, op->long_name); + } + + if (n < 1) n = 1; + printf("%.*s%s" STDOUT_NL, n, " ", op->help_text); + } + +printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL); +printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE); +printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE); +printf("When reading patterns or file names from a file, trailing white" STDOUT_NL); +printf("space is removed and blank lines are ignored." STDOUT_NL); +printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN); + +printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL); +printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL); +} + + + +/************************************************* +* Test exclude/includes * +*************************************************/ + +/* If any exclude pattern matches, the path is excluded. Otherwise, unless +there are no includes, the path must match an include pattern. 
+ +Arguments: + path the path to be matched + ip the chain of include patterns + ep the chain of exclude patterns + +Returns: TRUE if the path is not excluded +*/ + +static BOOL +test_incexc(char *path, patstr *ip, patstr *ep) +{ +int plen = strlen((const char *)path); + +for (; ep != NULL; ep = ep->next) + { + if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0) + return FALSE; + } + +if (ip == NULL) return TRUE; + +for (; ip != NULL; ip = ip->next) + { + if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0) + return TRUE; + } + +return FALSE; +} + + + +/************************************************* +* Decode integer argument value * +*************************************************/ + +/* Integer arguments can be followed by K or M. Avoid the use of strtoul() +because SunOS4 doesn't have it. This is used only for unpicking arguments, so +just keep it simple. + +Arguments: + option_data the option data string + op the option item (for error messages) + longop TRUE if option given in long form + +Returns: a long integer +*/ + +static long int +decode_number(char *option_data, option_item *op, BOOL longop) +{ +unsigned long int n = 0; +char *endptr = option_data; +while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++; +while (isdigit((unsigned char)(*endptr))) + n = n * 10 + (int)(*endptr++ - '0'); +if (toupper(*endptr) == 'K') + { + n *= 1024; + endptr++; + } +else if (toupper(*endptr) == 'M') + { + n *= 1024*1024; + endptr++; + } + +if (*endptr != 0) /* Error */ + { + if (longop) + { + char *equals = strchr(op->long_name, '='); + int nlen = (equals == NULL)? 
(int)strlen(op->long_name) : + (int)(equals - op->long_name); + fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n", + option_data, nlen, op->long_name); + } + else + fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n", + option_data, op->one_char); + pcre2grep_exit(usage(2)); + } + +return n; +} + + + +/************************************************* +* Add item to a chain of numbers * +*************************************************/ + +/* Used to add an item onto a chain, or just return an unconnected item if the +"after" argument is NULL. + +Arguments: + n the number to add + after if not NULL points to item to insert after + +Returns: new number block +*/ + +static omstr * +add_number(int n, omstr *after) +{ +omstr *om = (omstr *)malloc(sizeof(omstr)); + +/* LCOV_EXCL_START - These lines won't be hit in normal testing. */ + +if (om == NULL) + { + fprintf(stderr, "pcre2grep: malloc failed\n"); + pcre2grep_exit(2); + } + +/* LCOV_EXCL_STOP */ + +om->next = NULL; +om->groupnum = n; + +if (after != NULL) + { + om->next = after->next; + after->next = om; + } +return om; +} + + + +/************************************************* +* Read one line of input * +*************************************************/ + +/* Normally, input that is to be scanned is read using fread() (or gzread, or +BZ2_read) into a large buffer, so many lines may be read at once. However, +doing this for tty input means that no output appears until a lot of input has +been typed. Instead, tty input is handled line by line. We cannot use fgets() +for this, because it does not stop at a binary zero, and therefore there is no +way of telling how many characters it has read, because there may be binary +zeros embedded in the data. This function is also used for reading patterns +from files (the -f option). 
+ +Arguments: + buffer the buffer to read into + length the maximum number of characters to read + f the file + +Returns: the number of characters read, zero at end of file +*/ + +static PCRE2_SIZE +read_one_line(char *buffer, PCRE2_SIZE length, FILE *f) +{ +int c; +PCRE2_SIZE yield = 0; +while ((c = fgetc(f)) != EOF) + { + buffer[yield++] = c; + if (c == '\n' || yield >= length) break; + } +return yield; +} + + + +/************************************************* +* Find end of line * +*************************************************/ + +/* The length of the endline sequence that is found is set via lenptr. This may +be zero at the very end of the file if there is no line-ending sequence there. + +Arguments: + p current position in line + endptr end of available data + lenptr where to put the length of the eol sequence + +Returns: pointer after the last byte of the line, + including the newline byte(s) +*/ + +static char * +end_of_line(char *p, char *endptr, int *lenptr) +{ +switch(endlinetype) + { + default: /* Just in case */ + case PCRE2_NEWLINE_LF: + while (p < endptr && *p != '\n') p++; + if (p < endptr) + { + *lenptr = 1; + return p + 1; + } + *lenptr = 0; + return endptr; + + case PCRE2_NEWLINE_CR: + while (p < endptr && *p != '\r') p++; + if (p < endptr) + { + *lenptr = 1; + return p + 1; + } + *lenptr = 0; + return endptr; + + case PCRE2_NEWLINE_NUL: + while (p < endptr && *p != '\0') p++; + if (p < endptr) + { + *lenptr = 1; + return p + 1; + } + *lenptr = 0; + return endptr; + + case PCRE2_NEWLINE_CRLF: + for (;;) + { + while (p < endptr && *p != '\r') p++; + if (++p >= endptr) + { + *lenptr = 0; + return endptr; + } + if (*p == '\n') + { + *lenptr = 2; + return p + 1; + } + } + break; + + case PCRE2_NEWLINE_ANYCRLF: + while (p < endptr) + { + int extra = 0; + int c = *((unsigned char *)p); + + if (utf && c >= 0xc0) + { + int gcii, gcss; + extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ + gcss = 6*extra; + c = (c & utf8_table3[extra]) 
<< gcss; + for (gcii = 1; gcii <= extra; gcii++) + { + gcss -= 6; + c |= (p[gcii] & 0x3f) << gcss; + } + } + + p += 1 + extra; + + switch (c) + { + case '\n': + *lenptr = 1; + return p; + + case '\r': + if (p < endptr && *p == '\n') + { + *lenptr = 2; + p++; + } + else *lenptr = 1; + return p; + + default: + break; + } + } /* End of loop for ANYCRLF case */ + + *lenptr = 0; /* Must have hit the end */ + return endptr; + + case PCRE2_NEWLINE_ANY: + while (p < endptr) + { + int extra = 0; + int c = *((unsigned char *)p); + + if (utf && c >= 0xc0) + { + int gcii, gcss; + extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ + gcss = 6*extra; + c = (c & utf8_table3[extra]) << gcss; + for (gcii = 1; gcii <= extra; gcii++) + { + gcss -= 6; + c |= (p[gcii] & 0x3f) << gcss; + } + } + + p += 1 + extra; + + switch (c) + { + case '\n': /* LF */ + case '\v': /* VT */ + case '\f': /* FF */ + *lenptr = 1; + return p; + + case '\r': /* CR */ + if (p < endptr && *p == '\n') + { + *lenptr = 2; + p++; + } + else *lenptr = 1; + return p; + +#ifndef EBCDIC + case 0x85: /* Unicode NEL */ + *lenptr = utf? 2 : 1; + return p; + + case 0x2028: /* Unicode LS */ + case 0x2029: /* Unicode PS */ + *lenptr = 3; + return p; +#endif /* Not EBCDIC */ + + default: + break; + } + } /* End of loop for ANY case */ + + *lenptr = 0; /* Must have hit the end */ + return endptr; + } /* End of overall switch */ +} + + + +/************************************************* +* Find start of previous line * +*************************************************/ + +/* This is called when looking back for before lines to print. 
+ +Arguments: + p start of the subsequent line + startptr start of available data + +Returns: pointer to the start of the previous line +*/ + +static char * +previous_line(char *p, char *startptr) +{ +switch(endlinetype) + { + default: /* Just in case */ + case PCRE2_NEWLINE_LF: + p--; + while (p > startptr && p[-1] != '\n') p--; + return p; + + case PCRE2_NEWLINE_CR: + p--; + while (p > startptr && p[-1] != '\n') p--; + return p; + + case PCRE2_NEWLINE_NUL: + p--; + while (p > startptr && p[-1] != '\0') p--; + return p; + + case PCRE2_NEWLINE_CRLF: + for (;;) + { + p -= 2; + while (p > startptr && p[-1] != '\n') p--; + if (p <= startptr + 1 || p[-2] == '\r') return p; + } + /* Control can never get here */ + + case PCRE2_NEWLINE_ANY: + case PCRE2_NEWLINE_ANYCRLF: + if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; + if (utf) while ((*p & 0xc0) == 0x80) p--; + + while (p > startptr) + { + unsigned int c; + char *pp = p - 1; + + if (utf) + { + int extra = 0; + while ((*pp & 0xc0) == 0x80) pp--; + c = *((unsigned char *)pp); + if (c >= 0xc0) + { + int gcii, gcss; + extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ + gcss = 6*extra; + c = (c & utf8_table3[extra]) << gcss; + for (gcii = 1; gcii <= extra; gcii++) + { + gcss -= 6; + c |= (pp[gcii] & 0x3f) << gcss; + } + } + } + else c = *((unsigned char *)pp); + + if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c) + { + case '\n': /* LF */ + case '\r': /* CR */ + return p; + + default: + break; + } + + else switch (c) + { + case '\n': /* LF */ + case '\v': /* VT */ + case '\f': /* FF */ + case '\r': /* CR */ +#ifndef EBCDIC + case 0x85: /* Unicode NEL */ + case 0x2028: /* Unicode LS */ + case 0x2029: /* Unicode PS */ +#endif /* Not EBCDIC */ + return p; + + default: + break; + } + + p = pp; /* Back one character */ + } /* End of loop for ANY case */ + + return startptr; /* Hit start of data */ + } /* End of overall switch */ +} + + + +/************************************************* +* Output 
newline at end * +*************************************************/ + +/* This function is called if the final line of a file has been written to +stdout, but it does not have a terminating newline. + +Arguments: none +Returns: nothing +*/ + +static void +write_final_newline(void) +{ +switch(endlinetype) + { + default: /* Just in case */ + case PCRE2_NEWLINE_LF: + case PCRE2_NEWLINE_ANY: + case PCRE2_NEWLINE_ANYCRLF: + fprintf(stdout, "\n"); + break; + + case PCRE2_NEWLINE_CR: + fprintf(stdout, "\r"); + break; + + case PCRE2_NEWLINE_CRLF: + fprintf(stdout, "\r\n"); + break; + + case PCRE2_NEWLINE_NUL: + fprintf(stdout, "%c", 0); + break; + } +} + + +/************************************************* +* Print the previous "after" lines * +*************************************************/ + +/* This is called if we are about to lose said lines because of buffer filling, +and at the end of the file. The data in the line is written using fwrite() so +that a binary zero does not terminate it. + +Arguments: + lastmatchnumber the number of the last matching line, plus one + lastmatchrestart where we restarted after the last match + endptr end of available data + printname filename for printing + +Returns: nothing +*/ + +static void +do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart, + char *endptr, const char *printname) +{ +if (after_context > 0 && lastmatchnumber > 0) + { + int count = 0; + int ellength = 0; + while (lastmatchrestart < endptr && count < after_context) + { + char *pp = end_of_line(lastmatchrestart, endptr, &ellength); + if (ellength == 0 && pp == main_buffer + bufsize) break; + if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_hyphen); + if (number) fprintf(stdout, "%lu-", lastmatchnumber++); + FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); + lastmatchrestart = pp; + count++; + } + + /* If we have printed any lines, arrange for a hyphen separator if anything + else follows. 
Also, if the last line is the final line in the file and it had + no newline, add one. */ + + if (count > 0) + { + hyphenpending = TRUE; + if (ellength == 0 && lastmatchrestart >= endptr) + write_final_newline(); + } + } +} + + + +/************************************************* +* Apply patterns to subject till one matches * +*************************************************/ + +/* This function is called to run through all the patterns, looking for a +match. When all possible matches are required, for example, for colouring, it +checks all patterns for matching, and returns the earliest match. Otherwise, it +returns the first pattern that has matched. + +Arguments: + matchptr the start of the subject + length the length of the subject to match + options options for pcre2_match + startoffset where to start matching + mrc address of where to put the result of pcre2_match() + +Returns: TRUE if there was a match, match_data and offsets are set + FALSE if there was no match (but no errors) + invert if there was a non-fatal error +*/ + +static BOOL +match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options, + PCRE2_SIZE startoffset, int *mrc) +{ +PCRE2_SIZE slen = length; +int first = -1; +int firstrc = 0; +patstr *p = patterns; +const char *msg = "this text:\n\n"; + +if (slen > 200) + { + slen = 200; + msg = "text that starts:\n\n"; + } + +for (int i = 1; p != NULL; p = p->next, i++) + { + int rc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, length, + startoffset, options, match_data, match_context); + if (rc == PCRE2_ERROR_NOMATCH) continue; + + /* Handle a successful match. When all_matches is false, we are done. + Otherwise we must save the earliest match. 
*/ + + if (rc >= 0) + { + if (!all_matches) + { + *mrc = rc; + return TRUE; + } + + if (first < 0 || offsets[0] < offsets_pair[first][0] || + (offsets[0] == offsets_pair[first][0] && + offsets[1] > offsets_pair[first][1])) + { + first = match_data_toggle; + firstrc = rc; + match_data_toggle ^= 1; + match_data = match_data_pair[match_data_toggle]; + offsets = offsets_pair[match_data_toggle]; + } + continue; + } + + /* Deal with PCRE2 error. */ + + fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", rc); + if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i); + fprintf(stderr, "%s", msg); + FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */ + fprintf(stderr, "\n\n"); + if (rc <= PCRE2_ERROR_UTF8_ERR1 && + rc >= PCRE2_ERROR_UTF8_ERR21) + { + unsigned char mbuffer[256]; + PCRE2_SIZE startchar = pcre2_get_startchar(match_data); + (void)pcre2_get_error_message(rc, mbuffer, sizeof(mbuffer)); + fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer, startchar); + } + if (rc == PCRE2_ERROR_MATCHLIMIT || rc == PCRE2_ERROR_DEPTHLIMIT || + rc == PCRE2_ERROR_HEAPLIMIT || rc == PCRE2_ERROR_JIT_STACKLIMIT) + resource_error = TRUE; + if (error_count++ > 20) + { + fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n"); + pcre2grep_exit(2); + } + return invert; /* No more matching; don't show the line again */ + } + +/* We get here when all patterns have been tried. If all_matches is false, +this means that none of them matched. If all_matches is true, matched_first +will be non-NULL if there was at least one match, and it will point to the +appropriate match_data block. 
*/ + +if (!all_matches || first < 0) return FALSE; + +match_data_toggle = first; +match_data = match_data_pair[first]; +offsets = offsets_pair[first]; +*mrc = firstrc; +return TRUE; +} + + + +/************************************************* +* Decode dollar escape sequence * +*************************************************/ + +/* Called from various places to decode $ escapes in output strings. The escape +sequences are as follows: + +$ or ${} returns a capture number. However, if callout is TRUE, +zero is never returned; '0' is substituted. + +$a returns bell. +$b returns backspace. +$e returns escape. +$f returns form feed. +$n returns newline. +$r returns carriage return. +$t returns tab. +$v returns vertical tab. +$o returns the character represented by the given octal + number; up to three digits are processed. +$o{} does the same, up to 7 digits, but gives an error for mode-invalid + code points. +$x returns the character represented by the given hexadecimal + number; up to two digits are processed. +$x{= '0' && *string <= '9'); + string--; /* Point to last digit */ + + /* In a callout, capture number 0 is not available. No error can be given, + so just return the character '0'. */ + + if (callout && c == 0) + { + *value = '0'; + } + else + { + *value = c; + rc = DDE_CAPTURE; + } + break; + + /* Limit octal numbers to 3 digits without braces, or up to 7 with braces, + for valid Unicode code points. */ + + case 'o': + base = 8; + string++; + if (*string == '{') + { + brace = TRUE; + string++; + dcount = 7; + } + else dcount = 3; + for (; dcount > 0; dcount--) + { + if (*string < '0' || *string > '7') break; + c = c * 8 + (*string++ - '0'); + } + *value = c; + string--; /* Point to last digit */ + break; + + /* Limit hex numbers to 2 digits without braces, or up to 6 with braces, + for valid Unicode code points. 
*/ + + case 'x': + base = 16; + string++; + if (*string == '{') + { + brace = TRUE; + string++; + dcount = 6; + } + else dcount = 2; + for (; dcount > 0; dcount--) + { + if (!isxdigit(*string)) break; + if (*string >= '0' && *string <= '9') + c = c *16 + *string++ - '0'; + else + c = c * 16 + (*string++ | 0x20) - 'a' + 10; + } + *value = c; + string--; /* Point to last digit */ + break; + + case 'a': *value = '\a'; break; + case 'b': *value = '\b'; break; +#ifndef EBCDIC + case 'e': *value = '\033'; break; +#else + case 'e': *value = '\047'; break; +#endif + case 'f': *value = '\f'; break; + case 'n': *value = STDOUT_NL_CODE; break; + case 'r': *value = '\r'; break; + case 't': *value = '\t'; break; + case 'v': *value = '\v'; break; + + default: *value = *string; break; + } + +if (brace) + { + c = string[1]; + if (c != '}') + { + rc = DDE_ERROR; + if (!callout) + { + if ((base == 8 && c >= '0' && c <= '7') || + (base == 16 && isxdigit(c))) + { + fprintf(stderr, "pcre2grep: Error in output text at offset %d: " + "too many %s digits\n", (int)(string - begin), + (base == 8)? "octal" : "hex"); + } + else + { + fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n", + (int)(string - begin), "missing closing brace"); + } + } + } + else string++; + } + +/* Check maximum code point values, but take note of STDOUT_NL_CODE. */ + +if (rc == DDE_CHAR && *value != STDOUT_NL_CODE) + { + uint32_t max = utf? 0x0010ffffu : 0xffu; + if (*value > max) + { + if (!callout) + fprintf(stderr, "pcre2grep: Error in output text at offset %d: " + "code point greater than 0x%x is invalid\n", (int)(string - begin), max); + rc = DDE_ERROR; + } + } + +*last = string; +return rc; +} + + + +/************************************************* +* Check output text for errors * +*************************************************/ + +/* Called early, to get errors before doing anything for -O text; also called +from callouts to check before outputting. 
+ +Arguments: + string an --output text string + callout TRUE if in a callout (stops printing errors) + +Returns: TRUE if OK, FALSE on error +*/ + +static BOOL +syntax_check_output_text(PCRE2_SPTR string, BOOL callout) +{ +uint32_t value; +PCRE2_SPTR begin = string; + +for (; *string != 0; string++) + { + if (*string == '$' && + decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR) + return FALSE; + } + +return TRUE; +} + + +/************************************************* +* Display output text * +*************************************************/ + +/* Display the output text, which is assumed to have already been syntax +checked. Output may contain escape sequences started by the dollar sign. + +Arguments: + string: the output text + callout: TRUE for the builtin callout, FALSE for --output + subject the start of the subject + ovector: capture offsets + capture_top: number of captures + +Returns: TRUE if something was output, other than newline + FALSE if nothing was output, or newline was last output +*/ + +static BOOL +display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject, + PCRE2_SIZE *ovector, PCRE2_SIZE capture_top) +{ +uint32_t value; +BOOL printed = FALSE; +PCRE2_SPTR begin = string; + +for (; *string != 0; string++) + { + if (*string == '$') + { + switch(decode_dollar_escape(begin, string, callout, &value, &string)) + { + case DDE_CHAR: + if (value == STDOUT_NL_CODE) + { + fprintf(stdout, STDOUT_NL); + printed = FALSE; + continue; + } + break; /* Will print value */ + + case DDE_CAPTURE: + if (value < capture_top) + { + PCRE2_SIZE capturesize; + value *= 2; + capturesize = ovector[value + 1] - ovector[value]; + if (capturesize > 0) + { + print_match(subject + ovector[value], capturesize); + printed = TRUE; + } + } + continue; + + /* LCOV_EXCL_START */ + default: /* Should not occur */ + break; + /* LCOV_EXCL_STOP */ + } + } + + else value = *string; /* Not a $ escape */ + + if (!utf || value <= 127) 
fprintf(stdout, "%c", value); else + { + int n = ord2utf8(value); + for (int i = 0; i < n; i++) fputc(utf8_buffer[i], stdout); + } + + printed = TRUE; + } + +return printed; +} + + +#ifdef SUPPORT_PCRE2GREP_CALLOUT + +/************************************************* +* Parse and execute callout scripts * +*************************************************/ + +/* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout +string block and executes the program specified by the string. The string is a +list of substrings separated by pipe characters. The first substring represents +the executable name, and the following substrings specify the arguments: + + program_name|param1|param2|... + +Any substring (including the program name) can contain escape sequences +started by the dollar character. The escape sequences are substituted as +follows: + + $ or ${} is replaced by the captured substring of the given + decimal number, which must be greater than zero. If the number is greater + than the number of capturing substrings, or if the capture is unset, the + replacement is empty. + + Any other character is substituted by itself. E.g: $$ is replaced by a single + dollar or $| replaced by a pipe character. + +Alternatively, if string starts with pipe, the remainder is taken as an output +string, same as --output. This is the only form that is supported if +SUPPORT_PCRE2GREP_FORK is not defined. In this case, --om-separator is used to +separate each callout, defaulting to newline. 
+ +Example: + + echo -e "abcde\n12345" | pcre2grep \ + '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' - + + Output: + + Arg1: [a] [bcd] [d] Arg2: |a| () + abcde + Arg1: [1] [234] [4] Arg2: |1| () + 12345 + +Arguments: + blockptr the callout block + +Returns: currently it always returns with 0 +*/ + +static int +pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused) +{ +PCRE2_SIZE length = calloutptr->callout_string_length; +PCRE2_SPTR string = calloutptr->callout_string; +PCRE2_SPTR subject = calloutptr->subject; +PCRE2_SIZE *ovector = calloutptr->offset_vector; +PCRE2_SIZE capture_top = calloutptr->capture_top; + +#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK +PCRE2_SIZE argsvectorlen = 2; +PCRE2_SIZE argslen = 1; +char *args; +char *argsptr; +char **argsvector; +char **argsvectorptr; +#ifndef WIN32 +pid_t pid; +#endif +int result = 0; +#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */ + +(void)unused; /* Avoid compiler warning */ + +/* Only callouts with strings are supported. */ + +if (string == NULL || length == 0) return 0; + +/* If there's no command, output the remainder directly. */ + +if (*string == '|') + { + string++; + if (!syntax_check_output_text(string, TRUE)) return 0; + (void)display_output_text(string, TRUE, subject, ovector, capture_top); + return 0; + } + +#ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK +return 0; +#else + +/* Checking syntax and compute the number of string fragments. Callout strings +are silently ignored in the event of a syntax error. */ + +while (length > 0) + { + if (*string == '|') + { + argsvectorlen++; + if (argsvectorlen > 10000) return 0; /* Too many args */ + } + + else if (*string == '$') + { + uint32_t value; + PCRE2_SPTR begin = string; + + switch (decode_dollar_escape(begin, string, TRUE, &value, &string)) + { + case DDE_CAPTURE: + if (value < capture_top) + { + value *= 2; + argslen += ovector[value + 1] - ovector[value]; + } + argslen--; /* Negate the effect of argslen++ below. 
*/ + break; + + case DDE_CHAR: + if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1; + else if (utf && value > 127) argslen += ord2utf8(value) - 1; + break; + + /* LCOV_EXCL_START */ + default: /* Should not occur */ + case DDE_ERROR: + return 0; + /* LCOV_EXCL_STOP */ + } + + length -= (string - begin); + } + + string++; + length--; + argslen++; + } + +/* Get memory for the argument vector and its strings. */ + +args = (char*)malloc(argslen); +if (args == NULL) return 0; + +argsvector = (char**)malloc(argsvectorlen * sizeof(char*)); +if (argsvector == NULL) + { + /* LCOV_EXCL_START */ + free(args); + return 0; + /* LCOV_EXCL_STOP */ + } + +/* Now reprocess the string and set up the arguments. */ + +argsptr = args; +argsvectorptr = argsvector; +*argsvectorptr++ = argsptr; + +length = calloutptr->callout_string_length; +string = calloutptr->callout_string; + +while (length > 0) + { + if (*string == '|') + { + *argsptr++ = '\0'; + *argsvectorptr++ = argsptr; + } + + else if (*string == '$') + { + uint32_t value; + PCRE2_SPTR begin = string; + + switch (decode_dollar_escape(begin, string, TRUE, &value, &string)) + { + case DDE_CAPTURE: + if (value < capture_top) + { + PCRE2_SIZE capturesize; + value *= 2; + capturesize = ovector[value + 1] - ovector[value]; + memcpy(argsptr, subject + ovector[value], capturesize); + argsptr += capturesize; + } + break; + + case DDE_CHAR: + if (value == STDOUT_NL_CODE) + { + memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN); + argsptr += STDOUT_NL_LEN; + } + else if (utf && value > 127) + { + int n = ord2utf8(value); + memcpy(argsptr, utf8_buffer, n); + argsptr += n; + } + else + { + *argsptr++ = value; + } + break; + + /* LCOV_EXCL_START */ + default: /* Even though this should not occur, the string having */ + case DDE_ERROR: /* been checked above, we need to include the free() */ + free(args); /* calls so that source checkers do not complain. 
*/ + free(argsvector); + return 0; + /* LCOV_EXCL_STOP */ + } + + length -= (string - begin); + } + + else *argsptr++ = *string; + + /* Advance along the string */ + + string++; + length--; + } + +*argsptr++ = '\0'; +*argsvectorptr = NULL; + +/* Running an external command is system-dependent. Handle Windows and VMS as +necessary, otherwise assume fork(). */ + +#ifdef WIN32 +result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector); + +#elif defined __VMS + { + char cmdbuf[500]; + short i = 0; + int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat; + $DESCRIPTOR(cmd, cmdbuf); + + cmdbuf[0] = 0; + while (argsvector[i]) + { + strcat(cmdbuf, argsvector[i]); + strcat(cmdbuf, " "); + i++; + } + cmd.dsc$w_length = strlen(cmdbuf) - 1; + status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat); + if (!(status & 1)) result = 0; + else result = retstat & 1 ? 0 : 1; + } + +#else /* Neither Windows nor VMS */ +pid = fork(); +if (pid == 0) + { + (void)execv(argsvector[0], argsvector); + /* Control gets here if there is an error, e.g. a non-existent program */ + exit(1); + } +else if (pid > 0) + { + (void)fflush(stdout); + (void)waitpid(pid, &result, 0); + (void)fflush(stdout); + } +#endif /* End Windows/VMS/other handling */ + +free(args); +free(argsvector); + +/* Currently negative return values are not supported, only zero (match +continues) or non-zero (match fails). 
*/ + +return result != 0; +#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */ +} +#endif /* SUPPORT_PCRE2GREP_CALLOUT */ + + + +/************************************************* +* Read a portion of the file into buffer * +*************************************************/ + +static PCRE2_SIZE +fill_buffer(void *handle, int frtype, char *buffer, PCRE2_SIZE length, + BOOL input_line_buffered) +{ +(void)frtype; /* Avoid warning when not used */ + +#ifdef SUPPORT_LIBZ +if (frtype == FR_LIBZ) + return gzread((gzFile)handle, buffer, length); +else +#endif + +#ifdef SUPPORT_LIBBZ2 +if (frtype == FR_LIBBZ2) + return (PCRE2_SIZE)BZ2_bzread((BZFILE *)handle, buffer, length); +else +#endif + +return (input_line_buffered ? + read_one_line(buffer, length, (FILE *)handle) : + fread(buffer, 1, length, (FILE *)handle)); +} + + + +/************************************************* +* Grep an individual file * +*************************************************/ + +/* This is called from grep_or_recurse() below. It uses a buffer that is three +times the value of bufthird. The matching point is never allowed to stray into +the top third of the buffer, thus keeping more of the file available for +context printing or for multiline scanning. For large files, the pointer will +be in the middle third most of the time, so the bottom third is available for +"before" context printing. 
+ +Arguments: + handle the fopened FILE stream for a normal file + the gzFile pointer when reading is via libz + the BZFILE pointer when reading is via libbz2 + frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2 + filename the file name or NULL (for errors) + printname the file name if it is to be printed for each match + or NULL if the file name is not to be printed + it cannot be NULL if filenames[_nomatch]_only is set + +Returns: 0 if there was at least one match + 1 otherwise (no matches) + 2 if an overlong line is encountered + 3 if there is a read error on a .bz2 file +*/ + +static int +pcre2grep(void *handle, int frtype, const char *filename, const char *printname) +{ +int rc = 1; +int filepos = 0; +unsigned long int linenumber = 1; +unsigned long int lastmatchnumber = 0; +unsigned long int count = 0; +long int count_matched_lines = 0; +char *lastmatchrestart = main_buffer; +char *ptr = main_buffer; +char *endptr; +PCRE2_SIZE bufflength; +BOOL binary = FALSE; +BOOL endhyphenpending = FALSE; +BOOL lines_printed = FALSE; +BOOL input_line_buffered = line_buffered; +FILE *in = NULL; /* Ensure initialized */ +long stream_start = -1; /* Only non-negative if relevant */ + +/* Do the first read into the start of the buffer and set up the pointer to end +of what we have. In the case of libz, a non-zipped .gz file will be read as a +plain file. However, if a .bz2 file isn't actually bzipped, the first read will +fail. 
*/ + +if (frtype != FR_LIBZ && frtype != FR_LIBBZ2) + { + in = (FILE *)handle; + if (feof(in)) return 1; + if (is_file_tty(in)) input_line_buffered = TRUE; + else + { + if (count_limit >= 0 && filename == stdin_name) + stream_start = ftell(in); + } + } +else input_line_buffered = FALSE; + +bufflength = fill_buffer(handle, frtype, main_buffer, bufsize, + input_line_buffered); + +#ifdef SUPPORT_LIBBZ2 +if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 3; /* Gotcha: bufflength is PCRE2_SIZE */ +#endif + +endptr = main_buffer + bufflength; + +/* Unless binary-files=text, see if we have a binary file. This uses the same +rule as GNU grep, namely, a search for a binary zero byte near the start of the +file. However, when the newline convention is binary zero, we can't do this. */ + +if (binary_files != BIN_TEXT) + { + if (endlinetype != PCRE2_NEWLINE_NUL) + binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) + != NULL; + if (binary && binary_files == BIN_NOMATCH) return 1; + } + +/* Loop while the current pointer is not at the end of the file. For large +files, endptr will be at the end of the buffer when we are in the middle of the +file, but ptr will never get there, because as soon as it gets over 2/3 of the +way, the buffer is shifted left and re-filled. */ + +while (ptr < endptr) + { + int endlinelength; + int mrc = 0; + unsigned int options = 0; + BOOL match; + BOOL line_matched = FALSE; + char *t = ptr; + PCRE2_SIZE length, linelength; + PCRE2_SIZE startoffset = 0; + + /* If the -m option set a limit for the number of matched or non-matched + lines, check it here. A limit of zero means that no matching is ever done. + For stdin from a file, set the file position. */ + + if (count_limit >= 0 && count_matched_lines >= count_limit) + { + if (stream_start >= 0) + (void)fseek(handle, stream_start + (long int)filepos, SEEK_SET); + rc = (count_limit == 0)? 1 : 0; + break; + } + + /* At this point, ptr is at the start of a line. 
We need to find the length + of the subject string to pass to pcre2_match(). In multiline mode, it is the + length remainder of the data in the buffer. Otherwise, it is the length of + the next line, excluding the terminating newline. After matching, we always + advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE + option is used for compiling, so that any match is constrained to be in the + first line. */ + + t = end_of_line(t, endptr, &endlinelength); + linelength = t - ptr - endlinelength; + length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength; + + /* Check to see if the line we are looking at extends right to the very end + of the buffer without a line terminator. This means the line is too long to + handle at the current buffer size. Until the buffer reaches its maximum size, + try doubling it and reading more data. */ + + if (endlinelength == 0 && t == main_buffer + bufsize) + { + if (bufthird < max_bufthird) + { + char *new_buffer; + PCRE2_SIZE new_bufthird = 2*bufthird; + + if (new_bufthird > max_bufthird) new_bufthird = max_bufthird; + new_buffer = (char *)malloc(3*new_bufthird); + + if (new_buffer == NULL) + { + /* LCOV_EXCL_START */ + fprintf(stderr, + "pcre2grep: line %lu%s%s is too long for the internal buffer\n" + "pcre2grep: not enough memory to increase the buffer size to %" + SIZ_FORM "\n", + linenumber, + (filename == NULL)? "" : " of file ", + (filename == NULL)? "" : filename, + new_bufthird); + return 2; + /* LCOV_EXCL_STOP */ + } + + /* Copy the data and adjust pointers to the new buffer location. */ + + memcpy(new_buffer, main_buffer, bufsize); + bufthird = new_bufthird; + bufsize = 3*bufthird; + ptr = new_buffer + (ptr - main_buffer); + lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer); + free(main_buffer); + main_buffer = new_buffer; + + /* Read more data into the buffer and then try to find the line ending + again. 
*/ + + bufflength += fill_buffer(handle, frtype, main_buffer + bufflength, + bufsize - bufflength, input_line_buffered); + endptr = main_buffer + bufflength; + continue; + } + else + { + fprintf(stderr, + "pcre2grep: line %lu%s%s is too long for the internal buffer\n" + "pcre2grep: the maximum buffer size is %" SIZ_FORM "\n" + "pcre2grep: use the --max-buffer-size option to change it\n", + linenumber, + (filename == NULL)? "" : " of file ", + (filename == NULL)? "" : filename, + bufthird); + return 2; + } + } + + /* We come back here after a match when only_matching_count is non-zero, in + order to find any further matches in the same line. This applies to + --only-matching, --file-offsets, and --line-offsets. */ + + ONLY_MATCHING_RESTART: + + /* Run through all the patterns until one matches or there is an error other + than NOMATCH. This code is in a subroutine so that it can be re-used for + finding subsequent matches when colouring matched lines. After finding one + match, set PCRE2_NOTEMPTY to disable any further matches of null strings in + this line. */ + + match = match_patterns(ptr, length, options, startoffset, &mrc); + options = PCRE2_NOTEMPTY; + + /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use + only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its + return code - to output data lines, so that binary zeroes are treated as just + another data character. */ + + if (match != invert) + { + BOOL hyphenprinted = FALSE; + + /* We've failed if we want a file that doesn't have any matches. */ + + if (filenames == FN_NOMATCH_ONLY) return 1; + + /* Remember that this line matched (for counting matched lines) */ + + line_matched = TRUE; + + /* If all we want is a yes/no answer, we can return immediately. */ + + if (quiet) return 0; + + /* Just count if just counting is wanted. 
*/ + + else if (count_only || show_total_count) count++; + + /* When handling a binary file and binary-files==binary, the "binary" + variable will be set true (it's false in all other cases). In this + situation we just want to output the file name. No need to scan further. */ + + else if (binary) + { + fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename); + return 0; + } + + /* Likewise, if all we want is a file name, there is no need to scan any + more lines in the file. */ + + else if (filenames == FN_MATCH_ONLY) + { + fprintf(stdout, "%s", printname); + if (printname_nl == NULL) fprintf(stdout, "%c", 0); + else fprintf(stdout, "%s", printname_nl); + return 0; + } + + /* The --only-matching option prints just the substring that matched, + and/or one or more captured portions of it, as long as these strings are + not empty. The --file-offsets and --line-offsets options output offsets for + the matching substring (all three set only_matching_count non-zero). None + of these mutually exclusive options prints any context. Afterwards, adjust + the start and then jump back to look for further matches in the same line. + If we are in invert mode, however, nothing is printed and we do not restart + - this could still be useful because the return code is set. 
*/ + + else if (only_matching_count != 0) + { + if (!invert) + { + PCRE2_SIZE oldstartoffset; + + if (printname != NULL) fprintf(stdout, "%s%c", printname, + printname_colon); + if (number) fprintf(stdout, "%lu:", linenumber); + + /* Handle --line-offsets */ + + if (line_offsets) + fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr), + (int)(offsets[1] - offsets[0])); + + /* Handle --file-offsets */ + + else if (file_offsets) + fprintf(stdout, "%d,%d" STDOUT_NL, + (int)(filepos + ptr + offsets[0] - ptr), + (int)(offsets[1] - offsets[0])); + + /* Handle --output (which has already been syntax checked) */ + + else if (output_text != NULL) + { + (void)display_output_text((PCRE2_SPTR)output_text, FALSE, + (PCRE2_SPTR)ptr, offsets, mrc); + fprintf(stdout, STDOUT_NL); + } + + /* Handle --only-matching, which may occur many times */ + + else + { + BOOL printed = FALSE; + omstr *om; + + for (om = only_matching; om != NULL; om = om->next) + { + int n = om->groupnum; + if (n == 0 || n < mrc) + { + int plen = offsets[2*n + 1] - offsets[2*n]; + if (plen > 0) + { + if (printed && om_separator != NULL) + fprintf(stdout, "%s", om_separator); + print_match(ptr + offsets[n*2], plen); + printed = TRUE; + } + } + } + if (printed || printname != NULL || number) + fprintf(stdout, STDOUT_NL); + } + + /* Prepare to repeat to find the next match in the line. */ + + //match = FALSE; + if (line_buffered) fflush(stdout); + rc = 0; /* Had some success */ + + /* If the pattern contained a lookbehind that included \K, it is + possible that the end of the match might be at or before the actual + starting offset we have just used. In this case, start one character + further on. 
*/ + + startoffset = offsets[1]; /* Restart after the match */ + oldstartoffset = pcre2_get_startchar(match_data); + if (startoffset <= oldstartoffset) + { + if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */ + startoffset = oldstartoffset + 1; + if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++; + } + + /* If the current match ended past the end of the line (only possible + in multiline mode), we must move on to the line in which it did end + before searching for more matches. */ + + while (startoffset > linelength) + { + ptr += linelength + endlinelength; + filepos += (int)(linelength + endlinelength); + linenumber++; + startoffset -= (int)(linelength + endlinelength); + t = end_of_line(ptr, endptr, &endlinelength); + linelength = t - ptr - endlinelength; + length = (PCRE2_SIZE)(endptr - ptr); + } + + goto ONLY_MATCHING_RESTART; + } + } + + /* This is the default case when none of the above options is set. We print + the matching lines(s), possibly preceded and/or followed by other lines of + context. */ + + else + { + lines_printed = TRUE; + + /* See if there is a requirement to print some "after" lines from a + previous match. We never print any overlaps. */ + + if (after_context > 0 && lastmatchnumber > 0) + { + int ellength; + int linecount = 0; + char *p = lastmatchrestart; + + while (p < ptr && linecount < after_context) + { + p = end_of_line(p, ptr, &ellength); + linecount++; + } + + /* It is important to advance lastmatchrestart during this printing so + that it interacts correctly with any "before" printing below. Print + each line's data using fwrite() in case there are binary zeroes. 
*/ + + while (lastmatchrestart < p) + { + char *pp = lastmatchrestart; + if (printname != NULL) fprintf(stdout, "%s%c", printname, + printname_hyphen); + if (number) fprintf(stdout, "%lu-", lastmatchnumber++); + pp = end_of_line(pp, endptr, &ellength); + FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); + lastmatchrestart = pp; + } + if (lastmatchrestart != ptr) hyphenpending = TRUE; + } + + /* If there were non-contiguous lines printed above, insert hyphens. */ + + if (hyphenpending) + { + if (group_separator != NULL) + fprintf(stdout, "%s%s", group_separator, STDOUT_NL); + hyphenpending = FALSE; + hyphenprinted = TRUE; + } + + /* See if there is a requirement to print some "before" lines for this + match. Again, don't print overlaps. */ + + if (before_context > 0) + { + int linecount = 0; + char *p = ptr; + + while (p > main_buffer && + (lastmatchnumber == 0 || p > lastmatchrestart) && + linecount < before_context) + { + linecount++; + p = previous_line(p, main_buffer); + } + + if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted && + group_separator != NULL) + fprintf(stdout, "%s%s", group_separator, STDOUT_NL); + + while (p < ptr) + { + int ellength; + char *pp = p; + if (printname != NULL) fprintf(stdout, "%s%c", printname, + printname_hyphen); + if (number) fprintf(stdout, "%lu-", linenumber - linecount--); + pp = end_of_line(pp, endptr, &ellength); + FWRITE_IGNORE(p, 1, pp - p, stdout); + p = pp; + } + } + + /* Now print the matching line(s); ensure we set hyphenpending at the end + of the file if any context lines are being output. */ + + if (after_context > 0 || before_context > 0) + endhyphenpending = TRUE; + + if (printname != NULL) fprintf(stdout, "%s%c", printname, + printname_colon); + if (number) fprintf(stdout, "%lu:", linenumber); + + /* In multiline mode, or if colouring, we have to split the line(s) up + and search for further matches, but not of course if the line is a + non-match. 
In multiline mode this is necessary in case there is another + match that spans the end of the current line. When colouring we want to + colour all matches. */ + + if ((multiline || do_colour) && !invert) + { + int plength; + PCRE2_SIZE endprevious; + + /* The use of \K may make the end offset earlier than the start. In + this situation, swap them round. */ + + if (offsets[0] > offsets[1]) + { + PCRE2_SIZE temp = offsets[0]; + offsets[0] = offsets[1]; + offsets[1] = temp; + } + + FWRITE_IGNORE(ptr, 1, offsets[0], stdout); + print_match(ptr + offsets[0], offsets[1] - offsets[0]); + + for (;;) + { + PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data); + + endprevious = offsets[1]; + startoffset = endprevious; /* Advance after previous match. */ + + /* If the pattern contained a lookbehind that included \K, it is + possible that the end of the match might be at or before the actual + starting offset we have just used. In this case, start one character + further on. */ + + if (startoffset <= oldstartoffset) + { + startoffset = oldstartoffset + 1; + if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++; + } + + /* If the current match ended past the end of the line (only possible + in multiline mode), we must move on to the line in which it did end + before searching for more matches. Because the PCRE2_FIRSTLINE option + is set, the start of the match will always be before the first + newline sequence. */ + + while (startoffset > linelength + endlinelength) + { + ptr += linelength + endlinelength; + filepos += (int)(linelength + endlinelength); + linenumber++; + startoffset -= (int)(linelength + endlinelength); + endprevious -= (int)(linelength + endlinelength); + t = end_of_line(ptr, endptr, &endlinelength); + linelength = t - ptr - endlinelength; + length = (PCRE2_SIZE)(endptr - ptr); + } + + /* If startoffset is at the exact end of the line it means this + complete line was the final part of the match, so there is nothing + more to do. 
*/ + + if (startoffset == linelength + endlinelength) break; + + /* Otherwise, run a match from within the final line, and if found, + loop for any that may follow. */ + + if (!match_patterns(ptr, length, options, startoffset, &mrc)) break; + + /* The use of \K may make the end offset earlier than the start. In + this situation, swap them round. */ + + if (offsets[0] > offsets[1]) + { + PCRE2_SIZE temp = offsets[0]; + offsets[0] = offsets[1]; + offsets[1] = temp; + } + + FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout); + print_match(ptr + offsets[0], offsets[1] - offsets[0]); + } + + /* In multiline mode, we may have already printed the complete line + and its line-ending characters (if they matched the pattern), so there + may be no more to print. */ + + plength = (int)((linelength + endlinelength) - endprevious); + if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout); + } + + /* Not colouring or multiline; no need to search for further matches. */ + + else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout); + } + + /* End of doing what has to be done for a match. If --line-buffered was + given, flush the output. */ + + if (line_buffered) fflush(stdout); + rc = 0; /* Had some success */ + + /* Remember where the last match happened for after_context. We remember + where we are about to restart, and that line's number. */ + + lastmatchrestart = ptr + linelength + endlinelength; + lastmatchnumber = linenumber + 1; + + /* If a line was printed and we are now at the end of the file and the last + line had no newline, output one. */ + + if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0) + write_final_newline(); + } + + /* For a match in multiline inverted mode (which of course did not cause + anything to be printed), we have to move on to the end of the match before + proceeding. 
*/ + + if (multiline && invert && match) + { + int ellength; + char *endmatch = ptr + offsets[1]; + t = ptr; + while (t < endmatch) + { + t = end_of_line(t, endptr, &ellength); + if (t <= endmatch) linenumber++; else break; + } + endmatch = end_of_line(endmatch, endptr, &ellength); + linelength = endmatch - ptr - ellength; + } + + /* Advance to after the newline and increment the line number. The file + offset to the current line is maintained in filepos. */ + + END_ONE_MATCH: + ptr += linelength + endlinelength; + filepos += (int)(linelength + endlinelength); + linenumber++; + + /* If there was at least one match (or a non-match, as required) in the line, + increment the count for the -m option. */ + + if (line_matched) count_matched_lines++; + + /* If input is line buffered, and the buffer is not yet full, read another + line and add it into the buffer. */ + + if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize) + { + PCRE2_SIZE add = read_one_line(ptr, bufsize - (ptr - main_buffer), in); + bufflength += add; + endptr += add; + } + + /* If we haven't yet reached the end of the file (the buffer is full), and + the current point is in the top 1/3 of the buffer, slide the buffer down by + 1/3 and refill it. Before we do this, if some unprinted "after" lines are + about to be lost, print them. 
*/ + + if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird) + { + if (after_context > 0 && + lastmatchnumber > 0 && + lastmatchrestart < main_buffer + bufthird) + { + do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); + lastmatchnumber = 0; /* Indicates no after lines pending */ + } + + /* Now do the shuffle */ + + (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird); + ptr -= bufthird; + + bufflength = 2*bufthird + fill_buffer(handle, frtype, + main_buffer + 2*bufthird, bufthird, input_line_buffered); + endptr = main_buffer + bufflength; + + /* Adjust any last match point */ + + if (lastmatchnumber > 0) lastmatchrestart -= bufthird; + } + } /* Loop through the whole file */ + +/* End of file; print final "after" lines if wanted; do_after_lines sets +hyphenpending if it prints something. */ + +if (only_matching_count == 0 && !(count_only|show_total_count)) + { + do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); + hyphenpending |= endhyphenpending; + } + +/* Print the file name if we are looking for those without matches and there +were none. If we found a match, we won't have got this far. 
*/ + +if (filenames == FN_NOMATCH_ONLY) + { + fprintf(stdout, "%s", printname); + if (printname_nl == NULL) fprintf(stdout, "%c", 0); + else fprintf(stdout, "%s", printname_nl); + return 0; + } + +/* Print the match count if wanted */ + +if (count_only && !quiet) + { + if (count > 0 || !omit_zero_count) + { + if (printname != NULL && filenames != FN_NONE) + fprintf(stdout, "%s%c", printname, printname_colon); + fprintf(stdout, "%lu" STDOUT_NL, count); + counts_printed++; + } + } + +total_count += count; /* Can be set without count_only */ +return rc; +} + + + +/************************************************* +* Grep a file or recurse into a directory * +*************************************************/ + +/* Given a path name, if it's a directory, scan all the files if we are +recursing; if it's a file, grep it. + +Arguments: + pathname the path to investigate + dir_recurse TRUE if recursing is wanted (-r or -drecurse) + only_one_at_top TRUE if the path is the only one at toplevel + +Returns: -1 the file/directory was skipped + 0 if there was at least one match + 1 if there were no matches + 2 there was some kind of error + +However, file opening failures are suppressed if "silent" is set. +*/ + +static int +grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top) +{ +int rc = 1; +int frtype; +void *handle; +char *lastcomp; +FILE *in = NULL; /* Ensure initialized */ + +#ifdef SUPPORT_LIBZ +gzFile ingz = NULL; +#endif + +#ifdef SUPPORT_LIBBZ2 +BZFILE *inbz2 = NULL; +#endif + +#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 +int pathlen; +#endif + +#if defined NATIVE_ZOS +int zos_type; +FILE *zos_test_file; +#endif + +/* If the file name is "-" we scan stdin */ + +if (strcmp(pathname, "-") == 0) + { + if (count_limit >= 0) setbuf(stdin, NULL); + return pcre2grep(stdin, FR_PLAIN, stdin_name, + (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))? 
+ stdin_name : NULL); + } + +/* Inclusion and exclusion: --include-dir and --exclude-dir apply only to +directories, whereas --include and --exclude apply to everything else. The test +is against the final component of the path. */ + +lastcomp = strrchr(pathname, FILESEP); +lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1; + +/* If the file is a directory, skip if not recursing or if explicitly excluded. +Otherwise, scan the directory and recurse for each path within it. The scanning +code is localized so it can be made system-specific. */ + + +/* For z/OS, determine the file type. */ + +#if defined NATIVE_ZOS +zos_test_file = fopen(pathname,"rb"); + +if (zos_test_file == NULL) + { + if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n", + pathname, strerror(errno)); + return -1; + } +zos_type = identifyzosfiletype (zos_test_file); +fclose (zos_test_file); + +/* Handle a PDS in separate code */ + +if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE) + { + return travelonpdsdir (pathname, only_one_at_top); + } + +/* Deal with regular files in the normal way below. These types are: + zos_type == __ZOS_PDS_MEMBER + zos_type == __ZOS_PS + zos_type == __ZOS_VSAM_KSDS + zos_type == __ZOS_VSAM_ESDS + zos_type == __ZOS_VSAM_RRDS +*/ + +/* Handle a z/OS directory using common code. 
*/ + +else if (zos_type == __ZOS_HFS) + { +#endif /* NATIVE_ZOS */ + + +/* Handle directories: common code for all OS */ + +if (isdirectory(pathname)) + { + if (dee_action == dee_SKIP || + !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns)) + return -1; + + if (dee_action == dee_RECURSE) + { + char childpath[FNBUFSIZ]; + char *nextfile; + directory_type *dir = opendirectory(pathname); + + if (dir == NULL) + { + /* LCOV_EXCL_START - this is a "never" event */ + if (!silent) + fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname, + strerror(errno)); + return 2; + /* LCOV_EXCL_STOP */ + } + + while ((nextfile = readdirectory(dir)) != NULL) + { + int frc; + int fnlength = strlen(pathname) + strlen(nextfile) + 2; + if (fnlength > FNBUFSIZ) + { + /* LCOV_EXCL_START - this is a "never" event */ + fprintf(stderr, "pcre2grep: recursive filename is too long\n"); + rc = 2; + break; + /* LCOV_EXCL_STOP */ + } + sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile); + + /* If the realpath() function is available, we can try to prevent endless + recursion caused by a symlink pointing to a parent directory (GitHub + issue #2 (old Bugzilla #2794). Original patch from Thomas Tempelmann. + Modified to avoid using strlcat() because that isn't a standard C + function, and also modified not to copy back the fully resolved path, + because that affects the output from pcre2grep. 
*/ + +#ifdef HAVE_REALPATH + { + char resolvedpath[PATH_MAX]; + BOOL isSame; + size_t rlen; + if (realpath(childpath, resolvedpath) == NULL) + /* LCOV_EXCL_START - this is a "never" event */ + continue; /* This path is invalid - we can skip processing this */ + /* LCOV_EXCL_STOP */ + isSame = strcmp(pathname, resolvedpath) == 0; + if (isSame) continue; /* We have a recursion */ + rlen = strlen(resolvedpath); + if (rlen++ < sizeof(resolvedpath) - 3) + { + BOOL contained; + strcat(resolvedpath, "/"); + contained = strncmp(pathname, resolvedpath, rlen) == 0; + if (contained) continue; /* We have a recursion */ + } + } +#endif /* HAVE_REALPATH */ + + frc = grep_or_recurse(childpath, dir_recurse, FALSE); + if (frc > 1) rc = frc; + else if (frc == 0 && rc == 1) rc = 0; + } + + closedirectory(dir); + return rc; + } + } + +#ifdef WIN32 +if (iswild(pathname)) + { + char buffer[1024]; + char *nextfile; + char *name; + directory_type *dir = opendirectory(pathname); + + if (dir == NULL) + return 0; + + for (nextfile = name = pathname; *nextfile != 0; nextfile++) + if (*nextfile == '/' || *nextfile == '\\') + name = nextfile + 1; + *name = 0; + + while ((nextfile = readdirectory(dir)) != NULL) + { + int frc; + sprintf(buffer, "%.512s%.128s", pathname, nextfile); + frc = grep_or_recurse(buffer, dir_recurse, FALSE); + if (frc > 1) rc = frc; + else if (frc == 0 && rc == 1) rc = 0; + } + + closedirectory(dir); + return rc; + } +#endif + +#if defined NATIVE_ZOS + } +#endif + +/* If the file is not a directory, check for a regular file, and if it is not, +skip it if that's been requested. Otherwise, check for an explicit inclusion or +exclusion. 
*/ + +else if ( +#if defined NATIVE_ZOS + (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) || +#else /* all other OS */ + (!isregfile(pathname) && DEE_action == DEE_SKIP) || +#endif + !test_incexc(lastcomp, include_patterns, exclude_patterns)) + return -1; /* File skipped */ + +/* Control reaches here if we have a regular file, or if we have a directory +and recursion or skipping was not requested, or if we have anything else and +skipping was not requested. The scan proceeds. If this is the first and only +argument at top level, we don't show the file name, unless we are only showing +the file name, or the filename was forced (-H). */ + +#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 +pathlen = (int)(strlen(pathname)); +#endif + +/* Open using zlib if it is supported and the file name ends with .gz. */ + +#ifdef SUPPORT_LIBZ +if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0) + { + ingz = gzopen(pathname, "rb"); + if (ingz == NULL) + { + /* LCOV_EXCL_START */ + if (!silent) + fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname, + strerror(errno)); + return 2; + /* LCOV_EXCL_STOP */ + } + handle = (void *)ingz; + frtype = FR_LIBZ; + } +else +#endif + +/* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */ + +#ifdef SUPPORT_LIBBZ2 +if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0) + { + inbz2 = BZ2_bzopen(pathname, "rb"); + handle = (void *)inbz2; + frtype = FR_LIBBZ2; + } +else +#endif + +/* Otherwise use plain fopen(). The label is so that we can come back here if +an attempt to read a .bz2 file indicates that it really is a plain file. */ + +#ifdef SUPPORT_LIBBZ2 +PLAIN_FILE: +#endif + { + in = fopen(pathname, "rb"); + handle = (void *)in; + frtype = FR_PLAIN; + } + +/* All the opening methods return errno when they fail. 
*/ + +if (handle == NULL) + { + if (!silent) + fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname, + strerror(errno)); + return 2; + } + +/* Now grep the file */ + +rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT || + (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL); + +/* Close in an appropriate manner. */ + +#ifdef SUPPORT_LIBZ +if (frtype == FR_LIBZ) + gzclose(ingz); +else +#endif + +/* If it is a .bz2 file and the result is 3, it means that the first attempt to +read failed. If the error indicates that the file isn't in fact bzipped, try +again as a normal file. */ + +#ifdef SUPPORT_LIBBZ2 +if (frtype == FR_LIBBZ2) + { + if (rc == 3) + { + int errnum; + const char *err = BZ2_bzerror(inbz2, &errnum); + if (errnum == BZ_DATA_ERROR_MAGIC) + { + BZ2_bzclose(inbz2); + goto PLAIN_FILE; + } + /* LCOV_EXCL_START */ + else if (!silent) + fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n", + pathname, err); + rc = 2; /* The normal "something went wrong" code */ + /* LCOV_EXCL_STOP */ + } + BZ2_bzclose(inbz2); + } +else +#endif + +/* Normal file close */ + +fclose(in); + +/* Pass back the yield from pcre2grep(). */ + +return rc; +} + + + +/************************************************* +* Handle a no-data option * +*************************************************/ + +/* This is called when a known option has been identified. 
*/ + +/* Arguments: + letter the option's single character, or one of the N_xxx values + options the current PCRE2 compile options + +Returns: the options value, with any bits that this option adds + +Most cases only set variables that are declared outside this function. The +N_HELP and 'V' cases, and the default (unknown option) case, call +pcre2grep_exit(). */ + +static int +handle_option(int letter, int options) +{ +switch(letter) + { + case N_FOFFSETS: file_offsets = TRUE; break; + case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */ + case N_LBUFFER: line_buffered = TRUE; break; + case N_LOFFSETS: line_offsets = number = TRUE; break; + case N_NOJIT: use_jit = FALSE; break; + case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break; + case N_NO_GROUP_SEPARATOR: group_separator = NULL; break; + case 'a': binary_files = BIN_TEXT; break; + case 'c': count_only = TRUE; break; + case N_POSIX_DIGIT: posix_digit = TRUE; break; + case 'E': case_restrict = TRUE; break; + case 'F': options |= PCRE2_LITERAL; break; + case 'H': filenames = FN_FORCE; break; + case 'I': binary_files = BIN_NOMATCH; break; + case 'h': filenames = FN_NONE; break; + case 'i': options |= PCRE2_CASELESS; break; + case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break; + case 'L': filenames = FN_NOMATCH_ONLY; break; + case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break; + case 'n': number = TRUE; break; + + case 'o': + only_matching_last = add_number(0, only_matching_last); + if (only_matching == NULL) only_matching = only_matching_last; + break; + + case 'P': no_ucp = TRUE; break; + case 'q': quiet = TRUE; break; + case 'r': dee_action = dee_RECURSE; break; + case 's': silent = TRUE; break; + case 't': show_total_count = TRUE; break; + case 'u': options |= PCRE2_UTF | PCRE2_UCP; utf = TRUE; break; + case 'U': options |= PCRE2_UTF | PCRE2_MATCH_INVALID_UTF | PCRE2_UCP; + utf = TRUE; break; + case 'v': invert = TRUE; break; + + case 'V': + { + unsigned char buffer[128]; + (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer); + fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer); + } + pcre2grep_exit(0); + break; /* LCOV_EXCL_LINE - statement kept to avoid compiler warning */ + + case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break; + case 'x': extra_options |= 
PCRE2_EXTRA_MATCH_LINE; break; + case 'Z': printname_colon = printname_hyphen = 0; printname_nl = NULL; break; + + /* LCOV_EXCL_START - this is a "never event" */ + default: + fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter); + pcre2grep_exit(usage(2)); + /* LCOV_EXCL_STOP */ + } + +return options; +} + + + +/************************************************* +* Construct printed ordinal * +*************************************************/ + +/* This turns a number into "1st", "3rd", etc. Values that are 11, 12, or 13 +modulo 100 get "th". The result is in a static buffer, so it is overwritten by +the next call. */ + +static char * +ordin(int n) +{ +static char buffer[14]; +char *p = buffer; +sprintf(p, "%d", n); +while (*p != 0) p++; /* Find the end of the digits */ +n %= 100; +if (n >= 11 && n <= 13) n = 0; /* Forces the "th" suffix below */ +switch (n%10) + { + case 1: strcpy(p, "st"); break; + case 2: strcpy(p, "nd"); break; + case 3: strcpy(p, "rd"); break; + default: strcpy(p, "th"); break; + } +return buffer; +} + + + +/************************************************* +* Compile a single pattern * +*************************************************/ + +/* Do nothing if the pattern has already been compiled. This is the case for +include/exclude patterns read from a file. + +When the -F option has been used, each "pattern" may be a list of strings, +separated by line breaks. They will be matched literally. We split such a +string and compile the first substring, inserting an additional block into the +pattern chain. + +Arguments: + p points to the pattern block + options the PCRE options + fromfile TRUE if the pattern was read from a file + fromtext file name or identifying text (e.g. 
"include") + count 0 if this is the only command line pattern, or + number of the command line pattern, or + linenumber for a pattern from a file + +Returns: TRUE on success, FALSE after an error +*/ + +static BOOL +compile_pattern(patstr *p, int options, int fromfile, const char *fromtext, + int count) +{ +char *ps; +int errcode; +PCRE2_SIZE patlen, erroffset; +PCRE2_UCHAR errmessbuffer[ERRBUFSIZ]; + +if (p->compiled != NULL) return TRUE; +ps = p->string; +patlen = p->length; + +/* For a literal pattern that contains a line ending, only the text before the +first line ending is compiled here; the text after it is handed to +add_pattern() as a further block. */ + +if ((options & PCRE2_LITERAL) != 0) + { + int ellength; + char *eop = ps + patlen; + char *pe = end_of_line(ps, eop, &ellength); + + if (ellength != 0) + { + patlen = pe - ps - ellength; + if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE; + } + } + +p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode, + &erroffset, compile_context); + +/* Handle successful compile. Try JIT-compiling if supported and enabled. We +ignore any JIT compiler errors, relying on falling back to interpreting if +anything goes wrong with JIT. 
*/ + +if (p->compiled != NULL) + { +#ifdef SUPPORT_PCRE2GREP_JIT + if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE); +#endif + return TRUE; + } + +/* Handle compile errors. The offset that is reported is never greater than the +length of the pattern. */ + +if (erroffset > patlen) erroffset = patlen; +pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer)); + +if (fromfile) + { + fprintf(stderr, "pcre2grep: Error in regex in line %d of %s " + "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer); + } +else + { + if (count == 0) + fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n", + fromtext, (int)erroffset, errmessbuffer); + else + fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n", + ordin(count), fromtext, (int)erroffset, errmessbuffer); + } + +return FALSE; +} + + + +/************************************************* +* Read and compile a file of patterns * +*************************************************/ + +/* This is used for --filelist, --include-from, and --exclude-from. + +Arguments: + name the name of the file; "-" is stdin + patptr pointer to the pattern chain anchor + patlastptr pointer to the last pattern pointer + +Returns: TRUE if all went well + FALSE if the file cannot be opened, or if adding or compiling a + pattern fails + +Once the file has been opened, it is closed before returning unless it is +stdin. +*/ + +static BOOL +read_pattern_file(char *name, patstr **patptr, patstr **patlastptr) +{ +int linenumber = 0; +PCRE2_SIZE patlen; +FILE *f; +const char *filename; +char buffer[MAXPATLEN+20]; + +if (strcmp(name, "-") == 0) + { + f = stdin; + filename = stdin_name; + } +else + { + f = fopen(name, "r"); + if (f == NULL) + { + fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno)); + return FALSE; + } + filename = name; + } + +/* Trailing white space is removed from each line. A line that is then empty is +skipped, but it is still counted in linenumber. */ + +while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0) + { + while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--; + linenumber++; + if (patlen == 0) continue; /* Skip blank lines */ + + /* Note: this call to add_pattern() puts a pointer to the local variable + "buffer" into the pattern chain. 
However, that pointer is used only when + compiling the pattern, which happens immediately below, so we flatten it + afterwards, as a precaution against any later code trying to use it. */ + + *patlastptr = add_pattern(buffer, patlen, *patlastptr); + if (*patlastptr == NULL) + { + /* LCOV_EXCL_START - won't happen in testing */ + if (f != stdin) fclose(f); + return FALSE; + /* LCOV_EXCL_STOP */ + } + if (*patptr == NULL) *patptr = *patlastptr; + + /* This loop is needed because compiling a "pattern" when -F is set may add + on additional literal patterns if the original contains a newline. In the + common case, it never will, because read_one_line() stops at a newline. + However, the -N option can be used to give pcre2grep a different newline + setting. */ + + for(;;) + { + if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename, + linenumber)) + { + if (f != stdin) fclose(f); + return FALSE; + } + (*patlastptr)->string = NULL; /* Insurance */ + if ((*patlastptr)->next == NULL) break; + *patlastptr = (*patlastptr)->next; + } + } + +if (f != stdin) fclose(f); +return TRUE; +} + + + +/************************************************* +* Main program * +*************************************************/ + +/* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */ + +int +main(int argc, char **argv) +{ +int i, j; +int rc = 1; +BOOL only_one_at_top; +patstr *cp; +fnstr *fn; +omstr *om; +const char *locale_from = "--locale"; + +#ifdef SUPPORT_PCRE2GREP_JIT +pcre2_jit_stack *jit_stack = NULL; +#endif + +/* In Windows, stdout is set up as a text stream, which means that \n is +converted to \r\n. This causes output lines that are copied from the input to +change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure +that stdout is a binary stream. Note that this means all other output to stdout +must use STDOUT_NL to terminate lines. 
*/ + +#ifdef WIN32 +_setmode(_fileno(stdout), _O_BINARY); +#endif + +/* Process the options */ + +for (i = 1; i < argc; i++) + { + option_item *op = NULL; + char *option_data = (char *)""; /* default to keep compiler happy */ + BOOL longop; + BOOL longopwasequals = FALSE; + + if (argv[i][0] != '-') break; + + /* If we hit an argument that is just "-", it may be a reference to STDIN, + but only if we have previously had -e or -f to define the patterns. */ + + if (argv[i][1] == 0) + { + if (pattern_files != NULL || patterns != NULL) break; + else pcre2grep_exit(usage(2)); + } + + /* Handle a long name option, or -- to terminate the options */ + + if (argv[i][1] == '-') + { + char *arg = argv[i] + 2; + char *argequals = strchr(arg, '='); + + if (*arg == 0) /* -- terminates options */ + { + i++; + break; /* out of the options-handling loop */ + } + + longop = TRUE; + + /* Some long options have data that follows after =, for example file=name. + Some options have variations in the long name spelling: specifically, we + allow "regexp" because GNU grep allows it, though I personally go along + with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p". + These options are entered in the table as "regex(p)". Options can be in + both these categories. */ + + for (op = optionlist; op->one_char != 0; op++) + { + char *opbra = strchr(op->long_name, '('); + char *equals = strchr(op->long_name, '='); + + /* Handle options with only one spelling of the name */ + + if (opbra == NULL) /* Does not contain '(' */ + { + if (equals == NULL) /* Not thing=data case */ + { + if (strcmp(arg, op->long_name) == 0) break; + } + else /* Special case xxx=data */ + { + int oplen = (int)(equals - op->long_name); + int arglen = (argequals == NULL)? 
+ (int)strlen(arg) : (int)(argequals - arg); + if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0) + { + option_data = arg + arglen; + if (*option_data == '=') + { + option_data++; + longopwasequals = TRUE; + } + break; + } + } + } + + /* Handle options with an alternate spelling of the name */ + + else + { + char buff1[24]; + char buff2[24]; + int ret; + + int baselen = (int)(opbra - op->long_name); + int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1); + int arglen = (argequals == NULL || equals == NULL)? + (int)strlen(arg) : (int)(argequals - arg); + + if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name), + ret < 0 || ret > (int)sizeof(buff1)) || + (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1, + fulllen - baselen - 2, opbra + 1), + ret < 0 || ret > (int)sizeof(buff2))) + { + /* LCOV_EXCL_START - this is a "never" event */ + fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n", + op->long_name); + pcre2grep_exit(2); + /* LCOV_EXCL_STOP */ + } + + if (strncmp(arg, buff1, arglen) == 0 || + strncmp(arg, buff2, arglen) == 0) + { + if (equals != NULL && argequals != NULL) + { + option_data = argequals; + if (*option_data == '=') + { + option_data++; + longopwasequals = TRUE; + } + } + break; + } + } + } + + if (op->one_char == 0) + { + fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]); + pcre2grep_exit(usage(2)); + } + } + + /* One-char options; many that have no data may be in a single argument; we + continue till we hit the last one or one that needs data. 
*/ + + else + { + char *s = argv[i] + 1; + longop = FALSE; + + while (*s != 0) + { + for (op = optionlist; op->one_char != 0; op++) + { + if (*s == op->one_char) break; + } + if (op->one_char == 0) + { + fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n", + *s, argv[i]); + pcre2grep_exit(usage(2)); + } + + option_data = s+1; + + /* Break out if this is the last character in the string; it's handled + below like a single multi-char option. */ + + if (*option_data == 0) break; + + /* Check for a single-character option that has data: OP_OP_NUMBER(S) + are used for ones that either have a numerical number or defaults, i.e. + the data is optional. If a digit follows, there is data; if not, carry on + with other single-character options in the same string. */ + + if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS) + { + if (isdigit((unsigned char)(s[1]))) break; + } + else /* Check for an option with data */ + { + if (op->type != OP_NODATA) break; + } + + /* Handle a single-character option with no data, then loop for the + next character in the string. */ + + pcre2_options = handle_option(*s++, pcre2_options); + } + } + + /* At this point we should have op pointing to a matched option. If the type + is NO_DATA, it means that there is no data, and the option might set + something in the PCRE options. */ + + if (op->type == OP_NODATA) + { + pcre2_options = handle_option(op->one_char, pcre2_options); + continue; + } + + /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that + either has a value or defaults to something. It cannot have data in a + separate item. At the moment, the only such options are "colo(u)r", + and "only-matching". 
*/ + + if (*option_data == 0 && + (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER || + op->type == OP_OP_NUMBERS)) + { + switch (op->one_char) + { + case N_COLOUR: + colour_option = "auto"; + break; + + case 'o': + only_matching_last = add_number(0, only_matching_last); + if (only_matching == NULL) only_matching = only_matching_last; + break; + } + continue; + } + + /* Otherwise, find the data string for the option. */ + + if (*option_data == 0) + { + if (i >= argc - 1 || longopwasequals) + { + fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]); + pcre2grep_exit(usage(2)); + } + option_data = argv[++i]; + } + + /* If the option type is OP_OP_NUMBERS, the value is a number that is to be + added to a chain of numbers. */ + + if (op->type == OP_OP_NUMBERS) + { + unsigned long int n = decode_number(option_data, op, longop); + omdatastr *omd = (omdatastr *)op->dataptr; + *(omd->lastptr) = add_number((int)n, *(omd->lastptr)); + if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr); + } + + /* If the option type is OP_PATLIST, it's the -e option, or one of the + include/exclude options, which can be called multiple times to create lists + of patterns. */ + + else if (op->type == OP_PATLIST) + { + patdatastr *pd = (patdatastr *)op->dataptr; + *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data), + *(pd->lastptr)); + if (*(pd->lastptr) == NULL) goto EXIT2; + if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr); + } + + /* If the option type is OP_FILELIST, it's one of the options that names a + file. 
*/ + + else if (op->type == OP_FILELIST) + { + fndatastr *fd = (fndatastr *)op->dataptr; + fn = (fnstr *)malloc(sizeof(fnstr)); + if (fn == NULL) + { + /* LCOV_EXCL_START */ + fprintf(stderr, "pcre2grep: malloc failed\n"); + goto EXIT2; + /* LCOV_EXCL_STOP */ + } + fn->next = NULL; + fn->name = option_data; + if (*(fd->anchor) == NULL) + *(fd->anchor) = fn; + else + (*(fd->lastptr))->next = fn; + *(fd->lastptr) = fn; + } + + /* Handle OP_BINARY_FILES */ + + else if (op->type == OP_BINFILES) + { + if (strcmp(option_data, "binary") == 0) + binary_files = BIN_BINARY; + else if (strcmp(option_data, "without-match") == 0) + binary_files = BIN_NOMATCH; + else if (strcmp(option_data, "text") == 0) + binary_files = BIN_TEXT; + else + { + fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n", + option_data); + pcre2grep_exit(usage(2)); + } + } + + /* Otherwise, deal with a single string or numeric data value. */ + + else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER && + op->type != OP_OP_NUMBER && op->type != OP_SIZE) + { + *((char **)op->dataptr) = option_data; + } + else + { + unsigned long int n = decode_number(option_data, op, longop); + if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n; + else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n; + else *((int *)op->dataptr) = n; + } + } + +/* Options have been decoded. If -C was used, its value is used as a default +for -A and -B. */ + +if (both_context > 0) + { + if (after_context == 0) after_context = both_context; + if (before_context == 0) before_context = both_context; + } + +/* Only one of --only-matching, --output, --file-offsets, or --line-offsets is +permitted. They display, each in their own way, only the data that has matched. 
+*/ + +only_matching_count = (only_matching != NULL) + (output_text != NULL) + + file_offsets + line_offsets; + +if (only_matching_count > 1) + { + fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, " + "--file-offsets and/or --line-offsets\n"); + pcre2grep_exit(usage(2)); + } + +/* Check that there is a big enough ovector for all -o settings. */ + +for (om = only_matching; om != NULL; om = om->next) + { + int n = om->groupnum; + if (n > (int)capture_max) + { + fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n); + fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n"); + goto EXIT2; + } + } + +/* Check the text supplied to --output for errors. */ + +if (output_text != NULL && + !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE)) + goto EXIT2; + +/* Set up default compile and match contexts and match data blocks. */ + +offset_size = capture_max + 1; +compile_context = pcre2_compile_context_create(NULL); +match_context = pcre2_match_context_create(NULL); +match_data_pair[0] = pcre2_match_data_create(offset_size, NULL); +match_data_pair[1] = pcre2_match_data_create(offset_size, NULL); +offsets_pair[0] = pcre2_get_ovector_pointer(match_data_pair[0]); +offsets_pair[1] = pcre2_get_ovector_pointer(match_data_pair[1]); +match_data = match_data_pair[0]; +offsets = offsets_pair[0]; +match_data_toggle = 0; + +/* If string (script) callouts are supported, set up the callout processing +function in the match context. */ + +#ifdef SUPPORT_PCRE2GREP_CALLOUT +pcre2_set_callout(match_context, pcre2grep_callout, NULL); +#endif + +/* Put limits into the match context. 
*/ + +if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit); +if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit); +if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit); + +/* If a locale has not been provided as an option, see if the LC_CTYPE or +LC_ALL environment variable is set, and if so, use it. */ + +if (locale == NULL) + { + locale = getenv("LC_ALL"); + locale_from = "LC_ALL"; + } + +if (locale == NULL) + { + locale = getenv("LC_CTYPE"); + locale_from = "LC_CTYPE"; + } + +/* If a locale is set, use it to generate the tables the PCRE needs. Passing +NULL to pcre2_maketables() means that malloc() is used to get the memory. */ + +if (locale != NULL) + { + if (setlocale(LC_CTYPE, locale) == NULL) + { + fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n", + locale, locale_from); + goto EXIT2; + } + character_tables = pcre2_maketables(NULL); + pcre2_set_character_tables(compile_context, character_tables); + } + +/* Sort out colouring */ + +if (colour_option != NULL && strcmp(colour_option, "never") != 0) + { + if (strcmp(colour_option, "always") == 0) +#ifdef WIN32 + do_ansi = !is_stdout_tty(), +#endif + do_colour = TRUE; + else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty(); + else + { + fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n", + colour_option); + goto EXIT2; + } + if (do_colour) + { + char *cs = getenv("PCRE2GREP_COLOUR"); + if (cs == NULL) cs = getenv("PCRE2GREP_COLOR"); + if (cs == NULL) cs = getenv("PCREGREP_COLOUR"); + if (cs == NULL) cs = getenv("PCREGREP_COLOR"); + if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS")); + if (cs == NULL) cs = getenv("GREP_COLOR"); + if (cs != NULL) + { + if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs; + } +#ifdef WIN32 + init_colour_output(); +#endif + } + } + +/* When colouring or otherwise identifying matching substrings, we need to find +all possible matches when there 
are multiple patterns. */ + +all_matches = do_colour || only_matching_count != 0; + +/* Sort out a newline setting. */ + +if (newline_arg != NULL) + { + for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *)); + endlinetype++) + { + if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break; + } + if (endlinetype < (int)(sizeof(newlines)/sizeof(char *))) + pcre2_set_newline(compile_context, endlinetype); + else + { + fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n", + newline_arg); + goto EXIT2; + } + } + +/* Find default newline convention */ + +else + { + (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype); + } + +/* Interpret the text values for -d and -D */ + +if (dee_option != NULL) + { + if (strcmp(dee_option, "read") == 0) dee_action = dee_READ; + else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE; + else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP; + else + { + fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option); + goto EXIT2; + } + } + +if (DEE_option != NULL) + { + if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ; + else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP; + else + { + fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option); + goto EXIT2; + } + } + +/* If no_ucp is set, remove PCRE2_UCP from the compile options. */ + +if (no_ucp) pcre2_options &= ~PCRE2_UCP; + +/* adjust the extra options. */ + +if (case_restrict) extra_options |= PCRE2_EXTRA_CASELESS_RESTRICT; +if (posix_digit) + extra_options |= (PCRE2_EXTRA_ASCII_BSD | PCRE2_EXTRA_ASCII_DIGIT); + +/* Set the extra options in the compile context. */ + +(void)pcre2_set_compile_extra_options(compile_context, extra_options); + +/* If use_jit is set, check whether JIT is available. If not, do not try +to use JIT. 
*/ + +if (use_jit) + { + uint32_t answer; + (void)pcre2_config(PCRE2_CONFIG_JIT, &answer); + if (!answer) use_jit = FALSE; + } + +/* Get memory for the main buffer. */ + +if (bufthird <= 0) + { + fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n"); + goto EXIT2; + } + +bufsize = 3*bufthird; +main_buffer = (char *)malloc(bufsize); + +if (main_buffer == NULL) + { + /* LCOV_EXCL_START */ + fprintf(stderr, "pcre2grep: malloc failed\n"); + goto EXIT2; + /* LCOV_EXCL_STOP */ + } + +/* If no patterns were provided by -e, and there are no files provided by -f, +the first argument is the one and only pattern, and it must exist. */ + +if (patterns == NULL && pattern_files == NULL) + { + if (i >= argc) return usage(2); + patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]), + NULL); + i++; + if (patterns == NULL) goto EXIT2; + } + +/* Compile the patterns that were provided on the command line, either by +multiple uses of -e or as a single unkeyed pattern. We cannot do this until +after all the command-line options are read so that we know which PCRE options +to use. When -F is used, compile_pattern() may add another block into the +chain, so we must not access the next pointer till after the compile. */ + +for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next) + { + if (!compile_pattern(cp, pcre2_options, FALSE, "command-line", + (j == 1 && patterns->next == NULL)? 0 : j)) + goto EXIT2; + } + +/* Read and compile the regular expressions that are provided in files. */ + +for (fn = pattern_files; fn != NULL; fn = fn->next) + { + if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2; + } + +/* Unless JIT has been explicitly disabled, arrange a stack for it to use. 
*/ + +#ifdef SUPPORT_PCRE2GREP_JIT +if (use_jit) + { + jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL); + if (jit_stack != NULL ) + pcre2_jit_stack_assign(match_context, NULL, jit_stack); + } +#endif + +/* -F, -w, and -x do not apply to include or exclude patterns, so we must +adjust the options. */ + +pcre2_options &= ~PCRE2_LITERAL; +(void)pcre2_set_compile_extra_options(compile_context, 0); + +/* If there are include or exclude patterns read from the command line, compile +them. */ + +for (j = 0; j < 4; j++) + { + int k; + for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next) + { + if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j], + (k == 1 && cp->next == NULL)? 0 : k)) + goto EXIT2; + } + } + +/* Read and compile include/exclude patterns from files. */ + +for (fn = include_from; fn != NULL; fn = fn->next) + { + if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last)) + goto EXIT2; + } + +for (fn = exclude_from; fn != NULL; fn = fn->next) + { + if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last)) + goto EXIT2; + } + +/* If there are no files that contain lists of files to search, and there are +no file arguments, search stdin, and then exit. */ + +if (file_lists == NULL && i >= argc) + { + /* Seeking on a stdin that has been read through a buffer is not portable, + so attempt to remove the buffer, to work around reported issues + affecting several BSDs and AIX */ + if (count_limit >= 0) + setbuf(stdin, NULL); + rc = pcre2grep(stdin, FR_PLAIN, stdin_name, + (filenames > FN_DEFAULT)? stdin_name : NULL); + goto EXIT; + } + +/* If any files that contain a list of files to search have been specified, +read them line by line and search the given files. 
*/ + +for (fn = file_lists; fn != NULL; fn = fn->next) + { + char buffer[FNBUFSIZ]; + FILE *fl; + if (strcmp(fn->name, "-") == 0) fl = stdin; else + { + fl = fopen(fn->name, "rb"); + if (fl == NULL) + { + fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name, + strerror(errno)); + goto EXIT2; + } + } + while (fgets(buffer, sizeof(buffer), fl) != NULL) + { + int frc; + char *end = buffer + (int)strlen(buffer); + while (end > buffer && isspace((unsigned char)(end[-1]))) end--; + *end = 0; + if (*buffer != 0) + { + frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE); + if (frc > 1) rc = frc; + else if (frc == 0 && rc == 1) rc = 0; + } + } + if (fl != stdin) fclose(fl); + } + +/* After handling file-list, work through remaining arguments. Pass in the fact +that there is only one argument at top level - this suppresses the file name if +the argument is not a directory and filenames are not otherwise forced. */ + +only_one_at_top = i == argc - 1 && file_lists == NULL; + +for (; i < argc; i++) + { + int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE, + only_one_at_top); + if (frc > 1) rc = frc; + else if (frc == 0 && rc == 1) rc = 0; + } + +/* Show the total number of matches if requested, but not if only one file's +count was printed. 
*/ + +if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY) + { + if (counts_printed != 0 && filenames >= FN_DEFAULT) + fprintf(stdout, "TOTAL:"); + fprintf(stdout, "%lu" STDOUT_NL, total_count); + } + +EXIT: +#ifdef SUPPORT_PCRE2GREP_JIT +pcre2_jit_free_unused_memory(NULL); +if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack); +#endif + +free(main_buffer); +if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables); + +pcre2_compile_context_free(compile_context); +pcre2_match_context_free(match_context); +pcre2_match_data_free(match_data_pair[0]); +pcre2_match_data_free(match_data_pair[1]); + +free_pattern_chain(patterns); +free_pattern_chain(include_patterns); +free_pattern_chain(include_dir_patterns); +free_pattern_chain(exclude_patterns); +free_pattern_chain(exclude_dir_patterns); + +free_file_chain(exclude_from); +free_file_chain(include_from); +free_file_chain(pattern_files); +free_file_chain(file_lists); + +while (only_matching != NULL) + { + omstr *this = only_matching; + only_matching = this->next; + free(this); + } + +pcre2grep_exit(rc); + +EXIT2: +rc = 2; +goto EXIT; +} + +/* End of pcre2grep */ diff --git a/src/pcre2posix.c b/src/pcre2posix.c new file mode 100644 index 0000000..9fe3199 --- /dev/null +++ b/src/pcre2posix.c @@ -0,0 +1,425 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module is a wrapper that provides a POSIX API to the underlying PCRE2 +functions. The functions are called pcre2_regcomp(), pcre2_regexec(), etc. 
+pcre2posix.h defines the POSIX names as macros for the corresponding pcre2_xxx +functions, so any program that includes it and uses the POSIX names will call +the PCRE2 implementations instead. */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef PCRE2POSIX_SHARED +#undef PCRE2_STATIC +#endif + + +/* Ensure that the PCRE2POSIX_EXP_xxx macros are set appropriately for +compiling these functions. This must come before including pcre2posix.h, where +they are set for an application (using these functions) if they have not +previously been set. */ + +#if defined(_WIN32) && (defined(PCRE2POSIX_SHARED) || !defined(PCRE2_STATIC)) +# define PCRE2POSIX_EXP_DECL extern __declspec(dllexport) +# define PCRE2POSIX_EXP_DEFN __declspec(dllexport) +#endif + +/* Older versions of MSVC lack snprintf(). This define allows for +warning/error-free compilation and testing with MSVC compilers back to at least +MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */ + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define snprintf _snprintf +#define BROKEN_SNPRINTF +#endif + + +/* Compile-time error numbers start at this value. It should probably never be +changed. This #define is a copy of the one in pcre2_internal.h. */ + +#define COMPILE_ERROR_BASE 100 + + +/* Standard C headers */ + +#include <ctype.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* PCRE2 headers */ + +#include "pcre2.h" +#include "pcre2posix.h" + +/* Table to translate PCRE2 compile time error codes into POSIX error codes. +Only a few PCRE2 errors with a value greater than 23 turn into special POSIX +codes: most go to REG_BADPAT. The second table lists, in pairs, those that +don't. 
*/ + +static const int eint1[] = { + 0, /* No error */ + REG_EESCAPE, /* \ at end of pattern */ + REG_EESCAPE, /* \c at end of pattern */ + REG_EESCAPE, /* unrecognized character follows \ */ + REG_BADBR, /* numbers out of order in {} quantifier */ + /* 5 */ + REG_BADBR, /* number too big in {} quantifier */ + REG_EBRACK, /* missing terminating ] for character class */ + REG_ECTYPE, /* invalid escape sequence in character class */ + REG_ERANGE, /* range out of order in character class */ + REG_BADRPT, /* nothing to repeat */ + /* 10 */ + REG_ASSERT, /* internal error: unexpected repeat */ + REG_BADPAT, /* unrecognized character after (? or (?- */ + REG_BADPAT, /* POSIX named classes are supported only within a class */ + REG_BADPAT, /* POSIX collating elements are not supported */ + REG_EPAREN, /* missing ) */ + /* 15 */ + REG_ESUBREG, /* reference to non-existent subpattern */ + REG_INVARG, /* pattern passed as NULL */ + REG_INVARG, /* unknown compile-time option bit(s) */ + REG_EPAREN, /* missing ) after (?# comment */ + REG_ESIZE, /* parentheses nested too deeply */ + /* 20 */ + REG_ESIZE, /* regular expression too large */ + REG_ESPACE, /* failed to get memory */ + REG_EPAREN, /* unmatched closing parenthesis */ + REG_ASSERT /* internal error: code overflow */ + }; + +static const int eint2[] = { + 30, REG_ECTYPE, /* unknown POSIX class name */ + 32, REG_INVARG, /* this version of PCRE2 does not have Unicode support */ + 37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */ + 56, REG_INVARG, /* internal error: unknown newline setting */ + 92, REG_INVARG, /* invalid option bits with PCRE2_LITERAL */ + 99, REG_EESCAPE /* \K in lookaround */ +}; + +/* Table of texts corresponding to POSIX error codes */ + +static const char *const pstring[] = { + "", /* Dummy for value 0 */ + "internal error", /* REG_ASSERT */ + "invalid repeat counts in {}", /* BADBR */ + "pattern error", /* BADPAT */ + "? 
* + invalid", /* BADRPT */ + "unbalanced {}", /* EBRACE */ + "unbalanced []", /* EBRACK */ + "collation error - not relevant", /* ECOLLATE */ + "bad class", /* ECTYPE */ + "bad escape sequence", /* EESCAPE */ + "empty expression", /* EMPTY */ + "unbalanced ()", /* EPAREN */ + "bad range inside []", /* ERANGE */ + "expression too big", /* ESIZE */ + "failed to get memory", /* ESPACE */ + "bad back reference", /* ESUBREG */ + "bad argument", /* INVARG */ + "match failed" /* NOMATCH */ +}; + +static int message_len(const char *message, int offset) +{ +char buf[12]; + +/* 11 magic number comes from the format below */ +return (int)strlen(message) + 11 + snprintf(buf, sizeof(buf), "%d", offset); +} + +/************************************************* +* Translate error code to string * +*************************************************/ + +PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION +pcre2_regerror(int errcode, const regex_t *preg, char *errbuf, + size_t errbuf_size) +{ +int ret; +const char *message; +size_t len = 0; /* keeps 0 if snprintf is used */ + +message = (errcode <= 0 || errcode >= (int)(sizeof(pstring)/sizeof(char *)))? 
+ "unknown error code" : pstring[errcode]; + +if (preg != NULL && (int)preg->re_erroffset != -1) + { + /* no need to deal with UB in snprintf */ + if (errbuf_size > INT_MAX) errbuf_size = INT_MAX; + + /* there are 11 characters between message and offset, + update message_len() if changed */ + ret = snprintf(errbuf, errbuf_size, "%s at offset %d", message, + (int)preg->re_erroffset); + } +else + { + len = strlen(message); + if (errbuf_size != 0) + { + strncpy(errbuf, message, errbuf_size); + if (errbuf_size <= len) errbuf[errbuf_size - 1] = '\0'; + } + ret = (int)len; + } + +do { + if (ret < 0) + { +#ifdef BROKEN_SNPRINTF + /* _snprintf returns -1 on overflow and doesn't zero terminate */ + if (!len) + { + if (ret == -1 && errbuf_size != 0) errbuf[errbuf_size - 1] = '\0'; + + ret = message_len(message, (int)preg->re_erroffset); + break; + } +#endif + /* snprintf failed, will use a 14 char long message if possible */ + ret = 14; + if (errbuf_size != 0) + { + strncpy(errbuf, "internal error", errbuf_size); + if ((int)errbuf_size <= ret) errbuf[errbuf_size - 1] = '\0'; + } + } + else if (ret == (int)errbuf_size && !len) + { + /* pre C99 snprintf returns used, so redo ret to fix that */ + + ret = message_len(message, (int)preg->re_erroffset); + } +} while (0); + +return ret + 1; +} + + + +/************************************************* +* Free store held by a regex * +*************************************************/ + +PCRE2POSIX_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_regfree(regex_t *preg) +{ +pcre2_match_data_free(preg->re_match_data); +pcre2_code_free(preg->re_pcre2_code); +} + + + +/************************************************* +* Compile a regular expression * +*************************************************/ + +/* +Arguments: + preg points to a structure for recording the compiled expression + pattern the pattern to compile + cflags compilation flags + +Returns: 0 on success + various non-zero codes on failure +*/ + +PCRE2POSIX_EXP_DEFN int 
PCRE2_CALL_CONVENTION +pcre2_regcomp(regex_t *preg, const char *pattern, int cflags) +{ +PCRE2_SIZE erroffset; +PCRE2_SIZE patlen; +int errorcode; +int options = 0; +int re_nsub = 0; + +patlen = ((cflags & REG_PEND) != 0)? (PCRE2_SIZE)(preg->re_endp - pattern) : + PCRE2_ZERO_TERMINATED; + +if ((cflags & REG_ICASE) != 0) options |= PCRE2_CASELESS; +if ((cflags & REG_NEWLINE) != 0) options |= PCRE2_MULTILINE; +if ((cflags & REG_DOTALL) != 0) options |= PCRE2_DOTALL; +if ((cflags & REG_NOSPEC) != 0) options |= PCRE2_LITERAL; +if ((cflags & REG_UTF) != 0) options |= PCRE2_UTF; +if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP; +if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY; + +preg->re_cflags = cflags; +preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, patlen, options, + &errorcode, &erroffset, NULL); +preg->re_erroffset = erroffset; + +if (preg->re_pcre2_code == NULL) + { + unsigned int i; + + /* A negative value is a UTF error; otherwise all error codes are greater + than COMPILE_ERROR_BASE, but check, just in case. 
*/ + + if (errorcode < COMPILE_ERROR_BASE) return REG_BADPAT; + errorcode -= COMPILE_ERROR_BASE; + + if (errorcode < (int)(sizeof(eint1)/sizeof(const int))) + return eint1[errorcode]; + for (i = 0; i < sizeof(eint2)/sizeof(const int); i += 2) + if (errorcode == eint2[i]) return eint2[i+1]; + return REG_BADPAT; + } + +(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code, + PCRE2_INFO_CAPTURECOUNT, &re_nsub); +preg->re_nsub = (size_t)re_nsub; +preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL); +preg->re_erroffset = (size_t)(-1); /* No meaning after successful compile */ + +if (preg->re_match_data == NULL) + { + /* LCOV_EXCL_START */ + pcre2_code_free(preg->re_pcre2_code); + return REG_ESPACE; + /* LCOV_EXCL_STOP */ + } + +return 0; +} + + + +/************************************************* +* Match a regular expression * +*************************************************/ + +/* A suitable match_data block, large enough to hold all possible captures, was +obtained when the pattern was compiled, to save having to allocate and free it +for each match. If REG_NOSUB was specified at compile time, the nmatch and +pmatch arguments are ignored, and the only result is yes/no/error. */ + +PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_regexec(const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags) +{ +int rc, so, eo; +int options = 0; +pcre2_match_data *md = (pcre2_match_data *)preg->re_match_data; + +if (string == NULL) return REG_INVARG; + +if ((eflags & REG_NOTBOL) != 0) options |= PCRE2_NOTBOL; +if ((eflags & REG_NOTEOL) != 0) options |= PCRE2_NOTEOL; +if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE2_NOTEMPTY; + +/* When REG_NOSUB was specified, or if no vector has been passed in which to +put captured strings, ensure that nmatch is zero. This will stop any attempt to +write to pmatch. 
*/ + +if ((preg->re_cflags & REG_NOSUB) != 0 || pmatch == NULL) nmatch = 0; + +/* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings. +The man page from OS X says "REG_STARTEND affects only the location of the +string, not how it is matched". That is why the "so" value is used to bump the +start location rather than being passed as a PCRE2 "starting offset". */ + +if ((eflags & REG_STARTEND) != 0) + { + if (pmatch == NULL) return REG_INVARG; + so = pmatch[0].rm_so; + eo = pmatch[0].rm_eo; + } +else + { + so = 0; + eo = (int)strlen(string); + } + +rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code, + (PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL); + +/* Successful match */ + +if (rc >= 0) + { + size_t i; + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); + if ((size_t)rc > nmatch) rc = (int)nmatch; + for (i = 0; i < (size_t)rc; i++) + { + pmatch[i].rm_so = (ovector[i*2] == PCRE2_UNSET)? -1 : + (int)(ovector[i*2] + so); + pmatch[i].rm_eo = (ovector[i*2+1] == PCRE2_UNSET)? -1 : + (int)(ovector[i*2+1] + so); + } + for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; + return 0; + } + +/* Unsuccessful match */ + +if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21) + return REG_INVARG; + +/* Most of these are events that won't occur during testing, so exclude them +from coverage. 
*/ + +switch(rc) + { + case PCRE2_ERROR_HEAPLIMIT: return REG_ESPACE; + case PCRE2_ERROR_NOMATCH: return REG_NOMATCH; + + /* LCOV_EXCL_START */ + case PCRE2_ERROR_BADMODE: return REG_INVARG; + case PCRE2_ERROR_BADMAGIC: return REG_INVARG; + case PCRE2_ERROR_BADOPTION: return REG_INVARG; + case PCRE2_ERROR_BADUTFOFFSET: return REG_INVARG; + case PCRE2_ERROR_MATCHLIMIT: return REG_ESPACE; + case PCRE2_ERROR_NOMEMORY: return REG_ESPACE; + case PCRE2_ERROR_NULL: return REG_INVARG; + default: return REG_ASSERT; + /* LCOV_EXCL_STOP */ + } +} + +/* End of pcre2posix.c */ diff --git a/src/pcre2posix.h b/src/pcre2posix.h new file mode 100644 index 0000000..cccea57 --- /dev/null +++ b/src/pcre2posix.h @@ -0,0 +1,187 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE2 is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. This is +the public header file to be #included by applications that call PCRE2 via the +POSIX wrapper interface. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2023 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifndef PCRE2POSIX_H_IDEMPOTENT_GUARD +#define PCRE2POSIX_H_IDEMPOTENT_GUARD + +/* Have to include stdlib.h in order to ensure that size_t is defined. */ + +#include <stdlib.h> + +/* Allow for C++ users */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Options, mostly defined by POSIX, but with some extras. 
*/ + +#define REG_ICASE 0x0001 /* Maps to PCRE2_CASELESS */ +#define REG_NEWLINE 0x0002 /* Maps to PCRE2_MULTILINE */ +#define REG_NOTBOL 0x0004 /* Maps to PCRE2_NOTBOL */ +#define REG_NOTEOL 0x0008 /* Maps to PCRE2_NOTEOL */ +#define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE2_DOTALL */ +#define REG_NOSUB 0x0020 /* Do not report what was matched */ +#define REG_UTF 0x0040 /* NOT defined by POSIX; maps to PCRE2_UTF */ +#define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */ +#define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE2_NOTEMPTY */ +#define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE2_UNGREEDY */ +#define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE2_UCP */ +#define REG_PEND 0x0800 /* GNU feature: pass end pattern by re_endp */ +#define REG_NOSPEC 0x1000 /* Maps to PCRE2_LITERAL */ + +/* This is not used by PCRE2, but by defining it we make it easier +to slot PCRE2 into existing programs that make POSIX calls. */ + +#define REG_EXTENDED 0 + +/* Error values. Not all these are relevant or used by the wrapper. */ + +enum { + REG_ASSERT = 1, /* internal error ? */ + REG_BADBR, /* invalid repeat counts in {} */ + REG_BADPAT, /* pattern error */ + REG_BADRPT, /* ? * + invalid */ + REG_EBRACE, /* unbalanced {} */ + REG_EBRACK, /* unbalanced [] */ + REG_ECOLLATE, /* collation error - not relevant */ + REG_ECTYPE, /* bad class */ + REG_EESCAPE, /* bad escape sequence */ + REG_EMPTY, /* empty expression */ + REG_EPAREN, /* unbalanced () */ + REG_ERANGE, /* bad range inside [] */ + REG_ESIZE, /* expression too big */ + REG_ESPACE, /* failed to get memory */ + REG_ESUBREG, /* bad back reference */ + REG_INVARG, /* bad argument */ + REG_NOMATCH /* match failed */ +}; + + +/* The structure representing a compiled regular expression. It is also used +for passing the pattern end pointer when REG_PEND is set. 
*/ + +typedef struct { + void *re_pcre2_code; + void *re_match_data; + const char *re_endp; + size_t re_nsub; + size_t re_erroffset; + int re_cflags; +} regex_t; + +/* The structure in which a captured offset is returned. */ + +typedef int regoff_t; + +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} regmatch_t; + +/* When compiling with the MSVC compiler, it is sometimes necessary to include +a "calling convention" before exported function names. (This is secondhand +information; I know nothing about MSVC myself). For example, something like + + void __cdecl function(....) + +might be needed. In order to make this easy, all the exported functions have +PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not +set, we ensure here that it has no effect. */ + +#ifndef PCRE2_CALL_CONVENTION +#define PCRE2_CALL_CONVENTION +#endif + +#ifndef PCRE2_EXPORT +#define PCRE2_EXPORT +#endif + +/* When an application links to a PCRE2 DLL in Windows, the symbols that are +imported have to be identified as such. When building PCRE2, the appropriate +export settings are needed, and are set in pcre2posix.c before including this +file. */ + +/* By default, we use the standard "extern" declarations. */ + +#ifndef PCRE2POSIX_EXP_DECL +# if defined(_WIN32) && defined(PCRE2POSIX_SHARED) && !defined(PCRE2_STATIC) +# define PCRE2POSIX_EXP_DECL extern __declspec(dllimport) +# define PCRE2POSIX_EXP_DEFN __declspec(dllimport) +# else +# define PCRE2POSIX_EXP_DECL extern PCRE2_EXPORT +# define PCRE2POSIX_EXP_DEFN +# endif +#endif + +/* The functions. The actual code is in functions with pcre2_xxx names for +uniqueness. POSIX names are provided as macros for API compatibility with POSIX +regex functions. It's done this way to ensure to they are always linked from +the PCRE2 library and not by accident from elsewhere (regex_t differs in size +elsewhere). 
*/ + +PCRE2POSIX_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_regcomp(regex_t *, const char *, int); +PCRE2POSIX_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_regexec(const regex_t *, const char *, size_t, + regmatch_t *, int); +PCRE2POSIX_EXP_DECL size_t PCRE2_CALL_CONVENTION pcre2_regerror(int, const regex_t *, char *, size_t); +PCRE2POSIX_EXP_DECL void PCRE2_CALL_CONVENTION pcre2_regfree(regex_t *); + +#define regcomp pcre2_regcomp +#define regexec pcre2_regexec +#define regerror pcre2_regerror +#define regfree pcre2_regfree + +/* Debian had a patch that used different names. These are now here to save +them having to maintain their own patch, but are not documented by PCRE2. */ + +#define PCRE2regcomp pcre2_regcomp +#define PCRE2regexec pcre2_regexec +#define PCRE2regerror pcre2_regerror +#define PCRE2regfree pcre2_regfree + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* PCRE2POSIX_H_IDEMPOTENT_GUARD */ + +/* End of pcre2posix.h */ diff --git a/src/pcre2posix_test.c b/src/pcre2posix_test.c new file mode 100644 index 0000000..c9c03a4 --- /dev/null +++ b/src/pcre2posix_test.c @@ -0,0 +1,209 @@ +/************************************************* +* PCRE2 POSIX interface test program * +*************************************************/ + +/* +Written by Philip Hazel, December 2022 +Copyright (c) 2022 +File last edited: December 2022 + +This program tests the POSIX wrapper to the PCRE2 regular expression library. +The main PCRE2 test program is pcre2test, which also tests these function +calls. This little program is needed to test the case where the client includes +pcre2posix.h but not pcre2.h, mainly to make sure that it builds successfully. +However, the code is written as a flexible test program to which extra tests +can be added. + +Compile with -lpcre2-posix -lpcre2-8 + +If run with no options, there is no output on success, and the return code is +zero. If any test fails there is output to stderr, and the return code is 1. 
+ +For testing purposes, the "-v" option causes verification output to be written +to stdout. */ + +#include <stdio.h> +#include <string.h> +#include <pcre2posix.h> + +#define CAPCOUNT 5 /* Number of captures supported */ +#define PRINTF if (v) printf /* Shorthand for testing output */ + +/* This vector contains compiler flags for each pattern that is tested. */ + +static int cflags[] = { + 0, /* Test 0 */ + REG_ICASE, /* Test 1 */ + 0, /* Test 2 */ + REG_NEWLINE, /* Test 3 */ + 0 /* Test 4 */ +}; + +/* This vector contains match flags for each pattern that is tested. */ + +static int mflags[] = { + 0, /* Test 0 */ + 0, /* Test 1 */ + 0, /* Test 2 */ + REG_NOTBOL, /* Test 3 */ + 0 /* Test 4 */ +}; + +/* Automate the number of patterns */ + +#define count (int)(sizeof(cflags)/sizeof(int)) + +/* The data for each pattern consists of a pattern string, followed by any +number of subject strings, terminated by NULL. Some tests share data, but use +different flags. */ + +static const char *data0_1[] = { "posix", "lower posix", "upper POSIX", NULL }; +static const char *data2_3[] = { "(*LF)^(cat|dog)", "catastrophic\ncataclysm", + "dogfight", "no animals", NULL }; +static const char *data4[] = { "*badpattern", NULL }; + +/* Index the data strings */ + +static char **data[] = { + (char **)(&data0_1), + (char **)(&data0_1), + (char **)(&data2_3), + (char **)(&data2_3), + (char **)(&data4) +}; + +/* The expected results for each pattern consist of a compiler return code, +optionally followed, for each subject string, by a match return code and, for a +successful match, up to CAPCOUNT pairs of returned match data. 
*/ + +static int results0[] = { + 0, /* Compiler rc */ + 0, 6, 11, /* 1st match */ + REG_NOMATCH /* 2nd match */ +}; + +static int results1[] = { + 0, /* Compiler rc */ + 0, 6, 11, /* 1st match */ + 0, 6, 11 /* 2nd match */ +}; + +static int results2[] = { + 0, /* Compiler rc */ + 0, 0, 3, 0, 3, /* 1st match */ + 0, 0, 3, 0, 3, /* 2nd match */ + REG_NOMATCH /* 3rd match */ +}; + +static int results3[] = { + 0, /* Compiler rc */ + 0, 13, 16, 13, 16, /* 1st match */ + REG_NOMATCH, /* 2nd match */ + REG_NOMATCH /* 3rd match */ +}; + +static int results4[] = { + REG_BADRPT /* Compiler rc */ +}; + +/* Index the result vectors */ + +static int *results[] = { + (int *)(&results0), + (int *)(&results1), + (int *)(&results2), + (int *)(&results3), + (int *)(&results4) +}; + +/* And here is the program */ + +int main(int argc, char **argv) +{ +regex_t re; +regmatch_t match[CAPCOUNT]; +int v = argc > 1 && strcmp(argv[1], "-v") == 0; + +PRINTF("Test of pcre2posix.h without pcre2.h\n"); + +for (int i = 0; i < count; i++) + { + char *pattern = data[i][0]; + char **subjects = data[i] + 1; + int *rd = results[i]; + int rc = regcomp(&re, pattern, cflags[i]); + + PRINTF("Pattern: %s flags=0x%02x\n", pattern, cflags[i]); + + if (rc != *rd) + { + fprintf(stderr, "Unexpected compile error %d (expected %d)\n", rc, *rd); + fprintf(stderr, "Pattern is: %s\n", pattern); + return 1; + } + + if (rc != 0) + { + if (v) + { + char buffer[256]; + (void)regerror(rc, &re, buffer, sizeof(buffer)); + PRINTF("Compile error %d: %s (expected)\n", rc, buffer); + } + continue; + } + + for (; *subjects != NULL; subjects++) + { + rc = regexec(&re, *subjects, CAPCOUNT, match, mflags[i]); + + PRINTF("Subject: %s\n", *subjects); + PRINTF("Return: %d", rc); + + if (rc != *(++rd)) + { + PRINTF("\n"); + fprintf(stderr, "Unexpected match error %d (expected %d)\n", rc, *rd); + fprintf(stderr, "Pattern is: %s\n", pattern); + fprintf(stderr, "Subject is: %s\n", *subjects); + return 1; + } + + if (rc == 0) + { + for 
(int j = 0; j < CAPCOUNT; j++) + { + regmatch_t *m = match + j; + if (m->rm_so < 0) continue; + if (m->rm_so != *(++rd) || m->rm_eo != *(++rd)) + { + PRINTF("\n"); + fprintf(stderr, "Mismatched results for successful match\n"); + fprintf(stderr, "Pattern is: %s\n", pattern); + fprintf(stderr, "Subject is: %s\n", *subjects); + fprintf(stderr, "Result %d: expected %d %d received %d %d\n", + j, rd[-1], rd[0], m->rm_so, m->rm_eo); + return 1; + } + PRINTF(" (%d %d %d)", j, m->rm_so, m->rm_eo); + } + } + + else if (v) + { + char buffer[256]; + (void)regerror(rc, &re, buffer, sizeof(buffer)); + PRINTF(": %s (expected)", buffer); + } + + PRINTF("\n"); + } + + regfree(&re); + } + +PRINTF("End of test\n"); +return 0; +} + +/* End of pcre2posix_test.c */ diff --git a/src/pcre2test.c b/src/pcre2test.c new file mode 100644 index 0000000..3790345 --- /dev/null +++ b/src/pcre2test.c @@ -0,0 +1,9708 @@ +/************************************************* +* PCRE2 testing program * +*************************************************/ + +/* PCRE2 is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. In 2014 +the API was completely revised and '2' was added to the name, because the old +API, which had lasted for 16 years, could not accommodate new requirements. At +the same time, this testing program was re-designed because its original +hacked-up (non-) design had also run out of steam. + + Written by Philip Hazel + Original code Copyright (c) 1997-2012 University of Cambridge + Rewritten code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2 +libraries in a single program, though its input and output are always 8-bit. +It is different from modules such as pcre2_compile.c in the library itself, +which are compiled separately for each code unit width. If two widths are +enabled, for example, pcre2_compile.c is compiled twice. In contrast, +pcre2test.c is compiled only once, and linked with all the enabled libraries. +Therefore, it must not make use of any of the macros from pcre2.h or +pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. 
It does, however, make +use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that +it references only the enabled library functions. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <ctype.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <time.h> +#include <locale.h> +#include <errno.h> + +#if defined NATIVE_ZOS +#include "pcrzoscs.h" +/* That header is not included in the main PCRE2 distribution because other +apparatus is needed to compile pcre2test for z/OS. The header can be found in +the special z/OS distribution, which is available from www.zaconsultants.net or +from www.cbttape.org. */ +#endif + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +/* Debugging code enabler */ + +/* #define DEBUG_SHOW_MALLOC_ADDRESSES */ + +/* Both libreadline and libedit are optionally supported */ +#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) +#if defined(SUPPORT_LIBREADLINE) +#include <readline/readline.h> +#include <readline/history.h> +#else +#if defined(HAVE_EDITLINE_READLINE_H) +#include <editline/readline.h> +#elif defined(HAVE_EDIT_READLINE_READLINE_H) +#include <edit/readline/readline.h> +#else +#include <readline.h> +/* GNU readline defines this macro but libedit doesn't, if that ever changes +this needs to be updated or the build could break */ +#ifdef RL_VERSION_MAJOR +#include <history.h> +#endif +#endif +#endif +#endif + +/* Put the test for interactive input into a macro so that it can be changed if +required for different environments. */ + +#define INTERACTIVE(f) isatty(fileno(f)) + + +/* ---------------------- System-specific definitions ---------------------- */ + +/* A number of things vary for Windows builds. Originally, pcretest opened its +input and output without "b"; then I was told that "b" was needed in some +environments, so it was added for release 5.0 to both the input and output. (It +makes no difference on Unix-like systems.) Later I was told that it is wrong +for the input on Windows. I've now abstracted the modes into macros that are +set here, to make it easier to fiddle with them, and removed "b" from the input +mode under Windows. 
The BINARY versions are used when saving/restoring compiled +patterns. */ + +#if defined(_WIN32) || defined(WIN32) +#include <io.h> /* For _setmode() */ +#include <fcntl.h> /* For _O_BINARY */ +#define INPUT_MODE "r" +#define OUTPUT_MODE "wb" +#define BINARY_INPUT_MODE "rb" +#define BINARY_OUTPUT_MODE "wb" + +#ifndef isatty +#define isatty _isatty /* This is what Windows calls them, I'm told, */ +#endif /* though in some environments they seem to */ + /* be already defined, hence the #ifndefs. */ +#ifndef fileno +#define fileno _fileno +#endif + +/* A user sent this fix for Borland Builder 5 under Windows. */ + +#ifdef __BORLANDC__ +#define _setmode(handle, mode) setmode(handle, mode) +#endif + +/* Not Windows */ + +#else +#include <sys/time.h> /* These two includes are needed */ +#include <sys/resource.h> /* for setrlimit(). */ +#if defined NATIVE_ZOS /* z/OS uses non-binary I/O */ +#define INPUT_MODE "r" +#define OUTPUT_MODE "w" +#define BINARY_INPUT_MODE "rb" +#define BINARY_OUTPUT_MODE "wb" +#else +#define INPUT_MODE "rb" +#define OUTPUT_MODE "wb" +#define BINARY_INPUT_MODE "rb" +#define BINARY_OUTPUT_MODE "wb" +#endif +#endif + +/* VMS-specific code was included as suggested by a VMS user [1]. Another VMS +user [2] provided alternative code which worked better for him. I have +commented out the original, but kept it around just in case. */ + +#ifdef __VMS +#include <ssdef.h> +/* These two includes came from [2]. */ +#include descrip +#include lib$routines +/* void vms_setsymbol( char *, char *, int ); Original code from [1]. */ +#endif + +/* old VC and older compilers don't support %td or %zu, and even some that +claim to be C99 don't support it (hence DISABLE_PERCENT_ZT). 
*/ + +#if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \ + (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L))) +#ifdef _WIN64 +#define PTR_FORM "lld" +#define SIZ_FORM "llu" +#else +#define PTR_FORM "ld" +#define SIZ_FORM "lu" +#endif +#else +#define PTR_FORM "td" +#define SIZ_FORM "zu" +#endif + +/* ------------------End of system-specific definitions -------------------- */ + +/* Glueing macros that are used in several places below. */ + +#define glue(a,b) a##b +#define G(a,b) glue(a,b) + +/* Miscellaneous parameters and manifests */ + +#ifndef CLOCKS_PER_SEC +#ifdef CLK_TCK +#define CLOCKS_PER_SEC CLK_TCK +#else +#define CLOCKS_PER_SEC 100 +#endif +#endif + +#define CFORE_UNSET UINT32_MAX /* Unset value for startend/cfail/cerror fields */ +#define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type field */ +#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */ +#define DEFAULT_OVECCOUNT 15 /* Default ovector count */ +#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */ +#define LOCALESIZE 32 /* Size of locale name */ +#define LOOPREPEAT 500000 /* Default loop count for timing */ +#define MALLOCLISTSIZE 20 /* For remembering mallocs */ +#define PARENS_NEST_DEFAULT 220 /* Default parentheses nest limit */ +#define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */ +#define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */ +#define VERSION_SIZE 64 /* Size of buffer for the version strings */ + +/* Default JIT compile options */ + +#define JIT_DEFAULT (PCRE2_JIT_COMPLETE|\ + PCRE2_JIT_PARTIAL_SOFT|\ + PCRE2_JIT_PARTIAL_HARD) + +/* Make sure the buffer into which replacement strings are copied is big enough +to hold them as 32-bit code units. 
*/ + +#define REPLACE_BUFFSIZE 1024 /* This is a byte value */ + +/* Execution modes */ + +#define PCRE8_MODE 8 +#define PCRE16_MODE 16 +#define PCRE32_MODE 32 + +/* Processing returns */ + +enum { PR_OK, PR_SKIP, PR_ABEND }; + +/* The macro PRINTABLE determines whether to print an output character as-is or +as a hex value when showing compiled patterns. We use it in cases when the +locale has not been explicitly changed, so as to get consistent output from +systems that differ in their output from isprint() even in the "C" locale. */ + +#ifdef EBCDIC +#define PRINTABLE(c) ((c) >= 64 && (c) < 255) +#else +#define PRINTABLE(c) ((c) >= 32 && (c) < 127) +#endif + +#define PRINTOK(c) ((use_tables != NULL && c < 256)? isprint(c) : PRINTABLE(c)) + +/* We have to include some of the library source files because we need +to use some of the macros, internal structure definitions, and other internal +values - pcre2test has "inside information" compared to an application program +that strictly follows the PCRE2 API. + +Before including pcre2_internal.h we define PRIV so that it does not get +defined therein. This ensures that PRIV names in the included files do not +clash with those in the libraries. Also, although pcre2_internal.h does itself +include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h, +so that the PCRE2_EXP_xxx macros get set appropriately for an application, not +for building the library. + +Setting PCRE2_CODE_UNIT_WIDTH to zero cuts out all the width-specific settings +in pcre2.h and pcre2_internal.h. Defining PCRE2_BUILDING_PCRE2TEST cuts out the +check in pcre2_internal.h that ensures PCRE2_CODE_UNIT_WIDTH is 8, 16, or 32 +(which it needs to be when compiling one of the libraries). */ + +#define PRIV(name) name +#define PCRE2_CODE_UNIT_WIDTH 0 +#define PCRE2_BUILDING_PCRE2TEST +#include "pcre2.h" +#include "pcre2posix.h" +#include "pcre2_internal.h" + +/* We need access to some of the data tables that PCRE2 uses. 
Defining +PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition +of PRIV avoids name clashes. */ + +#define PCRE2_PCRE2TEST +#include "pcre2_tables.c" +#include "pcre2_ucd.c" + +/* 32-bit integer values in the input are read by strtoul() or strtol(). The +check needed for overflow depends on whether long ints are in fact longer than +ints. They are defined not to be shorter. */ + +#if ULONG_MAX > UINT32_MAX +#define U32OVERFLOW(x) (x > UINT32_MAX) +#else +#define U32OVERFLOW(x) (x == UINT32_MAX) +#endif + +#if LONG_MAX > INT32_MAX +#define S32OVERFLOW(x) (x > INT32_MAX || x < INT32_MIN) +#else +#define S32OVERFLOW(x) (x == INT32_MAX || x == INT32_MIN) +#endif + +/* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include +pcre2_intmodedep.h, which is where mode-dependent macros and structures are +defined. We can now include it for each supported code unit width. Because +PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will +have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately +while including these files, and then restore it to a no-op. Because LINK_SIZE +may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of +these inclusions should not be changed. 
*/ + +#undef PCRE2_SUFFIX +#undef PCRE2_CODE_UNIT_WIDTH + +#ifdef SUPPORT_PCRE2_8 +#define PCRE2_CODE_UNIT_WIDTH 8 +#define PCRE2_SUFFIX(a) G(a,8) +#include "pcre2_intmodedep.h" +#include "pcre2_printint.c" +#undef PCRE2_CODE_UNIT_WIDTH +#undef PCRE2_SUFFIX +#endif /* SUPPORT_PCRE2_8 */ + +#ifdef SUPPORT_PCRE2_16 +#define PCRE2_CODE_UNIT_WIDTH 16 +#define PCRE2_SUFFIX(a) G(a,16) +#include "pcre2_intmodedep.h" +#include "pcre2_printint.c" +#undef PCRE2_CODE_UNIT_WIDTH +#undef PCRE2_SUFFIX +#endif /* SUPPORT_PCRE2_16 */ + +#ifdef SUPPORT_PCRE2_32 +#define PCRE2_CODE_UNIT_WIDTH 32 +#define PCRE2_SUFFIX(a) G(a,32) +#include "pcre2_intmodedep.h" +#include "pcre2_printint.c" +#undef PCRE2_CODE_UNIT_WIDTH +#undef PCRE2_SUFFIX +#endif /* SUPPORT_PCRE2_32 */ + +#define PCRE2_SUFFIX(a) a + +#include "pcre2_chkdint.c" + +/* We need to be able to check input text for UTF-8 validity, whatever code +widths are actually available, because the input to pcre2test is always in +8-bit code units. So we include the UTF validity checking function for 8-bit +code units. */ + +extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *); + +#define PCRE2_CODE_UNIT_WIDTH 8 +#undef PCRE2_SPTR +#define PCRE2_SPTR PCRE2_SPTR8 +#include "pcre2_valid_utf.c" +#undef PCRE2_CODE_UNIT_WIDTH +#undef PCRE2_SPTR + +/* If we have 8-bit support, default to it; if there is also 16-or 32-bit +support, it can be selected by a command-line option. If there is no 8-bit +support, there must be 16-bit or 32-bit support, so default to one of them. The +config function, JIT stack, contexts, and version string are the same in all +modes, so use the form of the first that is available. 
*/ + +#if defined SUPPORT_PCRE2_8 +#define DEFAULT_TEST_MODE PCRE8_MODE +#define VERSION_TYPE PCRE2_UCHAR8 +#define PCRE2_CONFIG pcre2_config_8 +#define PCRE2_JIT_STACK pcre2_jit_stack_8 +#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8 +#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8 +#define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_8 +#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8 + +#elif defined SUPPORT_PCRE2_16 +#define DEFAULT_TEST_MODE PCRE16_MODE +#define VERSION_TYPE PCRE2_UCHAR16 +#define PCRE2_CONFIG pcre2_config_16 +#define PCRE2_JIT_STACK pcre2_jit_stack_16 +#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16 +#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16 +#define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_16 +#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16 + +#elif defined SUPPORT_PCRE2_32 +#define DEFAULT_TEST_MODE PCRE32_MODE +#define VERSION_TYPE PCRE2_UCHAR32 +#define PCRE2_CONFIG pcre2_config_32 +#define PCRE2_JIT_STACK pcre2_jit_stack_32 +#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32 +#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32 +#define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_32 +#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32 +#endif + +/* ------------- Structure and table for handling #-commands ------------- */ + +typedef struct cmdstruct { + const char *name; + int value; +} cmdstruct; + +enum { CMD_FORBID_UTF, CMD_LOAD, CMD_LOADTABLES, CMD_NEWLINE_DEFAULT, + CMD_PATTERN, CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT, + CMD_UNKNOWN }; + +static cmdstruct cmdlist[] = { + { "forbid_utf", CMD_FORBID_UTF }, + { "load", CMD_LOAD }, + { "loadtables", CMD_LOADTABLES }, + { "newline_default", CMD_NEWLINE_DEFAULT }, + { "pattern", CMD_PATTERN }, + { "perltest", CMD_PERLTEST }, + { "pop", CMD_POP }, + { "popcopy", CMD_POPCOPY }, + { "save", CMD_SAVE }, + { 
"subject", CMD_SUBJECT }}; + +#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdstruct)) + +/* ------------- Structures and tables for handling modifiers -------------- */ + +/* Table of names for newline types. Must be kept in step with the definitions +of PCRE2_NEWLINE_xx in pcre2.h. */ + +static const char *newlines[] = { + "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" }; + +/* Structure and table for handling pattern conversion types. */ + +typedef struct convertstruct { + const char *name; + uint32_t option; +} convertstruct; + +static convertstruct convertlist[] = { + { "glob", PCRE2_CONVERT_GLOB }, + { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR }, + { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR }, + { "posix_basic", PCRE2_CONVERT_POSIX_BASIC }, + { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED }, + { "unset", CONVERT_UNSET }}; + +#define convertlistcount (sizeof(convertlist)/sizeof(convertstruct)) + +/* Modifier types and applicability */ + +enum { MOD_CTC, /* Applies to a compile context */ + MOD_CTM, /* Applies to a match context */ + MOD_PAT, /* Applies to a pattern */ + MOD_PATP, /* Ditto, OK for Perl test */ + MOD_DAT, /* Applies to a data line */ + MOD_DATP, /* Ditto, OK for Perl test */ + MOD_PD, /* Applies to a pattern or a data line */ + MOD_PDP, /* As MOD_PD, OK for Perl test */ + MOD_PND, /* As MOD_PD, but not for a default pattern */ + MOD_PNDP, /* As MOD_PND, OK for Perl test */ + MOD_CHR, /* Is a single character */ + MOD_CON, /* Is a "convert" type/options list */ + MOD_CTL, /* Is a control bit */ + MOD_BSR, /* Is a BSR value */ + MOD_IN2, /* Is one or two unsigned integers */ + MOD_INS, /* Is a signed integer */ + MOD_INT, /* Is an unsigned integer */ + MOD_IND, /* Is an unsigned integer, but no value => default */ + MOD_NL, /* Is a newline value */ + MOD_NN, /* Is a number or a name; more than one may occur */ + MOD_OPT, /* Is an option bit */ + MOD_SIZ, /* Is a PCRE2_SIZE value */ + MOD_STR }; /* Is a 
string */ + +/* Control bits. Some apply to compiling, some to matching, but some can be set +either on a pattern or a data line, so they must all be distinct. There are now +so many of them that they are split into two fields. */ + +#define CTL_AFTERTEXT 0x00000001u +#define CTL_ALLAFTERTEXT 0x00000002u +#define CTL_ALLCAPTURES 0x00000004u +#define CTL_ALLUSEDTEXT 0x00000008u +#define CTL_ALTGLOBAL 0x00000010u +#define CTL_BINCODE 0x00000020u +#define CTL_CALLOUT_CAPTURE 0x00000040u +#define CTL_CALLOUT_INFO 0x00000080u +#define CTL_CALLOUT_NONE 0x00000100u +#define CTL_DFA 0x00000200u +#define CTL_EXPAND 0x00000400u +#define CTL_FINDLIMITS 0x00000800u +#define CTL_FINDLIMITS_NOHEAP 0x00001000u +#define CTL_FULLBINCODE 0x00002000u +#define CTL_GETALL 0x00004000u +#define CTL_GLOBAL 0x00008000u +#define CTL_HEXPAT 0x00010000u /* Same word as USE_LENGTH */ +#define CTL_INFO 0x00020000u +#define CTL_JITFAST 0x00040000u +#define CTL_JITVERIFY 0x00080000u +#define CTL_MARK 0x00100000u +#define CTL_MEMORY 0x00200000u +#define CTL_NULLCONTEXT 0x00400000u +#define CTL_POSIX 0x00800000u +#define CTL_POSIX_NOSUB 0x01000000u +#define CTL_PUSH 0x02000000u /* These three must be */ +#define CTL_PUSHCOPY 0x04000000u /* all in the same */ +#define CTL_PUSHTABLESCOPY 0x08000000u /* word. 
*/ +#define CTL_STARTCHAR 0x10000000u +#define CTL_USE_LENGTH 0x20000000u /* Same word as HEXPAT */ +#define CTL_UTF8_INPUT 0x40000000u +#define CTL_ZERO_TERMINATE 0x80000000u + +/* Combinations */ + +#define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */ +#define CTL_ANYINFO (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO) +#define CTL_ANYGLOB (CTL_ALTGLOBAL|CTL_GLOBAL) + +/* Second control word */ + +#define CTL2_SUBSTITUTE_CALLOUT 0x00000001u +#define CTL2_SUBSTITUTE_EXTENDED 0x00000002u +#define CTL2_SUBSTITUTE_LITERAL 0x00000004u +#define CTL2_SUBSTITUTE_MATCHED 0x00000008u +#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000010u +#define CTL2_SUBSTITUTE_REPLACEMENT_ONLY 0x00000020u +#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000040u +#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000080u +#define CTL2_SUBJECT_LITERAL 0x00000100u +#define CTL2_CALLOUT_NO_WHERE 0x00000200u +#define CTL2_CALLOUT_EXTRA 0x00000400u +#define CTL2_ALLVECTOR 0x00000800u +#define CTL2_NULL_PATTERN 0x00001000u +#define CTL2_NULL_SUBJECT 0x00002000u +#define CTL2_NULL_REPLACEMENT 0x00004000u +#define CTL2_FRAMESIZE 0x00008000u + +#define CTL2_HEAPFRAMES_SIZE 0x20000000u /* Informational */ +#define CTL2_NL_SET 0x40000000u /* Informational */ +#define CTL2_BSR_SET 0x80000000u /* Informational */ + +/* These are the matching controls that may be set either on a pattern or on a +data line. They are copied from the pattern controls as initial settings for +data line controls. Note that CTL_MEMORY is not included here, because it does +different things in the two cases. 
*/ + +#define CTL_ALLPD (CTL_AFTERTEXT|\ + CTL_ALLAFTERTEXT|\ + CTL_ALLCAPTURES|\ + CTL_ALLUSEDTEXT|\ + CTL_ALTGLOBAL|\ + CTL_GLOBAL|\ + CTL_MARK|\ + CTL_STARTCHAR|\ + CTL_UTF8_INPUT) + +#define CTL2_ALLPD (CTL2_SUBSTITUTE_CALLOUT|\ + CTL2_SUBSTITUTE_EXTENDED|\ + CTL2_SUBSTITUTE_LITERAL|\ + CTL2_SUBSTITUTE_MATCHED|\ + CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\ + CTL2_SUBSTITUTE_REPLACEMENT_ONLY|\ + CTL2_SUBSTITUTE_UNKNOWN_UNSET|\ + CTL2_SUBSTITUTE_UNSET_EMPTY|\ + CTL2_ALLVECTOR|\ + CTL2_HEAPFRAMES_SIZE) + +/* Structures for holding modifier information for patterns and subject strings +(data). Fields containing modifiers that can be set either for a pattern or a +subject must be at the start and in the same order in both cases so that the +same offset in the big table below works for both. */ + +typedef struct patctl { /* Structure for pattern modifiers. */ + uint32_t options; /* Must be in same position as datctl */ + uint32_t control; /* Must be in same position as datctl */ + uint32_t control2; /* Must be in same position as datctl */ + uint32_t jitstack; /* Must be in same position as datctl */ + uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ + uint32_t substitute_skip; /* Must be in same position as datctl */ + uint32_t substitute_stop; /* Must be in same position as datctl */ + uint32_t jit; + uint32_t stackguard_test; + uint32_t tables_id; + uint32_t convert_type; + uint32_t convert_length; + uint32_t convert_glob_escape; + uint32_t convert_glob_separator; + uint32_t regerror_buffsize; + uint8_t locale[LOCALESIZE]; +} patctl; + +#define MAXCPYGET 10 +#define LENCPYGET 64 + +typedef struct datctl { /* Structure for data line modifiers. 
*/ + uint32_t options; /* Must be in same position as patctl */ + uint32_t control; /* Must be in same position as patctl */ + uint32_t control2; /* Must be in same position as patctl */ + uint32_t jitstack; /* Must be in same position as patctl */ + uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ + uint32_t substitute_skip; /* Must be in same position as patctl */ + uint32_t substitute_stop; /* Must be in same position as patctl */ + uint32_t startend[2]; + uint32_t cerror[2]; + uint32_t cfail[2]; + int32_t callout_data; + int32_t copy_numbers[MAXCPYGET]; + int32_t get_numbers[MAXCPYGET]; + uint32_t oveccount; + uint32_t offset; + uint8_t copy_names[LENCPYGET]; + uint8_t get_names[LENCPYGET]; +} datctl; + +/* Ids for which context to modify. */ + +enum { CTX_PAT, /* Active pattern context */ + CTX_POPPAT, /* Ditto, for a popped pattern */ + CTX_DEFPAT, /* Default pattern context */ + CTX_DAT, /* Active data (match) context */ + CTX_DEFDAT }; /* Default data (match) context */ + +/* Macros to simplify the big table below. */ + +#define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name) +#define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name) +#define PO(name) offsetof(patctl, name) +#define PD(name) PO(name) +#define DO(name) offsetof(datctl, name) + +/* Table of all long-form modifiers. Must be in collating sequence of modifier +name because it is searched by binary chop. 
*/ + +typedef struct modstruct { + const char *name; + uint16_t which; + uint16_t type; + uint32_t value; + PCRE2_SIZE offset; +} modstruct; + +#define PCRE2_EXTRA_ASCII_ALL (PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS| \ + PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX) + +static modstruct modlist[] = { + { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) }, + { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) }, + { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) }, + { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) }, + { "allow_lookaround_bsk", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, CO(extra_options) }, + { "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) }, + { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) }, + { "allvector", MOD_PND, MOD_CTL, CTL2_ALLVECTOR, PO(control2) }, + { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) }, + { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) }, + { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) }, + { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) }, + { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) }, + { "ascii_all", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_ALL, CO(extra_options) }, + { "ascii_bsd", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSD, CO(extra_options) }, + { "ascii_bss", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSS, CO(extra_options) }, + { "ascii_bsw", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSW, CO(extra_options) }, + { "ascii_digit", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_DIGIT, CO(extra_options) }, + { "ascii_posix", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_POSIX, CO(extra_options) }, + { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) }, + { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) }, + { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, 
PO(control) }, + { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) }, + { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) }, + { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) }, + { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) }, + { "callout_extra", MOD_DAT, MOD_CTL, CTL2_CALLOUT_EXTRA, DO(control2) }, + { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) }, + { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) }, + { "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) }, + { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) }, + { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) }, + { "caseless_restrict", MOD_CTC, MOD_OPT, PCRE2_EXTRA_CASELESS_RESTRICT, CO(extra_options) }, + { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) }, + { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) }, + { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) }, + { "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) }, + { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) }, + { "copy_matched_subject", MOD_DAT, MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) }, + { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) }, + { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, + { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) }, + { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) }, + { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) }, + { "disable_recurseloop_check", MOD_DAT, MOD_OPT, PCRE2_DISABLE_RECURSELOOP_CHECK, DO(options) }, + { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) }, + { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) }, + { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) }, + { "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) }, + { "escaped_cr_is_lf", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) }, + { 
"expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) }, + { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) }, + { "extended_more", MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE, PO(options) }, + { "extra_alt_bsux", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALT_BSUX, CO(extra_options) }, + { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) }, + { "find_limits_noheap", MOD_DAT, MOD_CTL, CTL_FINDLIMITS_NOHEAP, DO(control) }, + { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) }, + { "framesize", MOD_PAT, MOD_CTL, CTL2_FRAMESIZE, PO(control2) }, + { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) }, + { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) }, + { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) }, + { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) }, + { "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) }, + { "heapframes_size", MOD_PND, MOD_CTL, CTL2_HEAPFRAMES_SIZE, PO(control2) }, + { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) }, + { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) }, + { "jit", MOD_PAT, MOD_IND, 7, PO(jit) }, + { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) }, + { "jitstack", MOD_PNDP, MOD_INT, 0, PO(jitstack) }, + { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) }, + { "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) }, + { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) }, + { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) }, + { "match_invalid_utf", MOD_PAT, MOD_OPT, PCRE2_MATCH_INVALID_UTF, PO(options) }, + { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) }, + { "match_line", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_LINE, CO(extra_options) }, + { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) }, + { "match_word", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_WORD, CO(extra_options) }, + { "max_pattern_compiled_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_compiled_length) }, + { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, 
CO(max_pattern_length) }, + { "max_varlookbehind", MOD_CTC, MOD_INT, 0, CO(max_varlookbehind) }, + { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) }, + { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) }, + { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) }, + { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) }, + { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) }, + { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) }, + { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) }, + { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) }, + { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) }, + { "no_jit", MOD_DATP, MOD_OPT, PCRE2_NO_JIT, DO(options) }, + { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) }, + { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) }, + { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) }, + { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) }, + { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) }, + { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) }, + { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) }, + { "null_pattern", MOD_PAT, MOD_CTL, CTL2_NULL_PATTERN, PO(control2) }, + { "null_replacement", MOD_DAT, MOD_CTL, CTL2_NULL_REPLACEMENT, DO(control2) }, + { "null_subject", MOD_DAT, MOD_CTL, CTL2_NULL_SUBJECT, DO(control2) }, + { "offset", MOD_DAT, MOD_INT, 0, DO(offset) }, + { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)}, + { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) }, + { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) }, + { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, + { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, + { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, + { "posix", MOD_PAT, MOD_CTL, 
CTL_POSIX, PO(control) }, + { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) }, + { "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) }, + { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, + { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) }, + { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) }, + { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) }, + { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */ + { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) }, + { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) }, + { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) }, + { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) }, + { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) }, + { "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) }, + { "substitute_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CALLOUT, PO(control2) }, + { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) }, + { "substitute_literal", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_LITERAL, PO(control2) }, + { "substitute_matched", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_MATCHED, PO(control2) }, + { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) }, + { "substitute_replacement_only", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_REPLACEMENT_ONLY, PO(control2) }, + { "substitute_skip", MOD_PND, MOD_INT, 0, PO(substitute_skip) }, + { "substitute_stop", MOD_PND, MOD_INT, 0, PO(substitute_stop) }, + { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) }, + { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) }, + { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) }, + { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) }, + { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) }, + { "use_length", MOD_PAT, MOD_CTL, 
CTL_USE_LENGTH, PO(control) }, + { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) }, + { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) }, + { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) }, + { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) } +}; + +#define MODLISTCOUNT sizeof(modlist)/sizeof(modstruct) + +/* Controls and options that are supported for use with the POSIX interface. */ + +#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \ + PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_LITERAL|PCRE2_MULTILINE|PCRE2_UCP| \ + PCRE2_UTF|PCRE2_UNGREEDY) + +#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0) + +#define POSIX_SUPPORTED_COMPILE_CONTROLS ( \ + CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_HEXPAT|CTL_POSIX| \ + CTL_POSIX_NOSUB|CTL_USE_LENGTH) + +#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0) + +#define POSIX_SUPPORTED_MATCH_OPTIONS ( \ + PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL) + +#define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT) +#define POSIX_SUPPORTED_MATCH_CONTROLS2 (CTL2_NULL_SUBJECT) + +/* Control bits that are not ignored with 'push'. */ + +#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \ + CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \ + CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY| \ + CTL_PUSHTABLESCOPY|CTL_USE_LENGTH) + +#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL2_BSR_SET| \ + CTL2_HEAPFRAMES_SIZE|CTL2_FRAMESIZE|CTL2_NL_SET) + +/* Controls that apply only at compile time with 'push'. */ + +#define PUSH_COMPILE_ONLY_CONTROLS CTL_JITVERIFY +#define PUSH_COMPILE_ONLY_CONTROLS2 (0) + +/* Controls that are forbidden with #pop or #popcopy. */ + +#define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \ + CTL_PUSHCOPY|CTL_PUSHTABLESCOPY|CTL_USE_LENGTH) + +/* Pattern controls that are mutually exclusive. At present these are all in +the first control word. Note that CTL_POSIX_NOSUB is always accompanied by +CTL_POSIX, so it doesn't need its own entries. 
*/ + +static uint32_t exclusive_pat_controls[] = { + CTL_POSIX | CTL_PUSH, + CTL_POSIX | CTL_PUSHCOPY, + CTL_POSIX | CTL_PUSHTABLESCOPY, + CTL_PUSH | CTL_PUSHCOPY, + CTL_PUSH | CTL_PUSHTABLESCOPY, + CTL_PUSHCOPY | CTL_PUSHTABLESCOPY, + CTL_EXPAND | CTL_HEXPAT }; + +/* Data controls that are mutually exclusive. At present these are all in the +first control word. */ + +static uint32_t exclusive_dat_controls[] = { + CTL_ALLUSEDTEXT | CTL_STARTCHAR, + CTL_FINDLIMITS | CTL_NULLCONTEXT, + CTL_FINDLIMITS_NOHEAP | CTL_NULLCONTEXT }; + +/* Table of single-character abbreviated modifiers. The index field is +initialized to -1, but the first time the modifier is encountered, it is filled +in with the index of the full entry in modlist, to save repeated searching when +processing multiple test items. This short list is searched serially, so its +order does not matter. */ + +typedef struct c1modstruct { + const char *fullname; + uint32_t onechar; + int index; +} c1modstruct; + +static c1modstruct c1modlist[] = { + { "bincode", 'B', -1 }, + { "info", 'I', -1 }, + { "ascii_all", 'a', -1 }, + { "global", 'g', -1 }, + { "caseless", 'i', -1 }, + { "multiline", 'm', -1 }, + { "no_auto_capture", 'n', -1 }, + { "caseless_restrict", 'r', -1 }, + { "dotall", 's', -1 }, + { "extended", 'x', -1 } +}; + +#define C1MODLISTCOUNT sizeof(c1modlist)/sizeof(c1modstruct) + +/* Table of arguments for the -C command line option. Use macros to make the +table itself easier to read. 
*/ + +#if defined SUPPORT_PCRE2_8 +#define SUPPORT_8 1 +#endif +#if defined SUPPORT_PCRE2_16 +#define SUPPORT_16 1 +#endif +#if defined SUPPORT_PCRE2_32 +#define SUPPORT_32 1 +#endif + +#ifndef SUPPORT_8 +#define SUPPORT_8 0 +#endif +#ifndef SUPPORT_16 +#define SUPPORT_16 0 +#endif +#ifndef SUPPORT_32 +#define SUPPORT_32 0 +#endif + +#ifdef EBCDIC +#define SUPPORT_EBCDIC 1 +#define EBCDIC_NL CHAR_LF +#else +#define SUPPORT_EBCDIC 0 +#define EBCDIC_NL 0 +#endif + +#ifdef NEVER_BACKSLASH_C +#define BACKSLASH_C 0 +#else +#define BACKSLASH_C 1 +#endif + +typedef struct coptstruct { + const char *name; + uint32_t type; + uint32_t value; +} coptstruct; + +enum { CONF_BSR, + CONF_FIX, + CONF_FIZ, + CONF_INT, + CONF_NL +}; + +static coptstruct coptlist[] = { + { "backslash-C", CONF_FIX, BACKSLASH_C }, + { "bsr", CONF_BSR, PCRE2_CONFIG_BSR }, + { "ebcdic", CONF_FIX, SUPPORT_EBCDIC }, + { "ebcdic-nl", CONF_FIZ, EBCDIC_NL }, + { "jit", CONF_INT, PCRE2_CONFIG_JIT }, + { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE }, + { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE }, + { "pcre2-16", CONF_FIX, SUPPORT_16 }, + { "pcre2-32", CONF_FIX, SUPPORT_32 }, + { "pcre2-8", CONF_FIX, SUPPORT_8 }, + { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE } +}; + +#define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct) + +#undef SUPPORT_8 +#undef SUPPORT_16 +#undef SUPPORT_32 +#undef SUPPORT_EBCDIC + + +/* ----------------------- Static variables ------------------------ */ + +static FILE *infile; +static FILE *outfile; + +static const void *last_callout_mark; +static PCRE2_JIT_STACK *jit_stack = NULL; +static size_t jit_stack_size = 0; + +static BOOL first_callout; +static BOOL jit_was_used; +static BOOL restrict_for_perl_test = FALSE; +static BOOL show_memory = FALSE; + +static int jitrc; /* Return from JIT compile */ +static int test_mode = DEFAULT_TEST_MODE; +static int timeit = 0; +static int timeitm = 0; + +clock_t total_compile_time = 0; +clock_t total_jit_compile_time = 0; +clock_t total_match_time 
= 0; + +static uint32_t code_unit_size; /* Bytes */ +static uint32_t dfa_matched; +static uint32_t forbid_utf = 0; +static uint32_t maxlookbehind; +static uint32_t max_oveccount; +static uint32_t callout_count; +static uint32_t maxcapcount; + +static uint16_t local_newline_default = 0; + +static VERSION_TYPE jittarget[VERSION_SIZE]; +static VERSION_TYPE version[VERSION_SIZE]; +static VERSION_TYPE uversion[VERSION_SIZE]; + +static patctl def_patctl; +static patctl pat_patctl; +static datctl def_datctl; +static datctl dat_datctl; + +static void *patstack[PATSTACKSIZE]; +static int patstacknext = 0; + +static void *malloclist[MALLOCLISTSIZE]; +static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE]; +static uint32_t malloclistptr = 0; + +#ifdef SUPPORT_PCRE2_8 +static regex_t preg = { NULL, NULL, 0, 0, 0, 0 }; +#endif + +static int *dfa_workspace = NULL; +static const uint8_t *locale_tables = NULL; +static const uint8_t *use_tables = NULL; +static uint8_t locale_name[32]; +static uint8_t *tables3 = NULL; /* For binary-loaded tables */ +static uint32_t loadtables_length = 0; + +/* We need buffers for building 16/32-bit strings; 8-bit strings don't need +rebuilding, but set up the same naming scheme for use in macros. The "buffer" +buffer is where all input lines are read. Its size is the same as pbuffer8. +Pattern lines are always copied to pbuffer8 for use in callouts, even if they +are actually compiled from pbuffer16 or pbuffer32. */ + +static size_t pbuffer8_size = 50000; /* Initial size, bytes */ +static uint8_t *pbuffer8 = NULL; +static uint8_t *buffer = NULL; + +/* The dbuffer is where all processed data lines are put. In non-8-bit modes it +is cast as needed. For long data lines it grows as necessary. 
*/ + +static size_t dbuffer_size = 1u << 14; /* Initial size, bytes */ +static uint8_t *dbuffer = NULL; + + +/* ---------------- Mode-dependent variables -------------------*/ + +#ifdef SUPPORT_PCRE2_8 +static pcre2_code_8 *compiled_code8; +static pcre2_general_context_8 *general_context8, *general_context_copy8; +static pcre2_compile_context_8 *pat_context8, *default_pat_context8; +static pcre2_convert_context_8 *con_context8, *default_con_context8; +static pcre2_match_context_8 *dat_context8, *default_dat_context8; +static pcre2_match_data_8 *match_data8; +#endif + +#ifdef SUPPORT_PCRE2_16 +static pcre2_code_16 *compiled_code16; +static pcre2_general_context_16 *general_context16, *general_context_copy16; +static pcre2_compile_context_16 *pat_context16, *default_pat_context16; +static pcre2_convert_context_16 *con_context16, *default_con_context16; +static pcre2_match_context_16 *dat_context16, *default_dat_context16; +static pcre2_match_data_16 *match_data16; +static PCRE2_SIZE pbuffer16_size = 0; /* Set only when needed */ +static uint16_t *pbuffer16 = NULL; +#endif + +#ifdef SUPPORT_PCRE2_32 +static pcre2_code_32 *compiled_code32; +static pcre2_general_context_32 *general_context32, *general_context_copy32; +static pcre2_compile_context_32 *pat_context32, *default_pat_context32; +static pcre2_convert_context_32 *con_context32, *default_con_context32; +static pcre2_match_context_32 *dat_context32, *default_dat_context32; +static pcre2_match_data_32 *match_data32; +static PCRE2_SIZE pbuffer32_size = 0; /* Set only when needed */ +static uint32_t *pbuffer32 = NULL; +#endif + + +/* ---------------- Macros that work in all modes ----------------- */ + +#define CAST8VAR(x) CASTVAR(uint8_t *, x) +#define SET(x,y) SETOP(x,y,=) +#define SETPLUS(x,y) SETOP(x,y,+=) +#define strlen8(x) strlen((char *)x) + + +/* ---------------- Mode-dependent, runtime-testing macros ------------------*/ + +/* Define macros for variables and functions that must be selected dynamically 
+depending on the mode setting (8, 16, 32). These are dependent on which modes +are supported. */ + +#if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \ + defined (SUPPORT_PCRE2_32)) >= 2 + +/* ----- All three modes supported ----- */ + +#if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32) + +#define CASTFLD(t,a,b) ((test_mode == PCRE8_MODE)? (t)(G(a,8)->b) : \ + (test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : (t)(G(a,32)->b)) + +#define CASTVAR(t,x) ( \ + (test_mode == PCRE8_MODE)? (t)G(x,8) : \ + (test_mode == PCRE16_MODE)? (t)G(x,16) : (t)G(x,32)) + +#define CODE_UNIT(a,b) ( \ + (test_mode == PCRE8_MODE)? (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \ + (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \ + (uint32_t)(((PCRE2_SPTR32)(a))[b])) + +#define CONCTXCPY(a,b) \ + if (test_mode == PCRE8_MODE) \ + memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)); \ + else if (test_mode == PCRE16_MODE) \ + memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)); \ + else memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32)) + +#define CONVERT_COPY(a,b,c) \ + if (test_mode == PCRE8_MODE) \ + memcpy(G(a,8),(char *)b,c); \ + else if (test_mode == PCRE16_MODE) \ + memcpy(G(a,16),(char *)b,(c)*2); \ + else if (test_mode == PCRE32_MODE) \ + memcpy(G(a,32),(char *)b,(c)*4) + +#define DATCTXCPY(a,b) \ + if (test_mode == PCRE8_MODE) \ + memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \ + else if (test_mode == PCRE16_MODE) \ + memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \ + else memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32)) + +#define FLD(a,b) ((test_mode == PCRE8_MODE)? G(a,8)->b : \ + (test_mode == PCRE16_MODE)? 
G(a,16)->b : G(a,32)->b) + +#define PATCTXCPY(a,b) \ + if (test_mode == PCRE8_MODE) \ + memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \ + else if (test_mode == PCRE16_MODE) \ + memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \ + else memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32)) + +#define PCHARS(lv, p, offset, len, utf, f) \ + if (test_mode == PCRE32_MODE) \ + lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \ + else if (test_mode == PCRE16_MODE) \ + lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \ + else \ + lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) + +#define PCHARSV(p, offset, len, utf, f) \ + if (test_mode == PCRE32_MODE) \ + (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \ + else if (test_mode == PCRE16_MODE) \ + (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \ + else \ + (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) + +#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_callout_enumerate_8(compiled_code8, \ + (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_callout_enumerate_16(compiled_code16, \ + (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \ + else \ + a = pcre2_callout_enumerate_32(compiled_code32, \ + (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c) + +#define PCRE2_CODE_COPY_FROM_VOID(a,b) \ + if (test_mode == PCRE8_MODE) \ + G(a,8) = pcre2_code_copy_8(b); \ + else if (test_mode == PCRE16_MODE) \ + G(a,16) = pcre2_code_copy_16(b); \ + else \ + G(a,32) = pcre2_code_copy_32(b) + +#define PCRE2_CODE_COPY_TO_VOID(a,b) \ + if (test_mode == PCRE8_MODE) \ + a = (void *)pcre2_code_copy_8(G(b,8)); \ + else if (test_mode == PCRE16_MODE) \ + a = (void *)pcre2_code_copy_16(G(b,16)); \ + else \ + a = (void *)pcre2_code_copy_32(G(b,32)) + +#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \ + if (test_mode == PCRE8_MODE) \ + a = (void 
*)pcre2_code_copy_with_tables_8(G(b,8)); \ + else if (test_mode == PCRE16_MODE) \ + a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \ + else \ + a = (void *)pcre2_code_copy_with_tables_32(G(b,32)) + +#define PCRE2_COMPILE(a,b,c,d,e,f,g) \ + if (test_mode == PCRE8_MODE) \ + G(a,8) = pcre2_compile_8(b,c,d,e,f,g); \ + else if (test_mode == PCRE16_MODE) \ + G(a,16) = pcre2_compile_16(b,c,d,e,f,g); \ + else \ + G(a,32) = pcre2_compile_32(b,c,d,e,f,g) + +#define PCRE2_CONVERTED_PATTERN_FREE(a) \ + if (test_mode == PCRE8_MODE) pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \ + else if (test_mode == PCRE16_MODE) pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \ + else pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a) + +#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \ + else \ + a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j) + +#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ + if (test_mode == PCRE8_MODE) \ + r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \ + else if (test_mode == PCRE16_MODE) \ + r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)); \ + else \ + r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4)) + +#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(r,a) \ + if (test_mode == PCRE8_MODE) \ + r = pcre2_get_match_data_heapframes_size_8(G(a,8)); \ + else if (test_mode == PCRE16_MODE) \ + r = pcre2_get_match_data_heapframes_size_16(G(a,16)); \ + else \ + r = pcre2_get_match_data_heapframes_size_32(G(a,32)) + +#define PCRE2_GET_OVECTOR_COUNT(a,b) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_get_ovector_count_8(G(b,8)); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_get_ovector_count_16(G(b,16)); \ + else \ + a = pcre2_get_ovector_count_32(G(b,32)) + +#define PCRE2_GET_STARTCHAR(a,b) \ + if 
(test_mode == PCRE8_MODE) \ + a = pcre2_get_startchar_8(G(b,8)); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_get_startchar_16(G(b,16)); \ + else \ + a = pcre2_get_startchar_32(G(b,32)) + +#define PCRE2_JIT_COMPILE(r,a,b) \ + if (test_mode == PCRE8_MODE) r = pcre2_jit_compile_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) r = pcre2_jit_compile_16(G(a,16),b); \ + else r = pcre2_jit_compile_32(G(a,32),b) + +#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \ + if (test_mode == PCRE8_MODE) pcre2_jit_free_unused_memory_8(G(a,8)); \ + else if (test_mode == PCRE16_MODE) pcre2_jit_free_unused_memory_16(G(a,16)); \ + else pcre2_jit_free_unused_memory_32(G(a,32)) + +#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \ + else \ + a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) + +#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ + if (test_mode == PCRE8_MODE) \ + a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \ + else if (test_mode == PCRE16_MODE) \ + a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \ + else \ + a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d); + +#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ + if (test_mode == PCRE8_MODE) \ + pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \ + else \ + pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c); + +#define PCRE2_JIT_STACK_FREE(a) \ + if (test_mode == PCRE8_MODE) \ + pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \ + else \ + pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a); + +#define PCRE2_MAKETABLES(a,c) \ + if (test_mode == PCRE8_MODE) a = pcre2_maketables_8(G(c,8)); \ + 
else if (test_mode == PCRE16_MODE) a = pcre2_maketables_16(G(c,16)); \ + else a = pcre2_maketables_32(G(c,32)) + +#define PCRE2_MAKETABLES_FREE(c,a) \ + if (test_mode == PCRE8_MODE) pcre2_maketables_free_8(G(c,8),a); \ + else if (test_mode == PCRE16_MODE) pcre2_maketables_free_16(G(c,16),a); \ + else pcre2_maketables_free_32(G(c,32),a) + +#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \ + else \ + a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) + +#define PCRE2_MATCH_DATA_CREATE(a,b,c) \ + if (test_mode == PCRE8_MODE) \ + G(a,8) = pcre2_match_data_create_8(b,G(c,8)); \ + else if (test_mode == PCRE16_MODE) \ + G(a,16) = pcre2_match_data_create_16(b,G(c,16)); \ + else \ + G(a,32) = pcre2_match_data_create_32(b,G(c,32)) + +#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ + if (test_mode == PCRE8_MODE) \ + G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),G(c,8)); \ + else if (test_mode == PCRE16_MODE) \ + G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),G(c,16)); \ + else \ + G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),G(c,32)) + +#define PCRE2_MATCH_DATA_FREE(a) \ + if (test_mode == PCRE8_MODE) \ + pcre2_match_data_free_8(G(a,8)); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_match_data_free_16(G(a,16)); \ + else \ + pcre2_match_data_free_32(G(a,32)) + +#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \ + else \ + a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32)) + +#define PCRE2_PATTERN_INFO(a,b,c,d) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_pattern_info_8(G(b,8),c,d); \ + else if (test_mode == 
PCRE16_MODE) \ + a = pcre2_pattern_info_16(G(b,16),c,d); \ + else \ + a = pcre2_pattern_info_32(G(b,32),c,d) + +#define PCRE2_PRINTINT(a) \ + if (test_mode == PCRE8_MODE) \ + pcre2_printint_8(compiled_code8,outfile,a); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_printint_16(compiled_code16,outfile,a); \ + else \ + pcre2_printint_32(compiled_code32,outfile,a) + +#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ + if (test_mode == PCRE8_MODE) \ + r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \ + else if (test_mode == PCRE16_MODE) \ + r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \ + else \ + r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32)) + +#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ + if (test_mode == PCRE8_MODE) \ + r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \ + else if (test_mode == PCRE16_MODE) \ + r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \ + else \ + r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32)) + +#define PCRE2_SERIALIZE_FREE(a) \ + if (test_mode == PCRE8_MODE) \ + pcre2_serialize_free_8(a); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_serialize_free_16(a); \ + else \ + pcre2_serialize_free_32(a) + +#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ + if (test_mode == PCRE8_MODE) \ + r = pcre2_serialize_get_number_of_codes_8(a); \ + else if (test_mode == PCRE16_MODE) \ + r = pcre2_serialize_get_number_of_codes_16(a); \ + else \ + r = pcre2_serialize_get_number_of_codes_32(a); \ + +#define PCRE2_SET_CALLOUT(a,b,c) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \ + else \ + pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c); + +#define PCRE2_SET_CHARACTER_TABLES(a,b) \ + if (test_mode == PCRE8_MODE) \ + 
pcre2_set_character_tables_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_character_tables_16(G(a,16),b); \ + else \ + pcre2_set_character_tables_32(G(a,32),b) + +#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \ + else \ + pcre2_set_compile_recursion_guard_32(G(a,32),b,c) + +#define PCRE2_SET_DEPTH_LIMIT(a,b) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_depth_limit_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_depth_limit_16(G(a,16),b); \ + else \ + pcre2_set_depth_limit_32(G(a,32),b) + +#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \ + if (test_mode == PCRE8_MODE) \ + r = pcre2_set_glob_separator_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + r = pcre2_set_glob_separator_16(G(a,16),b); \ + else \ + r = pcre2_set_glob_separator_32(G(a,32),b) + +#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \ + if (test_mode == PCRE8_MODE) \ + r = pcre2_set_glob_escape_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + r = pcre2_set_glob_escape_16(G(a,16),b); \ + else \ + r = pcre2_set_glob_escape_32(G(a,32),b) + +#define PCRE2_SET_HEAP_LIMIT(a,b) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_heap_limit_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_heap_limit_16(G(a,16),b); \ + else \ + pcre2_set_heap_limit_32(G(a,32),b) + +#define PCRE2_SET_MATCH_LIMIT(a,b) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_match_limit_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_match_limit_16(G(a,16),b); \ + else \ + pcre2_set_match_limit_32(G(a,32),b) + +#define PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH(a,b) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_max_pattern_compiled_length_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_max_pattern_compiled_length_16(G(a,16),b); \ + else \ + 
pcre2_set_max_pattern_compiled_length_32(G(a,32),b) + +#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_max_pattern_length_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_max_pattern_length_16(G(a,16),b); \ + else \ + pcre2_set_max_pattern_length_32(G(a,32),b) + +#define PCRE2_SET_MAX_VARLOOKBEHIND(a,b) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_max_varlookbehind_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_max_varlookbehind_16(G(a,16),b); \ + else \ + pcre2_set_max_varlookbehind_32(G(a,32),b) + +#define PCRE2_SET_OFFSET_LIMIT(a,b) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_offset_limit_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_offset_limit_16(G(a,16),b); \ + else \ + pcre2_set_offset_limit_32(G(a,32),b) + +#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_parens_nest_limit_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_parens_nest_limit_16(G(a,16),b); \ + else \ + pcre2_set_parens_nest_limit_32(G(a,32),b) + +#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_substitute_callout_8(G(a,8), \ + (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_substitute_callout_16(G(a,16), \ + (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c); \ + else \ + pcre2_set_substitute_callout_32(G(a,32), \ + (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c) + +#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \ + (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \ + (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \ + else \ + a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \ + 
(PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l) + +#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \ + else \ + a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e) + +#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \ + else \ + a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e) + +#define PCRE2_SUBSTRING_FREE(a) \ + if (test_mode == PCRE8_MODE) pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \ + else pcre2_substring_free_32((PCRE2_UCHAR32 *)a) + +#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \ + else \ + a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e) + +#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \ + else \ + a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e) + +#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \ + else \ + a = 
pcre2_substring_length_byname_32(G(b,32),G(c,32),d) + +#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \ + else \ + a = pcre2_substring_length_bynumber_32(G(b,32),c,d) + +#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d); \ + else \ + a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d) + +#define PCRE2_SUBSTRING_LIST_FREE(a) \ + if (test_mode == PCRE8_MODE) \ + pcre2_substring_list_free_8((PCRE2_UCHAR8 **)a); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_substring_list_free_16((PCRE2_UCHAR16 **)a); \ + else \ + pcre2_substring_list_free_32((PCRE2_UCHAR32 **)a) + +#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); \ + else \ + a = pcre2_substring_number_from_name_32(G(b,32),G(c,32)) + +#define PTR(x) ( \ + (test_mode == PCRE8_MODE)? (void *)G(x,8) : \ + (test_mode == PCRE16_MODE)? 
(void *)G(x,16) : \ + (void *)G(x,32)) + +#define SETFLD(x,y,z) \ + if (test_mode == PCRE8_MODE) G(x,8)->y = z; \ + else if (test_mode == PCRE16_MODE) G(x,16)->y = z; \ + else G(x,32)->y = z + +#define SETFLDVEC(x,y,v,z) \ + if (test_mode == PCRE8_MODE) G(x,8)->y[v] = z; \ + else if (test_mode == PCRE16_MODE) G(x,16)->y[v] = z; \ + else G(x,32)->y[v] = z + +#define SETOP(x,y,z) \ + if (test_mode == PCRE8_MODE) G(x,8) z y; \ + else if (test_mode == PCRE16_MODE) G(x,16) z y; \ + else G(x,32) z y + +#define SETCASTPTR(x,y) \ + if (test_mode == PCRE8_MODE) \ + G(x,8) = (uint8_t *)(y); \ + else if (test_mode == PCRE16_MODE) \ + G(x,16) = (uint16_t *)(y); \ + else \ + G(x,32) = (uint32_t *)(y) + +#define STRLEN(p) ((test_mode == PCRE8_MODE)? ((int)strlen((char *)p)) : \ + (test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)p)) : \ + ((int)strlen32((PCRE2_SPTR32)p))) + +#define SUB1(a,b) \ + if (test_mode == PCRE8_MODE) G(a,8)(G(b,8)); \ + else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16)); \ + else G(a,32)(G(b,32)) + +#define SUB2(a,b,c) \ + if (test_mode == PCRE8_MODE) G(a,8)(G(b,8),G(c,8)); \ + else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16),G(c,16)); \ + else G(a,32)(G(b,32),G(c,32)) + +#define TEST(x,r,y) ( \ + (test_mode == PCRE8_MODE && G(x,8) r (y)) || \ + (test_mode == PCRE16_MODE && G(x,16) r (y)) || \ + (test_mode == PCRE32_MODE && G(x,32) r (y))) + +#define TESTFLD(x,f,r,y) ( \ + (test_mode == PCRE8_MODE && G(x,8)->f r (y)) || \ + (test_mode == PCRE16_MODE && G(x,16)->f r (y)) || \ + (test_mode == PCRE32_MODE && G(x,32)->f r (y))) + + +/* ----- Two out of three modes are supported ----- */ + +#else + +/* We can use some macro trickery to make a single set of definitions work in +the three different cases. 
*/ + +/* ----- 32-bit and 16-bit but not 8-bit supported ----- */ + +#if defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_16) +#define BITONE 32 +#define BITTWO 16 + +/* ----- 32-bit and 8-bit but not 16-bit supported ----- */ + +#elif defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_8) +#define BITONE 32 +#define BITTWO 8 + +/* ----- 16-bit and 8-bit but not 32-bit supported ----- */ + +#else +#define BITONE 16 +#define BITTWO 8 +#endif + + +/* ----- Common macros for two-mode cases ----- */ + +#define BYTEONE (BITONE/8) +#define BYTETWO (BITTWO/8) + +#define CASTFLD(t,a,b) \ + ((test_mode == G(G(PCRE,BITONE),_MODE))? (t)(G(a,BITONE)->b) : \ + (t)(G(a,BITTWO)->b)) + +#define CASTVAR(t,x) ( \ + (test_mode == G(G(PCRE,BITONE),_MODE))? \ + (t)G(x,BITONE) : (t)G(x,BITTWO)) + +#define CODE_UNIT(a,b) ( \ + (test_mode == G(G(PCRE,BITONE),_MODE))? \ + (uint32_t)(((G(PCRE2_SPTR,BITONE))(a))[b]) : \ + (uint32_t)(((G(PCRE2_SPTR,BITTWO))(a))[b])) + +#define CONCTXCPY(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_convert_context_,BITONE))); \ + else \ + memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_convert_context_,BITTWO))) + +#define CONVERT_COPY(a,b,c) \ + (test_mode == G(G(PCRE,BITONE),_MODE))? \ + memcpy(G(a,BITONE),(char *)b,(c)*BYTEONE) : \ + memcpy(G(a,BITTWO),(char *)b,(c)*BYTETWO) + +#define DATCTXCPY(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_match_context_,BITONE))); \ + else \ + memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_match_context_,BITTWO))) + +#define FLD(a,b) \ + ((test_mode == G(G(PCRE,BITONE),_MODE))? 
G(a,BITONE)->b : G(a,BITTWO)->b) + +#define PATCTXCPY(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_compile_context_,BITONE))); \ + else \ + memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_compile_context_,BITTWO))) + +#define PCHARS(lv, p, offset, len, utf, f) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \ + else \ + lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) + +#define PCHARSV(p, offset, len, utf, f) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \ + else \ + (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) + +#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_callout_enumerate,BITONE)(G(compiled_code,BITONE), \ + (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \ + else \ + a = G(pcre2_callout_enumerate,BITTWO)(G(compiled_code,BITTWO), \ + (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c) + +#define PCRE2_CODE_COPY_FROM_VOID(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(a,BITONE) = G(pcre2_code_copy_,BITONE)(b); \ + else \ + G(a,BITTWO) = G(pcre2_code_copy_,BITTWO)(b) + +#define PCRE2_CODE_COPY_TO_VOID(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = (void *)G(pcre2_code_copy_,BITONE)(G(b,BITONE)); \ + else \ + a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO)) + +#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(b,BITONE)); \ + else \ + a = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(b,BITTWO)) + +#define PCRE2_COMPILE(a,b,c,d,e,f,g) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(a,BITONE) = G(pcre2_compile_,BITONE)(b,c,d,e,f,g); \ + else \ + G(a,BITTWO) = 
G(pcre2_compile_,BITTWO)(b,c,d,e,f,g) + +#define PCRE2_CONVERTED_PATTERN_FREE(a) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_converted_pattern_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \ + else \ + G(pcre2_converted_pattern_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a) + +#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ + G(g,BITONE),h,i,j); \ + else \ + a = G(pcre2_dfa_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ + G(g,BITTWO),h,i,j) + +#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + r = G(pcre2_get_error_message_,BITONE)(a,G(b,BITONE),G(G(b,BITONE),_size/BYTEONE)); \ + else \ + r = G(pcre2_get_error_message_,BITTWO)(a,G(b,BITTWO),G(G(b,BITTWO),_size/BYTETWO)) + +#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(r,a) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + r = G(pcre2_get_match_data_heapframes_size_,BITONE)(G(a,BITONE)); \ + else \ + r = G(pcre2_get_match_data_heapframes_size_,BITTWO)(G(a,BITTWO)) + +#define PCRE2_GET_OVECTOR_COUNT(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_get_ovector_count_,BITONE)(G(b,BITONE)); \ + else \ + a = G(pcre2_get_ovector_count_,BITTWO)(G(b,BITTWO)) + +#define PCRE2_GET_STARTCHAR(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_get_startchar_,BITONE)(G(b,BITONE)); \ + else \ + a = G(pcre2_get_startchar_,BITTWO)(G(b,BITTWO)) + +#define PCRE2_JIT_COMPILE(r,a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + r = G(pcre2_jit_compile_,BITONE)(G(a,BITONE),b); \ + else \ + r = G(pcre2_jit_compile_,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_jit_free_unused_memory_,BITONE)(G(a,BITONE)); \ + else \ + G(pcre2_jit_free_unused_memory_,BITTWO)(G(a,BITTWO)) + +#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ + if (test_mode == 
G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_jit_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ + G(g,BITONE),h); \ + else \ + a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ + G(g,BITTWO),h) + +#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \ + else \ + a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d); \ + +#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE),(G(pcre2_jit_callback_,BITONE))b,c); \ + else \ + G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO),(G(pcre2_jit_callback_,BITTWO))b,c); + +#define PCRE2_JIT_STACK_FREE(a) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \ + else \ + G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a); + +#define PCRE2_MAKETABLES(a,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_maketables_,BITONE)(G(c,BITONE)); \ + else \ + a = G(pcre2_maketables_,BITTWO)(G(c,BITTWO)) + +#define PCRE2_MAKETABLES_FREE(c,a) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_maketables_free_,BITONE)(G(c,BITONE),a); \ + else \ + G(pcre2_maketables_free_,BITTWO)(G(c,BITTWO),a) + +#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ + G(g,BITONE),h); \ + else \ + a = G(pcre2_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ + G(g,BITTWO),h) + +#define PCRE2_MATCH_DATA_CREATE(a,b,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(a,BITONE) = G(pcre2_match_data_create_,BITONE)(b,G(c,BITONE)); \ + else \ + G(a,BITTWO) = G(pcre2_match_data_create_,BITTWO)(b,G(c,BITTWO)) + +#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(a,BITONE) = 
G(pcre2_match_data_create_from_pattern_,BITONE)(G(b,BITONE),G(c,BITONE)); \ + else \ + G(a,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)(G(b,BITTWO),G(c,BITTWO)) + +#define PCRE2_MATCH_DATA_FREE(a) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_match_data_free_,BITONE)(G(a,BITONE)); \ + else \ + G(pcre2_match_data_free_,BITTWO)(G(a,BITTWO)) + +#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_pattern_convert_,BITONE)(G(b,BITONE),c,d,(G(PCRE2_UCHAR,BITONE) **)e,f,G(g,BITONE)); \ + else \ + a = G(pcre2_pattern_convert_,BITTWO)(G(b,BITTWO),c,d,(G(PCRE2_UCHAR,BITTWO) **)e,f,G(g,BITTWO)) + +#define PCRE2_PATTERN_INFO(a,b,c,d) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_pattern_info_,BITONE)(G(b,BITONE),c,d); \ + else \ + a = G(pcre2_pattern_info_,BITTWO)(G(b,BITTWO),c,d) + +#define PCRE2_PRINTINT(a) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,a); \ + else \ + G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,a) + +#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + r = G(pcre2_serialize_decode_,BITONE)((G(pcre2_code_,BITONE) **)a,b,c,G(d,BITONE)); \ + else \ + r = G(pcre2_serialize_decode_,BITTWO)((G(pcre2_code_,BITTWO) **)a,b,c,G(d,BITTWO)) + +#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + r = G(pcre2_serialize_encode_,BITONE)((G(const pcre2_code_,BITONE) **)a,b,c,d,G(e,BITONE)); \ + else \ + r = G(pcre2_serialize_encode_,BITTWO)((G(const pcre2_code_,BITTWO) **)a,b,c,d,G(e,BITTWO)) + +#define PCRE2_SERIALIZE_FREE(a) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_serialize_free_,BITONE)(a); \ + else \ + G(pcre2_serialize_free_,BITTWO)(a) + +#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + r = G(pcre2_serialize_get_number_of_codes_,BITONE)(a); \ + else \ + r 
= G(pcre2_serialize_get_number_of_codes_,BITTWO)(a) + +#define PCRE2_SET_CALLOUT(a,b,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_callout_,BITONE)(G(a,BITONE), \ + (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \ + else \ + G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \ + (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c); + +#define PCRE2_SET_CHARACTER_TABLES(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_character_tables_,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b,c); \ + else \ + G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b,c) + +#define PCRE2_SET_DEPTH_LIMIT(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_depth_limit_,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + r = G(pcre2_set_glob_escape_,BITONE)(G(a,BITONE),b); \ + else \ + r = G(pcre2_set_glob_escape_,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \ + else \ + r = G(pcre2_set_glob_separator_,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_SET_HEAP_LIMIT(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_set_heap_limit_,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_SET_MATCH_LIMIT(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_max_pattern_compiled_length_,BITONE)(G(a,BITONE),b); \ + 
else \ + G(pcre2_set_max_pattern_compiled_length_,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_max_pattern_length_,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_set_max_pattern_length_,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_SET_MAX_VARLOOKBEHIND(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_max_varlookbehind_,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_set_max_varlookbehind_,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_SET_OFFSET_LIMIT(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_set_offset_limit_,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b) + +#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_substitute_callout_,BITONE)(G(a,BITONE), \ + (int (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))b,c); \ + else \ + G(pcre2_set_substitute_callout_,BITTWO)(G(a,BITTWO), \ + (int (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))b,c) + +#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ + G(g,BITONE),h,(G(PCRE2_SPTR,BITONE))i,j, \ + (G(PCRE2_UCHAR,BITONE) *)k,l); \ + else \ + a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ + G(g,BITTWO),h,(G(PCRE2_SPTR,BITTWO))i,j, \ + (G(PCRE2_UCHAR,BITTWO) *)k,l) + +#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\ + (G(PCRE2_UCHAR,BITONE) *)d,e); \ + else \ + a = G(pcre2_substring_copy_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\ + 
(G(PCRE2_UCHAR,BITTWO) *)d,e) + +#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_substring_copy_bynumber_,BITONE)(G(b,BITONE),c,\ + (G(PCRE2_UCHAR,BITONE) *)d,e); \ + else \ + a = G(pcre2_substring_copy_bynumber_,BITTWO)(G(b,BITTWO),c,\ + (G(PCRE2_UCHAR,BITTWO) *)d,e) + +#define PCRE2_SUBSTRING_FREE(a) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \ + else G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a) + +#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_substring_get_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\ + (G(PCRE2_UCHAR,BITONE) **)d,e); \ + else \ + a = G(pcre2_substring_get_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\ + (G(PCRE2_UCHAR,BITTWO) **)d,e) + +#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_substring_get_bynumber_,BITONE)(G(b,BITONE),c,\ + (G(PCRE2_UCHAR,BITONE) **)d,e); \ + else \ + a = G(pcre2_substring_get_bynumber_,BITTWO)(G(b,BITTWO),c,\ + (G(PCRE2_UCHAR,BITTWO) **)d,e) + +#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_substring_length_byname_,BITONE)(G(b,BITONE),G(c,BITONE),d); \ + else \ + a = G(pcre2_substring_length_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),d) + +#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_substring_length_bynumber_,BITONE)(G(b,BITONE),c,d); \ + else \ + a = G(pcre2_substring_length_bynumber_,BITTWO)(G(b,BITTWO),c,d) + +#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_substring_list_get_,BITONE)(G(b,BITONE), \ + (G(PCRE2_UCHAR,BITONE) ***)c,d); \ + else \ + a = G(pcre2_substring_list_get_,BITTWO)(G(b,BITTWO), \ + (G(PCRE2_UCHAR,BITTWO) ***)c,d) + +#define PCRE2_SUBSTRING_LIST_FREE(a) \ + if 
(test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_substring_list_free_,BITONE)((G(PCRE2_UCHAR,BITONE) **)a); \ + else \ + G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) **)a) + +#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_substring_number_from_name_,BITONE)(G(b,BITONE),G(c,BITONE)); \ + else \ + a = G(pcre2_substring_number_from_name_,BITTWO)(G(b,BITTWO),G(c,BITTWO)) + +#define PTR(x) ( \ + (test_mode == G(G(PCRE,BITONE),_MODE))? (void *)G(x,BITONE) : \ + (void *)G(x,BITTWO)) + +#define SETFLD(x,y,z) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y = z; \ + else G(x,BITTWO)->y = z + +#define SETFLDVEC(x,y,v,z) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y[v] = z; \ + else G(x,BITTWO)->y[v] = z + +#define SETOP(x,y,z) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE) z y; \ + else G(x,BITTWO) z y + +#define SETCASTPTR(x,y) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(x,BITONE) = (G(G(uint,BITONE),_t) *)(y); \ + else \ + G(x,BITTWO) = (G(G(uint,BITTWO),_t) *)(y) + +#define STRLEN(p) ((test_mode == G(G(PCRE,BITONE),_MODE))? 
\ + G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p) : \ + G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))p)) + +#define SUB1(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(a,BITONE)(G(b,BITONE)); \ + else \ + G(a,BITTWO)(G(b,BITTWO)) + +/* SUB2: call the function named a with the BITONE or BITTWO suffix pasted on, passing the likewise-suffixed b and c; test_mode picks the width. Same shape as SUB1, with one more argument. */ +#define SUB2(a,b,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \ + else \ + G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO)) + +#define TEST(x,r,y) ( \ + (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE) r (y)) || \ + (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO) r (y))) + +#define TESTFLD(x,f,r,y) ( \ + (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE)->f r (y)) || \ + (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO)->f r (y))) + + +#endif /* Two out of three modes */ + +/* ----- End of cases where more than one mode is supported ----- */ + + +/* ----- Only 8-bit mode is supported ----- */ + +#elif defined SUPPORT_PCRE2_8 +#define CASTFLD(t,a,b) (t)(G(a,8)->b) +#define CASTVAR(t,x) (t)G(x,8) +#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR8)(a))[b]) +#define CONCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)) +#define CONVERT_COPY(a,b,c) memcpy(G(a,8),(char *)b, c) +#define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)) +#define FLD(a,b) G(a,8)->b +#define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)) +#define PCHARS(lv, p, offset, len, utf, f) \ + lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) +#define PCHARSV(p, offset, len, utf, f) \ + (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) +#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ + a = pcre2_callout_enumerate_8(compiled_code8, \ + (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c) +#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b) +#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8)) +#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_8(G(b,8)) +#define PCRE2_COMPILE(a,b,c,d,e,f,g) 
G(a,8) = pcre2_compile_8(b,c,d,e,f,g) +#define PCRE2_CONVERTED_PATTERN_FREE(a) \ + pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a) +#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ + a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j) +#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ + r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)) +#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(r,a) \ + r = pcre2_get_match_data_heapframes_size_8(G(a,8)) +#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_8(G(b,8)) +#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8)) +#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_8(G(a,8),b) +#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8)) +#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ + a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h) +#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ + a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); +#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ + pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); +#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); +#define PCRE2_MAKETABLES(a,c) a = pcre2_maketables_8(G(c,8)) +#define PCRE2_MAKETABLES_FREE(c,a) pcre2_maketables_free_8(G(c,8),a) +#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ + a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h) +#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,G(c,8)) +#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ + G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),G(c,8)) +#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8)) +#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)) +#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d) +#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a) +#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ + r = pcre2_serialize_decode_8((pcre2_code_8 
**)a,b,c,G(d,8)) +#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ + r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)) +#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_8(a) +#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ + r = pcre2_serialize_get_number_of_codes_8(a) +#define PCRE2_SET_CALLOUT(a,b,c) \ + pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c) +#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b) +#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ + pcre2_set_compile_recursion_guard_8(G(a,8),b,c) +#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b) +#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_8(G(a,8),b) +#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_8(G(a,8),b) +#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b) +#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b) +#define PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH(a,b) pcre2_set_max_pattern_compiled_length_8(G(a,8),b) +#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b) +#define PCRE2_SET_MAX_VARLOOKBEHIND(a,b) pcre2_set_max_varlookbehind_8(G(a,8),b) +#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b) +#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b) +#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \ + pcre2_set_substitute_callout_8(G(a,8), \ + (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c) +#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ + a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \ + (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l) +#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ + a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e) +#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ + a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e) +#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a) 
+#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ + a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e) +#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ + a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e) +#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ + a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d) +#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ + a = pcre2_substring_length_bynumber_8(G(b,8),c,d) +#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ + a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d) +#define PCRE2_SUBSTRING_LIST_FREE(a) \ + pcre2_substring_list_free_8((PCRE2_UCHAR8 **)a) +#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ + a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); +#define PTR(x) (void *)G(x,8) +#define SETFLD(x,y,z) G(x,8)->y = z +#define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z +#define SETOP(x,y,z) G(x,8) z y +#define SETCASTPTR(x,y) G(x,8) = (uint8_t *)(y) +#define STRLEN(p) (int)strlen((char *)p) +#define SUB1(a,b) G(a,8)(G(b,8)) +#define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8)) +#define TEST(x,r,y) (G(x,8) r (y)) +#define TESTFLD(x,f,r,y) (G(x,8)->f r (y)) + + +/* ----- Only 16-bit mode is supported ----- */ + +#elif defined SUPPORT_PCRE2_16 +#define CASTFLD(t,a,b) (t)(G(a,16)->b) +#define CASTVAR(t,x) (t)G(x,16) +#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b]) +#define CONCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)) +#define CONVERT_COPY(a,b,c) memcpy(G(a,16),(char *)b, (c)*2) +#define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)) +#define FLD(a,b) G(a,16)->b +#define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)) +#define PCHARS(lv, p, offset, len, utf, f) \ + lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f) +#define PCHARSV(p, offset, len, utf, f) \ + (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f) +#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ + a = pcre2_callout_enumerate_16(compiled_code16, \ + (int 
(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c) +#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b) +#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16)) +#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16)) +#define PCRE2_COMPILE(a,b,c,d,e,f,g) G(a,16) = pcre2_compile_16(b,c,d,e,f,g) +#define PCRE2_CONVERTED_PATTERN_FREE(a) \ + pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a) +#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ + a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j) +#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ + r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)) +#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16)) +#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(r,a) \ + r = pcre2_get_match_data_heapframes_size_16(G(a,16)) +#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16)) +#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b) +#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16)) +#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ + a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h) +#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ + a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); +#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ + pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); +#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); +#define PCRE2_MAKETABLES(a,c) a = pcre2_maketables_16(G(c,16)) +#define PCRE2_MAKETABLES_FREE(c,a) pcre2_maketables_free_16(G(c,16),a) +#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ + a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h) +#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,G(c,16)) +#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ + G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),G(c,16)) +#define 
PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16)) +#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)) +#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d) +#define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a) +#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ + r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)) +#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ + r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)) +#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a) +#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ + r = pcre2_serialize_get_number_of_codes_16(a) +#define PCRE2_SET_CALLOUT(a,b,c) \ + pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c) +#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b) +#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ + pcre2_set_compile_recursion_guard_16(G(a,16),b,c) +#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b) +#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b) +#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b) +#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b) +#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b) +/* Pattern-length limit setters: forward to the 16-bit library functions on the 16-bit context variable. */ +#define PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH(a,b) pcre2_set_max_pattern_compiled_length_16(G(a,16),b) +#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b) +#define PCRE2_SET_MAX_VARLOOKBEHIND(a,b) pcre2_set_max_varlookbehind_16(G(a,16),b) +#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b) +#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b) +#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \ + pcre2_set_substitute_callout_16(G(a,16), \ + (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c) +#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ + a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \ + (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l) +#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) 
\ + a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e) +#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ + a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e) +#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a) +#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ + a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e) +#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ + a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e) +#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ + a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d) +#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ + a = pcre2_substring_length_bynumber_16(G(b,16),c,d) +#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ + a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d) +#define PCRE2_SUBSTRING_LIST_FREE(a) \ + pcre2_substring_list_free_16((PCRE2_UCHAR16 **)a) +#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ + a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); +#define PTR(x) (void *)G(x,16) +#define SETFLD(x,y,z) G(x,16)->y = z +#define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z +#define SETOP(x,y,z) G(x,16) z y +#define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y) +#define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p) +#define SUB1(a,b) G(a,16)(G(b,16)) +#define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16)) +#define TEST(x,r,y) (G(x,16) r (y)) +#define TESTFLD(x,f,r,y) (G(x,16)->f r (y)) + + +/* ----- Only 32-bit mode is supported ----- */ + +#elif defined SUPPORT_PCRE2_32 +#define CASTFLD(t,a,b) (t)(G(a,32)->b) +#define CASTVAR(t,x) (t)G(x,32) +#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b]) +#define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32)) +#define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4) +#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32)) +#define FLD(a,b) G(a,32)->b +#define PATCTXCPY(a,b) 
memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32)) +#define PCHARS(lv, p, offset, len, utf, f) \ + lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f) +#define PCHARSV(p, offset, len, utf, f) \ + (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f) +#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ + a = pcre2_callout_enumerate_32(compiled_code32, \ + (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c) +#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b) +#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32)) +#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32)) +#define PCRE2_COMPILE(a,b,c,d,e,f,g) G(a,32) = pcre2_compile_32(b,c,d,e,f,g) +#define PCRE2_CONVERTED_PATTERN_FREE(a) \ + pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a) +#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ + a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j) +#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ + r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4)) +#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32)) +#define PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(r,a) \ + r = pcre2_get_match_data_heapframes_size_32(G(a,32)) +#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32)) +#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b) +#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32)) +#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ + a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) +#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ + a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d); +#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ + pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c); +#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a); +#define PCRE2_MAKETABLES(a,c) a = pcre2_maketables_32(G(c,32)) +#define PCRE2_MAKETABLES_FREE(c,a) 
pcre2_maketables_free_32(G(c,32),a) +#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ + a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) +#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,G(c,32)) +#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ + G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),G(c,32)) +#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32)) +#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32)) +#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d) +#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a) +#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ + r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32)) +#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ + r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32)) +#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a) +#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ + r = pcre2_serialize_get_number_of_codes_32(a) +#define PCRE2_SET_CALLOUT(a,b,c) \ + pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c) +#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b) +#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ + pcre2_set_compile_recursion_guard_32(G(a,32),b,c) +#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b) +#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b) +#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b) +#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b) +#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b) +#define PCRE2_SET_MAX_VARLOOKBEHIND(a,b) pcre2_set_max_varlookbehind_32(G(a,32),b) +#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b) +#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b) 
+#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
+  pcre2_set_substitute_callout_32(G(a,32), \
+    (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+  a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
+    (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
+#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
+  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
+/* No trailing semicolon in the expansion: the statement is ended by the ';'
+written at the point of use, as it is for the 16-bit form of this macro. */
+#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
+  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
+#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
+#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
+  a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
+#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
+  a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
+#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
+  a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
+#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
+  a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
+#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
+  a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
+#define PCRE2_SUBSTRING_LIST_FREE(a) \
+  pcre2_substring_list_free_32((PCRE2_UCHAR32 **)a)
+#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
+  a = pcre2_substring_number_from_name_32(G(b,32),G(c,32));
+#define PTR(x) (void *)G(x,32)
+#define SETFLD(x,y,z) G(x,32)->y = z
+#define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
+#define SETOP(x,y,z) G(x,32) z y
+#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
+#define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
+#define SUB1(a,b) G(a,32)(G(b,32))
+#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
+#define TEST(x,r,y) (G(x,32) r (y))
+#define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
+
+#endif
+
+/* ----- End of mode-specific function call macros ----- */
+
+
+
+
+/*************************************************
+*         Alternate character tables             *
+*************************************************/ + +/* By default, the "tables" pointer in the compile context when calling +pcre2_compile() is not set (= NULL), thereby using the default tables of the +library. However, the tables modifier can be used to select alternate sets of +tables, for different kinds of testing. Note that the locale modifier also +adjusts the tables. */ + +/* This is the set of tables distributed as default with PCRE2. It recognizes +only ASCII characters. */ + +static const uint8_t tables1[] = { + +/* This table is a lower casing table. */ + + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99,100,101,102,103, + 104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119, + 120,121,122, 91, 92, 93, 94, 95, + 96, 97, 98, 99,100,101,102,103, + 104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119, + 120,121,122,123,124,125,126,127, + 128,129,130,131,132,133,134,135, + 136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151, + 152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167, + 168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183, + 184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199, + 200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215, + 216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231, + 232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247, + 248,249,250,251,252,253,254,255, + +/* This table is a case flipping table. 
*/ + + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99,100,101,102,103, + 104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119, + 120,121,122, 91, 92, 93, 94, 95, + 96, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90,123,124,125,126,127, + 128,129,130,131,132,133,134,135, + 136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151, + 152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167, + 168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183, + 184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199, + 200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215, + 216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231, + 232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247, + 248,249,250,251,252,253,254,255, + +/* This table contains bit maps for various character classes. Each map is 32 +bytes long and the bits run from the least significant end of each byte. The +classes that have their own maps are: space, xdigit, digit, upper, lower, word, +graph, print, punct, and cntrl. Other classes are built from combinations. 
*/ + + 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, + 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, + 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, + 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + + 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + +/* This table identifies various classes of character by individual bits: + 0x01 white space character + 0x02 letter + 0x04 decimal digit + 0x08 hexadecimal digit + 0x10 alphanumeric or '_' + 0x80 regular expression metacharacter or binary zero +*/ + + 
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ + 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ + 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ + 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ + 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ + 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ + 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ + 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ + 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ + 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */ + 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ + 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ + 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ + 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ + +/* This is a set of tables that came originally from a Windows user. It seems +to be at least an approximation of ISO 8859. 
In particular, there are +characters greater than 128 that are marked as spaces, letters, etc. */ + +static const uint8_t tables2[] = { +0,1,2,3,4,5,6,7, +8,9,10,11,12,13,14,15, +16,17,18,19,20,21,22,23, +24,25,26,27,28,29,30,31, +32,33,34,35,36,37,38,39, +40,41,42,43,44,45,46,47, +48,49,50,51,52,53,54,55, +56,57,58,59,60,61,62,63, +64,97,98,99,100,101,102,103, +104,105,106,107,108,109,110,111, +112,113,114,115,116,117,118,119, +120,121,122,91,92,93,94,95, +96,97,98,99,100,101,102,103, +104,105,106,107,108,109,110,111, +112,113,114,115,116,117,118,119, +120,121,122,123,124,125,126,127, +128,129,130,131,132,133,134,135, +136,137,138,139,140,141,142,143, +144,145,146,147,148,149,150,151, +152,153,154,155,156,157,158,159, +160,161,162,163,164,165,166,167, +168,169,170,171,172,173,174,175, +176,177,178,179,180,181,182,183, +184,185,186,187,188,189,190,191, +224,225,226,227,228,229,230,231, +232,233,234,235,236,237,238,239, +240,241,242,243,244,245,246,215, +248,249,250,251,252,253,254,223, +224,225,226,227,228,229,230,231, +232,233,234,235,236,237,238,239, +240,241,242,243,244,245,246,247, +248,249,250,251,252,253,254,255, +0,1,2,3,4,5,6,7, +8,9,10,11,12,13,14,15, +16,17,18,19,20,21,22,23, +24,25,26,27,28,29,30,31, +32,33,34,35,36,37,38,39, +40,41,42,43,44,45,46,47, +48,49,50,51,52,53,54,55, +56,57,58,59,60,61,62,63, +64,97,98,99,100,101,102,103, +104,105,106,107,108,109,110,111, +112,113,114,115,116,117,118,119, +120,121,122,91,92,93,94,95, +96,65,66,67,68,69,70,71, +72,73,74,75,76,77,78,79, +80,81,82,83,84,85,86,87, +88,89,90,123,124,125,126,127, +128,129,130,131,132,133,134,135, +136,137,138,139,140,141,142,143, +144,145,146,147,148,149,150,151, +152,153,154,155,156,157,158,159, +160,161,162,163,164,165,166,167, +168,169,170,171,172,173,174,175, +176,177,178,179,180,181,182,183, +184,185,186,187,188,189,190,191, +224,225,226,227,228,229,230,231, +232,233,234,235,236,237,238,239, +240,241,242,243,244,245,246,215, +248,249,250,251,252,253,254,223, 
+192,193,194,195,196,197,198,199, +200,201,202,203,204,205,206,207, +208,209,210,211,212,213,214,247, +216,217,218,219,220,221,222,255, +0,62,0,0,1,0,0,0, +0,0,0,0,0,0,0,0, +32,0,0,0,1,0,0,0, +0,0,0,0,0,0,0,0, +0,0,0,0,0,0,255,3, +126,0,0,0,126,0,0,0, +0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0, +0,0,0,0,0,0,255,3, +0,0,0,0,0,0,0,0, +0,0,0,0,0,0,12,2, +0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0, +254,255,255,7,0,0,0,0, +0,0,0,0,0,0,0,0, +255,255,127,127,0,0,0,0, +0,0,0,0,0,0,0,0, +0,0,0,0,254,255,255,7, +0,0,0,0,0,4,32,4, +0,0,0,128,255,255,127,255, +0,0,0,0,0,0,255,3, +254,255,255,135,254,255,255,7, +0,0,0,0,0,4,44,6, +255,255,127,255,255,255,127,255, +0,0,0,0,254,255,255,255, +255,255,255,255,255,255,255,127, +0,0,0,0,254,255,255,255, +255,255,255,255,255,255,255,255, +0,2,0,0,255,255,255,255, +255,255,255,255,255,255,255,127, +0,0,0,0,255,255,255,255, +255,255,255,255,255,255,255,255, +0,0,0,0,254,255,0,252, +1,0,0,248,1,0,0,120, +0,0,0,0,254,255,255,255, +0,0,128,0,0,0,128,0, +255,255,255,255,0,0,0,0, +0,0,0,0,0,0,0,128, +255,255,255,255,0,0,0,0, +0,0,0,0,0,0,0,0, +128,0,0,0,0,0,0,0, +0,1,1,0,1,1,0,0, +0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0, +1,0,0,0,128,0,0,0, +128,128,128,128,0,0,128,0, +28,28,28,28,28,28,28,28, +28,28,0,0,0,0,0,128, +0,26,26,26,26,26,26,18, +18,18,18,18,18,18,18,18, +18,18,18,18,18,18,18,18, +18,18,18,128,128,0,128,16, +0,26,26,26,26,26,26,18, +18,18,18,18,18,18,18,18, +18,18,18,18,18,18,18,18, +18,18,18,128,128,0,0,0, +0,0,0,0,0,1,0,0, +0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0, +1,0,0,0,0,0,0,0, +0,0,18,0,0,0,0,0, +0,0,20,20,0,18,0,0, +0,20,18,0,0,0,0,0, +18,18,18,18,18,18,18,18, +18,18,18,18,18,18,18,18, +18,18,18,18,18,18,18,0, +18,18,18,18,18,18,18,18, +18,18,18,18,18,18,18,18, +18,18,18,18,18,18,18,18, +18,18,18,18,18,18,18,0, +18,18,18,18,18,18,18,18 +}; + + + +#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE) +/************************************************* +* Emulated memmove() for systems without it * 
+*************************************************/
+
+/* This function can make use of bcopy() if it is available. Otherwise do it by
+steam, as there are some non-Unix environments that lack both memmove() and
+bcopy().
+
+In the hand-written version the direction of the copy depends on the relative
+position of the two areas: when the destination lies above the source, the
+bytes are copied from the last one down to the first; otherwise they are copied
+from the first one up. Either way the pointer that is returned is equal to d.
+The #define that follows the function makes every later use of memmove() in
+this file call the emulation.
+
+Arguments:
+  d          the destination
+  s          the source
+  n          the number of bytes to copy
+
+Returns:     d
+*/
+
+static void *
+emulated_memmove(void *d, const void *s, size_t n)
+{
+#ifdef HAVE_BCOPY
+bcopy(s, d, n);         /* bcopy() takes the source first */
+return d;
+#else
+size_t i;
+unsigned char *dest = (unsigned char *)d;
+const unsigned char *src = (const unsigned char *)s;
+if (dest > src)
+  {
+  dest += n;
+  src += n;
+  for (i = 0; i < n; ++i) *(--dest) = *(--src);   /* last byte first */
+  return (void *)dest;                            /* back at d again */
+  }
+else
+  {
+  for (i = 0; i < n; ++i) *dest++ = *src++;       /* first byte first */
+  return (void *)(dest - n);                      /* that is, d */
+  }
+#endif   /* not HAVE_BCOPY */
+}
+#undef memmove
+#define memmove(d,s,n) emulated_memmove(d,s,n)
+#endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
+
+
+
+#ifndef HAVE_STRERROR
+/*************************************************
+*     Provide strerror() for non-ANSI libraries  *
+*************************************************/
+
+/* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their
+libraries. They may no longer be around, but just in case, we can try to
+provide the same facility by this simple alternative function. It looks the
+message up in sys_errlist[]; a number that is negative or not less than
+sys_nerr yields a fixed "unknown error number" string instead.
+
+Argument:    the error number
+Returns:     a pointer to the message text
+*/
+
+extern int   sys_nerr;
+extern char *sys_errlist[];
+
+char *
+strerror(int n)
+{
+if (n < 0 || n >= sys_nerr) return "unknown error number";
+return sys_errlist[n];
+}
+#endif /* not HAVE_STRERROR */
+
+
+
+/*************************************************
+*            Local memory functions              *
+*************************************************/
+
+/* Alternative memory functions, to test functionality.
*/
+
+/* Allocation function: a wrapper for malloc() that, when show_memory is set,
+writes a line to outfile for every request. While there is room in the list,
+the block and its size are also remembered in malloclist[] and
+malloclistlength[], so that my_free() can show the size on release.
+
+Arguments:
+  size       the number of bytes wanted
+  data       not used
+
+Returns:     the new block, or NULL if malloc() failed
+*/
+
+static void *my_malloc(size_t size, void *data)
+{
+void *block = malloc(size);
+(void)data;
+
+if (!show_memory) return block;
+
+if (block == NULL)
+  {
+  fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", size);
+  return block;
+  }
+
+fprintf(outfile, "malloc %5" SIZ_FORM, size);
+#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
+fprintf(outfile, " %p", block);   /* Not portable */
+#endif
+
+if (malloclistptr >= MALLOCLISTSIZE)
+  fprintf(outfile, " (not remembered)");
+else
+  {
+  malloclist[malloclistptr] = block;
+  malloclistlength[malloclistptr] = size;
+  malloclistptr++;
+  }
+
+fprintf(outfile, "\n");
+return block;
+}
+
+/* Release function: the partner of my_malloc(). The block is always passed to
+free(). When show_memory is set and the block is not NULL, a line is written to
+outfile first; if the block is found in malloclist[] its remembered size is
+shown and its entry is removed from the list.
+
+Arguments:
+  block      the block to release (may be NULL)
+  data       not used
+
+Returns:     nothing
+*/
+
+static void my_free(void *block, void *data)
+{
+(void)data;
+if (show_memory && block != NULL)
+  {
+  uint32_t slot;
+  BOOL known = FALSE;
+
+  fprintf(outfile, "free");
+  for (slot = 0; !known && slot < malloclistptr; slot++)
+    {
+    uint32_t k;
+    if (block != malloclist[slot]) continue;
+
+    fprintf(outfile, " %5" SIZ_FORM, malloclistlength[slot]);
+
+    /* Close the gap by moving the later entries down one place */
+
+    malloclistptr--;
+    for (k = slot; k < malloclistptr; k++)
+      {
+      malloclist[k] = malloclist[k+1];
+      malloclistlength[k] = malloclistlength[k+1];
+      }
+    known = TRUE;
+    }
+
+  if (!known) fprintf(outfile, " unremembered block");
+#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
+  fprintf(outfile, " %p", block);  /* Not portable */
+#endif
+  fprintf(outfile, "\n");
+  }
+free(block);
+}
+
+
+
+/*************************************************
+*       Callback function for stack guard        *
+*************************************************/
+
+/* This is set up to be called from pcre2_compile() when the stackguard=n
+modifier sets a value greater than zero. The test we do is whether the
+parenthesis nesting depth is greater than the value set by the modifier.
+
+Argument:  the current parenthesis nesting depth
+Returns:   non-zero to kill the compilation
+*/
+
+static int
+stack_guard(uint32_t depth, void *user_data)
+{
+(void)user_data;                            /* the callback data is not used */
+return depth > pat_patctl.stackguard_test;  /* 1 if over the limit, else 0 */
+}
+
+
+/*************************************************
+*         JIT memory callback                    *
+*************************************************/
+
+static PCRE2_JIT_STACK*
+jit_callback(void *arg)
+{
+jit_was_used = TRUE;             /* note that this callback has been called */
+return (PCRE2_JIT_STACK *)arg;   /* the argument itself is the stack to use */
+}
+
+
+/*************************************************
+*      Convert UTF-8 character to code point     *
+*************************************************/
+
+/* This function reads one or more bytes that represent a UTF-8 character,
+and returns the codepoint of that character. Note that the function supports
+the original UTF-8 definition of RFC 2279, allowing for values in the range 0
+to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
+codepoints greater than 0x10ffff which are useful for testing PCRE2's error
+checking, and also for generating 32-bit non-UTF data values above the UTF
+limit.
+
+The codepoint is passed back through vptr, which is written only when a valid
+character has been read.
+
+Argument:
+  utf8bytes   a pointer to the byte vector
+  end         a pointer to the end of the byte vector
+  vptr        a pointer to a uint32_t to receive the value
+
+Returns:      >  0 => the number of bytes consumed
+              -6 to 0 => malformed UTF-8 character at offset = (-return);
+                0 is given when the first byte cannot start a character or
+                when the byte vector ends before the character is complete
+*/
+
+static int
+utf82ord(PCRE2_SPTR8 utf8bytes, PCRE2_SPTR8 end, uint32_t *vptr)
+{
+uint32_t c = *utf8bytes++;
+uint32_t d = c;
+int i, j, s;
+
+for (i = -1; i < 6; i++)               /* i is number of additional bytes */
+  {
+  if ((d & 0x80) == 0) break;
+  d <<= 1;
+  }
+
+if (i == -1) { *vptr = c; return 1; }  /* ascii character */
+if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
+
+/* i now has a value in the range 1-5 */
+
+s = 6*i;
+d = (c & utf8_table3[i]) << s;
+
+for (j = 0; j < i; j++)
+  {
+  if (utf8bytes >= end) return 0;
+
+  c = *utf8bytes++;
+  if ((c & 0xc0) != 0x80) return -(j+1);
+  s -= 6;
+  d |= (c & 0x3f) << s;
+  }
+
+/* Check that encoding was the correct unique one */
+
+for (j = 0; j < utf8_table1_size; j++)
+  if (d <= (uint32_t)utf8_table1[j]) break;
+if (j != i) return -(i+1);
+
+/* Valid value */
+
+*vptr = d;
+return i+1;
+}
+
+
+
+/*************************************************
+*             Print one character                *
+*************************************************/
+
+/* Print a single character either literally, or as a hex escape, and count how
+many printed characters are used. For values of 0x100 or more the count is
+whatever the formatting function reports; when there is no file, the escape is
+formatted into a local buffer just to find its length.
+
+Arguments:
+  c          the character
+  utf        TRUE in UTF mode
+  f          the FILE to print to, or NULL just to count characters
+
+Returns:     number of characters written
+*/
+
+static int
+pchar(uint32_t c, BOOL utf, FILE *f)
+{
+int n = 0;
+char tempbuffer[16];
+
+if (PRINTOK(c))
+  {
+  if (f != NULL) fprintf(f, "%c", c);
+  return 1;
+  }
+
+if (c < 0x100)
+  {
+  if (utf)
+    {
+    if (f != NULL) fprintf(f, "\\x{%02x}", c);
+    return 6;
+    }
+  else
+    {
+    if (f != NULL) fprintf(f, "\\x%02x", c);
+    return 4;
+    }
+  }
+
+if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
+  else n = sprintf(tempbuffer, "\\x{%02x}", c);
+
+return n >= 0 ? n : 0;
+}
+
+
+
+#ifdef SUPPORT_PCRE2_16
+/*************************************************
+*    Find length of 0-terminated 16-bit string   *
+*************************************************/
+
+/* Counts the 16-bit code units that precede the terminating zero.
+
+Argument:    pointer to the string
+Returns:     the number of code units, excluding the terminating zero
+*/
+
+static size_t strlen16(PCRE2_SPTR16 p)
+{
+PCRE2_SPTR16 pp = p;
+while (*pp != 0) pp++;
+return (size_t)(pp - p);   /* pp never precedes p; no narrowing through int */
+}
+#endif  /* SUPPORT_PCRE2_16 */
+
+
+
+#ifdef SUPPORT_PCRE2_32
+/*************************************************
+*    Find length of 0-terminated 32-bit string   *
+*************************************************/
+
+/* Counts the 32-bit code units that precede the terminating zero.
+
+Argument:    pointer to the string
+Returns:     the number of code units, excluding the terminating zero
+*/
+
+static size_t strlen32(PCRE2_SPTR32 p)
+{
+PCRE2_SPTR32 pp = p;
+while (*pp != 0) pp++;
+return (size_t)(pp - p);   /* pp never precedes p; no narrowing through int */
+}
+#endif  /* SUPPORT_PCRE2_32 */
+
+
+#ifdef SUPPORT_PCRE2_8
+/*************************************************
+*         Print 8-bit character string           *
+*************************************************/
+
+/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
+For printing *MARK strings, a negative length is given, indicating that the
+length is in the first code unit. If handed a NULL file, this function just
+counts chars without printing (because pchar() does that). In UTF mode, a byte
+that does not start a valid character lying wholly within the string is output
+on its own as a single value. */
+
+static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
+{
+PCRE2_SPTR8 end;
+uint32_t c = 0;
+int yield = 0;
+if (length < 0) length = *p++;
+end = p + length;
+while (length-- > 0)
+  {
+  if (utf)
+    {
+    int rc = utf82ord(p, end, &c);
+    if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
+      {
+      length -= rc - 1;
+      p += rc;
+      yield += pchar(c, utf, f);
+      continue;
+      }
+    }
+  c = *p++;
+  yield += pchar(c, utf, f);
+  }
+
+return yield;
+}
+#endif
+
+
+#ifdef SUPPORT_PCRE2_16
+/*************************************************
+*           Print 16-bit character string        *
+*************************************************/
+
+/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
+For printing *MARK strings, a negative length is given, indicating that the
+length is in the first code unit.
If handed a NULL file, just counts chars
+without printing. In UTF mode a high surrogate that is followed, within the
+string, by a low surrogate is combined with it into a single character; any
+other code unit is output as it stands. */
+
+static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
+{
+int printed = 0;
+
+if (length < 0) length = *p++;
+
+while (length-- > 0)
+  {
+  uint32_t ch = *p++ & 0xffff;
+  BOOL is_high = utf && ch >= 0xD800 && ch < 0xDC00;
+
+  if (is_high && length > 0)
+    {
+    uint32_t low = *p & 0xffff;
+    if (low >= 0xDC00 && low <= 0xDFFF)
+      {
+      ch = ((ch & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
+      p++;
+      length--;
+      }
+    }
+
+  printed += pchar(ch, utf, f);
+  }
+
+return printed;
+}
+#endif  /* SUPPORT_PCRE2_16 */
+
+
+
+#ifdef SUPPORT_PCRE2_32
+/*************************************************
+*           Print 32-bit character string        *
+*************************************************/
+
+/* Each 32-bit code unit is a whole character, in UTF mode or not, so the
+units are handed to pchar() one at a time. Yields number of characters printed.
+For printing *MARK strings, a negative length is given, indicating that the
+length is in the first code unit. If handed a NULL file, just counts chars
+without printing. */
+
+static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
+{
+int printed = 0;
+
+if (length < 0) length = *p++;
+
+for (; length > 0; length--)
+  {
+  uint32_t ch = *p++;
+  printed += pchar(ch, utf, f);
+  }
+
+return printed;
+}
+#endif  /* SUPPORT_PCRE2_32 */
+
+
+
+
+/*************************************************
+*       Convert character value to UTF-8         *
+*************************************************/
+
+/* This function takes an integer value in the range 0 - 0x7fffffff
+and encodes it as a UTF-8 character in 1 to 6 bytes. It is needed even when the
+8-bit library is not supported, to generate UTF-8 output for non-ASCII
+characters.
+
+Arguments:
+  cvalue     the character value
+  utf8bytes  pointer to buffer for result - at least 6 bytes long
+
+Returns:     number of bytes placed in the buffer
+             OR -1 if cvalue is greater than 0x7fffffff (nothing is written)
+*/
+
+static int
+ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
+{
+int extra;       /* number of bytes that follow the first one */
+uint8_t *out;
+
+if (cvalue > 0x7fffffffu) return -1;
+
+for (extra = 0; extra < utf8_table1_size; extra++)
+  if (cvalue <= (uint32_t)utf8_table1[extra]) break;
+
+/* Fill in the following bytes from the last one back towards the first, six
+bits at a time; what is left of the value then goes into the first byte. */
+
+for (out = utf8bytes + extra; out > utf8bytes; out--)
+  {
+  *out = 0x80 | (cvalue & 0x3f);
+  cvalue >>= 6;
+  }
+
+*out = utf8_table2[extra] | cvalue;
+return extra + 1;
+}
+
+
+
+#ifdef SUPPORT_PCRE2_16
+/*************************************************
+*           Convert string to 16-bit             *
+*************************************************/
+
+/* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
+the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
+code values from 0 to 0x7fffffff. However, values greater than the later UTF
+limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
+UTF-8 if the utf8_input modifier is set, but an error is generated for values
+greater than 0xffff.
+
+If all the input bytes are ASCII, the space needed for a 16-bit string is
+exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
+is no more than double, because a value up to 0xffff uses from 1 to 3 bytes in
+UTF-8 and 2 bytes in UTF-16, while a higher value uses at least 4 bytes in
+UTF-8 and 4 bytes in UTF-16. The result is always left in pbuffer16. Impose a
+minimum size to save repeated re-sizing.
+
+Note that this function does not object to surrogate values. This is
+deliberate; it makes it possible to construct UTF-16 strings that are invalid,
+for the purpose of testing that they are correctly faulted.
+
+Arguments:
+  p          points to a byte string
+  utf        true in UTF mode
+  lenptr     points to number of bytes in the string (excluding trailing zero)
+
+Returns:     0 on success, with the length updated to the number of 16-bit
+               data items used (excluding the trailing zero)
+             OR -1 if a UTF-8 string is malformed
+             OR -2 if a value > 0x10ffff is encountered in UTF mode
+             OR -3 if a value > 0xffff is encountered when not in UTF mode
+*/
+
+static int
+to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
+{
+uint16_t *pp;
+PCRE2_SIZE len = *lenptr;
+
+if (pbuffer16_size < 2*len + 2)   /* bytes: 2 per input byte + final zero */
+  {
+  if (pbuffer16 != NULL) free(pbuffer16);   /* old contents are not kept */
+  pbuffer16_size = 2*len + 2;
+  if (pbuffer16_size < 4096) pbuffer16_size = 4096;
+  pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
+  if (pbuffer16 == NULL)
+    {
+    fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
+      pbuffer16_size);
+    exit(1);
+    }
+  }
+
+pp = pbuffer16;
+if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
+  {
+  for (; len > 0; len--) *pp++ = *p++;   /* each byte becomes one 16-bit unit */
+  }
+else while (len > 0)
+  {
+  uint32_t c;
+  const uint8_t *end = p + len;          /* limit for utf82ord() */
+  int chlen = utf82ord(p, end, &c);
+  if (chlen <= 0) return -1;
+  if (!utf && c > 0xffff) return -3;
+  if (c > 0x10ffff) return -2;           /* only reachable in UTF mode */
+  p += chlen;
+  len -= chlen;
+  if (c < 0x10000) *pp++ = c; else       /* one unit, else a surrogate pair */
+    {
+    c -= 0x10000;
+    *pp++ = 0xD800 | (c >> 10);
+    *pp++ = 0xDC00 | (c & 0x3ff);
+    }
+  }
+
+*pp = 0;                                 /* terminating zero, not counted */
+*lenptr = pp - pbuffer16;
+return 0;
+}
+#endif  /* SUPPORT_PCRE2_16 */
+
+
+
+#ifdef SUPPORT_PCRE2_32
+/*************************************************
+*           Convert string to 32-bit             *
+*************************************************/
+
+/* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
+the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
+code values from 0 to 0x7fffffff. However, values greater than the later UTF
+limit of 0x10ffff cause an error.
+
+In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
+is set, and no limit is imposed.
There is special interpretation of the 0xff
+byte (which is illegal in UTF-8) in this case: it causes the top bit of the
+next character to be set. This provides a way of generating 32-bit characters
+greater than 0x7fffffff.
+
+If all the input bytes are ASCII, the space needed for a 32-bit string is
+exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
+string is no more than four times, because the number of characters must be
+less than the number of bytes. The result is always left in pbuffer32. Impose a
+minimum size to save repeated re-sizing.
+
+Note that this function does not object to surrogate values. This is
+deliberate; it makes it possible to construct UTF-32 strings that are invalid,
+for the purpose of testing that they are correctly faulted.
+
+Arguments:
+  p          points to a byte string
+  utf        true in UTF mode
+  lenptr     points to number of bytes in the string (excluding trailing zero)
+
+Returns:     0 on success, with the length updated to the number of 32-bit
+               data items used (excluding the trailing zero)
+             OR -1 if a UTF-8 string is malformed
+             OR -2 if a value > 0x10ffff is encountered in UTF mode
+*/
+
+static int
+to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
+{
+uint32_t *pp;
+PCRE2_SIZE len = *lenptr;
+
+if (pbuffer32_size < 4*len + 4)   /* bytes: 4 per input byte + final zero */
+  {
+  if (pbuffer32 != NULL) free(pbuffer32);   /* old contents are not kept */
+  pbuffer32_size = 4*len + 4;
+  if (pbuffer32_size < 8192) pbuffer32_size = 8192;
+  pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
+  if (pbuffer32 == NULL)
+    {
+    fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
+      pbuffer32_size);
+    exit(1);
+    }
+  }
+
+pp = pbuffer32;
+
+if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
+  {
+  for (; len > 0; len--) *pp++ = *p++;   /* each byte becomes one 32-bit unit */
+  }
+
+else while (len > 0)
+  {
+  int chlen;
+  uint32_t c;
+  uint32_t topbit = 0;
+  const uint8_t *end = p + len;          /* limit for utf82ord() */
+  if (!utf && *p == 0xff && len > 1)     /* 0xff prefix, with more to come */
+    {
+    topbit = 0x80000000u;
+    p++;
+    len--;
+    }
+  chlen = utf82ord(p, end, &c);
+  if (chlen <= 0) return -1;
+  if (utf && c > 0x10ffff) return -2;
+  p += chlen;
+  len -= chlen;
+  *pp++ = c | topbit;
+  }
+
+*pp = 0;                                 /* terminating zero, not counted */
+*lenptr = pp - pbuffer32;
+return 0;
+}
+#endif /* SUPPORT_PCRE2_32 */
+
+
+
+/* This function is no longer used. Keep it around for a while, just in case it
+needs to be re-instated. */
+
+#ifdef NEVERNEVERNEVER
+
+/*************************************************
+*         Move back by so many characters        *
+*************************************************/
+
+/* Given a code unit offset in a subject string, move backwards by a number of
+characters, and return the resulting offset. When not in UTF mode, or in 32-bit
+mode, one character is one code unit, so this is a subtraction that stops at
+zero. In 8-bit UTF mode each step also passes over bytes of the form 10xxxxxx;
+in 16-bit UTF mode each step also passes over one code unit in the range
+0xdc00-0xdfff.
+
+NOTE(review): those extra backward steps are not checked against the start of
+the subject, so a subject that begins with such a code unit could take the
+pointer below the start. This needs attention if the function is re-instated.
+
+Arguments:
+  subject   pointer to the string
+  offset    start offset
+  count     count to move back by
+  utf       TRUE if in UTF mode
+
+Returns:    a possibly changed offset
+*/
+
+static PCRE2_SIZE
+backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
+{
+if (!utf || test_mode == PCRE32_MODE)
+  return (count >= offset)? 0 : (offset - count);
+
+else if (test_mode == PCRE8_MODE)
+  {
+  PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
+  for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
+    {
+    pp--;
+    while ((*pp & 0xc0) == 0x80) pp--;
+    }
+  return pp - (PCRE2_SPTR8)subject;
+  }
+
+else  /* 16-bit mode */
+  {
+  PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
+  for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
+    {
+    pp--;
+    if ((*pp & 0xfc00) == 0xdc00) pp--;
+    }
+  return pp - (PCRE2_SPTR16)subject;
+  }
+}
+#endif  /* NEVERNEVERNEVER */
+
+
+
+/*************************************************
+*               Expand input buffers             *
+*************************************************/
+
+/* This function doubles the size of the input buffer and the buffer for
+keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
+the new ones.
+
+Arguments: none
+Returns:   nothing (aborts if malloc() fails)
+*/
+
+static void
+expand_input_buffers(void)
+{
+int new_pbuffer8_size = 2*pbuffer8_size;   /* One size serves both buffers */
+uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
+uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
+
+if (new_buffer == NULL || new_pbuffer8 == NULL)
+  {
+  fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
+  exit(1);
+  }
+
+memcpy(new_buffer, buffer, pbuffer8_size);       /* Copy the old contents, */
+memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);   /* using the old size */
+
+pbuffer8_size = new_pbuffer8_size;
+
+free(buffer);      /* Pointers into the old blocks are invalid from here on */
+free(pbuffer8);
+
+buffer = new_buffer;
+pbuffer8 = new_pbuffer8;
+}
+
+
+
+/*************************************************
+*        Read or extend an input line            *
+*************************************************/
+
+/* Input lines are read into buffer, but both patterns and data lines can be
+continued over multiple input lines. In addition, if the buffer fills up, we
+want to automatically expand it so as to be able to handle extremely large
+lines that are needed for certain stress tests, although this is less likely
+now that there are repetition features for both patterns and data. When the
+input buffer is expanded, the other two buffers must also be expanded likewise,
+and the contents of pbuffer, which are a copy of the input for callouts, must
+be preserved (for when expansion happens for a data line). This is not the most
+optimal way of handling this, but hey, this is just a test program!
+ +Arguments: + f the file to read + start where in buffer to start (this *must* be within buffer) + prompt for stdin or readline() + +Returns: pointer to the start of new data + could be a copy of start, or could be moved + NULL if no data read and EOF reached +*/ + +static uint8_t * +extend_inputline(FILE *f, uint8_t *start, const char *prompt) +{ +uint8_t *here = start; + +for (;;) + { + size_t rlen = (size_t)(pbuffer8_size - (here - buffer)); + + if (rlen > 1000) + { + size_t dlen; + + /* If libreadline or libedit support is required, use readline() to read a + line if the input is a terminal. Note that readline() removes the trailing + newline, so we must put it back again, to be compatible with fgets(). */ + +#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) + if (INTERACTIVE(f)) + { + size_t len; + char *s = readline(prompt); + if (s == NULL) return (here == start)? NULL : start; + len = strlen(s); + if (len > 0) add_history(s); + if (len > rlen - 1) len = rlen - 1; + memcpy(here, s, len); + here[len] = '\n'; + here[len+1] = 0; + free(s); + } + else +#endif + + /* Read the next line by normal means, prompting if the file is a tty. */ + + { + if (INTERACTIVE(f)) printf("%s", prompt); + if (fgets((char *)here, rlen, f) == NULL) + return (here == start)? NULL : start; + } + + dlen = strlen((char *)here); + here += dlen; + + /* Check for end of line reached. Take care not to read data from before + start (dlen will be zero for a file starting with a binary zero). */ + + if (here > start && here[-1] == '\n') return start; + + /* If we have not read a newline when reading a file, we have either filled + the buffer or reached the end of the file. We can detect the former by + checking that the string fills the buffer, and the latter by feof(). If + neither of these is true, it means we read a binary zero which has caused + strlen() to give a short length. This is a hard error because pcre2test + expects to work with C strings. 
*/ + + if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f)) + { + fprintf(outfile, "** Binary zero encountered in input\n"); + fprintf(outfile, "** pcre2test run abandoned\n"); + exit(1); + } + } + + else + { + size_t start_offset = start - buffer; + size_t here_offset = here - buffer; + expand_input_buffers(); + start = buffer + start_offset; + here = buffer + here_offset; + } + } + +/* Control never gets here */ +} + + + +/************************************************* +* Case-independent strncmp() function * +*************************************************/ + +/* +Arguments: + s first string + t second string + n number of characters to compare + +Returns: < 0, = 0, or > 0, according to the comparison +*/ + +static int +strncmpic(const uint8_t *s, const uint8_t *t, int n) +{ +while (n--) + { + int c = tolower(*s++) - tolower(*t++); + if (c != 0) return c; + } +return 0; +} + + + +/************************************************* +* Scan the main modifier list * +*************************************************/ + +/* This function searches the modifier list for a long modifier name. + +Argument: + p start of the name + lenp length of the name + +Returns: an index in the modifier list, or -1 on failure +*/ + +static int +scan_modifiers(const uint8_t *p, unsigned int len) +{ +int bot = 0; +int top = MODLISTCOUNT; + +while (top > bot) + { + int mid = (bot + top)/2; + unsigned int mlen = strlen(modlist[mid].name); + int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen); + if (c == 0) + { + if (len == mlen) return mid; + c = (int)len - (int)mlen; + } + if (c > 0) bot = mid + 1; else top = mid; + } + +return -1; + +} + + + +/************************************************* +* Check a modifer and find its field * +*************************************************/ + +/* This function is called when a modifier has been identified. We check that +it is allowed here and find the field that is to be changed. 
+
+Arguments:
+  m           the modifier list entry
+  ctx         CTX_PAT     => pattern context
+              CTX_POPPAT  => pattern context for popped pattern
+              CTX_DEFPAT  => default pattern context
+              CTX_DAT     => data context
+              CTX_DEFDAT  => default data context
+  pctl        point to pattern control block
+  dctl        point to data control block
+  c           a single character or 0
+
+Returns:      a field pointer or NULL
+*/
+
+static void *
+check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
+{
+void *field = NULL;               /* Start of the block that holds the field */
+PCRE2_SIZE offset = m->offset;    /* Added to the block start on success */
+
+if (restrict_for_perl_test) switch(m->which)
+  {
+  case MOD_PNDP:
+  case MOD_PATP:
+  case MOD_DATP:
+  case MOD_PDP:
+  break;
+
+  default:
+  fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
+    m->name);
+  return NULL;
+  }
+
+switch (m->which)
+  {
+  case MOD_CTC:  /* Compile context modifier; no match for other ctx values */
+  if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
+    else if (ctx == CTX_PAT) field = PTR(pat_context);
+  break;
+
+  case MOD_CTM:  /* Match context modifier; no match for other ctx values */
+  if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
+    else if (ctx == CTX_DAT) field = PTR(dat_context);
+  break;
+
+  case MOD_DAT:    /* Data line modifier */
+  case MOD_DATP:   /* Allowed for Perl test */
+  if (dctl != NULL) field = dctl;
+  break;
+
+  case MOD_PAT:    /* Pattern modifier */
+  case MOD_PATP:   /* Allowed for Perl test */
+  if (pctl != NULL) field = pctl;
+  break;
+
+  case MOD_PD:   /* Pattern or data line modifier */
+  case MOD_PDP:  /* Ditto, allowed for Perl test */
+  case MOD_PND:  /* Ditto, but not default pattern */
+  case MOD_PNDP: /* Ditto, allowed for Perl test */
+  if (dctl != NULL) field = dctl;   /* The data block is tried first */
+    else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
+             ctx != CTX_DEFPAT))
+      field = pctl;
+  break;
+  }
+
+if (field == NULL)   /* Nothing above accepted the modifier in this context */
+  {
+  if (c == 0)
+    fprintf(outfile, "** '%s' is not valid here\n", m->name);
+  else               /* Report it in its single-character form */
+    fprintf(outfile, "** /%c is not valid here\n", c);
+  return NULL;
+  }
+
+return (char *)field + offset;
+}
+
+
+
+/*************************************************
+*              Decode a modifier list            *
+*************************************************/
+
+/* A pointer to a control block is NULL when called in cases when that block is
+not relevant. They are never all relevant in one call. At least one of patctl
+and datctl is NULL. The second argument specifies which context to use for
+modifiers that apply to contexts.
+
+The string is a list of items separated by commas. An item is either a full
+modifier name, optionally preceded by '-' (turn off; accepted only for on/off
+modifiers) and optionally followed by '=' and a value, or a run of
+single-character modifiers, which is accepted only before any full-name item
+has been processed. The string is modified in place: white space at the end of
+the last item is cut off.
+
+Arguments:
+  p           point to modifier string
+  ctx         CTX_PAT     => pattern context
+              CTX_POPPAT  => pattern context for popped pattern
+              CTX_DEFPAT  => default pattern context
+              CTX_DAT     => data context
+              CTX_DEFDAT  => default data context
+  pctl        point to pattern control block
+  dctl        point to data control block
+
+Returns:      TRUE if successful decode, FALSE otherwise
+*/
+
+static BOOL
+decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
+{
+uint8_t *ep, *pp;
+long li;
+unsigned long uli;
+BOOL first = TRUE;   /* Cleared after a full-name item has been processed */
+
+for (;;)
+  {
+  void *field;
+  modstruct *m;
+  BOOL off = FALSE;
+  unsigned int i, len;
+  int index;
+  char *endptr;
+
+  /* Skip white space and commas. */
+
+  while (isspace(*p) || *p == ',') p++;
+  if (*p == 0) break;
+
+  /* Find the end of the item; lose trailing whitespace at end of line. */
+
+  for (ep = p; *ep != 0 && *ep != ','; ep++);
+  if (*ep == 0)
+    {
+    while (ep > p && isspace(ep[-1])) ep--;
+    *ep = 0;
+    }
+
+  /* Remember if the first character is '-'. */
+
+  if (*p == '-')
+    {
+    off = TRUE;
+    p++;
+    }
+
+  /* Find the length of a full-length modifier name, and scan for it. */
+
+  pp = p;
+  while (pp < ep && *pp != '=') pp++;
+  index = scan_modifiers(p, pp - p);
+
+  /* If the first modifier is unrecognized, try to interpret it as a sequence
+  of single-character abbreviated modifiers. None of these modifiers have any
+  associated data. They just set options or control bits. */
+
+  if (index < 0)
+    {
+    uint32_t cc;
+    uint8_t *mp = p;
+
+    if (!first)
+      {
+      fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
+      if (ep - p == 1)
+        fprintf(outfile, "** Single-character modifiers must come first\n");
+      return FALSE;
+      }
+
+    for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
+      {
+      for (i = 0; i < C1MODLISTCOUNT; i++)
+        if (cc == c1modlist[i].onechar) break;
+
+      if (i >= C1MODLISTCOUNT)
+        {
+        fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
+          *p, (int)(ep-mp), mp);
+        return FALSE;
+        }
+
+      if (c1modlist[i].index >= 0)
+        {
+        index = c1modlist[i].index;
+        }
+
+      else
+        {
+        index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
+          strlen(c1modlist[i].fullname));
+        if (index < 0)
+          {
+          fprintf(outfile, "** Internal error: single-character equivalent "
+            "modifier '%s' not found\n", c1modlist[i].fullname);
+          return FALSE;
+          }
+        c1modlist[i].index = index;  /* Cache for next time */
+        }
+
+      field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
+      if (field == NULL) return FALSE;
+
+      /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
+      PCRE2_EXTENDED_MORE. */
+
+      if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
+        {
+        *((uint32_t *)field) &= ~PCRE2_EXTENDED;
+        *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
+        }
+      else
+        *((uint32_t *)field) |= modlist[index].value;
+      }
+
+    continue;    /* With the next (fullname) modifier */
+    }
+
+  /* We have a match on a full-name modifier. Check for the existence of data
+  when needed. */
+
+  m = modlist + index;      /* Save typing */
+  if (m->type != MOD_CTL && m->type != MOD_OPT &&
+      (m->type != MOD_IND || *pp == '='))
+    {
+    if (*pp++ != '=')
+      {
+      fprintf(outfile, "** '=' expected after '%s'\n", m->name);
+      return FALSE;
+      }
+    if (off)
+      {
+      fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
+      return FALSE;
+      }
+    }
+
+  /* These on/off types have no data. */
+
+  else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
+    {
+    fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
+    return FALSE;
+    }
+
+  /* Set the data length for those types that have data. Then find the field
+  that is to be set. If check_modifier() returns NULL, it has already output an
+  error message. */
+
+  len = ep - pp;
+  field = check_modifier(m, ctx, pctl, dctl, 0);
+  if (field == NULL) return FALSE;
+
+  /* Process according to data type. */
+
+  switch (m->type)
+    {
+    case MOD_CTL:
+    case MOD_OPT:
+    if (off) *((uint32_t *)field) &= ~m->value;
+      else *((uint32_t *)field) |= m->value;
+    break;
+
+    case MOD_BSR:
+    if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
+      {
+#ifdef BSR_ANYCRLF
+      *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
+#else
+      *((uint16_t *)field) = PCRE2_BSR_UNICODE;
+#endif
+      if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
+        else dctl->control2 &= ~CTL2_BSR_SET;
+      }
+    else
+      {
+      if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
+        *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
+      else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
+        *((uint16_t *)field) = PCRE2_BSR_UNICODE;
+      else goto INVALID_VALUE;
+      if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
+        else dctl->control2 |= CTL2_BSR_SET;
+      }
+    pp = ep;
+    break;
+
+    case MOD_CHR:  /* A single character */
+    *((uint32_t *)field) = *pp++;
+    break;
+
+    case MOD_CON:  /* A convert type/options list */
+    for (;; pp++)
+      {
+      uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
+      len = ((colon != NULL && colon < ep)? colon:ep) - pp;
+      for (i = 0; i < convertlistcount; i++)
+        {
+        if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
+          {
+          if (*((uint32_t *)field) == CONVERT_UNSET)
+            *((uint32_t *)field) = convertlist[i].option;
+          else
+            *((uint32_t *)field) |= convertlist[i].option;
+          break;
+          }
+        }
+      if (i >= convertlistcount) goto INVALID_VALUE;
+      pp += len;
+      if (*pp != ':') break;
+      }
+    break;
+
+    case MOD_IN2:    /* One or two unsigned integers */
+    if (!isdigit(*pp)) goto INVALID_VALUE;
+    uli = strtoul((const char *)pp, &endptr, 10);
+    if (U32OVERFLOW(uli)) goto INVALID_VALUE;
+    ((uint32_t *)field)[0] = (uint32_t)uli;
+    if (*endptr == ':')
+      {
+      uli = strtoul((const char *)endptr+1, &endptr, 10);
+      if (U32OVERFLOW(uli)) goto INVALID_VALUE;
+      ((uint32_t *)field)[1] = (uint32_t)uli;
+      }
+    else ((uint32_t *)field)[1] = 0;
+    pp = (uint8_t *)endptr;
+    break;
+
+    /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
+    less than ULONG_MAX. So first test for overflowing the long int, and then
+    test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
+
+    case MOD_SIZ:    /* PCRE2_SIZE value */
+    if (!isdigit(*pp)) goto INVALID_VALUE;
+    uli = strtoul((const char *)pp, &endptr, 10);
+    if (uli == ULONG_MAX) goto INVALID_VALUE;
+#if ULONG_MAX > PCRE2_SIZE_MAX
+    if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
+#endif
+    *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
+    pp = (uint8_t *)endptr;
+    break;
+
+    case MOD_IND:    /* Unsigned integer with default */
+    if (len == 0)
+      {
+      *((uint32_t *)field) = (uint32_t)(m->value);
+      break;
+      }
+    /* Fall through */
+
+    case MOD_INT:    /* Unsigned integer */
+    if (!isdigit(*pp)) goto INVALID_VALUE;
+    uli = strtoul((const char *)pp, &endptr, 10);
+    if (U32OVERFLOW(uli)) goto INVALID_VALUE;
+    *((uint32_t *)field) = (uint32_t)uli;
+    pp = (uint8_t *)endptr;
+    break;
+
+    case MOD_INS:   /* Signed integer */
+    if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
+    li = strtol((const char *)pp, &endptr, 10);
+    if (S32OVERFLOW(li)) goto INVALID_VALUE;
+    *((int32_t *)field) = (int32_t)li;
+    pp = (uint8_t *)endptr;
+    break;
+
+    case MOD_NL:
+    for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
+      if (len == strlen(newlines[i]) &&
+        strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
+    if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
+    if (i == 0)
+      {
+      *((uint16_t *)field) = NEWLINE_DEFAULT;
+      if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
+        else dctl->control2 &= ~CTL2_NL_SET;
+      }
+    else
+      {
+      *((uint16_t *)field) = i;
+      if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
+        else dctl->control2 |= CTL2_NL_SET;
+      }
+    pp = ep;
+    break;
+
+    case MOD_NN:              /* Name or (signed) number; may be several */
+    if (isdigit(*pp) || *pp == '-')
+      {
+      int ct = MAXCPYGET - 1;
+      int32_t value;
+      li = strtol((const char *)pp, &endptr, 10);
+      if (S32OVERFLOW(li)) goto INVALID_VALUE;
+      value = (int32_t)li;
+      field = (char *)field - m->offset + m->value;      /* Adjust field ptr */
+      if (value >= 0)                                    /* Add new number */
+        {
+        while (*((int32_t *)field) >= 0 && ct-- > 0)   /* Skip previous */
+          field = (char *)field + sizeof(int32_t);
+        if (ct <= 0)
+          {
+          fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
+          return FALSE;
+          }
+        }
+      *((int32_t *)field) = value;
+      if (ct > 0) ((int32_t *)field)[1] = -1;
+      pp = (uint8_t *)endptr;
+      }
+
+    /* Multiple strings are put end to end. */
+
+    else
+      {
+      char *nn = (char *)field;
+      if (len > 0)                      /* Add new name */
+        {
+        if (len > MAX_NAME_SIZE)
+          {
+          fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
+          return FALSE;
+          }
+        while (*nn != 0) nn += strlen(nn) + 1;
+        if (nn + len + 2 - (char *)field > LENCPYGET)
+          {
+          fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
+            m->name);
+          return FALSE;
+          }
+        memcpy(nn, pp, len);
+        }
+      nn[len] = 0 ;
+      nn[len+1] = 0;
+      pp = ep;
+      }
+    break;
+
+    case MOD_STR:
+    if (len + 1 > m->value)
+      {
+      fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
+        m->name, m->value - 1);
+      return FALSE;
+      }
+    memcpy(field, pp, len);
+    ((uint8_t *)field)[len] = 0;
+    pp = ep;
+    break;
+    }
+
+  if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
+    {
+    fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
+    return FALSE;
+    }
+
+  p = pp;
+  first = FALSE;
+
+  if (ctx == CTX_POPPAT &&
+     (pctl->options != 0 ||
+      pctl->tables_id != 0 ||
+      pctl->locale[0] != 0 ||
+      (pctl->control & NOTPOP_CONTROLS) != 0))
+    {
+    fprintf(outfile, "** '%s' is not valid here\n", m->name);
+    return FALSE;
+    }
+  }
+
+return TRUE;
+
+INVALID_VALUE:   /* Shared exit for a bad value; p and ep still bound the item */
+fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
+return FALSE;
+}
+
+
+/*************************************************
+*             Get info from a pattern            *
+*************************************************/
+
+/* A wrapped call to pcre2_pattern_info(), applied to the current compiled
+pattern.
+
+Arguments:
+  what        code for the required information
+  where       where to put the answer
+  unsetok     PCRE2_ERROR_UNSET is an "expected" result
+
+Returns:      the return from pcre2_pattern_info()
+*/
+
+static int
+pattern_info(int what, void *where, BOOL unsetok)
+{
+int rc;
+PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL);  /* Exercise the code */
+PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
+if (rc >= 0) return 0;   /* Any non-negative result is returned as 0 */
+if (rc != PCRE2_ERROR_UNSET || !unsetok)   /* An expected UNSET is silent */
+  {
+  fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
+    what);
+  if (rc == PCRE2_ERROR_BADMODE)
+    fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
+      "%d-bit mode\n", test_mode,
+      8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
+  }
+return rc;   /* Negative here, including an unreported PCRE2_ERROR_UNSET */
+}
+
+
+
+#ifdef SUPPORT_PCRE2_8
+/*************************************************
+*             Show something in a list           *
+*************************************************/
+
+/* This function just helps to keep the code that uses it tidier. It's used for
+various lists of things where there needs to be introductory text before the
+first item. As these calls are all in the POSIX-support code, they happen only
+when 8-bit mode is supported. */
+
+static void
+prmsg(const char **msg, const char *s)
+{
+fprintf(outfile, "%s %s", *msg, s);
+*msg = "";   /* Later calls with the same msg print no introductory text */
+}
+#endif  /* SUPPORT_PCRE2_8 */
+
+
+
+/*************************************************
+*                Show control bits               *
+*************************************************/
+
+/* Called for mutually exclusive controls and for unsupported POSIX controls.
+Because the bits are unique, this can be used for both pattern and data control
+words.
+
+Arguments:
+  controls    control bits
+  controls2   more control bits
+  before      text to print before
+
+Returns:      nothing
+*/
+
+static void
+show_controls(uint32_t controls, uint32_t controls2, const char *before)
+{
+fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  before,   /* Then one string per bit: " name" if set, "" if not */
+  ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
+  ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
+  ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
+  ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
+  ((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
+  ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
+  ((controls & CTL_BINCODE) != 0)? " bincode" : "",
+  ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
+  ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
+  ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
+  ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
+  ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
+  ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
+  ((controls & CTL_DFA) != 0)? " dfa" : "",
+  ((controls & CTL_EXPAND) != 0)? " expand" : "",
+  ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
+  ((controls & CTL_FINDLIMITS_NOHEAP) != 0)? " find_limits_noheap" : "",
+  ((controls2 & CTL2_FRAMESIZE) != 0)? " framesize" : "",
+  ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
+  ((controls & CTL_GETALL) != 0)? " getall" : "",
+  ((controls & CTL_GLOBAL) != 0)? " global" : "",
+  ((controls2 & CTL2_HEAPFRAMES_SIZE) != 0)? " heapframes_size" : "",
+  ((controls & CTL_HEXPAT) != 0)? " hex" : "",
+  ((controls & CTL_INFO) != 0)? " info" : "",
+  ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
+  ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
+  ((controls & CTL_MARK) != 0)? " mark" : "",
+  ((controls & CTL_MEMORY) != 0)? " memory" : "",
+  ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
+  ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
+  ((controls2 & CTL2_NULL_REPLACEMENT) != 0)? " null_replacement" : "",
+  ((controls2 & CTL2_NULL_SUBJECT) != 0)? " null_subject" : "",
+  ((controls & CTL_POSIX) != 0)? " posix" : "",
+  ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
+  ((controls & CTL_PUSH) != 0)? " push" : "",
+  ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
+  ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
+  ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
+  ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
+  ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
+  ((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "",
+  ((controls2 & CTL2_SUBSTITUTE_MATCHED) != 0)? " substitute_matched" : "",
+  ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
+  ((controls2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) != 0)? " substitute_replacement_only" : "",
+  ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
+  ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
+  ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
+  ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
+  ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
+}
+
+
+
+/*************************************************
+*                Show compile options            *
+*************************************************/
+
+/* Called from show_pattern_info() and for unsupported POSIX options.
+
+Arguments:
+  options     an options word
+  before      text to print before
+  after       text to print after
+
+Returns:      nothing
+*/
+
+static void
+show_compile_options(uint32_t options, const char *before, const char *after)
+{
+if (options == 0) fprintf(outfile, "%s %s", before, after);   /* No bits set */
+else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  before,   /* Then one string per bit: " name" if set, "" if not */
+  ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
+  ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
+  ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
+  ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
+  ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
+  ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
+  ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
+  ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
+  ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
+  ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
+  ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
+  ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
+  ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
+  ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
+  ((options & PCRE2_LITERAL) != 0)? " literal" : "",
+  ((options & PCRE2_MATCH_INVALID_UTF) != 0)? " match_invalid_utf" : "",
+  ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
+  ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
+  ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
+  ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
+  ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
+  ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
+  ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
+  ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
+  ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
+  ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
+  ((options & PCRE2_UCP) != 0)? " ucp" : "",
+  ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
+  ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
+  ((options & PCRE2_UTF) != 0)? " utf" : "",
+  after);
+}
+
+
+/*************************************************
+*           Show compile extra options           *
+*************************************************/
+
+/* Called from show_pattern_info() and for unsupported POSIX options. Each bit
+that is set is printed as its lower-case name preceded by a space. A zero
+options word prints only the before and after text with a space between.
+
+Arguments:
+  options     an options word
+  before      text to print before
+  after       text to print after
+
+Returns:      nothing
+*/
+
+static void
+show_compile_extra_options(uint32_t options, const char *before,
+  const char *after)
+{
+if (options == 0) fprintf(outfile, "%s %s", before, after);   /* No bits set */
+else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  before,
+  ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
+  ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " alt_bsux" : "",
+  ((options & PCRE2_EXTRA_ASCII_BSD) != 0)? " ascii_bsd" : "",
+  ((options & PCRE2_EXTRA_ASCII_BSS) != 0)? " ascii_bss" : "",
+  ((options & PCRE2_EXTRA_ASCII_BSW) != 0)? " ascii_bsw" : "",
+  ((options & PCRE2_EXTRA_ASCII_DIGIT) != 0)? " ascii_digit" : "",
+  ((options & PCRE2_EXTRA_ASCII_POSIX) != 0)? " ascii_posix" : "",
+  ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
+  ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "",
+  ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
+  ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
+  ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
+  after);
+}
+
+
+
+#ifdef SUPPORT_PCRE2_8
+/*************************************************
+*                Show match options              *
+*************************************************/
+
+/* Called for unsupported POSIX options.
*/ + +static void +show_match_options(uint32_t options) +{ +fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + ((options & PCRE2_ANCHORED) != 0)? " anchored" : "", + ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)? " copy_matched_subject" : "", + ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "", + ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "", + ((options & PCRE2_DISABLE_RECURSELOOP_CHECK) != 0)? " disable_recurseloop_check" : "", + ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "", + ((options & PCRE2_NO_JIT) != 0)? " no_jit" : "", + ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "", + ((options & PCRE2_NOTBOL) != 0)? " notbol" : "", + ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "", + ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "", + ((options & PCRE2_NOTEOL) != 0)? " noteol" : "", + ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "", + ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : ""); +} +#endif /* SUPPORT_PCRE2_8 */ + + + +/************************************************* +* Show memory usage info for a pattern * +*************************************************/ + +static void +show_memory_info(void) +{ +uint32_t name_count, name_entry_size; +PCRE2_SIZE size, cblock_size; + +/* One of the test_mode values will always be true, but to stop a compiler +warning we must initialize cblock_size. 
*/ + +cblock_size = 0; +#ifdef SUPPORT_PCRE2_8 +if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8); +#endif +#ifdef SUPPORT_PCRE2_16 +if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16); +#endif +#ifdef SUPPORT_PCRE2_32 +if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32); +#endif + +(void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE); +(void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE); +(void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE); + +/* The uint32_t variables are cast before multiplying to stop code analyzers +grumbling about potential overflow. */ + +fprintf(outfile, "Memory allocation - compiled block : %" SIZ_FORM "\n", size); +fprintf(outfile, "Memory allocation - code portion : %" SIZ_FORM "\n", size - + (PCRE2_SIZE)name_count * (PCRE2_SIZE)name_entry_size * (PCRE2_SIZE)code_unit_size - + cblock_size); + +if (pat_patctl.jit != 0) + { + (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE); + fprintf(outfile, "Memory allocation - JIT code : %" SIZ_FORM "\n", size); + } +} + + + +/************************************************* +* Show frame size info for a pattern * +*************************************************/ + +static void +show_framesize(void) +{ +PCRE2_SIZE frame_size; +(void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE); +fprintf(outfile, "Frame size for pcre2_match(): %" SIZ_FORM "\n", frame_size); +} + + + +/************************************************* +* Show heapframes size info for a match_data * +*************************************************/ + +static void +show_heapframes_size(void) +{ +PCRE2_SIZE heapframes_size; +PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(heapframes_size, match_data); +fprintf(outfile, "Heapframes size in match_data: %" SIZ_FORM "\n", + heapframes_size); +} + + + +/************************************************* +* Get and output an error message * +*************************************************/ + +static BOOL 
+print_error_message(int errorcode, const char *before, const char *after) +{ +int len; +PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer); +if (len < 0) + { + fprintf(outfile, "\n** pcre2test internal error: cannot interpret error " + "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len); + } +else + { + fprintf(outfile, "%s", before); + PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile); + fprintf(outfile, "%s", after); + } +return len >= 0; +} + + +/************************************************* +* Callback function for callout enumeration * +*************************************************/ + +/* The only differences in the callout emumeration block for different code +unit widths are that the pointers to the subject, the most recent MARK, and a +callout argument string point to strings of the appropriate width. Casts can be +used to deal with this. + +Argument: + cb pointer to enumerate block + callout_data user data + +Returns: 0 +*/ + +static int callout_callback(pcre2_callout_enumerate_block_8 *cb, + void *callout_data) +{ +uint32_t i; +BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; + +(void)callout_data; /* Not currently displayed */ + +fprintf(outfile, "Callout "); +if (cb->callout_string != NULL) + { + uint32_t delimiter = CODE_UNIT(cb->callout_string, -1); + fprintf(outfile, "%c", delimiter); + PCHARSV(cb->callout_string, 0, + cb->callout_string_length, utf, outfile); + for (i = 0; callout_start_delims[i] != 0; i++) + if (delimiter == callout_start_delims[i]) + { + delimiter = callout_end_delims[i]; + break; + } + fprintf(outfile, "%c ", delimiter); + } +else fprintf(outfile, "%d ", cb->callout_number); + +fprintf(outfile, "%.*s\n", + (int)((cb->next_item_length == 0)? 
1 : cb->next_item_length), + pbuffer8 + cb->pattern_position); + +return 0; +} + + + +/************************************************* +* Show information about a pattern * +*************************************************/ + +/* This function is called after a pattern has been compiled if any of the +information-requesting controls have been set. + +Arguments: none + +Returns: PR_OK continue processing next line + PR_SKIP skip to a blank line + PR_ABEND abort the pcre2test run +*/ + +static int +show_pattern_info(void) +{ +uint32_t compile_options, overall_options, extra_options; +BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; + +if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0) + { + fprintf(outfile, "------------------------------------------------------------------\n"); + PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0); + } + +if ((pat_patctl.control & CTL_INFO) != 0) + { + int rc; + void *nametable; + uint8_t *start_bits; + BOOL heap_limit_set, match_limit_set, depth_limit_set; + uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit, + hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty, + depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount, + newline_convention; + + /* Exercise the error route. */ + + PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL); + (void)rc; + + /* These info requests may return PCRE2_ERROR_UNSET. 
*/ + + switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE)) + { + case 0: + heap_limit_set = TRUE; + break; + + case PCRE2_ERROR_UNSET: + heap_limit_set = FALSE; + break; + + default: + return PR_ABEND; + } + + switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE)) + { + case 0: + match_limit_set = TRUE; + break; + + case PCRE2_ERROR_UNSET: + match_limit_set = FALSE; + break; + + default: + return PR_ABEND; + } + + switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE)) + { + case 0: + depth_limit_set = TRUE; + break; + + case PCRE2_ERROR_UNSET: + depth_limit_set = FALSE; + break; + + default: + return PR_ABEND; + } + + /* These info requests should always succeed. */ + + if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) + + pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) + + pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) + + pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) + + pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) + + pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) + + pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) + + pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) + + pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) + + pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) + + pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) + + pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) + + pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) + + pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) + + pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) + + pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) + + pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE) + != 0) + return PR_ABEND; + + fprintf(outfile, "Capture group count = %d\n", capture_count); + + if (backrefmax > 0) + fprintf(outfile, "Max back reference = %d\n", backrefmax); + + if (maxlookbehind > 0) + fprintf(outfile, "Max lookbehind = %d\n", 
maxlookbehind); + + if (heap_limit_set) + fprintf(outfile, "Heap limit = %u\n", heap_limit); + + if (match_limit_set) + fprintf(outfile, "Match limit = %u\n", match_limit); + + if (depth_limit_set) + fprintf(outfile, "Depth limit = %u\n", depth_limit); + + if (namecount > 0) + { + fprintf(outfile, "Named capture groups:\n"); + for (; namecount > 0; namecount--) + { + int imm2_size = test_mode == PCRE8_MODE ? 2 : 1; + uint32_t length = (uint32_t)STRLEN(nametable + imm2_size); + fprintf(outfile, " "); + + /* In UTF mode the name may be a UTF string containing non-ASCII + letters and digits. We must output it as a UTF-8 string. In non-UTF mode, + use the normal string printing functions, which use escapes for all + non-ASCII characters. */ + + if (utf) + { +#ifdef SUPPORT_PCRE2_32 + if (test_mode == PCRE32_MODE) + { + PCRE2_SPTR32 nameptr = (PCRE2_SPTR32)nametable + imm2_size; + while (*nameptr != 0) + { + uint8_t u8buff[6]; + int len = ord2utf8(*nameptr++, u8buff); + fprintf(outfile, "%.*s", len, u8buff); + } + } +#endif +#ifdef SUPPORT_PCRE2_16 + if (test_mode == PCRE16_MODE) + { + PCRE2_SPTR16 nameptr = (PCRE2_SPTR16)nametable + imm2_size; + while (*nameptr != 0) + { + int len; + uint8_t u8buff[6]; + uint32_t c = *nameptr++ & 0xffff; + if (c >= 0xD800 && c < 0xDC00) + c = ((c & 0x3ff) << 10) + (*nameptr++ & 0x3ff) + 0x10000; + len = ord2utf8(c, u8buff); + fprintf(outfile, "%.*s", len, u8buff); + } + } +#endif +#ifdef SUPPORT_PCRE2_8 + if (test_mode == PCRE8_MODE) + fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size); +#endif + } + else /* Not UTF mode */ + { + PCHARSV(nametable, imm2_size, length, FALSE, outfile); + } + + while (length++ < nameentrysize - imm2_size) putc(' ', outfile); + +#ifdef SUPPORT_PCRE2_32 + if (test_mode == PCRE32_MODE) + fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0])); +#endif +#ifdef SUPPORT_PCRE2_16 + if (test_mode == PCRE16_MODE) + fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0])); +#endif +#ifdef 
SUPPORT_PCRE2_8 + if (test_mode == PCRE8_MODE) + fprintf(outfile, "%3d\n", (int)( + ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1])); +#endif + + nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size); + } + } + + if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n"); + if (hasbackslashc) fprintf(outfile, "Contains \\C\n"); + if (match_empty) fprintf(outfile, "May match empty string\n"); + + pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE); + pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE); + pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE); + + /* Remove UTF/UCP if they were there only because of forbid_utf. This saves + cluttering up the verification output of non-UTF test files. */ + + if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0) + { + compile_options &= ~PCRE2_NEVER_UTF; + overall_options &= ~PCRE2_NEVER_UTF; + } + + if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0) + { + compile_options &= ~PCRE2_NEVER_UCP; + overall_options &= ~PCRE2_NEVER_UCP; + } + + if ((compile_options|overall_options) != 0) + { + if (compile_options == overall_options) + show_compile_options(compile_options, "Options:", "\n"); + else + { + show_compile_options(compile_options, "Compile options:", "\n"); + show_compile_options(overall_options, "Overall options:", "\n"); + } + } + + if (extra_options != 0) + show_compile_extra_options(extra_options, "Extra options:", "\n"); + + if (jchanged) fprintf(outfile, "Duplicate name status changes\n"); + + if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 || + (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0) + fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)? 
+ "any Unicode newline" : "CR, LF, or CRLF"); + + if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0) + { + switch (newline_convention) + { + case PCRE2_NEWLINE_CR: + fprintf(outfile, "Forced newline is CR\n"); + break; + + case PCRE2_NEWLINE_LF: + fprintf(outfile, "Forced newline is LF\n"); + break; + + case PCRE2_NEWLINE_CRLF: + fprintf(outfile, "Forced newline is CRLF\n"); + break; + + case PCRE2_NEWLINE_ANYCRLF: + fprintf(outfile, "Forced newline is CR, LF, or CRLF\n"); + break; + + case PCRE2_NEWLINE_ANY: + fprintf(outfile, "Forced newline is any Unicode newline\n"); + break; + + case PCRE2_NEWLINE_NUL: + fprintf(outfile, "Forced newline is NUL\n"); + break; + + default: + break; + } + } + + if (first_ctype == 2) + { + fprintf(outfile, "First code unit at start or follows newline\n"); + } + else if (first_ctype == 1) + { + const char *caseless = + ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)? + "" : " (caseless)"; + if (PRINTOK(first_cunit)) + fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless); + else + { + fprintf(outfile, "First code unit = "); + pchar(first_cunit, FALSE, outfile); + fprintf(outfile, "%s\n", caseless); + } + } + else if (start_bits != NULL) + { + int i; + int c = 24; + fprintf(outfile, "Starting code units: "); + for (i = 0; i < 256; i++) + { + if ((start_bits[i/8] & (1u << (i&7))) != 0) + { + if (c > 75) + { + fprintf(outfile, "\n "); + c = 2; + } + if (PRINTOK(i) && i != ' ') + { + fprintf(outfile, "%c ", i); + c += 2; + } + else + { + fprintf(outfile, "\\x%02x ", i); + c += 5; + } + } + } + fprintf(outfile, "\n"); + } + + if (last_ctype != 0) + { + const char *caseless = + ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)? 
+ "" : " (caseless)"; + if (PRINTOK(last_cunit)) + fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless); + else + { + fprintf(outfile, "Last code unit = "); + pchar(last_cunit, FALSE, outfile); + fprintf(outfile, "%s\n", caseless); + } + } + + if ((FLD(compiled_code, overall_options) & PCRE2_NO_START_OPTIMIZE) == 0) + fprintf(outfile, "Subject length lower bound = %d\n", minlength); + + if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0) + { +#ifdef SUPPORT_JIT + if (FLD(compiled_code, executable_jit) != NULL) + fprintf(outfile, "JIT compilation was successful\n"); + else + { + fprintf(outfile, "JIT compilation was not successful"); + if (jitrc != 0 && !print_error_message(jitrc, " (", ")")) + return PR_ABEND; + fprintf(outfile, "\n"); + } +#else + fprintf(outfile, "JIT support is not available in this version of PCRE2\n"); +#endif + } + } + +if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0) + { + int errorcode; + PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0); + if (errorcode != 0) + { + fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode); + if (errorcode < 0 && !print_error_message(errorcode, "", "\n")) + return PR_ABEND; + return PR_SKIP; + } + } + +return PR_OK; +} + + + +/************************************************* +* Handle serialization error * +*************************************************/ + +/* Print an error message after a serialization failure. + +Arguments: + rc the error code + msg an initial message for what failed + +Returns: FALSE if print_error_message() fails +*/ + +static BOOL +serial_error(int rc, const char *msg) +{ +fprintf(outfile, "%s failed: error %d: ", msg, rc); +return print_error_message(rc, "", "\n"); +} + + + +/************************************************* +* Open file for save/load commands * +*************************************************/ + +/* This function decodes the file name and opens the file. 
+ +Arguments: + buffptr point after the #command + mode open mode + fptr points to the FILE variable + name name of # command + +Returns: PR_OK or PR_ABEND +*/ + +static int +open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name) +{ +char *endf; +char *filename = (char *)buffptr; +while (isspace(*filename)) filename++; +endf = filename + strlen8(filename); +while (endf > filename && isspace(endf[-1])) endf--; + +if (endf == filename) + { + fprintf(outfile, "** File name expected after %s\n", name); + return PR_ABEND; + } + +*endf = 0; +*fptr = fopen((const char *)filename, mode); +if (*fptr == NULL) + { + fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno)); + return PR_ABEND; + } + +return PR_OK; +} + + + +/************************************************* +* Process command line * +*************************************************/ + +/* This function is called for lines beginning with # and a character that is +not ! or whitespace, when encountered between tests, which means that there is +no compiled pattern (compiled_code is NULL). The line is in buffer. 
+ +Arguments: none + +Returns: PR_OK continue processing next line + PR_SKIP skip to a blank line + PR_ABEND abort the pcre2test run +*/ + +static int +process_command(void) +{ +FILE *f; +PCRE2_SIZE serial_size; +size_t i; +int rc, cmd, cmdlen, yield; +uint16_t first_listed_newline; +const char *cmdname; +uint8_t *argptr, *serial; + +yield = PR_OK; +cmd = CMD_UNKNOWN; +cmdlen = 0; + +for (i = 0; i < cmdlistcount; i++) + { + cmdname = cmdlist[i].name; + cmdlen = strlen(cmdname); + if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 && + isspace(buffer[cmdlen+1])) + { + cmd = cmdlist[i].value; + break; + } + } + +argptr = buffer + cmdlen + 1; + +if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT) + { + fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname); + return PR_ABEND; + } + +switch(cmd) + { + case CMD_UNKNOWN: + fprintf(outfile, "** Unknown command: %s", buffer); + break; + + case CMD_FORBID_UTF: + forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP; + break; + + case CMD_PERLTEST: + restrict_for_perl_test = TRUE; + break; + + /* Set default pattern modifiers */ + + case CMD_PATTERN: + (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL); + if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0) + def_patctl.jit = JIT_DEFAULT; + break; + + /* Set default subject modifiers */ + + case CMD_SUBJECT: + (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl); + break; + + /* Check the default newline, and if not one of those listed, set up the + first one to be forced. An empty list unsets. 
*/ + + case CMD_NEWLINE_DEFAULT: + local_newline_default = 0; /* Unset */ + first_listed_newline = 0; + for (;;) + { + while (isspace(*argptr)) argptr++; + if (*argptr == 0) break; + for (i = 1; i < sizeof(newlines)/sizeof(char *); i++) + { + size_t nlen = strlen(newlines[i]); + if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 && + isspace(argptr[nlen])) + { + if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */ + if (first_listed_newline == 0) first_listed_newline = i; + } + } + while (*argptr != 0 && !isspace(*argptr)) argptr++; + } + local_newline_default = first_listed_newline; + break; + + /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect + the compiled pattern (e.g. to give information) are permitted. The default + pattern modifiers are ignored. */ + + case CMD_POP: + case CMD_POPCOPY: + if (patstacknext <= 0) + { + fprintf(outfile, "** Can't pop off an empty stack\n"); + return PR_SKIP; + } + memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */ + if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL)) + return PR_SKIP; + + if (cmd == CMD_POP) + { + SET(compiled_code, patstack[--patstacknext]); + } + else + { + PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]); + } + + if (pat_patctl.jit != 0) + { + PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit); + } + if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info(); + if ((pat_patctl.control2 & CTL2_FRAMESIZE) != 0) show_framesize(); + if ((pat_patctl.control & CTL_ANYINFO) != 0) + { + rc = show_pattern_info(); + if (rc != PR_OK) return rc; + } + break; + + /* Save the stack of compiled patterns to a file, then empty the stack. 
*/ + + case CMD_SAVE: + if (patstacknext <= 0) + { + fprintf(outfile, "** No stacked patterns to save\n"); + return PR_OK; + } + + rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save"); + if (rc != PR_OK) return rc; + + PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size, + general_context); + if (rc < 0) + { + fclose(f); + if (!serial_error(rc, "Serialization")) return PR_ABEND; + break; + } + + /* Write the length at the start of the file to make it straightforward to + get the right memory when re-loading. This saves having to read the file size + in different operating systems. To allow for different endianness (even + though reloading with the opposite endianness does not work), write the + length byte-by-byte. */ + + for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f); + if (fwrite(serial, 1, serial_size, f) != serial_size) + { + fprintf(outfile, "** Wrong return from fwrite()\n"); + fclose(f); + return PR_ABEND; + } + + fclose(f); + PCRE2_SERIALIZE_FREE(serial); + while(patstacknext > 0) + { + SET(compiled_code, patstack[--patstacknext]); + SUB1(pcre2_code_free, compiled_code); + } + SET(compiled_code, NULL); + break; + + /* Load a set of compiled patterns from a file onto the stack */ + + case CMD_LOAD: + rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load"); + if (rc != PR_OK) return rc; + + serial_size = 0; + for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8); + + serial = malloc(serial_size); + if (serial == NULL) + { + fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n", + serial_size); + fclose(f); + return PR_ABEND; + } + + i = fread(serial, 1, serial_size, f); + fclose(f); + + if (i != serial_size) + { + fprintf(outfile, "** Wrong return from fread()\n"); + yield = PR_ABEND; + } + else + { + PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial); + if (rc < 0) + { + if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND; + } + else + { + if (rc + patstacknext > PATSTACKSIZE) + { + 
fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n", + rc, (rc == 1)? "" : "s"); + rc = PATSTACKSIZE - patstacknext; + fprintf(outfile, "** Decoding %d pattern%s\n", rc, + (rc == 1)? "" : "s"); + } + PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial, + general_context); + if (rc < 0) + { + if (!serial_error(rc, "Deserialization")) yield = PR_ABEND; + } + else patstacknext += rc; + } + } + + free(serial); + break; + + /* Load a set of binary tables into tables3. */ + + case CMD_LOADTABLES: + rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables"); + if (rc != PR_OK) return rc; + + if (tables3 == NULL) + { + (void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length); + tables3 = malloc(loadtables_length); + } + + if (tables3 == NULL) + { + fprintf(outfile, "** Failed: malloc failed for #loadtables\n"); + yield = PR_ABEND; + } + else if (fread(tables3, 1, loadtables_length, f) != loadtables_length) + { + fprintf(outfile, "** Wrong return from fread()\n"); + yield = PR_ABEND; + } + + fclose(f); + break; + } + +return yield; +} + + + +/************************************************* +* Process pattern line * +*************************************************/ + +/* This function is called when the input buffer contains the start of a +pattern. The first character is known to be a valid delimiter. The pattern is +read, modifiers are interpreted, and a suitable local context is set up for +this test. The pattern is then compiled. 
+ +Arguments: none + +Returns: PR_OK continue processing next line + PR_SKIP skip to a blank line + PR_ABEND abort the pcre2test run +*/ + +static int +process_pattern(void) +{ +BOOL utf; +uint32_t k; +uint8_t *p = buffer; +unsigned int delimiter = *p++; +int errorcode; +void *use_pat_context; +void *use_pbuffer = NULL; +uint32_t use_forbid_utf = forbid_utf; +PCRE2_SIZE patlen; +PCRE2_SIZE valgrind_access_length; +PCRE2_SIZE erroroffset; + +/* The perltest.sh script supports only / as a delimiter. */ + +if (restrict_for_perl_test && delimiter != '/') + { + fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n"); + return PR_ABEND; + } + +/* Initialize the context and pattern/data controls for this test from the +defaults. */ + +PATCTXCPY(pat_context, default_pat_context); +memcpy(&pat_patctl, &def_patctl, sizeof(patctl)); + +/* Find the end of the pattern, reading more lines if necessary. */ + +for(;;) + { + while (*p != 0) + { + if (*p == '\\' && p[1] != 0) p++; + else if (*p == delimiter) break; + p++; + } + if (*p != 0) break; + if ((p = extend_inputline(infile, p, " > ")) == NULL) + { + fprintf(outfile, "** Unexpected EOF\n"); + return PR_ABEND; + } + if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p); + } + +/* If the first character after the delimiter is backslash, make the pattern +end with backslash. This is purely to provide a way of testing for the error +message when a pattern ends with backslash. */ + +if (p[1] == '\\') *p++ = '\\'; + +/* Terminate the pattern at the delimiter, and compute the length. */ + +*p++ = 0; +patlen = p - buffer - 2; + +/* Look for modifiers and options after the final delimiter. */ + +if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP; + +/* Note that the match_invalid_utf option also sets utf when passed to +pcre2_compile(). 
*/ + +utf = (pat_patctl.options & (PCRE2_UTF|PCRE2_MATCH_INVALID_UTF)) != 0; + +/* The utf8_input modifier is not allowed in 8-bit mode, and is mutually +exclusive with the utf modifier. */ + +if ((pat_patctl.control & CTL_UTF8_INPUT) != 0) + { + if (test_mode == PCRE8_MODE) + { + fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n"); + return PR_SKIP; + } + if (utf) + { + fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n"); + return PR_SKIP; + } + } + +/* The convert and posix modifiers are mutually exclusive. */ + +if (pat_patctl.convert_type != CONVERT_UNSET && + (pat_patctl.control & CTL_POSIX) != 0) + { + fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n"); + return PR_SKIP; + } + +/* Check for mutually exclusive control modifiers. At present, these are all in +the first control word. */ + +for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++) + { + uint32_t c = pat_patctl.control & exclusive_pat_controls[k]; + if (c != 0 && c != (c & (~c+1))) + { + show_controls(c, 0, "** Not allowed together:"); + fprintf(outfile, "\n"); + return PR_SKIP; + } + } + +/* Assume full JIT compile for jitverify and/or jitfast if nothing else was +specified. */ + +if (pat_patctl.jit == 0 && + (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0) + pat_patctl.jit = JIT_DEFAULT; + +/* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting +in callouts. Convert from hex if requested (literal strings in quotes may be +present within the hexadecimal pairs). The result must necessarily be fewer +characters so will always fit in pbuffer8. 
*/ + +if ((pat_patctl.control & CTL_HEXPAT) != 0) + { + uint8_t *pp, *pt; + uint32_t c, d; + + pt = pbuffer8; + for (pp = buffer + 1; *pp != 0; pp++) + { + if (isspace(*pp)) continue; + c = *pp++; + + /* Handle a literal substring */ + + if (c == '\'' || c == '"') + { + uint8_t *pq = pp; + for (;; pp++) + { + d = *pp; + if (d == 0) + { + fprintf(outfile, "** Missing closing quote in hex pattern: " + "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2); + return PR_SKIP; + } + if (d == c) break; + *pt++ = d; + } + } + + /* Expect a hex pair */ + + else + { + if (!isxdigit(c)) + { + fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %" + PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2); + return PR_SKIP; + } + if (*pp == 0) + { + fprintf(outfile, "** Odd number of digits in hex pattern\n"); + return PR_SKIP; + } + d = *pp; + if (!isxdigit(d)) + { + fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %" + PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1); + return PR_SKIP; + } + c = toupper(c); + d = toupper(d); + *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) + + (isdigit(d)? (d - '0') : (d - 'A' + 10)); + } + } + *pt = 0; + patlen = pt - pbuffer8; + } + +/* If not a hex string, process for repetition expansion if requested. */ + +else if ((pat_patctl.control & CTL_EXPAND) != 0) + { + uint8_t *pp, *pt; + + pt = pbuffer8; + for (pp = buffer + 1; *pp != 0; pp++) + { + uint8_t *pc = pp; + uint32_t count = 1; + size_t length = 1; + + /* Check for replication syntax; if not found, the defaults just set will + prevail and one character will be copied. 
*/ + + if (pp[0] == '\\' && pp[1] == '[') + { + uint8_t *pe; + for (pe = pp + 2; *pe != 0; pe++) + { + if (pe[0] == ']' && pe[1] == '{') + { + uint32_t clen = pe - pc - 2; + uint32_t i = 0; + unsigned long uli; + char *endptr; + + pe += 2; + uli = strtoul((const char *)pe, &endptr, 10); + if (U32OVERFLOW(uli)) + { + fprintf(outfile, "** Pattern repeat count too large\n"); + return PR_SKIP; + } + + i = (uint32_t)uli; + pe = (uint8_t *)endptr; + if (*pe == '}') + { + if (i == 0) + { + fprintf(outfile, "** Zero repeat not allowed\n"); + return PR_SKIP; + } + pc += 2; + count = i; + length = clen; + pp = pe; + break; + } + } + } + } + + /* Add to output. If the buffer is too small expand it. The function for + expanding buffers always keeps buffer and pbuffer8 in step as far as their + size goes. */ + + while (pt + count * length > pbuffer8 + pbuffer8_size) + { + size_t pc_offset = pc - buffer; + size_t pp_offset = pp - buffer; + size_t pt_offset = pt - pbuffer8; + expand_input_buffers(); + pc = buffer + pc_offset; + pp = buffer + pp_offset; + pt = pbuffer8 + pt_offset; + } + + for (; count > 0; count--) + { + memcpy(pt, pc, length); + pt += length; + } + } + + *pt = 0; + patlen = pt - pbuffer8; + + if ((pat_patctl.control & CTL_INFO) != 0) + fprintf(outfile, "Expanded: %s\n", pbuffer8); + } + +/* Neither hex nor expanded, just copy the input verbatim. 
*/ + +else + { + strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1); + } + +/* Sort out character tables */ + +if (pat_patctl.locale[0] != 0) + { + if (pat_patctl.tables_id != 0) + { + fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n"); + return PR_SKIP; + } + if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL) + { + fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale); + return PR_SKIP; + } + if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0) + { + strcpy((char *)locale_name, (char *)pat_patctl.locale); + if (locale_tables != NULL) + { + PCRE2_MAKETABLES_FREE(general_context, (void *)locale_tables); + } + PCRE2_MAKETABLES(locale_tables, general_context); + } + use_tables = locale_tables; + } + +else switch (pat_patctl.tables_id) + { + case 0: use_tables = NULL; break; + case 1: use_tables = tables1; break; + case 2: use_tables = tables2; break; + + case 3: + if (tables3 == NULL) + { + fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not " + "been loaded\n"); + return PR_SKIP; + } + use_tables = tables3; + break; + + default: + fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n"); + return PR_SKIP; + } + +PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables); + +/* Set up for the stackguard test. */ + +if (pat_patctl.stackguard_test != 0) + { + PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL); + } + +/* Handle compiling via the POSIX interface, which doesn't support the +timing, showing, or debugging options, nor the ability to pass over +local character tables. Neither does it have 16-bit or 32-bit support. 
*/ + +if ((pat_patctl.control & CTL_POSIX) != 0) + { +#ifdef SUPPORT_PCRE2_8 + int rc; + int cflags = 0; + const char *msg = "** Ignored with POSIX interface:"; +#endif + + if (test_mode != PCRE8_MODE) + { + fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n"); + return PR_SKIP; + } + +#ifdef SUPPORT_PCRE2_8 + /* Check for features that the POSIX interface does not support. */ + + if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale"); + if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace"); + if (pat_patctl.tables_id != 0) prmsg(&msg, "tables"); + if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard"); + if (timeit > 0) prmsg(&msg, "timing"); + if (pat_patctl.jit != 0) prmsg(&msg, "JIT"); + + if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0) + { + show_compile_options( + pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS), + msg, ""); + msg = ""; + } + + if ((FLD(pat_context, extra_options) & + (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0) + { + show_compile_extra_options( + FLD(pat_context, extra_options) & + (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, ""); + msg = ""; + } + + if ((pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS)) != 0 || + (pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2)) != 0) + { + show_controls( + pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS), + pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2), + msg); + msg = ""; + + /* Remove ignored options so as not to get a repeated message for those + that are actually subject controls. 
*/ + + pat_patctl.control &= (uint32_t)(POSIX_SUPPORTED_COMPILE_CONTROLS); + pat_patctl.control2 &= (uint32_t)(POSIX_SUPPORTED_COMPILE_CONTROLS2); + } + + if (local_newline_default != 0) prmsg(&msg, "#newline_default"); + if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET) + prmsg(&msg, "max_pattern_length"); + if (FLD(pat_context, max_pattern_compiled_length) != PCRE2_UNSET) + prmsg(&msg, "max_pattern_compiled_length"); + if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT) + prmsg(&msg, "parens_nest_limit"); + + if (msg[0] == 0) fprintf(outfile, "\n"); + + /* Translate PCRE2 options to POSIX options and then compile. */ + + if (utf) cflags |= REG_UTF; + if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB; + if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP; + if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE; + if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC; + if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE; + if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL; + if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY; + + if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0) + { + preg.re_endp = (char *)pbuffer8 + patlen; + cflags |= REG_PEND; + } + + rc = regcomp(&preg, (char *)pbuffer8, cflags); + + /* Compiling failed */ + + if (rc != 0) + { + size_t bsize, usize; + int psize; + + preg.re_pcre2_code = NULL; /* In case something was left in there */ + preg.re_match_data = NULL; + + bsize = (pat_patctl.regerror_buffsize != 0)? + pat_patctl.regerror_buffsize : pbuffer8_size; + if (bsize + 8 < pbuffer8_size) + memcpy(pbuffer8 + bsize, "DEADBEEF", 8); + usize = regerror(rc, &preg, (char *)pbuffer8, bsize); + + /* Inside regerror(), snprintf() is used. If the buffer is too small, some + versions of snprintf() put a zero byte at the end, but others do not. 
+ Therefore, we print a maximum of one less than the size of the buffer. */ + + psize = (int)bsize - 1; + fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8); + if (usize > bsize) + { + fprintf(outfile, "** regerror() message truncated\n"); + if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0) + fprintf(outfile, "** regerror() buffer overflow\n"); + } + return PR_SKIP; + } + + /* Compiling succeeded. Check that the values in the preg block are sensible. + It can happen that pcre2test is accidentally linked with a different POSIX + library which succeeds, but of course puts different things into preg. In + this situation, calling regfree() may cause a segfault (or invalid free() in + valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the + calling of regfree() on exit. */ + + if (preg.re_pcre2_code == NULL || + ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER || + ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub || + preg.re_match_data == NULL || + preg.re_cflags != cflags) + { + fprintf(outfile, + "** The regcomp() function returned zero (success), but the values set\n" + "** in the preg block are not valid for PCRE2. Check that pcre2test is\n" + "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n" + "** some other POSIX regex library.\n**\n"); + preg.re_pcre2_code = NULL; + return PR_ABEND; + } + + return PR_OK; +#endif /* SUPPORT_PCRE2_8 */ + } + +/* Handle compiling via the native interface. Controls that act later are +ignored with "push". Replacements are locked out. 
*/ + +if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0) + { + if (pat_patctl.replacement[0] != 0) + { + fprintf(outfile, "** Replacement text is not supported with 'push'.\n"); + return PR_OK; + } + if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 || + (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0) + { + show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS, + pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2, + "** Ignored when compiled pattern is stacked with 'push':"); + fprintf(outfile, "\n"); + } + if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 || + (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0) + { + show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS, + pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2, + "** Applies only to compile when pattern is stacked with 'push':"); + fprintf(outfile, "\n"); + } + } + +/* Convert the input in non-8-bit modes. */ + +errorcode = 0; + +#ifdef SUPPORT_PCRE2_16 +if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen); +#endif + +#ifdef SUPPORT_PCRE2_32 +if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen); +#endif + +switch(errorcode) + { + case -1: + fprintf(outfile, "** Failed: invalid UTF-8 string cannot be " + "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32); + return PR_SKIP; + + case -2: + fprintf(outfile, "** Failed: character value greater than 0x10ffff " + "cannot be converted to UTF\n"); + return PR_SKIP; + + case -3: + fprintf(outfile, "** Failed: character value greater than 0xffff " + "cannot be converted to 16-bit in non-UTF mode\n"); + return PR_SKIP; + + default: + break; + } + +/* The pattern is now in pbuffer[8|16|32], with the length in code units in +patlen. If it is to be converted, copy the result back afterwards so that it +ends up back in the usual place. 
*/ + +if (pat_patctl.convert_type != CONVERT_UNSET) + { + int rc; + int convert_return = PR_OK; + uint32_t convert_options = pat_patctl.convert_type; + void *converted_pattern; + PCRE2_SIZE converted_length; + + if (pat_patctl.convert_length != 0) + { + converted_length = pat_patctl.convert_length; + converted_pattern = malloc(converted_length * code_unit_size); + if (converted_pattern == NULL) + { + fprintf(outfile, "** Failed: malloc failed for converted pattern\n"); + return PR_SKIP; + } + } + else converted_pattern = NULL; /* Let the library allocate */ + + if (utf) convert_options |= PCRE2_CONVERT_UTF; + if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0) + convert_options |= PCRE2_CONVERT_NO_UTF_CHECK; + + CONCTXCPY(con_context, default_con_context); + + if (pat_patctl.convert_glob_escape != 0) + { + uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 : + pat_patctl.convert_glob_escape; + PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape); + if (rc != 0) + { + fprintf(outfile, "** Invalid glob escape '%c'\n", + pat_patctl.convert_glob_escape); + convert_return = PR_SKIP; + goto CONVERT_FINISH; + } + } + + if (pat_patctl.convert_glob_separator != 0) + { + PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator); + if (rc != 0) + { + fprintf(outfile, "** Invalid glob separator '%c'\n", + pat_patctl.convert_glob_separator); + convert_return = PR_SKIP; + goto CONVERT_FINISH; + } + } + + PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options, + &converted_pattern, &converted_length, con_context); + + if (rc != 0) + { + fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ", + converted_length); + convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND; + } + + /* Output the converted pattern, then copy it. 
*/ + + else + { + BOOL toolong; + PCHARSV(converted_pattern, 0, converted_length, utf, outfile); + fprintf(outfile, "\n"); + + if (test_mode == PCRE8_MODE) + toolong = (converted_length + 1 > pbuffer8_size); + else if (test_mode == PCRE16_MODE) + toolong = (2*(converted_length + 1) > pbuffer8_size); + else /* 32-bit */ + toolong = (4*(converted_length + 1) > pbuffer8_size); + + if (toolong) + { + fprintf(outfile, "** Pattern conversion is too long for the buffer\n"); + convert_return = PR_SKIP; + } + else + { + CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1); + patlen = converted_length; + } + } + + /* Free the converted pattern. */ + + CONVERT_FINISH: + if (pat_patctl.convert_length != 0) + free(converted_pattern); + else + PCRE2_CONVERTED_PATTERN_FREE(converted_pattern); + + /* Return if conversion was unsuccessful. */ + + if (convert_return != PR_OK) return convert_return; + } + +/* By default we pass a zero-terminated pattern, but a length is passed if +"use_length" was specified or this is a hex pattern (which might contain binary +zeros). When valgrind is supported, arrange for the unused part of the buffer +to be marked as no access. 
*/ + +valgrind_access_length = patlen; +if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0) + { + patlen = PCRE2_ZERO_TERMINATED; + valgrind_access_length += 1; /* For the terminating zero */ + } + +#ifdef SUPPORT_VALGRIND +#ifdef SUPPORT_PCRE2_8 +if (test_mode == PCRE8_MODE && pbuffer8 != NULL) + { + VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length, + pbuffer8_size - valgrind_access_length); + } +#endif +#ifdef SUPPORT_PCRE2_16 +if (test_mode == PCRE16_MODE && pbuffer16 != NULL) + { + VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length, + pbuffer16_size - valgrind_access_length*sizeof(uint16_t)); + } +#endif +#ifdef SUPPORT_PCRE2_32 +if (test_mode == PCRE32_MODE && pbuffer32 != NULL) + { + VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length, + pbuffer32_size - valgrind_access_length*sizeof(uint32_t)); + } +#endif +#else /* Valgrind not supported */ +(void)valgrind_access_length; /* Avoid compiler warning */ +#endif + +/* If #newline_default has been used and the library was not compiled with an +appropriate default newline setting, local_newline_default will be non-zero. We +use this if there is no explicit newline modifier. */ + +if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0) + { + SETFLD(pat_context, newline_convention, local_newline_default); + } + +/* The null_context modifier is used to test calling pcre2_compile() with a +NULL context. */ + +use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)? + NULL : PTR(pat_context); + +/* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF +and PCRE2_NEVER_UCP are invalid with it. */ + +if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0; + +/* Set use_pbuffer to the input buffer, or leave it as NULL if requested. 
*/ + +if ((pat_patctl.control2 & CTL2_NULL_PATTERN) == 0) + { +#ifdef SUPPORT_PCRE2_8 + if (test_mode == PCRE8_MODE) use_pbuffer = pbuffer8; +#endif +#ifdef SUPPORT_PCRE2_16 + if (test_mode == PCRE16_MODE) use_pbuffer = pbuffer16; +#endif +#ifdef SUPPORT_PCRE2_32 + if (test_mode == PCRE32_MODE) use_pbuffer = pbuffer32; +#endif + } + +/* Compile many times when timing. */ + +if (timeit > 0) + { + int i; + clock_t time_taken = 0; + for (i = 0; i < timeit; i++) + { + clock_t start_time = clock(); + PCRE2_COMPILE(compiled_code, use_pbuffer, patlen, + pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset, + use_pat_context); + time_taken += clock() - start_time; + if (TEST(compiled_code, !=, NULL)) + { SUB1(pcre2_code_free, compiled_code); } + } + total_compile_time += time_taken; + fprintf(outfile, "Compile time %8.4f microseconds\n", + ((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeit); + } + +/* A final compile that is used "for real". */ + +PCRE2_COMPILE(compiled_code, use_pbuffer, patlen, + pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset, use_pat_context); + +/* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit +and 32-bit buffers can be marked completely undefined, but we must leave the +pattern in the 8-bit buffer defined because it may be read from a callout +during matching. */ + +#ifdef SUPPORT_VALGRIND +#ifdef SUPPORT_PCRE2_8 +if (test_mode == PCRE8_MODE) + { + VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length, + pbuffer8_size - valgrind_access_length); + } +#endif +#ifdef SUPPORT_PCRE2_16 +if (test_mode == PCRE16_MODE) + { + VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size); + } +#endif +#ifdef SUPPORT_PCRE2_32 +if (test_mode == PCRE32_MODE) + { + VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size); + } +#endif +#endif + +/* Call the JIT compiler if requested. When timing, we must free and recompile +the pattern each time because that is the only way to free the JIT compiled +code. 
We know that compilation will always succeed. */ + +if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0) + { + if (timeit > 0) + { + int i; + clock_t time_taken = 0; + + for (i = 0; i < timeit; i++) + { + clock_t start_time; + SUB1(pcre2_code_free, compiled_code); + PCRE2_COMPILE(compiled_code, use_pbuffer, patlen, + pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset, + use_pat_context); + start_time = clock(); + PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit); + time_taken += clock() - start_time; + if (jitrc != 0) + { + fprintf(outfile, "JIT compilation was not successful"); + if (!print_error_message(jitrc, " (", ")\n")) return PR_ABEND; + break; + } + } + total_jit_compile_time += time_taken; + if (jitrc == 0) + fprintf(outfile, "JIT compile %8.4f microseconds\n", + ((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeit); + } + else + { + PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit); + if (jitrc != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0) + { + fprintf(outfile, "JIT compilation was not successful"); + if (!print_error_message(jitrc, " (", ")\n")) return PR_ABEND; + } + } + } + +/* Compilation failed; go back for another re, skipping to blank line +if non-interactive. */ + +if (TEST(compiled_code, ==, NULL)) + { + fprintf(outfile, "Failed: error %d at offset %d: ", errorcode, + (int)erroroffset); + if (!print_error_message(errorcode, "", "\n")) return PR_ABEND; + return PR_SKIP; + } + +/* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are +locked out at compile time, but we must also check for occurrences of \P, \p, +and \X, which are only supported when Unicode is supported. */ + +if (forbid_utf != 0) + { + if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0) + { + fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the " + "#forbid_utf command\n"); + return PR_SKIP; + } + } + +/* Remember the maximum lookbehind, for partial matching. 
*/ + +if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0) + return PR_ABEND; + +/* Remember the number of captures. */ + +if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0) + return PR_ABEND; + +/* If an explicit newline modifier was given, set the information flag in the +pattern so that it is preserved over push/pop. */ + +if ((pat_patctl.control2 & CTL2_NL_SET) != 0) + { + SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET); + } + +/* Output code size and other information if requested. */ + +if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info(); +if ((pat_patctl.control2 & CTL2_FRAMESIZE) != 0) show_framesize(); +if ((pat_patctl.control & CTL_ANYINFO) != 0) + { + int rc = show_pattern_info(); + if (rc != PR_OK) return rc; + } + +/* The "push" control requests that the compiled pattern be remembered on a +stack. This is mainly for testing the serialization functionality. */ + +if ((pat_patctl.control & CTL_PUSH) != 0) + { + if (patstacknext >= PATSTACKSIZE) + { + fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE); + return PR_ABEND; + } + patstack[patstacknext++] = PTR(compiled_code); + SET(compiled_code, NULL); + } + +/* The "pushcopy" and "pushtablescopy" controls are similar, but push a +copy of the pattern, the latter with a copy of its character tables. This tests +the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. 
*/
+
+if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
+  {
+  if (patstacknext >= PATSTACKSIZE)
+    {
+    fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
+    return PR_ABEND;
+    }
+  if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
+    {
+    PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
+    }
+  else
+    {
+    PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
+      compiled_code); }
+  }
+
+return PR_OK;
+}
+
+
+
+/*************************************************
+*          Check heap, match or depth limit      *
+*************************************************/
+
+/* This is used for DFA, normal, and JIT fast matching. For DFA matching it
+should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
+
+All three limits in dat_context are first set to UINT32_MAX. The limit selected
+by errnumber is then probed repeatedly: starting at 64, it is doubled for as
+long as the match fails with errnumber. Once a value has been seen for which
+the match does not fail in that way, the interval between the largest failing
+and the smallest non-failing value is bisected until they are adjacent, and the
+minimum is reported on outfile. The selected limit is left set to the last
+value that was tried.
+
+Arguments:
+  pp        the subject string
+  ulen      length of subject or PCRE2_ZERO_TERMINATED
+  errnumber defines which limit to test
+  msg       string to include in final message
+
+Returns:    the return from the final match function call; the program exits
+            if the DFA workspace cannot be allocated
+*/
+
+static int
+check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
+{
+int capcount;
+uint32_t min = 0;              /* Largest limit known to be too small */
+uint32_t mid = 64;             /* Limit being tried on this iteration */
+uint32_t max = UINT32_MAX;     /* Smallest limit known to be big enough */
+
+PCRE2_SET_MATCH_LIMIT(dat_context, max);
+PCRE2_SET_DEPTH_LIMIT(dat_context, max);
+PCRE2_SET_HEAP_LIMIT(dat_context, max);
+
+for (;;)
+  {
+  uint32_t stack_start = 0;
+
+  /* If we are checking the heap limit, free any frames vector that is cached
+  in the match_data so we always start without one. */
+
+  if (errnumber == PCRE2_ERROR_HEAPLIMIT)
+    {
+    PCRE2_SET_HEAP_LIMIT(dat_context, mid);
+
+#ifdef SUPPORT_PCRE2_8
+    if (code_unit_size == 1)
+      {
+      match_data8->memctl.free(match_data8->heapframes,
+        match_data8->memctl.memory_data);
+      match_data8->heapframes = NULL;
+      match_data8->heapframes_size = 0;
+      }
+#endif
+
+#ifdef SUPPORT_PCRE2_16
+    if (code_unit_size == 2)
+      {
+      match_data16->memctl.free(match_data16->heapframes,
+        match_data16->memctl.memory_data);
+      match_data16->heapframes = NULL;
+      match_data16->heapframes_size = 0;
+      }
+#endif
+
+#ifdef SUPPORT_PCRE2_32
+    if (code_unit_size == 4)
+      {
+      match_data32->memctl.free(match_data32->heapframes,
+        match_data32->memctl.memory_data);
+      match_data32->heapframes = NULL;
+      match_data32->heapframes_size = 0;
+      }
+#endif
+    }
+
+  /* No need to mess with the frames vector for match or depth limits. */
+
+  else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
+    {
+    PCRE2_SET_MATCH_LIMIT(dat_context, mid);
+    }
+  else
+    {
+    PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
+    }
+
+  /* Do the appropriate match */
+
+  if ((dat_datctl.control & CTL_DFA) != 0)
+    {
+    stack_start = DFA_START_RWS_SIZE/1024;
+
+    /* The workspace is allocated once and then reused. Fail cleanly if the
+    allocation does not succeed instead of writing through a NULL pointer. */
+
+    if (dfa_workspace == NULL)
+      {
+      dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
+      if (dfa_workspace == NULL)
+        {
+        fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed\n",
+          (size_t)(DFA_WS_DIMENSION*sizeof(int)));
+        exit(1);
+        }
+      }
+    if (dfa_matched++ == 0)
+      dfa_workspace[0] = -1;  /* To catch bad restart */
+    PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
+      dat_datctl.options, match_data,
+      PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
+    }
+
+  else if ((pat_patctl.control & CTL_JITFAST) != 0)
+    PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
+      dat_datctl.options, match_data, PTR(dat_context));
+
+  else
+    {
+    PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
+      dat_datctl.options, match_data, PTR(dat_context));
+    }
+
+  /* The limit was hit, so mid is too small. Give up if doubling again would
+  overflow; otherwise keep doubling until an upper bound has been established,
+  and bisect after that. */
+
+  if (capcount == errnumber)
+    {
+    if ((mid & 0x80000000u) != 0)
+      {
+      fprintf(outfile, "Can't find minimum %s limit: check pattern for "
+        "restriction\n", msg);
+      break;
+      }
+
+    min = mid;
+    mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
+    }
+
+  /* The match ran to completion (match, no match, or partial match), so mid
+  is big enough. */
+
+  else if (capcount >= 0 ||
+           capcount == PCRE2_ERROR_NOMATCH ||
+           capcount == PCRE2_ERROR_PARTIAL)
+    {
+    /* If we've not hit the error with a heap limit less than the size of the
+    initial stack frame vector (for pcre2_match()) or the initial stack
+    workspace vector (for pcre2_dfa_match()), the heap is not being used, so
+    the minimum limit is zero; there's no need to go on. The other limits are
+    always greater than zero. */
+
+    if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
+      {
+      fprintf(outfile, "Minimum %s limit = 0\n", msg);
+      break;
+      }
+    if (mid == min + 1)
+      {
+      fprintf(outfile, "Minimum %s limit = %u\n", msg, mid);
+      break;
+      }
+    max = mid;
+    mid = (min + max)/2;
+    }
+  else break;    /* Some other error */
+  }
+
+return capcount;
+}
+
+
+
+/*************************************************
+*        Substitute callout function             *
+*************************************************/
+
+/* Called from pcre2_substitute() when the substitute_callout modifier is set.
+Print out the data that is passed back. The substitute callout block is
+identical for all code unit widths, so we just pick one.
+
+Arguments:
+  scb         pointer to substitute callout block
+  data_ptr    callout data (not used)
+
+Returns:      -1 if this substitution's number equals the substitute_stop
+              setting, +1 if it equals the substitute_skip setting (stop is
+              tested first), otherwise 0
+*/
+
+static int
+substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
+  void *data_ptr)
+{
+BOOL is_utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
+PCRE2_SIZE old_start = scb->ovector[0];
+PCRE2_SIZE old_end = scb->ovector[1];
+PCRE2_SIZE new_start = scb->output_offsets[0];
+PCRE2_SIZE new_end = scb->output_offsets[1];
+int result;
+
+(void)data_ptr;   /* Not used */
+
+/* Substitution count, ovector size, and the matched part of the input. */
+
+fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
+  scb->subscount, scb->oveccount, old_start, old_end);
+PCHARSV(scb->input, old_start, old_end - old_start, is_utf, outfile);
+
+/* The text that replaces it in the output. */
+
+fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
+  new_start, new_end);
+PCHARSV(scb->output, new_start, new_end - new_start, is_utf, outfile);
+
+/* Decide what to hand back, noting any stop or skip before the closing
+quote is written. */
+
+if (scb->subscount == dat_datctl.substitute_stop)
+  {
+  fprintf(outfile, " STOPPED");
+  result = -1;
+  }
+else if (scb->subscount == dat_datctl.substitute_skip)
+  {
+  fprintf(outfile, " SKIPPED");
+  result = +1;
+  }
+else
+  {
+  result = 0;
+  }
+
+fprintf(outfile, "\"\n");
+return result;
+}
+
+
+/*************************************************
+*                Callout function                *
+*************************************************/
+
+/* Called from a PCRE2 library as a result of the (?C) item. We print out where
+we are in the match (unless suppressed). Yield zero unless more callouts than
+the fail count, or the callout data is not zero. The only differences in the
+callout block for different code unit widths are that the pointers to the
+subject, the most recent MARK, and a callout argument string point to strings
+of the appropriate width. Casts can be used to deal with this.
+
+Arguments:
+  cb                a pointer to a callout block
+  callout_data_ptr  the provided callout data
+
+Returns:            the callout data value if it is not zero; otherwise
+                    PCRE2_ERROR_CALLOUT or 1 when the callout number and the
+                    running callout count match the cerror or cfail settings
+                    respectively; otherwise 0
+*/
+
+static int
+callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
+{
+FILE *f, *fdefault;
+uint32_t i, pre_start, post_start, subject_length;
+PCRE2_SIZE current_position;
+BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
+BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
+BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
+
+/* The FILE f is used for echoing the subject string if it is non-NULL. This
+happens only once in simple cases, but we want to repeat after any additional
+output caused by CALLOUT_EXTRA. */
+
+fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
+  NULL : outfile;
+
+if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
+  {
+  f = outfile;
+  switch (cb->callout_flags)
+    {
+    case PCRE2_CALLOUT_BACKTRACK:
+    fprintf(f, "Backtrack\n");
+    break;
+
+    case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
+    fprintf(f, "Backtrack\nNo other matching paths\n");
+    /* Fall through */
+
+    case PCRE2_CALLOUT_STARTMATCH:
+    fprintf(f, "New match attempt\n");
+    break;
+
+    default:
+    f = fdefault;
+    break;
+    }
+  }
+else f = fdefault;
+
+/* For a callout with a string argument, show the string first because there
+isn't a tidy way to fit it in the rest of the data. The code unit that precedes
+the string is its opening delimiter; the matching closing delimiter is looked
+up so that bracket-like pairs are shown correctly. */
+
+if (cb->callout_string != NULL)
+  {
+  uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
+  fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
+    cb->callout_string_offset, delimiter);
+  PCHARSV(cb->callout_string, 0,
+    cb->callout_string_length, utf, outfile);
+  for (i = 0; callout_start_delims[i] != 0; i++)
+    if (delimiter == callout_start_delims[i])
+      {
+      delimiter = callout_end_delims[i];
+      break;
+      }
+  fprintf(outfile, "%c", delimiter);
+  if (!callout_capture) fprintf(outfile, "\n");
+  }
+
+/* Show captured strings if required. A group that has not been set is shown
+by an explicit marker so that it cannot be mistaken for an empty string. When
+callout_capture is set, fdefault (and therefore f) is always outfile. */
+
+if (callout_capture)
+  {
+  if (cb->callout_string == NULL)
+    fprintf(outfile, "Callout %d:", cb->callout_number);
+  fprintf(outfile, " last capture = %d\n", cb->capture_last);
+  for (i = 2; i < cb->capture_top * 2; i += 2)
+    {
+    fprintf(outfile, "%2u: ", i/2);
+    if (cb->offset_vector[i] == PCRE2_UNSET)
+      fprintf(outfile, "<unset>");
+    else
+      {
+      PCHARSV(cb->subject, cb->offset_vector[i],
+        cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
+      }
+    fprintf(outfile, "\n");
+    }
+  }
+
+/* Unless suppressed, re-print the subject in canonical form (with escapes for
+non-printing characters), the first time, or if giving full details. On
+subsequent calls in the same match, we use PCHARS() just to find the printed
+lengths of the substrings. */
+
+if (callout_where)
+  {
+  if (f != NULL) fprintf(f, "--->");
+
+  /* The subject before the match start. */
+
+  PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
+
+  /* If a lookbehind is involved, the current position may be earlier than the
+  match start. If so, use the match start instead. */
+
+  current_position = (cb->current_position >= cb->start_match)?
+    cb->current_position : cb->start_match;
+
+  /* The subject between the match start and the current position. */
+
+  PCHARS(post_start, cb->subject, cb->start_match,
+    current_position - cb->start_match, utf, f);
+
+  /* Print from the current position to the end. */
+
+  PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
+    utf, f);
+
+  /* Calculate the total subject printed length (no print). */
+
+  PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
+
+  if (f != NULL) fprintf(f, "\n");
+
+  /* For automatic callouts, show the pattern offset. Otherwise, for a
+  numerical callout whose number has not already been shown with captured
+  strings, show the number here. A callout with a string argument has been
+  displayed above. Whatever is shown occupies four columns, the same width as
+  the "--->" prefix of the subject line, so that the position indicators line
+  up underneath the subject. */
+
+  if (cb->callout_number == 255)
+    {
+    fprintf(outfile, "%+3d ", (int)cb->pattern_position);
+    if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
+    }
+  else
+    {
+    if (callout_capture || cb->callout_string != NULL) fprintf(outfile, "    ");
+      else fprintf(outfile, "%3d ", cb->callout_number);
+    }
+
+  /* Now show position indicators */
+
+  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
+  fprintf(outfile, "^");
+
+  if (post_start > 0)
+    {
+    for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
+    fprintf(outfile, "^");
+    }
+
+  for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
+    fprintf(outfile, " ");
+
+  if (cb->next_item_length != 0)
+    fprintf(outfile, "%.*s", (int)(cb->next_item_length),
+      pbuffer8 + cb->pattern_position);
+  else
+    fprintf(outfile, "End of pattern");
+
+  fprintf(outfile, "\n");
+  }
+
+first_callout = FALSE;
+
+/* Show any mark info */
+
+if (cb->mark != last_callout_mark)
+  {
+  if (cb->mark == NULL)
+    fprintf(outfile, "Latest Mark: <unset>\n");
+  else
+    {
+    fprintf(outfile, "Latest Mark: ");
+    PCHARSV(cb->mark, -1, -1, utf, outfile);
+    putc('\n', outfile);
+    }
+  last_callout_mark = cb->mark;
+  }
+
+/* Show callout data. A non-zero value is returned immediately and is not
+included in the callout count. */
+
+if (callout_data_ptr != NULL)
+  {
+  int callout_data = *((int32_t *)callout_data_ptr);
+  if (callout_data != 0)
+    {
+    fprintf(outfile, "Callout data = %d\n", callout_data);
+    return callout_data;
+    }
+  }
+
+/* Keep count and give the appropriate return code */
+
+callout_count++;
+
+if (cb->callout_number == dat_datctl.cerror[0] &&
+    callout_count >= dat_datctl.cerror[1])
+  return PCRE2_ERROR_CALLOUT;
+
+if (cb->callout_number == dat_datctl.cfail[0] &&
+    callout_count >= dat_datctl.cfail[1])
+  return 1;
+
+return 0;
+}
+
+
+
+/*************************************************
+*       Handle *MARK and copy/get tests          *
+*************************************************/
+
+/* This function is called after complete and partial matches. It runs the
+tests for substring extraction, in this order: copy by number, copy by name,
+get by number, get by name, and (if CTL_GETALL is set) get the whole list.
+
+The by-number lists are scanned up to MAXCPYGET entries or the first negative
+entry. The by-name lists are sequences of zero-terminated names that end with
+an empty name. Each name is placed in the pattern buffer before it is passed to
+the library, so the previous contents of that buffer are overwritten.
+
+A failure reported by the library is printed and does not stop the remaining
+tests; only a failure of print_error_message() itself ends the function early.
+
+Arguments:
+  utf       TRUE for utf
+  capcount  return from pcre2_match()
+
+Returns:    FALSE if print_error_message() fails
+*/
+
+static BOOL
+copy_and_get(BOOL utf, int capcount)
+{
+int i;
+uint8_t *nptr;
+
+/* Test copy strings by number */
+
+for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
+  {
+  int rc;
+  PCRE2_SIZE length, length2;
+  uint32_t copybuffer[256];
+  uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
+  length = sizeof(copybuffer)/code_unit_size;
+  PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
+  if (rc < 0)
+    {
+    fprintf(outfile, "Copy substring %u failed (%d): ", n, rc);
+    if (!print_error_message(rc, "", "\n")) return FALSE;
+    }
+  else
+    {
+    /* Cross-check the copied length against the separate length function. */
+
+    PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
+    if (rc < 0)
+      {
+      fprintf(outfile, "Get substring %u length failed (%d): ", n, rc);
+      if (!print_error_message(rc, "", "\n")) return FALSE;
+      }
+    else if (length2 != length)
+      {
+      fprintf(outfile, "Mismatched substring lengths: %"
+        SIZ_FORM " %" SIZ_FORM "\n", length, length2);
+      }
+    fprintf(outfile, "%2uC ", n);
+    PCHARSV(copybuffer, 0, length, utf, outfile);
+    fprintf(outfile, " (%" SIZ_FORM ")\n", length);
+    }
+  }
+
+/* Test copy strings by name */
+
+nptr = dat_datctl.copy_names;
+for (;;)
+  {
+  int rc;
+  int groupnumber;
+  PCRE2_SIZE length, length2;
+  uint32_t copybuffer[256];
+  int namelen = strlen((const char *)nptr);
+#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
+  PCRE2_SIZE cnl = namelen;
+#endif
+  if (namelen == 0) break;
+
+  /* Put the name into the pattern buffer in the current code unit width. */
+
+#ifdef SUPPORT_PCRE2_8
+  if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
+#endif
+#ifdef SUPPORT_PCRE2_16
+  if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
+#endif
+#ifdef SUPPORT_PCRE2_32
+  if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
+#endif
+
+  PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
+  if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
+    fprintf(outfile, "Number not found for group '%s'\n", nptr);
+
+  length = sizeof(copybuffer)/code_unit_size;
+  PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
+  if (rc < 0)
+    {
+    fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
+    if (!print_error_message(rc, "", "\n")) return FALSE;
+    }
+  else
+    {
+    PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
+    if (rc < 0)
+      {
+      fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
+      if (!print_error_message(rc, "", "\n")) return FALSE;
+      }
+    else if (length2 != length)
+      {
+      fprintf(outfile, "Mismatched substring lengths: %"
+        SIZ_FORM " %" SIZ_FORM "\n", length, length2);
+      }
+
+    /* Two leading spaces keep this in line with the "%2uC " form used for
+    numbered groups. */
+
+    fprintf(outfile, "  C ");
+    PCHARSV(copybuffer, 0, length, utf, outfile);
+    fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
+    if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
+      else fprintf(outfile, " (non-unique)\n");
+    }
+  nptr += namelen + 1;
+  }
+
+/* Test get strings by number */
+
+for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
+  {
+  int rc;
+  PCRE2_SIZE length;
+  void *gotbuffer;
+  uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
+  PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
+  if (rc < 0)
+    {
+    fprintf(outfile, "Get substring %u failed (%d): ", n, rc);
+    if (!print_error_message(rc, "", "\n")) return FALSE;
+    }
+  else
+    {
+    fprintf(outfile, "%2uG ", n);
+    PCHARSV(gotbuffer, 0, length, utf, outfile);
+    fprintf(outfile, " (%" SIZ_FORM ")\n", length);
+    PCRE2_SUBSTRING_FREE(gotbuffer);
+    }
+  }
+
+/* Test get strings by name */
+
+nptr = dat_datctl.get_names;
+for (;;)
+  {
+  PCRE2_SIZE length;
+  void *gotbuffer;
+  int rc;
+  int groupnumber;
+  int namelen = strlen((const char *)nptr);
+#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
+  PCRE2_SIZE cnl = namelen;
+#endif
+  if (namelen == 0) break;
+
+#ifdef SUPPORT_PCRE2_8
+  if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
+#endif
+#ifdef SUPPORT_PCRE2_16
+  if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
+#endif
+#ifdef SUPPORT_PCRE2_32
+  if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
+#endif
+
+  PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
+  if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
+    fprintf(outfile, "Number not found for group '%s'\n", nptr);
+
+  PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
+  if (rc < 0)
+    {
+    fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
+    if (!print_error_message(rc, "", "\n")) return FALSE;
+    }
+  else
+    {
+    /* Two leading spaces keep this in line with the "%2uG " form used for
+    numbered groups. */
+
+    fprintf(outfile, "  G ");
+    PCHARSV(gotbuffer, 0, length, utf, outfile);
+    fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
+    if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
+      else fprintf(outfile, " (non-unique)\n");
+    PCRE2_SUBSTRING_FREE(gotbuffer);
+    }
+  nptr += namelen + 1;
+  }
+
+/* Test getting the complete list of captured strings. After the loop, i is
+equal to capcount, so the entry that follows the last string is the one that
+is checked for the terminating NULL. */
+
+if ((dat_datctl.control & CTL_GETALL) != 0)
+  {
+  int rc;
+  void **stringlist;
+  PCRE2_SIZE *lengths;
+  PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
+  if (rc < 0)
+    {
+    fprintf(outfile, "get substring list failed (%d): ", rc);
+    if (!print_error_message(rc, "", "\n")) return FALSE;
+    }
+  else
+    {
+    for (i = 0; i < capcount; i++)
+      {
+      fprintf(outfile, "%2dL ", i);
+      PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
+      putc('\n', outfile);
+      }
+    if (stringlist[i] != NULL)
+      fprintf(outfile, "string list not terminated by NULL\n");
+    PCRE2_SUBSTRING_LIST_FREE(stringlist);
+    }
+  }
+
+return TRUE;
+}
+
+
+
+/*************************************************
+*             Show an entire ovector             *
+*************************************************/
+
+/* This function is called after partial matching or match failure, when the
+"allvector" modifier is set. It is a means of checking the contents of the
+entire ovector, to ensure no modification of fields that should be unchanged.
+
+One line is written to outfile for each pair. A pair whose two values are both
+PCRE2_UNSET is shown as <unset>, a pair whose two values are both JUNK_OFFSET
+is shown as <unchanged>, and any other pair is shown as two numbers.
+
+Arguments:
+  ovector      points to the ovector
+  oveccount    number of pairs
+
+Returns:       nothing
+*/
+
+static void
+show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
+{
+uint32_t i;
+for (i = 0; i < 2*oveccount; i += 2)
+  {
+  PCRE2_SIZE start = ovector[i];
+  PCRE2_SIZE end = ovector[i+1];
+
+  fprintf(outfile, "%2u: ", i/2);
+  if (start == PCRE2_UNSET && end == PCRE2_UNSET)
+    fprintf(outfile, "<unset>\n");
+  else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
+    fprintf(outfile, "<unchanged>\n");
+  else
+    fprintf(outfile, "%" SIZ_FORM " %" SIZ_FORM "\n", start, end);
+  }
+}
+
+
+/*************************************************
+*               Process a data line              *
+*************************************************/
+
+/* The line is in buffer; it will not be empty.
+ +Arguments: none + +Returns: PR_OK continue processing next line + PR_SKIP skip to a blank line + PR_ABEND abort the pcre2test run +*/ + +static int +process_data(void) +{ +PCRE2_SIZE len, ulen, arg_ulen; +uint32_t gmatched; +uint32_t c, k; +uint32_t g_notempty = 0; +uint8_t *p, *pp, *start_rep; +size_t needlen; +void *use_dat_context; +BOOL utf; +BOOL subject_literal; + +PCRE2_SIZE *ovector; +PCRE2_SIZE ovecsave[3]; +uint32_t oveccount; + +#ifdef SUPPORT_PCRE2_8 +uint8_t *q8 = NULL; +#endif +#ifdef SUPPORT_PCRE2_16 +uint16_t *q16 = NULL; +#endif +#ifdef SUPPORT_PCRE2_32 +uint32_t *q32 = NULL; +#endif + +subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0; + +/* Copy the default context and data control blocks to the active ones. Then +copy from the pattern the controls that can be set in either the pattern or the +data. This allows them to be overridden in the data line. We do not do this for +options because those that are common apply separately to compiling and +matching. */ + +DATCTXCPY(dat_context, default_dat_context); +memcpy(&dat_datctl, &def_datctl, sizeof(datctl)); +dat_datctl.control |= (pat_patctl.control & CTL_ALLPD); +dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD); +strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement); +if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack; + +if (dat_datctl.substitute_skip == 0) + dat_datctl.substitute_skip = pat_patctl.substitute_skip; +if (dat_datctl.substitute_stop == 0) + dat_datctl.substitute_stop = pat_patctl.substitute_stop; + +/* Initialize for scanning the data line. */ + +#ifdef SUPPORT_PCRE2_8 +utf = ((((pat_patctl.control & CTL_POSIX) != 0)? 
+ ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options : + FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0; +#else +utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; +#endif + +start_rep = NULL; +len = strlen((const char *)buffer); +while (len > 0 && isspace(buffer[len-1])) len--; +buffer[len] = 0; +p = buffer; +while (isspace(*p)) p++; + +/* Check that the data is well-formed UTF-8 if we're in UTF mode. To create +invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */ + +if (utf) + { + uint8_t *q; + uint32_t cc; + int n = 1; + uint8_t *q_end = p + len; + + for (q = p; n > 0 && *q; q += n) n = utf82ord(q, q_end, &cc); + if (n <= 0) + { + fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input " + "in UTF mode\n"); + return PR_OK; + } + } + +#ifdef SUPPORT_VALGRIND +/* Mark the dbuffer as addressable but undefined again. */ +if (dbuffer != NULL) + { + VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size); + } +#endif + +/* Allocate a buffer to hold the data line; len+1 is an upper bound on +the number of code units that will be needed (though the buffer may have to be +extended if replication is involved). */ + +needlen = (len+1) * code_unit_size; +if (dbuffer == NULL || needlen >= dbuffer_size) + { + while (needlen >= dbuffer_size) + { + if (dbuffer_size < SIZE_MAX/2) dbuffer_size *= 2; + else dbuffer_size = needlen + 1; + } + dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size); + if (dbuffer == NULL) + { + fprintf(stderr, "pcre2test: realloc(%" SIZ_FORM ") failed\n", dbuffer_size); + exit(1); + } + } +SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */ + +/* Scan the data line, interpreting data escapes, and put the result into a +buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise, +in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier. 
+*/ + +while ((c = *p++) != 0) + { + int32_t i = 0; + size_t replen; + + /* ] may mark the end of a replicated sequence */ + + if (c == ']' && start_rep != NULL) + { + PCRE2_SIZE d; + long li; + char *endptr; + + if (*p++ != '{') + { + fprintf(outfile, "** Expected '{' after \\[....]\n"); + return PR_OK; + } + + li = strtol((const char *)p, &endptr, 10); + if (S32OVERFLOW(li)) + { + fprintf(outfile, "** Repeat count too large\n"); + return PR_OK; + } + + p = (uint8_t *)endptr; + if (*p++ != '}') + { + fprintf(outfile, "** Expected '}' after \\[...]{...\n"); + return PR_OK; + } + + i = (int32_t)li; + if (i-- <= 0) + { + fprintf(outfile, "** Zero or negative repeat not allowed\n"); + return PR_OK; + } + + replen = CAST8VAR(q) - start_rep; + if (PRIV(ckd_smul)(&d, replen, i)) + { + fprintf(outfile, "** Expanded content too large\n"); + return PR_OK; + } + needlen += d; + + if (needlen >= dbuffer_size) + { + size_t qoffset = CAST8VAR(q) - dbuffer; + size_t rep_offset = start_rep - dbuffer; + while (needlen >= dbuffer_size) + { + if (dbuffer_size < SIZE_MAX/2) dbuffer_size *= 2; + else dbuffer_size = needlen + 1; + } + dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size); + if (dbuffer == NULL) + { + fprintf(stderr, "pcre2test: realloc(%" SIZ_FORM ") failed\n", + dbuffer_size); + exit(1); + } + SETCASTPTR(q, dbuffer + qoffset); + start_rep = dbuffer + rep_offset; + } + + while (i-- > 0) + { + memcpy(CAST8VAR(q), start_rep, replen); + SETPLUS(q, replen/code_unit_size); + } + + start_rep = NULL; + continue; + } + + /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input + set, do the fudge for setting the top bit. 
*/ + + if (c != '\\' || subject_literal) + { + uint32_t topbit = 0; + if (test_mode == PCRE32_MODE && c == 0xff && *p != 0) + { + topbit = 0x80000000; + c = *p++; + } + if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) && + HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); } + c |= topbit; + } + + /* Handle backslash escapes */ + + else switch ((c = *p++)) + { + case '\\': break; + case 'a': c = CHAR_BEL; break; + case 'b': c = '\b'; break; + case 'e': c = CHAR_ESC; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + c -= '0'; + while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') + c = c * 8 + *p++ - '0'; + break; + + case 'o': + if (*p == '{') + { + uint8_t *pt = p; + c = 0; + for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++) + { + if (++i == 12) + fprintf(outfile, "** Too many octal digits in \\o{...} item; " + "using only the first twelve.\n"); + else c = c * 8 + *pt - '0'; + } + if (*pt == '}') p = pt + 1; + else fprintf(outfile, "** Missing } after \\o{ (assumed)\n"); + } + break; + + case 'x': + if (*p == '{') + { + uint8_t *pt = p; + c = 0; + + /* We used to have "while (isxdigit(*(++pt)))" here, but it fails + when isxdigit() is a macro that refers to its argument more than + once. This is banned by the C Standard, but apparently happens in at + least one MacOS environment. */ + + for (pt++; isxdigit(*pt); pt++) + { + if (++i == 9) + fprintf(outfile, "** Too many hex digits in \\x{...} item; " + "using only the first eight.\n"); + else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10); + } + if (*pt == '}') + { + p = pt + 1; + break; + } + /* Not correct form for \x{...}; fall through */ + } + + /* \x without {} always defines just one byte in 8-bit mode. 
This + allows UTF-8 characters to be constructed byte by byte, and also allows + invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode. + Otherwise, pass it down as data. */ + + c = 0; + while (i++ < 2 && isxdigit(*p)) + { + c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10); + p++; + } +#if defined SUPPORT_PCRE2_8 + if (utf && (test_mode == PCRE8_MODE)) + { + *q8++ = c; + continue; + } +#endif + break; + + case 0: /* \ followed by EOF allows for an empty line */ + p--; + continue; + + case '=': /* \= terminates the data, starts modifiers */ + goto ENDSTRING; + + case '[': /* \[ introduces a replicated character sequence */ + if (start_rep != NULL) + { + fprintf(outfile, "** Nested replication is not supported\n"); + return PR_OK; + } + start_rep = CAST8VAR(q); + continue; + + default: + if (isalnum(c)) + { + fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c); + return PR_OK; + } + } + + /* We now have a character value in c that may be greater than 255. + In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater + than 127 in UTF mode must have come from \x{...} or octal constructs + because values from \x.. get this far only in non-UTF mode. 
*/ + +#ifdef SUPPORT_PCRE2_8 + if (test_mode == PCRE8_MODE) + { + if (utf) + { + if (c > 0x7fffffff) + { + fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff " + "and so cannot be converted to UTF-8\n", c); + return PR_OK; + } + q8 += ord2utf8(c, q8); + } + else + { + if (c > 0xffu) + { + fprintf(outfile, "** Character \\x{%x} is greater than 255 " + "and UTF-8 mode is not enabled.\n", c); + fprintf(outfile, "** Truncation will probably give the wrong " + "result.\n"); + } + *q8++ = (uint8_t)c; + } + } +#endif +#ifdef SUPPORT_PCRE2_16 + if (test_mode == PCRE16_MODE) + { + if (utf) + { + if (c > 0x10ffffu) + { + fprintf(outfile, "** Failed: character \\x{%x} is greater than " + "0x10ffff and so cannot be converted to UTF-16\n", c); + return PR_OK; + } + else if (c >= 0x10000u) + { + c-= 0x10000u; + *q16++ = 0xD800 | (c >> 10); + *q16++ = 0xDC00 | (c & 0x3ff); + } + else + *q16++ = c; + } + else + { + if (c > 0xffffu) + { + fprintf(outfile, "** Character \\x{%x} is greater than 0xffff " + "and UTF-16 mode is not enabled.\n", c); + fprintf(outfile, "** Truncation will probably give the wrong " + "result.\n"); + } + + *q16++ = (uint16_t)c; + } + } +#endif +#ifdef SUPPORT_PCRE2_32 + if (test_mode == PCRE32_MODE) + { + *q32++ = c; + } +#endif + } + +ENDSTRING: +SET(*q, 0); +len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */ +ulen = len/code_unit_size; /* Length in code units */ +arg_ulen = ulen; /* Value to use in match arg */ + +/* If the string was terminated by \= we must now interpret modifiers. */ + +if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl)) + return PR_OK; + +/* Setting substitute_{skip,fail} implies a substitute callout. */ + +if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0) + dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT; + +/* Check for mutually exclusive modifiers. At present, these are all in the +first control word. 
*/ + +for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++) + { + c = dat_datctl.control & exclusive_dat_controls[k]; + if (c != 0 && c != (c & (~c+1))) + { + show_controls(c, 0, "** Not allowed together:"); + fprintf(outfile, "\n"); + return PR_OK; + } + } + +if (pat_patctl.replacement[0] != 0) + { + if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 && + (dat_datctl.control & CTL_NULLCONTEXT) != 0) + { + fprintf(outfile, "** Replacement callouts are not supported with null_context.\n"); + return PR_OK; + } + + if ((dat_datctl.control & CTL_ALLCAPTURES) != 0) + fprintf(outfile, "** Ignored with replacement text: allcaptures\n"); + } + +/* Warn for modifiers that are ignored for DFA. */ + +if ((dat_datctl.control & CTL_DFA) != 0) + { + if ((dat_datctl.control & CTL_ALLCAPTURES) != 0) + fprintf(outfile, "** Ignored for DFA matching: allcaptures\n"); + if ((dat_datctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0) + fprintf(outfile, "** Ignored for DFA matching: heapframes_size\n"); + } + +/* We now have the subject in dbuffer, with len containing the byte length, and +ulen containing the code unit length, with a copy in arg_ulen for use in match +function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the +zero_terminate modifier is present). + +Move the data to the end of the buffer so that a read over the end can be +caught by valgrind or other means. If we have explicit valgrind support, mark +the unused start of the buffer unaddressable. If we are using the POSIX +interface, or testing zero-termination, we must include the terminating zero in +the usable data. */ + +c = code_unit_size * (((pat_patctl.control & CTL_POSIX) + + (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 
1:0); +pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c); +#ifdef SUPPORT_VALGRIND + VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c)); +#endif + +/* Now pp points to the subject string, but if null_subject was specified, set +it to NULL to test PCRE2's behaviour. */ + +if ((dat_datctl.control2 & CTL2_NULL_SUBJECT) != 0) pp = NULL; + +/* POSIX matching is only possible in 8-bit mode, and it does not support +timing or other fancy features. Some were checked at compile time, but we need +to check the match-time settings here. */ + +#ifdef SUPPORT_PCRE2_8 +if ((pat_patctl.control & CTL_POSIX) != 0) + { + int rc; + int eflags = 0; + regmatch_t *pmatch = NULL; + const char *msg = "** Ignored with POSIX interface:"; + + if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET) + prmsg(&msg, "callout_error"); + if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET) + prmsg(&msg, "callout_fail"); + if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0) + prmsg(&msg, "copy"); + if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0) + prmsg(&msg, "get"); + if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack"); + if (dat_datctl.offset != 0) prmsg(&msg, "offset"); + + if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0) + { + fprintf(outfile, "%s", msg); + show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS); + msg = ""; + } + + if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 || + (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0) + { + show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS, + dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg); + msg = ""; + } + + if (msg[0] == 0) fprintf(outfile, "\n"); + + if (dat_datctl.oveccount > 0) + { + pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount); + if (pmatch == NULL) + { + fprintf(outfile, "** Failed to get memory for recording 
matching " + "information (size set = %du)\n", dat_datctl.oveccount); + return PR_OK; + } + } + + if (dat_datctl.startend[0] != CFORE_UNSET) + { + pmatch[0].rm_so = dat_datctl.startend[0]; + pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)? + dat_datctl.startend[1] : len; + eflags |= REG_STARTEND; + } + + if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL; + if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL; + if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY; + + rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags); + if (rc != 0) + { + (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size); + fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8); + } + else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) + fprintf(outfile, "Matched with REG_NOSUB\n"); + else if (dat_datctl.oveccount == 0) + fprintf(outfile, "Matched without capture\n"); + else + { + size_t i, j; + size_t last_printed = (size_t)dat_datctl.oveccount; + for (i = 0; i < (size_t)dat_datctl.oveccount; i++) + { + if (pmatch[i].rm_so >= 0) + { + PCRE2_SIZE start = pmatch[i].rm_so; + PCRE2_SIZE end = pmatch[i].rm_eo; + for (j = last_printed + 1; j < i; j++) + fprintf(outfile, "%2d: \n", (int)j); + last_printed = i; + if (start > end) + { + start = pmatch[i].rm_eo; + end = pmatch[i].rm_so; + fprintf(outfile, "Start of matched string is beyond its end - " + "displaying from end to start.\n"); + } + fprintf(outfile, "%2d: ", (int)i); + PCHARSV(pp, start, end - start, utf, outfile); + fprintf(outfile, "\n"); + + if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) || + (dat_datctl.control & CTL_ALLAFTERTEXT) != 0) + { + fprintf(outfile, "%2d+ ", (int)i); + /* Note: don't use the start/end variables here because we want to + show the text from what is reported as the end. 
*/ + PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile); + fprintf(outfile, "\n"); } + } + } + } + free(pmatch); + return PR_OK; + } +#endif /* SUPPORT_PCRE2_8 */ + + /* Handle matching via the native interface. Check for consistency of +modifiers. */ + +if (dat_datctl.startend[0] != CFORE_UNSET) + fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n"); + +/* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA +matching, even if the JIT compiler was used. */ + +if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT && + FLD(compiled_code, executable_jit) != NULL) + { + fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n"); + dat_datctl.control &= ~CTL_ALLUSEDTEXT; + } + +/* Handle passing the subject as zero-terminated. */ + +if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0) + arg_ulen = PCRE2_ZERO_TERMINATED; + +/* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a +NULL context. */ + +use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)? + NULL : PTR(dat_context); + +/* Enable display of malloc/free if wanted. We can do this only if either the +pattern or the subject is processed with a context. */ + +show_memory = (dat_datctl.control & CTL_MEMORY) != 0; + +if (show_memory && + (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0) + fprintf(outfile, "** \\=memory requires either a pattern or a subject " + "context: ignored\n"); + +/* Create and assign a JIT stack if requested. 
*/ + +if (dat_datctl.jitstack != 0) + { + if (dat_datctl.jitstack != jit_stack_size) + { + PCRE2_JIT_STACK_FREE(jit_stack); + PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL); + jit_stack_size = dat_datctl.jitstack; + } + PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack); + } + +/* Or de-assign */ + +else if (jit_stack != NULL) + { + PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL); + PCRE2_JIT_STACK_FREE(jit_stack); + jit_stack = NULL; + jit_stack_size = 0; + } + +/* When no JIT stack is assigned, we must ensure that there is a JIT callback +if we want to verify that JIT was actually used. */ + +if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL) + { + PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL); + } + +/* Adjust match_data according to size of offsets required. A size of zero +causes a new match data block to be obtained that exactly fits the pattern. */ + +if (dat_datctl.oveccount == 0) + { + PCRE2_MATCH_DATA_FREE(match_data); + PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, + general_context); + PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data); + } +else if (dat_datctl.oveccount <= max_oveccount) + { + SETFLD(match_data, oveccount, dat_datctl.oveccount); + } +else + { + max_oveccount = dat_datctl.oveccount; + PCRE2_MATCH_DATA_FREE(match_data); + PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, general_context); + } + +if (CASTVAR(void *, match_data) == NULL) + { + fprintf(outfile, "** Failed to get memory for recording matching " + "information (size requested: %d)\n", dat_datctl.oveccount); + max_oveccount = 0; + return PR_OK; + } + +ovector = FLD(match_data, ovector); +PCRE2_GET_OVECTOR_COUNT(oveccount, match_data); + +/* Replacement processing is ignored for DFA matching. 
*/ + +if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0) + { + fprintf(outfile, "** Ignored for DFA matching: replace\n"); + dat_datctl.replacement[0] = 0; + } + +/* If a replacement string is provided, call pcre2_substitute() instead of or +after one of the matching functions. First we have to convert the replacement +string to the appropriate width. */ + +if (dat_datctl.replacement[0] != 0) + { + int rc; + uint8_t *pr; + uint8_t rbuffer[REPLACE_BUFFSIZE]; + uint8_t nbuffer[REPLACE_BUFFSIZE]; + uint8_t *rbptr; + uint32_t xoptions; + uint32_t emoption; /* External match option */ + PCRE2_SIZE j, rlen, nsize, erroroffset; + BOOL badutf = FALSE; + +#ifdef SUPPORT_PCRE2_8 + uint8_t *r8 = NULL; +#endif +#ifdef SUPPORT_PCRE2_16 + uint16_t *r16 = NULL; +#endif +#ifdef SUPPORT_PCRE2_32 + uint32_t *r32 = NULL; +#endif + + /* Fill the ovector with junk to detect elements that do not get set + when they should be (relevant only when "allvector" is specified). */ + + for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET; + + if (timeitm) + fprintf(outfile, "** Timing is not supported with replace: ignored\n"); + + if ((dat_datctl.control & CTL_ALTGLOBAL) != 0) + fprintf(outfile, "** Altglobal is not supported with replace: ignored\n"); + + /* Check for a test that does substitution after an initial external match. + If this is set, we run the external match, but leave the interpretation of + its output to pcre2_substitute(). */ + + emoption = ((dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0)? 0 : + PCRE2_SUBSTITUTE_MATCHED; + + if (emoption != 0) + { + if ((pat_patctl.control & CTL_JITFAST) != 0) + { + PCRE2_JIT_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset, + dat_datctl.options, match_data, use_dat_context); + } + else + { + PCRE2_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset, + dat_datctl.options, match_data, use_dat_context); + } + } + + xoptions = emoption | + (((dat_datctl.control & CTL_GLOBAL) == 0)? 
0 : + PCRE2_SUBSTITUTE_GLOBAL) | + (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 : + PCRE2_SUBSTITUTE_EXTENDED) | + (((dat_datctl.control2 & CTL2_SUBSTITUTE_LITERAL) == 0)? 0 : + PCRE2_SUBSTITUTE_LITERAL) | + (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 : + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) | + (((dat_datctl.control2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) == 0)? 0 : + PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) | + (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 : + PCRE2_SUBSTITUTE_UNKNOWN_UNSET) | + (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 : + PCRE2_SUBSTITUTE_UNSET_EMPTY); + + SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */ + pr = dat_datctl.replacement; + + /* If the replacement starts with '[]' we interpret that as length + value for the replacement buffer. */ + + nsize = REPLACE_BUFFSIZE/code_unit_size; + if (*pr == '[') + { + PCRE2_SIZE n = 0; + while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0; + if (*pr++ != ']') + { + fprintf(outfile, "Bad buffer size in replacement string\n"); + return PR_OK; + } + if (n > nsize) + { + fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too " + "large (max %" SIZ_FORM ")\n", n, nsize); + return PR_OK; + } + nsize = n; + } + + /* Now copy the replacement string to a buffer of the appropriate width. No + escape processing is done for replacements. In UTF mode, check for an invalid + UTF-8 input string, and if it is invalid, just copy its code units without + UTF interpretation. This provides a means of checking that an invalid string + is detected. Otherwise, UTF-8 can be used to include wide characters in a + replacement. */ + + if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset); + + /* Not UTF or invalid UTF-8: just copy the code units. 
*/ + + if (!utf || badutf) + { + while ((c = *pr++) != 0) + { +#ifdef SUPPORT_PCRE2_8 + if (test_mode == PCRE8_MODE) *r8++ = c; +#endif +#ifdef SUPPORT_PCRE2_16 + if (test_mode == PCRE16_MODE) *r16++ = c; +#endif +#ifdef SUPPORT_PCRE2_32 + if (test_mode == PCRE32_MODE) *r32++ = c; +#endif + } + } + + /* Valid UTF-8 replacement string */ + + else while ((c = *pr++) != 0) + { + if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); } + +#ifdef SUPPORT_PCRE2_8 + if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8); +#endif + +#ifdef SUPPORT_PCRE2_16 + if (test_mode == PCRE16_MODE) + { + if (c >= 0x10000u) + { + c-= 0x10000u; + *r16++ = 0xD800 | (c >> 10); + *r16++ = 0xDC00 | (c & 0x3ff); + } + else *r16++ = c; + } +#endif + +#ifdef SUPPORT_PCRE2_32 + if (test_mode == PCRE32_MODE) *r32++ = c; +#endif + } + + SET(*r, 0); + if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0) + rlen = PCRE2_ZERO_TERMINATED; + else + rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size; + + if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0) + { + PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL); + } + else + { + PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */ + } + + /* There is a special option to set the replacement to NULL in order to test + that case. */ + + rbptr = ((dat_datctl.control2 & CTL2_NULL_REPLACEMENT) == 0)? 
rbuffer : NULL; + + PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset, + dat_datctl.options|xoptions, match_data, use_dat_context, + rbptr, rlen, nbuffer, &nsize); + + if (rc < 0) + { + fprintf(outfile, "Failed: error %d", rc); + if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET) + fprintf(outfile, " at offset %ld in replacement", (long int)nsize); + fprintf(outfile, ": "); + if (!print_error_message(rc, "", "")) return PR_ABEND; + if (rc == PCRE2_ERROR_NOMEMORY && + (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0) + fprintf(outfile, ": %ld code units are needed", (long int)nsize); + } + else + { + fprintf(outfile, "%2d: ", rc); + PCHARSV(nbuffer, 0, nsize, utf, outfile); + } + + fprintf(outfile, "\n"); + show_memory = FALSE; + + /* Show final ovector contents and resulting heapframe size if requested. */ + + if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) + show_ovector(ovector, oveccount); + + if ((dat_datctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0 && + (dat_datctl.control & CTL_DFA) == 0) + show_heapframes_size(); + + return PR_OK; + } /* End of substitution handling */ + +/* When a replacement string is not provided, run a loop for global matching +with one of the basic matching functions. For altglobal (or first time round +the loop), set an "unset" value for the previous match info. */ + +ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET; + +for (gmatched = 0;; gmatched++) + { + PCRE2_SIZE j; + int capcount; + + /* Fill the ovector with junk to detect elements that do not get set + when they should be. */ + + for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET; + + /* When matching is via pcre2_match(), we will detect the use of JIT via the + stack callback function. */ + + jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0; + + /* Do timing if required. 
*/ + + if (timeitm > 0) + { + int i; + clock_t start_time, time_taken; + + if ((dat_datctl.control & CTL_DFA) != 0) + { + if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0) + { + fprintf(outfile, "Timing DFA restarts is not supported\n"); + return PR_OK; + } + if (dfa_workspace == NULL) + dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int)); + start_time = clock(); + for (i = 0; i < timeitm; i++) + { + PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen, + dat_datctl.offset, dat_datctl.options | g_notempty, match_data, + use_dat_context, dfa_workspace, DFA_WS_DIMENSION); + } + } + + else if ((pat_patctl.control & CTL_JITFAST) != 0) + { + start_time = clock(); + for (i = 0; i < timeitm; i++) + { + PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, + dat_datctl.offset, dat_datctl.options | g_notempty, match_data, + use_dat_context); + } + } + + else + { + start_time = clock(); + for (i = 0; i < timeitm; i++) + { + PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, + dat_datctl.offset, dat_datctl.options | g_notempty, match_data, + use_dat_context); + } + } + total_match_time += (time_taken = clock() - start_time); + fprintf(outfile, "Match time %7.4f microseconds\n", + ((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeitm); + } + + /* Find the heap, match and depth limits if requested. The depth and heap + limits are not relevant for JIT. The return from check_match_limit() is the + return from the final call to pcre2_match() or pcre2_dfa_match(). 
*/ + + if ((dat_datctl.control & (CTL_FINDLIMITS|CTL_FINDLIMITS_NOHEAP)) != 0) + { + capcount = 0; /* This stops compiler warnings */ + + if ((dat_datctl.control & CTL_FINDLIMITS_NOHEAP) == 0 && + (FLD(compiled_code, executable_jit) == NULL || + (dat_datctl.options & PCRE2_NO_JIT) != 0)) + { + (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap"); + } + + capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT, + "match"); + + if (FLD(compiled_code, executable_jit) == NULL || + (dat_datctl.options & PCRE2_NO_JIT) != 0 || + (dat_datctl.control & CTL_DFA) != 0) + { + capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT, + "depth"); + } + + if (capcount == 0) + { + fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n"); + capcount = dat_datctl.oveccount; + } + } + + /* Otherwise just run a single match, setting up a callout if required (the + default). There is a copy of the pattern in pbuffer8 for use by callouts. */ + + else + { + if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0) + { + PCRE2_SET_CALLOUT(dat_context, callout_function, + (void *)(&dat_datctl.callout_data)); + first_callout = TRUE; + last_callout_mark = NULL; + callout_count = 0; + } + else + { + PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */ + } + + /* Run a single DFA or NFA match. 
*/ + + if ((dat_datctl.control & CTL_DFA) != 0) + { + if (dfa_workspace == NULL) + dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int)); + if (dfa_matched++ == 0) + dfa_workspace[0] = -1; /* To catch bad restart */ + PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen, + dat_datctl.offset, dat_datctl.options | g_notempty, match_data, + use_dat_context, dfa_workspace, DFA_WS_DIMENSION); + if (capcount == 0) + { + fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n"); + capcount = dat_datctl.oveccount; + } + } + else + { + if ((pat_patctl.control & CTL_JITFAST) != 0) + PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset, + dat_datctl.options | g_notempty, match_data, use_dat_context); + else + PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset, + dat_datctl.options | g_notempty, match_data, use_dat_context); + if (capcount == 0) + { + fprintf(outfile, "Matched, but too many substrings\n"); + capcount = dat_datctl.oveccount; + } + } + } + + /* The result of the match is now in capcount. First handle a successful + match. If pp was forced to be NULL (to test NULL handling) it will have been + treated as an empty string if the length was zero. So re-create that for + outputting. */ + + if (capcount >= 0) + { + int i; + + if (pp == NULL) pp = (uint8_t *)""; + + if (capcount > (int)oveccount) /* Check for lunatic return value */ + { + fprintf(outfile, + "** PCRE2 error: returned count %d is too big for ovector count %d\n", + capcount, oveccount); + capcount = oveccount; + if ((dat_datctl.control & CTL_ANYGLOB) != 0) + { + fprintf(outfile, "** Global loop abandoned\n"); + dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */ + } + } + + /* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they + should be, but not for fast JIT, where it isn't supported. 
*/ + + if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 && + (pat_patctl.control & CTL_JITFAST) == 0) + { + if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0) + fprintf(outfile, + "** PCRE2 error: flag not set after copy_matched_subject\n"); + + if (CASTFLD(void *, match_data, subject) == pp) + fprintf(outfile, + "** PCRE2 error: copy_matched_subject has not copied\n"); + + if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0) + fprintf(outfile, + "** PCRE2 error: copy_matched_subject mismatch\n"); + } + + /* If this is not the first time round a global loop, check that the + returned string has changed. If it has not, check for an empty string match + at different starting offset from the previous match. This is a failed test + retry for null-matching patterns that don't match at their starting offset, + for example /(?<=\G.)/. A repeated match at the same point is not such a + pattern, and must be discarded, and we then proceed to seek a non-null + match at the current point. For any other repeated match, there is a bug + somewhere and we must break the loop because it will go on for ever. We + know that there are always at least two elements in the ovector. */ + + if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1]) + { + if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset) + { + g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + ovecsave[2] = dat_datctl.offset; + continue; /* Back to the top of the loop */ + } + fprintf(outfile, + "** PCRE2 error: global repeat returned the same string as previous\n"); + fprintf(outfile, "** Global loop abandoned\n"); + dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */ + } + + /* "allcaptures" requests showing of all captures in the pattern, to check + unset ones at the end. It may be set on the pattern or the data. Implement + by setting capcount to the maximum. This is not relevant for DFA matching, + so ignore it (warning given above). 
*/ + + if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES) + { + capcount = maxcapcount + 1; /* Allow for full match */ + if (capcount > (int)oveccount) capcount = oveccount; + } + + /* "allvector" request showing the entire ovector. */ + + if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount; + + /* Output the captured substrings. Note that, for the matched string, + the use of \K in an assertion can make the start later than the end. */ + + for (i = 0; i < 2*capcount; i += 2) + { + PCRE2_SIZE lleft, lmiddle, lright; + PCRE2_SIZE start = ovector[i]; + PCRE2_SIZE end = ovector[i+1]; + + if (start > end) + { + start = ovector[i+1]; + end = ovector[i]; + fprintf(outfile, "Start of matched string is beyond its end - " + "displaying from end to start.\n"); + } + + fprintf(outfile, "%2d: ", i/2); + + /* Check for an unset group */ + + if (start == PCRE2_UNSET && end == PCRE2_UNSET) + { + fprintf(outfile, "\n"); + continue; + } + + /* Check for silly offsets, in particular, values that have not been + set when they should have been. However, if we are past the end of the + captures for this pattern ("allvector" causes this), or if we are DFA + matching, it isn't an error if the entry is unchanged. */ + + if (start > ulen || end > ulen) + { + if (((dat_datctl.control & CTL_DFA) != 0 || + i >= (int)(2*maxcapcount + 2)) && + start == JUNK_OFFSET && end == JUNK_OFFSET) + fprintf(outfile, "\n"); + else + fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n", + (unsigned long int)start, (unsigned long int)end); + continue; + } + + /* When JIT is not being used, ALLUSEDTEXT may be set. (It if is set with + JIT, it is disabled above, with a comment.) 
When the match is done by the + interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is + set, and if the leftmost consulted character is before the start of the + match or the rightmost consulted character is past the end of the match, + we want to show all consulted characters for the main matched string, and + indicate which were lookarounds. */ + + if (i == 0) + { + BOOL showallused; + PCRE2_SIZE leftchar, rightchar; + + if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0) + { + leftchar = FLD(match_data, leftchar); + rightchar = FLD(match_data, rightchar); + showallused = i == 0 && (leftchar < start || rightchar > end); + } + else showallused = FALSE; + + if (showallused) + { + PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile); + PCHARS(lmiddle, pp, start, end - start, utf, outfile); + PCHARS(lright, pp, end, rightchar - end, utf, outfile); + if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) + fprintf(outfile, " (JIT)"); + fprintf(outfile, "\n "); + for (j = 0; j < lleft; j++) fprintf(outfile, "<"); + for (j = 0; j < lmiddle; j++) fprintf(outfile, " "); + for (j = 0; j < lright; j++) fprintf(outfile, ">"); + } + + /* When a pattern contains \K, the start of match position may be + different to the start of the matched string. When this is the case, + show it when requested. */ + + else if ((dat_datctl.control & CTL_STARTCHAR) != 0) + { + PCRE2_SIZE startchar; + PCRE2_GET_STARTCHAR(startchar, match_data); + PCHARS(lleft, pp, startchar, start - startchar, utf, outfile); + PCHARSV(pp, start, end - start, utf, outfile); + if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) + fprintf(outfile, " (JIT)"); + if (startchar != start) + { + fprintf(outfile, "\n "); + for (j = 0; j < lleft; j++) fprintf(outfile, "^"); + } + } + + /* Otherwise, just show the matched string. 
*/ + + else + { + PCHARSV(pp, start, end - start, utf, outfile); + if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) + fprintf(outfile, " (JIT)"); + } + } + + /* Not the main matched string. Just show it unadorned. */ + + else + { + PCHARSV(pp, start, end - start, utf, outfile); + } + + fprintf(outfile, "\n"); + + /* Note: don't use the start/end variables here because we want to + show the text from what is reported as the end. */ + + if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 || + (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0)) + { + fprintf(outfile, "%2d+ ", i/2); + PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile); + fprintf(outfile, "\n"); + } + } + + /* Output (*MARK) data if requested */ + + if ((dat_datctl.control & CTL_MARK) != 0 && + TESTFLD(match_data, mark, !=, NULL)) + { + fprintf(outfile, "MK: "); + PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile); + fprintf(outfile, "\n"); + } + + /* Process copy/get strings */ + + if (!copy_and_get(utf, capcount)) return PR_ABEND; + + } /* End of handling a successful match */ + + /* There was a partial match. The value of ovector[0] is the bumpalong point, + that is, startchar, not any \K point that might have been passed. When JIT is + not in use, "allusedtext" may be set, in which case we indicate the leftmost + consulted character. 
*/ + + else if (capcount == PCRE2_ERROR_PARTIAL) + { + PCRE2_SIZE leftchar; + int backlength; + int rubriclength = 0; + + if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0) + { + leftchar = FLD(match_data, leftchar); + } + else leftchar = ovector[0]; + + fprintf(outfile, "Partial match"); + if ((dat_datctl.control & CTL_MARK) != 0 && + TESTFLD(match_data, mark, !=, NULL)) + { + fprintf(outfile, ", mark="); + PCHARS(rubriclength, CASTFLD(void *, match_data, mark), -1, -1, utf, + outfile); + rubriclength += 7; + } + fprintf(outfile, ": "); + rubriclength += 15; + + PCHARS(backlength, pp, leftchar, ovector[0] - leftchar, utf, outfile); + PCHARSV(pp, ovector[0], ovector[1] - ovector[0], utf, outfile); + + if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) + fprintf(outfile, " (JIT)"); + fprintf(outfile, "\n"); + + if (backlength != 0) + { + int i; + for (i = 0; i < rubriclength; i++) fprintf(outfile, " "); + for (i = 0; i < backlength; i++) fprintf(outfile, "<"); + fprintf(outfile, "\n"); + } + + if (ulen != ovector[1]) + fprintf(outfile, "** ovector[1] is not equal to the subject length: " + "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen); + + /* Process copy/get strings */ + + if (!copy_and_get(utf, 1)) return PR_ABEND; + + /* "allvector" outputs the entire vector */ + + if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) + show_ovector(ovector, oveccount); + + break; /* Out of the /g loop */ + } /* End of handling partial match */ + + /* Failed to match. If this is a /g or /G loop, we might previously have + set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match. + If that is the case, this is not necessarily the end. We want to advance the + start offset, and continue. We won't be at the end of the string - that was + checked before setting g_notempty. We achieve the effect by pretending that a + single character was matched. 
+ + Complication arises in the case when the newline convention is "any", "crlf", + or "anycrlf". If the previous match was at the end of a line terminated by + CRLF, an advance of one character just passes the CR, whereas we should + prefer the longer newline sequence, as does the code in pcre2_match(). + + Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one + character, not one byte. */ + + else if (g_notempty != 0) /* There was a previous null match */ + { + uint16_t nl = FLD(compiled_code, newline_convention); + PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */ + PCRE2_SIZE end_offset = start_offset + 1; + + if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY || + nl == PCRE2_NEWLINE_ANYCRLF) && + start_offset < ulen - 1 && + CODE_UNIT(pp, start_offset) == '\r' && + CODE_UNIT(pp, end_offset) == '\n') + end_offset++; + + else if (utf && test_mode != PCRE32_MODE) + { + if (test_mode == PCRE8_MODE) + { + for (; end_offset < ulen; end_offset++) + if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break; + } + else /* 16-bit mode */ + { + for (; end_offset < ulen; end_offset++) + if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break; + } + } + + SETFLDVEC(match_data, ovector, 0, start_offset); + SETFLDVEC(match_data, ovector, 1, end_offset); + } /* End of handling null match in a global loop */ + + /* A "normal" match failure. There will be a negative error number in + capcount. 
*/ + + else + { + switch(capcount) + { + case PCRE2_ERROR_NOMATCH: + if (gmatched == 0) + { + fprintf(outfile, "No match"); + if ((dat_datctl.control & CTL_MARK) != 0 && + TESTFLD(match_data, mark, !=, NULL)) + { + fprintf(outfile, ", mark = "); + PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile); + } + if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) + fprintf(outfile, " (JIT)"); + fprintf(outfile, "\n"); + + /* "allvector" outputs the entire vector */ + + if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) + show_ovector(ovector, oveccount); + } + break; + + case PCRE2_ERROR_BADUTFOFFSET: + fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode); + break; + + default: + fprintf(outfile, "Failed: error %d: ", capcount); + if (!print_error_message(capcount, "", "")) return PR_ABEND; + if (capcount <= PCRE2_ERROR_UTF8_ERR1 && + capcount >= PCRE2_ERROR_UTF32_ERR2) + { + PCRE2_SIZE startchar; + PCRE2_GET_STARTCHAR(startchar, match_data); + fprintf(outfile, " at offset %" SIZ_FORM, startchar); + } + fprintf(outfile, "\n"); + break; + } + + break; /* Out of the /g loop */ + } /* End of failed match handling */ + + /* Control reaches here in two circumstances: (a) after a match, and (b) + after a non-match that immediately followed a match on an empty string when + doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and + PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match + of one character. So effectively we get here only after a match. If we + are not doing a global search, we are done. */ + + if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else + { + PCRE2_SIZE match_offset = FLD(match_data, ovector)[0]; + PCRE2_SIZE end_offset = FLD(match_data, ovector)[1]; + + /* We must now set up for the next iteration of a global search. If we have + matched an empty string, first check to see if we are at the end of the + subject. If so, the loop is over. 
Otherwise, mimic what Perl's /g option + does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again + at the same point. If this fails it will be picked up above, where a fake + match is set up so that at this point we advance to the next character. + + However, in order to cope with patterns that never match at their starting + offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater + than the starting offset. This means there will be a retry with the + starting offset at the match offset. If this returns the same match again, + it is picked up above and ignored, and the special action is then taken. */ + + if (match_offset == end_offset) + { + if (end_offset == ulen) break; /* End of subject */ + if (match_offset <= dat_datctl.offset) + g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + } + + /* However, even after matching a non-empty string, there is still one + tricky case. If a pattern contains \K within a lookbehind assertion at the + start, the end of the matched string can be at the offset where the match + started. In the case of a normal /g iteration without special action, this + leads to a loop that keeps on returning the same substring. The loop would + be caught above, but we really want to move on to the next match. 
*/ + + else + { + g_notempty = 0; /* Set for a "normal" repeat */ + if ((dat_datctl.control & CTL_GLOBAL) != 0) + { + PCRE2_SIZE startchar; + PCRE2_GET_STARTCHAR(startchar, match_data); + if (end_offset <= startchar) + { + if (startchar >= ulen) break; /* End of subject */ + end_offset = startchar + 1; + if (utf && test_mode != PCRE32_MODE) + { + if (test_mode == PCRE8_MODE) + { + for (; end_offset < ulen; end_offset++) + if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break; + } + else /* 16-bit mode */ + { + for (; end_offset < ulen; end_offset++) + if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break; + } + } + } + } + } + + /* For a normal global (/g) iteration, save the current ovector[0,1] and + the starting offset so that we can check that they do change each time. + Otherwise a matching bug that returns the same string causes an infinite + loop. It has happened! Then update the start offset, leaving other + parameters alone. */ + + if ((dat_datctl.control & CTL_GLOBAL) != 0) + { + ovecsave[0] = ovector[0]; + ovecsave[1] = ovector[1]; + ovecsave[2] = dat_datctl.offset; + dat_datctl.offset = end_offset; + } + + /* For altglobal, just update the pointer and length. */ + + else + { + pp += end_offset * code_unit_size; + len -= end_offset * code_unit_size; + ulen -= end_offset; + if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset; + } + } + } /* End of global loop */ + +/* All matching is done; show the resulting heapframe size if requested. 
*/ + +if ((dat_datctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0 && + (dat_datctl.control & CTL_DFA) == 0) + show_heapframes_size(); + +show_memory = FALSE; +return PR_OK; +} + + + + +/************************************************* +* Print PCRE2 version * +*************************************************/ + +static void +print_version(FILE *f, BOOL include_mode) +{ +char buf[16]; +VERSION_TYPE *vp; +fprintf(f, "PCRE2 version "); +for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp); +if (include_mode) + { + sprintf(buf, "%d-bit", test_mode); + fprintf(f, " (%s)", buf); + } +fprintf(f, "\n"); +} + + + +/************************************************* +* Print Unicode version * +*************************************************/ + +static void +print_unicode_version(FILE *f) +{ +VERSION_TYPE *vp; +fprintf(f, "Unicode version "); +for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp); +} + + + +/************************************************* +* Print JIT target * +*************************************************/ + +static void +print_jit_target(FILE *f) +{ +VERSION_TYPE *vp; +for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp); +} + + + +/************************************************* +* Print newline configuration * +*************************************************/ + +/* Output is always to stdout. 
+ +Arguments: + rc the return code from PCRE2_CONFIG_NEWLINE + isc TRUE if called from "-C newline" +Returns: nothing +*/ + +static void +print_newline_config(uint32_t optval, BOOL isc) +{ +if (!isc) printf(" Default newline sequence is "); +if (optval < sizeof(newlines)/sizeof(char *)) + printf("%s\n", newlines[optval]); +else + printf("a non-standard value: %d\n", optval); +} + + + +/************************************************* +* Usage function * +*************************************************/ + +static void +usage(void) +{ +printf("Usage: pcre2test [options] [ []]\n\n"); +printf("Input and output default to stdin and stdout.\n"); +#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) +printf("If input is a terminal, readline() is used to read from it.\n"); +#else +printf("This version of pcre2test is not linked with readline().\n"); +#endif +printf("\nOptions:\n"); +#ifdef SUPPORT_PCRE2_8 +printf(" -8 use the 8-bit library\n"); +#endif +#ifdef SUPPORT_PCRE2_16 +printf(" -16 use the 16-bit library\n"); +#endif +#ifdef SUPPORT_PCRE2_32 +printf(" -32 use the 32-bit library\n"); +#endif +printf(" -ac set default pattern modifier PCRE2_AUTO_CALLOUT\n"); +printf(" -AC as -ac, but also set subject 'callout_extra' modifier\n"); +printf(" -b set default pattern modifier 'fullbincode'\n"); +printf(" -C show PCRE2 compile-time options and exit\n"); +printf(" -C arg show a specific compile-time option and exit with its\n"); +printf(" value if numeric (else 0). 
The arg can be:\n"); +printf(" backslash-C use of \\C is enabled [0, 1]\n"); +printf(" bsr \\R type [ANYCRLF, ANY]\n"); +printf(" ebcdic compiled for EBCDIC character code [0,1]\n"); +printf(" ebcdic-nl NL code if compiled for EBCDIC\n"); +printf(" jit just-in-time compiler supported [0, 1]\n"); +printf(" linksize internal link size [2, 3, 4]\n"); +printf(" newline newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n"); +printf(" pcre2-8 8 bit library support enabled [0, 1]\n"); +printf(" pcre2-16 16 bit library support enabled [0, 1]\n"); +printf(" pcre2-32 32 bit library support enabled [0, 1]\n"); +printf(" unicode Unicode and UTF support enabled [0, 1]\n"); +printf(" -d set default pattern modifier 'debug'\n"); +printf(" -dfa set default subject modifier 'dfa'\n"); +printf(" -error show messages for error numbers, then exit\n"); +printf(" -help show usage information\n"); +printf(" -i set default pattern modifier 'info'\n"); +printf(" -jit set default pattern modifier 'jit'\n"); +printf(" -jitfast set default pattern modifier 'jitfast'\n"); +printf(" -jitverify set default pattern modifier 'jitverify'\n"); +printf(" -LM list pattern and subject modifiers, then exit\n"); +printf(" -LP list non-script properties, then exit\n"); +printf(" -LS list supported scripts, then exit\n"); +printf(" -q quiet: do not output PCRE2 version number at start\n"); +printf(" -pattern set default pattern modifier fields\n"); +printf(" -subject set default subject modifier fields\n"); +printf(" -S set stack size to mebibytes\n"); +printf(" -t [] time compilation and execution, repeating times\n"); +printf(" -tm [] time execution (matching) only, repeating times\n"); +printf(" -T same as -t, but show total times at the end\n"); +printf(" -TM same as -tm, but show total time at the end\n"); +printf(" -v|--version show PCRE2 version and exit\n"); +} + + + +/************************************************* +* Handle -C option * +*************************************************/ + +/* This 
option outputs configuration options and sets an appropriate return +code when asked for a single option. The code is abstracted into a separate +function because of its size. Use whichever pcre2_config() function is +available. + +Argument: an option name or NULL +Returns: the return code +*/ + +static int +c_option(const char *arg) +{ +uint32_t optval; +unsigned int i = COPTLISTCOUNT; +int yield = 0; + +if (arg != NULL && arg[0] != CHAR_MINUS) + { + for (i = 0; i < COPTLISTCOUNT; i++) + if (strcmp(arg, coptlist[i].name) == 0) break; + + if (i >= COPTLISTCOUNT) + { + fprintf(stderr, "** Unknown -C option '%s'\n", arg); + return 0; + } + + switch (coptlist[i].type) + { + case CONF_BSR: + (void)PCRE2_CONFIG(coptlist[i].value, &optval); + printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY"); + break; + + case CONF_FIX: + yield = coptlist[i].value; + printf("%d\n", yield); + break; + + case CONF_FIZ: + optval = coptlist[i].value; + printf("%d\n", optval); + break; + + case CONF_INT: + (void)PCRE2_CONFIG(coptlist[i].value, &yield); + printf("%d\n", yield); + break; + + case CONF_NL: + (void)PCRE2_CONFIG(coptlist[i].value, &optval); + print_newline_config(optval, TRUE); + break; + } + +/* For VMS, return the value by setting a symbol, for certain values only. This +is contributed code which the PCRE2 developers have no means of testing. */ + +#ifdef __VMS + +/* This is the original code provided by the first VMS contributor. */ +#ifdef NEVER + if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT) + { + char ucname[16]; + strcpy(ucname, coptlist[i].name); + for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]]; + vms_setsymbol(ucname, 0, optval); + } +#endif + +/* This is the new code, provided by a second VMS contributor. 
*/ + + if (coptlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT) + { + char nam_buf[22], val_buf[4]; + $DESCRIPTOR(nam, nam_buf); + $DESCRIPTOR(val, val_buf); + + strcpy(nam_buf, coptlist[i].name); + nam.dsc$w_length = strlen(nam_buf); + sprintf(val_buf, "%d", yield); + val.dsc$w_length = strlen(val_buf); + lib$set_symbol(&nam, &val); + } +#endif /* __VMS */ + + return yield; + } + +/* No argument for -C: output all configuration information. */ + +print_version(stdout, FALSE); +printf("Compiled with\n"); + +#ifdef EBCDIC +printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF); +#if defined NATIVE_ZOS +printf(" EBCDIC code page %s or similar\n", pcrz_cpversion()); +#endif +#endif + +(void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval); +if (optval & 1) printf(" 8-bit support\n"); +if (optval & 2) printf(" 16-bit support\n"); +if (optval & 4) printf(" 32-bit support\n"); + +#ifdef SUPPORT_VALGRIND +printf(" Valgrind support\n"); +#endif + +(void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval); +if (optval != 0) + { + printf(" UTF and UCP support ("); + print_unicode_version(stdout); + printf(")\n"); + } +else printf(" No Unicode support\n"); + +(void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval); +if (optval != 0) + { + printf(" Just-in-time compiler support: "); + print_jit_target(stdout); + printf("\n"); + } +else + { + printf(" No just-in-time compiler support\n"); + } + +(void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval); +print_newline_config(optval, FALSE); +(void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval); +printf(" \\R matches %s\n", + (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" : + "all Unicode newlines"); +(void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval); +printf(" \\C is %ssupported\n", optval? 
"not ":""); +(void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval); +printf(" Internal link size = %d\n", optval); +(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval); +printf(" Parentheses nest limit = %d\n", optval); +(void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval); +printf(" Default heap limit = %d kibibytes\n", optval); +(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval); +printf(" Default match limit = %d\n", optval); +(void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval); +printf(" Default depth limit = %d\n", optval); + +#if defined SUPPORT_LIBREADLINE +printf(" pcre2test has libreadline support\n"); +#elif defined SUPPORT_LIBEDIT +printf(" pcre2test has libedit support\n"); +#else +printf(" pcre2test has neither libreadline nor libedit support\n"); +#endif + +return 0; +} + + +/************************************************* +* Format one property/script list item * +*************************************************/ + +#ifdef SUPPORT_UNICODE +static void +format_list_item(int16_t *ff, char *buff, BOOL isscript) +{ +int count; +int maxi = 0; +const char *maxs = ""; +size_t max = 0; + +for (count = 0; ff[count] >= 0; count++) {} + +/* Find the name to put first. For scripts, any 3-character name is chosen. +For non-scripts, or if there is no 3-character name, take the longest. 
*/ + +for (int i = 0; ff[i] >= 0; i++) + { + const char *s = PRIV(utt_names) + ff[i]; + size_t len = strlen(s); + if (isscript && len == 3) + { + maxi = i; + max = len; + maxs = s; + break; + } + else if (len > max) + { + max = len; + maxi = i; + maxs = s; + } + } + +strcpy(buff, maxs); +buff += max; + +if (count > 1) + { + const char *sep = " ("; + for (int i = 0; i < count; i++) + { + if (i == maxi) continue; + buff += sprintf(buff, "%s%s", sep, PRIV(utt_names) + ff[i]); + sep = ", "; + } + (void)sprintf(buff, ")"); + } +} +#endif /* SUPPORT_UNICODE */ + + + +/************************************************* +* Display scripts or properties * +*************************************************/ + +#define MAX_SYNONYMS 5 + +static void +display_properties(BOOL wantscripts) +{ +#ifndef SUPPORT_UNICODE +(void)wantscripts; +printf("** This version of PCRE2 was compiled without Unicode support.\n"); +#else + +uint16_t seentypes[1024]; +uint16_t seenvalues[1024]; +int seencount = 0; +int16_t found[256][MAX_SYNONYMS + 1]; +int fc = 0; +int colwidth = 40; +int n = wantscripts? 
ucp_Script_Count : ucp_Bprop_Count; + +for (size_t i = 0; i < PRIV(utt_size); i++) + { + int k; + int m = 0; + int16_t *fv; + const ucp_type_table *t = PRIV(utt) + i; + unsigned int value = t->value; + + if (wantscripts) + { + if (t->type != PT_SC && t->type != PT_SCX) continue; + } + else + { + if (t->type != PT_BOOL) continue; + } + + for (k = 0; k < seencount; k++) + { + if (t->type == seentypes[k] && t->value == seenvalues[k]) break; + } + if (k < seencount) continue; + + seentypes[seencount] = t->type; + seenvalues[seencount++] = t->value; + + fv = found[fc++]; + fv[m++] = t->name_offset; + + for (size_t j = i + 1; j < PRIV(utt_size); j++) + { + const ucp_type_table *tt = PRIV(utt) + j; + if (tt->type != t->type || tt->value != value) continue; + if (m >= MAX_SYNONYMS) + printf("** Too many synonyms: %s ignored\n", + PRIV(utt_names) + tt->name_offset); + else fv[m++] = tt->name_offset; + } + + fv[m] = -1; + } + +printf("-------------------------- SUPPORTED %s --------------------------\n\n", + wantscripts? "SCRIPTS" : "PROPERTIES"); + +if (!wantscripts) printf( +"This release of PCRE2 supports Unicode's general category properties such\n" +"as Lu (upper case letter), bi-directional properties such as Bidi_Class,\n" +"and the following binary (yes/no) properties:\n\n"); + + +for (int k = 0; k < (n+1)/2; k++) + { + int x; + char buff1[128]; + char buff2[128]; + + format_list_item(found[k], buff1, wantscripts); + x = k + (n+1)/2; + if (x < n) format_list_item(found[x], buff2, wantscripts); + else buff2[0] = 0; + + x = printf("%s", buff1); + while (x++ < colwidth) printf(" "); + printf("%s\n", buff2); + } + +#endif /* SUPPORT_UNICODE */ +} + + + +/************************************************* +* Display one modifier * +*************************************************/ + +static void +display_one_modifier(modstruct *m, BOOL for_pattern) +{ +uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))? 
+ '*' : ' '; +printf("%c%s", c, m->name); +for (size_t i = 0; i < C1MODLISTCOUNT; i++) + { + if (strcmp(m->name, c1modlist[i].fullname) == 0) + printf(" (%c)", c1modlist[i].onechar); + } +} + + + +/************************************************* +* Display pattern or subject modifiers * +*************************************************/ + +/* In order to print in two columns, first scan without printing to get a list +of the modifiers that are required. + +Arguments: + for_pattern TRUE for pattern modifiers, FALSE for subject modifiers + title string to be used in title + +Returns: nothing +*/ + +static void +display_selected_modifiers(BOOL for_pattern, const char *title) +{ +uint32_t i, j; +uint32_t n = 0; +uint32_t list[MODLISTCOUNT]; +uint32_t extra[MODLISTCOUNT]; + +for (i = 0; i < MODLISTCOUNT; i++) + { + BOOL is_pattern = TRUE; + modstruct *m = modlist + i; + + switch (m->which) + { + case MOD_CTC: /* Compile context */ + case MOD_PAT: /* Pattern */ + case MOD_PATP: /* Pattern, OK for Perl-compatible test */ + break; + + /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect + subjects, but can be given with a pattern. 
We list them as subject + modifiers, but marked with an asterisk.*/ + + case MOD_CTM: /* Match context */ + case MOD_DAT: /* Subject line */ + case MOD_DATP: /* Subject line, OK for Perl-compatible test */ + case MOD_PND: /* As PD, but not default pattern */ + case MOD_PNDP: /* As PND, OK for Perl-compatible test */ + is_pattern = FALSE; + break; + + default: printf("** Unknown type for modifier '%s'\n", m->name); + /* Fall through */ + case MOD_PD: /* Pattern or subject */ + case MOD_PDP: /* As PD, OK for Perl-compatible test */ + is_pattern = for_pattern; + break; + } + + if (for_pattern == is_pattern) + { + extra[n] = 0; + for (size_t k = 0; k < C1MODLISTCOUNT; k++) + { + if (strcmp(m->name, c1modlist[k].fullname) == 0) + { + extra[n] += 4; + break; + } + } + list[n++] = i; + } + } + +/* Now print from the list in two columns. */ + +printf("-------------- %s MODIFIERS --------------\n", title); + +for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++) + { + modstruct *m = modlist + list[i]; + display_one_modifier(m, for_pattern); + if (j < n) + { + uint32_t k = 27 - strlen(m->name) - extra[i]; + while (k-- > 0) printf(" "); + display_one_modifier(modlist + list[j], for_pattern); + } + printf("\n"); + } +} + + + +/************************************************* +* Display the list of modifiers * +*************************************************/ + +static void +display_modifiers(void) +{ +printf( + "An asterisk on a subject modifier means that it may be given on a pattern\n" + "line, in order to apply to all subjects matched by that pattern. 
Modifiers\n" + "that are listed for both patterns and subjects have different effects in\n" + "each case.\n\n"); +display_selected_modifiers(TRUE, "PATTERN"); +printf("\n"); +display_selected_modifiers(FALSE, "SUBJECT"); +} + + + +/************************************************* +* Main Program * +*************************************************/ + +int +main(int argc, char **argv) +{ +uint32_t temp; +uint32_t yield = 0; +uint32_t op = 1; +BOOL notdone = TRUE; +BOOL quiet = FALSE; +BOOL showtotaltimes = FALSE; +BOOL skipping = FALSE; +char *arg_subject = NULL; +char *arg_pattern = NULL; +char *arg_error = NULL; + +/* The offsets to the options and control bits fields of the pattern and data +control blocks must be the same so that common options and controls such as +"anchored" or "memory" can work for either of them from a single table entry. +We cannot test this till runtime because "offsetof" does not work in the +preprocessor. */ + +if (PO(options) != DO(options) || PO(control) != DO(control) || + PO(control2) != DO(control2)) + { + fprintf(stderr, "** Coding error: " + "options and control offsets for pattern and data must be the same.\n"); + return 1; + } + +/* Get the PCRE2 and Unicode version number and JIT target information, at the +same time checking that a request for the length gives the same answer. Also +check lengths for non-string items. */ + +if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) != + PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) || + + PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) != + PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) || + + PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) != + PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) || + + PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) || + PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t)) + { + fprintf(stderr, "** Error in pcre2_config(): bad length\n"); + return 1; + } + +/* Check that bad options are diagnosed. 
*/ + +if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION || + PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION) + { + fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n"); + return 1; + } + +/* This configuration option is now obsolete, but running a quick check ensures +that its code is covered. */ + +(void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp); + +/* Get buffers from malloc() so that valgrind will check their misuse when +debugging. They grow automatically when very long lines are read. The 16- +and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */ + +buffer = (uint8_t *)malloc(pbuffer8_size); +pbuffer8 = (uint8_t *)malloc(pbuffer8_size); + +/* The following _setmode() stuff is some Windows magic that tells its runtime +library to translate CRLF into a single LF character. At least, that's what +I've been told: never having used Windows I take this all on trust. Originally +it set 0x8000, but then I was advised that _O_BINARY was better. */ + +#if defined(_WIN32) || defined(WIN32) +_setmode( _fileno( stdout ), _O_BINARY ); +#endif + +/* Initialization that does not depend on the running mode. */ + +locale_name[0] = 0; + +memset(&def_patctl, 0, sizeof(patctl)); +def_patctl.convert_type = CONVERT_UNSET; + +memset(&def_datctl, 0, sizeof(datctl)); +def_datctl.oveccount = DEFAULT_OVECCOUNT; +def_datctl.copy_numbers[0] = -1; +def_datctl.get_numbers[0] = -1; +def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET; +def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET; +def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET; + +/* Scan command line options. */ + +while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) + { + char *endptr; + char *arg = argv[op]; + unsigned long uli; + + /* List modifiers and exit. 
*/ + + if (strcmp(arg, "-LM") == 0) + { + display_modifiers(); + goto EXIT; + } + + /* List properties and exit */ + + if (strcmp(arg, "-LP") == 0) + { + display_properties(FALSE); + goto EXIT; + } + + /* List scripts and exit */ + + if (strcmp(arg, "-LS") == 0) + { + display_properties(TRUE); + goto EXIT; + } + + /* Display and/or set return code for configuration options. */ + + if (strcmp(arg, "-C") == 0) + { + yield = c_option(argv[op + 1]); + goto EXIT; + } + + /* Select operating mode. Ensure that pcre2_config() is called in 16-bit + and 32-bit modes because that won't happen naturally when 8-bit is also + configured. Also call some other functions that are not otherwise used. This + means that a coverage report won't claim there are uncalled functions. */ + + if (strcmp(arg, "-8") == 0) + { +#ifdef SUPPORT_PCRE2_8 + test_mode = PCRE8_MODE; + (void)pcre2_set_bsr_8(pat_context8, 999); + (void)pcre2_set_newline_8(pat_context8, 999); +#else + fprintf(stderr, + "** This version of PCRE2 was built without 8-bit support\n"); + exit(1); +#endif + } + + else if (strcmp(arg, "-16") == 0) + { +#ifdef SUPPORT_PCRE2_16 + test_mode = PCRE16_MODE; + (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL); + (void)pcre2_set_bsr_16(pat_context16, 999); + (void)pcre2_set_newline_16(pat_context16, 999); +#else + fprintf(stderr, + "** This version of PCRE2 was built without 16-bit support\n"); + exit(1); +#endif + } + + else if (strcmp(arg, "-32") == 0) + { +#ifdef SUPPORT_PCRE2_32 + test_mode = PCRE32_MODE; + (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL); + (void)pcre2_set_bsr_32(pat_context32, 999); + (void)pcre2_set_newline_32(pat_context32, 999); +#else + fprintf(stderr, + "** This version of PCRE2 was built without 32-bit support\n"); + exit(1); +#endif + } + + /* Set quiet (no version verification) */ + + else if (strcmp(arg, "-q") == 0) quiet = TRUE; + + /* Set system stack size */ + + else if (strcmp(arg, "-S") == 0 && argc > 2 && + ((uli = strtoul(argv[op+1], &endptr, 
10)), *endptr == 0)) + { +#if defined(_WIN32) || defined(WIN32) || defined(__HAIKU__) || defined(NATIVE_ZOS) || defined(__VMS) + fprintf(stderr, "pcre2test: -S is not supported on this OS\n"); + exit(1); +#else + int rc; + uint32_t stack_size; + struct rlimit rlim; + if (U32OVERFLOW(uli)) + { + fprintf(stderr, "** Argument for -S is too big\n"); + exit(1); + } + stack_size = (uint32_t)uli; + getrlimit(RLIMIT_STACK, &rlim); + rlim.rlim_cur = stack_size * 1024 * 1024; + if (rlim.rlim_cur > rlim.rlim_max) + { + fprintf(stderr, + "pcre2test: requested stack size %luMiB is greater than hard limit ", + (unsigned long int)stack_size); + if (rlim.rlim_max % (1024*1024) == 0) fprintf(stderr, "%luMiB\n", + (unsigned long int)(rlim.rlim_max/(1024 * 1024))); + else if (rlim.rlim_max % 1024 == 0) fprintf(stderr, "%luKiB\n", + (unsigned long int)(rlim.rlim_max/1024)); + else fprintf(stderr, "%lu bytes\n", (unsigned long int)(rlim.rlim_max)); + exit(1); + } + rc = setrlimit(RLIMIT_STACK, &rlim); + if (rc != 0) + { + fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n", + (unsigned long int)stack_size, strerror(errno)); + exit(1); + } + op++; + argc--; +#endif + } + + /* Set some common pattern and subject controls */ + + else if (strcmp(arg, "-AC") == 0) + { + def_patctl.options |= PCRE2_AUTO_CALLOUT; + def_datctl.control2 |= CTL2_CALLOUT_EXTRA; + } + else if (strcmp(arg, "-ac") == 0) def_patctl.options |= PCRE2_AUTO_CALLOUT; + else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE; + else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG; + else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA; + else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO; + else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0 || + strcmp(arg, "-jitfast") == 0) + { + if (arg[4] == 'v') def_patctl.control |= CTL_JITVERIFY; + else if (arg[4] == 'f') def_patctl.control |= CTL_JITFAST; + def_patctl.jit = JIT_DEFAULT; /* full & 
partial */ +#ifndef SUPPORT_JIT + fprintf(stderr, "** Warning: JIT support is not available: " + "-jit[fast|verify] calls functions that do nothing.\n"); +#endif + } + + /* Set timing parameters */ + + else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 || + strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0) + { + int both = arg[2] == 0; + showtotaltimes = arg[1] == 'T'; + if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0)) + { + if (uli == 0) + { + fprintf(stderr, "** Argument for %s must not be zero\n", arg); + exit(1); + } + if (U32OVERFLOW(uli)) + { + fprintf(stderr, "** Argument for %s is too big\n", arg); + exit(1); + } + timeitm = (int)uli; + op++; + argc--; + } + else timeitm = LOOPREPEAT; + if (both) timeit = timeitm; + } + + /* Give help */ + + else if (strcmp(arg, "-help") == 0 || + strcmp(arg, "--help") == 0) + { + usage(); + goto EXIT; + } + + /* Show version */ + + else if (memcmp(arg, "-v", 2) == 0 || + strcmp(arg, "--version") == 0) + { + print_version(stdout, FALSE); + goto EXIT; + } + + /* The following options save their data for processing once we know what + the running mode is. */ + + else if (strcmp(arg, "-error") == 0) + { + arg_error = argv[op+1]; + goto CHECK_VALUE_EXISTS; + } + + else if (strcmp(arg, "-subject") == 0) + { + arg_subject = argv[op+1]; + goto CHECK_VALUE_EXISTS; + } + + else if (strcmp(arg, "-pattern") == 0) + { + arg_pattern = argv[op+1]; + CHECK_VALUE_EXISTS: + if (argc <= 2) + { + fprintf(stderr, "** Missing value for %s\n", arg); + yield = 1; + goto EXIT; + } + op++; + argc--; + } + + /* Unrecognized option */ + + else + { + fprintf(stderr, "** Unknown or malformed option '%s'\n", arg); + usage(); + yield = 1; + goto EXIT; + } + op++; + argc--; + } + +/* If -error was present, get the error numbers, show the messages, and exit. +We wait to do this until we know which mode we are in. 
*/ + +if (arg_error != NULL) + { + int len; + int errcode; + char *endptr; + +/* Ensure the relevant non-8-bit buffer is available. Ensure that it is at +least 128 code units, because it is used for retrieving error messages. */ + +#ifdef SUPPORT_PCRE2_16 + if (test_mode == PCRE16_MODE) + { + pbuffer16_size = 256; + pbuffer16 = (uint16_t *)malloc(pbuffer16_size); + if (pbuffer16 == NULL) + { + fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n", + pbuffer16_size); + yield = 1; + goto EXIT; + } + } +#endif + +#ifdef SUPPORT_PCRE2_32 + if (test_mode == PCRE32_MODE) + { + pbuffer32_size = 512; + pbuffer32 = (uint32_t *)malloc(pbuffer32_size); + if (pbuffer32 == NULL) + { + fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n", + pbuffer32_size); + yield = 1; + goto EXIT; + } + } +#endif + + /* Loop along a list of error numbers. */ + + for (;;) + { + errcode = strtol(arg_error, &endptr, 10); + if (*endptr != 0 && *endptr != CHAR_COMMA) + { + fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error); + yield = 1; + goto EXIT; + } + printf("Error %d: ", errcode); + PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer); + if (len < 0) + { + switch (len) + { + case PCRE2_ERROR_BADDATA: + printf("PCRE2_ERROR_BADDATA (unknown error number)"); + break; + + case PCRE2_ERROR_NOMEMORY: + printf("PCRE2_ERROR_NOMEMORY (buffer too small)"); + break; + + default: + printf("Unexpected return (%d) from pcre2_get_error_message()", len); + break; + } + } + else + { + PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout); + } + printf("\n"); + if (*endptr == 0) goto EXIT; + arg_error = endptr + 1; + } + /* Control never reaches here */ + } /* End of -error handling */ + +/* Initialize things that cannot be done until we know which test mode we are +running in. Exercise the general context copying and match data size functions, +which are not otherwise used. 
*/ + +code_unit_size = test_mode/8; +max_oveccount = DEFAULT_OVECCOUNT; + +/* Use macros to save a lot of duplication. */ + +#define CREATECONTEXTS \ + G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \ + G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \ + G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \ + G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \ + G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \ + G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \ + G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \ + G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \ + G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS)) + +#define CONTEXTTESTS \ + (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \ + (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \ + (void)G(pcre2_set_max_pattern_compiled_length_,BITS)(G(pat_context,BITS), 0); \ + (void)G(pcre2_set_max_varlookbehind_,BITS)(G(pat_context,BITS), 0); \ + (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \ + (void)G(pcre2_get_match_data_size_,BITS)(G(match_data,BITS)) + +/* Call the appropriate functions for the current mode, and exercise some +functions that are not otherwise called. 
*/ + +#ifdef SUPPORT_PCRE2_8 +#undef BITS +#define BITS 8 +if (test_mode == PCRE8_MODE) + { + CREATECONTEXTS; + CONTEXTTESTS; + } +#endif + +#ifdef SUPPORT_PCRE2_16 +#undef BITS +#define BITS 16 +if (test_mode == PCRE16_MODE) + { + CREATECONTEXTS; + CONTEXTTESTS; + } +#endif + +#ifdef SUPPORT_PCRE2_32 +#undef BITS +#define BITS 32 +if (test_mode == PCRE32_MODE) + { + CREATECONTEXTS; + CONTEXTTESTS; + } +#endif + +/* Set a default parentheses nest limit that is large enough to run the +standard tests (this also exercises the function). */ + +PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT); + +/* Handle command line modifier settings, sending any error messages to +stderr. We need to know the mode before modifying the context, and it is tidier +to do them all in the same way. */ + +outfile = stderr; +if ((arg_pattern != NULL && + !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) || + (arg_subject != NULL && + !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl))) + { + yield = 1; + goto EXIT; + } + +/* Sort out the input and output files, defaulting to stdin/stdout. */ + +infile = stdin; +outfile = stdout; + +if (argc > 1 && strcmp(argv[op], "-") != 0) + { + infile = fopen(argv[op], INPUT_MODE); + if (infile == NULL) + { + printf("** Failed to open '%s': %s\n", argv[op], strerror(errno)); + yield = 1; + goto EXIT; + } + } + +#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) +if (INTERACTIVE(infile)) using_history(); +#endif + +if (argc > 2) + { + outfile = fopen(argv[op+1], OUTPUT_MODE); + if (outfile == NULL) + { + printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno)); + yield = 1; + goto EXIT; + } + } + +/* Output a heading line unless quiet, then process input lines. 
*/ + +if (!quiet) print_version(outfile, TRUE); + +SET(compiled_code, NULL); + +#ifdef SUPPORT_PCRE2_8 +preg.re_pcre2_code = NULL; +preg.re_match_data = NULL; +#endif + +while (notdone) + { + uint8_t *p; + int rc = PR_OK; + BOOL expectdata = TEST(compiled_code, !=, NULL); +#ifdef SUPPORT_PCRE2_8 + expectdata |= preg.re_pcre2_code != NULL; +#endif + + if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL) + break; + if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer); + fflush(outfile); + p = buffer; + + /* If we have a pattern set up for testing, or we are skipping after a + compile failure, a blank line terminates this test. */ + + if (expectdata || skipping) + { + while (isspace(*p)) p++; + if (*p == 0) + { +#ifdef SUPPORT_PCRE2_8 + if (preg.re_pcre2_code != NULL) + { + regfree(&preg); + preg.re_pcre2_code = NULL; + preg.re_match_data = NULL; + } +#endif /* SUPPORT_PCRE2_8 */ + if (TEST(compiled_code, !=, NULL)) + { + SUB1(pcre2_code_free, compiled_code); + SET(compiled_code, NULL); + } + skipping = FALSE; + setlocale(LC_CTYPE, "C"); + } + + /* Otherwise, if we are not skipping, and the line is not a data comment + line starting with "\=", process a data line. */ + + else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2]))) + { + rc = process_data(); + } + } + + /* We do not have a pattern set up for testing. Lines starting with # are + either comments or special commands. Blank lines are ignored. Otherwise, the + line must start with a valid delimiter. It is then processed as a pattern + line. A copy of the pattern is left in pbuffer8 for use by callouts. Under + valgrind, make the unused part of the buffer undefined, to catch overruns. */ + + else if (*p == '#') + { + if (isspace(p[1]) || p[1] == '!' 
|| p[1] == 0) continue; + rc = process_command(); + } + + else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL) + { + rc = process_pattern(); + dfa_matched = 0; + } + + else + { + while (isspace(*p)) p++; + if (*p != 0) + { + fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer, + *buffer); + rc = PR_SKIP; + } + } + + if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE; + else if (rc == PR_ABEND) + { + fprintf(outfile, "** pcre2test run abandoned\n"); + yield = 1; + goto EXIT; + } + } + +/* Finish off a normal run. */ + +if (INTERACTIVE(infile)) fprintf(outfile, "\n"); + +if (showtotaltimes) + { + const char *pad = ""; + fprintf(outfile, "--------------------------------------\n"); + if (timeit > 0) + { + fprintf(outfile, "Total compile time %8.2f microseconds\n", + ((1000000 / CLOCKS_PER_SEC) * (double)total_compile_time) / timeit); + if (total_jit_compile_time > 0) + fprintf(outfile, "Total JIT compile %8.2f microseconds\n", + ((1000000 / CLOCKS_PER_SEC) * (double)total_jit_compile_time) / \ + timeit); + pad = " "; + } + fprintf(outfile, "Total match time %s%8.2f microseconds\n", pad, + ((1000000 / CLOCKS_PER_SEC) * (double)total_match_time) / timeitm); + } + + +EXIT: + +#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) +if (infile != NULL && INTERACTIVE(infile)) clear_history(); +#endif + +if (infile != NULL && infile != stdin) fclose(infile); +if (outfile != NULL && outfile != stdout) fclose(outfile); + +free(buffer); +free(dbuffer); +free(pbuffer8); +free(dfa_workspace); +free(tables3); +PCRE2_MAKETABLES_FREE(general_context, (void *)locale_tables); +PCRE2_MATCH_DATA_FREE(match_data); +SUB1(pcre2_code_free, compiled_code); + +while(patstacknext-- > 0) + { + SET(compiled_code, patstack[patstacknext]); + SUB1(pcre2_code_free, compiled_code); + } + +PCRE2_JIT_FREE_UNUSED_MEMORY(general_context); +if (jit_stack != NULL) + { + PCRE2_JIT_STACK_FREE(jit_stack); + } + +#define FREECONTEXTS \ + 
G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \ + G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \ + G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \ + G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \ + G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \ + G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \ + G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \ + G(pcre2_convert_context_free_,BITS)(G(con_context,BITS)); + +#ifdef SUPPORT_PCRE2_8 +#undef BITS +#define BITS 8 +if (preg.re_pcre2_code != NULL) regfree(&preg); +FREECONTEXTS; +#endif + +#ifdef SUPPORT_PCRE2_16 +#undef BITS +#define BITS 16 +free(pbuffer16); +FREECONTEXTS; +#endif + +#ifdef SUPPORT_PCRE2_32 +#undef BITS +#define BITS 32 +free(pbuffer32); +FREECONTEXTS; +#endif + +#if defined(__VMS) + yield = SS$_NORMAL; /* Return values via DCL symbols */ +#endif + +return yield; +} + +/* End of pcre2test.c */ diff --git a/src/sljit/allocator_src/sljitExecAllocatorApple.c b/src/sljit/allocator_src/sljitExecAllocatorApple.c new file mode 100644 index 0000000..9bd2094 --- /dev/null +++ b/src/sljit/allocator_src/sljitExecAllocatorApple.c @@ -0,0 +1,137 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +/* + On macOS systems, returns MAP_JIT if it is defined _and_ we're running on a + version where it's OK to have more than one JIT block or where MAP_JIT is + required. + On non-macOS systems, returns MAP_JIT if it is defined. 
+*/
+#include <TargetConditionals.h>
+
+#if (defined(TARGET_OS_OSX) && TARGET_OS_OSX) || (TARGET_OS_MAC && !TARGET_OS_IPHONE)
+
+#if defined(SLJIT_CONFIG_X86) && SLJIT_CONFIG_X86
+
+#include <sys/utsname.h>
+#include <stdlib.h>
+
+/* Expands to nothing in this configuration. */
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
+
+#ifdef MAP_JIT
+#define SLJIT_MAP_JIT (get_map_jit_flag())
+/*
+ * Returns the extra mmap() flag to use for executable chunks: MAP_JIT or 0.
+ *
+ * The value is computed on the first call and cached in a function-local
+ * static. MAP_JIT is chosen only when
+ *   - uname() succeeds and the leading number of its release string is at
+ *     least 18, and
+ *   - a probe mapping requesting PROT_WRITE | PROT_EXEC without MAP_JIT fails.
+ * In every other case, including a failing uname(), the result is 0.
+ */
+static SLJIT_INLINE int get_map_jit_flag(void)
+{
+	size_t page_size;
+	void *ptr;
+	struct utsname name;
+	static int map_jit_flag = -1;
+
+	if (map_jit_flag < 0) {
+		map_jit_flag = 0;
+
+		/* Kernel version for 10.14.0 (Mojave) or later. name.release is only
+		   filled in when uname() succeeds, so a failure keeps the flag at 0
+		   instead of parsing an uninitialised buffer. */
+		if (uname(&name) == 0 && atoi(name.release) >= 18) {
+			page_size = get_page_alignment() + 1;
+			/* Only use MAP_JIT if a hardened runtime is used */
+			ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC,
+				MAP_PRIVATE | MAP_ANON, -1, 0);
+
+			if (ptr != MAP_FAILED)
+				munmap(ptr, page_size);
+			else
+				map_jit_flag = MAP_JIT;
+		}
+	}
+	return map_jit_flag;
+}
+#else /* !defined(MAP_JIT) */
+#define SLJIT_MAP_JIT (0)
+#endif
+
+#elif defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM
+
+#include <AvailabilityMacros.h>
+#include <pthread.h>
+
+#define SLJIT_MAP_JIT (MAP_JIT)
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \
+	apple_update_wx_flags(enable_exec)
+
+/*
+ * Forwards enable_exec to pthread_jit_write_protect_np().
+ *
+ * When the minimum deployment target is older than macOS 11 the call is
+ * additionally guarded by a runtime __builtin_available(macos 11, *) check.
+ */
+static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec)
+{
+#if MAC_OS_X_VERSION_MIN_REQUIRED < 110000
+	if (__builtin_available(macos 11, *))
+#endif /* BigSur */
+		pthread_jit_write_protect_np(enable_exec);
+}
+
+#elif defined(SLJIT_CONFIG_PPC) && SLJIT_CONFIG_PPC
+
+#define SLJIT_MAP_JIT (0)
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
+
+#else
+#error "Unsupported architecture"
+#endif /* SLJIT_CONFIG */
+
+#else /* !TARGET_OS_OSX */
+
+#ifdef MAP_JIT
+#define SLJIT_MAP_JIT (MAP_JIT)
+#else
+#define SLJIT_MAP_JIT (0)
+#endif
+
+#endif /* TARGET_OS_OSX */
+
+/*
+ * Maps `size` bytes of private, anonymous memory with read, write and execute
+ * protection, adding SLJIT_MAP_JIT to the mapping flags.
+ *
+ * Returns the mapping address, or NULL when mmap() fails. On success
+ * SLJIT_UPDATE_WX_FLAGS is invoked with enable_exec == 0 before returning.
+ */
+static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+{
+	void *retval;
+	int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+	int flags = MAP_PRIVATE;
+	int fd = -1;
+
+	flags |= MAP_ANON | SLJIT_MAP_JIT;
+
+	retval = mmap(NULL, size, prot, flags, fd, 0);
+	if (retval == MAP_FAILED)
+		return NULL;
+
+	SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0);
+
+	return retval;
+}
+
+/* Unmaps the `size` bytes starting at `chunk`; the result of munmap() is not checked. */
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
+{
+	munmap(chunk, size);
+}
+
+#include "sljitExecAllocatorCore.c"
diff --git a/src/sljit/allocator_src/sljitExecAllocatorCore.c b/src/sljit/allocator_src/sljitExecAllocatorCore.c
new file mode 100644
index 0000000..4e1119b
--- /dev/null
+++ b/src/sljit/allocator_src/sljitExecAllocatorCore.c
@@ -0,0 +1,327 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ */ + +/* + This file contains a simple executable memory allocator + + It is assumed, that executable code blocks are usually medium (or sometimes + large) memory blocks, and the allocator is not too frequently called (less + optimized than other allocators). Thus, using it as a generic allocator is + not suggested. + + How does it work: + Memory is allocated in continuous memory areas called chunks by alloc_chunk() + Chunk format: + [ block ][ block ] ... [ block ][ block terminator ] + + All blocks and the block terminator is started with block_header. The block + header contains the size of the previous and the next block. These sizes + can also contain special values. + Block size: + 0 - The block is a free_block, with a different size member. + 1 - The block is a block terminator. + n - The block is used at the moment, and the value contains its size. + Previous block size: + 0 - This is the first block of the memory chunk. + n - The size of the previous block. + + Using these size values we can go forward or backward on the block chain. + The unused blocks are stored in a chain list pointed by free_blocks. This + list is useful if we need to find a suitable memory area when the allocator + is called. + + When a block is freed, the new free block is connected to its adjacent free + blocks if possible. 
+ + [ free block ][ used block ][ free block ] + and "used block" is freed, the three blocks are connected together: + [ one big free block ] +*/ + +/* Expected functions: + alloc_chunk / free_chunk : + * allocate executable system memory chunks + * the size is always divisible by CHUNK_SIZE + SLJIT_ALLOCATOR_LOCK / SLJIT_ALLOCATOR_UNLOCK : + * provided as part of sljitUtils + * only the allocator requires this lock, sljit is fully thread safe + as it only uses local variables + + Supported defines: + SLJIT_HAS_CHUNK_HEADER - (optional) sljit_chunk_header is defined + SLJIT_HAS_EXECUTABLE_OFFSET - (optional) has executable offset data + SLJIT_UPDATE_WX_FLAGS - (optional) update WX flags +*/ + +#ifdef SLJIT_HAS_CHUNK_HEADER +#define CHUNK_HEADER_SIZE (sizeof(struct sljit_chunk_header)) +#else /* !SLJIT_HAS_CHUNK_HEADER */ +#define CHUNK_HEADER_SIZE 0 +#endif /* SLJIT_HAS_CHUNK_HEADER */ + +#ifndef SLJIT_UPDATE_WX_FLAGS +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) +#endif /* SLJIT_UPDATE_WX_FLAGS */ + +#ifndef CHUNK_SIZE +/* 64 KByte if not specified. 
*/
+#define CHUNK_SIZE (sljit_uw)0x10000
+#endif /* CHUNK_SIZE */
+
+/*
+ * Header stored in front of every block, including the chunk terminator.
+ *   size      - 0 for a free block, 1 for the terminator, otherwise the size
+ *               of the used block.
+ *   prev_size - size of the block that precedes this one, 0 for the first
+ *               block of a chunk.
+ */
+struct block_header {
+	sljit_uw size;
+	sljit_uw prev_size;
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+	sljit_sw executable_offset;
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+};
+
+/*
+ * Layout of a free block: the common header (with header.size == 0), the
+ * links of the doubly linked free list, and the real size of the block.
+ */
+struct free_block {
+	struct block_header header;
+	struct free_block *next;
+	struct free_block *prev;
+	sljit_uw size;
+};
+
+/* View the address `offset` bytes away from `base` as a block header / free block. */
+#define AS_BLOCK_HEADER(base, offset) \
+	((struct block_header*)(((sljit_u8*)base) + offset))
+#define AS_FREE_BLOCK(base, offset) \
+	((struct free_block*)(((sljit_u8*)base) + offset))
+/* Address immediately after the object `base` points to. */
+#define MEM_START(base) ((void*)((base) + 1))
+/* Clears the bits below CHUNK_SIZE (rounds down to a chunk multiple when
+   CHUNK_SIZE is a power of two, as the 0x10000 default is). */
+#define CHUNK_MASK (~(CHUNK_SIZE - 1))
+/* Adds the block header size to `size` and rounds the sum up to a multiple of 8. */
+#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7u) & ~(sljit_uw)7)
+/* Bytes of every chunk that are not available to blocks: the terminator
+   header plus the optional chunk header. */
+#define CHUNK_EXTRA_SIZE (sizeof(struct block_header) + CHUNK_HEADER_SIZE)
+
+/* Head of the doubly linked list of free blocks. */
+static struct free_block* free_blocks;
+/* Sum of the sizes of all blocks currently handed out. */
+static sljit_uw allocated_size;
+/* Sum of the usable sizes (chunk size minus CHUNK_EXTRA_SIZE) of all live chunks. */
+static sljit_uw total_size;
+
+/*
+ * Marks `free_block` as free (header.size = 0), records `size` as its length
+ * and pushes it onto the front of the free_blocks list.
+ */
+static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size)
+{
+	free_block->header.size = 0;
+	free_block->size = size;
+
+	free_block->next = free_blocks;
+	free_block->prev = NULL;
+	if (free_blocks)
+		free_blocks->prev = free_block;
+	free_blocks = free_block;
+}
+
+/*
+ * Unlinks `free_block` from the free_blocks list. A block without a
+ * predecessor is asserted to be the current list head.
+ */
+static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block)
+{
+	if (free_block->next)
+		free_block->next->prev = free_block->prev;
+
+	if (free_block->prev)
+		free_block->prev->next = free_block->next;
+	else {
+		SLJIT_ASSERT(free_blocks == free_block);
+		free_blocks = free_block->next;
+	}
+}
+
+/*
+ * Allocates a block for executable code.
+ *
+ * `size` is raised so that payload plus block header is at least 64 bytes and
+ * then rounded by ALIGN_SIZE. The free list is searched for the first block
+ * that is large enough; if none exists a new chunk is obtained from
+ * alloc_chunk(), with its size rounded up to a multiple of CHUNK_SIZE.
+ *
+ * Returns the address just past the block header, or NULL when alloc_chunk()
+ * fails. The allocator lock is held for the duration of the call.
+ *
+ * When a free block is reused, SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0) is issued
+ * and not reverted inside this function.
+ */
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
+{
+	struct block_header *header;
+	struct block_header *next_header;
+	struct free_block *free_block;
+	sljit_uw chunk_size;
+
+#ifdef SLJIT_HAS_CHUNK_HEADER
+	struct sljit_chunk_header *chunk_header;
+#else /* !SLJIT_HAS_CHUNK_HEADER */
+	void *chunk_header;
+#endif /* SLJIT_HAS_CHUNK_HEADER */
+
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+	sljit_sw executable_offset;
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+
+	if (size < (64 - sizeof(struct block_header)))
+		size = (64 - sizeof(struct block_header));
+	size = ALIGN_SIZE(size);
+
+	SLJIT_ALLOCATOR_LOCK();
+	free_block = free_blocks;
+	while (free_block) {
+		if (free_block->size >= size) {
+			chunk_size = free_block->size;
+			SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0);
+			if (chunk_size > size + 64) {
+				/* We just cut a block from the end of the free block. */
+				chunk_size -= size;
+				free_block->size = chunk_size;
+				header = AS_BLOCK_HEADER(free_block, chunk_size);
+				header->prev_size = chunk_size;
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+				header->executable_offset = free_block->header.executable_offset;
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+				/* The block that follows the new one now has it as predecessor. */
+				AS_BLOCK_HEADER(header, size)->prev_size = size;
+			} else {
+				/* Remainder would be 64 bytes or less: hand out the whole free block. */
+				sljit_remove_free_block(free_block);
+				header = (struct block_header*)free_block;
+				size = chunk_size;
+			}
+			allocated_size += size;
+			header->size = size;
+			SLJIT_ALLOCATOR_UNLOCK();
+			return MEM_START(header);
+		}
+		free_block = free_block->next;
+	}
+
+	/* No suitable free block: request a new chunk, rounded up with CHUNK_MASK. */
+	chunk_size = (size + CHUNK_EXTRA_SIZE + CHUNK_SIZE - 1) & CHUNK_MASK;
+
+	chunk_header = alloc_chunk(chunk_size);
+	if (!chunk_header) {
+		SLJIT_ALLOCATOR_UNLOCK();
+		return NULL;
+	}
+
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+	executable_offset = (sljit_sw)((sljit_u8*)chunk_header->executable - (sljit_u8*)chunk_header);
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+
+	/* From here on chunk_size is the space usable by blocks. */
+	chunk_size -= CHUNK_EXTRA_SIZE;
+	total_size += chunk_size;
+
+	header = (struct block_header*)(((sljit_u8*)chunk_header) + CHUNK_HEADER_SIZE);
+
+	header->prev_size = 0;
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+	header->executable_offset = executable_offset;
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+
+	if (chunk_size > size + 64) {
+		/* Cut the allocated space into a free and a used block. */
+		allocated_size += size;
+		header->size = size;
+		chunk_size -= size;
+
+		free_block = AS_FREE_BLOCK(header, size);
+		free_block->header.prev_size = size;
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+		free_block->header.executable_offset = executable_offset;
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+		sljit_insert_free_block(free_block, chunk_size);
+		next_header = AS_BLOCK_HEADER(free_block, chunk_size);
+	} else {
+		/* All space belongs to this allocation. */
+		allocated_size += chunk_size;
+		header->size = chunk_size;
+		next_header = AS_BLOCK_HEADER(header, chunk_size);
+	}
+	/* Write the block terminator (size == 1) behind the last block. */
+	next_header->size = 1;
+	next_header->prev_size = chunk_size;
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+	next_header->executable_offset = executable_offset;
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+	SLJIT_ALLOCATOR_UNLOCK();
+	return MEM_START(header);
+}
+
+/*
+ * Releases a block whose payload address is `ptr`.
+ *
+ * The block is merged with a free predecessor and/or a free successor. When
+ * the resulting free block spans its whole chunk, the chunk is handed to
+ * free_chunk() provided that the space remaining afterwards still exceeds
+ * allocated_size * 3 / 2. The allocator lock is held for the duration of the
+ * call.
+ */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void *ptr)
+{
+	struct block_header *header;
+	struct free_block *free_block;
+
+	SLJIT_ALLOCATOR_LOCK();
+	/* Step back from the payload to its block header. */
+	header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header));
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+	header = AS_BLOCK_HEADER(header, -header->executable_offset);
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+	allocated_size -= header->size;
+
+	SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0);
+
+	/* Connecting free blocks together if possible. */
+
+	/* If header->prev_size == 0, free_block will be equal to header.
+	   In this case, free_block->header.size will be > 0. */
+	free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size);
+	if (SLJIT_UNLIKELY(!free_block->header.size)) {
+		/* Predecessor is free: grow it over the released block. */
+		free_block->size += header->size;
+		header = AS_BLOCK_HEADER(free_block, free_block->size);
+		header->prev_size = free_block->size;
+	} else {
+		free_block = (struct free_block*)header;
+		sljit_insert_free_block(free_block, header->size);
+	}
+
+	header = AS_BLOCK_HEADER(free_block, free_block->size);
+	if (SLJIT_UNLIKELY(!header->size)) {
+		/* Successor is free as well: absorb it and drop it from the list. */
+		free_block->size += ((struct free_block*)header)->size;
+		sljit_remove_free_block((struct free_block*)header);
+		header = AS_BLOCK_HEADER(free_block, free_block->size);
+		header->prev_size = free_block->size;
+	}
+
+	/* The whole chunk is free. */
+	if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) {
+		/* If this block is freed, we still have (allocated_size / 2) free space. */
+		if (total_size - free_block->size > (allocated_size * 3 / 2)) {
+			total_size -= free_block->size;
+			sljit_remove_free_block(free_block);
+			/* NOTE(review): free_chunk() receives the first block header, which
+			   lies CHUNK_HEADER_SIZE bytes after the address alloc_chunk()
+			   returned; presumably a configuration with a chunk header
+			   compensates inside its free_chunk() - confirm. */
+			free_chunk(free_block, free_block->size + CHUNK_EXTRA_SIZE);
+		}
+	}
+
+	SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1);
+	SLJIT_ALLOCATOR_UNLOCK();
+}
+
+/*
+ * Hands every chunk that consists of a single free block (no predecessor and
+ * directly followed by the terminator) to free_chunk(). The allocator lock is
+ * held for the duration of the call.
+ */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
+{
+	struct free_block* free_block;
+	struct free_block* next_free_block;
+
+	SLJIT_ALLOCATOR_LOCK();
+	SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0);
+
+	free_block = free_blocks;
+	while (free_block) {
+		/* Fetch the successor first: free_block may be unmapped below. */
+		next_free_block = free_block->next;
+		if (!free_block->header.prev_size &&
+				AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) {
+			total_size -= free_block->size;
+			sljit_remove_free_block(free_block);
+			free_chunk(free_block, free_block->size + CHUNK_EXTRA_SIZE);
+		}
+		free_block = next_free_block;
+	}
+
+	SLJIT_ASSERT(total_size || (!total_size && !free_blocks));
+	SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1);
+	SLJIT_ALLOCATOR_UNLOCK();
+}
+
+#ifdef SLJIT_HAS_EXECUTABLE_OFFSET
+/*
+ * Returns the executable_offset stored in the block header that immediately
+ * precedes SLJIT_CODE_TO_PTR(code).
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code)
+{
+	return ((struct block_header*)SLJIT_CODE_TO_PTR(code))[-1].executable_offset;
+}
+#endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
diff --git a/src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c b/src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c
new file mode 100644
index 0000000..3b93a4d
--- /dev/null
+++ b/src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c
@@ -0,0 +1,89 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/mman.h>
+#include <sys/procctl.h>
+
+#ifdef PROC_WXMAP_CTL
+/*
+ * Asks the kernel to permit W+X mappings for this process and reports whether
+ * the request was refused.
+ *
+ * procctl(P_PID, 0, PROC_WXMAP_CTL, PROC_WX_MAPPINGS_PERMIT) is issued on the
+ * first call only; the outcome is cached in a function-local static.
+ *
+ * Returns 0 when procctl() returned 0, 1 otherwise.
+ */
+static SLJIT_INLINE int sljit_is_wx_block(void)
+{
+	static int wx_block = -1;
+	if (wx_block < 0) {
+		int sljit_wx_enable = PROC_WX_MAPPINGS_PERMIT;
+		wx_block = !!procctl(P_PID, 0, PROC_WXMAP_CTL, &sljit_wx_enable);
+	}
+	return wx_block;
+}
+
+#define SLJIT_IS_WX_BLOCK sljit_is_wx_block()
+#else /* !PROC_WXMAP_CTL */
+#define SLJIT_IS_WX_BLOCK (1)
+#endif /* PROC_WXMAP_CTL */
+
+/*
+ * Maps `size` bytes of private memory with read, write and execute protection.
+ *
+ * If the first mmap() attempt fails and SLJIT_IS_WX_BLOCK reports that W+X
+ * mappings are permitted, the mapping is attempted exactly one more time.
+ * A successful mapping is re-checked with mprotect() and unmapped again if
+ * that call fails.
+ *
+ * Returns the mapping address, or NULL on any failure.
+ */
+static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+{
+	void *retval;
+	int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+	int flags = MAP_PRIVATE;
+	int fd = -1;
+	int retried = 0;
+
+#ifdef PROT_MAX
+	prot |= PROT_MAX(prot);
+#endif
+
+#ifdef MAP_ANON
+	flags |= MAP_ANON;
+#else /* !MAP_ANON */
+	if (SLJIT_UNLIKELY((dev_zero < 0) && open_dev_zero()))
+		return NULL;
+
+	fd = dev_zero;
+#endif /* MAP_ANON */
+
+retry:
+	retval = mmap(NULL, size, prot, flags, fd, 0);
+	if (retval == MAP_FAILED) {
+		/* SLJIT_IS_WX_BLOCK caches its answer, so once it has returned 0 it
+		   returns 0 for every later failure too. Retrying only once keeps a
+		   persistent mmap() failure (for example lack of memory) from looping
+		   forever. */
+		if (!retried && !SLJIT_IS_WX_BLOCK) {
+			retried = 1;
+			goto retry;
+		}
+
+		return NULL;
+	}
+
+	/* HardenedBSD's mmap lies, so check permissions again. */
+	if (mprotect(retval, size, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) {
+		munmap(retval, size);
+		return NULL;
+	}
+
+	return retval;
+}
+
+/* Unmaps the `size` bytes starting at `chunk`; the result of munmap() is not checked. */
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
+{
+	munmap(chunk, size);
+}
+
+#include "sljitExecAllocatorCore.c"
diff --git a/src/sljit/allocator_src/sljitExecAllocatorPosix.c b/src/sljit/allocator_src/sljitExecAllocatorPosix.c
new file mode 100644
index 0000000..a775f56
--- /dev/null
+++ b/src/sljit/allocator_src/sljitExecAllocatorPosix.c
@@ -0,0 +1,62 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +static SLJIT_INLINE void* alloc_chunk(sljit_uw size) +{ + void *retval; + int prot = PROT_READ | PROT_WRITE | PROT_EXEC; + int flags = MAP_PRIVATE; + int fd = -1; + +#ifdef PROT_MAX + prot |= PROT_MAX(prot); +#endif + +#ifdef MAP_ANON + flags |= MAP_ANON; +#else /* !MAP_ANON */ + if (SLJIT_UNLIKELY((dev_zero < 0) && open_dev_zero())) + return NULL; + + fd = dev_zero; +#endif /* MAP_ANON */ + + retval = mmap(NULL, size, prot, flags, fd, 0); + if (retval == MAP_FAILED) + return NULL; + + return retval; +} + +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) +{ + munmap(chunk, size); +} + +#include "sljitExecAllocatorCore.c" diff --git a/src/sljit/allocator_src/sljitExecAllocatorWindows.c b/src/sljit/allocator_src/sljitExecAllocatorWindows.c new file mode 100644 index 0000000..f152a5a --- /dev/null +++ b/src/sljit/allocator_src/sljitExecAllocatorWindows.c @@ -0,0 +1,40 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) + +static SLJIT_INLINE void* alloc_chunk(sljit_uw size) +{ + return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); +} + +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) +{ + SLJIT_UNUSED_ARG(size); + VirtualFree(chunk, 0, MEM_RELEASE); +} + +#include "sljitExecAllocatorCore.c" diff --git a/src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c b/src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c new file mode 100644 index 0000000..0b7fd57 --- /dev/null +++ b/src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c @@ -0,0 +1,72 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define SLJIT_HAS_CHUNK_HEADER +#define SLJIT_HAS_EXECUTABLE_OFFSET + +struct sljit_chunk_header { + void *executable; +}; + +/* + * MAP_REMAPDUP is a NetBSD extension available sinde 8.0, make sure to + * adjust your feature macros (ex: -D_NETBSD_SOURCE) as needed + */ +static SLJIT_INLINE struct sljit_chunk_header* alloc_chunk(sljit_uw size) +{ + struct sljit_chunk_header *retval; + + retval = (struct sljit_chunk_header *)mmap(NULL, size, + PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC), + MAP_ANON | MAP_SHARED, -1, 0); + + if (retval == MAP_FAILED) + return NULL; + + retval->executable = mremap(retval, size, NULL, size, MAP_REMAPDUP); + if (retval->executable == MAP_FAILED) { + munmap((void *)retval, size); + return NULL; + } + + if (mprotect(retval->executable, size, PROT_READ | PROT_EXEC) == -1) { + munmap(retval->executable, size); + munmap((void *)retval, size); + return NULL; + } + + return retval; +} + +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) +{ + struct sljit_chunk_header *header = ((struct sljit_chunk_header *)chunk) - 1; + + munmap(header->executable, size); + munmap((void *)header, size); +} + +#include "sljitExecAllocatorCore.c" 
diff --git a/src/sljit/allocator_src/sljitProtExecAllocatorPosix.c b/src/sljit/allocator_src/sljitProtExecAllocatorPosix.c new file mode 100644 index 0000000..f7cb6c5 --- /dev/null +++ b/src/sljit/allocator_src/sljitProtExecAllocatorPosix.c @@ -0,0 +1,172 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define SLJIT_HAS_CHUNK_HEADER +#define SLJIT_HAS_EXECUTABLE_OFFSET + +struct sljit_chunk_header { + void *executable; +}; + +#include +#include +#include +#include + +#ifndef O_NOATIME +#define O_NOATIME 0 +#endif + +/* this is a linux extension available since kernel 3.11 */ +#ifndef O_TMPFILE +#define O_TMPFILE 0x404000 +#endif + +#ifndef _GNU_SOURCE +char *secure_getenv(const char *name); +int mkostemp(char *template, int flags); +#endif + +static SLJIT_INLINE int create_tempfile(void) +{ + int fd; + char tmp_name[256]; + size_t tmp_name_len = 0; + char *dir; + struct stat st; +#if defined(SLJIT_SINGLE_THREADED) && SLJIT_SINGLE_THREADED + mode_t mode; +#endif + +#ifdef HAVE_MEMFD_CREATE + /* this is a GNU extension, make sure to use -D_GNU_SOURCE */ + fd = memfd_create("sljit", MFD_CLOEXEC); + if (fd != -1) { + fchmod(fd, 0); + return fd; + } +#endif + + dir = secure_getenv("TMPDIR"); + + if (dir) { + size_t len = strlen(dir); + if (len > 0 && len < sizeof(tmp_name)) { + if ((stat(dir, &st) == 0) && S_ISDIR(st.st_mode)) { + memcpy(tmp_name, dir, len + 1); + tmp_name_len = len; + } + } + } + +#ifdef P_tmpdir + if (!tmp_name_len) { + tmp_name_len = strlen(P_tmpdir); + if (tmp_name_len > 0 && tmp_name_len < sizeof(tmp_name)) + strcpy(tmp_name, P_tmpdir); + } +#endif + if (!tmp_name_len) { + strcpy(tmp_name, "/tmp"); + tmp_name_len = 4; + } + + SLJIT_ASSERT(tmp_name_len > 0 && tmp_name_len < sizeof(tmp_name)); + + if (tmp_name_len > 1 && tmp_name[tmp_name_len - 1] == '/') + tmp_name[--tmp_name_len] = '\0'; + + fd = open(tmp_name, O_TMPFILE | O_EXCL | O_RDWR | O_NOATIME | O_CLOEXEC, 0); + if (fd != -1) + return fd; + + if (tmp_name_len >= sizeof(tmp_name) - 7) + return -1; + + strcpy(tmp_name + tmp_name_len, "/XXXXXX"); +#if defined(SLJIT_SINGLE_THREADED) && SLJIT_SINGLE_THREADED + mode = umask(0777); +#endif + fd = mkostemp(tmp_name, O_CLOEXEC | O_NOATIME); +#if defined(SLJIT_SINGLE_THREADED) && SLJIT_SINGLE_THREADED + umask(mode); +#else + fchmod(fd, 0); 
+#endif + + if (fd == -1) + return -1; + + if (unlink(tmp_name)) { + close(fd); + return -1; + } + + return fd; +} + +static SLJIT_INLINE struct sljit_chunk_header* alloc_chunk(sljit_uw size) +{ + struct sljit_chunk_header *retval; + int fd; + + fd = create_tempfile(); + if (fd == -1) + return NULL; + + if (ftruncate(fd, (off_t)size)) { + close(fd); + return NULL; + } + + retval = (struct sljit_chunk_header *)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (retval == MAP_FAILED) { + close(fd); + return NULL; + } + + retval->executable = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); + + if (retval->executable == MAP_FAILED) { + munmap((void *)retval, size); + close(fd); + return NULL; + } + + close(fd); + return retval; +} + +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) +{ + struct sljit_chunk_header *header = ((struct sljit_chunk_header *)chunk) - 1; + + munmap(header->executable, size); + munmap((void *)header, size); +} + +#include "sljitExecAllocatorCore.c" diff --git a/src/sljit/allocator_src/sljitWXExecAllocatorPosix.c b/src/sljit/allocator_src/sljitWXExecAllocatorPosix.c new file mode 100644 index 0000000..36d3014 --- /dev/null +++ b/src/sljit/allocator_src/sljitWXExecAllocatorPosix.c @@ -0,0 +1,141 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + This file contains a simple W^X executable memory allocator + + In *NIX, MAP_ANON is required (that is considered a feature) so make + sure to set the right availability macros for your system or the code + will fail to build. + + If your system doesn't support mapping of anonymous pages (ex: IRIX) it + is also likely that it doesn't need this allocator and should be using + the standard one instead. + + It allocates a separate map for each code block and may waste a lot of + memory, because whatever was requested, will be rounded up to the page + size (minimum 4KB, but could be even bigger). + + It changes the page permissions (RW <-> RX) as needed and therefore, if you + will be updating the code after it has been generated, need to make sure to + block any concurrent execution, or could result in a SIGBUS, that could + even manifest itself at a different address than the one that was being + modified. + + Only use if you are unable to use the regular allocator because of security + restrictions and adding exceptions to your application or the system are + not possible. 
+*/ + +#include +#include + +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \ + sljit_update_wx_flags((from), (to), (enable_exec)) + +#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) +#include +#define SLJIT_SE_LOCK() pthread_mutex_lock(&se_lock) +#define SLJIT_SE_UNLOCK() pthread_mutex_unlock(&se_lock) +#else +#define SLJIT_SE_LOCK() +#define SLJIT_SE_UNLOCK() +#endif /* !SLJIT_SINGLE_THREADED */ + +#define SLJIT_WX_IS_BLOCK(ptr, size) generic_check_is_wx_block(ptr, size) + +static SLJIT_INLINE int generic_check_is_wx_block(void *ptr, sljit_uw size) +{ + if (SLJIT_LIKELY(!mprotect(ptr, size, PROT_EXEC))) + return !!mprotect(ptr, size, PROT_READ | PROT_WRITE); + + return 1; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) +{ +#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) + static pthread_mutex_t se_lock = PTHREAD_MUTEX_INITIALIZER; +#endif + static int wx_block = -1; + int prot = PROT_READ | PROT_WRITE; + sljit_uw* ptr; + + if (SLJIT_UNLIKELY(wx_block > 0)) + return NULL; + +#ifdef PROT_MAX + prot |= PROT_MAX(PROT_READ | PROT_WRITE | PROT_EXEC); +#endif + + size += sizeof(sljit_uw); + ptr = (sljit_uw*)mmap(NULL, size, prot, MAP_PRIVATE | MAP_ANON, -1, 0); + + if (ptr == MAP_FAILED) + return NULL; + + if (SLJIT_UNLIKELY(wx_block < 0)) { + SLJIT_SE_LOCK(); + wx_block = SLJIT_WX_IS_BLOCK(ptr, size); + SLJIT_SE_UNLOCK(); + if (SLJIT_UNLIKELY(wx_block)) { + munmap((void *)ptr, size); + return NULL; + } + } + + *ptr++ = size; + return ptr; +} + +#undef SLJIT_SE_UNLOCK +#undef SLJIT_SE_LOCK + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) +{ + sljit_uw *start_ptr = ((sljit_uw*)ptr) - 1; + munmap((void*)start_ptr, *start_ptr); +} + +static void sljit_update_wx_flags(void *from, void *to, int enable_exec) +{ + sljit_uw page_mask = (sljit_uw)get_page_alignment(); + sljit_uw start = (sljit_uw)from; + sljit_uw end = (sljit_uw)to; + int prot = PROT_READ | (enable_exec ? 
PROT_EXEC : PROT_WRITE); + + SLJIT_ASSERT(start < end); + + start &= ~page_mask; + end = (end + page_mask) & ~page_mask; + + mprotect((void*)start, end - start, prot); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) +{ + /* This allocator does not keep unused memory for future allocations. */ +} diff --git a/src/sljit/allocator_src/sljitWXExecAllocatorWindows.c b/src/sljit/allocator_src/sljitWXExecAllocatorWindows.c new file mode 100644 index 0000000..a9553bd --- /dev/null +++ b/src/sljit/allocator_src/sljitWXExecAllocatorWindows.c @@ -0,0 +1,102 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + This file contains a simple W^X executable memory allocator + + In *NIX, MAP_ANON is required (that is considered a feature) so make + sure to set the right availability macros for your system or the code + will fail to build. + + If your system doesn't support mapping of anonymous pages (ex: IRIX) it + is also likely that it doesn't need this allocator and should be using + the standard one instead. + + It allocates a separate map for each code block and may waste a lot of + memory, because whatever was requested, will be rounded up to the page + size (minimum 4KB, but could be even bigger). + + It changes the page permissions (RW <-> RX) as needed and therefore, if you + will be updating the code after it has been generated, need to make sure to + block any concurrent execution, or could result in a SIGBUS, that could + even manifest itself at a different address than the one that was being + modified. + + Only use if you are unable to use the regular allocator because of security + restrictions and adding exceptions to your application or the system are + not possible. 
+*/ + +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \ + sljit_update_wx_flags((from), (to), (enable_exec)) + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) +{ + sljit_uw *ptr; + + size += sizeof(sljit_uw); + ptr = (sljit_uw*)VirtualAlloc(NULL, size, + MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + + if (!ptr) + return NULL; + + *ptr++ = size; + + return ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) +{ + sljit_uw start = (sljit_uw)ptr - sizeof(sljit_uw); +#if defined(SLJIT_DEBUG) && SLJIT_DEBUG + sljit_uw page_mask = (sljit_uw)get_page_alignment(); + + SLJIT_ASSERT(!(start & page_mask)); +#endif + VirtualFree((void*)start, 0, MEM_RELEASE); +} + +static void sljit_update_wx_flags(void *from, void *to, sljit_s32 enable_exec) +{ + DWORD oldprot; + sljit_uw page_mask = (sljit_uw)get_page_alignment(); + sljit_uw start = (sljit_uw)from; + sljit_uw end = (sljit_uw)to; + DWORD prot = enable_exec ? PAGE_EXECUTE : PAGE_READWRITE; + + SLJIT_ASSERT(start < end); + + start &= ~page_mask; + end = (end + page_mask) & ~page_mask; + + VirtualProtect((void*)start, end - start, prot, &oldprot); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) +{ + /* This allocator does not keep unused memory for future allocations. */ +} diff --git a/src/sljit/sljitConfig.h b/src/sljit/sljitConfig.h new file mode 100644 index 0000000..364c8bb --- /dev/null +++ b/src/sljit/sljitConfig.h @@ -0,0 +1,142 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SLJIT_CONFIG_H_ +#define SLJIT_CONFIG_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + This file contains the basic configuration options for the SLJIT compiler + and their default values. Every option below is wrapped in #ifndef, so a + value defined earlier is kept. These options can be overridden in the + sljitConfigPre.h header file when SLJIT_HAVE_CONFIG_PRE is set to a + non-zero value. +*/ + +/* --------------------------------------------------------------------- */ +/* Utilities */ +/* --------------------------------------------------------------------- */ + +/* Implements a stack-like data structure (by using mmap / VirtualAlloc */ +/* or a custom allocator). */ +#ifndef SLJIT_UTIL_STACK +/* Enabled by default */ +#define SLJIT_UTIL_STACK 1 +#endif + +/* Uses a user-provided allocator to allocate the stack (see SLJIT_UTIL_STACK) */ +#ifndef SLJIT_UTIL_SIMPLE_STACK_ALLOCATION +/* Disabled by default */ +#define SLJIT_UTIL_SIMPLE_STACK_ALLOCATION 0 +#endif + +/* Single-threaded application. Does not require any locks.
*/ +#ifndef SLJIT_SINGLE_THREADED +/* Disabled by default. */ +#define SLJIT_SINGLE_THREADED 0 +#endif + +/* --------------------------------------------------------------------- */ +/* Configuration */ +/* --------------------------------------------------------------------- */ + +/* If SLJIT_STD_MACROS_DEFINED is not defined, the application should + define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMCPY, and NULL. + NOTE(review): the default below defines the macro to 0, so "not defined" + presumably means "not set to a non-zero value"; which setting obliges the + application to provide these macros should be confirmed against + sljitConfigInternal.h. */ +#ifndef SLJIT_STD_MACROS_DEFINED +/* Disabled by default. */ +#define SLJIT_STD_MACROS_DEFINED 0 +#endif + +/* Executable code allocation: + If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should + define SLJIT_MALLOC_EXEC and SLJIT_FREE_EXEC. + Optionally, depending on the implementation used for the allocator, + SLJIT_EXEC_OFFSET and SLJIT_UPDATE_WX_FLAGS might also be needed. */ +#ifndef SLJIT_EXECUTABLE_ALLOCATOR +/* Enabled by default. The two alternative allocator switches below only + receive their defaults inside this #ifndef, i.e. when + SLJIT_EXECUTABLE_ALLOCATOR was not defined beforehand. */ +#define SLJIT_EXECUTABLE_ALLOCATOR 1 + +/* When SLJIT_PROT_EXECUTABLE_ALLOCATOR is enabled SLJIT uses + an allocator which does not set writable and executable + permission flags at the same time. + Instead, it creates a shared memory segment (usually backed by a file) + and maps it twice, with different permissions, depending on the use + case. + The trade-off is increased use of virtual memory, incompatibility with + fork(), and some possible additional security risks by the use of + publicly accessible files for the generated code. */ +#ifndef SLJIT_PROT_EXECUTABLE_ALLOCATOR +/* Disabled by default. */ +#define SLJIT_PROT_EXECUTABLE_ALLOCATOR 0 +#endif + +/* When SLJIT_WX_EXECUTABLE_ALLOCATOR is enabled SLJIT uses an + allocator which does not set writable and executable permission + flags at the same time. + Instead, it creates a new independent map on each invocation and + switches permissions at the underlying pages as needed. + The trade-off is increased memory use and degraded performance. */ +#ifndef SLJIT_WX_EXECUTABLE_ALLOCATOR +/* Disabled by default.
*/ +#define SLJIT_WX_EXECUTABLE_ALLOCATOR 0 +#endif + +#endif /* !SLJIT_EXECUTABLE_ALLOCATOR */ + +/* Return with error when an invalid argument is passed. */ +#ifndef SLJIT_ARGUMENT_CHECKS +/* Disabled by default */ +#define SLJIT_ARGUMENT_CHECKS 0 +#endif + +/* Debug checks (assertions, etc.). */ +#ifndef SLJIT_DEBUG +/* Enabled by default */ +#define SLJIT_DEBUG 1 +#endif + +/* Verbose operations. */ +#ifndef SLJIT_VERBOSE +/* Enabled by default */ +#define SLJIT_VERBOSE 1 +#endif + +/* + SLJIT_IS_FPU_AVAILABLE + The availability of the FPU can be controlled by SLJIT_IS_FPU_AVAILABLE. + zero value - FPU is NOT present. + nonzero value - FPU is present. + This file does not give SLJIT_IS_FPU_AVAILABLE a default value. +*/ + +/* For further configurations, see the beginning of sljitConfigInternal.h */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* SLJIT_CONFIG_H_ */ diff --git a/src/sljit/sljitConfigCPU.h b/src/sljit/sljitConfigCPU.h new file mode 100644 index 0000000..2720bda --- /dev/null +++ b/src/sljit/sljitConfigCPU.h @@ -0,0 +1,188 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SLJIT_CONFIG_CPU_H_ +#define SLJIT_CONFIG_CPU_H_ + +/* --------------------------------------------------------------------- */ +/* Architecture */ +/* --------------------------------------------------------------------- */ + +/* Architecture selection: at most one of these may be set to a non-zero value. */ +/* #define SLJIT_CONFIG_X86_32 1 */ +/* #define SLJIT_CONFIG_X86_64 1 */ +/* #define SLJIT_CONFIG_ARM_V6 1 */ +/* #define SLJIT_CONFIG_ARM_V7 1 */ +/* #define SLJIT_CONFIG_ARM_THUMB2 1 */ +/* #define SLJIT_CONFIG_ARM_64 1 */ +/* #define SLJIT_CONFIG_PPC_32 1 */ +/* #define SLJIT_CONFIG_PPC_64 1 */ +/* #define SLJIT_CONFIG_MIPS_32 1 */ +/* #define SLJIT_CONFIG_MIPS_64 1 */ +/* #define SLJIT_CONFIG_RISCV_32 1 */ +/* #define SLJIT_CONFIG_RISCV_64 1 */ +/* #define SLJIT_CONFIG_S390X 1 */ +/* #define SLJIT_CONFIG_LOONGARCH_64 1 */ + +/* #define SLJIT_CONFIG_AUTO 1 */ +/* #define SLJIT_CONFIG_UNSUPPORTED 1 */ + +/*****************/ +/* Sanity check.
*/ +/*****************/ + +/* Counts the selectors (SLJIT_CONFIG_AUTO and SLJIT_CONFIG_UNSUPPORTED included) that are set to a non-zero value; two or more is an error. */ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + + (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + + (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) \ + + (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + + (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + + (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + + (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + + (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \ + + (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ + + (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + + (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64) \ + + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ + + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2 +#error "Multiple architectures are selected" +#endif + +#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + && !(defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) \ + && !(defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + && !(defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + && !(defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + && !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + && !(defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \ + && !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + && !(defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64) \ + && !(defined SLJIT_CONFIG_UNSUPPORTED &&
SLJIT_CONFIG_UNSUPPORTED) \ + && !(defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) +/* Nothing was selected: fall back to automatic detection, unless SLJIT_CONFIG_AUTO was explicitly defined as zero. */ +#if defined SLJIT_CONFIG_AUTO && !SLJIT_CONFIG_AUTO +#error "An architecture must be selected" +#else /* SLJIT_CONFIG_AUTO */ +#define SLJIT_CONFIG_AUTO 1 +#endif /* !SLJIT_CONFIG_AUTO */ +#endif /* !SLJIT_CONFIG */ + +/********************************************************/ +/* Automatic CPU detection (requires compiler support). */ +/********************************************************/ + +#if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) +#ifndef _WIN32 + +#if defined(__i386__) || defined(__i386) +#define SLJIT_CONFIG_X86_32 1 +#elif defined(__x86_64__) +#define SLJIT_CONFIG_X86_64 1 +#elif defined(__aarch64__) +#define SLJIT_CONFIG_ARM_64 1 +#elif defined(__thumb2__) +#define SLJIT_CONFIG_ARM_THUMB2 1 +#elif (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \ + ((defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7S__)) \ + || (defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8R__)) \ + || (defined(__ARM_ARCH_9A__))) +#define SLJIT_CONFIG_ARM_V7 1 +#elif defined(__arm__) || defined (__ARM__) +#define SLJIT_CONFIG_ARM_V6 1 +#elif defined(__ppc64__) || defined(__powerpc64__) || (defined(_ARCH_PPC64) && defined(__64BIT__)) || (defined(_POWER) && defined(__64BIT__)) +#define SLJIT_CONFIG_PPC_64 1 +#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER) +#define SLJIT_CONFIG_PPC_32 1 +#elif defined(__mips__) && !defined(_LP64) +#define SLJIT_CONFIG_MIPS_32 1 +#elif defined(__mips64) +#define SLJIT_CONFIG_MIPS_64 1 +#elif defined (__riscv_xlen) && (__riscv_xlen == 32) +#define SLJIT_CONFIG_RISCV_32 1 +#elif defined (__riscv_xlen) && (__riscv_xlen == 64) +#define SLJIT_CONFIG_RISCV_64 1 +#elif defined (__loongarch_lp64) +#define SLJIT_CONFIG_LOONGARCH_64 1 +#elif defined(__s390x__) +#define SLJIT_CONFIG_S390X 1 +#else +/* Unsupported architecture */ +#define
SLJIT_CONFIG_UNSUPPORTED 1 +#endif + +#else /* _WIN32 */ + +#if defined(_M_X64) || defined(__x86_64__) +#define SLJIT_CONFIG_X86_64 1 +#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__) +#define SLJIT_CONFIG_ARM_THUMB2 1 +#elif (defined(_M_ARM) && _M_ARM >= 7) +#define SLJIT_CONFIG_ARM_V7 1 +#elif defined(_ARM_) +#define SLJIT_CONFIG_ARM_V6 1 +#elif defined(_M_ARM64) || defined(__aarch64__) +#define SLJIT_CONFIG_ARM_64 1 +#else +/* Every other Windows target is treated as 32-bit x86. */ +#define SLJIT_CONFIG_X86_32 1 +#endif + +#endif /* !_WIN32 */ +#endif /* SLJIT_CONFIG_AUTO */ + +/* An unsupported target drops any SLJIT_EXECUTABLE_ALLOCATOR setting. */ +#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#undef SLJIT_EXECUTABLE_ALLOCATOR +#endif /* SLJIT_CONFIG_UNSUPPORTED */ + +/******************************/ +/* CPU family type detection. */ +/******************************/ + +/* The three 32-bit ARM variants share the SLJIT_CONFIG_ARM_32 family macro. */ +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +#define SLJIT_CONFIG_ARM_32 1 +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#define SLJIT_CONFIG_X86 1 +#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +#define SLJIT_CONFIG_ARM 1 +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_CONFIG_PPC 1 +#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#define SLJIT_CONFIG_MIPS 1 +#elif (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) || (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) +#define SLJIT_CONFIG_RISCV 1 +#elif (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64) +#define SLJIT_CONFIG_LOONGARCH 1 +#endif + +#endif /* SLJIT_CONFIG_CPU_H_ */ diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h new
file mode 100644 index 0000000..de06dd8 --- /dev/null +++ b/src/sljit/sljitConfigInternal.h @@ -0,0 +1,907 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SLJIT_CONFIG_INTERNAL_H_ +#define SLJIT_CONFIG_INTERNAL_H_ + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE))) +#include +#endif + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG \ + && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE) || !defined(SLJIT_HALT_PROCESS))) +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + SLJIT defines the following architecture dependent types and macros: + + Types: + sljit_s8, sljit_u8 : signed and unsigned 8 bit integer type + sljit_s16, sljit_u16 : signed and unsigned 16 bit integer type + sljit_s32, sljit_u32 : signed and unsigned 32 bit integer type + sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer + sljit_sp, sljit_up : signed and unsigned pointer value (usually the same as + sljit_uw, but some 64 bit ABIs may use 32 bit pointers) + sljit_f32 : 32 bit single precision floating point value + sljit_f64 : 64 bit double precision floating point value + + Macros for feature detection (boolean): + SLJIT_32BIT_ARCHITECTURE : 32 bit architecture + SLJIT_64BIT_ARCHITECTURE : 64 bit architecture + SLJIT_LITTLE_ENDIAN : little endian architecture + SLJIT_BIG_ENDIAN : big endian architecture + SLJIT_UNALIGNED : unaligned memory accesses for non-fpu operations are supported + SLJIT_FPU_UNALIGNED : unaligned memory accesses for fpu operations are supported + SLJIT_MASKED_SHIFT : all word shifts are always masked + SLJIT_MASKED_SHIFT32 : all 32 bit shifts are always masked + SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information + + Constants: + SLJIT_NUMBER_OF_REGISTERS : number of available registers + SLJIT_NUMBER_OF_SCRATCH_REGISTERS : number of available scratch registers + SLJIT_NUMBER_OF_SAVED_REGISTERS : number of available saved registers + SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers + SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : 
number of available floating point scratch registers + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers + SLJIT_NUMBER_OF_TEMPORARY_REGISTERS : number of available temporary registers + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS : number of available temporary floating point registers + SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index + SLJIT_F32_SHIFT : the shift required to apply when accessing + a single precision floating point array by index + SLJIT_F64_SHIFT : the shift required to apply when accessing + a double precision floating point array by index + SLJIT_PREF_SHIFT_REG : x86 systems prefers ecx for shifting by register + the scratch register index of ecx is stored in this variable + SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET) + SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address + SLJIT_CONV_MAX_FLOAT : result when a floating point value is converted to integer + and the floating point value is higher than the maximum integer value + (possible values: SLJIT_CONV_RESULT_MAX_INT or SLJIT_CONV_RESULT_MIN_INT) + SLJIT_CONV_MIN_FLOAT : result when a floating point value is converted to integer + and the floating point value is lower than the minimum integer value + (possible values: SLJIT_CONV_RESULT_MAX_INT or SLJIT_CONV_RESULT_MIN_INT) + SLJIT_CONV_NAN_FLOAT : result when a NaN floating point value is converted to integer + (possible values: SLJIT_CONV_RESULT_MAX_INT, SLJIT_CONV_RESULT_MIN_INT, + or SLJIT_CONV_RESULT_ZERO) + + Other macros: + SLJIT_TMP_R0 .. R9 : accessing temporary registers + SLJIT_TMP_R(i) : accessing temporary registers + SLJIT_TMP_FR0 .. 
FR9 : accessing temporary floating point registers + SLJIT_TMP_FR(i) : accessing temporary floating point registers + SLJIT_TMP_DEST_REG : a temporary register for results + SLJIT_TMP_MEM_REG : a temporary base register for accessing memory + (can be the same as SLJIT_TMP_DEST_REG) + SLJIT_TMP_DEST_FREG : a temporary register for float results + SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT + SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (platform independent helper) + SLJIT_F64_SECOND(reg) : provides the register index of the second 32 bit part of a 64 bit + floating point register when SLJIT_HAS_F64_AS_F32_PAIR returns non-zero +*/ + +/************************************************************/ +/* Intel Control-flow Enforcement Technology (CET) support. */ +/************************************************************/ + +#ifdef SLJIT_CONFIG_X86 + +#if defined(__CET__) && !(defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) +#define SLJIT_CONFIG_X86_CET 1 +#endif + +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined(__GNUC__) +#include +#endif + +#endif /* SLJIT_CONFIG_X86 */ + +/**********************************/ +/* External function definitions. */ +/**********************************/ + +/* General macros: + Note: SLJIT is designed to be as independent from them as possible.
+ + In release mode (SLJIT_DEBUG is not defined) only the following + external functions are needed: +*/ + +#ifndef SLJIT_MALLOC +#define SLJIT_MALLOC(size, allocator_data) (malloc(size)) +#endif + +#ifndef SLJIT_FREE +#define SLJIT_FREE(ptr, allocator_data) (free(ptr)) +#endif + +#ifndef SLJIT_MEMCPY +#define SLJIT_MEMCPY(dest, src, len) (memcpy(dest, src, len)) +#endif + +#ifndef SLJIT_MEMMOVE +#define SLJIT_MEMMOVE(dest, src, len) (memmove(dest, src, len)) +#endif + +#ifndef SLJIT_ZEROMEM +#define SLJIT_ZEROMEM(dest, len) (memset(dest, 0, len)) +#endif + +/***************************/ +/* Compiler helper macros. */ +/***************************/ + +#if !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) + +#if defined(__GNUC__) && (__GNUC__ >= 3) +#define SLJIT_LIKELY(x) __builtin_expect((x), 1) +#define SLJIT_UNLIKELY(x) __builtin_expect((x), 0) +#else +#define SLJIT_LIKELY(x) (x) +#define SLJIT_UNLIKELY(x) (x) +#endif + +#endif /* !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) */ + +#ifndef SLJIT_INLINE +/* Inline functions. Some old compilers do not support them. */ +#ifdef __SUNPRO_C +#if __SUNPRO_C < 0x560 +#define SLJIT_INLINE +#else +#define SLJIT_INLINE inline +#endif /* __SUNPRO_C < 0x560 */ +#else +#define SLJIT_INLINE __inline +#endif /* __SUNPRO_C */ +#endif /* !SLJIT_INLINE */ + +#ifndef SLJIT_NOINLINE +/* Not inline functions. */ +#if defined(__GNUC__) +#define SLJIT_NOINLINE __attribute__ ((noinline)) +#else +#define SLJIT_NOINLINE +#endif +#endif /* !SLJIT_NOINLINE */ + +#ifndef SLJIT_UNUSED_ARG +/* Unused arguments. */ +#define SLJIT_UNUSED_ARG(arg) (void)arg +#endif + +/*********************************/ +/* Type of public API functions. */ +/*********************************/ + +#ifndef SLJIT_API_FUNC_ATTRIBUTE +#if (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) +/* Static ABI functions. For all-in-one programs. */ + +#if defined(__GNUC__) +/* Disable unused warnings in gcc.
*/ +#define SLJIT_API_FUNC_ATTRIBUTE static __attribute__((unused)) +#else +#define SLJIT_API_FUNC_ATTRIBUTE static +#endif + +#else +#define SLJIT_API_FUNC_ATTRIBUTE +#endif /* (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) */ +#endif /* !defined SLJIT_API_FUNC_ATTRIBUTE */ + +/****************************/ +/* Instruction cache flush. */ +/****************************/ + +#ifdef __APPLE__ +#include +#endif + +/* + * TODO: + * + * clang >= 15 could be safe to enable below + * older versions are known to abort in some targets + * https://github.com/PhilipHazel/pcre2/issues/92 + * + * beware some vendors (ex: Microsoft, Apple) are known to have + * removed the code to support this builtin even if the call for + * __has_builtin reports it is available. + * + * make sure linking doesn't fail because __clear_cache() is + * missing before changing it or add an exception so that the + * system provided method that should be defined below is used + * instead. + */ +#if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) +#if __has_builtin(__builtin___clear_cache) && !defined(__clang__) + +/* + * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=91248 + * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=93811 + * gcc's clear_cache builtin for power is broken + */ +#if !defined(SLJIT_CONFIG_PPC) +#define SLJIT_CACHE_FLUSH(from, to) \ + __builtin___clear_cache((char*)(from), (char*)(to)) +#endif + +#endif /* gcc >= 10 */ +#endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */ + +#ifndef SLJIT_CACHE_FLUSH + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +/* Not required to implement on archs with unified caches. */ +#define SLJIT_CACHE_FLUSH(from, to) + +#elif defined(__APPLE__) && MAC_OS_X_VERSION_MIN_REQUIRED >= 1050 + +/* Supported by all macs since Mac OS 10.5. + However, it does not work on non-jailbroken iOS devices, + although the compilation is successful.
*/ +#include +#define SLJIT_CACHE_FLUSH(from, to) \ + sys_icache_invalidate((void*)(from), (size_t)((char*)(to) - (char*)(from))) + +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */ +#define SLJIT_CACHE_FLUSH(from, to) \ + ppc_cache_flush((from), (to)) +#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 + +#elif defined(_WIN32) + +#define SLJIT_CACHE_FLUSH(from, to) \ + FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from)) + +#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__) + +#define SLJIT_CACHE_FLUSH(from, to) \ + __builtin___clear_cache((char*)(from), (char*)(to)) + +#elif defined __ANDROID__ + +/* Android ARMv7 with gcc lacks __clear_cache; use cacheflush instead. */ +#include +#define SLJIT_CACHE_FLUSH(from, to) \ + cacheflush((long)(from), (long)(to), 0) + +#else + +/* Call __ARM_NR_cacheflush on ARM-Linux or the corresponding MIPS syscall. */ +#define SLJIT_CACHE_FLUSH(from, to) \ + __clear_cache((char*)(from), (char*)(to)) + +#endif + +#endif /* !SLJIT_CACHE_FLUSH */ + +/******************************************************/ +/* Integer and floating point type definitions. */ +/******************************************************/ + +/* 8 bit byte type. */ +typedef unsigned char sljit_u8; +typedef signed char sljit_s8; + +/* 16 bit half-word type. */ +typedef unsigned short int sljit_u16; +typedef signed short int sljit_s16; + +/* 32 bit integer type. */ +typedef unsigned int sljit_u32; +typedef signed int sljit_s32; + +/* Machine word type. Enough for storing a pointer. + 32 bit for 32 bit machines. + 64 bit for 64 bit machines. */ +#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +/* Just to have something. 
*/ +#define SLJIT_WORD_SHIFT 0 +typedef unsigned int sljit_uw; +typedef int sljit_sw; +#elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + && !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + && !(defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64) +#define SLJIT_32BIT_ARCHITECTURE 1 +#define SLJIT_WORD_SHIFT 2 +typedef unsigned int sljit_uw; +typedef int sljit_sw; +#else +#define SLJIT_64BIT_ARCHITECTURE 1 +#define SLJIT_WORD_SHIFT 3 +#ifdef _WIN32 +#ifdef __GNUC__ +/* These types do not require windows.h */ +typedef unsigned long long sljit_uw; +typedef long long sljit_sw; +#else +typedef unsigned __int64 sljit_uw; +typedef __int64 sljit_sw; +#endif +#else /* !_WIN32 */ +typedef unsigned long int sljit_uw; +typedef long int sljit_sw; +#endif /* _WIN32 */ +#endif + +typedef sljit_sw sljit_sp; +typedef sljit_uw sljit_up; + +/* Floating point types. */ +typedef float sljit_f32; +typedef double sljit_f64; + +/* Shift for pointer sized data. */ +#define SLJIT_POINTER_SHIFT SLJIT_WORD_SHIFT + +/* Shifts for single and double precision sized data.
*/ +#define SLJIT_F32_SHIFT 2 +#define SLJIT_F64_SHIFT 3 + +#define SLJIT_CONV_RESULT_MAX_INT 0 +#define SLJIT_CONV_RESULT_MIN_INT 1 +#define SLJIT_CONV_RESULT_ZERO 2 + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#elif (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_ZERO +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MAX_INT +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MAX_INT +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) +#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT +#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT +#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_ZERO +#else +#error "Result for float to integer conversion is not defined" +#endif + +#ifndef SLJIT_W + +/* Defining long constants. 
*/ +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#ifdef _WIN64 +#define SLJIT_W(w) (w##ll) +#else /* !windows */ +#define SLJIT_W(w) (w##l) +#endif /* windows */ +#else /* 32 bit */ +#define SLJIT_W(w) (w) +#endif /* unknown */ + +#endif /* !SLJIT_W */ + +/*************************/ +/* Endianness detection. */ +/*************************/ + +#if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) + +/* These macros are mostly useful for the applications. */ +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +#ifdef __LITTLE_ENDIAN__ +#define SLJIT_LITTLE_ENDIAN 1 +#else +#define SLJIT_BIG_ENDIAN 1 +#endif + +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + +#ifdef __MIPSEL__ +#define SLJIT_LITTLE_ENDIAN 1 +#else +#define SLJIT_BIG_ENDIAN 1 +#endif + +#ifndef SLJIT_MIPS_REV + +/* Auto detecting mips revision. */ +#if (defined __mips_isa_rev) && (__mips_isa_rev >= 6) +#define SLJIT_MIPS_REV 6 +#elif defined(__mips_isa_rev) && __mips_isa_rev >= 1 +#define SLJIT_MIPS_REV __mips_isa_rev +#elif defined(__clang__) \ + && (defined(_MIPS_ARCH_OCTEON) || defined(_MIPS_ARCH_P5600)) +/* clang either forgets to define (clang-7) __mips_isa_rev at all + * or sets it to zero (clang-8,-9) for -march=octeon (MIPS64 R2+) + * and -march=p5600 (MIPS32 R5). + * It also sets the __mips macro to 64 or 32 for -mipsN when N <= 5 + * (should be set to N exactly) so we cannot rely on this too. + */ +#define SLJIT_MIPS_REV 1 +#endif + +#endif /* !SLJIT_MIPS_REV */ + +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +#define SLJIT_BIG_ENDIAN 1 + +#else +#define SLJIT_LITTLE_ENDIAN 1 +#endif + +#endif /* !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) */ + +/* Sanity check. 
*/ +#if (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#error "Exactly one endianness must be selected" +#endif + +#if !(defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && !(defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#error "Exactly one endianness must be selected" +#endif + +#ifndef SLJIT_UNALIGNED + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ + || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + || (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) +#define SLJIT_UNALIGNED 1 +#endif + +#endif /* !SLJIT_UNALIGNED */ + +#ifndef SLJIT_FPU_UNALIGNED + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ + || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + || (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) +#define SLJIT_FPU_UNALIGNED 1 +#endif + +#endif /* !SLJIT_FPU_UNALIGNED */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +/* Auto detect SSE2 support using CPUID. + On 64 bit x86 cpus, sse2 must be present. */ +#define SLJIT_DETECT_SSE2 1 +#endif + +/*****************************************************************************************/ +/* Calling convention of functions generated by SLJIT or called from the generated code. 
*/ +/*****************************************************************************************/ + +#ifndef SLJIT_FUNC +#define SLJIT_FUNC +#endif /* !SLJIT_FUNC */ + +#ifndef SLJIT_INDIRECT_CALL +#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (!defined _CALL_ELF || _CALL_ELF == 1)) \ + || ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX) +/* It seems certain ppc compilers use an indirect addressing for functions + which makes things complicated. */ +#define SLJIT_INDIRECT_CALL 1 +#endif +#endif /* SLJIT_INDIRECT_CALL */ + +/* The offset which needs to be subtracted from the return address to +determine the next executed instruction after return. */ +#ifndef SLJIT_RETURN_ADDRESS_OFFSET +#define SLJIT_RETURN_ADDRESS_OFFSET 0 +#endif /* SLJIT_RETURN_ADDRESS_OFFSET */ + +/***************************************************/ +/* Functions of the built-in executable allocator. */ +/***************************************************/ + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size); +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr); +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); +#define SLJIT_BUILTIN_MALLOC_EXEC(size, exec_allocator_data) sljit_malloc_exec(size) +#define SLJIT_BUILTIN_FREE_EXEC(ptr, exec_allocator_data) sljit_free_exec(ptr) + +#ifndef SLJIT_MALLOC_EXEC +#define SLJIT_MALLOC_EXEC(size, exec_allocator_data) SLJIT_BUILTIN_MALLOC_EXEC((size), (exec_allocator_data)) +#endif /* SLJIT_MALLOC_EXEC */ + +#ifndef SLJIT_FREE_EXEC +#define SLJIT_FREE_EXEC(ptr, exec_allocator_data) SLJIT_BUILTIN_FREE_EXEC((ptr), (exec_allocator_data)) +#endif /* SLJIT_FREE_EXEC */ + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code); +#define SLJIT_EXEC_OFFSET(code) sljit_exec_offset(code) +#endif /* SLJIT_PROT_EXECUTABLE_ALLOCATOR */ + 
+#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ + +#ifndef SLJIT_EXEC_OFFSET +#define SLJIT_EXEC_OFFSET(ptr) 0 +#endif + +/**********************************************/ +/* Registers and locals offset determination. */ +/**********************************************/ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 1 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R0 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 +#define SLJIT_LOCALS_OFFSET_BASE (8 * SSIZE_OF(sw)) +#define SLJIT_PREF_SHIFT_REG SLJIT_R2 +#define SLJIT_MASKED_SHIFT 1 +#define SLJIT_MASKED_SHIFT32 1 + +#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +#define SLJIT_NUMBER_OF_REGISTERS 13 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 2 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1 +#ifndef _WIN64 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 +#define SLJIT_LOCALS_OFFSET_BASE 0 +#else /* _WIN64 */ +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10 +#define SLJIT_LOCALS_OFFSET_BASE (4 * SSIZE_OF(sw)) +#endif /* !_WIN64 */ +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R0 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 +#define SLJIT_PREF_SHIFT_REG SLJIT_R3 +#define SLJIT_MASKED_SHIFT 1 +#define SLJIT_MASKED_SHIFT32 1 + +#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 2 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#define 
SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + +#define SLJIT_NUMBER_OF_REGISTERS 26 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R0 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 +#define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw)) +#define SLJIT_MASKED_SHIFT 1 +#define SLJIT_MASKED_SHIFT32 1 + +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +#define SLJIT_NUMBER_OF_REGISTERS 23 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX) +#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw)) +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +/* Add +1 for double alignment. 
*/ +#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * (sljit_s32)sizeof(sljit_sw)) +#else +#define SLJIT_LOCALS_OFFSET_BASE (3 * (sljit_s32)sizeof(sljit_sw)) +#endif /* SLJIT_CONFIG_PPC_64 || _AIX */ + +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + +#define SLJIT_NUMBER_OF_REGISTERS 21 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw)) +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 13 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 6 +#else +#define SLJIT_LOCALS_OFFSET_BASE 0 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 29 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#endif +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 3 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 +#define SLJIT_MASKED_SHIFT 1 +#define SLJIT_MASKED_SHIFT32 1 + +#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) + +#define SLJIT_NUMBER_OF_REGISTERS 23 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 12 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 +#define SLJIT_LOCALS_OFFSET_BASE 0 +#define SLJIT_MASKED_SHIFT 1 +#define SLJIT_MASKED_SHIFT32 1 + +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +/* + * https://refspecs.linuxbase.org/ELF/zSeries/lzsabi0_zSeries.html#STACKFRAME + * + * 160 + * .. FR6 + * .. FR4 + * .. FR2 + * 128 FR0 + * 120 R15 (used for SP) + * 112 R14 + * 104 R13 + * 96 R12 + * .. + * 48 R6 + * .. 
+ * 16 R2 + * 8 RESERVED + * 0 SP + */ +#define SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE 160 + +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R2 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 +#define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE +#define SLJIT_MASKED_SHIFT 1 + +#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) + +#define SLJIT_NUMBER_OF_REGISTERS 23 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2 +#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1 +#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1 +#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0 +#define SLJIT_LOCALS_OFFSET_BASE 0 +#define SLJIT_MASKED_SHIFT 1 +#define SLJIT_MASKED_SHIFT32 1 + +#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +/* Just to have something. 
*/ +#define SLJIT_NUMBER_OF_REGISTERS 0 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0 +#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 0 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 0 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 +#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 0 +#define SLJIT_TMP_DEST_REG 0 +#define SLJIT_TMP_MEM_REG 0 +#define SLJIT_TMP_DEST_FREG 0 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#endif + +#define SLJIT_LOCALS_OFFSET (SLJIT_LOCALS_OFFSET_BASE) + +#define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \ + (SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS) + +#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \ + (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) + +/**********************************/ +/* Temporary register management. */ +/**********************************/ + +#define SLJIT_TMP_REGISTER_BASE (SLJIT_NUMBER_OF_REGISTERS + 2) +#define SLJIT_TMP_FREGISTER_BASE (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) + +/* WARNING: Accessing temporary registers is not recommended, because they + are also used by the JIT compiler for various computations. Using them + might have any side effects including incorrect operations and crashes, + so use them at your own risk. The machine registers themselves might have + limitations, e.g. the r0 register on s390x / ppc cannot be used as + base address for memory operations. 
*/ + +/* Temporary registers */ +#define SLJIT_TMP_R0 (SLJIT_TMP_REGISTER_BASE + 0) +#define SLJIT_TMP_R1 (SLJIT_TMP_REGISTER_BASE + 1) +#define SLJIT_TMP_R2 (SLJIT_TMP_REGISTER_BASE + 2) +#define SLJIT_TMP_R3 (SLJIT_TMP_REGISTER_BASE + 3) +#define SLJIT_TMP_R4 (SLJIT_TMP_REGISTER_BASE + 4) +#define SLJIT_TMP_R5 (SLJIT_TMP_REGISTER_BASE + 5) +#define SLJIT_TMP_R6 (SLJIT_TMP_REGISTER_BASE + 6) +#define SLJIT_TMP_R7 (SLJIT_TMP_REGISTER_BASE + 7) +#define SLJIT_TMP_R8 (SLJIT_TMP_REGISTER_BASE + 8) +#define SLJIT_TMP_R9 (SLJIT_TMP_REGISTER_BASE + 9) +#define SLJIT_TMP_R(i) (SLJIT_TMP_REGISTER_BASE + (i)) + +#define SLJIT_TMP_FR0 (SLJIT_TMP_FREGISTER_BASE + 0) +#define SLJIT_TMP_FR1 (SLJIT_TMP_FREGISTER_BASE + 1) +#define SLJIT_TMP_FR2 (SLJIT_TMP_FREGISTER_BASE + 2) +#define SLJIT_TMP_FR3 (SLJIT_TMP_FREGISTER_BASE + 3) +#define SLJIT_TMP_FR4 (SLJIT_TMP_FREGISTER_BASE + 4) +#define SLJIT_TMP_FR5 (SLJIT_TMP_FREGISTER_BASE + 5) +#define SLJIT_TMP_FR6 (SLJIT_TMP_FREGISTER_BASE + 6) +#define SLJIT_TMP_FR7 (SLJIT_TMP_FREGISTER_BASE + 7) +#define SLJIT_TMP_FR8 (SLJIT_TMP_FREGISTER_BASE + 8) +#define SLJIT_TMP_FR9 (SLJIT_TMP_FREGISTER_BASE + 9) +#define SLJIT_TMP_FR(i) (SLJIT_TMP_FREGISTER_BASE + (i)) + +/********************************/ +/* CPU status flags management. */ +/********************************/ + +#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ + || (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ + || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + || (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) +#define SLJIT_HAS_STATUS_FLAGS_STATE 1 +#endif + +/***************************************/ +/* Floating point register management. 
*/ +/***************************************/ + +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define SLJIT_F64_SECOND(reg) \ + ((reg) + SLJIT_FS0 + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS) +#else /* !SLJIT_CONFIG_ARM_32 && !SLJIT_CONFIG_MIPS_32 */ +#define SLJIT_F64_SECOND(reg) \ + (reg) +#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */ + +/*************************************/ +/* Debug and verbose related macros. */ +/*************************************/ + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + +#if !defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE) + +/* SLJIT_HALT_PROCESS must halt the process. */ +#ifndef SLJIT_HALT_PROCESS +#define SLJIT_HALT_PROCESS() \ + abort(); +#endif /* !SLJIT_HALT_PROCESS */ + +#endif /* !SLJIT_ASSERT || !SLJIT_UNREACHABLE */ + +/* Feel free to redefine these two macros. */ +#ifndef SLJIT_ASSERT + +#define SLJIT_ASSERT(x) \ + do { \ + if (SLJIT_UNLIKELY(!(x))) { \ + printf("Assertion failed at " __FILE__ ":%d\n", __LINE__); \ + SLJIT_HALT_PROCESS(); \ + } \ + } while (0) + +#endif /* !SLJIT_ASSERT */ + +#ifndef SLJIT_UNREACHABLE + +#define SLJIT_UNREACHABLE() \ + do { \ + printf("Should never been reached " __FILE__ ":%d\n", __LINE__); \ + SLJIT_HALT_PROCESS(); \ + } while (0) + +#endif /* !SLJIT_UNREACHABLE */ + +#else /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ + +/* Forcing empty, but valid statements. */ +#undef SLJIT_ASSERT +#undef SLJIT_UNREACHABLE + +#define SLJIT_ASSERT(x) \ + do { } while (0) +#define SLJIT_UNREACHABLE() \ + do { } while (0) + +#endif /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ + +#ifndef SLJIT_COMPILE_ASSERT + +#define SLJIT_COMPILE_ASSERT(x, description) \ + switch(0) { case 0: case ((x) ? 
1 : 0): break; } + +#endif /* !SLJIT_COMPILE_ASSERT */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* SLJIT_CONFIG_INTERNAL_H_ */ diff --git a/src/sljit/sljitLir.c b/src/sljit/sljitLir.c new file mode 100644 index 0000000..2dca17c --- /dev/null +++ b/src/sljit/sljitLir.c @@ -0,0 +1,3561 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sljitLir.h" + +#ifdef _WIN32 + +#include + +#endif /* _WIN32 */ + +#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) + +/* These libraries are needed for the macros below. 
*/ +#include +#include + +#endif /* SLJIT_STD_MACROS_DEFINED */ + +#define CHECK_ERROR() \ + do { \ + if (SLJIT_UNLIKELY(compiler->error)) \ + return compiler->error; \ + } while (0) + +#define CHECK_ERROR_PTR() \ + do { \ + if (SLJIT_UNLIKELY(compiler->error)) \ + return NULL; \ + } while (0) + +#define FAIL_IF(expr) \ + do { \ + if (SLJIT_UNLIKELY(expr)) \ + return compiler->error; \ + } while (0) + +#define PTR_FAIL_IF(expr) \ + do { \ + if (SLJIT_UNLIKELY(expr)) \ + return NULL; \ + } while (0) + +#define FAIL_IF_NULL(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_ALLOC_FAILED; \ + return SLJIT_ERR_ALLOC_FAILED; \ + } \ + } while (0) + +#define PTR_FAIL_IF_NULL(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_ALLOC_FAILED; \ + return NULL; \ + } \ + } while (0) + +#define PTR_FAIL_WITH_EXEC_IF(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_EX_ALLOC_FAILED; \ + return NULL; \ + } \ + } while (0) + +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#define SSIZE_OF(type) ((sljit_s32)sizeof(sljit_ ## type)) + +#define VARIABLE_FLAG_SHIFT (10) +/* All variable flags are even. 
*/ +#define VARIABLE_FLAG_MASK (0x3e << VARIABLE_FLAG_SHIFT) +#define GET_FLAG_TYPE(op) ((op) >> VARIABLE_FLAG_SHIFT) + +#define GET_OPCODE(op) \ + ((op) & ~(SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + +#define HAS_FLAGS(op) \ + ((op) & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + +#define GET_ALL_FLAGS(op) \ + ((op) & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define TYPE_CAST_NEEDED(op) \ + ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S32) +#else /* !SLJIT_64BIT_ARCHITECTURE */ +#define TYPE_CAST_NEEDED(op) \ + ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16) +#endif /* SLJIT_64BIT_ARCHITECTURE */ + +#define BUF_SIZE 4096 + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) +#define ABUF_SIZE 2048 +#else +#define ABUF_SIZE 4096 +#endif + +/* Parameter parsing. */ +#define REG_MASK 0x7f +#define OFFS_REG(reg) (((reg) >> 8) & REG_MASK) +#define OFFS_REG_MASK (REG_MASK << 8) +#define TO_OFFS_REG(reg) ((reg) << 8) +#define FAST_IS_REG(reg) ((reg) < REG_MASK) + +/* Mask for argument types. */ +#define SLJIT_ARG_MASK 0x7 +#define SLJIT_ARG_FULL_MASK (SLJIT_ARG_MASK | SLJIT_ARG_TYPE_SCRATCH_REG) + +/* Mask for register pairs. */ +#define REG_PAIR_MASK 0x7f00 +#define REG_PAIR_FIRST(reg) ((reg) & 0x7f) +#define REG_PAIR_SECOND(reg) ((reg) >> 8) + +/* Mask for sljit_emit_enter. */ +#define SLJIT_KEPT_SAVEDS_COUNT(options) ((options) & 0x3) + +/* Getters for simd operations, which returns with log2(size). 
*/ +#define SLJIT_SIMD_GET_OPCODE(type) ((type) & 0xff) +#define SLJIT_SIMD_GET_REG_SIZE(type) (((type) >> 12) & 0x3f) +#define SLJIT_SIMD_GET_ELEM_SIZE(type) (((type) >> 18) & 0x3f) +#define SLJIT_SIMD_GET_ELEM2_SIZE(type) (((type) >> 24) & 0x3f) + +#define SLJIT_SIMD_CHECK_REG(type) (((type) & 0x3f000) >= SLJIT_SIMD_REG_64 && ((type) & 0x3f000) <= SLJIT_SIMD_REG_512) +#define SLJIT_SIMD_TYPE_MASK(m) ((sljit_s32)0xff000fff & ~(SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST | (m))) +#define SLJIT_SIMD_TYPE_MASK2(m) ((sljit_s32)0xc0000fff & ~(SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST | (m))) + +/* Jump flags. */ +#define JUMP_ADDR 0x1 +#define JUMP_MOV_ADDR 0x2 +/* SLJIT_REWRITABLE_JUMP is 0x1000. */ + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +# define PATCH_MB 0x04 +# define PATCH_MW 0x08 +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +# define PATCH_MD 0x10 +# define MOV_ADDR_HI 0x20 +# define JUMP_MAX_SIZE ((sljit_uw)(10 + 3)) +# define CJUMP_MAX_SIZE ((sljit_uw)(2 + 10 + 3)) +#else /* !SLJIT_CONFIG_X86_64 */ +# define JUMP_MAX_SIZE ((sljit_uw)5) +# define CJUMP_MAX_SIZE ((sljit_uw)6) +#endif /* SLJIT_CONFIG_X86_64 */ +# define TYPE_SHIFT 13 +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +/* Bits 7..12 is for debug jump size, SLJIT_REWRITABLE_JUMP is 0x1000 */ +# define JUMP_SIZE_SHIFT 7 +#endif /* SLJIT_DEBUG */ +#endif /* SLJIT_CONFIG_X86 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +# define IS_BL 0x04 +# define PATCH_B 0x08 +#endif /* SLJIT_CONFIG_ARM_V6 || SLJIT_CONFIG_ARM_V7 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) +# define CPOOL_SIZE 512 +#endif /* SLJIT_CONFIG_ARM_V6 */ + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +# define JUMP_SIZE_SHIFT 26 +# define JUMP_MAX_SIZE ((sljit_uw)3) +#endif /* SLJIT_CONFIG_ARM_V7 */ + +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +# define IS_COND 0x04 +# define IS_BL 0x08 + /* conditional + imm8 
*/ +# define PATCH_TYPE1 0x10 + /* conditional + imm20 */ +# define PATCH_TYPE2 0x20 + /* imm11 */ +# define PATCH_TYPE3 0x30 + /* imm24 */ +# define PATCH_TYPE4 0x40 + /* BL + imm24 */ +# define PATCH_TYPE5 0x50 + /* addwi/subwi */ +# define PATCH_TYPE6 0x60 + /* 0xf00 cc code for branches */ +# define JUMP_SIZE_SHIFT 26 +# define JUMP_MAX_SIZE ((sljit_uw)5) +#endif /* SLJIT_CONFIG_ARM_THUMB2 */ + +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +# define IS_COND 0x004 +# define IS_CBZ 0x008 +# define IS_BL 0x010 +# define PATCH_COND 0x020 +# define PATCH_B 0x040 +# define PATCH_B32 0x080 +# define PATCH_ABS48 0x100 +# define PATCH_ABS64 0x200 +# define JUMP_SIZE_SHIFT 58 +# define JUMP_MAX_SIZE ((sljit_uw)5) +#endif /* SLJIT_CONFIG_ARM_64 */ + +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) +# define IS_COND 0x004 +# define IS_CALL 0x008 +# define PATCH_B 0x010 +# define PATCH_ABS_B 0x020 +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +# define PATCH_ABS32 0x040 +# define PATCH_ABS48 0x080 +# define JUMP_SIZE_SHIFT 58 +# define JUMP_MAX_SIZE ((sljit_uw)7) +#else /* !SLJIT_CONFIG_PPC_64 */ +# define JUMP_SIZE_SHIFT 26 +# define JUMP_MAX_SIZE ((sljit_uw)4) +#endif /* SLJIT_CONFIG_PPC_64 */ +#endif /* SLJIT_CONFIG_PPC */ + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) +# define IS_MOVABLE 0x004 +# define IS_JAL 0x008 +# define IS_CALL 0x010 +# define IS_BIT26_COND 0x020 +# define IS_BIT16_COND 0x040 +# define IS_BIT23_COND 0x080 + +# define IS_COND (IS_BIT26_COND | IS_BIT16_COND | IS_BIT23_COND) + +# define PATCH_B 0x100 +# define PATCH_J 0x200 + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +# define PATCH_ABS32 0x400 +# define PATCH_ABS48 0x800 +#endif /* SLJIT_CONFIG_MIPS_64 */ + + /* instruction types */ +# define MOVABLE_INS 0 + /* 1 - 31 last destination register */ + /* no destination (i.e: store) */ +# define UNMOVABLE_INS 32 + /* FPU status register */ +# define FCSR_FCC 33 +#endif /* SLJIT_CONFIG_MIPS */ + +#if 
(defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) +# define IS_COND 0x004 +# define IS_CALL 0x008 + +# define PATCH_B 0x010 +# define PATCH_J 0x020 + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) +# define PATCH_REL32 0x040 +# define PATCH_ABS32 0x080 +# define PATCH_ABS44 0x100 +# define PATCH_ABS52 0x200 +# define JUMP_SIZE_SHIFT 58 +# define JUMP_MAX_SIZE ((sljit_uw)6) +#else /* !SLJIT_CONFIG_RISCV_64 */ +# define JUMP_SIZE_SHIFT 26 +# define JUMP_MAX_SIZE ((sljit_uw)2) +#endif /* SLJIT_CONFIG_RISCV_64 */ +#endif /* SLJIT_CONFIG_RISCV */ + +#if (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) +# define IS_COND 0x004 +# define IS_CALL 0x008 + +# define PATCH_B 0x010 +# define PATCH_J 0x020 + +# define PATCH_REL32 0x040 +# define PATCH_ABS32 0x080 +# define PATCH_ABS52 0x100 +# define JUMP_SIZE_SHIFT 58 +# define JUMP_MAX_SIZE ((sljit_uw)4) + +#endif /* SLJIT_CONFIG_LOONGARCH */ +/* Stack management. */ + +#define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \ + (((scratches < SLJIT_NUMBER_OF_SCRATCH_REGISTERS ? 0 : (scratches - SLJIT_NUMBER_OF_SCRATCH_REGISTERS)) + \ + (saveds) + (sljit_s32)(extra)) * (sljit_s32)sizeof(sljit_sw)) + +#define GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, type) \ + (((fscratches < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS ? 0 : (fscratches - SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)) + \ + (fsaveds)) * SSIZE_OF(type)) + +#define ADJUST_LOCAL_OFFSET(p, i) \ + if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ + (i) += SLJIT_LOCALS_OFFSET; + +#endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */ + +/* Utils can still be used even if SLJIT_CONFIG_UNSUPPORTED is set. 
*/ +#include "sljitUtils.c" + +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +#define SLJIT_CODE_TO_PTR(code) ((void*)((sljit_up)(code) & ~(sljit_up)0x1)) +#elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +#define SLJIT_CODE_TO_PTR(code) ((void*)(*(sljit_up*)code)) +#else /* !SLJIT_CONFIG_ARM_THUMB2 && !SLJIT_INDIRECT_CALL */ +#define SLJIT_CODE_TO_PTR(code) ((void*)(code)) +#endif /* SLJIT_CONFIG_ARM_THUMB2 || SLJIT_INDIRECT_CALL */ + +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) + +#if defined(__NetBSD__) +#include "allocator_src/sljitProtExecAllocatorNetBSD.c" +#else +#include "allocator_src/sljitProtExecAllocatorPosix.c" +#endif + +#elif (defined SLJIT_WX_EXECUTABLE_ALLOCATOR && SLJIT_WX_EXECUTABLE_ALLOCATOR) + +#if defined(_WIN32) +#include "allocator_src/sljitWXExecAllocatorWindows.c" +#else +#include "allocator_src/sljitWXExecAllocatorPosix.c" +#endif + +#else + +#if defined(_WIN32) +#include "allocator_src/sljitExecAllocatorWindows.c" +#elif defined(__APPLE__) +#include "allocator_src/sljitExecAllocatorApple.c" +#elif defined(__FreeBSD__) +#include "allocator_src/sljitExecAllocatorFreeBSD.c" +#else +#include "allocator_src/sljitExecAllocatorPosix.c" +#endif + +#endif + +#else /* !SLJIT_EXECUTABLE_ALLOCATOR */ + +#ifndef SLJIT_UPDATE_WX_FLAGS +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) +#endif + +#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr) + (exec_offset)) +#else +#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr)) +#endif + +/* Argument checking features. */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + +/* Returns with error when an invalid argument is passed. 
*/ + +#define CHECK_ARGUMENT(x) \ + do { \ + if (SLJIT_UNLIKELY(!(x))) \ + return 1; \ + } while (0) + +#define CHECK_RETURN_TYPE sljit_s32 +#define CHECK_RETURN_OK return 0 + +#define CHECK(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + compiler->error = SLJIT_ERR_BAD_ARGUMENT; \ + return SLJIT_ERR_BAD_ARGUMENT; \ + } \ + } while (0) + +#define CHECK_PTR(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + compiler->error = SLJIT_ERR_BAD_ARGUMENT; \ + return NULL; \ + } \ + } while (0) + +#define CHECK_REG_INDEX(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + return -2; \ + } \ + } while (0) + +#elif (defined SLJIT_DEBUG && SLJIT_DEBUG) + +/* Assertion failure occures if an invalid argument is passed. */ +#undef SLJIT_ARGUMENT_CHECKS +#define SLJIT_ARGUMENT_CHECKS 1 + +#define CHECK_ARGUMENT(x) SLJIT_ASSERT(x) +#define CHECK_RETURN_TYPE void +#define CHECK_RETURN_OK return +#define CHECK(x) x +#define CHECK_PTR(x) x +#define CHECK_REG_INDEX(x) x + +#elif (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + +/* Arguments are not checked. */ +#define CHECK_RETURN_TYPE void +#define CHECK_RETURN_OK return +#define CHECK(x) x +#define CHECK_PTR(x) x +#define CHECK_REG_INDEX(x) x + +#else + +/* Arguments are not checked. */ +#define CHECK(x) +#define CHECK_PTR(x) +#define CHECK_REG_INDEX(x) + +#endif /* SLJIT_ARGUMENT_CHECKS */ + +/* --------------------------------------------------------------------- */ +/* Public functions */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#define SLJIT_NEEDS_COMPILER_INIT 1 +static sljit_s32 compiler_initialized = 0; +/* A thread safe initialization. 
*/ +static void init_compiler(void); +#endif + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data) +{ + struct sljit_compiler *compiler = (struct sljit_compiler*)SLJIT_MALLOC(sizeof(struct sljit_compiler), allocator_data); + if (!compiler) + return NULL; + SLJIT_ZEROMEM(compiler, sizeof(struct sljit_compiler)); + + SLJIT_COMPILE_ASSERT( + sizeof(sljit_s8) == 1 && sizeof(sljit_u8) == 1 + && sizeof(sljit_s16) == 2 && sizeof(sljit_u16) == 2 + && sizeof(sljit_s32) == 4 && sizeof(sljit_u32) == 4 + && (sizeof(sljit_up) == 4 || sizeof(sljit_up) == 8) + && sizeof(sljit_up) <= sizeof(sljit_sw) + && sizeof(sljit_up) == sizeof(sljit_sp) + && (sizeof(sljit_sw) == 4 || sizeof(sljit_sw) == 8) + && (sizeof(sljit_uw) == sizeof(sljit_sw)), + invalid_integer_types); + SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_32, + rewritable_jump_and_single_op_must_not_be_the_same); + SLJIT_COMPILE_ASSERT(!(SLJIT_EQUAL & 0x1) && !(SLJIT_LESS & 0x1) && !(SLJIT_F_EQUAL & 0x1) && !(SLJIT_JUMP & 0x1), + conditional_flags_must_be_even_numbers); + + /* Only the non-zero members must be set. 
*/ + compiler->error = SLJIT_SUCCESS; + + compiler->allocator_data = allocator_data; + compiler->buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, allocator_data); + compiler->abuf = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE, allocator_data); + + if (!compiler->buf || !compiler->abuf) { + if (compiler->buf) + SLJIT_FREE(compiler->buf, allocator_data); + if (compiler->abuf) + SLJIT_FREE(compiler->abuf, allocator_data); + SLJIT_FREE(compiler, allocator_data); + return NULL; + } + + compiler->buf->next = NULL; + compiler->buf->used_size = 0; + compiler->abuf->next = NULL; + compiler->abuf->used_size = 0; + + compiler->scratches = -1; + compiler->saveds = -1; + compiler->fscratches = -1; + compiler->fsaveds = -1; + compiler->local_size = -1; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + compiler->args_size = -1; +#endif /* SLJIT_CONFIG_X86_32 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + compiler->cpool = (sljit_uw*)SLJIT_MALLOC(CPOOL_SIZE * sizeof(sljit_uw) + + CPOOL_SIZE * sizeof(sljit_u8), allocator_data); + if (!compiler->cpool) { + SLJIT_FREE(compiler->buf, allocator_data); + SLJIT_FREE(compiler->abuf, allocator_data); + SLJIT_FREE(compiler, allocator_data); + return NULL; + } + compiler->cpool_unique = (sljit_u8*)(compiler->cpool + CPOOL_SIZE); + compiler->cpool_diff = 0xffffffff; +#endif /* SLJIT_CONFIG_ARM_V6 */ + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + compiler->delay_slot = UNMOVABLE_INS; +#endif /* SLJIT_CONFIG_MIPS */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->last_flags = 0; + compiler->last_return = -1; + compiler->logical_local_size = 0; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + +#if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT) + if (!compiler_initialized) { + init_compiler(); + compiler_initialized = 1; + } +#endif + + return compiler; +} + +SLJIT_API_FUNC_ATTRIBUTE void 
sljit_free_compiler(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + struct sljit_memory_fragment *curr; + void *allocator_data = compiler->allocator_data; + SLJIT_UNUSED_ARG(allocator_data); + + buf = compiler->buf; + while (buf) { + curr = buf; + buf = buf->next; + SLJIT_FREE(curr, allocator_data); + } + + buf = compiler->abuf; + while (buf) { + curr = buf; + buf = buf->next; + SLJIT_FREE(curr, allocator_data); + } + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + SLJIT_FREE(compiler->cpool, allocator_data); +#endif + SLJIT_FREE(compiler, allocator_data); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler) +{ + if (compiler->error == SLJIT_SUCCESS) + compiler->error = SLJIT_ERR_ALLOC_FAILED; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) +{ + SLJIT_UNUSED_ARG(exec_allocator_data); + + SLJIT_FREE_EXEC(SLJIT_CODE_TO_PTR(code), exec_allocator_data); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) +{ + if (SLJIT_LIKELY(!!jump) && SLJIT_LIKELY(!!label)) { + jump->flags &= (sljit_uw)~JUMP_ADDR; + jump->u.label = label; + } +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) +{ + if (SLJIT_LIKELY(!!jump)) { + jump->flags |= JUMP_ADDR; + jump->u.target = target; + } +} + +#define SLJIT_CURRENT_FLAGS_ALL \ + (SLJIT_CURRENT_FLAGS_32 | SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE) + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(current_flags); + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + compiler->status_flags_state = current_flags; +#endif + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_flags = 0; + if ((current_flags & 
~(VARIABLE_FLAG_MASK | SLJIT_SET_Z | SLJIT_CURRENT_FLAGS_ALL)) == 0) { + compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_32 | SLJIT_SET_Z)); + } +#endif +} + +/* --------------------------------------------------------------------- */ +/* Private functions */ +/* --------------------------------------------------------------------- */ + +static void* ensure_buf(struct sljit_compiler *compiler, sljit_uw size) +{ + sljit_u8 *ret; + struct sljit_memory_fragment *new_frag; + + SLJIT_ASSERT(size <= 256); + if (compiler->buf->used_size + size <= (BUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory))) { + ret = compiler->buf->memory + compiler->buf->used_size; + compiler->buf->used_size += size; + return ret; + } + new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, compiler->allocator_data); + PTR_FAIL_IF_NULL(new_frag); + new_frag->next = compiler->buf; + compiler->buf = new_frag; + new_frag->used_size = size; + return new_frag->memory; +} + +static void* ensure_abuf(struct sljit_compiler *compiler, sljit_uw size) +{ + sljit_u8 *ret; + struct sljit_memory_fragment *new_frag; + + SLJIT_ASSERT(size <= 256); + if (compiler->abuf->used_size + size <= (ABUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory))) { + ret = compiler->abuf->memory + compiler->abuf->used_size; + compiler->abuf->used_size += size; + return ret; + } + new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE, compiler->allocator_data); + PTR_FAIL_IF_NULL(new_frag); + new_frag->next = compiler->abuf; + compiler->abuf = new_frag; + new_frag->used_size = size; + return new_frag->memory; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size) +{ + CHECK_ERROR_PTR(); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + if (size <= 0 || size > 128) + return NULL; + size = (size + 7) & ~7; +#else + if (size <= 0 || size > 64) + return NULL; + size = 
(size + 3) & ~3; +#endif + return ensure_abuf(compiler, (sljit_uw)size); +} + +static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf = compiler->buf; + struct sljit_memory_fragment *prev = NULL; + struct sljit_memory_fragment *tmp; + + do { + tmp = buf->next; + buf->next = prev; + prev = buf; + buf = tmp; + } while (buf != NULL); + + compiler->buf = prev; +} + +static SLJIT_INLINE void* allocate_executable_memory(sljit_uw size, sljit_s32 options, + void *exec_allocator_data, sljit_sw *executable_offset) +{ + void *code; + struct sljit_generate_code_buffer *buffer; + + if (SLJIT_LIKELY(!(options & SLJIT_GENERATE_CODE_BUFFER))) { + code = SLJIT_MALLOC_EXEC(size, exec_allocator_data); + *executable_offset = SLJIT_EXEC_OFFSET(code); + return code; + } + + buffer = (struct sljit_generate_code_buffer*)exec_allocator_data; + + if (size <= buffer->size) { + *executable_offset = buffer->executable_offset; + return buffer->buffer; + } + + return NULL; +} + +#define SLJIT_MAX_ADDRESS ~(sljit_uw)0 + +#define SLJIT_GET_NEXT_SIZE(ptr) (ptr != NULL) ? ((ptr)->size) : SLJIT_MAX_ADDRESS +#define SLJIT_GET_NEXT_ADDRESS(ptr) (ptr != NULL) ? 
((ptr)->addr) : SLJIT_MAX_ADDRESS + +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + +#define SLJIT_NEXT_DEFINE_TYPES \ + sljit_uw next_label_size; \ + sljit_uw next_jump_addr; \ + sljit_uw next_const_addr; \ + sljit_uw next_min_addr + +#define SLJIT_NEXT_INIT_TYPES() \ + next_label_size = SLJIT_GET_NEXT_SIZE(label); \ + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); \ + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + +#define SLJIT_GET_NEXT_MIN() \ + next_min_addr = sljit_get_next_min(next_label_size, next_jump_addr, next_const_addr); + +static SLJIT_INLINE sljit_uw sljit_get_next_min(sljit_uw next_label_size, + sljit_uw next_jump_addr, sljit_uw next_const_addr) +{ + sljit_uw result = next_jump_addr; + + SLJIT_ASSERT(result == SLJIT_MAX_ADDRESS || result != next_const_addr); + + if (next_const_addr < result) + result = next_const_addr; + + if (next_label_size < result) + result = next_label_size; + + return result; +} + +#endif /* !SLJIT_CONFIG_X86 */ + +static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(local_size); + + compiler->options = options; + compiler->scratches = scratches; + compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_return = args & SLJIT_ARG_MASK; + compiler->logical_local_size = local_size; +#endif +} + +static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(local_size); + + compiler->options = options; + compiler->scratches = scratches; + compiler->saveds = saveds; + compiler->fscratches = fscratches; 
+ compiler->fsaveds = fsaveds; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_return = args & SLJIT_ARG_MASK; + compiler->logical_local_size = local_size; +#endif +} + +static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compiler *compiler) +{ + label->next = NULL; + label->u.index = compiler->label_count++; + label->size = compiler->size; + if (compiler->last_label != NULL) + compiler->last_label->next = label; + else + compiler->labels = label; + compiler->last_label = label; +} + +static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_u32 flags) +{ + jump->next = NULL; + jump->flags = flags; + jump->u.label = NULL; + if (compiler->last_jump != NULL) + compiler->last_jump->next = jump; + else + compiler->jumps = jump; + compiler->last_jump = jump; +} + +static SLJIT_INLINE void set_mov_addr(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_uw offset) +{ + jump->next = NULL; + jump->addr = compiler->size - offset; + jump->flags = JUMP_MOV_ADDR; + jump->u.label = NULL; + if (compiler->last_jump != NULL) + compiler->last_jump->next = jump; + else + compiler->jumps = jump; + compiler->last_jump = jump; +} + +static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_compiler *compiler) +{ + const_->next = NULL; + const_->addr = compiler->size; + if (compiler->last_const != NULL) + compiler->last_const->next = const_; + else + compiler->consts = const_; + compiler->last_const = const_; +} + +#define ADDRESSING_DEPENDS_ON(exp, reg) \ + (((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg)) + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + +static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches) +{ + sljit_s32 word_arg_count, scratch_arg_end, saved_arg_count, float_arg_count, curr_type; + + curr_type = (arg_types & SLJIT_ARG_FULL_MASK); + 
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)

/* True for a scratch, saved or temporary register that is available in the
   current function context. The argument is evaluated several times. */
#define FUNCTION_CHECK_IS_REG(r) \
	(((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) \
	|| ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0) \
	|| ((r) >= SLJIT_TMP_REGISTER_BASE && (r) < (SLJIT_TMP_REGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_REGISTERS)))

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define CHECK_IF_VIRTUAL_REGISTER(p) ((p) <= SLJIT_S3 && (p) >= SLJIT_S8)
#else
#define CHECK_IF_VIRTUAL_REGISTER(p) 0
#endif

/*
 * Validates a memory operand (p, i). Returns non-zero when it is acceptable:
 *  - a function context has been set and p has SLJIT_MEM;
 *  - for SLJIT_MEM1(SLJIT_SP), i lies inside the logical local area;
 *  - otherwise the base register (if any) is available and not virtual;
 *  - with an offset register, a base register exists, the offset register is
 *    available and not virtual, and i (the shift) is in 0..3;
 *  - no bits other than SLJIT_MEM and the two register fields are set.
 */
static sljit_s32 function_check_src_mem(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
	sljit_s32 base_reg;
	sljit_s32 index_reg;

	if (compiler->scratches == -1 || !(p & SLJIT_MEM))
		return 0;

	if (p == SLJIT_MEM1(SLJIT_SP))
		return (i >= 0 && i < compiler->logical_local_size);

	base_reg = p & REG_MASK;

	/* A zero base register field means an absolute address. */
	if (base_reg != 0 && !FUNCTION_CHECK_IS_REG(base_reg))
		return 0;

	if (CHECK_IF_VIRTUAL_REGISTER(base_reg))
		return 0;

	if (p & OFFS_REG_MASK) {
		index_reg = OFFS_REG(p);

		if (base_reg == 0)
			return 0;

		if (!(FUNCTION_CHECK_IS_REG(index_reg)))
			return 0;

		if (CHECK_IF_VIRTUAL_REGISTER(index_reg))
			return 0;

		if ((i & ~0x3) != 0)
			return 0;
	}

	return (p & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK)) == 0;
}

#endif /* SLJIT_ARGUMENT_CHECKS */
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)

#define FUNCTION_CHECK_SRC_MEM(p, i) \
	CHECK_ARGUMENT(function_check_src_mem(compiler, p, i));

/* Validates a source operand: an available register (i must be 0), an
   immediate, or a valid memory operand. */
static sljit_s32 function_check_src(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
	if (compiler->scratches == -1)
		return 0;

	if (!FUNCTION_CHECK_IS_REG(p))
		return (p == SLJIT_IMM) ? 1 : function_check_src_mem(compiler, p, i);

	return (i == 0);
}

#define FUNCTION_CHECK_SRC(p, i) \
	CHECK_ARGUMENT(function_check_src(compiler, p, i));

/* Validates a destination operand: an available register (i must be 0) or a
   valid memory operand. Immediates are not accepted. */
static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
	if (compiler->scratches == -1)
		return 0;

	if (!FUNCTION_CHECK_IS_REG(p))
		return function_check_src_mem(compiler, p, i);

	return (i == 0);
}

#define FUNCTION_CHECK_DST(p, i) \
	CHECK_ARGUMENT(function_check_dst(compiler, p, i));

#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \
	|| (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)

#define FUNCTION_CHECK_IS_FREG(fr, is_32) \
	function_check_is_freg(compiler, (fr), (is_32))

/* Only declared here; the definition is not in this part of the file. */
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32);

#define FUNCTION_FCHECK(p, i, is_32) \
	CHECK_ARGUMENT(function_fcheck(compiler, (p), (i), (is_32)));

/* Validates a float operand: an available float register (i must be 0) or a
   valid memory operand. */
static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i, sljit_s32 is_32)
{
	if (compiler->scratches == -1)
		return 0;

	if (!FUNCTION_CHECK_IS_FREG(p, is_32))
		return function_check_src_mem(compiler, p, i);

	return (i == 0);
}

#else /* !SLJIT_CONFIG_ARM_32 && !SLJIT_CONFIG_MIPS_32 */
#define FUNCTION_CHECK_IS_FREG(fr, is_32) \
	function_check_is_freg(compiler, (fr))

/* True for a float scratch, float saved or temporary float register that is
   available in the current function context. */
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr)
{
	if (compiler->scratches == -1)
		return 0;

	if (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
		return 1;

	if (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0)
		return 1;

	return (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS));
}

#define FUNCTION_FCHECK(p, i, is_32) \
	CHECK_ARGUMENT(function_fcheck(compiler, (p), (i)));

/* Validates a float operand: an available float register (i must be 0) or a
   valid memory operand. */
static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
	if (compiler->scratches == -1)
		return 0;

	/* Same register test as function_check_is_freg; a function context is
	   known to be set at this point. */
	if (function_check_is_freg(compiler, p))
		return (i == 0);

	return function_check_src_mem(compiler, p, i);
}

#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */

#endif /* SLJIT_ARGUMENT_CHECKS */

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)

/* Selects the stream that receives the textual dump of emitted operations. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose)
{
	compiler->verbose = verbose;
}

/* printf length modifier used for sljit_sw values. */
#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
#ifdef _WIN64
#ifdef __GNUC__
#	define SLJIT_PRINT_D	"ll"
#else
#	define SLJIT_PRINT_D	"I64"
#endif
#else
#	define SLJIT_PRINT_D	"l"
#endif
#else
#	define SLJIT_PRINT_D	""
#endif

/* Prints an integer register as rN (scratch), sN (saved), sp or tN (temporary). */
static void sljit_verbose_reg(struct sljit_compiler *compiler, sljit_s32 r)
{
	FILE *out = compiler->verbose;

	if (r < (SLJIT_R0 + compiler->scratches)) {
		fprintf(out, "r%d", r - SLJIT_R0);
		return;
	}

	if (r < SLJIT_SP) {
		fprintf(out, "s%d", SLJIT_NUMBER_OF_REGISTERS - r);
		return;
	}

	if (r == SLJIT_SP) {
		fprintf(out, "sp");
		return;
	}

	fprintf(out, "t%d", r - SLJIT_TMP_REGISTER_BASE);
}

/* Prints a float register as frN (scratch), fsN (saved) or ftN (temporary).
   On 32 bit ARM and MIPS a leading '^' marks the second half of a register. */
static void sljit_verbose_freg(struct sljit_compiler *compiler, sljit_s32 r)
{
	FILE *out = compiler->verbose;

#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \
		|| (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
	if (r >= SLJIT_F64_SECOND(SLJIT_FR0)) {
		fprintf(out, "^");
		r -= SLJIT_F64_SECOND(0);
	}
#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */

	if (r < (SLJIT_FR0 + compiler->fscratches)) {
		fprintf(out, "fr%d", r - SLJIT_FR0);
		return;
	}

	if (r < SLJIT_TMP_FREGISTER_BASE) {
		fprintf(out, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - r);
		return;
	}

	fprintf(out, "ft%d", r - SLJIT_TMP_FREGISTER_BASE);
}

#endif /* SLJIT_VERBOSE */
SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (r >= SLJIT_F64_SECOND(SLJIT_FR0)) { + fprintf(compiler->verbose, "^"); + r -= SLJIT_F64_SECOND(0); + } +#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */ + + if (r < (SLJIT_FR0 + compiler->fscratches)) + fprintf(compiler->verbose, "fr%d", r - SLJIT_FR0); + else if (r < SLJIT_TMP_FREGISTER_BASE) + fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - r); + else + fprintf(compiler->verbose, "ft%d", r - SLJIT_TMP_FREGISTER_BASE); +} + +static void sljit_verbose_param(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if ((p) == SLJIT_IMM) + fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); + else if ((p) & SLJIT_MEM) { + if ((p) & REG_MASK) { + fputc('[', compiler->verbose); + sljit_verbose_reg(compiler, (p) & REG_MASK); + if ((p) & OFFS_REG_MASK) { + fprintf(compiler->verbose, " + "); + sljit_verbose_reg(compiler, OFFS_REG(p)); + if (i) + fprintf(compiler->verbose, " * %d", 1 << (i)); + } + else if (i) + fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); + fputc(']', compiler->verbose); + } + else + fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); + } else + sljit_verbose_reg(compiler, p); +} + +static void sljit_verbose_fparam(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if ((p) & SLJIT_MEM) { + if ((p) & REG_MASK) { + fputc('[', compiler->verbose); + sljit_verbose_reg(compiler, (p) & REG_MASK); + if ((p) & OFFS_REG_MASK) { + fprintf(compiler->verbose, " + "); + sljit_verbose_reg(compiler, OFFS_REG(p)); + if (i) + fprintf(compiler->verbose, "%d", 1 << (i)); + } + else if (i) + fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); + fputc(']', compiler->verbose); + } + else + fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); + } + else + sljit_verbose_freg(compiler, p); +} + +static const char* op0_names[] = { + "breakpoint", "nop", "lmul.uw", "lmul.sw", + "divmod.u", 
"divmod.s", "div.u", "div.s", + "endbr", "skip_frames_before_return" +}; + +static const char* op1_names[] = { + "mov", "mov", "mov", "mov", + "mov", "mov", "mov", "mov", + "mov", "clz", "ctz", "rev", + "rev", "rev", "rev", "rev" +}; + +static const char* op1_types[] = { + "", ".u8", ".s8", ".u16", + ".s16", ".u32", ".s32", "32", + ".p", "", "", "", + ".u16", ".s16", ".u32", ".s32" +}; + +static const char* op2_names[] = { + "add", "addc", "sub", "subc", + "mul", "and", "or", "xor", + "shl", "mshl", "lshr", "mlshr", + "ashr", "mashr", "rotl", "rotr" +}; + +static const char* op2r_names[] = { + "muladd" +}; + +static const char* op_src_dst_names[] = { + "fast_return", "skip_frames_before_fast_return", + "prefetch_l1", "prefetch_l2", + "prefetch_l3", "prefetch_once", + "fast_enter", "get_return_address" +}; + +static const char* fop1_names[] = { + "mov", "conv", "conv", "conv", + "conv", "conv", "conv", "conv", + "cmp", "neg", "abs", +}; + +static const char* fop1_conv_types[] = { + "sw", "s32", "sw", "s32", + "uw", "u32" +}; + +static const char* fop2_names[] = { + "add", "sub", "mul", "div" +}; + +static const char* fop2r_names[] = { + "copysign" +}; + +static const char* simd_op2_names[] = { + "and", "or", "xor" +}; + +static const char* jump_names[] = { + "equal", "not_equal", + "less", "greater_equal", + "greater", "less_equal", + "sig_less", "sig_greater_equal", + "sig_greater", "sig_less_equal", + "overflow", "not_overflow", + "carry", "not_carry", + "atomic_stored", "atomic_not_stored", + "f_equal", "f_not_equal", + "f_less", "f_greater_equal", + "f_greater", "f_less_equal", + "unordered", "ordered", + "ordered_equal", "unordered_or_not_equal", + "ordered_less", "unordered_or_greater_equal", + "ordered_greater", "unordered_or_less_equal", + "unordered_or_equal", "ordered_not_equal", + "unordered_or_less", "ordered_greater_equal", + "unordered_or_greater", "ordered_less_equal", + "jump", "fast_call", + "call", "call_reg_arg" +}; + +static const char* 
call_arg_names[] = { + "void", "w", "32", "p", "f64", "f32" +}; + +#endif /* SLJIT_VERBOSE */ + +/* --------------------------------------------------------------------- */ +/* Arch dependent */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + +#define SLJIT_SKIP_CHECKS(compiler) (compiler)->skip_checks = 1 + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_compiler *compiler) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + struct sljit_jump *jump; +#endif + + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->size > 0); + jump = compiler->jumps; + while (jump) { + /* All jumps have target. */ + CHECK_ARGUMENT((jump->flags & JUMP_ADDR) || jump->u.label != NULL); + jump = jump->next; + } +#endif + CHECK_RETURN_OK; +} + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#define SLJIT_ENTER_CPU_SPECIFIC_OPTIONS (SLJIT_ENTER_USE_VEX) +#else /* !SLJIT_CONFIG_X86 */ +#define SLJIT_ENTER_CPU_SPECIFIC_OPTIONS (0) +#endif /* !SLJIT_CONFIG_X86 */ + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + if (options & SLJIT_ENTER_REG_ARG) { + CHECK_ARGUMENT(!(options & ~(0x3 | SLJIT_ENTER_REG_ARG | SLJIT_ENTER_CPU_SPECIFIC_OPTIONS))); + } else { + CHECK_ARGUMENT((options & ~SLJIT_ENTER_CPU_SPECIFIC_OPTIONS) == 0); + } + CHECK_ARGUMENT(SLJIT_KEPT_SAVEDS_COUNT(options) <= 3 && SLJIT_KEPT_SAVEDS_COUNT(options) <= saveds); + CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= 
SLJIT_NUMBER_OF_SAVED_REGISTERS); + CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); + CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); + CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) <= SLJIT_ARG_TYPE_F32); + CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, (options & SLJIT_ENTER_REG_ARG) ? 0 : saveds, fscratches)); + + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " enter ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s%s", call_arg_names[arg_types & SLJIT_ARG_MASK], + (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? 
"_r" : ""); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); + } + + fprintf(compiler->verbose, "],"); + + if (options & SLJIT_ENTER_REG_ARG) { + if (SLJIT_KEPT_SAVEDS_COUNT(options) > 0) + fprintf(compiler->verbose, " opt:reg_arg(%d),", SLJIT_KEPT_SAVEDS_COUNT(options)); + else + fprintf(compiler->verbose, " opt:reg_arg,"); + } + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + if (options & SLJIT_ENTER_USE_VEX) { + fprintf(compiler->verbose, " opt:use_vex,"); + } +#endif /* !SLJIT_CONFIG_X86 */ + + fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", + scratches, saveds, fscratches, fsaveds, local_size); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + if (options & SLJIT_ENTER_REG_ARG) { + CHECK_ARGUMENT(!(options & ~(0x3 | SLJIT_ENTER_REG_ARG | SLJIT_ENTER_CPU_SPECIFIC_OPTIONS))); + } else { + CHECK_ARGUMENT((options & ~SLJIT_ENTER_CPU_SPECIFIC_OPTIONS) == 0); + } + CHECK_ARGUMENT(SLJIT_KEPT_SAVEDS_COUNT(options) <= 3 && SLJIT_KEPT_SAVEDS_COUNT(options) <= saveds); + CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); + CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); + CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); + CHECK_ARGUMENT((arg_types & 
SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64); + CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, (options & SLJIT_ENTER_REG_ARG) ? 0 : saveds, fscratches)); + + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " set_context ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s%s", call_arg_names[arg_types & SLJIT_ARG_MASK], + (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? "_r" : ""); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); + } + + fprintf(compiler->verbose, "],"); + + if (options & SLJIT_ENTER_REG_ARG) { + if (SLJIT_KEPT_SAVEDS_COUNT(options) > 0) + fprintf(compiler->verbose, " opt:reg_arg(%d),", SLJIT_KEPT_SAVEDS_COUNT(options)); + else + fprintf(compiler->verbose, " opt:reg_arg,"); + } + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + if (options & SLJIT_ENTER_USE_VEX) { + fprintf(compiler->verbose, " opt:use_vex,"); + } +#endif /* !SLJIT_CONFIG_X86 */ + + fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", + scratches, saveds, fscratches, fsaveds, local_size); + } +#endif + CHECK_RETURN_OK; +} + +#undef SLJIT_ENTER_CPU_SPECIFIC_OPTIONS + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return_void(struct sljit_compiler *compiler) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->last_return == SLJIT_ARG_TYPE_RET_VOID); +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " return_void\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE 
check_sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->scratches >= 0); + + switch (compiler->last_return) { + case SLJIT_ARG_TYPE_W: + CHECK_ARGUMENT(op >= SLJIT_MOV && op <= SLJIT_MOV_S32); + break; + case SLJIT_ARG_TYPE_32: + CHECK_ARGUMENT(op == SLJIT_MOV32 || (op >= SLJIT_MOV32_U8 && op <= SLJIT_MOV32_S16)); + break; + case SLJIT_ARG_TYPE_P: + CHECK_ARGUMENT(op == SLJIT_MOV_P); + break; + case SLJIT_ARG_TYPE_F64: + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(op == SLJIT_MOV_F64); + break; + case SLJIT_ARG_TYPE_F32: + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(op == SLJIT_MOV_F32); + break; + default: + /* Context not initialized, void, etc. */ + CHECK_ARGUMENT(0); + break; + } + + if (GET_OPCODE(op) < SLJIT_MOV_F64) { + FUNCTION_CHECK_SRC(src, srcw); + } else { + FUNCTION_FCHECK(src, srcw, op & SLJIT_32); + } + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (GET_OPCODE(op) < SLJIT_MOV_F64) { + fprintf(compiler->verbose, " return%s%s ", !(op & SLJIT_32) ? "" : "32", + op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE]); + sljit_verbose_param(compiler, src, srcw); + } else { + fprintf(compiler->verbose, " return%s ", !(op & SLJIT_32) ? 
".f64" : ".f32"); + sljit_verbose_fparam(compiler, src, srcw); + } + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_SRC(src, srcw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " return_to "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LMUL_SW) + || ((op & ~SLJIT_32) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_32) <= SLJIT_DIV_SW) + || (op >= SLJIT_ENDBR && op <= SLJIT_SKIP_FRAMES_BEFORE_RETURN)); + CHECK_ARGUMENT(GET_OPCODE(op) < SLJIT_LMUL_UW || GET_OPCODE(op) >= SLJIT_ENDBR || compiler->scratches >= 2); + if ((GET_OPCODE(op) >= SLJIT_LMUL_UW && GET_OPCODE(op) <= SLJIT_DIV_SW) || op == SLJIT_SKIP_FRAMES_BEFORE_RETURN) + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + { + fprintf(compiler->verbose, " %s", op0_names[GET_OPCODE(op) - SLJIT_OP0_BASE]); + if (GET_OPCODE(op) >= SLJIT_DIVMOD_UW && GET_OPCODE(op) <= SLJIT_DIV_SW) { + fprintf(compiler->verbose, (op & SLJIT_32) ? 
"32" : "w"); + } + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_REV_S32); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + case SLJIT_MOV_P: + case SLJIT_REV_U32: + case SLJIT_REV_S32: + /* Nothing allowed */ + CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + break; + default: + /* Only SLJIT_32 is allowed. */ + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + break; + } + + FUNCTION_CHECK_DST(dst, dstw); + FUNCTION_CHECK_SRC(src, srcw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], + !(op & SLJIT_32) ? 
"" : "32", op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE]); + + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_ATOMIC)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOV_P); + CHECK_ARGUMENT(GET_OPCODE(op) != SLJIT_MOV_S8 && GET_OPCODE(op) != SLJIT_MOV_S16 && GET_OPCODE(op) != SLJIT_MOV_S32); + + /* All arguments must be valid registers. */ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(mem_reg) && !CHECK_IF_VIRTUAL_REGISTER(mem_reg)); + + if (op == SLJIT_MOV32_U8 || op == SLJIT_MOV32_U16) { + /* Only SLJIT_32 is allowed. */ + CHECK_ARGUMENT(!(op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z))); + } else { + /* Nothing allowed. */ + CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + } + + compiler->last_flags = 0; +#endif /* SLJIT_ARGUMENT_CHECKS */ +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " atomic_load%s%s ", !(op & SLJIT_32) ? 
"" : "32", + op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE]); + sljit_verbose_reg(compiler, dst_reg); + fprintf(compiler->verbose, ", ["); + sljit_verbose_reg(compiler, mem_reg); + fprintf(compiler->verbose, "]\n"); + } +#endif /* SLJIT_VERBOSE */ + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_ATOMIC)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOV_P); + CHECK_ARGUMENT(GET_OPCODE(op) != SLJIT_MOV_S8 && GET_OPCODE(op) != SLJIT_MOV_S16 && GET_OPCODE(op) != SLJIT_MOV_S32); + + /* All arguments must be valid registers. */ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src_reg)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(mem_reg) && !CHECK_IF_VIRTUAL_REGISTER(mem_reg)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(temp_reg) && src_reg != temp_reg); + + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) || GET_FLAG_TYPE(op) == SLJIT_ATOMIC_STORED); + + if (GET_OPCODE(op) == SLJIT_MOV_U8 || GET_OPCODE(op) == SLJIT_MOV_U16) { + /* Only SLJIT_32, SLJIT_ATOMIC_STORED are allowed. */ + CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); + } else { + /* Only SLJIT_ATOMIC_STORED is allowed. */ + CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z))); + } + + compiler->last_flags = GET_FLAG_TYPE(op) | (op & SLJIT_32); +#endif /* SLJIT_ARGUMENT_CHECKS */ +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " atomic_store%s%s%s ", !(op & SLJIT_32) ? "" : "32", + op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & VARIABLE_FLAG_MASK) ? 
"" : ".stored"); + sljit_verbose_reg(compiler, src_reg); + fprintf(compiler->verbose, ", ["); + sljit_verbose_reg(compiler, mem_reg); + fprintf(compiler->verbose, "], "); + sljit_verbose_reg(compiler, temp_reg); + fprintf(compiler->verbose, "\n"); + } +#endif /* SLJIT_VERBOSE */ + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 unset, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ROTR); + + switch (GET_OPCODE(op)) { + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); + break; + case SLJIT_MUL: + CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); + break; + case SLJIT_ADD: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY) + || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); + break; + case SLJIT_SUB: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_OVERFLOW) + || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); + break; + case SLJIT_ADDC: + case SLJIT_SUBC: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); + CHECK_ARGUMENT((compiler->last_flags & 0xff) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); + CHECK_ARGUMENT((op & SLJIT_32) == (compiler->last_flags & SLJIT_32)); + break; + case SLJIT_ROTL: + case SLJIT_ROTR: + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + break; + default: + 
SLJIT_UNREACHABLE(); + break; + } + + if (unset) { + CHECK_ARGUMENT(HAS_FLAGS(op)); + } else { + FUNCTION_CHECK_DST(dst, dstw); + } + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_32) ? "" : "32", + !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".", + !(op & VARIABLE_FLAG_MASK) ? "" : jump_names[GET_FLAG_TYPE(op)]); + if (unset) + fprintf(compiler->verbose, "unset"); + else + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((op | SLJIT_32) == SLJIT_MULADD32); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg)); + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s ", op2r_names[GET_OPCODE(op) - SLJIT_OP2R_BASE], !(op & SLJIT_32) ? 
"" : "32"); + + sljit_verbose_reg(compiler, dst_reg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1_reg, + sljit_s32 src2_reg, + sljit_s32 src3, sljit_sw src3w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_LSHR + || GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR); + CHECK_ARGUMENT((op & ~(0xff | SLJIT_32 | SLJIT_SHIFT_INTO_NON_ZERO)) == 0); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src1_reg)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src2_reg)); + FUNCTION_CHECK_SRC(src3, src3w); + CHECK_ARGUMENT(dst_reg != src2_reg); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s.into%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_32) ? "" : "32", + (op & SLJIT_SHIFT_INTO_NON_ZERO) ? 
".nz" : ""); + + sljit_verbose_reg(compiler, dst_reg); + fprintf(compiler->verbose, ", "); + sljit_verbose_reg(compiler, src1_reg); + fprintf(compiler->verbose, ", "); + sljit_verbose_reg(compiler, src2_reg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src3, src3w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(op >= SLJIT_FAST_RETURN && op <= SLJIT_PREFETCH_ONCE); + FUNCTION_CHECK_SRC(src, srcw); + + if (op == SLJIT_FAST_RETURN || op == SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN) { + CHECK_ARGUMENT(src != SLJIT_IMM); + compiler->last_flags = 0; + } else if (op >= SLJIT_PREFETCH_L1 && op <= SLJIT_PREFETCH_ONCE) { + CHECK_ARGUMENT(src & SLJIT_MEM); + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s ", op_src_dst_names[op - SLJIT_OP_SRC_DST_BASE]); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(op >= SLJIT_FAST_ENTER && op <= SLJIT_GET_RETURN_ADDRESS); + FUNCTION_CHECK_DST(dst, dstw); + + if (op == SLJIT_FAST_ENTER) + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s ", op_src_dst_names[op - SLJIT_OP_SRC_DST_BASE]); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_s32 type, sljit_s32 
reg) +{ + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(reg); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + if (type == SLJIT_GP_REGISTER) { + CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS) + || (reg >= SLJIT_TMP_REGISTER_BASE && reg < (SLJIT_TMP_REGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_REGISTERS))); + } else { + CHECK_ARGUMENT(type == SLJIT_FLOAT_REGISTER || ((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6))); + CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS) + || (reg >= SLJIT_TMP_FREGISTER_BASE && reg < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS))); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + sljit_u32 i; +#endif + + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(instruction); + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + CHECK_ARGUMENT(size > 0 && size < 16); +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + CHECK_ARGUMENT((size == 2 && (((sljit_sw)instruction) & 0x1) == 0) + || (size == 4 && (((sljit_sw)instruction) & 0x3) == 0)); +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + CHECK_ARGUMENT(size == 2 || size == 4 || size == 6); +#else + CHECK_ARGUMENT(size == 4 && (((sljit_sw)instruction) & 0x3) == 0); +#endif + + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " op_custom"); + for (i = 0; i < size; i++) + fprintf(compiler->verbose, " 0x%x", ((sljit_u8*)instruction)[i]); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + 
sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV_F64 && GET_OPCODE(op) <= SLJIT_ABS_F64); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + FUNCTION_FCHECK(src, srcw, op & SLJIT_32); + FUNCTION_FCHECK(dst, dstw, op & SLJIT_32); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + fprintf(compiler->verbose, " %s%s ", fop1_names[SLJIT_CONV_F64_FROM_F32 - SLJIT_FOP1_BASE], + (op & SLJIT_32) ? ".f32.from.f64" : ".f64.from.f32"); + else + fprintf(compiler->verbose, " %s%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (op & SLJIT_32) ? ".f32" : ".f64"); + + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_flags = GET_FLAG_TYPE(op) | (op & SLJIT_32); +#endif + + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_CMP_F64); + CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); + CHECK_ARGUMENT((op & VARIABLE_FLAG_MASK) + || (GET_FLAG_TYPE(op) >= SLJIT_F_EQUAL && GET_FLAG_TYPE(op) <= SLJIT_ORDERED_LESS_EQUAL)); + FUNCTION_FCHECK(src1, src1w, op & SLJIT_32); + FUNCTION_FCHECK(src2, src2w, op & SLJIT_32); +#endif +#if (defined 
SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_32) ? ".f32" : ".f64"); + if (op & VARIABLE_FLAG_MASK) { + fprintf(compiler->verbose, ".%s", jump_names[GET_FLAG_TYPE(op)]); + } + fprintf(compiler->verbose, " "); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + FUNCTION_FCHECK(src, srcw, op & SLJIT_32); + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + fop1_conv_types[GET_OPCODE(op) - SLJIT_CONV_SW_FROM_F64], + (op & SLJIT_32) ? 
".f32" : ".f64"); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + FUNCTION_CHECK_SRC(src, srcw); + FUNCTION_FCHECK(dst, dstw, op & SLJIT_32); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s.from.%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (op & SLJIT_32) ? ".f32" : ".f64", + fop1_conv_types[GET_OPCODE(op) - SLJIT_CONV_SW_FROM_F64]); + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD_F64 && GET_OPCODE(op) <= SLJIT_DIV_F64); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + FUNCTION_FCHECK(src1, src1w, op & SLJIT_32); + FUNCTION_FCHECK(src2, src2w, op & SLJIT_32); + FUNCTION_FCHECK(dst, dstw, op & SLJIT_32); +#endif +#if (defined 
SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s ", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE], (op & SLJIT_32) ? ".f32" : ".f64"); + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_COPYSIGN_F64); + FUNCTION_FCHECK(src1, src1w, op & SLJIT_32); + FUNCTION_FCHECK(src2, src2w, op & SLJIT_32); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg, op & SLJIT_32)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s ", fop2r_names[GET_OPCODE(op) - SLJIT_FOP2R_BASE], (op & SLJIT_32) ? 
".f32" : ".f64"); + sljit_verbose_freg(compiler, dst_freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + SLJIT_UNUSED_ARG(value); + + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 1)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fset32 "); + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", %f\n", value); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + SLJIT_UNUSED_ARG(value); + + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fset64 "); + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", %f\n", value); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) >= 
SLJIT_COPY_TO_F64 && GET_OPCODE(op) <= SLJIT_COPY_FROM_F64); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, op & SLJIT_32)); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg)); +#else /* !SLJIT_64BIT_ARCHITECTURE */ + switch (op) { + case SLJIT_COPY32_TO_F32: + case SLJIT_COPY32_FROM_F32: + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg)); + break; + case SLJIT_COPY_TO_F64: + case SLJIT_COPY_FROM_F64: + if (reg & REG_PAIR_MASK) { + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(REG_PAIR_FIRST(reg))); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(REG_PAIR_SECOND(reg))); + + if (op == SLJIT_COPY_TO_F64) + break; + + CHECK_ARGUMENT(REG_PAIR_FIRST(reg) != REG_PAIR_SECOND(reg)); + break; + } + + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg)); + break; + } +#endif /* SLJIT_64BIT_ARCHITECTURE */ +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " copy%s_%s_f%s ", (op & SLJIT_32) ? "32" : "", + GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? "to" : "from", (op & SLJIT_32) ? 
"32" : "64"); + + sljit_verbose_freg(compiler, freg); + + if (reg & REG_PAIR_MASK) { + fprintf(compiler->verbose, ", {"); + sljit_verbose_reg(compiler, REG_PAIR_FIRST(reg)); + fprintf(compiler->verbose, ", "); + sljit_verbose_reg(compiler, REG_PAIR_SECOND(reg)); + fprintf(compiler->verbose, "}\n"); + } else { + fprintf(compiler->verbose, ", "); + sljit_verbose_reg(compiler, reg); + fprintf(compiler->verbose, "\n"); + } + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_flags = 0; +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, "label:\n"); +#endif + CHECK_RETURN_OK; +} + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) +#define CHECK_UNORDERED(type, last_flags) \ + ((((type) & 0xfe) == SLJIT_ORDERED) && \ + ((last_flags) & 0xff) >= SLJIT_UNORDERED && ((last_flags) & 0xff) <= SLJIT_ORDERED_LESS_EQUAL) +#else +#define CHECK_UNORDERED(type, last_flags) 0 +#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_FAST_CALL); + + if ((type & 0xff) < SLJIT_JUMP) { + if ((type & 0xff) <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else if ((compiler->last_flags & 0xff) == 
SLJIT_CARRY) { + CHECK_ARGUMENT((type & 0xfe) == SLJIT_CARRY); + compiler->last_flags = 0; + } else + CHECK_ARGUMENT((type & 0xfe) == (compiler->last_flags & 0xff) + || CHECK_UNORDERED(type, compiler->last_flags)); + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, " jump%s %s\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + jump_names[type & 0xff]); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_CALL_RETURN))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_CALL && (type & 0xff) <= SLJIT_CALL_REG_ARG); + CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches)); + + if (type & SLJIT_CALL_RETURN) { + CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) == compiler->last_return); + + if (compiler->options & SLJIT_ENTER_REG_ARG) { + CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL_REG_ARG); + } else { + CHECK_ARGUMENT((type & 0xff) != SLJIT_CALL_REG_ARG); + } + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s ret[%s", jump_names[type & 0xff], + !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + !(type & SLJIT_CALL_RETURN) ? 
"" : ".ret", + call_arg_names[arg_types & SLJIT_ARG_MASK]); + + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); + } + fprintf(compiler->verbose, "]\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_SIG_LESS_EQUAL); + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " cmp%s%s %s, ", (type & SLJIT_32) ? "32" : "", + !(type & SLJIT_REWRITABLE_JUMP) ? 
"" : ".r", jump_names[type & 0xff]); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_F_EQUAL && (type & 0xff) <= SLJIT_ORDERED_LESS_EQUAL); + FUNCTION_FCHECK(src1, src1w, type & SLJIT_32); + FUNCTION_FCHECK(src2, src2w, type & SLJIT_32); + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fcmp%s%s %s, ", (type & SLJIT_32) ? ".f32" : ".f64", + !(type & SLJIT_REWRITABLE_JUMP) ? 
"" : ".r", jump_names[type & 0xff]); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(type >= SLJIT_JUMP && type <= SLJIT_FAST_CALL); + FUNCTION_CHECK_SRC(src, srcw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " ijump.%s ", jump_names[type]); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_CALL_RETURN))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_CALL && (type & 0xff) <= SLJIT_CALL_REG_ARG); + CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches)); + FUNCTION_CHECK_SRC(src, srcw); + + if (type & SLJIT_CALL_RETURN) { + CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) == compiler->last_return); + + if (compiler->options & SLJIT_ENTER_REG_ARG) { + CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL_REG_ARG); + } else { + CHECK_ARGUMENT((type & 0xff) != SLJIT_CALL_REG_ARG); + } + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " i%s%s ret[%s", jump_names[type & 0xff], + !(type & SLJIT_CALL_RETURN) ? 
"" : ".ret", + call_arg_names[arg_types & SLJIT_ARG_MASK]); + + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); + } + fprintf(compiler->verbose, "], "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); + CHECK_ARGUMENT(op == SLJIT_MOV || op == SLJIT_MOV32 + || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); + + if (type <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else + CHECK_ARGUMENT((type & 0xfe) == (compiler->last_flags & 0xff) + || CHECK_UNORDERED(type, compiler->last_flags)); + + FUNCTION_CHECK_DST(dst, dstw); + + if (GET_OPCODE(op) >= SLJIT_ADD) + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " flags.%s%s%s ", + GET_OPCODE(op) < SLJIT_OP2_BASE ? "mov" : op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], + GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_types[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_32) ? "32" : ""), + !(op & SLJIT_SET_Z) ? 
"" : ".z"); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", %s\n", jump_names[type]); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 cond = type & ~SLJIT_32; + + CHECK_ARGUMENT(cond >= SLJIT_EQUAL && cond <= SLJIT_ORDERED_LESS_EQUAL); + + CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg)); + FUNCTION_CHECK_SRC(src1, src1w); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src2_reg)); + + if (cond <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else if ((compiler->last_flags & 0xff) == SLJIT_CARRY) { + CHECK_ARGUMENT((type & 0xfe) == SLJIT_CARRY); + compiler->last_flags = 0; + } else + CHECK_ARGUMENT((cond & 0xfe) == (compiler->last_flags & 0xff) + || CHECK_UNORDERED(cond, compiler->last_flags)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " select%s %s, ", + !(type & SLJIT_32) ? 
"" : "32", + jump_names[type & ~SLJIT_32]); + sljit_verbose_reg(compiler, dst_reg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_reg(compiler, src2_reg); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 cond = type & ~SLJIT_32; + + CHECK_ARGUMENT(cond >= SLJIT_EQUAL && cond <= SLJIT_ORDERED_LESS_EQUAL); + + CHECK_ARGUMENT(compiler->fscratches != -1 && compiler->fsaveds != -1); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg, type & SLJIT_32)); + FUNCTION_FCHECK(src1, src1w, type & SLJIT_32); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src2_freg, type & SLJIT_32)); + + if (cond <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else if ((compiler->last_flags & 0xff) == SLJIT_CARRY) { + CHECK_ARGUMENT((type & 0xfe) == SLJIT_CARRY); + compiler->last_flags = 0; + } else + CHECK_ARGUMENT((cond & 0xfe) == (compiler->last_flags & 0xff) + || CHECK_UNORDERED(cond, compiler->last_flags)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fselect%s %s, ", + !(type & SLJIT_32) ? 
"" : "32", + jump_names[type & ~SLJIT_32]); + sljit_verbose_freg(compiler, dst_freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_freg(compiler, src2_freg); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 allowed_flags; +#endif /* SLJIT_ARGUMENT_CHECKS */ + + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + if (type & SLJIT_MEM_UNALIGNED) { + CHECK_ARGUMENT(!(type & (SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32))); + } else if (type & SLJIT_MEM_ALIGNED_16) { + CHECK_ARGUMENT(!(type & SLJIT_MEM_ALIGNED_32)); + } else { + CHECK_ARGUMENT((reg & REG_PAIR_MASK) || (type & SLJIT_MEM_ALIGNED_32)); + } + + allowed_flags = SLJIT_MEM_UNALIGNED; + + switch (type & 0xff) { + case SLJIT_MOV_P: + case SLJIT_MOV: + allowed_flags |= SLJIT_MEM_ALIGNED_32; + /* fallthrough */ + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + allowed_flags |= SLJIT_MEM_ALIGNED_16; + break; + } + + CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | allowed_flags)) == 0); + + if (reg & REG_PAIR_MASK) { + CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(REG_PAIR_FIRST(reg))); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(REG_PAIR_SECOND(reg))); + CHECK_ARGUMENT(REG_PAIR_FIRST(reg) != REG_PAIR_SECOND(reg)); + } else { + CHECK_ARGUMENT((type & 0xff) >= SLJIT_MOV && (type & 0xff) <= SLJIT_MOV_P); + CHECK_ARGUMENT(!(type & SLJIT_32) || ((type & 0xff) >= SLJIT_MOV_U8 && (type & 0xff) <= SLJIT_MOV_S16)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg)); + } + + FUNCTION_CHECK_SRC_MEM(mem, memw); +#endif 
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if ((type & 0xff) == SLJIT_MOV32) + fprintf(compiler->verbose, " %s32", + (type & SLJIT_MEM_STORE) ? "store" : "load"); + else + fprintf(compiler->verbose, " %s%s%s", + (type & SLJIT_MEM_STORE) ? "store" : "load", + !(type & SLJIT_32) ? "" : "32", op1_types[(type & 0xff) - SLJIT_OP1_BASE]); + + if (type & SLJIT_MEM_UNALIGNED) + printf(".unal"); + else if (type & SLJIT_MEM_ALIGNED_16) + printf(".al16"); + else if (type & SLJIT_MEM_ALIGNED_32) + printf(".al32"); + + if (reg & REG_PAIR_MASK) { + fprintf(compiler->verbose, " {"); + sljit_verbose_reg(compiler, REG_PAIR_FIRST(reg)); + fprintf(compiler->verbose, ", "); + sljit_verbose_reg(compiler, REG_PAIR_SECOND(reg)); + fprintf(compiler->verbose, "}, "); + } else { + fprintf(compiler->verbose, " "); + sljit_verbose_reg(compiler, reg); + fprintf(compiler->verbose, ", "); + } + sljit_verbose_param(compiler, mem, memw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((type & 0xff) >= SLJIT_MOV && (type & 0xff) <= SLJIT_MOV_P); + CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_POST)) == 0); + CHECK_ARGUMENT((mem & REG_MASK) != 0 && (mem & REG_MASK) != reg); + + FUNCTION_CHECK_SRC_MEM(mem, memw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_MEM_SUPP) + CHECK_RETURN_OK; + if (sljit_emit_mem_update(compiler, type | SLJIT_MEM_SUPP, reg, mem, memw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # mem: unsupported form, no instructions are 
emitted\n"); + CHECK_RETURN_OK; + } + + if ((type & 0xff) == SLJIT_MOV32) + fprintf(compiler->verbose, " %s32.%s ", + (type & SLJIT_MEM_STORE) ? "store" : "load", + (type & SLJIT_MEM_POST) ? "post" : "pre"); + else + fprintf(compiler->verbose, " %s%s%s.%s ", + (type & SLJIT_MEM_STORE) ? "store" : "load", + !(type & SLJIT_32) ? "" : "32", + op1_types[(type & 0xff) - SLJIT_OP1_BASE], + (type & SLJIT_MEM_POST) ? "post" : "pre"); + + sljit_verbose_reg(compiler, reg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, mem, memw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64); + + if (type & SLJIT_MEM_UNALIGNED) { + CHECK_ARGUMENT(!(type & (SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32))); + } else if (type & SLJIT_MEM_ALIGNED_16) { + CHECK_ARGUMENT(!(type & SLJIT_MEM_ALIGNED_32)); + } else { + CHECK_ARGUMENT(type & SLJIT_MEM_ALIGNED_32); + CHECK_ARGUMENT(!(type & SLJIT_32)); + } + + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32))); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, type & SLJIT_32)); + FUNCTION_CHECK_SRC_MEM(mem, memw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s.%s", + (type & SLJIT_MEM_STORE) ? "store" : "load", + !(type & SLJIT_32) ? 
"f64" : "f32"); + + if (type & SLJIT_MEM_UNALIGNED) + printf(".unal"); + else if (type & SLJIT_MEM_ALIGNED_16) + printf(".al16"); + else if (type & SLJIT_MEM_ALIGNED_32) + printf(".al32"); + + fprintf(compiler->verbose, " "); + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, mem, memw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64); + CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_POST)) == 0); + FUNCTION_CHECK_SRC_MEM(mem, memw); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, type & SLJIT_32)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_MEM_SUPP) + CHECK_RETURN_OK; + if (sljit_emit_fmem_update(compiler, type | SLJIT_MEM_SUPP, freg, mem, memw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # fmem: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " %s.%s.%s ", + (type & SLJIT_MEM_STORE) ? "store" : "load", + !(type & SLJIT_32) ? "f64" : "f32", + (type & SLJIT_MEM_POST) ? 
"post" : "pre"); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, mem, memw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK2(SLJIT_SIMD_STORE)) == 0); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) <= SLJIT_SIMD_GET_REG_SIZE(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) <= (srcdst & SLJIT_MEM) ? SLJIT_SIMD_GET_REG_SIZE(type) : 0); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + FUNCTION_FCHECK(srcdst, srcdstw, 0); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_mov(compiler, type | SLJIT_SIMD_TEST, freg, srcdst, srcdstw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # simd_mem: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_%s.%d.%s%d", + (type & SLJIT_SIMD_STORE) ? "store" : "load", + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? 
"f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + if ((type & 0x3f000000) == SLJIT_SIMD_MEM_UNALIGNED) + fprintf(compiler->verbose, ".unal "); + else + fprintf(compiler->verbose, ".al%d ", (8 << SLJIT_SIMD_GET_ELEM2_SIZE(type))); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, srcdst, srcdstw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) == 0); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + + if (type & SLJIT_SIMD_FLOAT) { + if (src == SLJIT_IMM) { + CHECK_ARGUMENT(srcw == 0); + } else { + FUNCTION_FCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2); + } + } else if (src != SLJIT_IMM) { + FUNCTION_CHECK_DST(src, srcw); + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_replicate(compiler, type | SLJIT_SIMD_TEST, freg, src, srcw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # simd_dup: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_replicate.%d.%s%d ", + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? 
"f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + if (type & SLJIT_SIMD_FLOAT) + sljit_verbose_fparam(compiler, src, srcw); + else + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_ZERO | SLJIT_SIMD_LANE_SIGNED | SLJIT_32)) == 0); + CHECK_ARGUMENT((type & (SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_ZERO)) != (SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_ZERO)); + CHECK_ARGUMENT((type & (SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_SIGNED)) != SLJIT_SIMD_LANE_SIGNED); + CHECK_ARGUMENT(!(type & SLJIT_SIMD_FLOAT) || !(type & (SLJIT_SIMD_LANE_SIGNED | SLJIT_32))); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); + CHECK_ARGUMENT(!(type & SLJIT_32) || SLJIT_SIMD_GET_ELEM_SIZE(type) <= 2); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + CHECK_ARGUMENT(lane_index >= 0 && lane_index < (1 << (SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type)))); + + if (type & SLJIT_SIMD_FLOAT) { + FUNCTION_FCHECK(srcdst, srcdstw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2); + } else if ((type & SLJIT_SIMD_STORE) || srcdst != SLJIT_IMM) { + FUNCTION_CHECK_DST(srcdst, srcdstw); + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_TEST, freg, lane_index, srcdst, srcdstw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # 
simd_move_lane: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_%s_lane%s%s%s.%d.%s%d ", + (type & SLJIT_SIMD_STORE) ? "store" : "load", + (type & SLJIT_32) ? "32" : "", + (type & SLJIT_SIMD_LANE_ZERO) ? "_z" : "", + (type & SLJIT_SIMD_LANE_SIGNED) ? "_s" : "", + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? "f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, "[%d], ", lane_index); + if (type & SLJIT_SIMD_FLOAT) + sljit_verbose_fparam(compiler, srcdst, srcdstw); + else + sljit_verbose_param(compiler, srcdst, srcdstw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) == 0); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src, 0)); + CHECK_ARGUMENT(src_lane_index >= 0 && src_lane_index < (1 << (SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type)))); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_lane_replicate(compiler, type | SLJIT_SIMD_TEST, freg, src, src_lane_index) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # simd_lane_replicate: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_lane_replicate.%d.%s%d ", + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & 
SLJIT_SIMD_FLOAT) ? "f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_freg(compiler, src); + fprintf(compiler->verbose, "[%d]\n", src_lane_index); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK2(SLJIT_SIMD_EXTEND_SIGNED)) == 0); + CHECK_ARGUMENT((type & (SLJIT_SIMD_EXTEND_SIGNED | SLJIT_SIMD_FLOAT)) != (SLJIT_SIMD_EXTEND_SIGNED | SLJIT_SIMD_FLOAT)); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_ELEM2_SIZE(type)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + FUNCTION_FCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_TEST, freg, src, srcw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # simd_extend: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_load_extend%s.%d.%s%d.%s%d ", + (type & SLJIT_SIMD_EXTEND_SIGNED) ? "_s" : "", + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? "f" : "", + (8 << SLJIT_SIMD_GET_ELEM2_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? 
"f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(SLJIT_32)) == SLJIT_SIMD_STORE); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0)); + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_sign(compiler, type | SLJIT_SIMD_TEST, freg, dst, dstw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # simd_sign: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_store_sign%s.%d.%s%d ", + (type & SLJIT_32) ? "32" : "", + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? 
"f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD)); + CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) >= SLJIT_SIMD_OP2_AND && (type & SLJIT_SIMD_TYPE_MASK(0)) <= SLJIT_SIMD_OP2_XOR); + CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); + CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) <= SLJIT_SIMD_GET_REG_SIZE(type)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src1_freg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src2_freg, 0)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_SIMD_TEST) + CHECK_RETURN_OK; + if (sljit_emit_simd_op2(compiler, type | SLJIT_SIMD_TEST, dst_freg, src1_freg, src2_freg) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # simd_op2: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " simd_%s.%d.%s%d ", + simd_op2_names[SLJIT_SIMD_GET_OPCODE(type) - 1], + (8 << SLJIT_SIMD_GET_REG_SIZE(type)), + (type & SLJIT_SIMD_FLOAT) ? 
"f" : "", + (8 << SLJIT_SIMD_GET_ELEM_SIZE(type))); + + sljit_verbose_freg(compiler, dst_freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_freg(compiler, src1_freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_freg(compiler, src2_freg); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + /* Any offset is allowed. */ + SLJIT_UNUSED_ARG(offset); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " local_base "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", offset); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + SLJIT_UNUSED_ARG(init_value); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " const "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", init_value); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " mov_addr "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +#else /* 
!SLJIT_ARGUMENT_CHECKS && !SLJIT_VERBOSE */ + +#define SLJIT_SKIP_CHECKS(compiler) + +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */ + +#define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \ + SLJIT_COMPILE_ASSERT(!(SLJIT_CONV_SW_FROM_F64 & 0x1) && !(SLJIT_CONV_F64_FROM_SW & 0x1) && !(SLJIT_CONV_F64_FROM_UW & 0x1), \ + invalid_float_opcodes); \ + if (GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CMP_F64) { \ + if (GET_OPCODE(op) == SLJIT_CMP_F64) { \ + CHECK(check_sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw); \ + } \ + if ((GET_OPCODE(op) | 0x1) == SLJIT_CONV_S32_FROM_F64) { \ + CHECK(check_sljit_emit_fop1_conv_sw_from_f64(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_conv_sw_from_f64(compiler, op, dst, dstw, src, srcw); \ + } \ + if ((GET_OPCODE(op) | 0x1) == SLJIT_CONV_F64_FROM_S32) { \ + CHECK(check_sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_conv_f64_from_sw(compiler, op, dst, dstw, src, srcw); \ + } \ + CHECK(check_sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_conv_f64_from_uw(compiler, op, dst, dstw, src, srcw); \ + } \ + CHECK(check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); + +#if (!(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) || (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)) + +static sljit_s32 sljit_emit_mem_unaligned(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + 
SLJIT_SKIP_CHECKS(compiler); + + if (type & SLJIT_MEM_STORE) + return sljit_emit_op1(compiler, type & (0xff | SLJIT_32), mem, memw, reg, 0); + return sljit_emit_op1(compiler, type & (0xff | SLJIT_32), reg, 0, mem, memw); +} + +#endif /* (!SLJIT_CONFIG_MIPS || SLJIT_MIPS_REV >= 6) */ + +#if (!(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) || (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)) \ + && !(defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + +/* Emits a single sljit_emit_fop1 for freg and [mem, memw]: when SLJIT_MEM_STORE is set in type the register is written to memory, otherwise it is loaded from memory. Only the low 8 bits and SLJIT_32 of type are passed on as the opcode. */ +static sljit_s32 sljit_emit_fmem_unaligned(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + SLJIT_SKIP_CHECKS(compiler); + + if (type & SLJIT_MEM_STORE) + return sljit_emit_fop1(compiler, type & (0xff | SLJIT_32), mem, memw, freg, 0); + return sljit_emit_fop1(compiler, type & (0xff | SLJIT_32), freg, 0, mem, memw); +} + +#endif /* (!SLJIT_CONFIG_MIPS || SLJIT_MIPS_REV >= 6) && !SLJIT_CONFIG_ARM_32 */ + +/* CPU description section */ + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) +#define SLJIT_CPUINFO_PART1 " 32bit (" +#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define SLJIT_CPUINFO_PART1 " 64bit (" +#else +#error "Internal error: CPU type info missing" +#endif + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define SLJIT_CPUINFO_PART2 "little endian + " +#elif (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) +#define SLJIT_CPUINFO_PART2 "big endian + " +#else +#error "Internal error: CPU type info missing" +#endif + +#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) +#define SLJIT_CPUINFO_PART3 "unaligned)" +#else +#define SLJIT_CPUINFO_PART3 "aligned)" +#endif + +#define SLJIT_CPUINFO SLJIT_CPUINFO_PART1 SLJIT_CPUINFO_PART2 SLJIT_CPUINFO_PART3 + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +# include "sljitNativeX86_common.c" +#elif (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) +# include "sljitNativeARM_32.c" +#elif (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +# include 
"sljitNativeARM_32.c" +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +# include "sljitNativeARM_T2_32.c" +#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +# include "sljitNativeARM_64.c" +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) +# include "sljitNativePPC_common.c" +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) +# include "sljitNativeMIPS_common.c" +#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) +# include "sljitNativeRISCV_common.c" +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +# include "sljitNativeS390X.c" +#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) +# include "sljitNativeLOONGARCH_64.c" +#endif + +#include "sljitSerialize.c" + +static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + /* At the moment the pointer size is always equal to sljit_sw. May be changed in the future. 
*/ + if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_P)) + return SLJIT_SUCCESS; +#else + if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P)) + return SLJIT_SUCCESS; +#endif + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw); +} + +#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + && !((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && defined __SOFTFP__) + +static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + if (src == SLJIT_FR0) + return SLJIT_SUCCESS; + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw); +} + +#endif /* !SLJIT_CONFIG_X86_32 && !(SLJIT_CONFIG_ARM_32 && __SOFTFP__) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + if (GET_OPCODE(op) < SLJIT_MOV_F64) { + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + } else { + FAIL_IF(emit_fmov_before_return(compiler, op, src, srcw)); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_return_void(compiler); +} + +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + && !(defined(SLJIT_CONFIG_LOONGARCH_64) && SLJIT_CONFIG_LOONGARCH_64) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_fop2(compiler, op, dst_freg, 0, src1, 
src1w, src2, src2w); +} + +#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_S390X && !SLJIT_CONFIG_LOONGARCH_64 */ + +#if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ + && !(defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ + && !(defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* Default compare for most architectures. */ + sljit_s32 flags, tmp_src, condition; + sljit_sw tmp_srcw; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); + + condition = type & 0xff; +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + if ((condition == SLJIT_EQUAL || condition == SLJIT_NOT_EQUAL)) { + if (src1 == SLJIT_IMM && !src1w) { + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + src2w = 0; + } + if (src2 == SLJIT_IMM && !src2w) + return emit_cmp_to0(compiler, type, src1, src1w); + } +#endif + + if (SLJIT_UNLIKELY(src1 == SLJIT_IMM && src2 != SLJIT_IMM)) { + /* Immediate is preferred as second argument by most architectures. 
*/ + switch (condition) { + case SLJIT_LESS: + condition = SLJIT_GREATER; + break; + case SLJIT_GREATER_EQUAL: + condition = SLJIT_LESS_EQUAL; + break; + case SLJIT_GREATER: + condition = SLJIT_LESS; + break; + case SLJIT_LESS_EQUAL: + condition = SLJIT_GREATER_EQUAL; + break; + case SLJIT_SIG_LESS: + condition = SLJIT_SIG_GREATER; + break; + case SLJIT_SIG_GREATER_EQUAL: + condition = SLJIT_SIG_LESS_EQUAL; + break; + case SLJIT_SIG_GREATER: + condition = SLJIT_SIG_LESS; + break; + case SLJIT_SIG_LESS_EQUAL: + condition = SLJIT_SIG_GREATER_EQUAL; + break; + } + + type = condition | (type & (SLJIT_32 | SLJIT_REWRITABLE_JUMP)); + tmp_src = src1; + src1 = src2; + src2 = tmp_src; + tmp_srcw = src1w; + src1w = src2w; + src2w = tmp_srcw; + } + + if (condition <= SLJIT_NOT_ZERO) + flags = SLJIT_SET_Z; + else + flags = (condition & 0xfe) << VARIABLE_FLAG_SHIFT; + + SLJIT_SKIP_CHECKS(compiler); + PTR_FAIL_IF(sljit_emit_op2u(compiler, + SLJIT_SUB | flags | (type & SLJIT_32), src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, condition | (type & (SLJIT_REWRITABLE_JUMP | SLJIT_32))); +} + +#endif /* !SLJIT_CONFIG_MIPS && !SLJIT_CONFIG_RISCV && !SLJIT_CONFIG_LOONGARCH */ + +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + +/* Returns 1 when type is SLJIT_UNORDERED_OR_EQUAL or SLJIT_ORDERED_NOT_EQUAL, and 0 for any other value. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + switch (type) { + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return 1; + } + + return 0; +} + +#endif /* SLJIT_CONFIG_ARM_32 */ + +/* Emits an SLJIT_CMP_F64 operation on src1 and src2 through sljit_emit_fop1 (the flag to set is derived from type, SLJIT_32 is kept), then a jump of the given type. The result of sljit_emit_fop1 is not inspected here. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | ((type & 0xfe) << VARIABLE_FLAG_SHIFT) | (type & SLJIT_32), src1, src1w, src2, src2w); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +#if !(defined 
SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ + && !(defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw)); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(reg); + SLJIT_UNUSED_ARG(mem); + SLJIT_UNUSED_ARG(memw); + + return SLJIT_ERR_UNSUPPORTED; +} + +#endif /* !SLJIT_CONFIG_ARM && !SLJIT_CONFIG_PPC */ + +#if !(defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ + && !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + return sljit_emit_fmem_unaligned(compiler, type, freg, mem, memw); +} + +#endif /* !SLJIT_CONFIG_ARM_32 && !SLJIT_CONFIG_MIPS */ + +#if !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + && !(defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw)); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(mem); + SLJIT_UNUSED_ARG(memw); + + return SLJIT_ERR_UNSUPPORTED; +} + +#endif /* !SLJIT_CONFIG_ARM_64 && !SLJIT_CONFIG_PPC */ + +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + && !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + && !(defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + 
CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(srcdst); + SLJIT_UNUSED_ARG(srcdstw); + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(lane_index); + SLJIT_UNUSED_ARG(srcdst); + SLJIT_UNUSED_ARG(srcdstw); + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(src_lane_index); + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + 
SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + + return SLJIT_ERR_UNSUPPORTED; +} + +/* Stub: apart from whatever CHECK_ERROR() and CHECK() do, it only returns SLJIT_ERR_UNSUPPORTED. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + + return SLJIT_ERR_UNSUPPORTED; +} + +/* Stub: apart from whatever CHECK_ERROR() and CHECK() do, it only returns SLJIT_ERR_UNSUPPORTED. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(dst_freg); + SLJIT_UNUSED_ARG(src1_freg); + SLJIT_UNUSED_ARG(src2_freg); + + return SLJIT_ERR_UNSUPPORTED; +} + +#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_ARM && !SLJIT_CONFIG_S390X && !SLJIT_CONFIG_LOONGARCH */ + +#if !(defined(SLJIT_CONFIG_X86) && SLJIT_CONFIG_X86) \ + && !(defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM) \ + && !(defined(SLJIT_CONFIG_S390X) && SLJIT_CONFIG_S390X) \ + && !(defined(SLJIT_CONFIG_LOONGARCH) && SLJIT_CONFIG_LOONGARCH) + +/* Stub: apart from whatever CHECK_ERROR() and CHECK() do, it only returns SLJIT_ERR_UNSUPPORTED. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, + sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst_reg); + SLJIT_UNUSED_ARG(mem_reg); + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, + sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(src_reg); + SLJIT_UNUSED_ARG(mem_reg); + SLJIT_UNUSED_ARG(temp_reg); 
+ + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + + return SLJIT_ERR_UNSUPPORTED; +} + +#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_ARM && !SLJIT_CONFIG_S390X && !SLJIT_CONFIG_LOONGARCH */ + +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + CHECK_ERROR(); + CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); + + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); + + SLJIT_SKIP_CHECKS(compiler); + + if (offset != 0) + return sljit_emit_op2(compiler, SLJIT_ADD, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0); +} + +#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_ARM_64 */ + +#endif /* !SLJIT_CONFIG_UNSUPPORTED */ diff --git a/src/sljit/sljitLir.h b/src/sljit/sljitLir.h new file mode 100644 index 0000000..8b6fa69 --- /dev/null +++ b/src/sljit/sljitLir.h @@ -0,0 +1,2466 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SLJIT_LIR_H_ +#define SLJIT_LIR_H_ + +/* + ------------------------------------------------------------------------ + Stack-Less JIT compiler for multiple architectures (x86, ARM, PowerPC) + ------------------------------------------------------------------------ + + Short description + Advantages: + - The execution can be continued from any LIR instruction. In other + words, it is possible to jump to any label from anywhere, even from + a code fragment, which is compiled later, as long as the compiling + context is the same. See sljit_emit_enter for more details. + - Supports self modifying code: target of any jump and call + instructions and some constant values can be dynamically modified + during runtime. See SLJIT_REWRITABLE_JUMP. + - although it is not suggested to do it frequently + - can be used for inline caching: save an important value once + in the instruction stream + - A fixed stack space can be allocated for local variables + - The compiler is thread-safe + - The compiler is highly configurable through preprocessor macros. 
+ You can disable unneeded features (multithreading in single + threaded applications), and you can use your own system functions + (including memory allocators). See sljitConfig.h. + Disadvantages: + - The compiler is more like a platform independent assembler, so + there is no built-in variable management. Registers and stack must + be managed manually (the name of the compiler refers to this). + In practice: + - This approach is very effective for interpreters + - One of the saved registers typically points to a stack interface + - It can jump to any exception handler anytime (even if it belongs + to another function) + - Hot paths can be modified during runtime reflecting the changes + of the fastest execution path of the dynamic language + - SLJIT supports complex memory addressing modes + - mainly position and context independent code (except some cases) + + For valgrind users: + - pass --smc-check=all argument to valgrind, since JIT is a "self-modifying code" +*/ + +#if (defined SLJIT_HAVE_CONFIG_PRE && SLJIT_HAVE_CONFIG_PRE) +#include "sljitConfigPre.h" +#endif /* SLJIT_HAVE_CONFIG_PRE */ + +#include "sljitConfigCPU.h" +#include "sljitConfig.h" + +/* The following header file defines useful macros for fine tuning +SLJIT based code generators. They are listed in the beginning +of sljitConfigInternal.h */ + +#include "sljitConfigInternal.h" + +#if (defined SLJIT_HAVE_CONFIG_POST && SLJIT_HAVE_CONFIG_POST) +#include "sljitConfigPost.h" +#endif /* SLJIT_HAVE_CONFIG_POST */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Version numbers. */ +#define SLJIT_MAJOR_VERSION 0 +#define SLJIT_MINOR_VERSION 95 + +/* --------------------------------------------------------------------- */ +/* Error codes */ +/* --------------------------------------------------------------------- */ + +/* Indicates no error. */ +#define SLJIT_SUCCESS 0 +/* After the call of sljit_generate_code(), the error code of the compiler + is set to this value to avoid further code generation. 
+ The compiler should be freed after sljit_generate_code(). */ +#define SLJIT_ERR_COMPILED 1 +/* Cannot allocate non-executable memory. */ +#define SLJIT_ERR_ALLOC_FAILED 2 +/* Cannot allocate executable memory. + Only sljit_generate_code() returns with this error code. */ +#define SLJIT_ERR_EX_ALLOC_FAILED 3 +/* Unsupported instruction form. */ +#define SLJIT_ERR_UNSUPPORTED 4 +/* An invalid argument is passed to any SLJIT function. */ +#define SLJIT_ERR_BAD_ARGUMENT 5 + +/* --------------------------------------------------------------------- */ +/* Registers */ +/* --------------------------------------------------------------------- */ + +/* + Scratch (R) registers: registers which may not preserve their values + across function calls. + + Saved (S) registers: registers which preserve their values across + function calls. + + The scratch and saved register sets overlap. The last scratch register + is the first saved register, the one before the last is the second saved + register, and so on. + + For example, in an architecture with only five registers (A-E), if two + are scratch and three saved registers, they will be defined as follows: + + A | R0 | | R0 always represents scratch register A + B | R1 | | R1 always represents scratch register B + C | [R2] | S2 | R2 and S2 represent the same physical register C + D | [R3] | S1 | R3 and S1 represent the same physical register D + E | [R4] | S0 | R4 and S0 represent the same physical register E + + Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS will be 2 and + SLJIT_NUMBER_OF_SAVED_REGISTERS will be 3. + + Note: For all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 12 + and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 6. However, 6 registers + are virtual on x86-32. See below. + + The purpose of this definition is convenience: saved registers can + be used as extra scratch registers. 
For example, building on the + previous example, four registers can be specified as scratch registers + and the fifth one as saved register, allowing any user code which requires + four scratch registers to run unmodified. The SLJIT compiler automatically + saves the content of the two extra scratch registers on the stack. Scratch + registers can also be preserved by saving their value on the stack but + that needs to be done manually. + + Note: To emphasize that registers assigned to R2-R4 are saved + registers, they are enclosed by square brackets. + + Note: sljit_emit_enter and sljit_set_context define whether a register + is an S or R register. E.g: if in the previous example 3 scratches and + 1 saved are mapped by sljit_emit_enter, the allowed register set + will be: R0-R2 and S0. Although S2 is mapped to the same register + as R2, it is not available in that configuration. Furthermore + the S1 register cannot be used at all. +*/ + +/* Scratch registers. */ +#define SLJIT_R0 1 +#define SLJIT_R1 2 +#define SLJIT_R2 3 +/* Note: on x86-32, R3 - R6 (same as S3 - S6) are emulated (they + are allocated on the stack). These registers are called virtual + and cannot be used for memory addressing (cannot be part of + any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such + limitation on other CPUs. See sljit_get_register_index(). */ +#define SLJIT_R3 4 +#define SLJIT_R4 5 +#define SLJIT_R5 6 +#define SLJIT_R6 7 +#define SLJIT_R7 8 +#define SLJIT_R8 9 +#define SLJIT_R9 10 +/* All R registers provided by the architecture can be accessed by SLJIT_R(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_REGISTERS. */ +#define SLJIT_R(i) (1 + (i)) + +/* Saved registers. */ +#define SLJIT_S0 (SLJIT_NUMBER_OF_REGISTERS) +#define SLJIT_S1 (SLJIT_NUMBER_OF_REGISTERS - 1) +#define SLJIT_S2 (SLJIT_NUMBER_OF_REGISTERS - 2) +/* Note: on x86-32, S3 - S6 (same as R3 - R6) are emulated (they + are allocated on the stack). 
These registers are called virtual + and cannot be used for memory addressing (cannot be part of + any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such + limitation on other CPUs. See sljit_get_register_index(). */ +#define SLJIT_S3 (SLJIT_NUMBER_OF_REGISTERS - 3) +#define SLJIT_S4 (SLJIT_NUMBER_OF_REGISTERS - 4) +#define SLJIT_S5 (SLJIT_NUMBER_OF_REGISTERS - 5) +#define SLJIT_S6 (SLJIT_NUMBER_OF_REGISTERS - 6) +#define SLJIT_S7 (SLJIT_NUMBER_OF_REGISTERS - 7) +#define SLJIT_S8 (SLJIT_NUMBER_OF_REGISTERS - 8) +#define SLJIT_S9 (SLJIT_NUMBER_OF_REGISTERS - 9) +/* All S registers provided by the architecture can be accessed by SLJIT_S(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_REGISTERS. */ +#define SLJIT_S(i) (SLJIT_NUMBER_OF_REGISTERS - (i)) + +/* Registers >= SLJIT_FIRST_SAVED_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_REG (SLJIT_S0 - SLJIT_NUMBER_OF_SAVED_REGISTERS + 1) + +/* The SLJIT_SP provides direct access to the linear stack space allocated by + sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP). + The immediate offset is extended by the relative stack offset automatically. + sljit_get_local_base can be used to obtain the real address of a value. */ +#define SLJIT_SP (SLJIT_NUMBER_OF_REGISTERS + 1) + +/* Return with machine word. */ + +#define SLJIT_RETURN_REG SLJIT_R0 + +/* --------------------------------------------------------------------- */ +/* Floating point registers */ +/* --------------------------------------------------------------------- */ + +/* Each floating point register can store a 32 or a 64 bit precision + value. The FR and FS register sets overlap in the same way as R + and S register sets. See above. */ + +/* Floating point scratch registers. 
*/ +#define SLJIT_FR0 1 +#define SLJIT_FR1 2 +#define SLJIT_FR2 3 +#define SLJIT_FR3 4 +#define SLJIT_FR4 5 +#define SLJIT_FR5 6 +#define SLJIT_FR6 7 +#define SLJIT_FR7 8 +#define SLJIT_FR8 9 +#define SLJIT_FR9 10 +/* All FR registers provided by the architecture can be accessed by SLJIT_FR(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */ +#define SLJIT_FR(i) (1 + (i)) + +/* Floating point saved registers. */ +#define SLJIT_FS0 (SLJIT_NUMBER_OF_FLOAT_REGISTERS) +#define SLJIT_FS1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1) +#define SLJIT_FS2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2) +#define SLJIT_FS3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3) +#define SLJIT_FS4 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4) +#define SLJIT_FS5 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5) +#define SLJIT_FS6 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 6) +#define SLJIT_FS7 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 7) +#define SLJIT_FS8 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 8) +#define SLJIT_FS9 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 9) +/* All FS registers provided by the architecture can be accessed by SLJIT_FS(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */ +#define SLJIT_FS(i) (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i)) + +/* Float registers >= SLJIT_FIRST_SAVED_FLOAT_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1) + +/* Return with floating point arg. */ + +#define SLJIT_RETURN_FREG SLJIT_FR0 + +/* --------------------------------------------------------------------- */ +/* Argument type definitions */ +/* --------------------------------------------------------------------- */ + +/* The following argument type definitions are used by sljit_emit_enter, + sljit_set_context, sljit_emit_call, and sljit_emit_icall functions. + + For sljit_emit_call and sljit_emit_icall, the first integer argument + must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on.
+ Similarly the first floating point argument must be placed into SLJIT_FR0, + the second one into SLJIT_FR1, and so on. + + For sljit_emit_enter, the integer arguments can be stored in scratch + or saved registers. Scratch registers are identified by a _R suffix. + + If only saved registers are used, then the allocation mirrors what is + done for the "call" functions but using saved registers, meaning that + the first integer argument goes to SLJIT_S0, the second one goes into + SLJIT_S1, and so on. + + If scratch registers are used, then the way the integer registers are + allocated changes so that SLJIT_S0, SLJIT_S1, etc; will be assigned + only for the arguments not using scratch registers, while SLJIT_R<n> + will be used for the ones using scratch registers. + + Furthermore, the index (shown as "n" above) that will be used for the + scratch register depends on how many previous integer registers + (scratch or saved) were used already, starting with SLJIT_R0. + Even though some indexes will likely be skipped, they still need to be + accounted for in the scratches parameter of sljit_emit_enter. See below + for some examples. + + The floating point arguments always use scratch registers (but not the + _R suffix like the integer arguments) and must use SLJIT_FR0, SLJIT_FR1, + just like in the "call" functions. + + Note: the mapping for scratch registers is part of the compiler context + and therefore a new context after sljit_emit_call/sljit_emit_icall + could remove access to some scratch registers that were used as + arguments.
+ + Example function definition: + sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a, + sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d); + + Argument type definition: + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_F32) + | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_P, 1) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F64, 2) + | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4) + + Short form of argument type definition: + SLJIT_ARGS4(F32, P, F64, 32, F32) + + Argument passing: + arg_a must be placed in SLJIT_R0 + arg_b must be placed in SLJIT_FR0 + arg_c must be placed in SLJIT_R1 + arg_d must be placed in SLJIT_FR1 + + Examples for argument processing by sljit_emit_enter: + SLJIT_ARGS4V(P, 32_R, F32, W) + Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1 + The type of the result is void. + + SLJIT_ARGS4(F32, W, W_R, W, W_R) + Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3 + The type of the result is sljit_f32. + + SLJIT_ARGS4(P, F32, W, F32, P_R) + Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1 + The type of the result is pointer. + + Note: it is recommended to pass the scratch arguments first + followed by the saved arguments: + + SLJIT_ARGS4(W, W_R, W_R, W, W) + Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1 + The type of the result is sljit_sw / sljit_uw. +*/ + +/* The following flag is only allowed for the integer arguments of + sljit_emit_enter. When the flag is set, the integer argument is + stored in a scratch register instead of a saved register. */ +#define SLJIT_ARG_TYPE_SCRATCH_REG 0x8 + +/* No return value, only supported by SLJIT_ARG_RETURN. */ +#define SLJIT_ARG_TYPE_RET_VOID 0 +/* Machine word sized integer argument or result. */ +#define SLJIT_ARG_TYPE_W 1 +#define SLJIT_ARG_TYPE_W_R (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG) +/* 32 bit integer argument or result.
*/ +#define SLJIT_ARG_TYPE_32 2 +#define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG) +/* Pointer sized integer argument or result. */ +#define SLJIT_ARG_TYPE_P 3 +#define SLJIT_ARG_TYPE_P_R (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG) +/* 64 bit floating point argument or result. */ +#define SLJIT_ARG_TYPE_F64 4 +/* 32 bit floating point argument or result. */ +#define SLJIT_ARG_TYPE_F32 5 + +#define SLJIT_ARG_SHIFT 4 +#define SLJIT_ARG_RETURN(type) (type) +#define SLJIT_ARG_VALUE(type, idx) ((type) << ((idx) * SLJIT_ARG_SHIFT)) + +/* Simplified argument list definitions. + + The following definition: + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 1) + + can be shortened to: + SLJIT_ARGS1(W, F32) + + Another example where no value is returned: + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_RET_VOID) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W_R, 1) + + can be shortened to: + SLJIT_ARGS1V(W_R) +*/ + +#define SLJIT_ARG_TO_TYPE(type) SLJIT_ARG_TYPE_ ## type + +#define SLJIT_ARGS0(ret) \ + SLJIT_ARG_RETURN(SLJIT_ARG_TO_TYPE(ret)) +#define SLJIT_ARGS0V() \ + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_RET_VOID) + +#define SLJIT_ARGS1(ret, arg1) \ + (SLJIT_ARGS0(ret) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1)) +#define SLJIT_ARGS1V(arg1) \ + (SLJIT_ARGS0V() | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1)) + +#define SLJIT_ARGS2(ret, arg1, arg2) \ + (SLJIT_ARGS1(ret, arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2)) +#define SLJIT_ARGS2V(arg1, arg2) \ + (SLJIT_ARGS1V(arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2)) + +#define SLJIT_ARGS3(ret, arg1, arg2, arg3) \ + (SLJIT_ARGS2(ret, arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3)) +#define SLJIT_ARGS3V(arg1, arg2, arg3) \ + (SLJIT_ARGS2V(arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3)) + +#define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4) \ + (SLJIT_ARGS3(ret, arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4)) +#define SLJIT_ARGS4V(arg1, arg2, arg3, arg4) \ + 
(SLJIT_ARGS3V(arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4)) + +/* --------------------------------------------------------------------- */ +/* Main structures and functions */ +/* --------------------------------------------------------------------- */ + +/* + The following structures are private, and can be changed in the + future. Keeping them here allows code inlining. +*/ + +struct sljit_memory_fragment { + struct sljit_memory_fragment *next; + sljit_uw used_size; + /* Must be aligned to sljit_sw. */ + sljit_u8 memory[1]; +}; + +struct sljit_label { + struct sljit_label *next; + union { + sljit_uw index; + sljit_uw addr; + } u; + /* The maximum size difference. */ + sljit_uw size; +}; + +struct sljit_jump { + struct sljit_jump *next; + sljit_uw addr; + /* Architecture dependent flags. */ + sljit_uw flags; + union { + sljit_uw target; + struct sljit_label *label; + } u; +}; + +struct sljit_const { + struct sljit_const *next; + sljit_uw addr; +}; + +struct sljit_generate_code_buffer { + void *buffer; + sljit_uw size; + sljit_sw executable_offset; +}; + +struct sljit_compiler { + sljit_s32 error; + sljit_s32 options; + + struct sljit_label *labels; + struct sljit_jump *jumps; + struct sljit_const *consts; + struct sljit_label *last_label; + struct sljit_jump *last_jump; + struct sljit_const *last_const; + + void *allocator_data; + void *user_data; + struct sljit_memory_fragment *buf; + struct sljit_memory_fragment *abuf; + + /* Number of labels created by the compiler. */ + sljit_uw label_count; + /* Available scratch registers. */ + sljit_s32 scratches; + /* Available saved registers. */ + sljit_s32 saveds; + /* Available float scratch registers. */ + sljit_s32 fscratches; + /* Available float saved registers. */ + sljit_s32 fsaveds; + /* Local stack size. */ + sljit_s32 local_size; + /* Maximum code size. */ + sljit_uw size; + /* Relative offset of the executable mapping from the writable mapping.
*/ + sljit_sw executable_offset; + /* Executable size for statistical purposes. */ + sljit_uw executable_size; + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + sljit_s32 status_flags_state; +#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 args_size; +#endif /* SLJIT_CONFIG_X86_32 */ + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* Temporary fields. */ + sljit_s32 mode32; +#endif /* SLJIT_CONFIG_X86_64 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + /* Constant pool handling. */ + sljit_uw *cpool; + sljit_u8 *cpool_unique; + sljit_uw cpool_diff; + sljit_uw cpool_fill; + /* Other members. */ + /* Contains pointer, "ldr pc, [...]" pairs. */ + sljit_uw patches; +#endif /* SLJIT_CONFIG_ARM_V6 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + /* Temporary fields. */ + sljit_uw shift_imm; +#endif /* SLJIT_CONFIG_ARM_V6 || SLJIT_CONFIG_ARM_V7 */ + +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__) + sljit_uw args_size; +#endif /* SLJIT_CONFIG_ARM_32 && __SOFTFP__ */ + +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + /* Temporary fields. */ + sljit_u32 imm; +#endif /* SLJIT_CONFIG_PPC */ + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + sljit_s32 delay_slot; + /* Temporary fields. */ + sljit_s32 cache_arg; + sljit_sw cache_argw; +#endif /* SLJIT_CONFIG_MIPS */ + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + sljit_uw args_size; +#endif /* SLJIT_CONFIG_MIPS_32 */ + +#if (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) + /* Temporary fields. */ + sljit_s32 cache_arg; + sljit_sw cache_argw; +#endif /* SLJIT_CONFIG_RISCV */ + +#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + /* Need to allocate register save area to make calls. */ + /* Temporary fields.
*/ + sljit_s32 mode; +#endif /* SLJIT_CONFIG_S390X */ + +#if (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) + /* Temporary fields. */ + sljit_s32 cache_arg; + sljit_sw cache_argw; +#endif /* SLJIT_CONFIG_LOONGARCH */ + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + FILE* verbose; +#endif /* SLJIT_VERBOSE */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + /* Flags specified by the last arithmetic instruction. + It contains the type of the variable flag. */ + sljit_s32 last_flags; + /* Return value type set by entry functions. */ + sljit_s32 last_return; + /* Local size passed to entry functions. */ + sljit_s32 logical_local_size; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + /* Trust arguments when an API function is called. + Used internally for calling API functions. */ + sljit_s32 skip_checks; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG || SLJIT_VERBOSE */ +}; + +/* --------------------------------------------------------------------- */ +/* Main functions */ +/* --------------------------------------------------------------------- */ + +/* Creates an SLJIT compiler. The allocator_data is required by some + custom memory managers. This pointer is passed to SLJIT_MALLOC + and SLJIT_FREE macros. Most allocators (including the default + one) ignore this value, and it is recommended to pass NULL + as a dummy value for allocator_data. + + Returns NULL if failed. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data); + +/* Frees everything except the compiled machine code. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler); + +/* Returns the current error code.
If an error occurs, future calls + which use the same compiler argument return early with the same + error code. Thus there is no need for checking the error after every + call, it is enough to do it after the code is compiled. Removing + these checks increases the performance of the compiling process. */ +static SLJIT_INLINE sljit_s32 sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; } + +/* Sets the compiler error code to SLJIT_ERR_ALLOC_FAILED except + if an error was detected before. After the error code is set + the compiler behaves as if the allocation failure happened + during an SLJIT function call. This can greatly simplify error + checking, since it is enough to check the compiler status + after the code is compiled. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler); + +/* Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit, + and <= 128 bytes on 64 bit architectures. The memory area is owned by the + compiler, and freed by sljit_free_compiler. The returned pointer is + sizeof(sljit_sw) aligned. Excellent for allocating small blocks during + compiling, and no need to worry about freeing them. The size is enough + to contain at most 16 pointers. If the size is outside of the range, + the function will return with NULL. However, this return value does not + indicate that there is no more memory (does not set the current error code + of the compiler to out-of-memory status). */ +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size); + +/* Returns the allocator data passed to sljit_create_compiler. */ +static SLJIT_INLINE void* sljit_compiler_get_allocator_data(struct sljit_compiler *compiler) { return compiler->allocator_data; } +/* Sets/gets the user data for a compiler.
*/ +static SLJIT_INLINE void sljit_compiler_set_user_data(struct sljit_compiler *compiler, void *user_data) { compiler->user_data = user_data; } +static SLJIT_INLINE void* sljit_compiler_get_user_data(struct sljit_compiler *compiler) { return compiler->user_data; } + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +/* Passing NULL disables verbose. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose); +#endif + +/* Option bits for sljit_generate_code. */ + +/* The exec_allocator_data points to a pre-allocated + buffer whose type is sljit_generate_code_buffer. */ +#define SLJIT_GENERATE_CODE_BUFFER 0x1 + +/* Create executable code from the instruction stream. This is the final step + of the code generation, and no more instructions can be emitted after this call. + + options is the combination of SLJIT_GENERATE_CODE_* bits + exec_allocator_data is passed to SLJIT_MALLOC_EXEC and + SLJIT_MALLOC_FREE functions */ + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data); + +/* Free executable code. */ + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data); + +/* When the protected executable allocator is used the JIT code is mapped + twice. The first mapping has read/write and the second mapping has read/exec + permissions. This function returns with the relative offset of the executable + mapping using the writable mapping as the base after the machine code is + successfully generated. The returned value is always 0 for the normal executable + allocator, since it uses only one mapping with read/write/exec permissions. + Dynamic code modifications require this value. + + Before a successful code generation, this function returns with 0.
*/ +static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *compiler) { return compiler->executable_offset; } + +/* The executable memory consumption of the generated code can be retrieved by + this function. The returned value can be used for statistical purposes. + + Before a successful code generation, this function returns with 0. */ +static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; } + +/* Returns with non-zero if the feature or limitation type passed as its + argument is present on the current CPU. The return value is one, if a + feature is fully supported, and it is two, if partially supported. + + Some features (e.g. floating point operations) require hardware (CPU) + support while others (e.g. move with update) are emulated if not available. + However, even when a feature is emulated, specialized code paths may be + faster than the emulation. Some limitations are emulated as well so their + general case is supported but it has extra performance costs. */ + +/* [Not emulated] Floating-point support is available. */ +#define SLJIT_HAS_FPU 0 +/* [Limitation] Some registers are virtual registers. */ +#define SLJIT_HAS_VIRTUAL_REGISTERS 1 +/* [Emulated] Has zero register (setting a memory location to zero is efficient). */ +#define SLJIT_HAS_ZERO_REGISTER 2 +/* [Emulated] Count leading zero is supported. */ +#define SLJIT_HAS_CLZ 3 +/* [Emulated] Count trailing zero is supported. */ +#define SLJIT_HAS_CTZ 4 +/* [Emulated] Reverse the order of bytes is supported. */ +#define SLJIT_HAS_REV 5 +/* [Emulated] Rotate left/right is supported. */ +#define SLJIT_HAS_ROT 6 +/* [Emulated] Conditional move is supported. */ +#define SLJIT_HAS_CMOV 7 +/* [Emulated] Prefetch instruction is available (emulated as a nop). */ +#define SLJIT_HAS_PREFETCH 8 +/* [Emulated] Copy from/to f32 operation is available (see sljit_emit_fcopy). 
*/ +#define SLJIT_HAS_COPY_F32 9 +/* [Emulated] Copy from/to f64 operation is available (see sljit_emit_fcopy). */ +#define SLJIT_HAS_COPY_F64 10 +/* [Not emulated] The 64 bit floating point registers can be used as + two separate 32 bit floating point registers (e.g. ARM32). The + second 32 bit part can be accessed by SLJIT_F64_SECOND. */ +#define SLJIT_HAS_F64_AS_F32_PAIR 11 +/* [Not emulated] Some SIMD operations are supported by the compiler. */ +#define SLJIT_HAS_SIMD 12 +/* [Not emulated] SIMD registers are mapped to a pair of double precision + floating point registers. E.g. passing either SLJIT_FR0 or SLJIT_FR1 to + a simd operation represents the same 128 bit register, and both SLJIT_FR0 + and SLJIT_FR1 are overwritten. */ +#define SLJIT_SIMD_REGS_ARE_PAIRS 13 +/* [Not emulated] Atomic support is available (fine-grained). */ +#define SLJIT_HAS_ATOMIC 14 + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +/* [Not emulated] AVX support is available on x86. */ +#define SLJIT_HAS_AVX 100 +/* [Not emulated] AVX2 support is available on x86. */ +#define SLJIT_HAS_AVX2 101 +#endif + +#if (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) +/* [Not emulated] LASX support is available on LoongArch. */ +#define SLJIT_HAS_LASX 201 +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type); + +/* If type is between SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL, + sljit_cmp_info returns with: + zero - if the cpu supports the floating point comparison type + one - if the comparison requires two machine instructions + two - if the comparison requires more than two machine instructions + + When the result is non-zero, it is recommended to avoid + using the specified comparison type if it is easy to do so. + + Otherwise it returns zero. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type); + +/* The following functions generate machine code.
If there is no + error, they return with SLJIT_SUCCESS, otherwise they return + with an error code. */ + +/* + The executable code is a function from the viewpoint of the C + language. The function calls must conform to the ABI (Application + Binary Interface) of the platform, which specifies the purpose of + machine registers and stack handling among other things. The + sljit_emit_enter function emits the necessary instructions for + setting up a new context for the executable code. This is often + called the function prologue. Furthermore the options argument + can be used to pass configuration options to the compiler. The + available options are listed before sljit_emit_enter. + + The function argument list is specified by the SLJIT_ARGSx + (SLJIT_ARGS0 .. SLJIT_ARGS4) macros. Currently a maximum of four + arguments are supported. See the description of SLJIT_ARGSx + macros about argument passing. Furthermore the register set + used by the function must be declared as well. The number of + scratch and saved registers available to the function must + be passed to sljit_emit_enter. Only R registers between R0 + and "scratches" argument can be used later. E.g. if "scratches" + is set to two, the scratch register set will be limited to + SLJIT_R0 and SLJIT_R1. The S registers and the floating point + registers ("fscratches" and "fsaveds") are specified in a + similar manner. The sljit_emit_enter is also capable of + allocating a stack space for local data. The "local_size" + argument contains the size in bytes of this local area, and + it can be accessed using SLJIT_MEM1(SLJIT_SP). The memory + area between SLJIT_SP (inclusive) and SLJIT_SP + local_size + (exclusive) can be modified freely until the function returns. + The stack space is not initialized to zero.
+ + Note: the following conditions must be met: + 0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS + 0 <= saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS + scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS + 0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + 0 <= fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + + Note: the compiler can use saved registers as scratch registers, + but the opposite is not supported. + + Note: every call of sljit_emit_enter and sljit_set_context + overwrites the previous context. +*/ + +/* Saved registers between SLJIT_S0 and SLJIT_S(n - 1) (inclusive) + are not saved / restored on function enter / return. Instead, + these registers can be used to pass / return data (such as + global / local context pointers) across function calls. The + value of n must be between 1 and 3. This option is only + supported by SLJIT_ENTER_REG_ARG calling convention. */ +#define SLJIT_ENTER_KEEP(n) (n) + +/* The compiled function uses an SLJIT specific register argument + calling convention. This is a lightweight function call type where + both the caller and the called functions must be compiled by + SLJIT. The type argument of the call must be SLJIT_CALL_REG_ARG + and all arguments must be stored in scratch registers. */ +#define SLJIT_ENTER_REG_ARG 0x00000004 + +/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ +#define SLJIT_MAX_LOCAL_SIZE 1048576 + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +/* Use VEX prefix for all SIMD operations on x86. */ +#define SLJIT_ENTER_USE_VEX 0x00010000 +#endif /* SLJIT_CONFIG_X86 */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); + +/* The SLJIT compiler has a current context (which contains the local + stack space size, number of used registers, etc.)
which is initialized + by sljit_emit_enter. Several functions (such as sljit_emit_return) + require this context to be able to generate the appropriate code. + However, some code fragments (compiled separately) may have no + normal entry point so their context is unknown to the compiler. + + sljit_set_context and sljit_emit_enter have the same arguments, + but sljit_set_context does not generate any machine code. + + Note: every call of sljit_emit_enter and sljit_set_context overwrites + the previous context. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); + +/* Return to the caller function. The sljit_emit_return_void function + does not return with any value. The sljit_emit_return function returns + with a single value loaded from its source operand. The load operation + can be between SLJIT_MOV and SLJIT_MOV_P (see sljit_emit_op1) and + SLJIT_MOV_F32/SLJIT_MOV_F64 (see sljit_emit_fop1) depending on the + return value specified by sljit_emit_enter/sljit_set_context. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler); + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw); + +/* Restores the saved registers and frees the stack area, then the execution + continues from the address specified by the source operand. This + operation is similar to sljit_emit_return, but it ignores the return + address. The code where the execution continues should use the same context + as the caller function (see sljit_set_context). A word (pointer) value + can be passed in the SLJIT_RETURN_REG register. This function can be used + to jump to exception handlers.
*/ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw); + +/* + Source and destination operands for arithmetical instructions + imm - a simple immediate value (cannot be used as a destination) + reg - any of the available registers (immediate argument must be 0) + [imm] - absolute memory address + [reg+imm] - indirect memory address + [reg+(reg<<imm)] - indirect indexed memory address (shift must be between 0 and 3) +*/ + +/* NOTE(review): the text between the operand list above and SLJIT_IS_REG was lost in extraction. The operand constructors below are reconstructed from the SLJIT_IS_* and SLJIT_EXTRACT_* macros that depend on them; the original explanatory comments are not restored. Confirm against the upstream header. */ +#define SLJIT_MEM 0x80 +#define SLJIT_MEM0() (SLJIT_MEM) +#define SLJIT_MEM1(r1) (SLJIT_MEM | (r1)) +#define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 8)) +#define SLJIT_IMM 0x7f +#define SLJIT_REG_PAIR(r1, r2) ((r1) | ((r2) << 8)) + +/* Macros for checking operand types (only for valid arguments). */ +#define SLJIT_IS_REG(arg) ((arg) > 0 && (arg) < SLJIT_IMM) +#define SLJIT_IS_MEM(arg) ((arg) & SLJIT_MEM) +#define SLJIT_IS_MEM0(arg) ((arg) == SLJIT_MEM) +#define SLJIT_IS_MEM1(arg) ((arg) > SLJIT_MEM && (arg) < (SLJIT_MEM << 1)) +#define SLJIT_IS_MEM2(arg) (((arg) & SLJIT_MEM) && (arg) >= (SLJIT_MEM << 1)) +#define SLJIT_IS_IMM(arg) ((arg) == SLJIT_IMM) +#define SLJIT_IS_REG_PAIR(arg) (!((arg) & SLJIT_MEM) && (arg) >= (SLJIT_MEM << 1)) + +/* Macros for extracting registers from operands. */ +/* Supports operands which contain a single register or are + constructed using SLJIT_MEM1, SLJIT_MEM2, or SLJIT_REG_PAIR. */ +#define SLJIT_EXTRACT_REG(arg) ((arg) & 0x7f) +/* Supports operands which are constructed using SLJIT_MEM2, or SLJIT_REG_PAIR. */ +#define SLJIT_EXTRACT_SECOND_REG(arg) ((arg) >> 8) + +/* Sets 32 bit operation mode on 64 bit CPUs. This option is ignored on + 32 bit CPUs. When this option is set for an arithmetic operation, only + the lower 32 bits of the input registers are used, and the CPU status + flags are set according to the 32 bit result. Although the higher 32 bit + of the input and the result registers are not defined by SLJIT, it might + be defined by the CPU architecture (e.g. MIPS). To satisfy these CPU + requirements all source registers must be the result of those operations + where this option was also set. Memory loads read 32 bit values rather + than 64 bit ones. In other words 32 bit and 64 bit operations cannot be + mixed.
The only exception is SLJIT_MOV32 which source register can hold + any 32 or 64 bit value, and it is converted to a 32 bit compatible format + first. When the source and destination registers are the same, this + conversion is free (no instructions are emitted) on most CPUs. A 32 bit + value can also be converted to a 64 bit value by SLJIT_MOV_S32 + (sign extension) or SLJIT_MOV_U32 (zero extension). + + As for floating-point operations, this option sets 32 bit single + precision mode. Similar to the integer operations, all register arguments + must be the result of those operations where this option was also set. + + Note: memory addressing always uses 64 bit values on 64 bit systems so + the result of a 32 bit operation must not be used with SLJIT_MEMx + macros. + + This option is part of the instruction name, so there is no need to + manually set it. E.g: + + SLJIT_ADD32 == (SLJIT_ADD | SLJIT_32) */ +#define SLJIT_32 0x100 + +/* Many CPUs (x86, ARM, PPC) have status flag bits which can be set according + to the result of an operation. Other CPUs (MIPS) do not have status + flag bits, and results must be stored in registers. To cover both + architecture types efficiently only two flags are defined by SLJIT: + + * Zero (equal) flag: it is set if the result is zero + * Variable flag: its value is defined by the arithmetic operation + + SLJIT instructions can set any or both of these flags. The value of + these flags is undefined if the instruction does not specify their + value. The description of each instruction contains the list of + allowed flag types. + + Note: the logical or operation can be used to set flags. + + Example: SLJIT_ADD can set the Z, OVERFLOW, CARRY flags hence + + sljit_op2(..., SLJIT_ADD, ...) + Both the zero and variable flags are undefined so they can + have any value after the operation is completed. + + sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...) + Sets the zero flag if the result is zero, clears it otherwise. 
+ The variable flag is undefined. + + sljit_op2(..., SLJIT_ADD | SLJIT_SET_OVERFLOW, ...) + Sets the variable flag if an integer overflow occurs, clears + it otherwise. The zero flag is undefined. + + sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z | SLJIT_SET_CARRY, ...) + Sets the zero flag if the result is zero, clears it otherwise. + Sets the variable flag if unsigned overflow (carry) occurs, + clears it otherwise. + + Certain instructions (e.g. SLJIT_MOV) do not modify flags, so + status flags are unchanged. + + Example: + + sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...) + sljit_op1(..., SLJIT_MOV, ...) + Zero flag is set according to the result of SLJIT_ADD. + + sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...) + sljit_op2(..., SLJIT_ADD, ...) + Zero flag has unknown value. + + These flags can be used for code optimization. E.g. a fast loop can be + implemented by decreasing a counter register and setting the zero flag + using a single instruction. The zero flag can be used by a + conditional jump to restart the loop. A single comparison can set the + zero and less flags to check if a value is less, equal, or greater + than another value. + + Motivation: although some CPUs can set a large number of flag bits, + usually their values are ignored or only a few of them are used. Emulating + a large number of flags on systems without a flag register is complicated + so SLJIT instructions must specify the flag they want to use and only + that flag is computed. The last arithmetic instruction can be repeated if + multiple flags need to be checked. +*/ + +/* Set Zero status flag. */ +#define SLJIT_SET_Z 0x0200 +/* Set the variable status flag if condition is true. + See comparison types (e.g. SLJIT_SET_LESS, SLJIT_SET_F_EQUAL). */ +#define SLJIT_SET(condition) ((condition) << 10) + +/* Starting index of opcodes for sljit_emit_op0. */ +#define SLJIT_OP0_BASE 0 + +/* Flags: - (does not modify flags) + Note: breakpoint instruction is not supported by all architectures (e.g.
ppc) + It falls back to SLJIT_NOP in those cases. */ +#define SLJIT_BREAKPOINT (SLJIT_OP0_BASE + 0) +/* Flags: - (does not modify flags) + Note: may or may not cause an extra cycle wait + it can even decrease the runtime in a few cases. */ +#define SLJIT_NOP (SLJIT_OP0_BASE + 1) +/* Flags: - (may destroy flags) + Unsigned multiplication of SLJIT_R0 and SLJIT_R1. + Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */ +#define SLJIT_LMUL_UW (SLJIT_OP0_BASE + 2) +/* Flags: - (may destroy flags) + Signed multiplication of SLJIT_R0 and SLJIT_R1. + Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */ +#define SLJIT_LMUL_SW (SLJIT_OP0_BASE + 3) +/* Flags: - (may destroy flags) + Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. + Note: if SLJIT_R1 is 0, the behaviour is undefined. */ +#define SLJIT_DIVMOD_UW (SLJIT_OP0_BASE + 4) +#define SLJIT_DIVMOD_U32 (SLJIT_DIVMOD_UW | SLJIT_32) +/* Flags: - (may destroy flags) + Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. + Note: if SLJIT_R1 is 0, the behaviour is undefined. + Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), + the behaviour is undefined. */ +#define SLJIT_DIVMOD_SW (SLJIT_OP0_BASE + 5) +#define SLJIT_DIVMOD_S32 (SLJIT_DIVMOD_SW | SLJIT_32) +/* Flags: - (may destroy flags) + Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. + Note: if SLJIT_R1 is 0, the behaviour is undefined. */ +#define SLJIT_DIV_UW (SLJIT_OP0_BASE + 6) +#define SLJIT_DIV_U32 (SLJIT_DIV_UW | SLJIT_32) +/* Flags: - (may destroy flags) + Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. + Note: if SLJIT_R1 is 0, the behaviour is undefined. 
+ Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), + the behaviour is undefined. */ +#define SLJIT_DIV_SW (SLJIT_OP0_BASE + 7) +#define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_32) +/* Flags: - (does not modify flags) + ENDBR32 instruction for x86-32 and ENDBR64 instruction for x86-64 + when Intel Control-flow Enforcement Technology (CET) is enabled. + No instructions are emitted for other architectures. */ +#define SLJIT_ENDBR (SLJIT_OP0_BASE + 8) +/* Flags: - (may destroy flags) + Skip stack frames before return when Intel Control-flow + Enforcement Technology (CET) is enabled. No instructions + are emitted for other architectures. */ +#define SLJIT_SKIP_FRAMES_BEFORE_RETURN (SLJIT_OP0_BASE + 9) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op); + +/* Starting index of opcodes for sljit_emit_op1. */ +#define SLJIT_OP1_BASE 32 + +/* The MOV instruction transfers data from source to destination. + + MOV instruction suffixes: + + U8 - unsigned 8 bit data transfer + S8 - signed 8 bit data transfer + U16 - unsigned 16 bit data transfer + S16 - signed 16 bit data transfer + U32 - unsigned int (32 bit) data transfer + S32 - signed int (32 bit) data transfer + P - pointer (sljit_up) data transfer +*/ + +/* Flags: - (does not modify flags) */ +#define SLJIT_MOV (SLJIT_OP1_BASE + 0) +/* Flags: - (does not modify flags) */ +#define SLJIT_MOV_U8 (SLJIT_OP1_BASE + 1) +#define SLJIT_MOV32_U8 (SLJIT_MOV_U8 | SLJIT_32) +/* Flags: - (does not modify flags) */ +#define SLJIT_MOV_S8 (SLJIT_OP1_BASE + 2) +#define SLJIT_MOV32_S8 (SLJIT_MOV_S8 | SLJIT_32) +/* Flags: - (does not modify flags) */ +#define SLJIT_MOV_U16 (SLJIT_OP1_BASE + 3) +#define SLJIT_MOV32_U16 (SLJIT_MOV_U16 | SLJIT_32) +/* Flags: - (does not modify flags) */ +#define SLJIT_MOV_S16 (SLJIT_OP1_BASE + 4) +#define SLJIT_MOV32_S16 (SLJIT_MOV_S16 | SLJIT_32) +/* Flags: - (does not modify flags) + Note: no SLJIT_MOV32_U32 form, since it is the same as 
SLJIT_MOV32 */ +#define SLJIT_MOV_U32 (SLJIT_OP1_BASE + 5) +/* Flags: - (does not modify flags) + Note: no SLJIT_MOV32_S32 form, since it is the same as SLJIT_MOV32 */ +#define SLJIT_MOV_S32 (SLJIT_OP1_BASE + 6) +/* Flags: - (does not modify flags) */ +#define SLJIT_MOV32 (SLJIT_OP1_BASE + 7) +/* Flags: - (does not modify flags) + Note: loads a pointer sized data, useful on x32 mode (a 64 bit mode + on x86-64 which uses 32 bit pointers) or similar compiling modes */ +#define SLJIT_MOV_P (SLJIT_OP1_BASE + 8) +/* Count leading zeroes + Flags: - (may destroy flags) + Note: immediate source argument is not supported */ +#define SLJIT_CLZ (SLJIT_OP1_BASE + 9) +#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_32) +/* Count trailing zeroes + Flags: - (may destroy flags) + Note: immediate source argument is not supported */ +#define SLJIT_CTZ (SLJIT_OP1_BASE + 10) +#define SLJIT_CTZ32 (SLJIT_CTZ | SLJIT_32) +/* Reverse the order of bytes + Flags: - (may destroy flags) + Note: converts between little and big endian formats + Note: immediate source argument is not supported */ +#define SLJIT_REV (SLJIT_OP1_BASE + 11) +#define SLJIT_REV32 (SLJIT_REV | SLJIT_32) +/* Reverse the order of bytes in the lower 16 bit and extend as unsigned + Flags: - (may destroy flags) + Note: converts between little and big endian formats + Note: immediate source argument is not supported */ +#define SLJIT_REV_U16 (SLJIT_OP1_BASE + 12) +#define SLJIT_REV32_U16 (SLJIT_REV_U16 | SLJIT_32) +/* Reverse the order of bytes in the lower 16 bit and extend as signed + Flags: - (may destroy flags) + Note: converts between little and big endian formats + Note: immediate source argument is not supported */ +#define SLJIT_REV_S16 (SLJIT_OP1_BASE + 13) +#define SLJIT_REV32_S16 (SLJIT_REV_S16 | SLJIT_32) +/* Reverse the order of bytes in the lower 32 bit and extend as unsigned + Flags: - (may destroy flags) + Note: converts between little and big endian formats + Note: immediate source argument is not supported */ 
+#define SLJIT_REV_U32 (SLJIT_OP1_BASE + 14) +/* Reverse the order of bytes in the lower 32 bit and extend as signed + Flags: - (may destroy flags) + Note: converts between little and big endian formats + Note: immediate source argument is not supported */ +#define SLJIT_REV_S32 (SLJIT_OP1_BASE + 15) + +/* The following unary operations are supported by using sljit_emit_op2: + - binary not: SLJIT_XOR with immediate -1 as src1 or src2 + - negate: SLJIT_SUB with immediate 0 as src1 + Note: these operations are optimized by the compiler if the + target CPU has specialized instruction forms for them. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw); + +/* Starting index of opcodes for sljit_emit_op2. */ +#define SLJIT_OP2_BASE 64 + +/* Flags: Z | OVERFLOW | CARRY */ +#define SLJIT_ADD (SLJIT_OP2_BASE + 0) +#define SLJIT_ADD32 (SLJIT_ADD | SLJIT_32) +/* Flags: CARRY */ +#define SLJIT_ADDC (SLJIT_OP2_BASE + 1) +#define SLJIT_ADDC32 (SLJIT_ADDC | SLJIT_32) +/* Flags: Z | LESS | GREATER_EQUAL | GREATER | LESS_EQUAL + SIG_LESS | SIG_GREATER_EQUAL | SIG_GREATER + SIG_LESS_EQUAL | OVERFLOW | CARRY */ +#define SLJIT_SUB (SLJIT_OP2_BASE + 2) +#define SLJIT_SUB32 (SLJIT_SUB | SLJIT_32) +/* Flags: CARRY */ +#define SLJIT_SUBC (SLJIT_OP2_BASE + 3) +#define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_32) +/* Note: integer mul + Flags: OVERFLOW */ +#define SLJIT_MUL (SLJIT_OP2_BASE + 4) +#define SLJIT_MUL32 (SLJIT_MUL | SLJIT_32) +/* Flags: Z */ +#define SLJIT_AND (SLJIT_OP2_BASE + 5) +#define SLJIT_AND32 (SLJIT_AND | SLJIT_32) +/* Flags: Z */ +#define SLJIT_OR (SLJIT_OP2_BASE + 6) +#define SLJIT_OR32 (SLJIT_OR | SLJIT_32) +/* Flags: Z */ +#define SLJIT_XOR (SLJIT_OP2_BASE + 7) +#define SLJIT_XOR32 (SLJIT_XOR | SLJIT_32) +/* Flags: Z + Let bit_length be the length of the shift operation: 32 or 64. + If src2 is immediate, src2w is masked by (bit_length - 1). 
+ Otherwise, if the content of src2 is outside the range from 0 + to bit_length - 1, the result is undefined. */ +#define SLJIT_SHL (SLJIT_OP2_BASE + 8) +#define SLJIT_SHL32 (SLJIT_SHL | SLJIT_32) +/* Flags: Z + Same as SLJIT_SHL, except the second operand is + always masked by the length of the shift operation. */ +#define SLJIT_MSHL (SLJIT_OP2_BASE + 9) +#define SLJIT_MSHL32 (SLJIT_MSHL | SLJIT_32) +/* Flags: Z + Let bit_length be the length of the shift operation: 32 or 64. + If src2 is immediate, src2w is masked by (bit_length - 1). + Otherwise, if the content of src2 is outside the range from 0 + to bit_length - 1, the result is undefined. */ +#define SLJIT_LSHR (SLJIT_OP2_BASE + 10) +#define SLJIT_LSHR32 (SLJIT_LSHR | SLJIT_32) +/* Flags: Z + Same as SLJIT_LSHR, except the second operand is + always masked by the length of the shift operation. */ +#define SLJIT_MLSHR (SLJIT_OP2_BASE + 11) +#define SLJIT_MLSHR32 (SLJIT_MLSHR | SLJIT_32) +/* Flags: Z + Let bit_length be the length of the shift operation: 32 or 64. + If src2 is immediate, src2w is masked by (bit_length - 1). + Otherwise, if the content of src2 is outside the range from 0 + to bit_length - 1, the result is undefined. */ +#define SLJIT_ASHR (SLJIT_OP2_BASE + 12) +#define SLJIT_ASHR32 (SLJIT_ASHR | SLJIT_32) +/* Flags: Z + Same as SLJIT_ASHR, except the second operand is + always masked by the length of the shift operation. */ +#define SLJIT_MASHR (SLJIT_OP2_BASE + 13) +#define SLJIT_MASHR32 (SLJIT_MASHR | SLJIT_32) +/* Flags: - (may destroy flags) + Let bit_length be the length of the rotate operation: 32 or 64. + The second operand is always masked by (bit_length - 1). */ +#define SLJIT_ROTL (SLJIT_OP2_BASE + 14) +#define SLJIT_ROTL32 (SLJIT_ROTL | SLJIT_32) +/* Flags: - (may destroy flags) + Let bit_length be the length of the rotate operation: 32 or 64. + The second operand is always masked by (bit_length - 1). 
*/ +#define SLJIT_ROTR (SLJIT_OP2_BASE + 15) +#define SLJIT_ROTR32 (SLJIT_ROTR | SLJIT_32) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +/* The sljit_emit_op2u function is the same as sljit_emit_op2 + except the result is discarded. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +/* Starting index of opcodes for sljit_emit_op2r. */ +#define SLJIT_OP2R_BASE 96 + +/* Flags: - (may destroy flags) */ +#define SLJIT_MULADD (SLJIT_OP2R_BASE + 0) +#define SLJIT_MULADD32 (SLJIT_MULADD | SLJIT_32) + +/* Similar to sljit_emit_op2, except the destination is always a register. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +/* Emit a left or right shift operation, where the bits shifted + in come from a separate source operand. All operands are + interpreted as unsigned integers. + + In the following, the value_mask variable is 31 for 32 bit + operations and word_size - 1 otherwise. 
+ + op must be one of the following operations: + SLJIT_SHL or SLJIT_SHL32: + dst_reg = src1_reg << src3_reg + dst_reg |= ((src2_reg >> 1) >> (src3 ^ value_mask)) + SLJIT_MSHL or SLJIT_MSHL32: + src3 &= value_mask + perform the SLJIT_SHL or SLJIT_SHL32 operation + SLJIT_LSHR or SLJIT_LSHR32: + dst_reg = src1_reg >> src3_reg + dst_reg |= ((src2_reg << 1) << (src3 ^ value_mask)) + SLJIT_MLSHR or SLJIT_MLSHR32: + src3 &= value_mask + perform the SLJIT_LSHR or SLJIT_LSHR32 operation + + op can be combined (or'ed) with SLJIT_SHIFT_INTO_NON_ZERO + + dst_reg specifies the destination register, where dst_reg + and src2_reg cannot be the same registers + src1_reg specifies the source register + src2_reg specifies the register which is shifted into src1_reg + src3 / src3w contains the shift amount + + Note: a rotate operation is performed if src1_reg and + src2_reg are the same registers + + Flags: - (may destroy flags) */ + +/* The src3 operand contains a non-zero value. Improves + the generated code on certain architectures, which + provides a small performance improvement. */ +#define SLJIT_SHIFT_INTO_NON_ZERO 0x200 + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1_reg, + sljit_s32 src2_reg, + sljit_s32 src3, sljit_sw src3w); + +/* Starting index of opcodes for sljit_emit_op_src + and sljit_emit_op_dst. */ +#define SLJIT_OP_SRC_DST_BASE 112 + +/* Fast return, see SLJIT_FAST_CALL for more details. + Note: src cannot be an immediate value + Flags: - (does not modify flags) */ +#define SLJIT_FAST_RETURN (SLJIT_OP_SRC_DST_BASE + 0) +/* Skip stack frames before fast return. + Note: src cannot be an immediate value + Flags: may destroy flags. */ +#define SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN (SLJIT_OP_SRC_DST_BASE + 1) +/* Prefetch value into the level 1 data cache + Note: if the target CPU does not support data prefetch, + no instructions are emitted. 
+ Note: this instruction never fails, even if the memory address is invalid. + Flags: - (does not modify flags) */ +#define SLJIT_PREFETCH_L1 (SLJIT_OP_SRC_DST_BASE + 2) +/* Prefetch value into the level 2 data cache + Note: same as SLJIT_PREFETCH_L1 if the target CPU + does not support this instruction form. + Note: this instruction never fails, even if the memory address is invalid. + Flags: - (does not modify flags) */ +#define SLJIT_PREFETCH_L2 (SLJIT_OP_SRC_DST_BASE + 3) +/* Prefetch value into the level 3 data cache + Note: same as SLJIT_PREFETCH_L2 if the target CPU + does not support this instruction form. + Note: this instruction never fails, even if the memory address is invalid. + Flags: - (does not modify flags) */ +#define SLJIT_PREFETCH_L3 (SLJIT_OP_SRC_DST_BASE + 4) +/* Prefetch a value which is only used once (and can be discarded afterwards) + Note: same as SLJIT_PREFETCH_L1 if the target CPU + does not support this instruction form. + Note: this instruction never fails, even if the memory address is invalid. + Flags: - (does not modify flags) */ +#define SLJIT_PREFETCH_ONCE (SLJIT_OP_SRC_DST_BASE + 5) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw); + +/* Fast enter, see SLJIT_FAST_CALL for more details. + Flags: - (does not modify flags) */ +#define SLJIT_FAST_ENTER (SLJIT_OP_SRC_DST_BASE + 6) + +/* Copies the return address into dst. The return address is the + address where the execution continues after the called function + returns (see: sljit_emit_return / sljit_emit_return_void). + Flags: - (does not modify flags) */ +#define SLJIT_GET_RETURN_ADDRESS (SLJIT_OP_SRC_DST_BASE + 7) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw); + +/* Starting index of opcodes for sljit_emit_fop1. 
*/ +#define SLJIT_FOP1_BASE 144 + +/* Flags: - (does not modify flags) */ +#define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0) +#define SLJIT_MOV_F32 (SLJIT_MOV_F64 | SLJIT_32) +/* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE] + SRC/DST TYPE can be: F64, F32, S32, SW + Rounding mode when the destination is SW or S32: round towards zero. */ +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_F64_FROM_F32 (SLJIT_FOP1_BASE + 1) +#define SLJIT_CONV_F32_FROM_F64 (SLJIT_CONV_F64_FROM_F32 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_SW_FROM_F64 (SLJIT_FOP1_BASE + 2) +#define SLJIT_CONV_SW_FROM_F32 (SLJIT_CONV_SW_FROM_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_S32_FROM_F64 (SLJIT_FOP1_BASE + 3) +#define SLJIT_CONV_S32_FROM_F32 (SLJIT_CONV_S32_FROM_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_F64_FROM_SW (SLJIT_FOP1_BASE + 4) +#define SLJIT_CONV_F32_FROM_SW (SLJIT_CONV_F64_FROM_SW | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_F64_FROM_S32 (SLJIT_FOP1_BASE + 5) +#define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_F64_FROM_UW (SLJIT_FOP1_BASE + 6) +#define SLJIT_CONV_F32_FROM_UW (SLJIT_CONV_F64_FROM_UW | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_F64_FROM_U32 (SLJIT_FOP1_BASE + 7) +#define SLJIT_CONV_F32_FROM_U32 (SLJIT_CONV_F64_FROM_U32 | SLJIT_32) +/* Note: dst is the left and src is the right operand for SLJIT_CMP_F32/64. 
+ Flags: EQUAL_F | LESS_F | GREATER_EQUAL_F | GREATER_F | LESS_EQUAL_F */ +#define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 8) +#define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 9) +#define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 10) +#define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_32) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw); + +/* Starting index of opcodes for sljit_emit_fop2. */ +#define SLJIT_FOP2_BASE 176 + +/* Flags: - (may destroy flags) */ +#define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0) +#define SLJIT_ADD_F32 (SLJIT_ADD_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_SUB_F64 (SLJIT_FOP2_BASE + 1) +#define SLJIT_SUB_F32 (SLJIT_SUB_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_MUL_F64 (SLJIT_FOP2_BASE + 2) +#define SLJIT_MUL_F32 (SLJIT_MUL_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_DIV_F64 (SLJIT_FOP2_BASE + 3) +#define SLJIT_DIV_F32 (SLJIT_DIV_F64 | SLJIT_32) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +/* Starting index of opcodes for sljit_emit_fop2r. */ +#define SLJIT_FOP2R_BASE 192 + +/* Flags: - (may destroy flags) */ +#define SLJIT_COPYSIGN_F64 (SLJIT_FOP2R_BASE + 0) +#define SLJIT_COPYSIGN_F32 (SLJIT_COPYSIGN_F64 | SLJIT_32) + +/* Similar to sljit_emit_fop2, except the destination is always a register. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +/* Sets a floating point register to an immediate value. 
*/ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value); +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value); + +/* The following opcodes are used by sljit_emit_fcopy(). */ + +/* 64 bit: copy a 64 bit value from an integer register into a + 64 bit floating point register without any modifications. + 32 bit: copy a 32 bit register or register pair into a 64 bit + floating point register without any modifications. The + register, or the first register of the register pair + replaces the high order 32 bit of the floating point + register. If a register pair is passed, the low + order 32 bit is replaced by the second register. + Otherwise, the low order 32 bit is unchanged. */ +#define SLJIT_COPY_TO_F64 1 +/* Copy a 32 bit value from an integer register into a 32 bit + floating point register without any modifications. */ +#define SLJIT_COPY32_TO_F32 (SLJIT_COPY_TO_F64 | SLJIT_32) +/* 64 bit: copy the value of a 64 bit floating point register into + an integer register without any modifications. + 32 bit: copy a 64 bit floating point register into a 32 bit register + or a 32 bit register pair without any modifications. The + high order 32 bit of the floating point register is copied + into the register, or the first register of the register + pair. If a register pair is passed, the low order 32 bit + is copied into the second register. */ +#define SLJIT_COPY_FROM_F64 2 +/* Copy the value of a 32 bit floating point register into an integer + register without any modifications. The register should be processed + with 32 bit operations later. */ +#define SLJIT_COPY32_FROM_F32 (SLJIT_COPY_FROM_F64 | SLJIT_32) + +/* Special data copy which involves floating point registers. 
+ + op must be between SLJIT_COPY_TO_F64 and SLJIT_COPY32_FROM_F32 + freg must be a floating point register + reg must be a register or register pair */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg); + +/* Label and jump instructions. */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler); + +/* The SLJIT_FAST_CALL is a calling method for creating lightweight function + calls. This type of calls preserve the values of all registers and stack + frame. Unlike normal function calls, the enter and return operations must + be performed by the SLJIT_FAST_ENTER and SLJIT_FAST_RETURN operations + respectively. The return address is stored in the dst argument of the + SLJIT_FAST_ENTER operation, and this return address should be passed as + the src argument for the SLJIT_FAST_RETURN operation to return from the + called function. + + Fast calls are cheap operations (usually only a single call instruction is + emitted) but they do not preserve any registers. However the callee function + can freely use / update any registers and the locals area which can be + efficiently exploited by various optimizations. Registers can be saved + and restored manually if needed. + + Although returning to different address by SLJIT_FAST_RETURN is possible, + this address usually cannot be predicted by the return address predictor of + modern CPUs which may reduce performance. Furthermore certain security + enhancement technologies such as Intel Control-flow Enforcement Technology + (CET) may disallow returning to a different address (indirect jumps + can be used instead, see SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN). */ + +/* Invert (negate) conditional type: xor (^) with 0x1 */ + +/* Integer comparison types. 
*/ +#define SLJIT_EQUAL 0 +#define SLJIT_ZERO SLJIT_EQUAL +#define SLJIT_NOT_EQUAL 1 +#define SLJIT_NOT_ZERO SLJIT_NOT_EQUAL + +#define SLJIT_LESS 2 +#define SLJIT_SET_LESS SLJIT_SET(SLJIT_LESS) +#define SLJIT_GREATER_EQUAL 3 +#define SLJIT_SET_GREATER_EQUAL SLJIT_SET(SLJIT_LESS) +#define SLJIT_GREATER 4 +#define SLJIT_SET_GREATER SLJIT_SET(SLJIT_GREATER) +#define SLJIT_LESS_EQUAL 5 +#define SLJIT_SET_LESS_EQUAL SLJIT_SET(SLJIT_GREATER) +#define SLJIT_SIG_LESS 6 +#define SLJIT_SET_SIG_LESS SLJIT_SET(SLJIT_SIG_LESS) +#define SLJIT_SIG_GREATER_EQUAL 7 +#define SLJIT_SET_SIG_GREATER_EQUAL SLJIT_SET(SLJIT_SIG_LESS) +#define SLJIT_SIG_GREATER 8 +#define SLJIT_SET_SIG_GREATER SLJIT_SET(SLJIT_SIG_GREATER) +#define SLJIT_SIG_LESS_EQUAL 9 +#define SLJIT_SET_SIG_LESS_EQUAL SLJIT_SET(SLJIT_SIG_GREATER) + +#define SLJIT_OVERFLOW 10 +#define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW) +#define SLJIT_NOT_OVERFLOW 11 + +/* Unlike other flags, sljit_emit_jump may destroy the carry flag. */ +#define SLJIT_CARRY 12 +#define SLJIT_SET_CARRY SLJIT_SET(SLJIT_CARRY) +#define SLJIT_NOT_CARRY 13 + +#define SLJIT_ATOMIC_STORED 14 +#define SLJIT_SET_ATOMIC_STORED SLJIT_SET(SLJIT_ATOMIC_STORED) +#define SLJIT_ATOMIC_NOT_STORED 15 + +/* Basic floating point comparison types. + + Note: when the comparison result is unordered, their behaviour is unspecified. */ + +#define SLJIT_F_EQUAL 16 +#define SLJIT_SET_F_EQUAL SLJIT_SET(SLJIT_F_EQUAL) +#define SLJIT_F_NOT_EQUAL 17 +#define SLJIT_SET_F_NOT_EQUAL SLJIT_SET(SLJIT_F_EQUAL) +#define SLJIT_F_LESS 18 +#define SLJIT_SET_F_LESS SLJIT_SET(SLJIT_F_LESS) +#define SLJIT_F_GREATER_EQUAL 19 +#define SLJIT_SET_F_GREATER_EQUAL SLJIT_SET(SLJIT_F_LESS) +#define SLJIT_F_GREATER 20 +#define SLJIT_SET_F_GREATER SLJIT_SET(SLJIT_F_GREATER) +#define SLJIT_F_LESS_EQUAL 21 +#define SLJIT_SET_F_LESS_EQUAL SLJIT_SET(SLJIT_F_GREATER) + +/* Jumps when either argument contains a NaN value. 
*/ +#define SLJIT_UNORDERED 22 +#define SLJIT_SET_UNORDERED SLJIT_SET(SLJIT_UNORDERED) +/* Jumps when neither argument contains a NaN value. */ +#define SLJIT_ORDERED 23 +#define SLJIT_SET_ORDERED SLJIT_SET(SLJIT_UNORDERED) + +/* Ordered / unordered floating point comparison types. + + Note: each comparison type has an ordered and unordered form. Some + architectures supports only either of them (see: sljit_cmp_info). */ + +#define SLJIT_ORDERED_EQUAL 24 +#define SLJIT_SET_ORDERED_EQUAL SLJIT_SET(SLJIT_ORDERED_EQUAL) +#define SLJIT_UNORDERED_OR_NOT_EQUAL 25 +#define SLJIT_SET_UNORDERED_OR_NOT_EQUAL SLJIT_SET(SLJIT_ORDERED_EQUAL) +#define SLJIT_ORDERED_LESS 26 +#define SLJIT_SET_ORDERED_LESS SLJIT_SET(SLJIT_ORDERED_LESS) +#define SLJIT_UNORDERED_OR_GREATER_EQUAL 27 +#define SLJIT_SET_UNORDERED_OR_GREATER_EQUAL SLJIT_SET(SLJIT_ORDERED_LESS) +#define SLJIT_ORDERED_GREATER 28 +#define SLJIT_SET_ORDERED_GREATER SLJIT_SET(SLJIT_ORDERED_GREATER) +#define SLJIT_UNORDERED_OR_LESS_EQUAL 29 +#define SLJIT_SET_UNORDERED_OR_LESS_EQUAL SLJIT_SET(SLJIT_ORDERED_GREATER) + +#define SLJIT_UNORDERED_OR_EQUAL 30 +#define SLJIT_SET_UNORDERED_OR_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL) +#define SLJIT_ORDERED_NOT_EQUAL 31 +#define SLJIT_SET_ORDERED_NOT_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL) +#define SLJIT_UNORDERED_OR_LESS 32 +#define SLJIT_SET_UNORDERED_OR_LESS SLJIT_SET(SLJIT_UNORDERED_OR_LESS) +#define SLJIT_ORDERED_GREATER_EQUAL 33 +#define SLJIT_SET_ORDERED_GREATER_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_LESS) +#define SLJIT_UNORDERED_OR_GREATER 34 +#define SLJIT_SET_UNORDERED_OR_GREATER SLJIT_SET(SLJIT_UNORDERED_OR_GREATER) +#define SLJIT_ORDERED_LESS_EQUAL 35 +#define SLJIT_SET_ORDERED_LESS_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_GREATER) + +/* Unconditional jump types. */ +#define SLJIT_JUMP 36 +/* Fast calling method. See the description above. */ +#define SLJIT_FAST_CALL 37 +/* Default C calling convention. */ +#define SLJIT_CALL 38 +/* Called function must be compiled by SLJIT. 
+ See SLJIT_ENTER_REG_ARG option. */ +#define SLJIT_CALL_REG_ARG 39 + +/* The target can be changed during runtime (see: sljit_set_jump_addr). */ +#define SLJIT_REWRITABLE_JUMP 0x1000 +/* When this flag is passed, the execution of the current function ends and + the called function returns to the caller of the current function. The + stack usage is reduced before the call, but it is not necessarily reduced + to zero. In the latter case the compiler needs to allocate space for some + arguments and the return address must be stored on the stack as well. */ +#define SLJIT_CALL_RETURN 0x2000 + +/* Emit a jump instruction. The destination is not set, only the type of the jump. + type must be between SLJIT_EQUAL and SLJIT_FAST_CALL + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP + + Flags: does not modify flags. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type); + +/* Emit a C compiler (ABI) compatible function call. + type must be SLJIT_CALL or SLJIT_CALL_REG_ARG + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP and/or SLJIT_CALL_RETURN + arg_types can be specified by SLJIT_ARGSx (SLJIT_ARG_RETURN / SLJIT_ARG_VALUE) macros + + Flags: destroy all flags. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types); + +/* Basic arithmetic comparison. In most architectures it is implemented as + a compare operation followed by a sljit_emit_jump. However some + architectures (i.e: ARM64 or MIPS) may employ special optimizations + here. It is suggested to use this comparison form when appropriate. + type must be between SLJIT_EQUAL and SLJIT_SIG_LESS_EQUAL + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP + + Flags: may destroy flags. 
*/ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +/* Basic floating point comparison. In most architectures it is implemented as + a SLJIT_CMP_F32/64 operation (setting appropriate flags) followed by a + sljit_emit_jump. However some architectures (e.g. MIPS) may employ + special optimizations here. It is suggested to use this comparison form + when appropriate. + type must be between SLJIT_F_EQUAL and SLJIT_ORDERED_LESS_EQUAL + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP + Flags: destroy flags. + Note: when an operand is NaN the behaviour depends on the comparison type. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +/* Set the destination of the jump to this label. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label); +/* Set the destination address of the jump to this target address. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target); + +/* Emit an indirect jump or fast call. + Direct form: set src to SLJIT_IMM() and srcw to the address + Indirect form: any other valid addressing mode + type must be between SLJIT_JUMP and SLJIT_FAST_CALL + + Flags: does not modify flags. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw); + +/* Emit a C compiler (ABI) compatible function call. + Direct form: set src to SLJIT_IMM() and srcw to the address + Indirect form: any other valid addressing mode + type must be SLJIT_CALL or SLJIT_CALL_REG_ARG + type can be combined (or'ed) with SLJIT_CALL_RETURN + arg_types can be specified by SLJIT_ARGSx (SLJIT_ARG_RETURN / SLJIT_ARG_VALUE) macros + + Flags: destroy all flags. 
*/ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw); + +/* Perform an operation using the conditional flags as the second argument. + Type must always be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL. + The value represented by the type is 1, if the condition represented + by the type is fulfilled, and 0 otherwise. + + When op is SLJIT_MOV or SLJIT_MOV32: + Set dst to the value represented by the type (0 or 1). + Flags: - (does not modify flags) + When op is SLJIT_AND, SLJIT_AND32, SLJIT_OR, SLJIT_OR32, SLJIT_XOR, or SLJIT_XOR32 + Performs the binary operation using dst as the first, and the value + represented by type as the second argument. Result is written into dst. + Flags: Z (may destroy flags) */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type); + +/* Emit a conditional select instruction which moves src1 to dst_reg, + if the condition is satisfied, or src2_reg to dst_reg otherwise. + + type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL + type can be combined (or'ed) with SLJIT_32 to move 32 bit + register values instead of word sized ones + dst_reg and src2_reg must be valid registers + src1 must be valid operand + + Note: if src1 is a memory operand, its value + might be loaded even if the condition is false. + + Flags: - (does not modify flags) */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg); + +/* Emit a conditional floating point select instruction which moves + src1 to dst_reg, if the condition is satisfied, or src2_reg to + dst_reg otherwise. 
+ + type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL + type can be combined (or'ed) with SLJIT_32 to move 32 bit + floating point values instead of 64 bit ones + dst_freg and src2_freg must be valid floating point registers + src1 must be valid operand + + Note: if src1 is a memory operand, its value + might be loaded even if the condition is false. + + Flags: - (does not modify flags) */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg); + +/* The following flags are used by sljit_emit_mem(), sljit_emit_mem_update(), + sljit_emit_fmem(), and sljit_emit_fmem_update(). */ + +/* Memory load operation. This is the default. */ +#define SLJIT_MEM_LOAD 0x000000 +/* Memory store operation. */ +#define SLJIT_MEM_STORE 0x000200 + +/* The following flags are used by sljit_emit_mem() and sljit_emit_fmem(). */ + +/* Load or store data from an unaligned (byte aligned) address. */ +#define SLJIT_MEM_UNALIGNED 0x000400 +/* Load or store data from a 16 bit aligned address. */ +#define SLJIT_MEM_ALIGNED_16 0x000800 +/* Load or store data from a 32 bit aligned address. */ +#define SLJIT_MEM_ALIGNED_32 0x001000 + +/* The following flags are used by sljit_emit_mem_update(), + and sljit_emit_fmem_update(). */ + +/* Base register is updated before the memory access (default). */ +#define SLJIT_MEM_PRE 0x000000 +/* Base register is updated after the memory access. */ +#define SLJIT_MEM_POST 0x000400 + +/* When SLJIT_MEM_SUPP is passed, no instructions are emitted. + Instead the function returns with SLJIT_SUCCESS if the instruction + form is supported and SLJIT_ERR_UNSUPPORTED otherwise. This flag + allows runtime checking of available instruction forms. 
*/ +#define SLJIT_MEM_SUPP 0x000800 + +/* The sljit_emit_mem emits instructions for various memory operations: + + When SLJIT_MEM_UNALIGNED / SLJIT_MEM_ALIGNED_16 / + SLJIT_MEM_ALIGNED_32 is set in type argument: + Emit instructions for unaligned memory loads or stores. When + SLJIT_UNALIGNED is not defined, the only way to access unaligned + memory data is using sljit_emit_mem. Otherwise all operations (e.g. + sljit_emit_op1/2, or sljit_emit_fop1/2) supports unaligned access. + In general, the performance of unaligned memory accesses are often + lower than aligned and should be avoided. + + When a pair of registers is passed in reg argument: + Emit instructions for moving data between a register pair and + memory. The register pair can be specified by the SLJIT_REG_PAIR + macro. The first register is loaded from or stored into the + location specified by the mem/memw arguments, and the end address + of this operation is the starting address of the data transfer + between the second register and memory. The type argument must + be SLJIT_MOV. The SLJIT_MEM_UNALIGNED / SLJIT_MEM_ALIGNED_* + options are allowed for this operation. + + type must be between SLJIT_MOV and SLJIT_MOV_P and can be + combined (or'ed) with SLJIT_MEM_* flags + reg is a register or register pair, which is the source or + destination of the operation + mem must be a memory operand + + Flags: - (does not modify flags) */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw); + +/* Emit a single memory load or store with update instruction. + When the requested instruction form is not supported by the CPU, + it returns with SLJIT_ERR_UNSUPPORTED instead of emulating the + instruction. This allows specializing tight loops based on + the supported instruction forms (see SLJIT_MEM_SUPP flag). 
+ Absolute address (SLJIT_MEM0) forms are never supported + and the base (first) register specified by the mem argument + must not be SLJIT_SP and must also be different from the + register specified by the reg argument. + + type must be between SLJIT_MOV and SLJIT_MOV_P and can be + combined (or'ed) with SLJIT_MEM_* flags + reg is the source or destination register of the operation + mem must be a memory operand + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw); + +/* Same as sljit_emit_mem except the followings: + + Loading or storing a pair of registers is not supported. + + type must be SLJIT_MOV_F64 or SLJIT_MOV_F32 and can be + combined (or'ed) with SLJIT_MEM_* flags. + freg is the source or destination floating point register + of the operation + mem must be a memory operand + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw); + +/* Same as sljit_emit_mem_update except the followings: + + type must be SLJIT_MOV_F64 or SLJIT_MOV_F32 and can be + combined (or'ed) with SLJIT_MEM_* flags + freg is the source or destination floating point register + of the operation + mem must be a memory operand + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw); + +/* The following options are used by several simd operations. 
*/ + +/* Load data into a simd register, this is the default */ +#define SLJIT_SIMD_LOAD 0x000000 +/* Store data from a simd register */ +#define SLJIT_SIMD_STORE 0x000001 +/* The simd register contains floating point values */ +#define SLJIT_SIMD_FLOAT 0x000400 +/* Tests whether the operation is available */ +#define SLJIT_SIMD_TEST 0x000800 +/* Move data to/from a 64 bit (8 byte) long SIMD register */ +#define SLJIT_SIMD_REG_64 (3 << 12) +/* Move data to/from a 128 bit (16 byte) long SIMD register */ +#define SLJIT_SIMD_REG_128 (4 << 12) +/* Move data to/from a 256 bit (32 byte) long SIMD register */ +#define SLJIT_SIMD_REG_256 (5 << 12) +/* Move data to/from a 512 bit (64 byte) long SIMD register */ +#define SLJIT_SIMD_REG_512 (6 << 12) +/* Element size is 8 bit long (this is the default), usually cannot be combined with SLJIT_SIMD_FLOAT */ +#define SLJIT_SIMD_ELEM_8 (0 << 18) +/* Element size is 16 bit long, usually cannot be combined with SLJIT_SIMD_FLOAT */ +#define SLJIT_SIMD_ELEM_16 (1 << 18) +/* Element size is 32 bit long */ +#define SLJIT_SIMD_ELEM_32 (2 << 18) +/* Element size is 64 bit long */ +#define SLJIT_SIMD_ELEM_64 (3 << 18) +/* Element size is 128 bit long */ +#define SLJIT_SIMD_ELEM_128 (4 << 18) +/* Element size is 256 bit long */ +#define SLJIT_SIMD_ELEM_256 (5 << 18) + +/* The following options are used by sljit_emit_simd_mov(). 
*/ + +/* Memory address is unaligned (this is the default) */ +#define SLJIT_SIMD_MEM_UNALIGNED (0 << 24) +/* Memory address is 16 bit aligned */ +#define SLJIT_SIMD_MEM_ALIGNED_16 (1 << 24) +/* Memory address is 32 bit aligned */ +#define SLJIT_SIMD_MEM_ALIGNED_32 (2 << 24) +/* Memory address is 64 bit aligned */ +#define SLJIT_SIMD_MEM_ALIGNED_64 (3 << 24) +/* Memory address is 128 bit aligned */ +#define SLJIT_SIMD_MEM_ALIGNED_128 (4 << 24) +/* Memory address is 256 bit aligned */ +#define SLJIT_SIMD_MEM_ALIGNED_256 (5 << 24) +/* Memory address is 512 bit aligned */ +#define SLJIT_SIMD_MEM_ALIGNED_512 (6 << 24) + +/* Moves data between a simd register and memory. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. + + type must be a combination of SLJIT_SIMD_* and + SLJIT_SIMD_MEM_* options + freg is the source or destination simd register + of the operation + srcdst must be a memory operand or a simd register + + Note: + The alignment and element size must be + less or equal than simd register size. + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw); + +/* Replicates a scalar value to all lanes of a simd + register. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. + + type must be a combination of SLJIT_SIMD_* options + except SLJIT_SIMD_STORE. + freg is the destination simd register of the operation + src is the value which is replicated + + Note: + The src == SLJIT_IMM and srcw == 0 can be used to + clear a register even when SLJIT_SIMD_FLOAT is set. 
+ + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw); + +/* The following options are used by sljit_emit_simd_lane_mov(). */ + +/* Clear all bits of the simd register before loading the lane. */ +#define SLJIT_SIMD_LANE_ZERO 0x000002 +/* Sign extend the integer value stored from the lane. */ +#define SLJIT_SIMD_LANE_SIGNED 0x000004 + +/* Moves data between a simd register lane and a register or + memory. If the srcdst argument is a register, it must be + a floating point register when SLJIT_SIMD_FLOAT is specified, + or a general purpose register otherwise. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. + + type must be a combination of SLJIT_SIMD_* options + Further options: + SLJIT_32 - when SLJIT_SIMD_FLOAT is not set + SLJIT_SIMD_LANE_SIGNED - when SLJIT_SIMD_STORE + is set and SLJIT_SIMD_FLOAT is not set + SLJIT_SIMD_LANE_ZERO - when SLJIT_SIMD_LOAD + is specified + freg is the source or destination simd register + of the operation + lane_index is the index of the lane + srcdst is the destination operand for loads, and + source operand for stores + + Note: + The elem size must be lower than register size. + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw); + +/* Replicates a scalar value from a lane to all lanes + of a simd register. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. + + type must be a combination of SLJIT_SIMD_* options + except SLJIT_SIMD_STORE. 
+ freg is the destination simd register of the operation + src is the simd register whose lane is replicated + src_lane_index is the lane index of the src register + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index); + +/* The following options are used by sljit_emit_simd_extend(). */ + +/* Sign extend the integer elements */ +#define SLJIT_SIMD_EXTEND_SIGNED 0x000002 +/* Extend data to 16 bit */ +#define SLJIT_SIMD_EXTEND_16 (1 << 24) +/* Extend data to 32 bit */ +#define SLJIT_SIMD_EXTEND_32 (2 << 24) +/* Extend data to 64 bit */ +#define SLJIT_SIMD_EXTEND_64 (3 << 24) + +/* Extends elements and stores them in a simd register. + The extension operation increases the size of the + elements (e.g. from 16 bit to 64 bit). For integer + values, the extension can be signed or unsigned. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. + + type must be a combination of SLJIT_SIMD_*, and + SLJIT_SIMD_EXTEND_* options except SLJIT_SIMD_STORE + freg is the destination simd register of the operation + src must be a memory operand or a simd register. + In the latter case, the source elements are stored + in the lower half of the register. + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw); + +/* Extract the highest bit (usually the sign bit) from + each element of a vector. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. 
+ + type must be a combination of SLJIT_SIMD_* and SLJIT_32 + options except SLJIT_SIMD_LOAD + freg is the source simd register of the operation + dst is the destination operand + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw); + +/* The following options are used by sljit_emit_simd_op2(). */ + +/* Binary 'and' operation */ +#define SLJIT_SIMD_OP2_AND 0x000001 +/* Binary 'or' operation */ +#define SLJIT_SIMD_OP2_OR 0x000002 +/* Binary 'xor' operation */ +#define SLJIT_SIMD_OP2_XOR 0x000003 + +/* Perform simd operations using simd registers. + + If the operation is not supported, it returns with + SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, + it does not emit any instructions. + + type must be a combination of SLJIT_SIMD_* and SLJIT_SIMD_OP2_* + options except SLJIT_SIMD_LOAD and SLJIT_SIMD_STORE + dst_freg is the destination register of the operation + src1_freg is the first source register of the operation + src2_freg is the second source register of the operation + + Flags: - (does not modify flags) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg); + +/* The sljit_emit_atomic_load and sljit_emit_atomic_store operation pair + can perform an atomic read-modify-write operation. First, an unsigned + value must be loaded from memory using sljit_emit_atomic_load. Then, + the updated value must be written back to the same memory location by + sljit_emit_atomic_store. A thread can only perform a single atomic + operation at a time. + + Note: atomic operations are experimental, and not implemented + for all cpus. 
+ + The following conditions must be satisfied, or the operation + is undefined: + - the address provided in mem_reg must be divisible by the size of + the value (only naturally aligned updates are supported) + - no memory writes are allowed between the load and store operations + regardless of their target address (currently read operations are + allowed, but this might change in the future) + - the memory operation (op) and the base address (stored in mem_reg) + passed to the load/store operations must be the same (the mem_reg + can be a different register, only its value must be the same) + - a store must always follow a load for the same transaction. + + op must be between SLJIT_MOV and SLJIT_MOV_P, excluding all + signed loads such as SLJIT_MOV32_S16 + dst_reg is the register where the data will be loaded into + mem_reg is the base address of the memory load (it cannot be + SLJIT_SP or a virtual register on x86-32) + + Flags: - (does not modify flags) */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg); + +/* The sljit_emit_atomic_load and sljit_emit_atomic_store operations + allow performing an atomic read-modify-write operation. See the + description of sljit_emit_atomic_load. + + op must be between SLJIT_MOV and SLJIT_MOV_P, excluding all signed + loads such as SLJIT_MOV32_S16 + src_reg is the register whose value is stored into the memory + mem_reg is the base address of the memory store (it cannot be + SLJIT_SP or a virtual register on x86-32) + temp_reg is a not preserved scratch register, which must be + initialized with the value loaded into the dst_reg during the + corresponding sljit_emit_atomic_load operation, or the operation + is undefined + + Flags: ATOMIC_STORED is set if the operation is successful, + otherwise the memory remains unchanged. 
*/ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg); + +/* Copies the base address of SLJIT_SP + offset to dst. The offset can + represent the starting address of a value in the local data (stack). + The offset is not limited by the local data limits, it can be any value. + For example if an array of bytes is stored on the stack from + offset 0x40, and R0 contains the offset of an array item plus 0x120, + this item can be changed by two SLJIT instructions: + + sljit_get_local_base(compiler, SLJIT_R1, 0, 0x40 - 0x120); + sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0, SLJIT_IMM, 0x5); + + Flags: - (may destroy flags) */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset); + +/* Store a value that can be changed at runtime (see: sljit_get_const_addr / sljit_set_const) + Flags: - (does not modify flags) */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value); + +/* Store the value of a label (see: sljit_set_label / sljit_set_target) + Flags: - (does not modify flags) */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw); + +/* Provides the address of label, jump and const instructions after sljit_generate_code + is called. The returned value is unspecified before the sljit_generate_code call. + Since these structures are freed by sljit_free_compiler, the addresses must be + preserved by the user program elsewhere. 
*/ +static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { return label->u.addr; } +static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; } +static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; } + +/* Only the address and executable offset are required to perform dynamic + code modifications. See sljit_get_executable_offset function. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset); +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset); + +/* --------------------------------------------------------------------- */ +/* CPU specific functions */ +/* --------------------------------------------------------------------- */ + +/* Types for sljit_get_register_index */ + +/* General purpose (integer) registers. */ +#define SLJIT_GP_REGISTER 0 +/* Floating point registers. */ +#define SLJIT_FLOAT_REGISTER 1 + +/* The following function is a helper function for sljit_emit_op_custom. + It returns with the real machine register index ( >=0 ) of any registers. + + When type is SLJIT_GP_REGISTER: + reg must be an SLJIT_R(i), SLJIT_S(i), or SLJIT_SP register + + When type is SLJIT_FLOAT_REGISTER: + reg must be an SLJIT_FR(i) or SLJIT_FS(i) register + + When type is SLJIT_SIMD_REG_64 / 128 / 256 / 512 : + reg must be an SLJIT_FR(i) or SLJIT_FS(i) register + + Note: it returns with -1 for unknown registers, such as virtual + registers on x86-32 or unsupported simd registers. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg); + +/* Any instruction can be inserted into the instruction stream by + sljit_emit_op_custom. It has a similar purpose as inline assembly. 
+ The size parameter must match to the instruction size of the target + architecture: + + x86: 0 < size <= 15, the instruction argument can be byte aligned. + Thumb2: if size == 2, the instruction argument must be 2 byte aligned. + if size == 4, the instruction argument must be 4 byte aligned. + s390x: size can be 2, 4, or 6, the instruction argument can be byte aligned. + Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size); + +/* Flags were set by a 32 bit operation. */ +#define SLJIT_CURRENT_FLAGS_32 SLJIT_32 + +/* Flags were set by an ADD or ADDC operations. */ +#define SLJIT_CURRENT_FLAGS_ADD 0x01 +/* Flags were set by a SUB, SUBC, or NEG operation. */ +#define SLJIT_CURRENT_FLAGS_SUB 0x02 + +/* Flags were set by sljit_emit_op2u with SLJIT_SUB opcode. + Must be combined with SLJIT_CURRENT_FLAGS_SUB. */ +#define SLJIT_CURRENT_FLAGS_COMPARE 0x04 + +/* Define the currently available CPU status flags. It is usually used after + an sljit_emit_label or sljit_emit_op_custom operations to define which CPU + status flags are available. + + The current_flags must be a valid combination of SLJIT_SET_* and + SLJIT_CURRENT_FLAGS_* constants. */ + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, + sljit_s32 current_flags); + +/* --------------------------------------------------------------------- */ +/* Serialization functions */ +/* --------------------------------------------------------------------- */ + +/* Label/jump/const enumeration functions. The items in each group + are enumerated in creation order. Serialization / deserialization + preserves this order for each group. For example the fifth label + after deserialization refers to the same machine code location as + the fifth label before the serialization. 
*/ +static SLJIT_INLINE struct sljit_label *sljit_get_first_label(struct sljit_compiler *compiler) { return compiler->labels; } +static SLJIT_INLINE struct sljit_jump *sljit_get_first_jump(struct sljit_compiler *compiler) { return compiler->jumps; } +static SLJIT_INLINE struct sljit_const *sljit_get_first_const(struct sljit_compiler *compiler) { return compiler->consts; } + +static SLJIT_INLINE struct sljit_label *sljit_get_next_label(struct sljit_label *label) { return label->next; } +static SLJIT_INLINE struct sljit_jump *sljit_get_next_jump(struct sljit_jump *jump) { return jump->next; } +static SLJIT_INLINE struct sljit_const *sljit_get_next_const(struct sljit_const *const_) { return const_->next; } + +/* A number starting from 0 is assigned to each label, which +represents its creation index. The first label created by the +compiler has index 0, the second has index 1, the third has +index 2, and so on. The returned value is unspecified after +sljit_generate_code() is called. */ +static SLJIT_INLINE sljit_uw sljit_get_label_index(struct sljit_label *label) { return label->u.index; } + +/* The sljit_jump_has_label() and sljit_jump_has_target() functions +return a non-zero value if a label or target is set for the jump +respectively. Both may return with a zero value. The other two functions +(sljit_jump_get_label() and sljit_jump_get_target()) return the value assigned to the jump. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_label(struct sljit_jump *jump); +static SLJIT_INLINE struct sljit_label *sljit_jump_get_label(struct sljit_jump *jump) { return jump->u.label; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_target(struct sljit_jump *jump); +static SLJIT_INLINE sljit_uw sljit_jump_get_target(struct sljit_jump *jump) { return jump->u.target; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_is_mov_addr(struct sljit_jump *jump); + +/* Option bits for sljit_serialize_compiler. */ + +/* When debugging is enabled, the serialized buffer contains +debugging information unless this option is specified. 
*/ +#define SLJIT_SERIALIZE_IGNORE_DEBUG 0x1 + +/* Serialize the internal structure of the compiler into a buffer. +If the serialization is successful, the returned value is a newly +allocated buffer which is allocated by the memory allocator assigned +to the compiler. Otherwise the returned value is NULL. Unlike +sljit_generate_code(), serialization does not modify the internal +state of the compiler, so the code generation can be continued. + + options must be the combination of SLJIT_SERIALIZE_* option bits + size is an output argument, which is set to the byte size of + the result buffer if the operation is successful + +Notes: + - This function is useful for ahead-of-time compilation (AOT). + - The returned buffer must be freed later by the caller. + The SLJIT_FREE() macro is suitable for this purpose: + SLJIT_FREE(returned_buffer, sljit_get_allocator_data(compiler)) + - Memory allocated by sljit_alloc_memory() is not serialized. + - The type of the returned buffer is sljit_uw* to emphasize that + the buffer is word aligned. However, the 'size' output argument + contains the byte size, so this value is always divisible by + sizeof(sljit_uw). +*/ +SLJIT_API_FUNC_ATTRIBUTE sljit_uw* sljit_serialize_compiler(struct sljit_compiler *compiler, + sljit_s32 options, sljit_uw *size); + +/* Construct a new compiler instance from a buffer produced by +sljit_serialize_compiler(). If the operation is successful, the new +compiler instance is returned. Otherwise the returned value is NULL. + + buffer points to a word aligned memory data which was + created by sljit_serialize_compiler() + size is the byte size of the buffer + options must be 0 + allocator_data specify an allocator specific data, see + sljit_create_compiler() for further details + +Notes: + - Labels assigned to jumps are restored with their + corresponding label in the label set created by + the deserializer. Target addresses assigned to + jumps are also restored. Uninitialized jumps + remain uninitialized. 
+ - After the deserialization, sljit_generate_code() does + not need to be the next operation on the returned + compiler, the code generation can be continued. + Even sljit_serialize_compiler() can be called again. + - When debugging is enabled, a buffer without debug + information cannot be deserialized. +*/ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler *sljit_deserialize_compiler(sljit_uw* buffer, sljit_uw size, + sljit_s32 options, void *allocator_data); + +/* --------------------------------------------------------------------- */ +/* Miscellaneous utility functions */ +/* --------------------------------------------------------------------- */ + +/* Get the human readable name of the platform. Can be useful on platforms + like ARM, where ARM and Thumb2 functions can be mixed, and it is useful + to know the type of the code generator. */ +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void); + +/* Portable helper macro to get an offset of a member. + Same as offsetof() macro defined in stddef.h */ +#define SLJIT_OFFSETOF(base, member) ((sljit_sw)(&((base*)0x10)->member) - 0x10) + +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) + +/* The sljit_stack structure and its manipulation functions provide + an implementation for a top-down stack. The stack top is stored + in the end field of the sljit_stack structure and the stack goes + down to the min_start field, so the memory region reserved for + this stack is between min_start (inclusive) and end (exclusive) + fields. However the application can only use the region between + start (inclusive) and end (exclusive) fields. The sljit_stack_resize + function can be used to extend this region up to min_start. + + This feature uses the "address space reserve" feature of modern + operating systems. Instead of allocating a large memory block + applications can allocate a small memory region and extend it + later without moving the content of the memory area. 
Therefore + after a successful resize by sljit_stack_resize all pointers into + this region are still valid. + + Note: + this structure may not be supported by all operating systems. + end and min_start fields are aligned to PAGE_SIZE bytes (usually + 4 Kbyte or more). + stack should grow in larger steps, e.g. 4Kbyte, 16Kbyte or more. */ + +struct sljit_stack { + /* User data, anything can be stored here. + Initialized to the same value as the end field. */ + sljit_u8 *top; +/* These members are read only. */ + /* End address of the stack */ + sljit_u8 *end; + /* Current start address of the stack. */ + sljit_u8 *start; + /* Lowest start address of the stack. */ + sljit_u8 *min_start; +}; + +/* Allocates a new stack. Returns NULL if unsuccessful. + Note: see sljit_create_compiler for the explanation of allocator_data. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data); +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data); + +/* Can be used to increase (extend) or decrease (shrink) the stack + memory area. Returns with new_start if successful and NULL otherwise. + It always fails if new_start is less than min_start or greater than or equal + to the end field. The fields of the stack are not changed if the returned + value is NULL (the current memory content is never lost). */ +SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start); + +#endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */ + +#if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + +/* Get the entry address of a given function (signed, unsigned result). 
*/ +#define SLJIT_FUNC_ADDR(func_name) ((sljit_sw)func_name) +#define SLJIT_FUNC_UADDR(func_name) ((sljit_uw)func_name) + +#else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ + +/* All JIT related code should be placed in the same context (library, binary, etc.). */ + +/* Get the entry address of a given function (signed, unsigned result). */ +#define SLJIT_FUNC_ADDR(func_name) (*(sljit_sw*)(void*)func_name) +#define SLJIT_FUNC_UADDR(func_name) (*(sljit_uw*)(void*)func_name) + +/* For powerpc64, the function pointers point to a context descriptor. */ +struct sljit_function_context { + sljit_uw addr; + sljit_uw r2; + sljit_uw r11; +}; + +/* Fill the context arguments using the addr and the function. + If func_ptr is NULL, it will not be set to the address of context + If addr is NULL, the function address also comes from the func pointer. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func); + +#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) +/* Free unused executable memory. The allocator keeps some free memory + around to reduce the number of OS executable memory allocations. + This improves performance since these calls are costly. However + it is sometimes desired to free all unused memory regions, e.g. + before the application terminates. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); +#endif + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* SLJIT_LIR_H_ */ diff --git a/src/sljit/sljitNativeARM_32.c b/src/sljit/sljitNativeARM_32.c new file mode 100644 index 0000000..a253c06 --- /dev/null +++ b/src/sljit/sljitNativeARM_32.c @@ -0,0 +1,4636 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef __SOFTFP__ +#define ARM_ABI_INFO " ABI:softfp" +#else +#define ARM_ABI_INFO " ABI:hardfp" +#endif + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO; +#elif (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + return "ARMv6" SLJIT_CPUINFO ARM_ABI_INFO; +#else +#error "Internal error: Unknown ARM architecture" +#endif +} + +/* Length of an instruction word. */ +typedef sljit_u32 sljit_ins; + +/* Last register + 1. 
*/ +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4) + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) + +/* In ARM instruction words. + Cache lines are usually 32 byte aligned. */ +#define CONST_POOL_ALIGNMENT 8 +#define CONST_POOL_EMPTY 0xffffffff + +#define ALIGN_INSTRUCTION(ptr) \ + (sljit_ins*)(((sljit_ins)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1)) +#define MAX_DIFFERENCE(max_diff) \ + (((max_diff) / (sljit_s32)sizeof(sljit_ins)) - (CONST_POOL_ALIGNMENT - 1)) + +/* Register index table used by the RM/RM8/RD/RN encoding macros. See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15 +}; + +static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = { + 0, + 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, + 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6 +}; + +static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = { + 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1 +}; + +#define RM(rm) ((sljit_ins)reg_map[rm]) +#define RM8(rm) ((sljit_ins)reg_map[rm] << 8) +#define RD(rd) ((sljit_ins)reg_map[rd] << 12) +#define RN(rn) ((sljit_ins)reg_map[rn] << 16) + +#define VM(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5)) +#define VD(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22)) +#define VN(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7)) + +/* --------------------------------------------------------------------- */ +/* Instruction forms */ +/* --------------------------------------------------------------------- */ + +/* The instruction includes the AL condition. 
+ INST_NAME - CONDITIONAL remove this flag. */ +#define COND_MASK 0xf0000000 +#define CONDITIONAL 0xe0000000 +#define PUSH_POOL 0xff000000 + +#define ADC 0xe0a00000 +#define ADD 0xe0800000 +#define AND 0xe0000000 +#define B 0xea000000 +#define BIC 0xe1c00000 +#define BKPT 0xe1200070 +#define BL 0xeb000000 +#define BLX 0xe12fff30 +#define BX 0xe12fff10 +#define CLZ 0xe16f0f10 +#define CMN 0xe1600000 +#define CMP 0xe1400000 +#define EOR 0xe0200000 +#define LDR 0xe5100000 +#define LDR_POST 0xe4100000 +#define LDREX 0xe1900f9f +#define LDREXB 0xe1d00f9f +#define LDREXH 0xe1f00f9f +#define MLA 0xe0200090 +#define MOV 0xe1a00000 +#define MUL 0xe0000090 +#define MVN 0xe1e00000 +#define NOP 0xe1a00000 +#define ORR 0xe1800000 +#define PUSH 0xe92d0000 +#define POP 0xe8bd0000 +#define REV 0xe6bf0f30 +#define REV16 0xe6bf0fb0 +#define RSB 0xe0600000 +#define RSC 0xe0e00000 +#define SBC 0xe0c00000 +#define SMULL 0xe0c00090 +#define STR 0xe5000000 +#define STREX 0xe1800f90 +#define STREXB 0xe1c00f90 +#define STREXH 0xe1e00f90 +#define SUB 0xe0400000 +#define SXTB 0xe6af0070 +#define SXTH 0xe6bf0070 +#define TST 0xe1000000 +#define UMULL 0xe0800090 +#define UXTB 0xe6ef0070 +#define UXTH 0xe6ff0070 +#define VABS_F32 0xeeb00ac0 +#define VADD_F32 0xee300a00 +#define VAND 0xf2000110 +#define VCMP_F32 0xeeb40a40 +#define VCVT_F32_S32 0xeeb80ac0 +#define VCVT_F32_U32 0xeeb80a40 +#define VCVT_F64_F32 0xeeb70ac0 +#define VCVT_S32_F32 0xeebd0ac0 +#define VDIV_F32 0xee800a00 +#define VDUP 0xee800b10 +#define VDUP_s 0xf3b00c00 +#define VEOR 0xf3000110 +#define VLD1 0xf4200000 +#define VLD1_r 0xf4a00c00 +#define VLD1_s 0xf4a00000 +#define VLDR_F32 0xed100a00 +#define VMOV_F32 0xeeb00a40 +#define VMOV 0xee000a10 +#define VMOV2 0xec400a10 +#define VMOV_i 0xf2800010 +#define VMOV_s 0xee000b10 +#define VMOVN 0xf3b20200 +#define VMRS 0xeef1fa10 +#define VMUL_F32 0xee200a00 +#define VNEG_F32 0xeeb10a40 +#define VORR 0xf2200110 +#define VPOP 0xecbd0b00 +#define VPUSH 0xed2d0b00 +#define VSHLL 
0xf2800a10 +#define VSHR 0xf2800010 +#define VSRA 0xf2800110 +#define VST1 0xf4000000 +#define VST1_s 0xf4800000 +#define VSTR_F32 0xed000a00 +#define VSUB_F32 0xee300a40 + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +/* Arm v7 specific instructions. */ +#define MOVT 0xe3400000 +#define MOVW 0xe3000000 +#define RBIT 0xe6ff0f30 +#endif + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + +static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32) +{ + if (compiler->scratches == -1) + return 0; + + if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0)) + fr -= SLJIT_F64_SECOND(0); + + return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches)) + || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0) + || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)); +} + +#endif /* SLJIT_ARGUMENT_CHECKS */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + +static sljit_s32 push_cpool(struct sljit_compiler *compiler) +{ + /* Pushing the constant pool into the instruction stream. */ + sljit_ins* inst; + sljit_uw* cpool_ptr; + sljit_uw* cpool_end; + sljit_s32 i; + + /* The label could point the address after the constant pool. 
*/ + if (compiler->last_label && compiler->last_label->size == compiler->size) + compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1; + + SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE); + inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!inst); + compiler->size++; + *inst = 0xff000000 | compiler->cpool_fill; + + for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) { + inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!inst); + compiler->size++; + *inst = 0; + } + + cpool_ptr = compiler->cpool; + cpool_end = cpool_ptr + compiler->cpool_fill; + while (cpool_ptr < cpool_end) { + inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!inst); + compiler->size++; + *inst = *cpool_ptr++; + } + compiler->cpool_diff = CONST_POOL_EMPTY; + compiler->cpool_fill = 0; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst) +{ + sljit_ins* ptr; + + if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) + FAIL_IF(push_cpool(compiler)); + + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal) +{ + sljit_ins* ptr; + sljit_uw cpool_index = CPOOL_SIZE; + sljit_uw* cpool_ptr; + sljit_uw* cpool_end; + sljit_u8* cpool_unique_ptr; + + if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) + FAIL_IF(push_cpool(compiler)); + else if (compiler->cpool_fill > 0) { + cpool_ptr = compiler->cpool; + cpool_end = cpool_ptr + compiler->cpool_fill; + cpool_unique_ptr = compiler->cpool_unique; + do { + if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) { + cpool_index = (sljit_uw)(cpool_ptr - 
compiler->cpool); + break; + } + cpool_ptr++; + cpool_unique_ptr++; + } while (cpool_ptr < cpool_end); + } + + if (cpool_index == CPOOL_SIZE) { + /* Must allocate a new entry in the literal pool. */ + if (compiler->cpool_fill < CPOOL_SIZE) { + cpool_index = compiler->cpool_fill; + compiler->cpool_fill++; + } + else { + FAIL_IF(push_cpool(compiler)); + cpool_index = 0; + compiler->cpool_fill = 1; + } + } + + SLJIT_ASSERT((inst & 0xfff) == 0); + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst | cpool_index; + + compiler->cpool[cpool_index] = literal; + compiler->cpool_unique[cpool_index] = 0; + if (compiler->cpool_diff == CONST_POOL_EMPTY) + compiler->cpool_diff = compiler->size; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal) +{ + sljit_ins* ptr; + + if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE)) + FAIL_IF(push_cpool(compiler)); + + SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0); + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst | compiler->cpool_fill; + + compiler->cpool[compiler->cpool_fill] = literal; + compiler->cpool_unique[compiler->cpool_fill] = 1; + compiler->cpool_fill++; + if (compiler->cpool_diff == CONST_POOL_EMPTY) + compiler->cpool_diff = compiler->size; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler) +{ + /* Place for at least two instruction (doesn't matter whether the first has a literal). 
*/ + if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088))) + return push_cpool(compiler); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler) +{ + /* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */ + SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092)); + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + + return push_inst(compiler, BLX | RM(TMP_REG1)); +} + +static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size) +{ + sljit_uw diff; + sljit_uw ind; + sljit_uw counter = 0; + sljit_uw* clear_const_pool = const_pool; + sljit_uw* clear_const_pool_end = const_pool + cpool_size; + + SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT); + /* Set unused flag for all literals in the constant pool. + I.e.: unused literals can belong to branches, which can be encoded as B or BL. + We can "compress" the constant pool by discarding these literals. */ + while (clear_const_pool < clear_const_pool_end) + *clear_const_pool++ = (sljit_uw)(-1); + + while (last_pc_patch < code_ptr) { + /* Data transfer instruction with Rn == r15. */ + if ((*last_pc_patch & 0x0e0f0000) == 0x040f0000) { + diff = (sljit_uw)(const_pool - last_pc_patch); + ind = (*last_pc_patch) & 0xfff; + + /* Must be a load instruction with immediate offset. 
*/
+			SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
+			if ((sljit_s32)const_pool[ind] < 0) { /* Slot still holds the "unused" marker: give this literal the next compacted index. */
+				const_pool[ind] = counter;
+				ind = counter;
+				counter++;
+			}
+			else
+				ind = const_pool[ind]; /* Literal was already seen: reuse the index assigned earlier. */
+
+			SLJIT_ASSERT(diff >= 1);
+			if (diff >= 2 || ind > 0) {
+				diff = (diff + (sljit_uw)ind - 2) << 2; /* Word distance (less the 2 word pc bias) turned into a byte offset. */
+				SLJIT_ASSERT(diff <= 0xfff);
+				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)0xfff) | diff;
+			}
+			else /* diff == 1 and ind == 0: clear bit 23 and store an offset of 4 instead. */
+				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)(0xfff | (1 << 23))) | 0x004;
+		}
+		last_pc_patch++;
+	}
+	return counter;
+}
+
+/* In some rare occasions we may need future patches. The probability is close to 0 in practice.
+   Each node of this singly linked list remembers the previous content (value) of one
+   constant pool slot (index), saved by resolve_const_pool_index before that slot is overwritten. */
+struct future_patch {
+	struct future_patch* next;
+	sljit_s32 index;
+	sljit_s32 value;
+};
+
+static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
+{
+	sljit_u32 value;
+	struct future_patch *curr_patch, *prev_patch;
+
+	SLJIT_UNUSED_ARG(compiler);
+
+	/* Using the values generated by patch_pc_relative_loads. 
*/ + if (!*first_patch) + value = cpool_start_address[cpool_current_index]; + else { + curr_patch = *first_patch; + prev_patch = NULL; + while (1) { + if (!curr_patch) { + value = cpool_start_address[cpool_current_index]; + break; + } + if ((sljit_uw)curr_patch->index == cpool_current_index) { + value = (sljit_uw)curr_patch->value; + if (prev_patch) + prev_patch->next = curr_patch->next; + else + *first_patch = curr_patch->next; + SLJIT_FREE(curr_patch, compiler->allocator_data); + break; + } + prev_patch = curr_patch; + curr_patch = curr_patch->next; + } + } + + if ((sljit_sw)value >= 0) { + if (value > cpool_current_index) { + curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data); + if (!curr_patch) { + while (*first_patch) { + curr_patch = *first_patch; + *first_patch = (*first_patch)->next; + SLJIT_FREE(curr_patch, compiler->allocator_data); + } + return SLJIT_ERR_ALLOC_FAILED; + } + curr_patch->next = *first_patch; + curr_patch->index = (sljit_sw)value; + curr_patch->value = (sljit_sw)cpool_start_address[value]; + *first_patch = curr_patch; + } + cpool_start_address[value] = *buf_ptr; + } + return SLJIT_SUCCESS; +} + +#else + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst) +{ + sljit_ins* ptr; + + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) +{ + FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | ((sljit_u32)imm & 0xfff))); + return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | (((sljit_u32)imm >> 16) & 0xfff)); +} + +#endif + +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset) +{ + sljit_sw diff; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return 0; + +#if (defined 
SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + if (jump->flags & IS_BL) + code_ptr--; +#endif /* SLJIT_CONFIG_ARM_V6 */ + + if (jump->flags & JUMP_ADDR) + diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset); + else { + SLJIT_ASSERT(jump->u.label != NULL); + diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)); + } + + /* Branch to Thumb code has not been optimized yet. */ + if (diff & 0x3) + return 0; + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + if (jump->flags & IS_BL) { + if (diff <= 0x01ffffff && diff >= -0x02000000) { + *code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK); + jump->flags |= PATCH_B; + return 1; + } + } + else { + if (diff <= 0x01ffffff && diff >= -0x02000000) { + *code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK); + jump->flags |= PATCH_B; + } + } +#else /* !SLJIT_CONFIG_ARM_V6 */ + if (diff <= 0x01ffffff && diff >= -0x02000000) { + *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (*code_ptr & COND_MASK); + jump->flags |= PATCH_B; + return 1; + } +#endif /* SLJIT_CONFIG_ARM_V6 */ + return 0; +} + +static void set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache) +{ +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + sljit_ins *ptr = (sljit_ins*)jump_ptr; + sljit_ins *inst = (sljit_ins*)ptr[0]; + sljit_ins mov_pc = ptr[1]; + sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC); + sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2); + + SLJIT_UNUSED_ARG(executable_offset); + + if (diff <= 0x7fffff && diff >= -0x800000) { + /* Turn to branch. 
*/ + if (!bl) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0); + } + inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff); + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + } else { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + } + inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff); + inst[1] = NOP; + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); + } + } + } else { + /* Get the position of the constant. */ + if (mov_pc & (1 << 23)) + ptr = inst + ((mov_pc & 0xfff) >> 2) + 2; + else + ptr = inst + 1; + + if (*inst != mov_pc) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + (!bl ? 1 : 2), 0); + } + inst[0] = mov_pc; + if (!bl) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + } else { + inst[1] = BLX | RM(TMP_REG1); + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); + } + } + } + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0); + } + + *ptr = new_addr; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1); + } + } +#else /* !SLJIT_CONFIG_ARM_V6 */ + sljit_ins *inst = (sljit_ins*)jump_ptr; + + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + } + + inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff); + inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 
0xfff); + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); + } +#endif /* SLJIT_CONFIG_ARM_V6 */ +} + +static sljit_uw get_imm(sljit_uw imm); +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm); +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg); + +static void set_const_value(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache) +{ +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + sljit_ins *ptr = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins*)ptr[0]; + sljit_uw ldr_literal = ptr[1]; + sljit_uw src2; + + SLJIT_UNUSED_ARG(executable_offset); + + src2 = get_imm(new_constant); + if (src2) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0); + } + + *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + return; + } + + src2 = get_imm(~new_constant); + if (src2) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0); + } + + *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + return; + } + + if (ldr_literal & (1 << 23)) + ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2; + else + ptr = inst + 1; + + if (*inst != ldr_literal) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0); + } + + *inst = ldr_literal; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + } + 
+ if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0); + } + + *ptr = new_constant; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1); + } +#else /* !SLJIT_CONFIG_ARM_V6 */ + sljit_ins *inst = (sljit_ins*)addr; + + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + } + + inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff); + inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff); + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); + } +#endif /* SLJIT_CONFIG_ARM_V6 */ +} + +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + /* The pc+8 offset is represented by the 2 * SSIZE_OF(ins) below. 
*/ + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + if ((diff & 0x3) == 0 && diff <= (0x3fc + 2 * SSIZE_OF(ins)) && diff >= (-0x3fc + 2 * SSIZE_OF(ins))) { + jump->flags |= PATCH_B; + return 0; + } + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + return 0; +#else /* !SLJIT_CONFIG_ARM_V6 */ + return 1; +#endif /* SLJIT_CONFIG_ARM_V6 */ +} + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE - 1; + + if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) { + /* Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2; + + if (diff <= (0x01ffffff / SSIZE_OF(ins)) && diff >= (-0x02000000 / SSIZE_OF(ins))) + total_size = 1 - 1; + } + + size_reduce += JUMP_MAX_SIZE - 1 - total_size; + } else { + /* Real size minus 1. Unit size: instruction. 
*/ + total_size = 1; + + if (!(jump->flags & JUMP_ADDR)) { + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (diff <= 0xff + 2 && diff >= -0xff + 2) + total_size = 0; + } + + size_reduce += 1 - total_size; + } + + jump->flags |= total_size << JUMP_SIZE_SHIFT; + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; +} + +#endif /* SLJIT_CONFIG_ARM_V7 */ + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + SLJIT_NEXT_DEFINE_TYPES; + sljit_sw executable_offset; + sljit_uw addr; + sljit_sw diff; +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + sljit_uw cpool_size; + sljit_uw cpool_skip_alignment; + sljit_uw cpool_current_index; + sljit_ins *cpool_start_address; + sljit_ins *last_pc_patch; + struct future_patch *first_patch; +#endif + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + + /* Second code generation pass. 
*/ +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + compiler->size += (compiler->patches << 1); + if (compiler->cpool_fill > 0) + compiler->size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1; +#else /* !SLJIT_CONFIG_ARM_V6 */ + reduce_code_size(compiler); +#endif /* SLJIT_CONFIG_ARM_V6 */ + code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset); + PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); + buf = compiler->buf; + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + cpool_size = 0; + cpool_skip_alignment = 0; + cpool_current_index = 0; + cpool_start_address = NULL; + first_patch = NULL; + last_pc_patch = code; +#endif /* SLJIT_CONFIG_ARM_V6 */ + + code_ptr = code; + word_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + if (cpool_size > 0) { + if (cpool_skip_alignment > 0) { + buf_ptr++; + cpool_skip_alignment--; + } else { + if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { + SLJIT_FREE_EXEC(code, exec_allocator_data); + compiler->error = SLJIT_ERR_ALLOC_FAILED; + return NULL; + } + buf_ptr++; + if (++cpool_current_index >= cpool_size) { + SLJIT_ASSERT(!first_patch); + cpool_size = 0; + } + } + } else if ((*buf_ptr & 0xff000000) != PUSH_POOL) { +#endif /* SLJIT_CONFIG_ARM_V6 */ + *code_ptr = *buf_ptr++; + if (next_min_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = 
(sljit_uw)(code_ptr - code); + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + /* These structures are ordered by their address. */ + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + if (detect_jump_type(jump, code_ptr, code, executable_offset)) + code_ptr--; + jump->addr = (sljit_uw)code_ptr; +#else /* !SLJIT_CONFIG_ARM_V6 */ + word_count += jump->flags >> JUMP_SIZE_SHIFT; + jump->addr = (sljit_uw)code_ptr; + if (!detect_jump_type(jump, code_ptr, code, executable_offset)) { + code_ptr[2] = code_ptr[0]; + addr = ((code_ptr[0] & 0xf) << 12); + code_ptr[0] = MOVW | addr; + code_ptr[1] = MOVT | addr; + code_ptr += 2; + } + SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr <= (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)); +#endif /* SLJIT_CONFIG_ARM_V6 */ + } else { +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + word_count += jump->flags >> JUMP_SIZE_SHIFT; +#endif /* SLJIT_CONFIG_ARM_V7 */ + addr = (sljit_uw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = addr; + } + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + } + + SLJIT_GET_NEXT_MIN(); + } + code_ptr++; +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + } else { + /* Fortunately, no need to shift. */ + cpool_size = *buf_ptr++ & ~PUSH_POOL; + SLJIT_ASSERT(cpool_size > 0); + cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1); + cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size); + if (cpool_current_index > 0) { + /* Unconditional branch. 
*/ + *code_ptr = B | (((sljit_ins)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); + code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index); + } + cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1; + cpool_current_index = 0; + last_pc_patch = code_ptr; + } +#endif /* SLJIT_CONFIG_ARM_V6 */ + word_count++; + } while (buf_ptr < buf_end); + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + SLJIT_ASSERT(cpool_size == 0); + if (compiler->cpool_fill > 0) { + cpool_start_address = ALIGN_INSTRUCTION(code_ptr); + cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill); + if (cpool_current_index > 0) + code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index); + + buf_ptr = compiler->cpool; + buf_end = buf_ptr + compiler->cpool_fill; + cpool_current_index = 0; + while (buf_ptr < buf_end) { + if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { + SLJIT_FREE_EXEC(code, exec_allocator_data); + compiler->error = SLJIT_ERR_ALLOC_FAILED; + return NULL; + } + buf_ptr++; + cpool_current_index++; + } + SLJIT_ASSERT(!first_patch); + } +#endif + + jump = compiler->jumps; + while (jump) { + addr = (jump->flags & JUMP_ADDR) ? 
jump->u.target : jump->u.label->u.addr; + buf_ptr = (sljit_ins*)jump->addr; + + if (jump->flags & JUMP_MOV_ADDR) { +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + SLJIT_ASSERT((buf_ptr[0] & (sljit_ins)0xffff0000) == 0xe59f0000); +#else /* !SLJIT_CONFIG_ARM_V6 */ + SLJIT_ASSERT((buf_ptr[0] & ~(sljit_ins)0xf000) == 0); +#endif /* SLJIT_CONFIG_ARM_V6 */ + + if (jump->flags & PATCH_B) { + SLJIT_ASSERT((((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) & 0x3) == 0); + diff = ((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) >> 2; + + SLJIT_ASSERT(diff <= 0xff && diff >= -0xff); + + addr = ADD; + if (diff < 0) { + diff = -diff; + addr = SUB; + } + + buf_ptr[0] = addr | (buf_ptr[0] & 0xf000) | RN(TMP_PC) | (1 << 25) | (0xf << 8) | (sljit_ins)(diff & 0xff); + } else { +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr; +#else /* !SLJIT_CONFIG_ARM_V6 */ + buf_ptr[1] = MOVT | buf_ptr[0] | ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff); + buf_ptr[0] = MOVW | buf_ptr[0] | ((addr << 4) & 0xf0000) | (addr & 0xfff); +#endif /* SLJIT_CONFIG_ARM_V6 */ + } + } else if (jump->flags & PATCH_B) { + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); + SLJIT_ASSERT(diff <= 0x01ffffff && diff >= -0x02000000); + *buf_ptr |= (diff >> 2) & 0x00ffffff; + } else { +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + if (jump->flags & IS_BL) + buf_ptr--; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) { + jump->addr = (sljit_uw)code_ptr; + code_ptr[0] = (sljit_ins)buf_ptr; + code_ptr[1] = *buf_ptr; + set_jump_addr((sljit_uw)code_ptr, executable_offset, addr, 0); + code_ptr += 2; + } else { + if (*buf_ptr & (1 << 23)) + buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; + else + buf_ptr += 1; + *buf_ptr = addr; + } +#else /* !SLJIT_CONFIG_ARM_V6 */ + set_jump_addr((sljit_uw)buf_ptr, executable_offset, addr, 0); +#endif /* 
SLJIT_CONFIG_ARM_V6 */ + } + + jump = jump->next; + } + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + const_ = compiler->consts; + while (const_) { + buf_ptr = (sljit_ins*)const_->addr; + const_->addr = (sljit_uw)code_ptr; + + code_ptr[0] = (sljit_ins)buf_ptr; + code_ptr[1] = *buf_ptr; + if (*buf_ptr & (1 << 23)) + buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; + else + buf_ptr += 1; + /* Set the value again (can be a simple constant). */ + set_const_value((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0); + code_ptr += 2; + + const_ = const_->next; + } +#endif /* SLJIT_CONFIG_ARM_V6 */ + + SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size); + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw); + + code = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: + case SLJIT_HAS_F64_AS_F32_PAIR: +#ifdef SLJIT_IS_FPU_AVAILABLE + return (SLJIT_IS_FPU_AVAILABLE) != 0; +#else + /* Available by default. */ + return 1; +#endif /* SLJIT_IS_FPU_AVAILABLE */ + case SLJIT_HAS_SIMD: +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + return 0; +#else +#ifdef SLJIT_IS_FPU_AVAILABLE + return (SLJIT_IS_FPU_AVAILABLE) != 0; +#else + /* Available by default. 
*/ + return 1; +#endif /* SLJIT_IS_FPU_AVAILABLE */ +#endif /* SLJIT_CONFIG_ARM_V6 */ + + case SLJIT_SIMD_REGS_ARE_PAIRS: + case SLJIT_HAS_CLZ: + case SLJIT_HAS_ROT: + case SLJIT_HAS_CMOV: + case SLJIT_HAS_REV: + case SLJIT_HAS_PREFETCH: + case SLJIT_HAS_COPY_F32: + case SLJIT_HAS_COPY_F64: + case SLJIT_HAS_ATOMIC: + return 1; + + case SLJIT_HAS_CTZ: +#if defined(SLJIT_CONFIG_ARM_V6) && SLJIT_CONFIG_ARM_V6 + return 2; +#else + return 1; +#endif /* SLJIT_CONFIG_ARM_V6 */ + + default: + return 0; + } +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* Creates an index in data_transfer_insts array. */ +#define WORD_SIZE 0x00 +#define BYTE_SIZE 0x01 +#define HALF_SIZE 0x02 +#define PRELOAD 0x03 +#define SIGNED 0x04 +#define LOAD_DATA 0x08 + +/* Flag bits for emit_op. */ +#define ALLOW_IMM 0x10 +#define ALLOW_INV_IMM 0x20 +#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) +#define ALLOW_NEG_IMM 0x40 +#define ALLOW_DOUBLE_IMM 0x80 + +/* s/l - store/load (1 bit) + u/s - signed/unsigned (1 bit) + w/b/h/N - word/byte/half/NOT allowed (2 bit) + Storing signed and unsigned values are the same operations. 
*/
+/* Opcode template for every transfer kind, indexed by (type & 0xf): LOAD_DATA (0x08) | SIGNED (0x04) | size (WORD_SIZE, BYTE_SIZE, HALF_SIZE or PRELOAD). A zero entry marks a combination that is not allowed. */
+static const sljit_ins data_transfer_insts[16] = {
+/* s u w */ 0xe5000000 /* str */,
+/* s u b */ 0xe5400000 /* strb */,
+/* s u h */ 0xe10000b0 /* strh */,
+/* s u N */ 0x00000000 /* not allowed */,
+/* s s w */ 0xe5000000 /* str */,
+/* s s b */ 0xe5400000 /* strb */,
+/* s s h */ 0xe10000b0 /* strh */,
+/* s s N */ 0x00000000 /* not allowed */,
+
+/* l u w */ 0xe5100000 /* ldr */,
+/* l u b */ 0xe5500000 /* ldrb */,
+/* l u h */ 0xe11000b0 /* ldrh */,
+/* l u p */ 0xf5500000 /* preload */,
+/* l s w */ 0xe5100000 /* ldr */,
+/* l s b */ 0xe11000d0 /* ldrsb */,
+/* l s h */ 0xe11000f0 /* ldrsh */,
+/* l s N */ 0x00000000 /* not allowed */,
+};
+/* Builds a transfer instruction: the template selected by type, the add flag at bit 23, the RD and RN register fields, and arg OR-ed in unchanged. */
+#define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \
+	(data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_ins)(arg))
+
+/* Normal ldr/str instruction.
+   Type2: ldrsb, ldrh, ldrsh
+   IS_TYPE1_TRANSFER is non-zero when the template has bit 26 (0x04000000) set; in the
+   table above the templates with that bit clear are strh, ldrh, ldrsb and ldrsh.
+   TYPE2_TRANSFER_IMM keeps bits 0-3 of imm in place, moves bits 4-7 up to bits 8-11
+   and sets bit 22. */
+#define IS_TYPE1_TRANSFER(type) \
+	(data_transfer_insts[(type) & 0xf] & 0x04000000)
+#define TYPE2_TRANSFER_IMM(imm) \
+	(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
+/* Builds a floating point instruction from opcode and mode; note that src1 is encoded with VM and src2 with VN. */
+#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
+	((sljit_ins)(opcode) | (sljit_ins)(mode) | VD(dst) | VM(src1) | VN(src2))
+
+/* Flags for emit_op: */
+	/* Arguments are swapped. */
+#define ARGS_SWAPPED	0x01
+	/* Inverted immediate. */
+#define INV_IMM		0x02
+	/* Source and destination is register. */
+#define REGISTER_OP	0x04
+	/* Unused return value. */
+#define UNUSED_RETURN	0x08
+/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
+#define SET_FLAGS	(1 << 20)
+/* dst: reg
+   src1: reg
+   src2: reg or imm (if allowed)
+   SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). 
*/ +#define SRC2_IMM (1 << 25) + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_uw imm, offset; + sljit_s32 i, tmp, size, word_arg_count; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); +#ifdef __SOFTFP__ + sljit_u32 float_arg_count; +#else + sljit_u32 old_offset, f32_offset; + sljit_u32 remap[3]; + sljit_u32 *remap_ptr = remap; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + imm = 0; + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) + imm |= (sljit_uw)1 << reg_map[i]; + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + imm |= (sljit_uw)1 << reg_map[i]; + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + /* Push saved and temporary registers + multiple registers: stmdb sp!, {..., lr} + single register: str reg, [sp, #-4]! 
*/ + if (imm != 0) + FAIL_IF(push_inst(compiler, PUSH | (1 << 14) | imm)); + else + FAIL_IF(push_inst(compiler, 0xe52d0004 | RD(TMP_REG2))); + + /* Stack must be aligned to 8 bytes: */ + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((size & SSIZE_OF(sw)) != 0) { + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | sizeof(sljit_sw))); + size += SSIZE_OF(sw); + } + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fsaveds > 0) + FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1))); + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + } + } + + local_size = ((size + local_size + 0x7) & ~0x7) - size; + compiler->local_size = local_size; + + if (options & SLJIT_ENTER_REG_ARG) + arg_types = 0; + + arg_types >>= SLJIT_ARG_SHIFT; + word_arg_count = 0; + saved_arg_count = 0; +#ifdef __SOFTFP__ + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + + offset = 0; + float_arg_count = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + else + FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + offset += sizeof(sljit_f64) - sizeof(sljit_sw); + break; + case SLJIT_ARG_TYPE_F32: + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 
10))); + else + FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count - 1 != (sljit_s32)(offset >> 2)) + tmp = word_arg_count; + else + break; + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2))); + else + FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_ins)size - 4 * sizeof(sljit_sw)))); + break; + } + + offset += sizeof(sljit_sw); + arg_types >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = offset; +#else + offset = SLJIT_FR0; + old_offset = SLJIT_FR0; + f32_offset = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != old_offset) + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, SLJIT_32, offset, old_offset, 0); + old_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0x20, offset, f32_offset, 0); + f32_offset = 0; + } else { + if (offset != old_offset) + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0, offset, old_offset, 0); + f32_offset = old_offset; + old_offset++; + } + offset++; + break; + default: + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } + + word_arg_count++; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap)); + + while (remap_ptr > remap) + FAIL_IF(push_inst(compiler, *(--remap_ptr))); +#endif + + if (local_size > 0) + FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); + + return 
SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); + + /* Doubles are saved, so alignment is unaffected. */ + if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)) + size += SSIZE_OF(sw); + + compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm) +{ + sljit_uw imm2 = get_imm(imm); + + if (imm2 == 0) + return emit_op(compiler, SLJIT_ADD, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, (sljit_sw)imm); + + return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | imm2); +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size) +{ + sljit_s32 local_size, fscratches, fsaveds, i, tmp; + sljit_s32 restored_reg = 0; + sljit_s32 lr_dst = TMP_PC; + sljit_uw reg_list = 0; + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128); + + local_size = compiler->local_size; + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fscratches >= 
SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + if (fsaveds > 0) + FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1))); + } + + local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7; + } + + if (frame_size < 0) { + lr_dst = TMP_REG2; + frame_size = 0; + } else if (frame_size > 0) { + SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0); + lr_dst = 0; + frame_size &= ~0x7; + } + + if (lr_dst != 0) + reg_list |= (sljit_uw)1 << reg_map[lr_dst]; + + tmp = SLJIT_S0 - compiler->saveds; + i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + if (tmp < i) { + restored_reg = i; + do { + reg_list |= (sljit_uw)1 << reg_map[i]; + } while (--i > tmp); + } + + i = compiler->scratches; + if (i >= SLJIT_FIRST_SAVED_REG) { + restored_reg = i; + do { + reg_list |= (sljit_uw)1 << reg_map[i]; + } while (--i >= SLJIT_FIRST_SAVED_REG); + } + + if (lr_dst == TMP_REG2 && reg_list == 0) { + restored_reg = TMP_REG2; + lr_dst = 0; + } + + if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) { + /* The local_size does not include the saved registers. */ + tmp = 0; + if (reg_list != 0) { + tmp = 2; + if (local_size <= 0xfff) { + if (local_size == 0) { + SLJIT_ASSERT(restored_reg != TMP_REG2); + if (frame_size == 0) + return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | 0x800008); + if (frame_size > 2 * SSIZE_OF(sw)) + return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw)))); + } + + FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)local_size)); + tmp = 1; + } else if (frame_size == 0) { + frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw); + tmp = 3; + } + + /* Place for the saved register. 
*/ + if (restored_reg != TMP_REG2) + local_size += SSIZE_OF(sw); + } + + /* Place for the lr register. */ + local_size += SSIZE_OF(sw); + + if (frame_size > local_size) + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_ins)(frame_size - local_size))); + else if (frame_size < local_size) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size))); + + if (tmp <= 1) + return SLJIT_SUCCESS; + + if (tmp == 2) { + frame_size -= SSIZE_OF(sw); + if (restored_reg != TMP_REG2) + frame_size -= SSIZE_OF(sw); + + return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)frame_size); + } + + tmp = (restored_reg == TMP_REG2) ? 0x800004 : 0x800008; + return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)tmp); + } + + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + /* Pop saved and temporary registers + multiple registers: ldmia sp!, {...} + single register: ldr reg, [sp], #4 */ + if ((reg_list & (reg_list - 1)) == 0) { + SLJIT_ASSERT(lr_dst != 0); + SLJIT_ASSERT(reg_list == (sljit_uw)1 << reg_map[lr_dst]); + + return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(lr_dst) | 0x800004); + } + + FAIL_IF(push_inst(compiler, POP | reg_list)); + + if (frame_size > 0) + return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_ins)frame_size - sizeof(sljit_sw))); + + if (lr_dst != 0) + return SLJIT_SUCCESS; + + return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | sizeof(sljit_sw)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + return emit_stack_frame_release(compiler, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if 
(src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src))); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_uw dst, sljit_uw src1, sljit_uw src2) +{ + sljit_s32 reg, is_masked; + sljit_uw shift_type; + + switch (op) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if (dst != src2) { + if (src2 & SRC2_IMM) { + return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2); + } + return push_inst(compiler, MOV | RD(dst) | RM(src2)); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if (flags & REGISTER_OP) + return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2)); + + if (dst != src2) { + SLJIT_ASSERT(src2 & SRC2_IMM); + return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if (flags & REGISTER_OP) + return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2)); + + if (dst != src2) { + SLJIT_ASSERT(src2 & SRC2_IMM); + return push_inst(compiler, ((flags & INV_IMM) ? 
MVN : MOV) | RD(dst) | src2); + } + return SLJIT_SUCCESS; + + case SLJIT_CLZ: + SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); + FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2))); + return SLJIT_SUCCESS; + + case SLJIT_CTZ: + SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_REG1 && src2 != TMP_REG2 && !(flags & ARGS_SWAPPED)); +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0)); + FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RN(src2) | RM(TMP_REG2))); + FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG1))); + FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32)); + return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f); +#else /* !SLJIT_CONFIG_ARM_V6 */ + FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2))); + return push_inst(compiler, CLZ | RD(dst) | RM(dst)); +#endif /* SLJIT_CONFIG_ARM_V6 */ + + case SLJIT_REV: + case SLJIT_REV_U32: + case SLJIT_REV_S32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + return push_inst(compiler, REV | RD(dst) | RM(src2)); + + case SLJIT_REV_U16: + case SLJIT_REV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RM(src2))); + if (!(flags & REGISTER_OP)) + return SLJIT_SUCCESS; + return push_inst(compiler, (op == SLJIT_REV_U16 ? UXTH : SXTH) | RD(dst) | RM(dst)); + case SLJIT_ADD: + SLJIT_ASSERT(!(flags & INV_IMM)); + + if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) + return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_ADDC: + SLJIT_ASSERT(!(flags & INV_IMM)); + return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? 
src2 : RM(src2))); + + case SLJIT_SUB: + SLJIT_ASSERT(!(flags & INV_IMM)); + + if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) + return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS) + | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_SUBC: + SLJIT_ASSERT(!(flags & INV_IMM)); + return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS) + | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & INV_IMM)); + SLJIT_ASSERT(!(src2 & SRC2_IMM)); + compiler->status_flags_state = 0; + + if (!(flags & SET_FLAGS)) + return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1)); + + reg = dst == TMP_REG1 ? TMP_REG2 : TMP_REG1; + FAIL_IF(push_inst(compiler, SMULL | RN(reg) | RD(dst) | RM8(src2) | RM(src1))); + + /* cmp TMP_REG1, dst asr #31. */ + return push_inst(compiler, CMP | SET_FLAGS | RN(reg) | RM(dst) | 0xfc0); + + case SLJIT_AND: + if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN) + return push_inst(compiler, TST | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS) + | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_OR: + SLJIT_ASSERT(!(flags & INV_IMM)); + return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_XOR: + if (flags & INV_IMM) { + SLJIT_ASSERT(src2 == SRC2_IMM); + return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src1)); + } + return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? 
src2 : RM(src2))); + + case SLJIT_SHL: + case SLJIT_MSHL: + shift_type = 0; + is_masked = op == SLJIT_MSHL; + break; + + case SLJIT_LSHR: + case SLJIT_MLSHR: + shift_type = 1; + is_masked = op == SLJIT_MLSHR; + break; + + case SLJIT_ASHR: + case SLJIT_MASHR: + shift_type = 2; + is_masked = op == SLJIT_MASHR; + break; + + case SLJIT_ROTL: + if (compiler->shift_imm == 0x20) { + FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0)); + src2 = TMP_REG2; + } else + compiler->shift_imm = (sljit_uw)(-(sljit_sw)compiler->shift_imm) & 0x1f; + /* fallthrough */ + + case SLJIT_ROTR: + shift_type = 3; + is_masked = 0; + break; + + case SLJIT_MULADD: + return push_inst(compiler, MLA | RN(dst) | RD(dst) | RM8(src2) | RM(src1)); + + default: + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + } + + SLJIT_ASSERT(!(flags & ARGS_SWAPPED) && !(flags & INV_IMM) && !(src2 & SRC2_IMM)); + + if (compiler->shift_imm != 0x20) { + SLJIT_ASSERT(src1 == TMP_REG1); + + if (compiler->shift_imm != 0) + return push_inst(compiler, MOV | (flags & SET_FLAGS) | + RD(dst) | (compiler->shift_imm << 7) | (shift_type << 5) | RM(src2)); + return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2)); + } + + SLJIT_ASSERT(src1 != TMP_REG2); + + if (is_masked) { + FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | SRC2_IMM | 0x1f)); + src2 = TMP_REG2; + } + + return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) + | RM8(src2) | (sljit_ins)(shift_type << 5) | 0x10 | RM(src1)); +} + +#undef EMIT_SHIFT_INS_AND_RETURN + +/* Tests whether the immediate can be stored in the 12 bit imm field. + Returns with 0 if not possible. 
*/ +static sljit_uw get_imm(sljit_uw imm) +{ + sljit_u32 rol; + + if (imm <= 0xff) + return SRC2_IMM | imm; + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol = 8; + } else { + imm = (imm << 24) | (imm >> 8); + rol = 0; + } + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol += 4; + } + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + return SRC2_IMM | (imm >> 24) | (rol << 8); + return 0; +} + +static sljit_uw compute_imm(sljit_uw imm, sljit_uw* imm2) +{ + sljit_uw mask; + sljit_uw imm1; + sljit_uw rol; + + /* Step1: Search a zero byte (8 continous zero bit). */ + mask = 0xff000000; + rol = 8; + while (1) { + if (!(imm & mask)) { + /* Rol imm by rol. */ + imm = (imm << rol) | (imm >> (32 - rol)); + /* Calculate arm rol. */ + rol = 4 + (rol >> 1); + break; + } + + rol += 2; + mask >>= 2; + if (mask & 0x3) { + /* rol by 8. */ + imm = (imm << 8) | (imm >> 24); + mask = 0xff00; + rol = 24; + while (1) { + if (!(imm & mask)) { + /* Rol imm by rol. */ + imm = (imm << rol) | (imm >> (32 - rol)); + /* Calculate arm rol. */ + rol = (rol >> 1) - 8; + break; + } + rol += 2; + mask >>= 2; + if (mask & 0x3) + return 0; + } + break; + } + } + + /* The low 8 bit must be zero. 
*/ + SLJIT_ASSERT(!(imm & 0xff)); + + if (!(imm & 0xff000000)) { + imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8); + *imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8); + } else if (imm & 0xc0000000) { + imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); + imm <<= 8; + rol += 4; + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol += 4; + } + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + *imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); + else + return 0; + } else { + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); + imm <<= 8; + rol += 4; + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + *imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); + else + return 0; + } + + return imm1; +} + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm) +{ + sljit_uw tmp; +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + sljit_uw imm1, imm2; +#else /* !SLJIT_CONFIG_ARM_V6 */ + if (!(imm & ~(sljit_uw)0xffff)) + return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)); +#endif /* SLJIT_CONFIG_ARM_V6 */ + + /* Create imm by 1 inst. */ + tmp = get_imm(imm); + if (tmp) + return push_inst(compiler, MOV | RD(reg) | tmp); + + tmp = get_imm(~imm); + if (tmp) + return push_inst(compiler, MVN | RD(reg) | tmp); + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + /* Create imm by 2 inst. 
*/ + imm1 = compute_imm(imm, &imm2); + if (imm1 != 0) { + FAIL_IF(push_inst(compiler, MOV | RD(reg) | imm1)); + return push_inst(compiler, ORR | RD(reg) | RN(reg) | imm2); + } + + imm1 = compute_imm(~imm, &imm2); + if (imm1 != 0) { + FAIL_IF(push_inst(compiler, MVN | RD(reg) | imm1)); + return push_inst(compiler, BIC | RD(reg) | RN(reg) | imm2); + } + + /* Load integer. */ + return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm); +#else /* !SLJIT_CONFIG_ARM_V6 */ + FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); + if (imm <= 0xffff) + return SLJIT_SUCCESS; + return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); +#endif /* SLJIT_CONFIG_ARM_V6 */ +} + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) +{ + sljit_uw imm, offset_reg, tmp; + sljit_sw mask = IS_TYPE1_TRANSFER(flags) ? 0xfff : 0xff; + sljit_sw sign = IS_TYPE1_TRANSFER(flags) ? 0x1000 : 0x100; + + SLJIT_ASSERT(arg & SLJIT_MEM); + SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -mask && argw <= mask)); + + if (SLJIT_UNLIKELY(!(arg & REG_MASK))) { + tmp = (sljit_uw)(argw & (sign | mask)); + tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask); + + FAIL_IF(load_immediate(compiler, tmp_reg, tmp)); + + argw -= (sljit_sw)tmp; + tmp = 1; + + if (argw < 0) { + argw = -argw; + tmp = 0; + } + + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, tmp, reg, tmp_reg, + (mask == 0xff) ? 
TYPE2_TRANSFER_IMM(argw) : argw)); + } + + if (arg & OFFS_REG_MASK) { + offset_reg = OFFS_REG(arg); + arg &= REG_MASK; + argw &= 0x3; + + if (argw != 0 && (mask == 0xff)) { + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_ins)argw << 7))); + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0))); + } + + /* Bit 25: RM is offset. */ + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, + RM(offset_reg) | (mask == 0xff ? 0 : (1 << 25)) | ((sljit_ins)argw << 7))); + } + + arg &= REG_MASK; + + if (argw > mask) { + tmp = (sljit_uw)(argw & (sign | mask)); + tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask); + imm = get_imm(tmp); + + if (imm) { + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm)); + argw -= (sljit_sw)tmp; + arg = tmp_reg; + + SLJIT_ASSERT(argw >= -mask && argw <= mask); + } + } else if (argw < -mask) { + tmp = (sljit_uw)(-argw & (sign | mask)); + tmp = (sljit_uw)((-argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask); + imm = get_imm(tmp); + + if (imm) { + FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm)); + argw += (sljit_sw)tmp; + arg = tmp_reg; + + SLJIT_ASSERT(argw >= -mask && argw <= mask); + } + } + + if (argw <= mask && argw >= -mask) { + if (argw >= 0) { + if (mask == 0xff) + argw = TYPE2_TRANSFER_IMM(argw); + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw)); + } + + argw = -argw; + + if (mask == 0xff) + argw = TYPE2_TRANSFER_IMM(argw); + + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, argw)); + } + + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, + RM(tmp_reg) | (mask == 0xff ? 
0 : (1 << 25)))); +} + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* src1 is reg or TMP_REG1 + src2 is reg, TMP_REG2, or imm + result goes to TMP_REG2, so put result can use TMP_REG1. */ + + /* We prefers register and simple consts. */ + sljit_s32 dst_reg; + sljit_s32 src1_reg = 0; + sljit_s32 src2_reg = 0; + sljit_s32 src2_tmp_reg = 0; + sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + sljit_s32 neg_op = 0; + sljit_u32 imm2; + + op = GET_OPCODE(op); + + if (flags & SET_FLAGS) + inp_flags &= ~ALLOW_DOUBLE_IMM; + + if (dst == TMP_REG1) + flags |= UNUSED_RETURN; + + SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM)); + + if (inp_flags & ALLOW_NEG_IMM) { + switch (op) { + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + neg_op = SLJIT_SUB; + break; + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + neg_op = SLJIT_SUBC; + break; + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + neg_op = SLJIT_ADD; + break; + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + neg_op = SLJIT_ADDC; + break; + } + } + + do { + if (!(inp_flags & ALLOW_IMM)) + break; + + if (src2 == SLJIT_IMM) { + src2_reg = (sljit_s32)get_imm((sljit_uw)src2w); + if (src2_reg) + break; + + if (inp_flags & ALLOW_INV_IMM) { + src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w); + if (src2_reg) { + flags |= INV_IMM; + break; + } + } + + if (neg_op != 0) { + src2_reg = (sljit_s32)get_imm((neg_op == SLJIT_ADD || neg_op == SLJIT_SUB) ? 
(sljit_uw)-src2w : ~(sljit_uw)src2w); + if (src2_reg) { + op = neg_op | GET_ALL_FLAGS(op); + break; + } + } + } + + if (src1 == SLJIT_IMM) { + src2_reg = (sljit_s32)get_imm((sljit_uw)src1w); + if (src2_reg) { + flags |= ARGS_SWAPPED; + src1 = src2; + src1w = src2w; + break; + } + + if (inp_flags & ALLOW_INV_IMM) { + src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w); + if (src2_reg) { + flags |= ARGS_SWAPPED | INV_IMM; + src1 = src2; + src1w = src2w; + break; + } + } + + if (neg_op >= SLJIT_SUB) { + /* Note: additive operation (commutative). */ + SLJIT_ASSERT(op == SLJIT_ADD || op == SLJIT_ADDC); + + src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w); + if (src2_reg) { + src1 = src2; + src1w = src2w; + op = neg_op | GET_ALL_FLAGS(op); + break; + } + } + } + } while(0); + + /* Destination. */ + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2; + + if (op <= SLJIT_MOV_P) { + if (dst & SLJIT_MEM) { + if (inp_flags & BYTE_SIZE) + inp_flags &= ~SIGNED; + + if (FAST_IS_REG(src2)) + return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG1); + } + + if (FAST_IS_REG(src2) && dst_reg != TMP_REG2) + flags |= REGISTER_OP; + + src2_tmp_reg = dst_reg; + } else { + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { + if (!(dst & SLJIT_MEM) && (!(src2 & SLJIT_MEM) || op == SLJIT_REV_S16)) + flags |= REGISTER_OP; + } + + src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2; + } + + if (src2_reg == 0 && (src2 & SLJIT_MEM)) { + src2_reg = src2_tmp_reg; + FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG1)); + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) + src1_reg = src1; + else if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); + src1_reg = TMP_REG1; + } else if (!(inp_flags & ALLOW_DOUBLE_IMM) || src2_reg != 0 || op == SLJIT_SUB || op == SLJIT_SUBC) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); + src1_reg = TMP_REG1; + } + + /* Source 2. 
*/ + if (src2_reg == 0) { + src2_reg = src2_tmp_reg; + + if (FAST_IS_REG(src2)) + src2_reg = src2; + else if (!(inp_flags & ALLOW_DOUBLE_IMM)) + FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w)); + else { + SLJIT_ASSERT(!(flags & SET_FLAGS)); + + if (src1_reg == 0) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); + src1_reg = TMP_REG1; + } + + src2_reg = (sljit_s32)compute_imm((sljit_uw)src2w, &imm2); + + if (src2_reg == 0 && neg_op != 0) { + src2_reg = (sljit_s32)compute_imm((sljit_uw)-src2w, &imm2); + if (src2_reg != 0) + op = neg_op; + } + + if (src2_reg == 0) { + FAIL_IF(load_immediate(compiler, src2_tmp_reg, (sljit_uw)src2w)); + src2_reg = src2_tmp_reg; + } else { + FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg)); + src1_reg = dst_reg; + src2_reg = (sljit_s32)imm2; + + if (op == SLJIT_ADDC) + op = SLJIT_ADD; + else if (op == SLJIT_SUBC) + op = SLJIT_SUB; + } + } + } + + if (src1_reg == 0) { + SLJIT_ASSERT((inp_flags & ALLOW_DOUBLE_IMM) && !(flags & SET_FLAGS)); + + src1_reg = (sljit_s32)compute_imm((sljit_uw)src1w, &imm2); + + if (src1_reg == 0 && neg_op != 0) { + src1_reg = (sljit_s32)compute_imm((sljit_uw)-src1w, &imm2); + if (src1_reg != 0) + op = neg_op; + } + + if (src1_reg == 0) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); + src1_reg = TMP_REG1; + } else { + FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src2_reg, (sljit_uw)src1_reg)); + src1_reg = dst_reg; + src2_reg = (sljit_s32)imm2; + + if (op == SLJIT_ADDC) + op = SLJIT_ADD; + } + } + + FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg)); + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1); +} + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__GNUC__) +extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int 
denominator); +extern int __aeabi_idivmod(int numerator, int denominator); +#else +#error "Software divmod functions are needed" +#endif + +#ifdef __cplusplus +} +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + sljit_uw saved_reg_list[3]; + sljit_sw saved_reg_count; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + FAIL_IF(push_inst(compiler, BKPT)); + break; + case SLJIT_NOP: + FAIL_IF(push_inst(compiler, NOP)); + break; + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL) + | RN(SLJIT_R1) | RD(SLJIT_R0) | RM8(SLJIT_R0) | RM(SLJIT_R1)); + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); + SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3); + + saved_reg_count = 0; + if (compiler->scratches >= 4) + saved_reg_list[saved_reg_count++] = 3; + if (compiler->scratches >= 3) + saved_reg_list[saved_reg_count++] = 2; + if (op >= SLJIT_DIV_UW) + saved_reg_list[saved_reg_count++] = 1; + + if (saved_reg_count > 0) { + FAIL_IF(push_inst(compiler, STR | 0x2d0000 | (saved_reg_count >= 3 ? 16 : 8) + | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */)); + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst(compiler, STR | 0x8d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */)); + } + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst(compiler, STR | 0x8d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */)); + } + } + +#if defined(__GNUC__) + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + ((op | 0x2) == SLJIT_DIV_UW ? 
SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod)))); +#else +#error "Software divmod functions are needed" +#endif + + if (saved_reg_count > 0) { + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst(compiler, LDR | 0x8d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */)); + } + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst(compiler, LDR | 0x8d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */)); + } + return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_ins)(saved_reg_count >= 3 ? 16 : 8) + | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); + } + return SLJIT_SUCCESS; + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_U8: + return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); + + case SLJIT_MOV_S8: + return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); + + case SLJIT_MOV_U16: + return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? 
(sljit_u16)srcw : srcw); + + case SLJIT_MOV_S16: + return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw); + + case SLJIT_CLZ: + case SLJIT_CTZ: + case SLJIT_REV: + case SLJIT_REV_U32: + case SLJIT_REV_S32: + return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_REV_U16: + case SLJIT_REV_S16: + return emit_op(compiler, op, HALF_SIZE, dst, dstw, TMP_REG1, 0, src, srcw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 inp_flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + case SLJIT_SUB: + case SLJIT_SUBC: + return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_OR: + return emit_op(compiler, op, ALLOW_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_XOR: + inp_flags = ALLOW_IMM | ALLOW_DOUBLE_IMM; + if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1)) { + inp_flags |= ALLOW_INV_IMM; + } + return emit_op(compiler, op, inp_flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + case SLJIT_ROTL: + case SLJIT_ROTR: + if (src2 == SLJIT_IMM) { + compiler->shift_imm = src2w & 0x1f; + return 
emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w); + } else { + compiler->shift_imm = 0x20; + return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); + } + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + return emit_op(compiler, op, 0, dst_reg, 0, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1_reg, + sljit_s32 src2_reg, + sljit_s32 src3, sljit_sw src3w) +{ + sljit_s32 is_left; + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w)); + + op = GET_OPCODE(op); + is_left = (op == SLJIT_SHL || op == SLJIT_MSHL); + + if (src1_reg == src2_reg) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w); + } + + ADJUST_LOCAL_OFFSET(src3, src3w); + + /* Shift type of ROR is 3. */ + if (src3 == SLJIT_IMM) { + src3w &= 0x1f; + + if (src3w == 0) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src1_reg) | ((sljit_ins)(is_left ? 
0 : 1) << 5) | ((sljit_ins)src3w << 7))); + src3w = (src3w ^ 0x1f) + 1; + return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | ((sljit_ins)src3w << 7)); + } + + if (src3 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src3, src3w, TMP_REG2)); + src3 = TMP_REG2; + } + + if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) { + FAIL_IF(push_inst(compiler, AND | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f)); + src3 = TMP_REG2; + } + + FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM8(src3) | ((sljit_ins)(is_left ? 0 : 1) << 5) | 0x10 | RM(src1_reg))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | (1 << 7))); + FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f)); + return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM8(TMP_REG2) | ((sljit_ins)(is_left ? 1 : 0) << 5) | 0x10 | RM(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src))); + else + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1)); + + return push_inst(compiler, BX | RM(TMP_REG2)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + SLJIT_ASSERT(src & SLJIT_MEM); + return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 
dst, sljit_sw dstw) +{ + sljit_s32 size, dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + switch (op) { + case SLJIT_FAST_ENTER: + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2)); + break; + case SLJIT_GET_RETURN_ADDRESS: + size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0); + + if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + /* The size of pc is not added above. */ + if ((size & SSIZE_OF(sw)) == 0) + size += SSIZE_OF(sw); + + size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64); + } + + SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1)); + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1); + + return SLJIT_SUCCESS; +} + +/* Returns the machine register index behind the sljit register 'reg'. + * General purpose registers are looked up in reg_map[]; float and 64 bit + * SIMD registers are looked up in freg_map[]. For 128 bit SIMD registers + * bit 0 of the freg_map[] entry is cleared (presumably this selects the + * even register that starts the pair - confirm against the SIMD emitters). + * Every other register type is unsupported and yields -1. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64) + return freg_map[reg]; + + /* Only the 128 bit SIMD type gets the masked index; the test used to be + * inverted, which returned -1 for it and an index for unsupported types. */ + if (type == SLJIT_SIMD_REG_128) + return freg_map[reg] & ~0x1; + + return -1; +} + +/* Pushes one raw instruction word read through 'instruction'. + * 'size' is only forwarded to the argument check; exactly one sljit_ins + * is emitted regardless of its value. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + SLJIT_UNUSED_ARG(size); + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/*
--------------------------------------------------------------------- */ + +#define FPU_LOAD (1 << 20) +#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \ + ((inst) | (sljit_ins)((add) << 23) | RN(base) | VD(freg) | (sljit_ins)(offs)) + +static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + sljit_uw imm; + sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD)); + + SLJIT_ASSERT(arg & SLJIT_MEM); + arg &= ~SLJIT_MEM; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_ins)argw & 0x3) << 7))); + arg = TMP_REG1; + argw = 0; + } + + /* Fast loads and stores. */ + if (arg) { + if (!(argw & ~0x3fc)) + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2)); + if (!(-argw & ~0x3fc)) + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2)); + + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc); + if (imm) { + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | imm)); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2)); + } + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc); + if (imm) { + argw = -argw; + FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg & REG_MASK) | imm)); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2)); + } + } + + if (arg) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw)); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(TMP_REG1))); + } + else + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw)); + + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + 
sljit_s32 src, sljit_sw srcw) +{ + op ^= SLJIT_32; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_32, TMP_FREG1, src, 0))); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | VN(TMP_FREG1)); + + /* Store the integer value from a VFP register. */ + return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); +} + +static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1))); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. */ + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); + FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1))); + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(ins, ins & SLJIT_32, dst_r, TMP_FREG1, 0))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler 
*compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + op ^= SLJIT_32; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0))); + FAIL_IF(push_inst(compiler, VMRS)); + + if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL) + return SLJIT_SUCCESS; + + return push_inst(compiler, (CMP - CONDITIONAL) | (0x60000000 /* VS */) | SET_FLAGS | RN(TMP_REG1) | RM(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_32; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (!(dst & SLJIT_MEM)) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_32, dst_r, src, 0))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src, 0))); + break; + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_32, dst_r, src, 0))); + op ^= SLJIT_32; + break; + } + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + op ^= SLJIT_32; + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1))); + break; + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1))); + break; + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1))); + break; + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1))); + break; + case SLJIT_COPYSIGN_F64: + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(src2) | RD(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src1, 0))); + FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | SRC2_IMM | 0)); + return push_inst(compiler, EMIT_FPU_OPERATION((VNEG_F32 & ~COND_MASK) | 0xb0000000, op & SLJIT_32, dst_r, dst_r, 0)); + } + + if (dst_r != dst) + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw)); + + return SLJIT_SUCCESS; +} + +#undef EMIT_FPU_DATA_TRANSFER + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ +#if defined(__ARM_NEON) && __ARM_NEON + sljit_u32 exp; + sljit_ins ins; +#endif /* NEON */ + union { + sljit_u32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + +#if defined(__ARM_NEON) && __ARM_NEON + if ((u.imm << (32 - 19)) == 0) { + exp = (u.imm >> (23 + 2)) & 0x3f; + + if (exp == 0x20 || exp == 0x1f) { + ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 
0x7f); + return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf)); + } + } +#endif /* NEON */ + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm)); + return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ +#if defined(__ARM_NEON) && __ARM_NEON + sljit_u32 exp; + sljit_ins ins; +#endif /* NEON */ + union { + sljit_u32 imm[2]; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + +#if defined(__ARM_NEON) && __ARM_NEON + if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) { + exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff; + + if (exp == 0x100 || exp == 0xff) { + ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f); + return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf)); + } + } +#endif /* NEON */ + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0])); + if (u.imm[0] == u.imm[1]) + return push_inst(compiler, VMOV2 | RN(TMP_REG1) | RD(TMP_REG1) | VM(freg)); + + FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1])); + return push_inst(compiler, VMOV2 | RN(TMP_REG2) | RD(TMP_REG1) | VM(freg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ + sljit_s32 reg2; + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + if (reg & REG_PAIR_MASK) { + reg2 = REG_PAIR_SECOND(reg); + reg = REG_PAIR_FIRST(reg); + + inst = VMOV2 | RN(reg) | RD(reg2) | VM(freg); + } else { + inst = VMOV | VN(freg) | RD(reg); + + if (!(op & SLJIT_32)) + inst |= 1 << 7; + } + + if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64) + inst |= 1 << 20; + + return push_inst(compiler, inst); +} + +/* 
--------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_ATOMIC_STORED: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + return 0x00000000; + + case SLJIT_NOT_EQUAL: + case SLJIT_ATOMIC_NOT_STORED: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return 0x10000000; + + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x20000000; + /* fallthrough */ + + case SLJIT_LESS: + return 0x30000000; + + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x30000000; + /* fallthrough */ + + case SLJIT_GREATER_EQUAL: + return 0x20000000; + + case SLJIT_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + return 0x80000000; + + case SLJIT_LESS_EQUAL: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + return 0x90000000; + + case SLJIT_SIG_LESS: + case SLJIT_UNORDERED_OR_LESS: + return 0xb0000000; + + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + return 0xa0000000; + + case SLJIT_SIG_GREATER: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + return 0xc0000000; + + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return 0xd0000000; + + case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) + return 0x10000000; + /* fallthrough */ + + case SLJIT_UNORDERED: + return 0x60000000; + + case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) + return 0x00000000; + /* fallthrough */ + + case SLJIT_ORDERED: + return 0x70000000; + + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + 
return 0x40000000; + + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return 0x50000000; + + default: + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG); + return 0xe0000000; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + if (type >= SLJIT_FAST_CALL) + PTR_FAIL_IF(prepare_blx(compiler)); + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0)); + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + compiler->patches++; + + if (type >= SLJIT_FAST_CALL) { + jump->flags |= IS_BL; + jump->addr = compiler->size; + PTR_FAIL_IF(emit_blx(compiler)); + } +#else /* !SLJIT_CONFIG_ARM_V6 */ + jump->addr = compiler->size; + if (type >= SLJIT_FAST_CALL) + jump->flags |= IS_BL; + PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? 
BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type))); + compiler->size += JUMP_MAX_SIZE - 1; +#endif /* SLJIT_CONFIG_ARM_V6 */ + return jump; +} + +#ifdef __SOFTFP__ + +static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space) +{ + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; + sljit_u32 word_arg_offset = 0; + sljit_u32 src_offset = 4 * sizeof(sljit_sw); + sljit_u32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_u8 offsets[4]; + sljit_u8 *offset_ptr = offsets; + + if (src && FAST_IS_REG(*src)) + src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw); + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f64); + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f32); + float_arg_count++; + break; + default: + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_sw); + word_arg_offset += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { + /* Keep lr register on the stack. */ + if (is_tail_call) + offset += sizeof(sljit_sw); + + offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_u32)0x7; + + *extra_space = offset; + + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset)); + else + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | offset)); + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, -1)); + *extra_space = 0; + } + + /* Process arguments in reversed direction. 
*/ + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count--; + offset = *(--offset_ptr); + + SLJIT_ASSERT((offset & 0x7) == 0); + + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); + *src = TMP_REG1; + } + FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + } else + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count--; + offset = *(--offset_ptr); + + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); + *src = TMP_REG1; + } + FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10))); + } else + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); + break; + default: + word_arg_offset -= sizeof(sljit_sw); + offset = *(--offset_ptr); + + SLJIT_ASSERT(offset >= word_arg_offset); + + if (offset != word_arg_offset) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); + *src = TMP_REG1; + } + else if (src_offset == word_arg_offset) { + *src = (sljit_s32)(SLJIT_R0 + (offset >> 2)); + src_offset = offset; + } + FAIL_IF(push_inst(compiler, MOV | (offset << 10) | (word_arg_offset >> 2))); + } else + FAIL_IF(push_inst(compiler, STR | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw)))); + } + break; + } + + types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + if 
((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) + FAIL_IF(push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0)); + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32) + FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12))); + + return SLJIT_SUCCESS; +} + +#else /* !__SOFTFP__ */ + +static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_u32 offset = SLJIT_FR0; + sljit_u32 new_offset = SLJIT_FR0; + sljit_u32 f32_offset = 0; + + /* Remove return value. */ + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != new_offset) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + SLJIT_32, new_offset, offset, 0))); + + new_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + 0x400000, f32_offset, offset, 0))); + f32_offset = 0; + } else { + if (offset != new_offset) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + 0, new_offset, offset, 0))); + f32_offset = new_offset; + new_offset++; + } + offset++; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#endif /* __SOFTFP__ */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ +#ifdef __SOFTFP__ + struct sljit_jump *jump; + sljit_u32 extra_space = (sljit_u32)type; +#endif + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#ifdef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); 
+ + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); + + PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2))); + return jump; + } + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); + PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); + return jump; + } +#endif /* __SOFTFP__ */ + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + +#ifndef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); +#endif /* !__SOFTFP__ */ + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + + if (src != SLJIT_IMM) { + if (FAST_IS_REG(src)) { + SLJIT_ASSERT(reg_map[src] != 14); + return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src)); + } + + SLJIT_ASSERT(src & SLJIT_MEM); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)); + } + + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? 
IS_BL : 0)); + jump->u.target = (sljit_uw)srcw; + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + if (type >= SLJIT_FAST_CALL) + FAIL_IF(prepare_blx(compiler)); + jump->addr = compiler->size; + FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0)); + if (type >= SLJIT_FAST_CALL) { + jump->addr = compiler->size; + FAIL_IF(emit_blx(compiler)); + } +#else /* !SLJIT_CONFIG_ARM_V6 */ + jump->addr = compiler->size; + FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1))); + compiler->size += JUMP_MAX_SIZE - 1; +#endif /* SLJIT_CONFIG_ARM_V6 */ + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ +#ifdef __SOFTFP__ + sljit_u32 extra_space = (sljit_u32)type; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src))); + src = TMP_REG1; + } + +#ifdef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP; + + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); + + FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | 
SRC2_IMM | extra_space)); + + if (type & SLJIT_CALL_RETURN) + return push_inst(compiler, BX | RM(TMP_REG2)); + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); + return softfloat_post_call_with_args(compiler, arg_types); + } +#endif /* __SOFTFP__ */ + + if (type & SLJIT_CALL_RETURN) { + FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP; + } + +#ifndef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); +#endif /* !__SOFTFP__ */ + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +#ifdef __SOFTFP__ + +static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + if (compiler->options & SLJIT_ENTER_REG_ARG) { + if (src == SLJIT_FR0) + return SLJIT_SUCCESS; + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw); + } + + if (FAST_IS_REG(src)) { + if (op & SLJIT_32) + return push_inst(compiler, VMOV | (1 << 20) | RD(SLJIT_R0) | VN(src)); + return push_inst(compiler, VMOV2 | (1 << 20) | RD(SLJIT_R0) | RN(SLJIT_R1) | VM(src)); + } + + SLJIT_SKIP_CHECKS(compiler); + + if (op & SLJIT_32) + return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw); + return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw); +} + +#endif /* __SOFTFP__ */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op); + sljit_ins cc, ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + cc = get_cc(compiler, type); + dst_reg = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + + if (op < SLJIT_ADD) { + FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0)); + FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc)); + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; + } + + ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR)); + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2)); + + FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc)); + + if (op == SLJIT_AND) + FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000))); + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2)); + + if (flags & SLJIT_SET_Z) + return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + sljit_ins cc, tmp; + + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (src2_reg != dst_reg && src1 == dst_reg) { + src1 = src2_reg; + src1w = 0; + src2_reg = dst_reg; + type ^= 0x1; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, (src2_reg != dst_reg) ? 
dst_reg : TMP_REG1, src1, src1w, TMP_REG1)); + + if (src2_reg != dst_reg) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + src1 = TMP_REG1; + src1w = 0; + } + } else if (dst_reg != src2_reg) + FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src2_reg))); + + cc = get_cc(compiler, type & ~SLJIT_32); + + if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) { + tmp = get_imm((sljit_uw)src1w); + if (tmp) + return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc); + + tmp = get_imm(~(sljit_uw)src1w); + if (tmp) + return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc); + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + tmp = (sljit_ins)src1w; + FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff))); + if (tmp <= 0xffff) + return SLJIT_SUCCESS; + return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff)); +#else /* !SLJIT_CONFIG_ARM_V7 */ + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); + src1 = TMP_REG1; +#endif /* SLJIT_CONFIG_ARM_V7 */ + } + + return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src1)) & ~COND_MASK) | cc); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + type ^= SLJIT_32; + + if (dst_freg != src2_freg) { + if (dst_freg == src1) { + src1 = src2_freg; + src1w = 0; + type ^= 0x1; + } else + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, (type & SLJIT_32), dst_freg, src2_freg, 0))); + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w)); + src1 = TMP_FREG2; + } + + cc = get_cc(compiler, type & ~SLJIT_32); + 
return push_inst(compiler, EMIT_FPU_OPERATION((VMOV_F32 & ~COND_MASK) | cc, (type & SLJIT_32), dst_freg, src1, 0)); +} + +#undef EMIT_FPU_OPERATION + +static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset) +{ + sljit_s32 arg = *mem; + sljit_sw argw = *memw; + sljit_uw imm, tmp; + sljit_sw mask = 0xfff; + sljit_sw sign = 0x1000; + + SLJIT_ASSERT(max_offset >= 0xf00); + + *mem = TMP_REG1; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + *memw = 0; + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)(argw & 0x3) << 7)); + } + + arg &= REG_MASK; + + if (arg) { + if (argw <= max_offset && argw >= -mask) { + *mem = arg; + return SLJIT_SUCCESS; + } + + if (argw >= 0) { + tmp = (sljit_uw)(argw & (sign | mask)); + tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask); + imm = get_imm(tmp); + + if (imm) { + *memw = argw - (sljit_sw)tmp; + SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset); + + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | imm); + } + } else { + tmp = (sljit_uw)(-argw & (sign | mask)); + tmp = (sljit_uw)((-argw + ((tmp <= (sljit_uw)((sign << 1) - max_offset - 1)) ? 0 : sign)) & ~mask); + imm = get_imm(tmp); + + if (imm) { + *memw = argw + (sljit_sw)tmp; + SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset); + + return push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg) | imm); + } + } + } + + tmp = (sljit_uw)(argw & (sign | mask)); + tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 
0 : sign)) & ~mask); + *memw = argw - (sljit_sw)tmp; + + FAIL_IF(load_immediate(compiler, TMP_REG1, tmp)); + + if (arg == 0) + return SLJIT_SUCCESS; + + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(arg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + ADJUST_LOCAL_OFFSET(mem, memw); + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + + flags = WORD_SIZE; + + if (!(type & SLJIT_MEM_STORE)) { + if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1)); + return emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1); + } + + flags = WORD_SIZE | LOAD_DATA; + } + + FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1)); + return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + sljit_ins is_type1_transfer, inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw)); + + is_type1_transfer = 1; + + switch (type & 0xff) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + case SLJIT_MOV_P: + flags = WORD_SIZE; + break; + case SLJIT_MOV_U8: + flags = BYTE_SIZE; + break; + case SLJIT_MOV_S8: + if (!(type & SLJIT_MEM_STORE)) + is_type1_transfer = 0; + flags = BYTE_SIZE | SIGNED; + break; + case SLJIT_MOV_U16: + is_type1_transfer = 0; + flags = 
HALF_SIZE; + break; + case SLJIT_MOV_S16: + is_type1_transfer = 0; + flags = HALF_SIZE | SIGNED; + break; + default: + SLJIT_UNREACHABLE(); + flags = WORD_SIZE; + break; + } + + if (!(type & SLJIT_MEM_STORE)) + flags |= LOAD_DATA; + + SLJIT_ASSERT(is_type1_transfer == !!IS_TYPE1_TRANSFER(flags)); + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + if (!is_type1_transfer && memw != 0) + return SLJIT_ERR_UNSUPPORTED; + } else { + if (is_type1_transfer) { + if (memw > 4095 || memw < -4095) + return SLJIT_ERR_UNSUPPORTED; + } else if (memw > 255 || memw < -255) + return SLJIT_ERR_UNSUPPORTED; + } + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + memw &= 0x3; + + inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_ins)memw << 7)); + + if (is_type1_transfer) + inst |= (1 << 25); + + if (type & SLJIT_MEM_POST) + inst ^= (1 << 24); + else + inst |= (1 << 21); + + return push_inst(compiler, inst); + } + + inst = EMIT_DATA_TRANSFER(flags, 0, reg, mem & REG_MASK, 0); + + if (type & SLJIT_MEM_POST) + inst ^= (1 << 24); + else + inst |= (1 << 21); + + if (is_type1_transfer) { + if (memw >= 0) + inst |= (1 << 23); + else + memw = -memw; + + return push_inst(compiler, inst | (sljit_ins)memw); + } + + if (memw >= 0) + inst |= (1 << 23); + else + memw = -memw; + + return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_ins)memw)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + if (type & SLJIT_MEM_ALIGNED_32) + return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 
0 : FPU_LOAD), freg, mem, memw); + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2))); + + if (type & SLJIT_32) + return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1); + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + mem |= SLJIT_MEM; + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1)); + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2))); + return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw + 4, TMP_REG1); + } + + if (type & SLJIT_32) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1)); + return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG2)); + } + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + mem |= SLJIT_MEM; + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, mem, memw + 4, TMP_REG1)); + return push_inst(compiler, VMOV2 | VM(freg) | RD(TMP_REG2) | RN(TMP_REG1)); +} + +static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw) +{ + sljit_s32 mem = *mem_ptr; + sljit_uw imm; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + *mem_ptr = TMP_REG1; + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 7)); + } + + if (SLJIT_UNLIKELY(!(mem & REG_MASK))) { + *mem_ptr = TMP_REG1; + return load_immediate(compiler, TMP_REG1, (sljit_uw)memw); + } + + mem &= REG_MASK; + + if (memw == 0) { + *mem_ptr = mem; + return SLJIT_SUCCESS; + } + + *mem_ptr = TMP_REG1; + imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw)); + + if (imm != 0) + return push_inst(compiler, ((memw < 0) ? 
SUB : ADD) | RD(TMP_REG1) | RN(mem) | imm); + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(mem)); +} + +static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg) +{ + freg += freg & 0x1; + + SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)); + + if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS) + freg--; + + return freg; +} + +#define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (!(srcdst & SLJIT_MEM)) { + if (reg_size == 4) + srcdst = simd_get_quad_reg_index(srcdst); + + if (type & SLJIT_SIMD_STORE) + ins = VD(srcdst) | VN(freg) | VM(freg); + else + ins = VD(freg) | VN(srcdst) | VM(srcdst); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 6; + + return push_inst(compiler, VORR | ins); + } + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + if (elem_size > 3) + elem_size = 3; + + ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD(freg) + | (sljit_ins)((reg_size == 3) ? 
(0x7 << 8) : (0xa << 8)); + + SLJIT_ASSERT(reg_size >= alignment); + + if (alignment == 3) + ins |= 0x10; + else if (alignment >= 3) + ins |= 0x20; + + return push_inst(compiler, ins | RN(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf); +} + +static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value) +{ + sljit_ins result; + + if (elem_size > 1 && (sljit_u16)value == (value >> 16)) { + elem_size = 1; + value = (sljit_u16)value; + } + + if (elem_size == 1 && (sljit_u8)value == (value >> 8)) { + elem_size = 0; + value = (sljit_u8)value; + } + + switch (elem_size) { + case 0: + SLJIT_ASSERT(value <= 0xff); + result = 0xe00; + break; + case 1: + SLJIT_ASSERT(value <= 0xffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x800; + break; + } + + if ((value & 0xff) == 0) { + value >>= 8; + result |= 0xa00; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value ^= (sljit_uw)0xffff; + result = (1 << 5); + } + break; + default: + SLJIT_ASSERT(value <= 0xffffffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x000; + break; + } + + if ((value & ~(sljit_uw)0xff00) == 0) { + value >>= 8; + result |= 0x200; + break; + } + + if ((value & ~(sljit_uw)0xff0000) == 0) { + value >>= 16; + result |= 0x400; + break; + } + + if ((value & ~(sljit_uw)0xff000000) == 0) { + value >>= 24; + result |= 0x600; + break; + } + + if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) { + value >>= 8; + result |= 0xc00; + break; + } + + if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) { + value >>= 16; + result |= 0xd00; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value = ~value; + result = (1 << 5); + } + break; + } + + return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 17) | result; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + 
sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imm; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (src == SLJIT_IMM && srcw == 0) + return push_inst(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD(freg)); + + if (SLJIT_UNLIKELY(elem_size == 3)) { + SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw)); + src = freg; + } else if (freg != src) + FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src))); + + freg += SLJIT_QUAD_OTHER_HALF(freg); + + if (freg != src) + return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)); + return SLJIT_SUCCESS; + } + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + + ins = (sljit_ins)(elem_size << 6); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 5; + + return push_inst(compiler, VLD1_r | ins | VD(freg) | RN(src) | 0xf); + } + + if (type & SLJIT_SIMD_FLOAT) { + SLJIT_ASSERT(elem_size == 2); + ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2)); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 6; + + return push_inst(compiler, VDUP_s | ins | VD(freg) | (sljit_ins)freg_map[src]); + } + + if (src == SLJIT_IMM) { + if (elem_size < 2) + srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + imm = simd_get_imm(elem_size, (sljit_uw)srcw); + + if (imm != ~(sljit_ins)0) { + if (reg_size == 4) + imm |= (sljit_ins)1 << 6; + + return push_inst(compiler, VMOV_i | imm | VD(freg)); + } + 
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); + src = TMP_REG1; + } + + switch (elem_size) { + case 0: + ins = 1 << 22; + break; + case 1: + ins = 1 << 5; + break; + default: + ins = 0; + break; + } + + if (reg_size == 4) + ins |= (sljit_ins)1 << 21; + + return push_inst(compiler, VDUP | ins | VN(freg) | RD(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (type & SLJIT_SIMD_LANE_ZERO) { + ins = (reg_size == 3) ? 
0 : ((sljit_ins)1 << 6); + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size == 3 && !(srcdst & SLJIT_MEM)) { + if (lane_index == 1) + freg += SLJIT_QUAD_OTHER_HALF(freg); + + if (srcdst != freg) + FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(srcdst) | VM(srcdst))); + + freg += SLJIT_QUAD_OTHER_HALF(freg); + return push_inst(compiler, VMOV_i | VD(freg)); + } + + if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) { + FAIL_IF(push_inst(compiler, VORR | ins | VD(TMP_FREG2) | VN(freg) | VM(freg))); + srcdst = TMP_FREG2; + srcdstw = 0; + } + } + + FAIL_IF(push_inst(compiler, VMOV_i | ins | VD(freg))); + } + + if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) { + lane_index -= (0x8 >> elem_size); + freg += SLJIT_QUAD_OTHER_HALF(freg); + } + + if (srcdst & SLJIT_MEM) { + if (elem_size == 3) + return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw); + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + lane_index = lane_index << elem_size; + ins = (sljit_ins)((elem_size << 10) | (lane_index << 5)); + return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 
VST1_s : VLD1_s) | ins | VD(freg) | RN(srcdst) | 0xf); + } + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size == 3) { + if (type & SLJIT_SIMD_STORE) + return push_inst(compiler, VORR | VD(srcdst) | VN(freg) | VM(freg)); + return push_inst(compiler, VMOV_F32 | SLJIT_32 | VD(freg) | VM(srcdst)); + } + + if (type & SLJIT_SIMD_STORE) { + if (freg_ebit_map[freg] == 0) { + if (lane_index == 1) + freg = SLJIT_F64_SECOND(freg); + + return push_inst(compiler, VMOV_F32 | VD(srcdst) | VM(freg)); + } + + FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1))); + return push_inst(compiler, VMOV | VN(srcdst) | RD(TMP_REG1)); + } + + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(srcdst) | RD(TMP_REG1))); + return push_inst(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1)); + } + + if (srcdst == SLJIT_IMM) { + if (elem_size < 2) + srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw)); + srcdst = TMP_REG1; + } + + if (elem_size == 0) + ins = 0x400000; + else if (elem_size == 1) + ins = 0x20; + else + ins = 0; + + lane_index = lane_index << elem_size; + ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5)); + + if (type & SLJIT_SIMD_STORE) { + ins |= (1 << 20); + + if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED)) + ins |= (1 << 23); + } + + return push_inst(compiler, VMOV_s | ins | VN(freg) | RD(srcdst)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if 
((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) { + freg = simd_get_quad_reg_index(freg); + src = simd_get_quad_reg_index(src); + + if (src_lane_index >= (0x8 >> elem_size)) { + src_lane_index -= (0x8 >> elem_size); + src += SLJIT_QUAD_OTHER_HALF(src); + } + } + + if (elem_size == 3) { + if (freg != src) + FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src))); + + freg += SLJIT_QUAD_OTHER_HALF(freg); + + if (freg != src) + return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)); + return SLJIT_SUCCESS; + } + + ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size)); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 6; + + return push_inst(compiler, VDUP_s | ins | VD(freg) | VM(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_s32 dst_reg; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + if (reg_size == 4 && elem2_size - elem_size == 1) + FAIL_IF(push_inst(compiler, VLD1 | (0x7 << 8) | VD(freg) | RN(src) | 0xf)); + else + FAIL_IF(push_inst(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD(freg) | RN(src) | 0xf)); + src = freg; + } else 
if (reg_size == 4) + src = simd_get_quad_reg_index(src); + + if (!(type & SLJIT_SIMD_FLOAT)) { + dst_reg = (reg_size == 4) ? freg : TMP_FREG2; + + do { + FAIL_IF(push_inst(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 24)) + | ((sljit_ins)1 << (19 + elem_size)) | VD(dst_reg) | VM(src))); + src = dst_reg; + } while (++elem_size < elem2_size); + + if (dst_reg == TMP_FREG2) + return push_inst(compiler, VORR | VD(freg) | VN(TMP_FREG2) | VM(TMP_FREG2)); + return SLJIT_SUCCESS; + } + + /* No SIMD variant, must use VFP instead. */ + SLJIT_ASSERT(reg_size == 4); + + if (freg == src) { + freg += SLJIT_QUAD_OTHER_HALF(freg); + FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20)); + freg += SLJIT_QUAD_OTHER_HALF(freg); + return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src)); + } + + FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src))); + freg += SLJIT_QUAD_OTHER_HALF(freg); + return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imms; + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (elem_size) { + case 0: + imms = 0x243219; + ins = VSHR | (1 << 24) | (0x9 << 16); + break; + case 1: + imms = (reg_size == 4) ? 0x243219 : 0x2231; + ins = VSHR | (1 << 24) | (0x11 << 16); + break; + case 2: + imms = (reg_size == 4) ? 
0x2231 : 0x21; + ins = VSHR | (1 << 24) | (0x21 << 16); + break; + default: + imms = 0x21; + ins = VSHR | (1 << 24) | (0x1 << 16) | (1 << 7); + break; + } + + if (reg_size == 4) { + freg = simd_get_quad_reg_index(freg); + ins |= (sljit_ins)1 << 6; + } + + SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0); + FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG2) | VM(freg))); + + if (reg_size == 4 && elem_size > 0) + FAIL_IF(push_inst(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD(TMP_FREG2) | VM(TMP_FREG2))); + + ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0; + + while (imms >= 0x100) { + FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | ((imms & 0xff) << 16) | VD(TMP_FREG2) | VM(TMP_FREG2))); + imms >>= 8; + } + + FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | (1 << 7) | (imms << 16) | VD(TMP_FREG2) | VM(TMP_FREG2))); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(dst_r) | VN(TMP_FREG2))); + + if (reg_size == 4 && elem_size == 0) { + SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]); + FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(TMP_REG2) | VN(TMP_FREG1))); + FAIL_IF(push_inst(compiler, ORR | RD(dst_r) | RN(dst_r) | RM(TMP_REG2) | (0x8 << 7))); + } + + if (dst_r == TMP_REG1) + return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + 
return SLJIT_ERR_UNSUPPORTED; + + switch (SLJIT_SIMD_GET_OPCODE(type)) { + case SLJIT_SIMD_OP2_AND: + ins = VAND; + break; + case SLJIT_SIMD_OP2_OR: + ins = VORR; + break; + case SLJIT_SIMD_OP2_XOR: + ins = VEOR; + break; + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) { + dst_freg = simd_get_quad_reg_index(dst_freg); + src1_freg = simd_get_quad_reg_index(src1_freg); + src2_freg = simd_get_quad_reg_index(src2_freg); + ins |= (sljit_ins)1 << 6; + } + + return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg)); +} + +#undef FPU_LOAD + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + sljit_u32 ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_U8: + ins = LDREXB; + break; + case SLJIT_MOV_U16: + ins = LDREXH; + break; + default: + ins = LDREX; + break; + } + + return push_inst(compiler, ins | RN(mem_reg) | RD(dst_reg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_u32 ins; + + /* temp_reg == mem_reg is undefined so use another temp register */ + SLJIT_UNUSED_ARG(temp_reg); + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_U8: + ins = STREXB; + break; + case SLJIT_MOV_U16: + ins = STREXH; + break; + default: + ins = STREX; + break; + } + + FAIL_IF(push_inst(compiler, ins | RN(mem_reg) | RD(TMP_REG1) | RM(src_reg))); + if (op & SLJIT_SET_ATOMIC_STORED) + return push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(TMP_REG1)); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw 
init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, + EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_ins)init_value)); + compiler->patches++; +#else /* !SLJIT_CONFIG_ARM_V6 */ + PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value)); +#endif /* SLJIT_CONFIG_ARM_V6 */ + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1)); + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_jump *jump; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0)); + compiler->patches++; +#else /* !SLJIT_CONFIG_ARM_V6 */ + PTR_FAIL_IF(push_inst(compiler, RD(dst_r))); +#endif /* SLJIT_CONFIG_ARM_V6 */ + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 1); + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + compiler->size += 1; +#endif /* SLJIT_CONFIG_ARM_V7 */ + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1)); + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + set_jump_addr(addr, executable_offset, new_target, 1); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + set_const_value(addr, executable_offset, (sljit_uw)new_constant, 1); +} diff --git a/src/sljit/sljitNativeARM_64.c b/src/sljit/sljitNativeARM_64.c new file mode 100644 index 0000000..5331ebd --- /dev/null +++ b/src/sljit/sljitNativeARM_64.c @@ -0,0 +1,3491 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ + return "ARM-64" SLJIT_CPUINFO; +} + +/* Length of an instruction word */ +typedef sljit_u32 sljit_ins; + +#define TMP_ZERO (0) + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_FP (SLJIT_NUMBER_OF_REGISTERS + 5) + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) + +/* r18 - platform register, currently not used */ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { + 31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 15, 14, 13, 12, 11, 10, 9, 8, 30, 31 +}; + +#define W_OP ((sljit_ins)1 << 31) +#define RD(rd) ((sljit_ins)reg_map[rd]) +#define RT(rt) ((sljit_ins)reg_map[rt]) +#define RN(rn) ((sljit_ins)reg_map[rn] << 5) +#define RT2(rt2) ((sljit_ins)reg_map[rt2] << 10) +#define RM(rm) 
((sljit_ins)reg_map[rm] << 16) +#define VD(vd) ((sljit_ins)freg_map[vd]) +#define VT(vt) ((sljit_ins)freg_map[vt]) +#define VT2(vt) ((sljit_ins)freg_map[vt] << 10) +#define VN(vn) ((sljit_ins)freg_map[vn] << 5) +#define VM(vm) ((sljit_ins)freg_map[vm] << 16) + +/* --------------------------------------------------------------------- */ +/* Instruction forms: base opcode words; register and immediate fields are OR-ed in when an instruction is emitted. */ +/* --------------------------------------------------------------------- */ + +#define ADC 0x9a000000 +#define ADD 0x8b000000 +#define ADDE 0x8b200000 +#define ADDI 0x91000000 +#define ADR 0x10000000 +#define ADRP 0x90000000 +#define AND 0x8a000000 +#define ANDI 0x92000000 +#define AND_v 0x0e201c00 +#define ASRV 0x9ac02800 +#define B 0x14000000 +#define B_CC 0x54000000 +#define BL 0x94000000 +#define BLR 0xd63f0000 +#define BR 0xd61f0000 +#define BRK 0xd4200000 +#define CAS 0xc8a07c00 +#define CASB 0x08a07c00 +#define CASH 0x48a07c00 +#define CBZ 0xb4000000 +#define CCMPI 0xfa400800 +#define CLZ 0xdac01000 +#define CSEL 0x9a800000 +#define CSINC 0x9a800400 +#define DUP_e 0x0e000400 +#define DUP_g 0x0e000c00 +#define EOR 0xca000000 +#define EOR_v 0x2e201c00 +#define EORI 0xd2000000 +#define EXTR 0x93c00000 +#define FABS 0x1e60c000 +#define FADD 0x1e602800 +#define FCMP 0x1e602000 +#define FCSEL 0x1e600c00 +#define FCVT 0x1e224000 +#define FCVTL 0x0e217800 +#define FCVTZS 0x9e780000 +#define FDIV 0x1e601800 +#define FMOV 0x1e604000 +#define FMOV_R 0x9e660000 +#define FMOV_I 0x1e601000 +#define FMUL 0x1e600800 +#define FNEG 0x1e614000 +#define FSUB 0x1e603800 +#define INS 0x4e001c00 +#define INS_e 0x6e000400 +#define LD1 0x0c407000 +#define LD1_s 0x0d400000 +#define LD1R 0x0d40c000 +#define LDRI 0xf9400000 +#define LDRI_F64 0xfd400000 +#define LDRI_POST 0xf8400400 +#define LDP 0xa9400000 +#define LDP_F64 0x6d400000 +#define LDP_POST 0xa8c00000 +#define LDR_PRE 0xf8400c00 +#define LDXR 0xc85f7c00 +#define LDXRB 0x085f7c00 +#define LDXRH 0x485f7c00 +#define LSLV 0x9ac02000 +#define LSRV 0x9ac02400 +#define MADD
0x9b000000 +#define MOVI 0x0f000400 +#define MOVK 0xf2800000 +#define MOVN 0x92800000 +#define MOVZ 0xd2800000 +#define NOP 0xd503201f +#define ORN 0xaa200000 +#define ORR 0xaa000000 +#define ORR_v 0x0ea01c00 +#define ORRI 0xb2000000 +#define RBIT 0xdac00000 +#define RET 0xd65f0000 +#define REV 0xdac00c00 +#define REV16 0xdac00400 +#define RORV 0x9ac02c00 +#define SBC 0xda000000 +#define SBFM 0x93400000 +#define SCVTF 0x9e620000 +#define SDIV 0x9ac00c00 +#define SMADDL 0x9b200000 +#define SMOV 0x0e002c00 +#define SMULH 0x9b403c00 +#define SSHLL 0x0f00a400 +#define ST1 0x0c007000 +#define ST1_s 0x0d000000 +#define STP 0xa9000000 +#define STP_F64 0x6d000000 +#define STP_PRE 0xa9800000 +#define STRB 0x38206800 +#define STRBI 0x39000000 +#define STRI 0xf9000000 +#define STRI_F64 0xfd000000 +#define STR_FI 0x3d000000 +#define STR_FR 0x3c206800 +#define STUR_FI 0x3c000000 +#define STURBI 0x38000000 +#define STXR 0xc8007c00 +#define STXRB 0x8007c00 +#define STXRH 0x48007c00 +#define SUB 0xcb000000 +#define SUBI 0xd1000000 +#define SUBS 0xeb000000 +#define TBZ 0x36000000 +#define UBFM 0xd3400000 +#define UCVTF 0x9e630000 +#define UDIV 0x9ac00800 +#define UMOV 0x0e003c00 +#define UMULH 0x9bc03c00 +#define USHLL 0x2f00a400 +#define USHR 0x2f000400 +#define USRA 0x2f001400 +#define XTN 0x0e212800 + +#define CSET (CSINC | RM(TMP_ZERO) | RN(TMP_ZERO)) +#define LDR (STRI | (1 << 22)) +#define LDRB (STRBI | (1 << 22)) +#define LDRH (LDRB | (1 << 30)) +#define MOV (ORR | RN(TMP_ZERO)) + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) +{ + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)(imm & 0xffff) << 5))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 16) & 
0xffff) << 5) | (1 << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 32) & 0xffff) << 5) | (2 << 21))); + return push_inst(compiler, MOVK | RD(dst) | ((sljit_ins)(imm >> 48) << 5) | (3 << 21)); +} + +/* Chooses the encoding of a jump from the distance / address of its target, marks the choice in jump->flags (a PATCH_* flag where one applies) and returns the position of the last instruction word the jump occupies. */ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_sw diff; + sljit_uw target_addr; + + /* Rewritable jumps always get the full PATCH_ABS64 form. */ if (jump->flags & SLJIT_REWRITABLE_JUMP) + goto exit; + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->u.label != NULL); + /* label->size is an offset in instruction words from the start of the code. */ target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + } + + /* Signed byte distance from the executable address of code_ptr to the target. */ diff = (sljit_sw)target_addr - (sljit_sw)code_ptr - executable_offset; + + if (jump->flags & IS_COND) { + /* Measure from the preceding conditional instruction, one word earlier. */ diff += SSIZE_OF(ins); + if (diff <= 0xfffff && diff >= -0x100000) { + /* In range: toggle one bit of the preceding instruction (bit 24 for IS_CBZ, bit 0 otherwise) and make that instruction the jump itself. */ *(--code_ptr) ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1; + jump->flags |= PATCH_COND; + jump->addr -= sizeof(sljit_ins); + return code_ptr; + } + diff -= SSIZE_OF(ins); + } + + if (diff <= 0x7ffffff && diff >= -0x8000000) { + /* A single word is enough: reduce the offset field (at bit 5) of the preceding conditional instruction by the four unused words. */ if (jump->flags & IS_COND) + code_ptr[-1] -= (4 << 5); + jump->flags |= PATCH_B; + return code_ptr; + } + + /* Target address below 2^32: two words are reserved before the final instruction; no PATCH_* flag is set in this case. */ if (target_addr < 0x100000000l) { + if (jump->flags & IS_COND) + code_ptr[-1] -= (2 << 5); + code_ptr[2] = code_ptr[0]; + return code_ptr + 2; + } + + if (diff <= 0xfffff000l && diff >= -0x100000000l) { + if (jump->flags & IS_COND) + code_ptr[-1] -= (2 << 5); + jump->flags |= PATCH_B32; + code_ptr[2] = code_ptr[0]; + return code_ptr + 2; + } + + /* Target address below 2^48: three words are reserved before the final instruction. */ if (target_addr < 0x1000000000000l) { + if (jump->flags & IS_COND) + code_ptr[-1] -= (1 << 5); + jump->flags |= PATCH_ABS48; + code_ptr[3] = code_ptr[0]; + return code_ptr + 3; + } + +exit: + /* Longest form: four words are reserved and the original instruction word is moved after them. */ jump->flags |= PATCH_ABS64; + code_ptr[4] = code_ptr[0]; + return code_ptr + 4; +} + +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_uw addr; + sljit_sw diff; +
SLJIT_UNUSED_ARG(executable_offset); + + /* This function picks the form of an address load and returns how many instruction words it needs beyond the first (0 to 3). The asserts compare that count with the size already stored in jump->flags above JUMP_SIZE_SHIFT. */ SLJIT_ASSERT(jump->flags < ((sljit_uw)4 << JUMP_SIZE_SHIFT)); + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + /* Signed byte distance from the executable address of code_ptr to the address being loaded. */ diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + /* PATCH_B on an address load is later written as a single ADR. */ if (diff <= 0xfffff && diff >= -0x100000) { + jump->flags |= PATCH_B; + return 0; + } + + /* PATCH_B32 on an address load is later written as ADRP followed by ADDI. */ if (diff <= 0xfffff000l && diff >= -0x100000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_B32; + return 1; + } + + /* Address below 2^32: two words, no PATCH_* flag. */ if (addr < 0x100000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + return 1; + } + + if (addr < 0x1000000000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)2 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS48; + return 2; + } + + SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS64; + return 3; +} + +/* Writes the final instruction word(s) of a jump or address load at jump->addr, according to the PATCH_* flags already set in jump->flags. */ static SLJIT_INLINE void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset) +{ + sljit_sw addr = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr); + sljit_ins* buf_ptr = (sljit_ins*)jump->addr; + sljit_u32 dst; + SLJIT_UNUSED_ARG(executable_offset); + + if (!(jump->flags & JUMP_MOV_ADDR)) { + if (jump->flags & PATCH_COND) { + /* Offset in instruction words, stored as a 19-bit field starting at bit 5. */ addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x3ffff && addr >= -0x40000); + buf_ptr[0] = (buf_ptr[0] & ~(sljit_ins)0xffffe0) | (sljit_ins)((addr & 0x7ffff) << 5); + return; + } + + if (jump->flags & PATCH_B) { + /* Offset in instruction words, stored in the low 26 bits of B / BL. */ addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x1ffffff && addr >= -0x2000000); + buf_ptr[0] = ((jump->flags & IS_BL) ? 
BL : B) | (sljit_ins)(addr & 0x3ffffff); + return; + } + + /* Register number read from bits 5-9 of the existing instruction word. */ dst = (buf_ptr[0] >> 5) & 0x1f; + + if (jump->flags & PATCH_B32) { + /* addr becomes the distance from the 4 KiB page of buf_ptr: ADRP takes the page part, ADDI the low 12 bits. */ addr -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) & ~(sljit_sw)0xfff; + SLJIT_ASSERT(addr <= 0xfffff000l && addr >= -0x100000000l); + buf_ptr[0] = ADRP | (((sljit_ins)(addr >> 12) & 0x3) << 29) | (((sljit_ins)(addr >> 14) & 0x7ffff) << 5) | dst; + buf_ptr[1] = ADDI | dst | (dst << 5) | ((sljit_ins)(addr & 0xfff) << 10); + return; + } + } else { + /* NOTE(review): the whole placeholder word is used as the register field, so it presumably holds only the destination register number - confirm against sljit_emit_mov_addr. */ dst = *buf_ptr; + + if (jump->flags & PATCH_B) { + /* Byte offset split into ADR's 2-bit low part (bit 29) and 19-bit high part (bit 5). */ addr -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); + SLJIT_ASSERT(addr <= 0xfffff && addr >= -0x100000); + buf_ptr[0] = ADR | (((sljit_ins)addr & 0x3) << 29) | (((sljit_ins)(addr >> 2) & 0x7ffff) << 5) | dst; + return; + } + + if (jump->flags & PATCH_B32) { + addr -= ((sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) & ~(sljit_sw)0xfff; + SLJIT_ASSERT(addr <= 0xffffffffl && addr >= -0x100000000l); + buf_ptr[0] = ADRP | (((sljit_ins)(addr >> 12) & 0x3) << 29) | (((sljit_ins)(addr >> 14) & 0x7ffff) << 5) | dst; + buf_ptr[1] = ADDI | dst | (dst << 5) | ((sljit_ins)(addr & 0xfff) << 10); + return; + } + } + + /* Absolute address: built from 16-bit pieces with MOVZ and up to three MOVK, as many as the PATCH_ABS48 / PATCH_ABS64 flags call for. */ SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || (sljit_uw)addr <= (sljit_uw)0xffffffff); + SLJIT_ASSERT((jump->flags & PATCH_ABS64) || (sljit_uw)addr <= (sljit_uw)0xffffffffffff); + + buf_ptr[0] = MOVZ | (((sljit_ins)addr & 0xffff) << 5) | dst; + buf_ptr[1] = MOVK | (((sljit_ins)(addr >> 16) & 0xffff) << 5) | (1 << 21) | dst; + if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) + buf_ptr[2] = MOVK | (((sljit_ins)(addr >> 32) & 0xffff) << 5) | (2 << 21) | dst; + + if (jump->flags & PATCH_ABS64) + buf_ptr[3] = MOVK | ((sljit_ins)((sljit_uw)addr >> 48) << 5) | (3 << 21) | dst; +} + +/* Pass run before code generation: decides how many instruction words each jump / address load needs, records that in jump->flags (shifted by JUMP_SIZE_SHIFT), moves later labels, jumps and constants down by the words saved so far, and finally reduces compiler->size. */ static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw 
size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE; + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { + if (jump->flags & JUMP_ADDR) { + if (jump->u.target < 0x100000000l) + total_size = 3; + else if (jump->u.target < 0x1000000000000l) + total_size = 4; + } else { + /* Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if ((jump->flags & IS_COND) && (diff + 1) <= (0xfffff / SSIZE_OF(ins)) && (diff + 1) >= (-0x100000 / SSIZE_OF(ins))) + total_size = 0; + else if (diff <= (0x7ffffff / SSIZE_OF(ins)) && diff >= (-0x8000000 / SSIZE_OF(ins))) + total_size = 1; + else if (diff <= (0xfffff000l / SSIZE_OF(ins)) && diff >= (-0x100000000l / SSIZE_OF(ins))) + total_size = 3; + } + } + + size_reduce += JUMP_MAX_SIZE - total_size; + } else { + /* Real size minus 1. Unit size: instruction. 
*/ + total_size = 3; + + if (!(jump->flags & JUMP_ADDR)) { + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if (diff <= (0xfffff / SSIZE_OF(ins)) && diff >= (-0x100000 / SSIZE_OF(ins))) + total_size = 0; + else if (diff <= (0xfffff000l / SSIZE_OF(ins)) && diff >= (-0x100000000l / SSIZE_OF(ins))) + total_size = 1; + } else if (jump->u.target < 0x100000000l) + total_size = 1; + else if (jump->u.target < 0x1000000000000l) + total_size = 2; + + size_reduce += 3 - total_size; + } + + jump->flags |= total_size << JUMP_SIZE_SHIFT; + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + SLJIT_NEXT_DEFINE_TYPES; + sljit_sw executable_offset; + sljit_sw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + + reduce_code_size(compiler); + + code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset); + PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_min_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + + /* These structures are ordered by their address. 
*/ + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); + SLJIT_ASSERT((jump->flags & PATCH_COND) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); + } else { + word_count += jump->flags >> JUMP_SIZE_SHIFT; + addr = (sljit_sw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = (sljit_uw)addr; + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + } + + SLJIT_GET_NEXT_MIN(); + } + code_ptr++; + word_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + generate_jump_or_mov_addr(jump, executable_offset); + jump = jump->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + 
SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +/* Reports whether an optional feature is available on this target: FPU / SIMD follow SLJIT_IS_FPU_AVAILABLE when that macro is defined, the features listed below are always reported as present, and anything else yields 0. */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: + case SLJIT_HAS_SIMD: +#ifdef SLJIT_IS_FPU_AVAILABLE + return (SLJIT_IS_FPU_AVAILABLE) != 0; +#else + /* Available by default. */ + return 1; +#endif + + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CTZ: + case SLJIT_HAS_REV: + case SLJIT_HAS_ROT: + case SLJIT_HAS_CMOV: + case SLJIT_HAS_PREFETCH: + case SLJIT_HAS_COPY_F32: + case SLJIT_HAS_COPY_F64: + case SLJIT_HAS_ATOMIC: + return 1; + + default: + return 0; + } +} + +/* Returns 2 for SLJIT_UNORDERED_OR_EQUAL and SLJIT_ORDERED_NOT_EQUAL and 0 for every other type. NOTE(review): the meaning of the value is defined by the public sljit API, not visible here. */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + switch (type) { + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return 2; + } + + return 0; +} + +/* --------------------------------------------------------------------- */ +/* Core code generator functions. */ +/* --------------------------------------------------------------------- */ + /* COUNT_TRAILING_ZERO stores the number of trailing zero bits of value in result by halving the tested width at each step. It expands to several statements and shifts value in place, so value is modified by the macro. */ +#define COUNT_TRAILING_ZERO(value, result) \ + result = 0; \ + if (!(value & 0xffffffff)) { \ + result += 32; \ + value >>= 32; \ + } \ + if (!(value & 0xffff)) { \ + result += 16; \ + value >>= 16; \ + } \ + if (!(value & 0xff)) { \ + result += 8; \ + value >>= 8; \ + } \ + if (!(value & 0xf)) { \ + result += 4; \ + value >>= 4; \ + } \ + if (!(value & 0x3)) { \ + result += 2; \ + value >>= 2; \ + } \ + if (!(value & 0x1)) { \ + result += 1; \ + value >>= 1; \ + } + /* Flag OR-ed into the len argument of logical_imm: all-zero and all-one immediates then return 0 instead of reaching the assert. */ +#define LOGICAL_IMM_CHECK (sljit_ins)0x100 + +/* Computes the instruction bit fields that encode imm as the immediate of a logical instruction (ANDI / ORRI / EORI), or returns 0 when imm cannot be encoded. len is 32 or 16; callers pass 16 for INT_OP (32-bit) operations and 32 otherwise. */ static sljit_ins logical_imm(sljit_sw imm, sljit_u32 len) +{ + sljit_s32 negated; + sljit_u32 ones, right; + sljit_uw mask, uimm; + sljit_ins ins; + + if (len & LOGICAL_IMM_CHECK) { + len &= ~LOGICAL_IMM_CHECK; + if (len == 32 && (imm == 0 || imm == -1)) + return 0; + if (len == 16 && ((sljit_s32)imm == 0 || (sljit_s32)imm == -1)) + return 0; + } + + SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1) + || (len == 16 && (sljit_s32)imm != 0 && 
(sljit_s32)imm != -1)); + + uimm = (sljit_uw)imm; + while (1) { + if (len <= 0) { + SLJIT_UNREACHABLE(); + return 0; + } + + mask = ((sljit_uw)1 << len) - 1; + if ((uimm & mask) != ((uimm >> len) & mask)) + break; + len >>= 1; + } + + len <<= 1; + + negated = 0; + if (uimm & 0x1) { + negated = 1; + uimm = ~uimm; + } + + if (len < 64) + uimm &= ((sljit_uw)1 << len) - 1; + + /* Unsigned right shift. */ + COUNT_TRAILING_ZERO(uimm, right); + + /* Signed shift. We also know that the highest bit is set. */ + imm = (sljit_sw)~uimm; + SLJIT_ASSERT(imm < 0); + + COUNT_TRAILING_ZERO(imm, ones); + + if (~imm) + return 0; + + if (len == 64) + ins = 1 << 22; + else + ins = (0x3f - ((len << 1) - 1)) << 10; + + if (negated) + return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16); + + return ins | ((ones - 1) << 10) | ((len - right) << 16); +} + +#undef COUNT_TRAILING_ZERO + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm) +{ + sljit_uw imm = (sljit_uw)simm; + sljit_u32 i, zeros, ones, first; + sljit_ins bitmask; + + /* Handling simple immediates first. 
*/ + if (imm <= 0xffff) + return push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)imm << 5)); + + if (simm < 0 && simm >= -0x10000) + return push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5)); + + if (imm <= 0xffffffffl) { + if ((imm & 0xffff) == 0) + return push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)(imm >> 16) << 5) | (1 << 21)); + if ((imm & 0xffff0000l) == 0xffff0000) + return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5)); + if ((imm & 0xffff) == 0xffff) + return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | (((sljit_ins)~imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); + + bitmask = logical_imm(simm, 16); + if (bitmask != 0) + return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask); + + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | (((sljit_ins)imm & 0xffff) << 5))); + return push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); + } + + bitmask = logical_imm(simm, 32); + if (bitmask != 0) + return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask); + + if (simm < 0 && simm >= -0x100000000l) { + if ((imm & 0xffff) == 0xffff) + return push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); + + FAIL_IF(push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5))); + return push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); + } + + /* A large amount of number can be constructed from ORR and MOVx, but computing them is costly. 
*/ + + zeros = 0; + ones = 0; + for (i = 4; i > 0; i--) { + if ((simm & 0xffff) == 0) + zeros++; + if ((simm & 0xffff) == 0xffff) + ones++; + simm >>= 16; + } + + simm = (sljit_sw)imm; + first = 1; + if (ones > zeros) { + simm = ~simm; + for (i = 0; i < 4; i++) { + if (!(simm & 0xffff)) { + simm >>= 16; + continue; + } + if (first) { + first = 0; + FAIL_IF(push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21))); + } + else + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)~simm & 0xffff) << 5) | (i << 21))); + simm >>= 16; + } + return SLJIT_SUCCESS; + } + + for (i = 0; i < 4; i++) { + if (!(simm & 0xffff)) { + simm >>= 16; + continue; + } + if (first) { + first = 0; + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21))); + } + else + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21))); + simm >>= 16; + } + return SLJIT_SUCCESS; +} + +#define ARG1_IMM 0x0010000 +#define ARG2_IMM 0x0020000 +#define INT_OP 0x0040000 +#define SET_FLAGS 0x0080000 +#define UNUSED_RETURN 0x0100000 + +#define CHECK_FLAGS(flag_bits) \ + if (flags & SET_FLAGS) { \ + inv_bits |= flag_bits; \ + if (flags & UNUSED_RETURN) \ + dst = TMP_ZERO; \ + } + +static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_sw arg1, sljit_sw arg2) +{ + /* dst must be register, TMP_REG1 + arg1 must be register, TMP_REG1, imm + arg2 must be register, TMP_REG2, imm */ + sljit_ins inv_bits = (flags & INT_OP) ? W_OP : 0; + sljit_ins inst_bits; + sljit_s32 op = (flags & 0xffff); + sljit_s32 reg; + sljit_sw imm, nimm; + + if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { + /* Both are immediates. 
*/ + flags &= ~ARG1_IMM; + if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB) + arg1 = TMP_ZERO; + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + } + + if (flags & (ARG1_IMM | ARG2_IMM)) { + reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2); + imm = (flags & ARG2_IMM) ? arg2 : arg1; + + switch (op) { + case SLJIT_CLZ: + case SLJIT_CTZ: + case SLJIT_REV: + case SLJIT_REV_U16: + case SLJIT_REV_S16: + case SLJIT_REV_U32: + case SLJIT_REV_S32: + case SLJIT_ADDC: + case SLJIT_SUBC: + case SLJIT_MUL: + case SLJIT_MULADD: + /* No form with immediate operand (except imm 0, which + is represented by a ZERO register). */ + break; + case SLJIT_MOV: + SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1); + return load_immediate(compiler, dst, imm); + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (flags & ARG1_IMM) + break; + imm = -imm; + /* Fall through. */ + case SLJIT_ADD: + if (op != SLJIT_SUB) + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + + if (imm == 0) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, ((op == SLJIT_ADD ? 
ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg)); + } + if (imm > 0 && imm <= 0xfff) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)imm << 10)); + } + nimm = -imm; + if (nimm > 0 && nimm <= 0xfff) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)nimm << 10)); + } + if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22)); + } + if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22)); + } + if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) { + FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | (((sljit_ins)imm & 0xfff) << 10)); + } + if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) { + FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | (((sljit_ins)nimm & 0xfff) << 10)); + } + break; + case SLJIT_AND: + inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32)); + if (!inst_bits) + break; + CHECK_FLAGS(3 << 29); + return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits); + case SLJIT_XOR: + if (imm == -1) { + FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(reg))); + goto set_flags; + } + /* fallthrough */ + case SLJIT_OR: + inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 
16 : 32)); + if (!inst_bits) + break; + if (op == SLJIT_OR) + inst_bits |= ORRI; + else + inst_bits |= EORI; + FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg))); + goto set_flags; + case SLJIT_SHL: + case SLJIT_MSHL: + if (flags & ARG1_IMM) + break; + + if (flags & INT_OP) { + imm &= 0x1f; + inst_bits = (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10); + } else { + imm &= 0x3f; + inst_bits = ((sljit_ins)1 << 22) | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10); + } + + inv_bits |= inv_bits >> 9; + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits)); + goto set_flags; + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + if (flags & ARG1_IMM) + break; + + inv_bits |= inv_bits >> 9; + if (op >= SLJIT_ASHR) + inv_bits |= 1 << 30; + + if (flags & INT_OP) { + imm &= 0x1f; + inst_bits = ((sljit_ins)imm << 16) | (31 << 10); + } else { + imm &= 0x3f; + inst_bits = ((sljit_ins)1 << 22) | ((sljit_ins)imm << 16) | (63 << 10); + } + + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits)); + goto set_flags; + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & ARG1_IMM) + break; + + if (op == SLJIT_ROTL) + imm = -imm; + + imm &= (flags & INT_OP) ? 0x1f : 0x3f; + return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst) | RN(arg1) | RM(arg1) | ((sljit_ins)imm << 10)); + default: + SLJIT_UNREACHABLE(); + break; + } + + if (flags & ARG2_IMM) { + if (arg2 == 0) + arg2 = TMP_ZERO; + else { + FAIL_IF(load_immediate(compiler, TMP_REG2, arg2)); + arg2 = TMP_REG2; + } + } + else { + if (arg1 == 0) + arg1 = TMP_ZERO; + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + } + } + + /* Both arguments are registers. 
*/ + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (dst == arg2) + return SLJIT_SUCCESS; + return push_inst(compiler, MOV | RD(dst) | RM(arg2)); + case SLJIT_MOV_U8: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + inv_bits |= inv_bits >> 9; + return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10)); + case SLJIT_MOV_S8: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + inv_bits |= inv_bits >> 9; + return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10)); + case SLJIT_MOV_U16: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + inv_bits |= inv_bits >> 9; + return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); + case SLJIT_MOV_S16: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + inv_bits |= inv_bits >> 9; + return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); + case SLJIT_MOV32: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (dst == arg2) + return SLJIT_SUCCESS; + /* fallthrough */ + case SLJIT_MOV_U32: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + return push_inst(compiler, (MOV ^ W_OP) | RD(dst) | RM(arg2)); + case SLJIT_MOV_S32: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10)); + case SLJIT_CLZ: + SLJIT_ASSERT(arg1 == TMP_REG1); + return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)); + case SLJIT_CTZ: + SLJIT_ASSERT(arg1 == TMP_REG1); + FAIL_IF(push_inst(compiler, (RBIT ^ inv_bits) | RD(dst) | RN(arg2))); + return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(dst)); + case SLJIT_REV: + SLJIT_ASSERT(arg1 == TMP_REG1); + inv_bits |= inv_bits >> 21; + return push_inst(compiler, (REV ^ inv_bits) | RD(dst) | RN(arg2)); + case SLJIT_REV_U16: + case SLJIT_REV_S16: + SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2); + 
FAIL_IF(push_inst(compiler, (REV16 ^ (sljit_ins)0x80000000) | RD(dst) | RN(arg2))); + if (dst == TMP_REG1 || (arg2 == TMP_REG2 && op == SLJIT_REV_U16)) + return SLJIT_SUCCESS; + inv_bits |= inv_bits >> 9; + return push_inst(compiler, ((op == SLJIT_REV_U16 ? UBFM : SBFM) ^ inv_bits) | RD(dst) | RN(dst) | (15 << 10)); + case SLJIT_REV_U32: + case SLJIT_REV_S32: + SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2); + FAIL_IF(push_inst(compiler, (REV ^ (sljit_ins)0x80000400) | RD(dst) | RN(arg2))); + if (op == SLJIT_REV_U32 || dst == TMP_REG1) + return SLJIT_SUCCESS; + return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(dst) | (31 << 10)); + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_MUL: + compiler->status_flags_state = 0; + if (!(flags & SET_FLAGS)) + return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)); + if (flags & INT_OP) { + FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10))); + return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10)); + } + FAIL_IF(push_inst(compiler, SMULH | RD(TMP_LR) | RN(arg1) | RM(arg2))); + FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | 
/*
 * Emits the instruction(s) for one memory access between the general
 * purpose register `reg` and the memory operand described by (arg, argw).
 *
 *   flags    access size in the low two bits (BYTE_SIZE .. WORD_SIZE),
 *            optionally combined with STORE (store instead of load) and
 *            SIGNED (selects the signed load variant).
 *   reg      register encoded into the RT() field of the access.
 *   arg      memory operand; SLJIT_MEM must be set (asserted). The base
 *            register is taken from the REG_MASK bits and an optional
 *            offset register from OFFS_REG(arg).
 *   argw     displacement, or the shift applied to the offset register
 *            when one is present.
 *   tmp_reg  scratch register that may be overwritten while the address
 *            is being formed.
 *
 * Returns the result of the final push_inst(); earlier failures leave the
 * function through FAIL_IF.
 */
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
	/* The access size is placed in bits 30-31 of the instruction word. */
	sljit_u32 shift = MEM_SIZE_SHIFT(flags);
	sljit_u32 type = (shift << 30);

	/* Loads add 0x00800000 (SIGNED) or 0x00400000 (unsigned); stores add neither. */
	if (!(flags & STORE))
		type |= (flags & SIGNED) ? 0x00800000 : 0x00400000;

	SLJIT_ASSERT(arg & SLJIT_MEM);

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* Base register + (offset register << argw); only the low two bits of argw are used. */
		argw &= 0x3;

		/* A shift of zero, or one equal to the access size shift, is encoded
		   directly (bit 12 marks the non-zero shift).
		   NOTE(review): presumably the register offset form supports only
		   these two scalings - confirm against the A64 encoding. */
		if (argw == 0 || argw == shift)
			return push_inst(compiler, STRB | type | RT(reg)
				| RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));

		/* Any other shift: compute the address into tmp_reg first. */
		FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10)));
		return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg));
	}

	arg &= REG_MASK;

	if (!arg) {
		/* No base register: argw is the whole address. Load it into tmp_reg
		   with the scaled 12 bit field cleared, then pass that field as the
		   immediate of the access itself. */
		FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~(0xfff << shift)));

		argw = (argw >> shift) & 0xfff;

		return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
	}

	/* Displacement is a multiple of the access size. */
	if ((argw & ((1 << shift) - 1)) == 0) {
		if (argw >= 0) {
			/* Fits the 12 bit scaled immediate field. */
			if ((argw >> shift) <= 0xfff)
				return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift)));

			if (argw <= 0xffffff) {
				/* Up to 24 bits: add (argw >> 12) to the base (the 1 << 22 bit is
				   presumably the "immediate shifted left by 12" form - confirm),
				   and keep the low 12 bits as the scaled offset. */
				FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));

				argw = ((argw & 0xfff) >> shift);
				return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
			}
		} else if (argw < -256 && argw >= -0xfff000) {
			/* Negative displacement: subtract the magnitude rounded up to a
			   multiple of 0x1000, so the remaining offset is non-negative. */
			FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)(-argw + 0xfff) >> 12) << 10)));
			argw = ((0x1000 + argw) & 0xfff) >> shift;
			return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
		}
	}

	/* Displacement in [-0x100, 0xff]: encoded in the 9 bit field at bit 12. */
	if (argw <= 0xff && argw >= -0x100)
		return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12));

	/* Displacement within 0x100 of a multiple of 0x1000: move the base by that
	   multiple and use the 9 bit form for the remainder. */
	if (((argw + 0x100) & 0xfff) <= 0x1ff && argw <= 0xfff0ff && argw >= -0xfff100) {
		if (argw >= 0) {
			/* Remainder is negative: round the added part up. */
			if (argw & 0x100)
				argw += 0x1000;

			FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));
			return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12));
		} else {
			/* Remainder is non-negative: round the subtracted part up. */
			if (!(argw & 0x100))
				argw -= 0x1000;

			FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)-argw >> 12) << 10)));
			return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12));
		}
	}

	/* Generic fallback: load the displacement into tmp_reg and use it as the offset register. */
	FAIL_IF(load_immediate(compiler, tmp_reg, argw));

	return push_inst(compiler, STRB | type | RT(reg) | RN(arg) | RM(tmp_reg));
}
+ local_size -= saved_regs_size; + SLJIT_ASSERT(local_size > 0); + } + + prev = -1; + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + prev = -1; + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + prev = -1; + } + + fprev = -1; + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + if (fprev != -1) + FAIL_IF(push_inst(compiler, STRI_F64 | VT(fprev) | RN(SLJIT_SP) | (offs >> 5) | (1 << 10))); + + if (prev != -1) + FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? 
(1 << 10) : 0))); + + +#ifdef _WIN32 + if (local_size > 4096) + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); +#endif /* _WIN32 */ + + if (!(options & SLJIT_ENTER_REG_ARG)) { + arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; + tmp = SLJIT_R0; + + while (arg_types) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(tmp))); + saved_arg_count++; + } + tmp++; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + } + +#ifdef _WIN32 + if (local_size > 4096) { + if (local_size < 4 * 4096) { + /* No need for a loop. */ + + if (local_size >= 2 * 4096) { + if (local_size >= 3 * 4096) { + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + } + + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + } + } + else { + FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG1) | ((((sljit_ins)local_size >> 12) - 1) << 5))); + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10))); + FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */)); + } + + local_size &= 0xfff; + + if (local_size > 0) + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + else + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } + + if (local_size > 0) { + if (local_size <= 512) + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); + else { + if (local_size >= 4096) + local_size = (1 << 
(22 - 10)); + + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } + } + +#else /* !_WIN32 */ + + /* The local_size does not include saved registers size. */ + if (local_size != 0) { + if (local_size > 0xfff) { + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (((sljit_ins)local_size >> 12) << 10) | (1 << 22))); + local_size &= 0xfff; + } + + if (local_size > 512 || local_size == 0) { + if (local_size != 0) + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } else + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); + } + +#endif /* _WIN32 */ + + return push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 saved_regs_size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 2); + saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); + + compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to) +{ + sljit_s32 local_size, prev, fprev, i, tmp; + sljit_ins offs; + + local_size = compiler->local_size; + + if 
(!is_return_to) { + if (local_size > 512 && local_size <= 512 + 496) { + FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3)))); + local_size = 512; + } else + FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } else { + if (local_size > 512 && local_size <= 512 + 248) { + FAIL_IF(push_inst(compiler, LDRI_POST | RT(TMP_FP) | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << 12))); + local_size = 512; + } else + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_FP) | RN(SLJIT_SP) | 0)); + } + + if (local_size > 512) { + local_size -= 512; + if (local_size > 0xfff) { + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) + | (((sljit_ins)local_size >> 12) << 10) | (1 << 22))); + local_size &= 0xfff; + } + + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + local_size = 512; + } + + offs = (sljit_ins)(local_size - 2 * SSIZE_OF(sw)) << (15 - 3); + prev = -1; + + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + prev = -1; + } + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + prev = -1; + } + + fprev = -1; + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP_F64 
| VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + if (fprev != -1) + FAIL_IF(push_inst(compiler, LDRI_F64 | VT(fprev) | RN(SLJIT_SP) | (offs >> 5) | (1 << 10))); + + if (prev != -1) + FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0))); + + /* This and the next call/jump instruction can be executed parallelly. */ + return push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (sljit_ins)(local_size << 10)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + + return push_inst(compiler, RET | RN(TMP_LR)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src))); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + sljit_ins inv_bits = (op & SLJIT_32) ? 
W_OP : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, BRK); + case SLJIT_NOP: + return push_inst(compiler, NOP); + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); + return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RD(TMP_REG1) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); + return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)); + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r, flags, mem_flags; + sljit_s32 op_flags = GET_ALL_FLAGS(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + op = GET_OPCODE(op); + if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { + /* Both operands are registers. */ + if (FAST_IS_REG(dst) && FAST_IS_REG(src)) + return emit_op_imm(compiler, op | ((op_flags & SLJIT_32) ? 
INT_OP : 0), dst_r, TMP_REG1, src); + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: + mem_flags = WORD_SIZE; + break; + case SLJIT_MOV_U8: + mem_flags = BYTE_SIZE; + if (src == SLJIT_IMM) + srcw = (sljit_u8)srcw; + break; + case SLJIT_MOV_S8: + mem_flags = BYTE_SIZE | SIGNED; + if (src == SLJIT_IMM) + srcw = (sljit_s8)srcw; + break; + case SLJIT_MOV_U16: + mem_flags = HALF_SIZE; + if (src == SLJIT_IMM) + srcw = (sljit_u16)srcw; + break; + case SLJIT_MOV_S16: + mem_flags = HALF_SIZE | SIGNED; + if (src == SLJIT_IMM) + srcw = (sljit_s16)srcw; + break; + case SLJIT_MOV_U32: + mem_flags = INT_SIZE; + if (src == SLJIT_IMM) + srcw = (sljit_u32)srcw; + break; + case SLJIT_MOV_S32: + case SLJIT_MOV32: + mem_flags = INT_SIZE | SIGNED; + if (src == SLJIT_IMM) + srcw = (sljit_s32)srcw; + break; + default: + SLJIT_UNREACHABLE(); + mem_flags = 0; + break; + } + + if (src == SLJIT_IMM) + FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw)); + else if (!(src & SLJIT_MEM)) + dst_r = src; + else + FAIL_IF(emit_op_mem(compiler, mem_flags, dst_r, src, srcw, TMP_REG2)); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; + } + + flags = HAS_FLAGS(op_flags) ? 
SET_FLAGS : 0; + + switch (op) { + case SLJIT_REV_U16: + case SLJIT_REV_S16: + mem_flags = HALF_SIZE; + break; + case SLJIT_REV_U32: + case SLJIT_REV_S32: + mem_flags = INT_SIZE; + break; + default: + mem_flags = WORD_SIZE; + + if (op_flags & SLJIT_32) { + flags |= INT_OP; + mem_flags = INT_SIZE; + } + break; + } + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src, srcw, TMP_REG2)); + src = TMP_REG2; + } + + emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, src); + + if (SLJIT_UNLIKELY(dst & SLJIT_MEM)) + return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r, flags, mem_flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; + mem_flags = WORD_SIZE; + + if (op & SLJIT_32) { + flags |= INT_OP; + mem_flags = INT_SIZE; + } + + if (dst == TMP_REG2) + flags |= UNUSED_RETURN; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, src1, src1w, TMP_REG1)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src2, src2w, TMP_REG2)); + src2 = TMP_REG2; + } + + if (src1 == SLJIT_IMM) + flags |= ARG1_IMM; + else + src1w = src1; + + if (src2 == SLJIT_IMM) + flags |= ARG2_IMM; + else + src2w = src2; + + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1_reg, + sljit_s32 src2_reg, + sljit_s32 src3, sljit_sw src3w) +{ + sljit_ins inv_bits, imm; + sljit_s32 is_left; + sljit_sw mask; + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, 
src2_reg, src3, src3w)); + + is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL); + + if (src1_reg == src2_reg) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w); + } + + ADJUST_LOCAL_OFFSET(src3, src3w); + + inv_bits = (op & SLJIT_32) ? W_OP : 0; + + if (src3 == SLJIT_IMM) { + mask = inv_bits ? 0x1f : 0x3f; + src3w &= mask; + + if (src3w == 0) + return SLJIT_SUCCESS; + + if (is_left) + src3w = (src3w ^ mask) + 1; + + return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst_reg) + | RN(is_left ? src1_reg : src2_reg) | RM(is_left ? src2_reg : src1_reg) | ((sljit_ins)src3w << 10)); + } + + if (src3 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2)); + src3 = TMP_REG2; + } else if (dst_reg == src3) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src3))); + src3 = TMP_REG2; + } + + FAIL_IF(push_inst(compiler, ((is_left ? LSLV : LSRV) ^ inv_bits) | RD(dst_reg) | RN(src1_reg) | RM(src3))); + + if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) { + /* Shift left/right by 1. */ + if (is_left) + imm = (sljit_ins)(inv_bits ? ((1 << 16) | (31 << 10)) : ((1 << 16) | (63 << 10) | (1 << 22))); + else + imm = (sljit_ins)(inv_bits ? ((31 << 16) | (30 << 10)) : ((63 << 16) | (62 << 10) | (1 << 22))); + + FAIL_IF(push_inst(compiler, (UBFM ^ (inv_bits | (inv_bits >> 9))) | RD(TMP_REG1) | RN(src2_reg) | imm)); + + /* Set imm to mask. */ + imm = (sljit_ins)(inv_bits ? (4 << 10) : ((5 << 10) | (1 << 22))); + FAIL_IF(push_inst(compiler, (EORI ^ inv_bits) | RD(TMP_REG2) | RN(src3) | imm)); + + src2_reg = TMP_REG1; + } else + FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(src3))); + + FAIL_IF(push_inst(compiler, ((is_left ? 
LSRV : LSLV) ^ inv_bits) | RD(TMP_REG1) | RN(src2_reg) | RM(TMP_REG2))); + return push_inst(compiler, (ORR ^ inv_bits) | RD(dst_reg) | RN(dst_reg) | RM(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, MOV | RD(TMP_LR) | RM(src))); + else + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw, TMP_REG1)); + + return push_inst(compiler, RET | RN(TMP_LR)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + SLJIT_ASSERT(reg_map[1] == 0 && reg_map[3] == 2 && reg_map[5] == 4); + + /* The reg_map[op] should provide the appropriate constant. */ + if (op == SLJIT_PREFETCH_L1) + op = 1; + else if (op == SLJIT_PREFETCH_L2) + op = 3; + else if (op == SLJIT_PREFETCH_L3) + op = 5; + else + op = 2; + + /* Signed word sized load is the prefetch instruction. */ + return emit_op_mem(compiler, WORD_SIZE | SIGNED, op, src, srcw, TMP_REG1); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 dst_r = TMP_LR; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + switch (op) { + case SLJIT_FAST_ENTER: + if (FAST_IS_REG(dst)) + return push_inst(compiler, MOV | RD(dst) | RM(TMP_LR)); + break; + case SLJIT_GET_RETURN_ADDRESS: + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), 0x8, TMP_REG2)); + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_64 && type != SLJIT_SIMD_REG_128) + return -1; + + return freg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + SLJIT_UNUSED_ARG(size); + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + sljit_u32 shift = MEM_SIZE_SHIFT(flags); + sljit_ins type = (shift << 30); + + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (!(flags & STORE)) + type |= 0x00400000; + + if (arg & OFFS_REG_MASK) { + argw &= 3; + if (argw == 0 || argw == shift) + return push_inst(compiler, STR_FR | type | VT(reg) + | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? 
(1 << 12) : 0)); + + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10))); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG2)); + } + + arg &= REG_MASK; + + if (!arg) { + FAIL_IF(load_immediate(compiler, TMP_REG2, argw & ~(0xfff << shift))); + + argw = (argw >> shift) & 0xfff; + + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG2) | ((sljit_ins)argw << 10)); + } + + if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) { + if ((argw >> shift) <= 0xfff) + return push_inst(compiler, STR_FI | type | VT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift))); + + if (argw <= 0xffffff) { + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG2) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); + + argw = ((argw & 0xfff) >> shift); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG2) | ((sljit_ins)argw << 10)); + } + } + + if (argw <= 255 && argw >= -256) + return push_inst(compiler, STUR_FI | type | VT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); + + FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); + return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg) | RM(TMP_REG2)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) + inv_bits |= W_OP; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? 
INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; +} + +static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + emit_op_mem(compiler, (ins & W_OP) ? WORD_SIZE : INT_SIZE, TMP_REG1, src, srcw, TMP_REG1); + src = TMP_REG1; + } else if (src == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, ins | VD(dst_r) | RN(src))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, ((ins & (1 << 22)) ? WORD_SIZE : INT_SIZE) | STORE, TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) { + inv_bits |= W_OP; + + if (src == SLJIT_IMM) + srcw = (sljit_s32)srcw; + } + + return sljit_emit_fop1_conv_f64_from_w(compiler, SCVTF ^ inv_bits, dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) { + inv_bits |= W_OP; + + if (src == SLJIT_IMM) + srcw = (sljit_u32)srcw; + } + + return sljit_emit_fop1_conv_f64_from_w(compiler, UCVTF ^ inv_bits, dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_32) ? 
(1 << 22) : 0; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2))); + + if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, CSINC | (0x0 << 12) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(TMP_ZERO))); + return push_inst(compiler, CCMPI | (0x0 << 16) | (0x7 << 12) | RN(TMP_REG1) | 0x4); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits; + + CHECK_ERROR(); + + SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x1) == WORD_SIZE, must_be_one_bit_difference); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (!(dst & SLJIT_MEM)) + FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src))); + break; + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst(compiler, FCVT | (sljit_ins)((op & SLJIT_32) ? 
(1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); + break; + } + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); + break; + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); + break; + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); + break; + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); + break; + case SLJIT_COPYSIGN_F64: + FAIL_IF(push_inst(compiler, (FMOV_R ^ ((op & SLJIT_32) ? (W_OP | (1 << 22)) : 0)) | VN(src2) | RD(TMP_REG1))); + FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src1))); + FAIL_IF(push_inst(compiler, TBZ | ((op & SLJIT_32) ? 
0 : ((sljit_ins)1 << 31)) | (0x1f << 19) | (2 << 5) | RT(TMP_REG1))); + return push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(dst_r)); + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + sljit_u32 exp; + union { + sljit_u32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_ZERO) | VD(freg) | (1 << 16)); + + if ((u.imm << (32 - 19)) == 0) { + exp = (u.imm >> (23 + 2)) & 0x3f; + + if (exp == 0x20 || exp == 0x1f) + return push_inst(compiler, (FMOV_I ^ (1 << 22)) | (sljit_ins)((((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f)) << 13) | VD(freg)); + } + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_s32)u.imm)); + return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_REG1) | VD(freg) | (1 << 16)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + sljit_uw exp; + union { + sljit_uw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, FMOV_R | RN(TMP_ZERO) | VD(freg) | (sljit_ins)1 << 16); + + if ((u.imm << (64 - 48)) == 0) { + exp = (u.imm >> (52 + 2)) & 0x1ff; + + if (exp == 0x100 || exp == 0xff) + return push_inst(compiler, FMOV_I | (sljit_ins)((((u.imm >> 56) & 0x80) | ((u.imm >> 48) & 0x7f)) << 13) | VD(freg)); + } + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_sw)u.imm)); + return push_inst(compiler, FMOV_R | RN(TMP_REG1) | VD(freg) | (1 << 16)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 
freg, sljit_s32 reg) +{ + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) + inst = FMOV_R | RN(reg) | VD(freg) | (1 << 16); + else + inst = FMOV_R | VN(freg) | RD(reg); + + if (op & SLJIT_32) + inst ^= W_OP | (1 << 22); + + return push_inst(compiler, inst); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_ATOMIC_STORED: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + return 0x1; + + case SLJIT_NOT_EQUAL: + case SLJIT_ATOMIC_NOT_STORED: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return 0x0; + + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x3; + /* fallthrough */ + + case SLJIT_LESS: + return 0x2; + + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x2; + /* fallthrough */ + + case SLJIT_GREATER_EQUAL: + return 0x3; + + case SLJIT_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + return 0x9; + + case SLJIT_LESS_EQUAL: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + return 0x8; + + case SLJIT_SIG_LESS: + case SLJIT_UNORDERED_OR_LESS: + return 0xa; + + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + return 0xb; + + case SLJIT_SIG_GREATER: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + return 0xd; + + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return 0xc; + + case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) + return 0x0; + /* fallthrough */ + + case SLJIT_UNORDERED: + return 0x7; + 
+ case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) + return 0x1; + /* fallthrough */ + + case SLJIT_ORDERED: + return 0x6; + + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + return 0x5; + + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return 0x4; + + default: + SLJIT_UNREACHABLE(); + return 0xe; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type < SLJIT_JUMP) { + jump->flags |= IS_COND; + PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(compiler, type))); + } else if (type >= SLJIT_FAST_CALL) + jump->flags |= IS_BL; + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG2))); + + /* Maximum number of instructions required for generating a constant. 
*/ + compiler->size += JUMP_MAX_SIZE - 1; + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0; + + SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL); + ADJUST_LOCAL_OFFSET(src, srcw); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + jump->flags |= IS_CBZ | IS_COND; + + if (src & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + else if (src == SLJIT_IMM) { + PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + + SLJIT_ASSERT(FAST_IS_REG(src)); + + if ((type & 0xff) == SLJIT_EQUAL) + inv_bits |= 1 << 24; + + PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src))); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG2))); + + /* Maximum number of instructions required for generating a constant. 
*/ + compiler->size += JUMP_MAX_SIZE - 1; + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + + if (src != SLJIT_IMM) { + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2)); + src = TMP_REG2; + } + return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src)); + } + + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); + jump->u.target = (sljit_uw)srcw; + + jump->addr = compiler->size; + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; + return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? 
BLR : BR) | RN(TMP_REG2)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src))); + src = TMP_REG1; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP; + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 dst_r, src_r, flags, mem_flags; + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + cc = get_cc(compiler, type); + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (GET_OPCODE(op) < SLJIT_ADD) { + FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO))); + + if (dst & SLJIT_MEM) { + mem_flags = (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE; + return emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG2); + } + + return SLJIT_SUCCESS; + } + + flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; + mem_flags = WORD_SIZE; + + if (op & SLJIT_32) { + flags |= INT_OP; + mem_flags = INT_SIZE; + } + + src_r = dst; + + if (dst & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG1)); + src_r = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO))); + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src_r, TMP_REG2); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0; + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (src1 == SLJIT_IMM) { + if (type & SLJIT_32) + src1w = (sljit_s32)src1w; + FAIL_IF(load_immediate(compiler, TMP_REG2, src1w)); + src1 = TMP_REG2; + } else if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src1, src1w, TMP_REG2)); + src1 = TMP_REG2; + } + + cc = get_cc(compiler, type & ~SLJIT_32); + return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(src2_reg) | RM(src1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_ins inv_bits = (type & SLJIT_32) ? (1 << 22) : 0; + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) ? 
INT_SIZE : WORD_SIZE, TMP_FREG2, src1, src1w)); + src1 = TMP_FREG2; + } + + cc = get_cc(compiler, type & ~SLJIT_32); + return push_inst(compiler, (FCSEL ^ inv_bits) | (cc << 12) | VD(dst_freg) | VN(src2_freg) | VM(src1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_u32 inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + ADJUST_LOCAL_OFFSET(mem, memw); + + if (!(mem & REG_MASK)) { + FAIL_IF(load_immediate(compiler, TMP_REG1, memw & ~0x1f8)); + + mem = SLJIT_MEM1(TMP_REG1); + memw &= 0x1f8; + } else if (mem & OFFS_REG_MASK) { + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10))); + + mem = SLJIT_MEM1(TMP_REG1); + memw = 0; + } else if ((memw & 0x7) != 0 || memw > 0x1f8 || memw < -0x200) { + inst = ADDI; + + if (memw < 0) { + /* Remains negative for integer min. */ + memw = -memw; + inst = SUBI; + } else if ((memw & 0x7) == 0 && memw <= 0x7ff0) { + if (!(type & SLJIT_MEM_STORE) && (mem & REG_MASK) == REG_PAIR_FIRST(reg)) { + FAIL_IF(push_inst(compiler, LDRI | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7))); + return push_inst(compiler, LDRI | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7)); + } + + inst = (type & SLJIT_MEM_STORE) ? 
STRI : LDRI; + + FAIL_IF(push_inst(compiler, inst | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7))); + return push_inst(compiler, inst | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7)); + } + + if ((sljit_uw)memw <= 0xfff) { + FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(mem & REG_MASK) | ((sljit_ins)memw << 10))); + memw = 0; + } else if ((sljit_uw)memw <= 0xffffff) { + FAIL_IF(push_inst(compiler, inst | (1 << 22) | RD(TMP_REG1) | RN(mem & REG_MASK) | (((sljit_ins)memw >> 12) << 10))); + + if ((memw & 0xe07) != 0) { + FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(TMP_REG1) | (((sljit_ins)memw & 0xfff) << 10))); + memw = 0; + } else { + memw &= 0xfff; + } + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, memw)); + FAIL_IF(push_inst(compiler, (inst == ADDI ? ADD : SUB) | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(TMP_REG1))); + memw = 0; + } + + mem = SLJIT_MEM1(TMP_REG1); + + if (inst == SUBI) + memw = -memw; + } + + SLJIT_ASSERT((memw & 0x7) == 0 && memw <= 0x1f8 && memw >= -0x200); + return push_inst(compiler, ((type & SLJIT_MEM_STORE) ? 
STP : LDP) | RT(REG_PAIR_FIRST(reg)) | RT2(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x3f8) << 12)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_u32 sign = 0, inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw)); + + if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + switch (type & 0xff) { + case SLJIT_MOV: + case SLJIT_MOV_P: + inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400; + break; + case SLJIT_MOV_S8: + sign = 1; + /* fallthrough */ + case SLJIT_MOV_U8: + inst = STURBI | (MEM_SIZE_SHIFT(BYTE_SIZE) << 30) | 0x400; + break; + case SLJIT_MOV_S16: + sign = 1; + /* fallthrough */ + case SLJIT_MOV_U16: + inst = STURBI | (MEM_SIZE_SHIFT(HALF_SIZE) << 30) | 0x400; + break; + case SLJIT_MOV_S32: + sign = 1; + /* fallthrough */ + case SLJIT_MOV_U32: + case SLJIT_MOV32: + inst = STURBI | (MEM_SIZE_SHIFT(INT_SIZE) << 30) | 0x400; + break; + default: + SLJIT_UNREACHABLE(); + inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400; + break; + } + + if (!(type & SLJIT_MEM_STORE)) + inst |= sign ? 
0x00800000 : 0x00400000; + + if (!(type & SLJIT_MEM_POST)) + inst |= 0x800; + + return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_u32 inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw)); + + if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + inst = STUR_FI | 0x80000400; + + if (!(type & SLJIT_32)) + inst |= 0x40000000; + + if (!(type & SLJIT_MEM_STORE)) + inst |= 0x00400000; + + if (!(type & SLJIT_MEM_POST)) + inst |= 0x800; + + return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12)); +} + +static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw) +{ + sljit_ins ins; + sljit_s32 mem = *mem_ptr; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + *mem_ptr = TMP_REG2; + return push_inst(compiler, ADD | RD(TMP_REG2) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10)); + } + + if (!(mem & REG_MASK)) { + *mem_ptr = TMP_REG2; + return load_immediate(compiler, TMP_REG2, memw); + } + + mem &= REG_MASK; + + if (memw == 0) { + *mem_ptr = mem; + return SLJIT_SUCCESS; + } + + *mem_ptr = TMP_REG2; + + if (memw < -0xffffff || memw > 0xffffff) { + FAIL_IF(load_immediate(compiler, TMP_REG2, memw)); + return push_inst(compiler, ADD | RD(TMP_REG2) | RN(TMP_REG2) | RM(mem)); + } + + ins = ADDI; + + if (memw < 0) { + memw = -memw; + ins = SUBI; + } + + if (memw > 0xfff) { + FAIL_IF(push_inst(compiler, ins | (1 << 22) | RD(TMP_REG2) | RN(mem) | ((sljit_ins)(memw >> 12) << 10))); + + memw &= 0xfff; + if (memw == 0) + return SLJIT_SUCCESS; + + mem = TMP_REG2; + } + + return push_inst(compiler, ins | 
RD(TMP_REG2) | RN(mem) | ((sljit_ins)memw << 10)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (!(srcdst & SLJIT_MEM)) { + if (type & SLJIT_SIMD_STORE) + ins = VD(srcdst) | VN(freg) | VM(freg); + else + ins = VD(freg) | VN(srcdst) | VM(srcdst); + + if (reg_size == 4) + ins |= (1 << 30); + + return push_inst(compiler, ORR_v | ins); + } + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + if (elem_size > 3) + elem_size = 3; + + ins = (type & SLJIT_SIMD_STORE) ? 
ST1 : LD1; + + if (reg_size == 4) + ins |= (1 << 30); + + return push_inst(compiler, ins | ((sljit_ins)elem_size << 10) | RN(srcdst) | VT(freg)); +} + +static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value) +{ + sljit_ins result; + + if (elem_size > 2 && (sljit_u32)value == (value >> 32)) { + elem_size = 2; + value = (sljit_u32)value; + } + + if (elem_size == 2 && (sljit_u16)value == (value >> 16)) { + elem_size = 1; + value = (sljit_u16)value; + } + + if (elem_size == 1 && (sljit_u8)value == (value >> 8)) { + elem_size = 0; + value = (sljit_u8)value; + } + + switch (elem_size) { + case 0: + SLJIT_ASSERT(value <= 0xff); + result = 0xe000; + break; + case 1: + SLJIT_ASSERT(value <= 0xffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x8000; + break; + } + + if ((value & 0xff) == 0) { + value >>= 8; + result |= 0xa000; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value ^= (sljit_uw)0xffff; + result = (1 << 29); + } + break; + case 2: + SLJIT_ASSERT(value <= 0xffffffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x0000; + break; + } + + if ((value & ~(sljit_uw)0xff00) == 0) { + value >>= 8; + result |= 0x2000; + break; + } + + if ((value & ~(sljit_uw)0xff0000) == 0) { + value >>= 16; + result |= 0x4000; + break; + } + + if ((value & ~(sljit_uw)0xff000000) == 0) { + value >>= 24; + result |= 0x6000; + break; + } + + if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) { + value >>= 8; + result |= 0xc000; + break; + } + + if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) { + value >>= 16; + result |= 0xd000; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value ^= (sljit_uw)0xffffffff; + result = (1 << 29); + } + break; + default: + return ~(sljit_ins)0; + } + + return (((sljit_ins)value & 0x1f) << 5) | (((sljit_ins)value & 0xe0) << 11) | result; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + 
sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imm; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + + ins = (sljit_ins)elem_size << 10; + + if (reg_size == 4) + ins |= (sljit_ins)1 << 30; + + return push_inst(compiler, LD1R | ins | RN(src) | VT(freg)); + } + + ins = (sljit_ins)1 << (16 + elem_size); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 30; + + if (type & SLJIT_SIMD_FLOAT) { + if (src == SLJIT_IMM) + return push_inst(compiler, MOVI | (ins & ((sljit_ins)1 << 30)) | VD(freg)); + + return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src)); + } + + if (src == SLJIT_IMM) { + if (elem_size < 3) + srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + imm = simd_get_imm(elem_size, (sljit_uw)srcw); + + if (imm != ~(sljit_ins)0) { + imm |= ins & ((sljit_ins)1 << 30); + + return push_inst(compiler, MOVI | imm | VD(freg)); + } + + FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); + src = TMP_REG2; + } + + return push_inst(compiler, DUP_g | ins | VD(freg) | RN(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, 
srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (type & SLJIT_SIMD_LANE_ZERO) { + ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 30); + + if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { + FAIL_IF(push_inst(compiler, ORR_v | ins | VD(TMP_FREG1) | VN(freg) | VM(freg))); + srcdst = TMP_FREG1; + srcdstw = 0; + } + + FAIL_IF(push_inst(compiler, MOVI | ins | VD(freg))); + } + + if (srcdst & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + if (elem_size == 3) + ins = 0x8400; + else if (elem_size == 0) + ins = 0; + else + ins = (sljit_ins)0x2000 << elem_size; + + lane_index = lane_index << elem_size; + ins |= (sljit_ins)(((lane_index & 0x8) << 27) | ((lane_index & 0x7) << 10)); + + return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? ST1_s : LD1_s) | ins | RN(srcdst) | VT(freg)); + } + + if (type & SLJIT_SIMD_FLOAT) { + if (type & SLJIT_SIMD_STORE) + ins = INS_e | ((sljit_ins)1 << (16 + elem_size)) | ((sljit_ins)lane_index << (11 + elem_size)) | VD(srcdst) | VN(freg); + else + ins = INS_e | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)) | VD(freg) | VN(srcdst); + + return push_inst(compiler, ins); + } + + if (srcdst == SLJIT_IMM) { + if (elem_size < 3) + srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + FAIL_IF(load_immediate(compiler, TMP_REG2, srcdstw)); + srcdst = TMP_REG2; + } + + if (type & SLJIT_SIMD_STORE) { + ins = RD(srcdst) | VN(freg); + + if ((type & SLJIT_SIMD_LANE_SIGNED) && (elem_size < 2 || (elem_size == 2 && !(type & SLJIT_32)))) { + ins |= SMOV; + + if (!(type & SLJIT_32)) + ins |= (sljit_ins)1 << 30; + } else + ins |= UMOV; + } else + ins = INS | VD(freg) | RN(srcdst); + + if (elem_size == 3) + ins |= (sljit_ins)1 << 30; + + return push_inst(compiler, ins | ((((sljit_ins)lane_index << 1) | 
1) << (16 + elem_size))); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + ins = (((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 30; + + return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + + if (reg_size == 4 && elem2_size - elem_size == 1) + FAIL_IF(push_inst(compiler, LD1 | ((sljit_ins)elem_size << 10) | RN(src) | VT(freg))); + else + FAIL_IF(push_inst(compiler, LD1_s | ((sljit_ins)0x2000 << (reg_size - elem2_size + elem_size)) | RN(src) | VT(freg))); + src = freg; + } + + if (type & SLJIT_SIMD_FLOAT) { + SLJIT_ASSERT(reg_size == 
4); + return push_inst(compiler, FCVTL | (1 << 22) | VD(freg) | VN(src)); + } + + do { + FAIL_IF(push_inst(compiler, ((type & SLJIT_SIMD_EXTEND_SIGNED) ? SSHLL : USHLL) + | ((sljit_ins)1 << (19 + elem_size)) | VD(freg) | VN(src))); + src = freg; + } while (++elem_size < elem2_size); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imms; + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (elem_size) { + case 0: + imms = 0x643219; + ins = USHR | (0x9 << 16); + break; + case 1: + imms = (reg_size == 4) ? 0x643219 : 0x6231; + ins = USHR | (0x11 << 16); + break; + case 2: + imms = (reg_size == 4) ? 0x6231 : 0x61; + ins = USHR | (0x21 << 16); + break; + default: + imms = 0x61; + ins = USHR | (0x41 << 16); + break; + } + + if (reg_size == 4) + ins |= (1 << 30); + + FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG1) | VN(freg))); + + if (reg_size == 4 && elem_size > 0) + FAIL_IF(push_inst(compiler, XTN | ((sljit_ins)(elem_size - 1) << 22) | VD(TMP_FREG1) | VN(TMP_FREG1))); + + if (imms >= 0x100) { + ins = (reg_size == 4 && elem_size == 0) ? (1 << 30) : 0; + + do { + FAIL_IF(push_inst(compiler, USRA | ins | ((imms & 0xff) << 16) | VD(TMP_FREG1) | VN(TMP_FREG1))); + imms >>= 8; + } while (imms >= 0x100); + } + + FAIL_IF(push_inst(compiler, USRA | (1 << 30) | (imms << 16) | VD(TMP_FREG1) | VN(TMP_FREG1))); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + ins = (0x1 << 16); + + if (reg_size == 4 && elem_size == 0) { + FAIL_IF(push_inst(compiler, INS_e | (0x3 << 16) | (0x8 << 11) | VD(TMP_FREG1) | VN(TMP_FREG1))); + ins = (0x2 << 16); + } + + FAIL_IF(push_inst(compiler, UMOV | ins | RD(dst_r) | VN(TMP_FREG1))); + + if (dst_r == TMP_REG2) + return emit_op_mem(compiler, STORE | ((type & SLJIT_32) ? INT_SIZE : WORD_SIZE), TMP_REG2, dst, dstw, TMP_REG1); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + switch (SLJIT_SIMD_GET_OPCODE(type)) { + case SLJIT_SIMD_OP2_AND: + ins = AND_v; + break; + case SLJIT_SIMD_OP2_OR: + ins = ORR_v; + break; + case SLJIT_SIMD_OP2_XOR: + ins = EOR_v; + break; + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + ins |= (sljit_ins)1 << 30; + + return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + +#ifdef __ARM_FEATURE_ATOMICS + switch (GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + ins = LDR ^ (1 << 30); + break; + case SLJIT_MOV_U16: + ins = LDRH; + break; + case SLJIT_MOV_U8: + ins = LDRB; + break; + default: + ins = LDR; + break; + } +#else /* !__ARM_FEATURE_ATOMICS */ + switch 
(GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + ins = LDXR ^ (1 << 30); + break; + case SLJIT_MOV_U8: + ins = LDXRB; + break; + case SLJIT_MOV_U16: + ins = LDXRH; + break; + default: + ins = LDXR; + break; + } +#endif /* ARM_FEATURE_ATOMICS */ + return push_inst(compiler, ins | RN(mem_reg) | RT(dst_reg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_ins ins; + sljit_s32 tmp = temp_reg; + sljit_ins cmp = 0; + sljit_ins inv_bits = W_OP; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + +#ifdef __ARM_FEATURE_ATOMICS + if (op & SLJIT_SET_ATOMIC_STORED) + cmp = (SUBS ^ W_OP) | RD(TMP_ZERO); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + ins = CAS ^ (1 << 30); + break; + case SLJIT_MOV_U16: + ins = CASH; + break; + case SLJIT_MOV_U8: + ins = CASB; + break; + default: + ins = CAS; + inv_bits = 0; + if (cmp) + cmp ^= W_OP; + break; + } + + if (cmp) { + FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RM(temp_reg) | RD(TMP_REG1))); + tmp = TMP_REG1; + } + FAIL_IF(push_inst(compiler, ins | RM(tmp) | RN(mem_reg) | RD(src_reg))); + if (!cmp) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, cmp | RM(tmp) | RN(temp_reg))); + FAIL_IF(push_inst(compiler, (CSET ^ inv_bits) | RD(tmp))); + return push_inst(compiler, cmp | RM(tmp) | RN(TMP_ZERO)); +#else /* !__ARM_FEATURE_ATOMICS */ + SLJIT_UNUSED_ARG(tmp); + SLJIT_UNUSED_ARG(inv_bits); + + if (op & SLJIT_SET_ATOMIC_STORED) + cmp = (SUBI ^ W_OP) | (1 << 29); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + ins = STXR ^ (1 << 30); + break; + case SLJIT_MOV_U8: + ins = STXRB; + break; + case SLJIT_MOV_U16: + ins = STXRH; + break; + default: + ins = STXR; + break; + } + + FAIL_IF(push_inst(compiler, ins | RM(TMP_REG1) | RN(mem_reg) | RT(src_reg))); + return cmp ? 
push_inst(compiler, cmp | RD(TMP_ZERO) | RN(TMP_REG1)) : SLJIT_SUCCESS; +#endif /* __ARM_FEATURE_ATOMICS */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + sljit_s32 dst_reg; + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); + + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; + + /* Not all instruction forms support accessing SP register. */ + if (offset <= 0xffffff && offset >= -0xffffff) { + ins = ADDI; + if (offset < 0) { + offset = -offset; + ins = SUBI; + } + + if (offset <= 0xfff) + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)(offset << 10))); + else { + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)((offset & 0xfff000) >> (12 - 10)) | (1 << 22))); + + offset &= 0xfff; + if (offset != 0) + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (sljit_ins)(offset << 10))); + } + } + else { + FAIL_IF(load_immediate (compiler, dst_reg, offset)); + /* Add extended register form. */ + FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg))); + } + + if (SLJIT_UNLIKELY(dst & SLJIT_MEM)) + return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? 
/*
 * Rewrites the 64-bit value loaded by the four-instruction sequence that
 * starts at addr so that it loads new_target instead.
 *
 * addr               address of the first instruction of the sequence
 * new_target         value the sequence must load from now on
 * executable_offset  passed to SLJIT_ADD_EXEC_OFFSET to obtain the address
 *                    range handed to the cache flush
 *
 * The destination register is taken from the low five bits of the existing
 * first instruction; the assertion checks that the first two words have the
 * MOVZ / MOVK shape before they are overwritten.
 */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_ins* inst = (sljit_ins*)addr;
	sljit_u32 dst;
	SLJIT_UNUSED_ARG(executable_offset);

	/* NOTE(review): presumably makes the four words writable - confirm
	   against the SLJIT_UPDATE_WX_FLAGS definition. */
	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);

	dst = inst[0] & 0x1f;
	SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21)));
	/* Each word carries one 16-bit slice of new_target at bit 5; the value
	   or-ed in at bit 21 (0..3) differs per slice. */
	inst[0] = MOVZ | dst | (((sljit_u32)new_target & 0xffff) << 5);
	inst[1] = MOVK | dst | (((sljit_u32)(new_target >> 16) & 0xffff) << 5) | (1 << 21);
	inst[2] = MOVK | dst | (((sljit_u32)(new_target >> 32) & 0xffff) << 5) | (2 << 21);
	inst[3] = MOVK | dst | ((sljit_u32)(new_target >> 48) << 5) | (3 << 21);

	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
	/* The flush is issued on the offset-adjusted address of the same words. */
	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
	SLJIT_CACHE_FLUSH(inst, inst + 4);
}

/*
 * Replaces the constant loaded at addr with new_constant. The patching is
 * delegated unchanged to sljit_set_jump_addr().
 */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}
b/src/sljit/sljitNativeARM_T2_32.c new file mode 100644 index 0000000..799954a --- /dev/null +++ b/src/sljit/sljitNativeARM_T2_32.c @@ -0,0 +1,4302 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ +#ifdef __SOFTFP__ + return "ARM-Thumb2" SLJIT_CPUINFO " ABI:softfp"; +#else + return "ARM-Thumb2" SLJIT_CPUINFO " ABI:hardfp"; +#endif +} + +/* Length of an instruction word. */ +typedef sljit_u32 sljit_ins; + +/* Last register + 1. 
/*
 * Relocates a bit field: takes the bits of 'src' starting at bit position
 * 'from', moves them so that they start at bit position 'to', and keeps
 * 'bits' bits of the result (everything else is cleared).
 *
 * Every parameter is parenthesised so that expression arguments such as
 * "12 + 16" expand correctly in both shift directions, and the mask is built
 * from an unsigned one so a field that reaches bit 31 does not overflow a
 * signed int. 'bits' must be smaller than the width of sljit_ins.
 */
#define COPY_BITS(src, from, to, bits) \
	(((from) >= (to) ? ((sljit_ins)(src) >> ((from) - (to))) : ((sljit_ins)(src) << ((to) - (from)))) \
		& ((((sljit_ins)1 << (bits)) - 1) << (to)))
*/ +#define RM4(rm) ((sljit_ins)reg_map[rm]) +#define RD4(rd) ((sljit_ins)reg_map[rd] << 8) +#define RT4(rt) ((sljit_ins)reg_map[rt] << 12) +#define RN4(rn) ((sljit_ins)reg_map[rn] << 16) + +#define VM4(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5)) +#define VD4(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22)) +#define VN4(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7)) + +#define IMM5(imm) \ + (COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)imm & 0x3) << 6)) +#define IMM12(imm) \ + (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | ((sljit_ins)imm & 0xff)) + +/* --------------------------------------------------------------------- */ +/* Instruction forms */ +/* --------------------------------------------------------------------- */ + +/* dot '.' changed to _ + I immediate form (possibly followed by number of immediate bits). */ +#define ADCI 0xf1400000 +#define ADCS 0x4140 +#define ADC_W 0xeb400000 +#define ADD 0x4400 +#define ADDS 0x1800 +#define ADDSI3 0x1c00 +#define ADDSI8 0x3000 +#define ADDWI 0xf2000000 +#define ADD_SP 0x4485 +#define ADD_SP_I 0xb000 +#define ADD_W 0xeb000000 +#define ADD_WI 0xf1000000 +#define ANDI 0xf0000000 +#define ANDS 0x4000 +#define AND_W 0xea000000 +#define ASRS 0x4100 +#define ASRSI 0x1000 +#define ASR_W 0xfa40f000 +#define ASR_WI 0xea4f0020 +#define BCC 0xd000 +#define BICI 0xf0200000 +#define BKPT 0xbe00 +#define BLX 0x4780 +#define BX 0x4700 +#define CLZ 0xfab0f080 +#define CMNI_W 0xf1100f00 +#define CMP 0x4280 +#define CMPI 0x2800 +#define CMPI_W 0xf1b00f00 +#define CMP_X 0x4500 +#define CMP_W 0xebb00f00 +#define EORI 0xf0800000 +#define EORS 0x4040 +#define EOR_W 0xea800000 +#define IT 0xbf00 +#define LDR 0xf8d00000 +#define LDR_SP 0x9800 +#define LDRD 0xe9500000 +#define LDREX 0xe8500f00 +#define LDREXB 0xe8d00f4f +#define LDREXH 0xe8d00f5f +#define LDRI 0xf8500800 +#define LSLS 0x4080 +#define LSLSI 0x0000 +#define LSL_W 0xfa00f000 +#define LSL_WI
0xea4f0000 +#define LSRS 0x40c0 +#define LSRSI 0x0800 +#define LSR_W 0xfa20f000 +#define LSR_WI 0xea4f0010 +#define MLA 0xfb000000 +#define MOV 0x4600 +#define MOVS 0x0000 +#define MOVSI 0x2000 +#define MOVT 0xf2c00000 +#define MOVW 0xf2400000 +#define MOV_W 0xea4f0000 +#define MOV_WI 0xf04f0000 +#define MUL 0xfb00f000 +#define MVNS 0x43c0 +#define MVN_W 0xea6f0000 +#define MVN_WI 0xf06f0000 +#define NOP 0xbf00 +#define ORNI 0xf0600000 +#define ORRI 0xf0400000 +#define ORRS 0x4300 +#define ORR_W 0xea400000 +#define POP 0xbc00 +#define POP_W 0xe8bd0000 +#define PUSH 0xb400 +#define PUSH_W 0xe92d0000 +#define REV 0xba00 +#define REV_W 0xfa90f080 +#define REV16 0xba40 +#define REV16_W 0xfa90f090 +#define RBIT 0xfa90f0a0 +#define RORS 0x41c0 +#define ROR_W 0xfa60f000 +#define ROR_WI 0xea4f0030 +#define RSB_WI 0xf1c00000 +#define RSBSI 0x4240 +#define SBCI 0xf1600000 +#define SBCS 0x4180 +#define SBC_W 0xeb600000 +#define SDIV 0xfb90f0f0 +#define SMULL 0xfb800000 +#define STR_SP 0x9000 +#define STRD 0xe9400000 +#define STREX 0xe8400000 +#define STREXB 0xe8c00f40 +#define STREXH 0xe8c00f50 +#define SUBS 0x1a00 +#define SUBSI3 0x1e00 +#define SUBSI8 0x3800 +#define SUB_W 0xeba00000 +#define SUBWI 0xf2a00000 +#define SUB_SP_I 0xb080 +#define SUB_WI 0xf1a00000 +#define SXTB 0xb240 +#define SXTB_W 0xfa4ff080 +#define SXTH 0xb200 +#define SXTH_W 0xfa0ff080 +#define TST 0x4200 +#define TSTI 0xf0000f00 +#define TST_W 0xea000f00 +#define UDIV 0xfbb0f0f0 +#define UMULL 0xfba00000 +#define UXTB 0xb2c0 +#define UXTB_W 0xfa5ff080 +#define UXTH 0xb280 +#define UXTH_W 0xfa1ff080 +#define VABS_F32 0xeeb00ac0 +#define VADD_F32 0xee300a00 +#define VAND 0xef000110 +#define VCMP_F32 0xeeb40a40 +#define VCVT_F32_S32 0xeeb80ac0 +#define VCVT_F32_U32 0xeeb80a40 +#define VCVT_F64_F32 0xeeb70ac0 +#define VCVT_S32_F32 0xeebd0ac0 +#define VDIV_F32 0xee800a00 +#define VDUP 0xee800b10 +#define VDUP_s 0xffb00c00 +#define VEOR 0xff000110 +#define VLD1 0xf9200000 +#define VLD1_r 0xf9a00c00 +#define 
VLD1_s 0xf9a00000 +#define VLDR_F32 0xed100a00 +#define VMOV_F32 0xeeb00a40 +#define VMOV 0xee000a10 +#define VMOV2 0xec400a10 +#define VMOV_i 0xef800010 +#define VMOV_s 0xee000b10 +#define VMOVN 0xffb20200 +#define VMRS 0xeef1fa10 +#define VMUL_F32 0xee200a00 +#define VNEG_F32 0xeeb10a40 +#define VORR 0xef200110 +#define VPOP 0xecbd0b00 +#define VPUSH 0xed2d0b00 +#define VSHLL 0xef800a10 +#define VSHR 0xef800010 +#define VSRA 0xef800110 +#define VST1 0xf9000000 +#define VST1_s 0xf9800000 +#define VSTR_F32 0xed000a00 +#define VSUB_F32 0xee300a40 + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + +static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32) +{ + if (compiler->scratches == -1) + return 0; + + if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0)) + fr -= SLJIT_F64_SECOND(0); + + return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches)) + || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0) + || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)); +} + +#endif /* SLJIT_ARGUMENT_CHECKS */ + +static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst) +{ + sljit_u16 *ptr; + SLJIT_ASSERT(!(inst & 0xffff0000)); + + ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16)); + FAIL_IF(!ptr); + *ptr = (sljit_u16)(inst); + compiler->size++; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst) +{ + sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr++ = (sljit_u16)(inst >> 16); + *ptr = (sljit_u16)(inst); + compiler->size += 2; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) +{ + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) + | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); + return 
/*
 * Writes a MOVW + MOVT pair (four halfwords) at inst that loads the 32-bit
 * value new_imm.
 *
 * inst     points at the first of the four halfwords to overwrite
 * dst      destination register field, already positioned.
 *          Dst must be in bits[11-8]
 * new_imm  value to load; its low 16 bits go into the MOVW pair and its
 *          high 16 bits into the MOVT pair
 *
 * For each 16-bit half the immediate is scattered as follows: bits 15..12 go
 * to bits 3..0 and bit 11 to bit 10 of the first halfword; bits 10..8 go to
 * bits 14..12 and bits 7..0 stay in place in the second halfword.
 */
static void set_imm32_const(sljit_u16 *inst, sljit_ins dst, sljit_uw new_imm)
{
	inst[0] = (sljit_u16)((MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1));
	inst[1] = (sljit_u16)(dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff));
	inst[2] = (sljit_u16)((MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1));
	inst[3] = (sljit_u16)(dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16));
}

/*
 * Re-encodes an existing MOVW + MOVT pair at inst with new_imm, keeping the
 * destination register found in bits 11..8 of the second halfword. The
 * assertion verifies the MOVW / MOVT opcode bits and that both instructions
 * name the same destination before anything is overwritten.
 */
static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm)
{
	sljit_ins dst = inst[1] & 0x0f00;
	SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00));
	set_imm32_const(inst, dst, new_imm);
}
*/ + diff += SSIZE_OF(u16); + if (diff <= 0xff && diff >= -0x100) { + jump->flags |= PATCH_TYPE1; + jump->addr = (sljit_uw)(code_ptr - 1); + return code_ptr - 1; + } + if (diff <= 0xfffff && diff >= -0x100000) { + jump->flags |= PATCH_TYPE2; + jump->addr = (sljit_uw)(code_ptr - 1); + return code_ptr; + } + diff -= SSIZE_OF(u16); + } else if (jump->flags & IS_BL) { + /* Branch and link. */ + if (diff <= 0xffffff && diff >= -0x1000000) { + jump->flags |= PATCH_TYPE5; + return code_ptr + 1; + } + goto exit; + } else if (diff <= 0x7ff && diff >= -0x800) { + jump->flags |= PATCH_TYPE3; + return code_ptr; + } + + if (diff <= 0xffffff && diff >= -0x1000000) { + jump->flags |= PATCH_TYPE4; + return code_ptr + 1; + } + +exit: + code_ptr[4] = code_ptr[0]; + + if (jump->flags & IS_COND) { + code_ptr[3] = code_ptr[-1]; + jump->addr = (sljit_uw)(code_ptr - 1); + } + + return code_ptr + 4; +} + +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) +{ + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + /* The pc+4 offset is represented by the 2 * SSIZE_OF(sljit_u16) below. */ + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + /* Note: ADR with imm8 does not set the last bit (Thumb2 flag). */ + + if (diff <= 0xffd + 2 * SSIZE_OF(u16) && diff >= -0xfff + 2 * SSIZE_OF(u16)) { + jump->flags |= PATCH_TYPE6; + return 1; + } + + return 3; +} + +static SLJIT_INLINE void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset) +{ + sljit_s32 type = (jump->flags >> 4) & 0xf; + sljit_u16 *jump_inst = (sljit_u16*)jump->addr; + sljit_sw diff; + sljit_ins ins; + + diff = (sljit_sw)((jump->flags & JUMP_ADDR) ? 
jump->u.target : jump->u.label->u.addr); + + if (SLJIT_UNLIKELY(type == 0)) { + ins = (jump->flags & JUMP_MOV_ADDR) ? *jump_inst : RDN3(TMP_REG1); + set_imm32_const((sljit_u16*)jump->addr, ins, (sljit_uw)diff); + return; + } + + if (SLJIT_UNLIKELY(type == 6)) { + SLJIT_ASSERT(jump->flags & JUMP_MOV_ADDR); + diff -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_inst + 2, executable_offset) & ~(sljit_sw)0x3; + + SLJIT_ASSERT(diff <= 0xfff && diff >= -0xfff); + + ins = ADDWI >> 16; + if (diff <= 0) { + diff = -diff; + ins = SUBWI >> 16; + } + + jump_inst[1] = (sljit_u16)(jump_inst[0] | COPY_BITS(diff, 8, 12, 3) | (diff & 0xff)); + jump_inst[0] = (sljit_u16)(ins | 0xf | COPY_BITS(diff, 11, 10, 1)); + return; + } + + SLJIT_ASSERT((diff & 0x1) != 0 && !(jump->flags & JUMP_MOV_ADDR)); + diff = (diff - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; + + switch (type) { + case 1: + /* Encoding T1 of 'B' instruction */ + SLJIT_ASSERT(diff <= 0x7f && diff >= -0x80 && (jump->flags & IS_COND)); + jump_inst[0] = (sljit_u16)(0xd000 | (jump->flags & 0xf00) | ((sljit_ins)diff & 0xff)); + return; + case 2: + /* Encoding T3 of 'B' instruction */ + SLJIT_ASSERT(diff <= 0x7ffff && diff >= -0x80000 && (jump->flags & IS_COND)); + jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1)); + jump_inst[1] = (sljit_u16)(0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | ((sljit_ins)diff & 0x7ff)); + return; + case 3: + /* Encoding T2 of 'B' instruction */ + SLJIT_ASSERT(diff <= 0x3ff && diff >= -0x400 && !(jump->flags & IS_COND)); + jump_inst[0] = (sljit_u16)(0xe000 | (diff & 0x7ff)); + return; + } + + SLJIT_ASSERT(diff <= 0x7fffff && diff >= -0x800000); + + /* Really complex instruction form for branches. Negate with sign bit. 
/*
 * Pre-pass run before code generation: estimates how short each jump and
 * "move address" sequence can be encoded and shrinks the recorded positions
 * accordingly.
 *
 * Labels, jumps and constants are visited together in the order produced by
 * SLJIT_GET_NEXT_MIN (presumably ascending position - confirm against the
 * macro definition). size_reduce accumulates the halfwords saved so far and
 * is subtracted from every label size, jump address and constant address
 * visited afterwards, and finally from compiler->size. The size chosen for
 * each jump is stored in jump->flags at JUMP_SIZE_SHIFT.
 */
static void reduce_code_size(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_uw total_size;
	sljit_uw size_reduce = 0;
	sljit_sw diff;

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();

	while (1) {
		SLJIT_GET_NEXT_MIN();

		/* Nothing left in any of the three lists. */
		if (next_min_addr == SLJIT_MAX_ADDRESS)
			break;

		/* A label may share its position with a jump or constant, so this
		   branch falls through instead of restarting the loop. */
		if (next_min_addr == next_label_size) {
			label->size -= size_reduce;

			label = label->next;
			next_label_size = SLJIT_GET_NEXT_SIZE(label);
		}

		if (next_min_addr == next_const_addr) {
			const_->addr -= size_reduce;
			const_ = const_->next;
			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
			continue;
		}

		if (next_min_addr != next_jump_addr)
			continue;

		jump->addr -= size_reduce;
		if (!(jump->flags & JUMP_MOV_ADDR)) {
			/* Start from the longest form; only jumps to labels that are
			   not rewritable can be shortened. */
			total_size = JUMP_MAX_SIZE;

			if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) {
				/* Unit size: instruction. */
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2;

				if (jump->flags & IS_COND) {
					/* The conditional ranges are tested with diff one
					   unit larger; the adjustment is undone below. */
					diff++;

					if (diff <= (0xff / SSIZE_OF(u16)) && diff >= (-0x100 / SSIZE_OF(u16)))
						total_size = 0;
					else if (diff <= (0xfffff / SSIZE_OF(u16)) && diff >= (-0x100000 / SSIZE_OF(u16)))
						total_size = 1;
					diff--;
				} else if (!(jump->flags & IS_BL) && diff <= (0x7ff / SSIZE_OF(u16)) && diff >= (-0x800 / SSIZE_OF(u16)))
					total_size = 1;

				/* Widest short form, tried only when nothing smaller fit. */
				if (total_size == JUMP_MAX_SIZE && diff <= (0xffffff / SSIZE_OF(u16)) && diff >= (-0x1000000 / SSIZE_OF(u16)))
					total_size = 2;
			}

			size_reduce += JUMP_MAX_SIZE - total_size;
		} else {
			/* Real size minus 1. Unit size: instruction. */
			total_size = 3;

			if (!(jump->flags & JUMP_ADDR)) {
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;

				if (diff <= (0xffd / SSIZE_OF(u16)) && diff >= (-0xfff / SSIZE_OF(u16)))
					total_size = 1;
			}

			size_reduce += 3 - total_size;
		}

		/* Record the chosen size for the code generator. */
		jump->flags |= total_size << JUMP_SIZE_SHIFT;
		jump = jump->next;
		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
	}

	compiler->size -= size_reduce;
}
+ SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); + + do { + buf_ptr = (sljit_u16*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 1); + do { + *code_ptr = *buf_ptr++; + if (next_min_addr == half_count) { + SLJIT_ASSERT(!label || label->size >= half_count); + SLJIT_ASSERT(!jump || jump->addr >= half_count); + SLJIT_ASSERT(!const_ || const_->addr >= half_count); + + /* These structures are ordered by their address. */ + if (next_min_addr == next_label_size) { + label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + half_count = half_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); + SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr < + ((jump->flags >> JUMP_SIZE_SHIFT) + ((jump->flags & 0xf0) <= PATCH_TYPE2)) * sizeof(sljit_u16)); + } else { + half_count += jump->flags >> JUMP_SIZE_SHIFT; + addr = (sljit_sw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = (sljit_uw)addr; + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + } + + SLJIT_GET_NEXT_MIN(); + } + code_ptr++; + half_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == half_count) { + label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump 
= compiler->jumps; + while (jump) { + generate_jump_or_mov_addr(jump, executable_offset); + jump = jump->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_u16); + + code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + + /* Set thumb mode flag. */ + return (void*)((sljit_uw)code | 0x1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: + case SLJIT_HAS_F64_AS_F32_PAIR: + case SLJIT_HAS_SIMD: +#ifdef SLJIT_IS_FPU_AVAILABLE + return (SLJIT_IS_FPU_AVAILABLE) != 0; +#else + /* Available by default. */ + return 1; +#endif + + case SLJIT_SIMD_REGS_ARE_PAIRS: + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CTZ: + case SLJIT_HAS_REV: + case SLJIT_HAS_ROT: + case SLJIT_HAS_CMOV: + case SLJIT_HAS_PREFETCH: + case SLJIT_HAS_COPY_F32: + case SLJIT_HAS_COPY_F64: + case SLJIT_HAS_ATOMIC: + return 1; + + default: + return 0; + } +} + +/* --------------------------------------------------------------------- */ +/* Core code generator functions. */ +/* --------------------------------------------------------------------- */ + +#define INVALID_IMM 0x80000000 +static sljit_uw get_imm(sljit_uw imm) +{ + /* Thumb immediate form. */ + sljit_s32 counter; + + if (imm <= 0xff) + return imm; + + if ((imm & 0xffff) == (imm >> 16)) { + /* Some special cases. */ + if (!(imm & 0xff00)) + return (1 << 12) | (imm & 0xff); + if (!(imm & 0xff)) + return (2 << 12) | ((imm >> 8) & 0xff); + if ((imm & 0xff00) == ((imm & 0xff) << 8)) + return (3 << 12) | (imm & 0xff); + } + + /* Assembly optimization: count leading zeroes? 
*/ + counter = 8; + if (!(imm & 0xffff0000)) { + counter += 16; + imm <<= 16; + } + if (!(imm & 0xff000000)) { + counter += 8; + imm <<= 8; + } + if (!(imm & 0xf0000000)) { + counter += 4; + imm <<= 4; + } + if (!(imm & 0xc0000000)) { + counter += 2; + imm <<= 2; + } + if (!(imm & 0x80000000)) { + counter += 1; + imm <<= 1; + } + /* Since imm >= 128, this must be true. */ + SLJIT_ASSERT(counter <= 31); + + if (imm & 0x00ffffff) + return INVALID_IMM; /* Cannot be encoded. */ + + return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1); +} + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) +{ + sljit_uw tmp; + + /* MOVS cannot be used since it destroy flags. */ + + if (imm >= 0x10000) { + tmp = get_imm(imm); + if (tmp != INVALID_IMM) + return push_inst32(compiler, MOV_WI | RD4(dst) | tmp); + tmp = get_imm(~imm); + if (tmp != INVALID_IMM) + return push_inst32(compiler, MVN_WI | RD4(dst) | tmp); + } + + /* set low 16 bits, set hi 16 bits to 0. */ + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) + | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); + + /* set hi 16 bit if needed. */ + if (imm >= 0x10000) + return push_inst32(compiler, MOVT | RD4(dst) + | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); + return SLJIT_SUCCESS; +} + +#define ARG1_IMM 0x0010000 +#define ARG2_IMM 0x0020000 +/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). 
*/ +#define SET_FLAGS 0x0100000 +#define UNUSED_RETURN 0x0200000 +#define REGISTER_OP 0x0400000 + +static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2) +{ + /* dst must be register + arg1 must be register, imm + arg2 must be register, imm */ + sljit_s32 reg; + sljit_uw imm, imm2; + + if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { + /* Both are immediates, no temporaries are used. */ + flags &= ~ARG1_IMM; + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + + if (flags & (ARG1_IMM | ARG2_IMM)) { + reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2); + imm = (flags & ARG2_IMM) ? arg2 : arg1; + + switch (flags & 0xffff) { + case SLJIT_CLZ: + case SLJIT_CTZ: + case SLJIT_REV: + case SLJIT_REV_U16: + case SLJIT_REV_S16: + case SLJIT_REV_U32: + case SLJIT_REV_S32: + case SLJIT_MUL: + case SLJIT_MULADD: + /* No form with immediate operand. */ + break; + case SLJIT_MOV: + SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG2); + return load_immediate(compiler, dst, imm); + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + imm2 = NEGATE(imm); + if (IS_2_LO_REGS(reg, dst)) { + if (imm <= 0x7) + return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); + if (imm2 <= 0x7) + return push_inst16(compiler, SUBSI3 | IMM3(imm2) | RD3(dst) | RN3(reg)); + if (reg == dst) { + if (imm <= 0xff) + return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst)); + if (imm2 <= 0xff) + return push_inst16(compiler, SUBSI8 | IMM8(imm2) | RDN3(dst)); + } + } + if (!(flags & SET_FLAGS)) { + if (imm <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm)); + if (imm2 <= 0xfff) + return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm2)); + } + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | 
imm2); + imm = get_imm(NEGATE(imm)); + if (imm != INVALID_IMM) + return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2); + if (flags & ARG2_IMM) { + imm = get_imm(~imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + } + break; + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (flags & ARG1_IMM) { + if (imm == 0 && IS_2_LO_REGS(reg, dst)) + return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + } + if (flags & UNUSED_RETURN) { + if (imm <= 0xff && reg_map[reg] <= 7) + return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg)); + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return push_inst32(compiler, CMPI_W | RN4(reg) | imm2); + imm = get_imm(NEGATE(imm)); + if (imm != INVALID_IMM) + return push_inst32(compiler, CMNI_W | RN4(reg) | imm); + break; + } + imm2 = NEGATE(imm); + if (IS_2_LO_REGS(reg, dst)) { + if (imm <= 0x7) + return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); + if (imm2 <= 0x7) + return push_inst16(compiler, ADDSI3 | IMM3(imm2) | RD3(dst) | RN3(reg)); + if (reg == dst) { + if (imm <= 0xff) + return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst)); + if (imm2 <= 0xff) + return push_inst16(compiler, ADDSI8 | IMM8(imm2) | RDN3(dst)); + } + } + if (!(flags & SET_FLAGS)) { + if (imm <= 0xfff) + return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm)); + if (imm2 <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm2)); + } + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return 
push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2); + imm = get_imm(NEGATE(imm)); + if (imm != INVALID_IMM) + return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (flags & ARG1_IMM) + break; + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2); + imm = get_imm(~imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_AND: + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TSTI : ANDI) | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2); + imm = get_imm(~imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_OR: + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2); + imm = get_imm(~imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_XOR: + if (imm == (sljit_uw)-1) { + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, MVNS | RD3(dst) | RN3(reg)); + return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(reg)); + } + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & ARG1_IMM) + break; + imm &= 0x1f; + + if (imm == 0) { + if (!(flags & SET_FLAGS)) + return push_inst16(compiler, MOV | SET_REGS44(dst, reg)); + if (IS_2_LO_REGS(dst, reg)) + return 
push_inst16(compiler, MOVS | RD3(dst) | RN3(reg)); + return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg)); + } + + switch (flags & 0xffff) { + case SLJIT_SHL: + case SLJIT_MSHL: + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + case SLJIT_LSHR: + case SLJIT_MLSHR: + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + case SLJIT_ASHR: + case SLJIT_MASHR: + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + case SLJIT_ROTL: + imm = (imm ^ 0x1f) + 1; + /* fallthrough */ + default: /* SLJIT_ROTR */ + return push_inst32(compiler, ROR_WI | RD4(dst) | RM4(reg) | IMM5(imm)); + } + default: + SLJIT_UNREACHABLE(); + break; + } + + if (flags & ARG2_IMM) { + imm = arg2; + arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(load_immediate(compiler, (sljit_s32)arg2, imm)); + } else { + imm = arg1; + arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(load_immediate(compiler, (sljit_s32)arg1, imm)); + } + + SLJIT_ASSERT(arg1 != arg2); + } + + /* Both arguments are registers. 
*/ + switch (flags & 0xffff) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + case SLJIT_MOV_P: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (dst == (sljit_s32)arg2) + return SLJIT_SUCCESS; + return push_inst16(compiler, MOV | SET_REGS44(dst, arg2)); + case SLJIT_MOV_U8: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_S8: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_U16: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_S16: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2)); + case SLJIT_CLZ: + SLJIT_ASSERT(arg1 == TMP_REG2); + return push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2)); + case SLJIT_CTZ: + SLJIT_ASSERT(arg1 == TMP_REG2); + FAIL_IF(push_inst32(compiler, RBIT | RN4(arg2) | RD4(dst) | RM4(arg2))); + return push_inst32(compiler, CLZ | RN4(dst) | RD4(dst) | RM4(dst)); + case SLJIT_REV: + case SLJIT_REV_U32: + case SLJIT_REV_S32: + SLJIT_ASSERT(arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, REV | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, REV_W | RN4(arg2) | RD4(dst) | RM4(arg2)); + case SLJIT_REV_U16: + case SLJIT_REV_S16: + SLJIT_ASSERT(arg1 == TMP_REG2); + + if (IS_2_LO_REGS(dst, arg2)) + FAIL_IF(push_inst16(compiler, REV16 | RD3(dst) | 
RN3(arg2))); + else + FAIL_IF(push_inst32(compiler, REV16_W | RN4(arg2) | RD4(dst) | RM4(arg2))); + + if (!(flags & REGISTER_OP)) + return SLJIT_SUCCESS; + + flags &= 0xffff; + if (reg_map[dst] <= 7) + return push_inst16(compiler, (flags == SLJIT_REV_U16 ? UXTH : SXTH) | RD3(dst) | RN3(dst)); + return push_inst32(compiler, (flags == SLJIT_REV_U16 ? UXTH_W : SXTH_W) | RD4(dst) | RM4(dst)); + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + if (IS_3_LO_REGS(dst, arg1, arg2)) + return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); + if (dst == (sljit_s32)arg1 && !(flags & SET_FLAGS)) + return push_inst16(compiler, ADD | SET_REGS44(dst, arg2)); + return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (flags & UNUSED_RETURN) { + if (IS_2_LO_REGS(arg1, arg2)) + return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2)); + return push_inst16(compiler, CMP_X | SET_REGS44(arg1, arg2)); + } + if (IS_3_LO_REGS(dst, arg1, arg2)) + return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2)); + return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MUL: + compiler->status_flags_state = 0; + if (!(flags & SET_FLAGS)) + return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2)); 
+ reg = (dst == TMP_REG2) ? TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(reg) | RN4(arg1) | RM4(arg2))); + /* cmp reg, dst asr #31 (reg is the temporary chosen above: TMP_REG1 when dst is TMP_REG2, TMP_REG2 otherwise). */ + return push_inst32(compiler, CMP_W | RN4(reg) | 0x70e0 | RM4(dst)); + case SLJIT_AND: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2)); + if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2)) + return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2)); + return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TST_W : AND_W) | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_OR: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_XOR: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MSHL: + reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f)); + arg2 = (sljit_uw)reg; + /* fallthrough */ + case SLJIT_SHL: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MLSHR: + reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f)); + arg2 = (sljit_uw)reg; + /* fallthrough */ + case SLJIT_LSHR: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MASHR: + reg = (arg2 == TMP_REG1) ? 
TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f)); + arg2 = (sljit_uw)reg; + /* fallthrough */ + case SLJIT_ASHR: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_ROTL: + reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2; + FAIL_IF(push_inst32(compiler, RSB_WI | RD4(reg) | RN4(arg2) | 0)); + arg2 = (sljit_uw)reg; + /* fallthrough */ + case SLJIT_ROTR: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, RORS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ROR_W | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MULADD: + compiler->status_flags_state = 0; + return push_inst32(compiler, MLA | RD4(dst) | RN4(arg1) | RM4(arg2) | RT4(dst)); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +#define STORE 0x01 +#define SIGNED 0x02 + +#define WORD_SIZE 0x00 +#define BYTE_SIZE 0x04 +#define HALF_SIZE 0x08 +#define PRELOAD 0x0c + +#define IS_WORD_SIZE(flags) (!((flags) & (BYTE_SIZE | HALF_SIZE))) +#define ALIGN_CHECK(argw, imm, shift) (!((argw) & ~((imm) << (shift)))) + +/* + 1st letter: + w = word + b = byte + h = half + + 2nd letter: + s = signed + u = unsigned + + 3rd letter: + l = load + s = store +*/ + +static const sljit_ins sljit_mem16[12] = { +/* w u l */ 0x5800 /* ldr */, +/* w u s */ 0x5000 /* str */, +/* w s l */ 0x5800 /* ldr */, +/* w s s */ 0x5000 /* str */, + +/* b u l */ 0x5c00 /* ldrb */, +/* b u s */ 0x5400 /* strb */, +/* b s l */ 0x5600 /* ldrsb */, +/* b s s */ 0x5400 /* strb */, + +/* h u l */ 0x5a00 /* ldrh */, +/* h u s */ 0x5200 /* strh */, +/* h s l */ 0x5e00 /* ldrsh */, +/* h s s */ 0x5200 /* strh */, +}; + +static const sljit_ins sljit_mem16_imm5[12] = { +/* w u l */ 0x6800 /* ldr imm5 */, +/* w u s */ 0x6000 /* str imm5 */, +/* w s l */ 0x6800 /* ldr imm5 */, +/* w s s */ 0x6000 /* 
str imm5 */, + +/* b u l */ 0x7800 /* ldrb imm5 */, +/* b u s */ 0x7000 /* strb imm5 */, +/* b s l */ 0x0000 /* not allowed */, +/* b s s */ 0x7000 /* strb imm5 */, + +/* h u l */ 0x8800 /* ldrh imm5 */, +/* h u s */ 0x8000 /* strh imm5 */, +/* h s l */ 0x0000 /* not allowed */, +/* h s s */ 0x8000 /* strh imm5 */, +}; + +#define MEM_IMM8 0xc00 /* or-ed into a sljit_mem32 opcode by emit_op_mem when -0xff <= argw < 0; the low bits then hold -argw */ +#define MEM_IMM12 0x800000 /* or-ed into a sljit_mem32 opcode by emit_op_mem when 0 <= argw <= 0xfff; the low bits then hold argw */ +static const sljit_ins sljit_mem32[13] = { +/* w u l */ 0xf8500000 /* ldr.w */, +/* w u s */ 0xf8400000 /* str.w */, +/* w s l */ 0xf8500000 /* ldr.w */, +/* w s s */ 0xf8400000 /* str.w */, + +/* b u l */ 0xf8100000 /* ldrb.w */, +/* b u s */ 0xf8000000 /* strb.w */, +/* b s l */ 0xf9100000 /* ldrsb.w */, +/* b s s */ 0xf8000000 /* strb.w */, + +/* h u l */ 0xf8300000 /* ldrh.w */, +/* h u s */ 0xf8200000 /* strh.w */, +/* h s l */ 0xf9300000 /* ldrsh.w */, +/* h s s */ 0xf8200000 /* strh.w (stores have no signed form) */, + +/* p u l */ 0xf8100000 /* pld */, +}; + +/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. 
*/ +static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value) +{ + sljit_uw imm; + + if (value >= 0) { + if (value <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value)); + imm = get_imm((sljit_uw)value); + if (imm != INVALID_IMM) + return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | imm); + } + else { + value = -value; + if (value <= 0xfff) + return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value)); + imm = get_imm((sljit_uw)value); + if (imm != INVALID_IMM) + return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | imm); + } + return SLJIT_ERR_UNSUPPORTED; +} + +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) +{ + sljit_s32 other_r; + sljit_uw imm, tmp; + + SLJIT_ASSERT(arg & SLJIT_MEM); + SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -0xff && argw <= 0xfff)); + + if (SLJIT_UNLIKELY(!(arg & REG_MASK))) { + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff); + if (imm != INVALID_IMM) { + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | imm)); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff)); + } + + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); + if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags]) + return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg)); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg)); + } + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + other_r = OFFS_REG(arg); + arg &= REG_MASK; + + if (!argw && IS_3_LO_REGS(reg, arg, other_r)) + return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)); + return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | ((sljit_ins)argw << 4)); + } + + 
arg &= REG_MASK; + + if (argw > 0xfff) { + imm = get_imm((sljit_uw)(argw & ~0xfff)); + if (imm != INVALID_IMM) { + push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | imm); + arg = tmp_reg; + argw = argw & 0xfff; + } + } + else if (argw < -0xff) { + tmp = (sljit_uw)((-argw + 0xfff) & ~0xfff); + SLJIT_ASSERT(tmp >= (sljit_uw)-argw); + imm = get_imm(tmp); + + if (imm != INVALID_IMM) { + push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | imm); + arg = tmp_reg; + argw += (sljit_sw)tmp; + + SLJIT_ASSERT(argw >= 0 && argw <= 0xfff); + } + } + + /* 16 bit instruction forms. */ + if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) { + tmp = 3; + if (IS_WORD_SIZE(flags)) { + if (ALIGN_CHECK(argw, 0x1f, 2)) + tmp = 2; + } + else if (flags & BYTE_SIZE) + { + if (ALIGN_CHECK(argw, 0x1f, 0)) + tmp = 0; + } + else { + SLJIT_ASSERT(flags & HALF_SIZE); + if (ALIGN_CHECK(argw, 0x1f, 1)) + tmp = 1; + } + + if (tmp < 3) + return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | ((sljit_ins)argw << (6 - tmp))); + } + else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && ALIGN_CHECK(argw, 0xff, 2) && reg_map[reg] <= 7) { + /* SP based immediate. */ + return push_inst16(compiler, STR_SP | (sljit_ins)((flags & STORE) ? 
0 : 0x800) | RDN3(reg) | ((sljit_ins)argw >> 2)); + } + + if (argw >= 0 && argw <= 0xfff) + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | (sljit_ins)argw); + else if (argw < 0 && argw >= -0xff) + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | (sljit_ins)-argw); + + SLJIT_ASSERT(arg != tmp_reg); + + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); + if (IS_3_LO_REGS(reg, arg, tmp_reg)) + return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg)); + return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg)); +} + +#undef ALIGN_CHECK +#undef IS_WORD_SIZE + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 size, i, tmp, word_arg_count; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); + sljit_uw offset; + sljit_uw imm = 0; +#ifdef __SOFTFP__ + sljit_u32 float_arg_count; +#else + sljit_u32 old_offset, f32_offset; + sljit_u32 remap[3]; + sljit_u32 *remap_ptr = remap; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) + imm |= (sljit_uw)1 << reg_map[i]; + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + imm |= (sljit_uw)1 << reg_map[i]; + + /* At least two registers must be set for PUSH_W and one for PUSH instruction. */ + FAIL_IF((imm & 0xff00) + ? 
push_inst32(compiler, PUSH_W | (1 << 14) | imm) + : push_inst16(compiler, PUSH | (1 << 8) | imm)); + + /* Stack must be aligned to 8 bytes: (LR, R4) */ + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((size & SSIZE_OF(sw)) != 0) { + FAIL_IF(push_inst16(compiler, SUB_SP_I | (sizeof(sljit_sw) >> 2))); + size += SSIZE_OF(sw); + } + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fsaveds > 0) + FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst32(compiler, VPUSH | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + } + } + + local_size = ((size + local_size + 0x7) & ~0x7) - size; + compiler->local_size = local_size; + + if (options & SLJIT_ENTER_REG_ARG) + arg_types = 0; + + arg_types >>= SLJIT_ARG_SHIFT; + word_arg_count = 0; + saved_arg_count = 0; +#ifdef __SOFTFP__ + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + + offset = 0; + float_arg_count = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst32(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + else + FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + offset += sizeof(sljit_f64) - sizeof(sljit_sw); + break; + case SLJIT_ARG_TYPE_F32: + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst32(compiler, VMOV | (float_arg_count << 16) | (offset << 10))); + else + FAIL_IF(push_inst32(compiler, 
VLDR_F32 | 0x800000 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count - 1 != (sljit_s32)(offset >> 2)) + tmp = word_arg_count; + else + break; + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst16(compiler, MOV | ((sljit_ins)reg_map[tmp] & 0x7) | (((sljit_ins)reg_map[tmp] & 0x8) << 4) | (offset << 1))); + else if (reg_map[tmp] <= 7) + FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(tmp) + | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + else + FAIL_IF(push_inst32(compiler, LDR | RT4(tmp) | RN4(SLJIT_SP) + | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw))))); + break; + } + + offset += sizeof(sljit_sw); + arg_types >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = offset; +#else + offset = SLJIT_FR0; + old_offset = SLJIT_FR0; + f32_offset = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != old_offset) + *remap_ptr++ = VMOV_F32 | SLJIT_32 | VD4(offset) | VM4(old_offset); + old_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + *remap_ptr++ = VMOV_F32 | 0x20 | VD4(offset) | VM4(f32_offset); + f32_offset = 0; + } else { + if (offset != old_offset) + *remap_ptr++ = VMOV_F32 | VD4(offset) | VM4(old_offset); + f32_offset = old_offset; + old_offset++; + } + offset++; + break; + default: + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - saved_arg_count, SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } + + word_arg_count++; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + /* remap_ptr - remap counts entries, so it is checked against the number of elements in remap, not its size in bytes. */ + SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap) / sizeof(remap[0])); + + while (remap_ptr > remap) + FAIL_IF(push_inst32(compiler, *(--remap_ptr))); +#endif + +#ifdef _WIN32 + if 
(local_size >= 4096) { + imm = get_imm(4096); + SLJIT_ASSERT(imm != INVALID_IMM); + + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + + if (local_size < 4 * 4096) { /* the comparisons below use >= so that exact multiples of 4096 also get their last page: the remainder left by local_size &= 0xfff is 0 for them */ + if (local_size >= 2 * 4096) { + if (local_size >= 3 * 4096) { + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + } + + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + } + } else { + FAIL_IF(load_immediate(compiler, TMP_REG2, ((sljit_uw)local_size >> 12) - 1)); + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + FAIL_IF(push_inst32(compiler, SUB_WI | SET_FLAGS | RD4(TMP_REG2) | RN4(TMP_REG2) | 1)); + FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-8 & 0xff))); + } + + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + local_size &= 0xfff; + } + + if (local_size >= 256) { + SLJIT_ASSERT(local_size < 4096); + + if (local_size <= (127 << 2)) + FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2))); + else + FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size)); + + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + } else if (local_size > 0) + FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | (sljit_uw)local_size)); +#else /* !_WIN32 */ + if (local_size > 0) { + if (local_size <= (127 << 2)) + FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2))); + else + FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size)); + } +#endif /* _WIN32 */ + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct 
sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); + + /* Doubles are saved, so alignment is unaffected. */ + if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)) + size += SSIZE_OF(sw); + + compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm) +{ + sljit_uw imm2; + + /* The TMP_REG1 register must keep its value. */ + if (imm <= (127u << 2)) + return push_inst16(compiler, ADD_SP_I | (imm >> 2)); + + if (imm <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | IMM12(imm)); + + imm2 = get_imm(imm); + + if (imm2 != INVALID_IMM) + return push_inst32(compiler, ADD_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm2); + + FAIL_IF(load_immediate(compiler, TMP_REG2, imm)); + return push_inst16(compiler, ADD_SP | RN3(TMP_REG2)); +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size) +{ + sljit_s32 local_size, fscratches, fsaveds, i, tmp; + sljit_s32 restored_reg = 0; + sljit_s32 lr_dst = TMP_PC; + sljit_uw reg_list = 0; + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128); + + local_size = compiler->local_size; + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + if (fsaveds + fscratches >= 
SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst32(compiler, VPOP | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + if (fsaveds > 0) + FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + } + + local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7; + } + + if (frame_size < 0) { + lr_dst = TMP_REG2; + frame_size = 0; + } else if (frame_size > 0) { + SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0); + lr_dst = 0; + frame_size &= ~0x7; + } + + tmp = SLJIT_S0 - compiler->saveds; + i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + if (tmp < i) { + restored_reg = i; + do { + reg_list |= (sljit_uw)1 << reg_map[i]; + } while (--i > tmp); + } + + i = compiler->scratches; + if (i >= SLJIT_FIRST_SAVED_REG) { + restored_reg = i; + do { + reg_list |= (sljit_uw)1 << reg_map[i]; + } while (--i >= SLJIT_FIRST_SAVED_REG); + } + + if (lr_dst == TMP_REG2 && reg_list == 0) { + reg_list |= (sljit_uw)1 << reg_map[TMP_REG2]; + restored_reg = TMP_REG2; + lr_dst = 0; + } + + if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) { + /* The local_size does not include the saved registers. 
*/ + tmp = 0; + if (reg_list != 0) { + tmp = 2; + if (local_size <= 0xfff) { + if (local_size == 0) { + SLJIT_ASSERT(restored_reg != TMP_REG2); + if (frame_size == 0) + return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x308); + if (frame_size > 2 * SSIZE_OF(sw)) + return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x100 | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw)))); + } + + if (reg_map[restored_reg] <= 7 && local_size <= 0x3fc) + FAIL_IF(push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(local_size >> 2))); + else + FAIL_IF(push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)local_size)); + tmp = 1; + } else if (frame_size == 0) { + frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw); + tmp = 3; + } + + /* Place for the saved register. */ + if (restored_reg != TMP_REG2) + local_size += SSIZE_OF(sw); + } + + /* Place for the lr register. */ + local_size += SSIZE_OF(sw); + + if (frame_size > local_size) + FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_ins)(frame_size - local_size) >> 2))); + else if (frame_size < local_size) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size))); + + if (tmp <= 1) + return SLJIT_SUCCESS; + + if (tmp == 2) { + frame_size -= SSIZE_OF(sw); + if (restored_reg != TMP_REG2) + frame_size -= SSIZE_OF(sw); + + if (reg_map[restored_reg] <= 7) + return push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(frame_size >> 2)); + + return push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)frame_size); + } + + tmp = (restored_reg == TMP_REG2) ? 
0x304 : 0x308; + return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)tmp); + } + + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + if (!(reg_list & 0xff00) && lr_dst != TMP_REG2) { + if (lr_dst == TMP_PC) + reg_list |= 1u << 8; + + /* At least one register must be set for POP instruction. */ + SLJIT_ASSERT(reg_list != 0); + + FAIL_IF(push_inst16(compiler, POP | reg_list)); + } else { + if (lr_dst != 0) + reg_list |= (sljit_uw)1 << reg_map[lr_dst]; + + /* At least two registers must be set for POP_W instruction. */ + SLJIT_ASSERT((reg_list & (reg_list - 1)) != 0); + + FAIL_IF(push_inst32(compiler, POP_W | reg_list)); + } + + if (frame_size > 0) + return push_inst16(compiler, SUB_SP_I | (((sljit_ins)frame_size - sizeof(sljit_sw)) >> 2)); + + if (lr_dst != 0) + return SLJIT_SUCCESS; + + return push_inst16(compiler, ADD_SP_I | 1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + return emit_stack_frame_release(compiler, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src))); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#if 
!(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__) + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _WIN32 +extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator); +extern long long __rt_sdiv(int denominator, int numerator); +#elif defined(__GNUC__) +extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator); +extern int __aeabi_idivmod(int numerator, int denominator); +#else +#error "Software divmod functions are needed" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__) + sljit_uw saved_reg_list[3]; + sljit_uw saved_reg_count; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst16(compiler, BKPT); + case SLJIT_NOP: + return push_inst16(compiler, NOP); + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + return push_inst32(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL) + | RD4(SLJIT_R1) | RT4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)); +#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__) + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0))); + FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1))); + FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1))); + return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1)); + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + return push_inst32(compiler, (op == SLJIT_DIV_UW ? 
UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)); +#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */ + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); + SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3); + + saved_reg_count = 0; + if (compiler->scratches >= 4) + saved_reg_list[saved_reg_count++] = 3; + if (compiler->scratches >= 3) + saved_reg_list[saved_reg_count++] = 2; + if (op >= SLJIT_DIV_UW) + saved_reg_list[saved_reg_count++] = 1; + + if (saved_reg_count > 0) { + FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8) + | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */)); + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */)); + } + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */)); + } + } + +#ifdef _WIN32 + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1))); + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__rt_udiv) : SLJIT_FUNC_ADDR(__rt_sdiv)))); +#elif defined(__GNUC__) + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + ((op | 0x2) == SLJIT_DIV_UW ? 
SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod)))); +#else +#error "Software divmod functions are needed" +#endif + + if (saved_reg_count > 0) { + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */)); + } + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */)); + } + return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8) + | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); + } + return SLJIT_SUCCESS; +#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */ + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r, flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + + op = GET_OPCODE(op); + if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + case SLJIT_MOV_P: + flags = WORD_SIZE; + break; + case SLJIT_MOV_U8: + flags = BYTE_SIZE; + if (src == SLJIT_IMM) + srcw = (sljit_u8)srcw; + break; + case SLJIT_MOV_S8: + flags = BYTE_SIZE | SIGNED; + if (src == SLJIT_IMM) + srcw = (sljit_s8)srcw; + break; + case SLJIT_MOV_U16: + flags = HALF_SIZE; + if (src == SLJIT_IMM) + srcw = (sljit_u16)srcw; + break; + case SLJIT_MOV_S16: + flags = HALF_SIZE | SIGNED; + if (src == SLJIT_IMM) + srcw = (sljit_s16)srcw; + break; + default: + SLJIT_UNREACHABLE(); + flags = 0; + break; + } + + if (src == SLJIT_IMM) + FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw)); + else if (src & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1)); + else if (FAST_IS_REG(dst)) + return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src); + else + dst_r = src; + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1); + } + + SLJIT_COMPILE_ASSERT(WORD_SIZE == 0, word_size_must_be_0); + flags = WORD_SIZE; + + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { + if (!(dst & SLJIT_MEM) && (!(src & SLJIT_MEM) || op == SLJIT_REV_S16)) + op |= REGISTER_OP; + flags |= HALF_SIZE; + } + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, flags, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + FAIL_IF(emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src)); /* report an emission failure instead of returning SLJIT_SUCCESS */ + + if (SLJIT_UNLIKELY(dst & SLJIT_MEM)) + return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_reg, 
src2_tmp_reg, flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2; + flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + + if (dst == TMP_REG1) + flags |= UNUSED_RETURN; + + if (src2 == SLJIT_IMM) + flags |= ARG2_IMM; + else if (src2 & SLJIT_MEM) { + src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2; + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, src2_tmp_reg, src2, src2w, TMP_REG1)); /* a failed operand load must not be ignored */ + src2w = src2_tmp_reg; + } else + src2w = src2; + + if (src1 == SLJIT_IMM) + flags |= ARG1_IMM; + else if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1)); + src1w = TMP_REG1; + } else + src1w = src1; + + FAIL_IF(emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, (sljit_uw)src1w, (sljit_uw)src2w)); /* otherwise a register destination would always yield SLJIT_SUCCESS */ + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct 
sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1_reg, + sljit_s32 src2_reg, + sljit_s32 src3, sljit_sw src3w) +{ + sljit_s32 is_left; + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w)); + + op = GET_OPCODE(op); + is_left = (op == SLJIT_SHL || op == SLJIT_MSHL); + + if (src1_reg == src2_reg) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w); + } + + ADJUST_LOCAL_OFFSET(src3, src3w); + + if (src3 == SLJIT_IMM) { + src3w &= 0x1f; + + if (src3w == 0) + return SLJIT_SUCCESS; + + if (IS_2_LO_REGS(dst_reg, src1_reg)) + FAIL_IF(push_inst16(compiler, (is_left ? LSLSI : LSRSI) | RD3(dst_reg) | RN3(src1_reg) | ((sljit_ins)src3w << 6))); + else + FAIL_IF(push_inst32(compiler, (is_left ? LSL_WI : LSR_WI) | RD4(dst_reg) | RM4(src1_reg) | IMM5(src3w))); + + src3w = (src3w ^ 0x1f) + 1; + return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(src2_reg) | (is_left ? 0x10 : 0x0) | IMM5(src3w)); + } + + if (src3 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2)); + src3 = TMP_REG2; + } + + if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) { + FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(src3) | 0x1f)); + src3 = TMP_REG2; + } + + if (dst_reg == src1_reg && IS_2_LO_REGS(dst_reg, src3)) + FAIL_IF(push_inst16(compiler, (is_left ? LSLS : LSRS) | RD3(dst_reg) | RN3(src3))); + else + FAIL_IF(push_inst32(compiler, (is_left ? LSL_W : LSR_W) | RD4(dst_reg) | RN4(src1_reg) | RM4(src3))); + + FAIL_IF(push_inst32(compiler, (is_left ? LSR_WI : LSL_WI) | RD4(TMP_REG1) | RM4(src2_reg) | (1 << 6))); + FAIL_IF(push_inst32(compiler, EORI | RD4(TMP_REG2) | RN4(src3) | 0x1f)); + FAIL_IF(push_inst32(compiler, (is_left ? 
LSR_W : LSL_W) | RD4(TMP_REG1) | RN4(TMP_REG1) | RM4(TMP_REG2))); + return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src))); + else + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2)); + + return push_inst16(compiler, BX | RN3(TMP_REG2)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 size, dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + switch (op) { + case SLJIT_FAST_ENTER: + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + if (FAST_IS_REG(dst)) + return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2)); + break; + case SLJIT_GET_RETURN_ADDRESS: + size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0); + + if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + /* The size of pc is not added above. */ + if ((size & SSIZE_OF(sw)) == 0) + size += SSIZE_OF(sw); + + size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64); + } + + SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1)); + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64) + return freg_map[reg]; + + if (type == SLJIT_SIMD_REG_128) /* was '!=', which rejected the 128 bit type and accepted every other one */ + return freg_map[reg] & ~0x1; /* mapped index with its lowest bit cleared */ + + return -1; /* any other register type is not handled here */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + if (size == 2) + return push_inst16(compiler, *(sljit_u16*)instruction); + return push_inst32(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#define FPU_LOAD (1 << 20) + +static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + sljit_uw imm; + sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD)); + + SLJIT_ASSERT(arg & SLJIT_MEM); + + /* Fast loads and stores. 
*/ + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 6))); + arg = SLJIT_MEM | TMP_REG1; + argw = 0; + } + + if ((arg & REG_MASK) && (argw & 0x3) == 0) { + if (!(argw & ~0x3fc)) + return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)argw >> 2)); + if (!(-argw & ~0x3fc)) + return push_inst32(compiler, inst | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)-argw >> 2)); + } + + if (arg & REG_MASK) { + if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg)); + } + + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc); + if (imm != INVALID_IMM) { + FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm)); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2)); + } + + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc); + if (imm != INVALID_IMM) { + argw = -argw; + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm)); + return push_inst32(compiler, inst | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2)); + } + } + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw)); + if (arg & REG_MASK) + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK)))); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + op ^= SLJIT_32; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | VD4(TMP_FREG1) | 
VM4(src))); + + if (FAST_IS_REG(dst)) + return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | VN4(TMP_FREG1)); + + /* Store the integer value from a VFP register. */ + return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); +} + +static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | VN4(TMP_FREG1))); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. */ + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); + FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | VN4(TMP_FREG1))); + } + + FAIL_IF(push_inst32(compiler, ins | VD4(dst_r) | VM4(TMP_FREG1))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + op ^= SLJIT_32; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + 
FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | VD4(src1) | VM4(src2))); + FAIL_IF(push_inst32(compiler, VMRS)); + + if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst16(compiler, IT | (0x6 << 4) | 0x8)); + return push_inst16(compiler, CMP /* Rm, Rn = r0 */); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_32; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (!(dst & SLJIT_MEM)) + FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src))); + break; + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src))); + op ^= SLJIT_32; + break; + } + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); 
+ CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + op ^= SLJIT_32; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2))); + break; + case SLJIT_SUB_F64: + FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2))); + break; + case SLJIT_MUL_F64: + FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2))); + break; + case SLJIT_DIV_F64: + FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2))); + break; + case SLJIT_COPYSIGN_F64: + FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(src2) | RT4(TMP_REG1) | ((op & SLJIT_32) ? 
(1 << 7) : 0))); + FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src1))); + FAIL_IF(push_inst32(compiler, CMPI_W | RN4(TMP_REG1) | 0)); + FAIL_IF(push_inst16(compiler, IT | (0xb << 4) | 0x8)); + return push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(dst_r)); + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ +#if defined(__ARM_NEON) && __ARM_NEON + sljit_u32 exp; + sljit_ins ins; +#endif /* NEON */ + union { + sljit_u32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + +#if defined(__ARM_NEON) && __ARM_NEON + if ((u.imm << (32 - 19)) == 0) { + exp = (u.imm >> (23 + 2)) & 0x3f; + + if (exp == 0x20 || exp == 0x1f) { + ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f); + return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf)); + } + } +#endif /* NEON */ + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm)); + return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ +#if defined(__ARM_NEON) && __ARM_NEON + sljit_u32 exp; + sljit_ins ins; +#endif /* NEON */ + union { + sljit_u32 imm[2]; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + +#if defined(__ARM_NEON) && __ARM_NEON + if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) { + exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff; + + if (exp == 0x100 || exp == 0xff) { + ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f); + return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | 
VD4(freg) | (ins & 0xf)); + } + } +#endif /* NEON */ + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0])); + if (u.imm[0] == u.imm[1]) + return push_inst32(compiler, VMOV2 | RN4(TMP_REG1) | RT4(TMP_REG1) | VM4(freg)); + + FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1])); + return push_inst32(compiler, VMOV2 | RN4(TMP_REG2) | RT4(TMP_REG1) | VM4(freg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ + sljit_s32 reg2; + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + if (reg & REG_PAIR_MASK) { + reg2 = REG_PAIR_SECOND(reg); + reg = REG_PAIR_FIRST(reg); + + inst = VMOV2 | RN4(reg) | RT4(reg2) | VM4(freg); + } else { + inst = VMOV | VN4(freg) | RT4(reg); + + if (!(op & SLJIT_32)) + inst |= 1 << 7; + } + + if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64) + inst |= 1 << 20; + + return push_inst32(compiler, inst); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_ATOMIC_STORED: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + return 0x0; + + case SLJIT_NOT_EQUAL: + case SLJIT_ATOMIC_NOT_STORED: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return 0x1; + + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x2; + /* fallthrough */ + + case SLJIT_LESS: + return 0x3; + + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x3; + /* fallthrough */ + + case SLJIT_GREATER_EQUAL: + return 0x2; + + case SLJIT_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + return 0x8; + + case SLJIT_LESS_EQUAL: + case 
SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + return 0x9; + + case SLJIT_SIG_LESS: + case SLJIT_UNORDERED_OR_LESS: + return 0xb; + + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + return 0xa; + + case SLJIT_SIG_GREATER: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + return 0xc; + + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return 0xd; + + case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) + return 0x1; + /* fallthrough */ + + case SLJIT_UNORDERED: + return 0x6; + + case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) + return 0x0; + /* fallthrough */ + + case SLJIT_ORDERED: + return 0x7; + + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + return 0x4; + + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return 0x5; + + default: /* SLJIT_JUMP */ + SLJIT_UNREACHABLE(); + return 0xe; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_ins cc; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type < SLJIT_JUMP) { + jump->flags |= IS_COND; + cc = get_cc(compiler, type); + jump->flags |= cc << 8; 
+ PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + } + + jump->addr = compiler->size; + if (type <= SLJIT_JUMP) + PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1))); + else { + jump->flags |= IS_BL; + PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1))); + } + + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; + return jump; +} + +#ifdef __SOFTFP__ + +static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space) +{ + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; + sljit_u32 word_arg_offset = 0; + sljit_u32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_u32 src_offset = 4 * sizeof(sljit_sw); + sljit_u8 offsets[4]; + sljit_u8 *offset_ptr = offsets; + + if (src && FAST_IS_REG(*src)) + src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw); + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f64); + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f32); + float_arg_count++; + break; + default: + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_sw); + word_arg_offset += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { + /* Keep lr register on the stack. 
*/ + if (is_tail_call) + offset += sizeof(sljit_sw); + + offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7; + + *extra_space = offset; + + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset)); + else + FAIL_IF(push_inst16(compiler, SUB_SP_I | (offset >> 2))); + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, -1)); + *extra_space = 0; + } + + SLJIT_ASSERT(reg_map[TMP_REG1] == 12); + + /* Process arguments in reversed direction. */ + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count--; + offset = *(--offset_ptr); + + SLJIT_ASSERT((offset & 0x7) == 0); + + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) { + FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); + *src = TMP_REG1; + } + FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + } else + FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count--; + offset = *(--offset_ptr); + + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { + FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); + *src = TMP_REG1; + } + FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10))); + } else + FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); + break; + default: + word_arg_offset -= sizeof(sljit_sw); + offset = *(--offset_ptr); + + SLJIT_ASSERT(offset >= word_arg_offset); + + if (offset != word_arg_offset) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { + FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); + *src = TMP_REG1; + } + else 
if (src_offset == word_arg_offset) { + *src = (sljit_s32)(1 + (offset >> 2)); + src_offset = offset; + } + FAIL_IF(push_inst16(compiler, MOV | (offset >> 2) | (word_arg_offset << 1))); + } else + FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); + } + break; + } + + types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} +/* Soft-float post-call fixup: when the return type stored in the low bits of arg_types is F64 or F32, one VMOV2 or VMOV instruction is emitted (presumably copying the result from core registers into the float return register; confirm against the encodings). Returns SLJIT_SUCCESS unless a FAIL_IF check bails out earlier. */ +static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) + FAIL_IF(push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0)); + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32) + FAIL_IF(push_inst32(compiler, VMOV | (0 << 16) | (0 << 12))); + + return SLJIT_SUCCESS; +} + +#else +/* Hard-float argument setup: skips the return type, then walks the argument types. offset counts the float arguments seen so far and new_offset is the next target register; a VMOV_F32 is emitted whenever the two differ. f32_offset remembers the register given to the previous unpaired F32 argument (0 means none): the next F32 argument is moved to that same register index instead of consuming a new one. */ +static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_u32 offset = SLJIT_FR0; + sljit_u32 new_offset = SLJIT_FR0; + sljit_u32 f32_offset = 0; + + /* Remove return value. 
*/ + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != new_offset) + FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(new_offset) | VM4(offset))); + + new_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(f32_offset) | VM4(offset))); + f32_offset = 0; + } else { + if (offset != new_offset) + FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(new_offset) | VM4(offset))); + f32_offset = new_offset; + new_offset++; + } + offset++; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#endif +/* Emits a direct call of the given type and returns its jump record. Soft-float builds handle every call that is not SLJIT_CALL_REG_ARG separately: softfloat_call_with_args reports the stack space it reserved in extra_space, the jump is emitted, and the space is released afterwards with ADD_SP_I. A SLJIT_CALL_RETURN call without extra space becomes a plain jump; with extra space, TMP_REG2 is loaded from the slot at extra_space - sizeof(sljit_sw) and used as the BX target. In all other cases SLJIT_CALL_RETURN releases the stack frame first and then jumps. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ +#ifdef __SOFTFP__ + struct sljit_jump *jump; + sljit_u32 extra_space = (sljit_u32)type; +#endif + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#ifdef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2) + | RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw)))); + + PTR_FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2))); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG2))); + return jump; + } + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); + PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); + return jump; + } +#endif /* __SOFTFP__ */ + + if (type & SLJIT_CALL_RETURN) { + /* 
ldmia sp!, {..., lr} */ + PTR_FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + +#ifndef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); +#endif /* !__SOFTFP__ */ + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} +/* Emits an indirect jump (type <= SLJIT_JUMP) or call to src / srcw. A register source becomes a single BX or BLX. A memory source is loaded into TMP_PC for jumps, or into TMP_REG1 followed by BLX for calls. An immediate target is recorded as a sljit_jump flagged JUMP_ADDR, room for JUMP_MAX_SIZE - 1 units is reserved for materialising the address, and a BX / BLX through TMP_REG1 is emitted. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + + if (src != SLJIT_IMM) { + if (FAST_IS_REG(src)) { + SLJIT_ASSERT(reg_map[src] != 14); + return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src)); + } + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1)); + if (type >= SLJIT_FAST_CALL) + return push_inst16(compiler, BLX | RN3(TMP_REG1)); + } +/* NOTE(review): a memory source with type below SLJIT_FAST_CALL does not return above, so after the load into TMP_PC control also reaches the jump record code below; confirm this is intended. */ + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); + jump->u.target = (sljit_uw)srcw; + + jump->addr = compiler->size; + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; + return push_inst16(compiler, (type <= SLJIT_JUMP ? 
BX : BLX) | RN3(TMP_REG1)); +} +/* Emits an indirect call to src / srcw with the given argument types. A memory source is first loaded into TMP_REG1. For SLJIT_CALL_RETURN, a target held in one of the saved registers in the range tested below is copied to TMP_REG1 first (presumably because releasing the frame restores those registers; confirm). Soft-float builds set up arguments with softfloat_call_with_args, release any extra stack space after the transfer and, for SLJIT_CALL_RETURN with extra space, return through TMP_REG2. Otherwise SLJIT_CALL_RETURN releases the stack frame and the call degrades to a jump emitted by sljit_emit_ijump. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ +#ifdef __SOFTFP__ + sljit_u32 extra_space = (sljit_u32)type; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) { + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src))); + src = TMP_REG1; + } + +#ifdef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP; + + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2) + | RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw)))); + + FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2))); + + if (type & SLJIT_CALL_RETURN) + return push_inst16(compiler, BX | RN3(TMP_REG2)); + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); + return softfloat_post_call_with_args(compiler, arg_types); + } +#endif /* __SOFTFP__ */ + + if (type & SLJIT_CALL_RETURN) { + /* ldmia sp!, {..., lr} */ + FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP; + } + +#ifndef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); +#endif /* !__SOFTFP__ */ + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +#ifdef __SOFTFP__ +/* Soft-float only: places a floating point return value. With SLJIT_ENTER_REG_ARG the value is moved to SLJIT_RETURN_FREG via sljit_emit_fop1 (nothing is emitted when src is already SLJIT_FR0). Otherwise it is transferred to SLJIT_R0 (SLJIT_32 operations) or to the SLJIT_R0 / SLJIT_R1 pair, using VMOV / VMOV2 for register sources and an integer move or pair load for any other source. */ +static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler 
*compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + if (compiler->options & SLJIT_ENTER_REG_ARG) { + if (src == SLJIT_FR0) + return SLJIT_SUCCESS; + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw); + } + + if (FAST_IS_REG(src)) { + if (op & SLJIT_32) + return push_inst32(compiler, VMOV | (1 << 20) | VN4(src) | RT4(SLJIT_R0)); + return push_inst32(compiler, VMOV2 | (1 << 20) | VM4(src) | RT4(SLJIT_R0) | RN4(SLJIT_R1)); + } + + SLJIT_SKIP_CHECKS(compiler); + + if (op & SLJIT_32) + return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw); + return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw); +} + +#endif /* __SOFTFP__ */ +/* Materialises the condition given by type as a 0 / 1 value. For opcodes below SLJIT_ADD an IT block selects between writing 1 and 0 to the destination. For SLJIT_AND the destination is ANDed with 1 or 0 under an IT block. For the remaining opcodes (OR, otherwise XOR) the destination is conditionally combined with 1. Memory destinations are processed in TMP_REG1 and stored back. When SLJIT_SET_Z was requested, a final flag-setting move from the result updates the flags. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 dst_r, flags = GET_ALL_FLAGS(op); + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + cc = get_cc(compiler, type); + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (op < SLJIT_ADD) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); + if (reg_map[dst_r] > 7) { + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1)); + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0)); + } else { + /* The movsi (immediate) instruction does not set flags in IT block. 
*/ + FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1)); + FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0)); + } + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2); + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2)); + + if (op == SLJIT_AND) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); + FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 1)); + FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 0)); + } + else { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + FAIL_IF(push_inst32(compiler, ((op == SLJIT_OR) ? ORRI : EORI) | RN4(dst_r) | RD4(dst_r) | 1)); + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2)); + + if (!(flags & SLJIT_SET_Z)) + return SLJIT_SUCCESS; + + /* The condition must always be set, even if the ORR/EORI is not executed above. */ + return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r)); +} +/* Conditional select: dst_reg receives src1 when the condition in type holds and src2_reg otherwise. The operands are swapped and the condition inverted (type ^= 0x1) when src1 already lives in dst_reg, or when src1 is a memory operand that can be loaded straight into dst_reg, so that a single conditional instruction (or a MOVW / MOVT pair for wide immediates) remains inside the IT block. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + sljit_uw cc, tmp; + + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (src2_reg != dst_reg && src1 == dst_reg) { + src1 = src2_reg; + src1w = 0; + src2_reg = dst_reg; + type ^= 0x1; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, (src2_reg != dst_reg) ? 
dst_reg : TMP_REG1, src1, src1w, TMP_REG1)); + + if (src2_reg != dst_reg) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + src1 = TMP_REG1; + src1w = 0; + } + } else if (dst_reg != src2_reg) + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(dst_reg, src2_reg))); + + cc = get_cc(compiler, type & ~SLJIT_32); + + if (src1 != SLJIT_IMM) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src1)); + } +/* Immediate source: try the single instruction forms first (16 bit MOVW, encodable MOV_WI, encodable MVN_WI of the inverted value). */ + tmp = (sljit_uw)src1w; + + if (tmp < 0x10000) { + /* set low 16 bits, set hi 16 bits to 0. */ + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst32(compiler, MOVW | RD4(dst_reg) + | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)); + } + + tmp = get_imm((sljit_uw)src1w); + if (tmp != INVALID_IMM) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp); + } + + tmp = get_imm(~(sljit_uw)src1w); + if (tmp != INVALID_IMM) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp); + } +/* Fallback: the IT mask differs from the single instruction cases above, presumably so that both the MOVW and the MOVT below are conditional; confirm against the IT encoding. */ + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4)); + + tmp = (sljit_uw)src1w; + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg) + | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff))); + return push_inst32(compiler, MOVT | RD4(dst_reg) + | COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16)); +} +/* Floating point conditional select: dst_freg receives src1 when the condition in type holds and src2_freg otherwise. SLJIT_32 is toggled in type up front so that (type & SLJIT_32) can be ORed directly into the VMOV_F32 and load encodings. When dst_freg already holds src1 the operands are swapped and the condition inverted; a memory src1 is loaded into TMP_FREG2 first. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + type ^= SLJIT_32; + + if (dst_freg != src2_freg) { + if (dst_freg == 
src1) { + src1 = src2_freg; + src1w = 0; + type ^= 0x1; + } else + FAIL_IF(push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src2_freg))); + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w)); + src1 = TMP_FREG2; + } + + FAIL_IF(push_inst16(compiler, IT | (get_cc(compiler, type & ~SLJIT_32) << 4) | 0x8)); + return push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src1)); +} +/* Loads or stores a register pair. Requests without a register pair are forwarded to sljit_emit_mem_unaligned. When any of SLJIT_MEM_UNALIGNED / SLJIT_MEM_ALIGNED_16 / SLJIT_MEM_ALIGNED_32 is set, the access is split into two word accesses after the address has been rewritten so that memw lies in [-0xff, 0xfff - SSIZE_OF(sw)] (see the assertion), which keeps memw + SSIZE_OF(sw) addressable as well. Otherwise a single LDRD / STRD with a word-scaled 8 bit offset is emitted. TMP_REG1 is used whenever a new base address has to be computed. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + sljit_uw imm, tmp; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)) { + if ((mem & REG_MASK) == 0) { + if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) { + imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff)); + + if (imm != INVALID_IMM) + memw = (memw & 0xfff) - 0x1000; + } else { + imm = get_imm((sljit_uw)(memw & ~0xfff)); + + if (imm != INVALID_IMM) + memw &= 0xfff; + } + + if (imm == INVALID_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + memw = 0; + } else + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm)); + + mem = SLJIT_MEM1(TMP_REG1); + } else if (mem & OFFS_REG_MASK) { + FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6))); + memw = 0; + mem = SLJIT_MEM1(TMP_REG1); + } else if (memw < -0xff) { + /* Zero value can be included in the first case. 
*/ + if ((-memw & 0xfff) <= SSIZE_OF(sw)) + tmp = (sljit_uw)((-memw + 0x7ff) & ~0x7ff); + else + tmp = (sljit_uw)((-memw + 0xfff) & ~0xfff); + + SLJIT_ASSERT(tmp >= (sljit_uw)-memw); + imm = get_imm(tmp); + + if (imm != INVALID_IMM) { + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm)); + memw += (sljit_sw)tmp; + SLJIT_ASSERT(memw >= 0 && memw <= 0xfff - SSIZE_OF(sw)); + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK))); + memw = 0; + } + + mem = SLJIT_MEM1(TMP_REG1); + } else if (memw >= (0x1000 - SSIZE_OF(sw))) { + if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) { + imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff)); + + if (imm != INVALID_IMM) + memw = (memw & 0xfff) - 0x1000; + } else { + imm = get_imm((sljit_uw)(memw & ~0xfff)); + + if (imm != INVALID_IMM) + memw &= 0xfff; + } + + if (imm != INVALID_IMM) { + SLJIT_ASSERT(memw >= -0xff && memw <= 0xfff); + FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm)); + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK))); + memw = 0; + } + + mem = SLJIT_MEM1(TMP_REG1); + } + + flags = WORD_SIZE; + + SLJIT_ASSERT(memw <= 0xfff - SSIZE_OF(sw) && memw >= -0xff); +/* When loading and the first register of the pair is also the base register, the second word is loaded first so the base is still intact for the second access. */ + if (type & SLJIT_MEM_STORE) { + flags |= STORE; + } else if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2)); + return emit_op_mem(compiler, WORD_SIZE, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2); + } + + FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2)); + return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2); + } +/* Single LDRD / STRD path: flags starts as bit 23 and is cleared in the branches below where the remaining offset is applied negatively (presumably bit 23 is the add-offset bit; confirm). memw ends up as a word-scaled offset in 0..0xff, see the final assertion. */ + flags = 1 << 23; + + if ((mem & REG_MASK) == 0) { + tmp = (sljit_uw)(memw & 0x7fc); + imm = get_imm((sljit_uw)((memw + (tmp <= 
0x400 ? 0 : 0x400)) & ~0x3fc)); + + if (imm == INVALID_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + memw = 0; + } else { + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm)); + memw = (memw & 0x3fc) >> 2; + + if (tmp > 0x400) { + memw = 0x100 - memw; + flags = 0; + } + + SLJIT_ASSERT(memw >= 0 && memw <= 0xff); + } + + mem = SLJIT_MEM1(TMP_REG1); + } else if (mem & OFFS_REG_MASK) { + FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6))); + memw = 0; + mem = SLJIT_MEM1(TMP_REG1); + } else if (memw < 0) { + if ((-memw & ~0x3fc) == 0) { + flags = 0; + memw = -memw >> 2; + } else { + tmp = (sljit_uw)(-memw & 0x7fc); + imm = get_imm((sljit_uw)((-memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc)); + + if (imm != INVALID_IMM) { + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm)); + memw = (-memw & 0x3fc) >> 2; + + if (tmp <= 0x400) + flags = 0; + else + memw = 0x100 - memw; + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK))); + memw = 0; + } + + mem = SLJIT_MEM1(TMP_REG1); + } + } else if ((memw & ~0x3fc) != 0) { + tmp = (sljit_uw)(memw & 0x7fc); + imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc)); + + if (imm != INVALID_IMM) { + FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm)); + memw = (memw & 0x3fc) >> 2; + + if (tmp > 0x400) { + memw = 0x100 - memw; + flags = 0; + } + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK))); + memw = 0; + } + + mem = SLJIT_MEM1(TMP_REG1); + } else + memw >>= 2; + + SLJIT_ASSERT(memw >= 0 && memw <= 0xff); + return push_inst32(compiler, ((type & SLJIT_MEM_STORE) ? 
STRD : LDRD) | (sljit_ins)flags | RN4(mem & REG_MASK) | RT4(REG_PAIR_FIRST(reg)) | RD4(REG_PAIR_SECOND(reg)) | (sljit_ins)memw); +} +/* Emits a load or store that also updates the base register. Only a plain base register with an immediate offset in [-255, 255] is supported; anything else returns SLJIT_ERR_UNSUPPORTED. With SLJIT_MEM_SUPP nothing is emitted and SLJIT_SUCCESS only reports that the form is available. Bit 0x400 is set unless SLJIT_MEM_POST is requested and bit 0x200 is set for non-negative offsets (presumably the pre-index and add bits; confirm against the encoding). */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw)); + + if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -255)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + switch (type & 0xff) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + case SLJIT_MOV_P: + flags = WORD_SIZE; + break; + case SLJIT_MOV_U8: + flags = BYTE_SIZE; + break; + case SLJIT_MOV_S8: + flags = BYTE_SIZE | SIGNED; + break; + case SLJIT_MOV_U16: + flags = HALF_SIZE; + break; + case SLJIT_MOV_S16: + flags = HALF_SIZE | SIGNED; + break; + default: + SLJIT_UNREACHABLE(); + flags = WORD_SIZE; + break; + } + + if (type & SLJIT_MEM_STORE) + flags |= STORE; + + inst = sljit_mem32[flags] | 0x900; + + if (!(type & SLJIT_MEM_POST)) + inst |= 0x400; + + if (memw >= 0) + inst |= 0x200; + else + memw = -memw; + + return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | (sljit_ins)memw); +} +/* Normalises *mem / *memw for an access that accepts an immediate offset in [-0xff, max_offset]. An index register form is folded into TMP_REG1 with one ADD_W. A base register whose offset is already in range is returned unchanged. Otherwise the out-of-range part of the offset is added to (or subtracted from) the base into TMP_REG1 when get_imm can encode it, and as a last resort it is built with load_immediate and the base is added. On return *mem is a plain register number (callers OR in SLJIT_MEM) and *memw is the residual offset. */ +static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset) +{ + sljit_s32 arg = *mem; + sljit_sw argw = *memw; + sljit_uw imm; + + *mem = TMP_REG1; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + *memw = 0; + return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 6)); + } + + arg &= REG_MASK; + + if (arg) { + if (argw <= max_offset && argw >= -0xff) { + *mem = arg; + return SLJIT_SUCCESS; + } + + if (argw < 0) { + imm = get_imm((sljit_uw)(-argw & ~0xff)); +/* An unencodable value is reported as INVALID_IMM (this is what the other get_imm callers in this file test for), not as zero; on failure fall through to the load_immediate path at the end. */ + if (imm != INVALID_IMM) { + *memw = -(-argw & 0xff); + return push_inst32(compiler, SUB_WI | 
RD4(TMP_REG1) | RN4(arg) | imm); + } + } else if ((argw & 0xfff) <= max_offset) { + imm = get_imm((sljit_uw)(argw & ~0xfff)); + + if (imm != INVALID_IMM) { + *memw = argw & 0xfff; + return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm); + } + } else { + imm = get_imm((sljit_uw)((argw | 0xfff) + 1)); + + if (imm != INVALID_IMM) { + *memw = (argw & 0xfff) - 0x1000; + return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm); + } + } + } +/* Generic fallback: split the offset into a 4K aligned part built in TMP_REG1 and a residual that fits the allowed range. */ + imm = (sljit_uw)(argw & ~0xfff); + + if ((argw & 0xfff) > max_offset) { + imm += 0x1000; + *memw = (argw & 0xfff) - 0x1000; + } else + *memw = argw & 0xfff; + + FAIL_IF(load_immediate(compiler, TMP_REG1, imm)); + + if (arg == 0) + return SLJIT_SUCCESS; + + return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, arg)); +} +/* Floating point load / store honouring the alignment flags in type. SLJIT_MEM_ALIGNED_32 accesses go straight to emit_fop_mem. Everything else is routed through core registers using word sized accesses: stores copy freg to TMP_REG2 with VMOV (a second VMOV with 0x80 set fetches the other word of a double), loads read one or two words and move them into freg with VMOV / VMOV2. For doubles the address is first normalised by update_mem_addr so that memw + 4 stays addressable. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + if (type & SLJIT_MEM_ALIGNED_32) + return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 
0 : FPU_LOAD), freg, mem, memw); +/* Not 32 bit aligned: transfer the value through core registers, one word at a time. */ + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | RT4(TMP_REG2))); + + if (type & SLJIT_32) + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1); + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + mem |= SLJIT_MEM; + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1)); + FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | 0x80 | RT4(TMP_REG2))); + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw + 4, TMP_REG1); + } + + if (type & SLJIT_32) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1)); + return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG2)); + } + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + mem |= SLJIT_MEM; + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, mem, memw + 4, TMP_REG1)); + return push_inst32(compiler, VMOV2 | VM4(freg) | RT4(TMP_REG2) | RN4(TMP_REG1)); +} +/* Reduces a memory operand to a single base register for the SIMD load / store forms used below. Base plus index is folded into TMP_REG1 with ADD_W; an absolute address is loaded into TMP_REG1; a base with zero offset is returned as is; any other offset is added to (or subtracted from) the base into TMP_REG1, using an immediate instruction when get_imm can encode its magnitude and load_immediate plus ADD otherwise. *mem_ptr receives the resulting register number. */ +static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw) +{ + sljit_uw imm; + sljit_s32 mem = *mem_ptr; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + *mem_ptr = TMP_REG1; + return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)); + } + + if (SLJIT_UNLIKELY(!(mem & REG_MASK))) { + *mem_ptr = TMP_REG1; + return load_immediate(compiler, TMP_REG1, (sljit_uw)memw); + } + + mem &= REG_MASK; + + if (memw == 0) { + *mem_ptr = mem; + return SLJIT_SUCCESS; + } + + *mem_ptr = TMP_REG1; + imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw)); + + if (imm != INVALID_IMM) + return push_inst32(compiler, ((memw < 0) ? 
SUB_WI : ADD_WI) | RD4(TMP_REG1) | RN4(mem) | imm); + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem)); +} +/* Maps freg to the register index used for a 128 bit operation: an odd index is rounded up to the next even value, and the result is then decremented by one when it does not exceed SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS. The assertion ties this to the parity of freg_map for the selected register. */ +static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg) +{ + freg += freg & 0x1; + + SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)); + + if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS) + freg--; + + return freg; +} +/* Evaluates to +1 for an odd freg and -1 for an even freg: the index delta to the other register of a 128 bit pair. */ +#define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1) +/* Moves a whole vector register between freg and srcdst. Only reg_size 3 and 4 are supported, and float moves only with elem_size 2 or 3. SLJIT_SIMD_TEST only reports support. Register to register moves are emitted as VORR with both sources equal; memory operands are reduced to a base register and accessed with VLD1 / VST1, adding an alignment hint when the alignment field of type is 3 or larger. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (!(srcdst & SLJIT_MEM)) { + if (reg_size == 4) + srcdst = simd_get_quad_reg_index(srcdst); + + if (type & SLJIT_SIMD_STORE) + ins = VD4(srcdst) | VN4(freg) | VM4(freg); + else + ins = VD4(freg) | VN4(srcdst) | VM4(srcdst); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 6; + + return push_inst32(compiler, VORR | ins); + } + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + if (elem_size > 3) + elem_size = 3; + + ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD4(freg) + | (sljit_ins)((reg_size == 3) ? 
(0x7 << 8) : (0xa << 8)); + + SLJIT_ASSERT(reg_size >= alignment); + + if (alignment == 3) + ins |= 0x10; + else if (alignment >= 4) + ins |= 0x20; + + return push_inst32(compiler, ins | RN4(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf); +} +/* Tries to express value as an immediate of the VMOV_i style instruction for the given element size. The element size is first narrowed when the value consists of two identical halves. Each loop tries the supported byte positions; if none matches, the value is inverted once (result gets bit 5 set) and tried again. Returns the encoded immediate and mode bits, or ~(sljit_ins)0 when neither the value nor its inverse can be encoded. */ +static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value) +{ + sljit_ins result; + + if (elem_size > 1 && (sljit_u16)value == (value >> 16)) { + elem_size = 1; + value = (sljit_u16)value; + } + + if (elem_size == 1 && (sljit_u8)value == (value >> 8)) { + elem_size = 0; + value = (sljit_u8)value; + } + + switch (elem_size) { + case 0: + SLJIT_ASSERT(value <= 0xff); + result = 0xe00; + break; + case 1: + SLJIT_ASSERT(value <= 0xffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x800; + break; + } + + if ((value & 0xff) == 0) { + value >>= 8; + result |= 0xa00; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value ^= (sljit_uw)0xffff; + result = (1 << 5); + } + break; + default: + SLJIT_ASSERT(value <= 0xffffffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x000; + break; + } + + if ((value & ~(sljit_uw)0xff00) == 0) { + value >>= 8; + result |= 0x200; + break; + } + + if ((value & ~(sljit_uw)0xff0000) == 0) { + value >>= 16; + result |= 0x400; + break; + } + + if ((value & ~(sljit_uw)0xff000000) == 0) { + value >>= 24; + result |= 0x600; + break; + } + + if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) { + value >>= 8; + result |= 0xc00; + break; + } + + if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) { + value >>= 16; + result |= 0xd00; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value = ~value; + result = (1 << 5); + } + break; + } +/* Scatter the remaining 8 bit payload into the three immediate fields of the instruction. */ + return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 21) | result; +} +/* Fills every lane of freg with the scalar src. Supports reg_size 3 and 4; floats need elem_size 2 or 3, integers elem_size up to 2. A zero immediate is a single VMOV_i. 64 bit float elements are copied into both halves of the register pair. Memory sources use the replicating VLD1_r load, float registers use VDUP_s, integer immediates use VMOV_i when simd_get_imm can encode them, and everything else is duplicated from a core register with VDUP. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + 
sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imm; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (src == SLJIT_IMM && srcw == 0) + return push_inst32(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD4(freg)); +/* 64 bit float elements: place the value in freg, then copy it into the other half of the pair unless that half is the source itself. */ + if (SLJIT_UNLIKELY(elem_size == 3)) { + SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw)); + src = freg; + } else if (freg != src) + FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src))); + + freg += SLJIT_QUAD_OTHER_HALF(freg); + + if (freg != src) + return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)); + return SLJIT_SUCCESS; + } + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + + ins = (sljit_ins)(elem_size << 6); + + if (reg_size == 4) + ins |= 1 << 5; + + return push_inst32(compiler, VLD1_r | ins | VD4(freg) | RN4(src) | 0xf); + } + + if (type & SLJIT_SIMD_FLOAT) { + SLJIT_ASSERT(elem_size == 2); + ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2)); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 6; + + return push_inst32(compiler, VDUP_s | ins | VD4(freg) | (sljit_ins)freg_map[src]); + } + + if (src == SLJIT_IMM) { + if (elem_size < 2) + srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + imm = simd_get_imm(elem_size, (sljit_uw)srcw); + + if (imm != ~(sljit_ins)0) { + if (reg_size == 4) + imm |= (sljit_ins)1 << 6; + + return push_inst32(compiler, VMOV_i | imm | 
VD4(freg)); + } + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); + src = TMP_REG1; + } + + switch (elem_size) { + case 0: + ins = 1 << 22; + break; + case 1: + ins = 1 << 5; + break; + default: + ins = 0; + break; + } + + if (reg_size == 4) + ins |= (sljit_ins)1 << 21; + + return push_inst32(compiler, VDUP | ins | VN4(freg) | RT4(src)); +} +/* Moves a single lane between freg and srcdst (register, memory or, for integer loads, an immediate). Supports reg_size 3 and 4; floats need elem_size 2 or 3, integers elem_size up to 2. With SLJIT_SIMD_LANE_ZERO the rest of freg is cleared with VMOV_i before the lane is written. For 128 bit registers a lane index in the upper half is redirected to the other register of the pair. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (type & SLJIT_SIMD_LANE_ZERO) { + ins = (reg_size == 3) ? 
0 : ((sljit_ins)1 << 6); + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size == 3 && !(srcdst & SLJIT_MEM)) { + if (lane_index == 1) + freg += SLJIT_QUAD_OTHER_HALF(freg); + + if (srcdst != freg) + FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(srcdst) | VM4(srcdst))); + + freg += SLJIT_QUAD_OTHER_HALF(freg); + return push_inst32(compiler, VMOV_i | VD4(freg)); + } +/* The source overlaps the register that is about to be cleared: keep a copy in TMP_FREG2 and read the lane from there. */ + if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) { + FAIL_IF(push_inst32(compiler, VORR | ins | VD4(TMP_FREG2) | VN4(freg) | VM4(freg))); + srcdst = TMP_FREG2; + srcdstw = 0; + } + } + + FAIL_IF(push_inst32(compiler, VMOV_i | ins | VD4(freg))); + } +/* Lanes in the upper half of a 128 bit register live in the other register of the pair. */ + if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) { + lane_index -= (0x8 >> elem_size); + freg += SLJIT_QUAD_OTHER_HALF(freg); + } + + if (srcdst & SLJIT_MEM) { + if (elem_size == 3) + return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw); + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + lane_index = lane_index << elem_size; + ins = (sljit_ins)((elem_size << 10) | (lane_index << 5)); + return push_inst32(compiler, ((type & SLJIT_SIMD_STORE) ? 
VST1_s : VLD1_s) | ins | VD4(freg) | RN4(srcdst) | 0xf); + } + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size == 3) { + if (type & SLJIT_SIMD_STORE) + return push_inst32(compiler, VORR | VD4(srcdst) | VN4(freg) | VM4(freg)); + return push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(freg) | VM4(srcdst)); + } + + if (type & SLJIT_SIMD_STORE) { + if (freg_ebit_map[freg] == 0) { + if (lane_index == 1) + freg = SLJIT_F64_SECOND(freg); + + return push_inst32(compiler, VMOV_F32 | VD4(srcdst) | VM4(freg)); + } + + FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1))); + return push_inst32(compiler, VMOV | VN4(srcdst) | RT4(TMP_REG1)); + } + + FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(srcdst) | RT4(TMP_REG1))); + return push_inst32(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1)); + } + + if (srcdst == SLJIT_IMM) { + if (elem_size < 2) + srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw)); + srcdst = TMP_REG1; + } + + if (elem_size == 0) + ins = 0x400000; + else if (elem_size == 1) + ins = 0x20; + else + ins = 0; + + lane_index = lane_index << elem_size; + ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5)); + + if (type & SLJIT_SIMD_STORE) { + ins |= (1 << 20); + + if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED)) + ins |= (1 << 23); + } + + return push_inst32(compiler, VMOV_s | ins | VN4(freg) | RT4(srcdst)); +} +/* Replicates lane src_lane_index of src into every lane of freg. Supports reg_size 3 and 4, floats only with elem_size 2 or 3. For 128 bit registers a lane index in the upper half selects the other register of the source pair. 64 bit elements are copied into both halves of the destination with VORR; all other sizes use one VDUP_s. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + + if (reg_size != 3 && reg_size != 4) + 
return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) { + freg = simd_get_quad_reg_index(freg); + src = simd_get_quad_reg_index(src); + + if (src_lane_index >= (0x8 >> elem_size)) { + src_lane_index -= (0x8 >> elem_size); + src += SLJIT_QUAD_OTHER_HALF(src); + } + } +/* 64 bit elements: copy the selected source register into both halves of the destination, skipping a copy onto itself. */ + if (elem_size == 3) { + if (freg != src) + FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src))); + + freg += SLJIT_QUAD_OTHER_HALF(freg); + + if (freg != src) + return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)); + return SLJIT_SUCCESS; + } + + ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size)); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 6; + + return push_inst32(compiler, VDUP_s | ins | VD4(freg) | VM4(src)); +} +/* Widens the elements of src from elem_size to elem2_size and writes them to freg. Supports reg_size 3 and 4; the float form only converts elem_size 2 to elem2_size 3. A memory source is first loaded into freg. Integer elements are widened with one VSHLL per size step (unsigned unless SLJIT_SIMD_EXTEND_SIGNED is set), going through TMP_FREG2 for 64 bit registers. Floats are converted with two VCVT_F64_F32 instructions, ordered so that the source is not overwritten before its second element is read. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_s32 dst_reg; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + freg = simd_get_quad_reg_index(freg); + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + if (reg_size == 4 && elem2_size - elem_size == 1) + FAIL_IF(push_inst32(compiler, VLD1 | (0x7 << 8) | VD4(freg) | RN4(src) | 0xf)); + else + FAIL_IF(push_inst32(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 
10) | VD4(freg) | RN4(src) | 0xf)); + src = freg; + } else if (reg_size == 4) + src = simd_get_quad_reg_index(src); +/* Integer widening: one VSHLL per doubling of the element size. 64 bit results are built in TMP_FREG2 and copied to freg at the end. */ + if (!(type & SLJIT_SIMD_FLOAT)) { + dst_reg = (reg_size == 4) ? freg : TMP_FREG2; + + do { + FAIL_IF(push_inst32(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 28)) + | ((sljit_ins)1 << (19 + elem_size)) | VD4(dst_reg) | VM4(src))); + src = dst_reg; + } while (++elem_size < elem2_size); + + if (dst_reg == TMP_FREG2) + return push_inst32(compiler, VORR | VD4(freg) | VN4(TMP_FREG2) | VM4(TMP_FREG2)); + return SLJIT_SUCCESS; + } + + /* No SIMD variant, must use VFP instead. */ + SLJIT_ASSERT(reg_size == 4); +/* When source and destination coincide, the other half is written first so that src still holds its original contents for the second conversion. */ + if (freg == src) { + freg += SLJIT_QUAD_OTHER_HALF(freg); + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20)); + freg += SLJIT_QUAD_OTHER_HALF(freg); + return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src)); + } + + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src))); + freg += SLJIT_QUAD_OTHER_HALF(freg); + return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20); +} +/* Extracts a per-lane bit mask from freg into dst. Each lane is shifted right with VSHR into TMP_FREG2, 128 bit inputs with elements wider than a byte are narrowed with VMOVN, the lanes are folded together by a sequence of VSRA steps taken from the imms table, and the result is moved to a core register (two moves combined with ORR_W for 128 bit byte vectors) and stored when dst is a memory operand. Presumably the mask collects the sign bit of every lane; confirm against the shift amounts. Supports reg_size 3 and 4, floats only with elem_size 2 or 3. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imms; + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (elem_size) { + case 0: + imms = 0x243219; + ins = VSHR | (1 << 28) | (0x9 << 16); + break; + case 1: + imms = (reg_size == 4) ? 
0x243219 : 0x2231;
+		ins = VSHR | (1 << 28) | (0x11 << 16);
+		break;
+	case 2:
+		imms = (reg_size == 4) ? 0x2231 : 0x21;
+		ins = VSHR | (1 << 28) | (0x21 << 16);
+		break;
+	default:
+		imms = 0x21;
+		ins = VSHR | (1 << 28) | (0x1 << 16) | (1 << 7);
+		break;
+	}
+
+	if (reg_size == 4) {
+		freg = simd_get_quad_reg_index(freg);
+		ins |= (sljit_ins)1 << 6;
+	}
+
+	SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0); /* TMP_FREG2 must map to an even register number */
+	FAIL_IF(push_inst32(compiler, ins | VD4(TMP_FREG2) | VM4(freg))); /* first shift: freg -> TMP_FREG2 */
+
+	if (reg_size == 4 && elem_size > 0) /* 128-bit input with non-byte elements: narrow the intermediate with VMOVN */
+		FAIL_IF(push_inst32(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
+
+	ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;
+
+	while (imms >= 0x100) { /* emit one VSRA per remaining byte of imms */
+		FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | ((imms & 0xff) << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
+		imms >>= 8;
+	}
+
+	FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | (1 << 7) | (imms << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2))); /* last step additionally sets bit 7 */
+
+	dst_r = FAST_IS_REG(dst) ?
dst : TMP_REG1;
+	FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(dst_r) | VN4(TMP_FREG2))); /* move the collected bits to the general purpose register */
+
+	if (reg_size == 4 && elem_size == 0) { /* byte elements in a 128-bit register: fetch the part held in TMP_FREG1 and merge it */
+		SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
+		FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(TMP_REG2)| VN4(TMP_FREG1)));
+		FAIL_IF(push_inst32(compiler, ORR_W | RD4(dst_r) | RN4(dst_r) | RM4(TMP_REG2) | (0x2 << 12)));
+	}
+
+	if (dst_r == TMP_REG1) /* dst is not a plain register: store the temporary to it */
+		return emit_op_mem(compiler, STORE | WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
+
+	return SLJIT_SUCCESS;
+}
+/* Emits a bitwise SIMD operation (AND, OR or XOR, chosen by the opcode in type)
+ * computing dst_freg from src1_freg and src2_freg.
+ *
+ * Returns SLJIT_ERR_UNSUPPORTED unless reg_size is 3 or 4, and for
+ * SLJIT_SIMD_FLOAT unless elem_size is 2 or 3. With SLJIT_SIMD_TEST set
+ * nothing is emitted.
+ * NOTE(review): the switch has no default, so an opcode other than the three
+ * listed leaves ins == 0; presumably check_sljit_emit_simd_op2 rejects it.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
+	sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
+{
+	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+	sljit_ins ins = 0;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
+
+	if (reg_size != 3 && reg_size != 4)
+		return SLJIT_ERR_UNSUPPORTED;
+
+	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+		return SLJIT_ERR_UNSUPPORTED;
+
+	switch (SLJIT_SIMD_GET_OPCODE(type)) {
+	case SLJIT_SIMD_OP2_AND:
+		ins = VAND;
+		break;
+	case SLJIT_SIMD_OP2_OR:
+		ins = VORR;
+		break;
+	case SLJIT_SIMD_OP2_XOR:
+		ins = VEOR;
+		break;
+	}
+
+	if (type & SLJIT_SIMD_TEST)
+		return SLJIT_SUCCESS;
+
+	if (reg_size == 4) { /* 128-bit operation: translate to quad register indices and set bit 6 */
+		dst_freg = simd_get_quad_reg_index(dst_freg);
+		src1_freg = simd_get_quad_reg_index(src1_freg);
+		src2_freg = simd_get_quad_reg_index(src2_freg);
+		ins |= (sljit_ins)1 << 6;
+	}
+
+	return push_inst32(compiler, ins | VD4(dst_freg) | VN4(src1_freg) | VM4(src2_freg));
+}
+
+#undef FPU_LOAD
+/* Emits an exclusive load (LDREXB, LDREXH or LDREX, chosen by the opcode in op)
+ * from the address held in mem_reg into dst_reg.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst_reg,
+	sljit_s32 mem_reg)
+{
+	sljit_ins ins;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
+
+
switch (GET_OPCODE(op)) { /* pick the access width; anything but U8/U16 uses the word form */
+	case SLJIT_MOV_U8:
+		ins = LDREXB;
+		break;
+	case SLJIT_MOV_U16:
+		ins = LDREXH;
+		break;
+	default:
+		ins = LDREX;
+		break;
+	}
+
+	return push_inst32(compiler, ins | RN4(mem_reg) | RT4(dst_reg));
+}
+/* Emits an exclusive store (STREXB, STREXH or STREX) of src_reg to the address
+ * held in mem_reg. The status result always goes to TMP_REG1; temp_reg is
+ * ignored. When SLJIT_SET_ATOMIC_STORED is requested, a compare of TMP_REG1
+ * is emitted afterwards so that the flags reflect the store status.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 src_reg,
+	sljit_s32 mem_reg,
+	sljit_s32 temp_reg)
+{
+	sljit_ins ins;
+
+	/* temp_reg == mem_reg is undefined so use another temp register */
+	SLJIT_UNUSED_ARG(temp_reg);
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
+
+	switch (GET_OPCODE(op)) { /* the status register is encoded with RM4 for byte/halfword and RD4 for the word form */
+	case SLJIT_MOV_U8:
+		ins = STREXB | RM4(TMP_REG1);
+		break;
+	case SLJIT_MOV_U16:
+		ins = STREXH | RM4(TMP_REG1);
+		break;
+	default:
+		ins = STREX | RD4(TMP_REG1);
+		break;
+	}
+
+	FAIL_IF(push_inst32(compiler, ins | RN4(mem_reg) | RT4(src_reg)));
+	if (op & SLJIT_SET_ATOMIC_STORED)
+		return push_inst32(compiler, CMPI_W | RN4(TMP_REG1));
+
+	return SLJIT_SUCCESS;
+}
+/* Emits code that loads the patchable 32-bit constant init_value into dst and
+ * returns the sljit_const record describing it, or NULL on failure.
+ */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
+{
+	struct sljit_const *const_;
+	sljit_s32 dst_r;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+	PTR_FAIL_IF(!const_);
+	set_const(const_, compiler);
+
+	dst_r = FAST_IS_REG(dst) ?
dst : TMP_REG1;
+	PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, (sljit_uw)init_value));
+
+	if (dst & SLJIT_MEM) /* memory destination: the constant was built in TMP_REG1, store it */
+		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
+	return const_;
+}
+/* Emits a placeholder for loading a code address into dst and returns the
+ * sljit_jump record that tracks it, or NULL on failure. Only one 16-bit word
+ * carrying the destination register is pushed here; compiler->size is bumped
+ * by three more units to reserve room for the real instructions.
+ */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+	struct sljit_jump *jump;
+	sljit_s32 dst_r;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+	PTR_FAIL_IF(!jump);
+	set_mov_addr(jump, compiler, 0);
+
+	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+	PTR_FAIL_IF(push_inst16(compiler, RDN3(dst_r)));
+	compiler->size += 3;
+
+	if (dst & SLJIT_MEM)
+		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
+	return jump;
+}
+/* Rewrites the 32-bit immediate load located at addr so that it produces
+ * new_target. The four 16-bit words are made writable, patched, made
+ * executable again, and the instruction cache is flushed for the executable
+ * alias of the range.
+ */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
+{
+	sljit_u16 *inst = (sljit_u16*)addr;
+	SLJIT_UNUSED_ARG(executable_offset);
+
+	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
+	modify_imm32_const(inst, new_target);
+	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
+	inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
+	SLJIT_CACHE_FLUSH(inst, inst + 4);
+}
+/* Patches a constant emitted by sljit_emit_const; it shares the encoding of a
+ * jump address, so this simply forwards to sljit_set_jump_addr.
+ */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
+{
+	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
+}
diff --git a/src/sljit/sljitNativeLOONGARCH_64.c b/src/sljit/sljitNativeLOONGARCH_64.c
new file mode 100644
index 0000000..2e1d742
--- /dev/null
+++ b/src/sljit/sljitNativeLOONGARCH_64.c
@@ -0,0 +1,3765 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/* Returns the platform name string: "LOONGARCH" followed by SLJIT_CPUINFO. */
+SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
+{
+	return "LOONGARCH" SLJIT_CPUINFO;
+}
+/* One machine instruction word (the format comment in this file states that
+ * LoongArch instructions are 32 bits wide). */
+typedef sljit_u32 sljit_ins;
+/* Backend-private register indices placed after the public SLJIT registers;
+ * they index reg_map[] like any other register. TMP_ZERO is index 0. */
+#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_ZERO	0
+
+/* Flags are kept in volatile registers.
*/
+#define EQUAL_FLAG	(SLJIT_NUMBER_OF_REGISTERS + 5)
+#define RETURN_ADDR_REG	TMP_REG2 /* the return address shares a register with TMP_REG2 */
+#define OTHER_FLAG	(SLJIT_NUMBER_OF_REGISTERS + 6)
+
+#define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
+/* Translation table from SLJIT register index to the hardware register number
+ * that is placed into instruction fields (see the RD/RJ/RK/RA encoders). The
+ * trailing entries serve the TMP_REGn, EQUAL_FLAG and OTHER_FLAG indices. */
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
+	0, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 22, 31, 30, 29, 28, 27, 26, 25, 24, 23, 3, 13, 1, 14, 12, 15
+};
+/* Same translation for floating point registers (used by FRD/FRJ/FRK/FRA);
+ * the last two entries serve TMP_FREG1 and TMP_FREG2. */
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+	0, 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 31, 30, 29, 28, 27, 26, 25, 24, 8, 9
+};
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+/*
+LoongArch instructions are 32 bits wide, belonging to 9 basic instruction formats (and variants of them):
+
+| Format name | Composition                |
+| 2R          | Opcode + Rj + Rd           |
+| 3R          | Opcode + Rk + Rj + Rd      |
+| 4R          | Opcode + Ra + Rk + Rj + Rd |
+| 2RI8        | Opcode + I8 + Rj + Rd      |
+| 2RI12       | Opcode + I12 + Rj + Rd     |
+| 2RI14       | Opcode + I14 + Rj + Rd     |
+| 2RI16       | Opcode + I16 + Rj + Rd     |
+| 1RI21       | Opcode + I21L + Rj + I21H  |
+| I26         | Opcode + I26L + I26H       |
+
+Rd is the destination register operand, while Rj, Rk and Ra (“a” stands for “additional”) are the source register operands.
+I8/I12/I14/I16/I21/I26 are immediate operands of respective width. The longer I21 and I26 are stored in separate higher and
+lower parts in the instruction word, denoted by the “L” and “H” suffixes.
*/
+
+/* Register operand encoders: translate an SLJIT register index through
+ * reg_map[] / freg_map[] and shift it into the Rd (bit 0), Rj (bit 5),
+ * Rk (bit 10) or Ra (bit 15) field. */
+#define RD(rd) ((sljit_ins)reg_map[rd])
+#define RJ(rj) ((sljit_ins)reg_map[rj] << 5)
+#define RK(rk) ((sljit_ins)reg_map[rk] << 10)
+#define RA(ra) ((sljit_ins)reg_map[ra] << 15)
+
+#define FD(fd) ((sljit_ins)reg_map[fd])
+#define FRD(fd) ((sljit_ins)freg_map[fd])
+#define FRJ(fj) ((sljit_ins)freg_map[fj] << 5)
+#define FRK(fk) ((sljit_ins)freg_map[fk] << 10)
+#define FRA(fa) ((sljit_ins)freg_map[fa] << 15)
+
+/* Immediate encoders: mask the value to the field width and move it to
+ * bit 10. IMM_I21 / IMM_I26 additionally place the bits above the low 16
+ * into the low end of the instruction word. */
+#define IMM_V(imm) ((sljit_ins)(imm) << 10)
+#define IMM_I8(imm) (((sljit_ins)(imm)&0xff) << 10)
+#define IMM_I12(imm) (((sljit_ins)(imm)&0xfff) << 10)
+/* 14-bit field: the mask must be 0x3fff. The previous 0xfff3 dropped bits 2-3
+ * of the immediate and let bits 14-15 spill into the opcode field. */
+#define IMM_I14(imm) (((sljit_ins)(imm)&0x3fff) << 10)
+#define IMM_I16(imm) (((sljit_ins)(imm)&0xffff) << 10)
+#define IMM_I20(imm) (((sljit_ins)(imm)&0xffffffff) >> 12 << 5)
+#define IMM_I21(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x1f))
+#define IMM_I26(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x3ff))
+
+/* Opcode encoders: shift the opcode value to the position used by each
+ * instruction format. */
+#define OPC_I26(opc) ((sljit_ins)(opc) << 26)
+#define OPC_1RI21(opc) ((sljit_ins)(opc) << 26)
+#define OPC_2RI16(opc) ((sljit_ins)(opc) << 26)
+#define OPC_2RI14(opc) ((sljit_ins)(opc) << 24)
+#define OPC_2RI12(opc) ((sljit_ins)(opc) << 22)
+#define OPC_2RI8(opc) ((sljit_ins)(opc) << 18)
+#define OPC_4R(opc) ((sljit_ins)(opc) << 20)
+#define OPC_3R(opc) ((sljit_ins)(opc) << 15)
+#define OPC_2R(opc) ((sljit_ins)(opc) << 10)
+#define OPC_1RI20(opc) ((sljit_ins)(opc) << 25)
+
+/* Arithmetic operation instructions */
+#define ADD_W OPC_3R(0x20)
+#define ADD_D OPC_3R(0x21)
+#define SUB_W OPC_3R(0x22)
+#define SUB_D OPC_3R(0x23)
+#define ADDI_W OPC_2RI12(0xa)
+#define ADDI_D OPC_2RI12(0xb)
+#define ANDI OPC_2RI12(0xd)
+#define ORI OPC_2RI12(0xe)
+#define XORI OPC_2RI12(0xf)
+#define ADDU16I_D OPC_2RI16(0x4)
+#define LU12I_W OPC_1RI20(0xa)
+#define LU32I_D OPC_1RI20(0xb)
+#define LU52I_D OPC_2RI12(0xc)
+#define SLT OPC_3R(0x24)
+#define SLTU OPC_3R(0x25)
+#define SLTI OPC_2RI12(0x8)
+#define SLTUI OPC_2RI12(0x9)
+#define PCADDI OPC_1RI20(0xc)
+#define PCALAU12I OPC_1RI20(0xd) +#define PCADDU12I OPC_1RI20(0xe) +#define PCADDU18I OPC_1RI20(0xf) +#define NOR OPC_3R(0x28) +#define AND OPC_3R(0x29) +#define OR OPC_3R(0x2a) +#define XOR OPC_3R(0x2b) +#define ORN OPC_3R(0x2c) +#define ANDN OPC_3R(0x2d) +#define MUL_W OPC_3R(0x38) +#define MULH_W OPC_3R(0x39) +#define MULH_WU OPC_3R(0x3a) +#define MUL_D OPC_3R(0x3b) +#define MULH_D OPC_3R(0x3c) +#define MULH_DU OPC_3R(0x3d) +#define MULW_D_W OPC_3R(0x3e) +#define MULW_D_WU OPC_3R(0x3f) +#define DIV_W OPC_3R(0x40) +#define MOD_W OPC_3R(0x41) +#define DIV_WU OPC_3R(0x42) +#define MOD_WU OPC_3R(0x43) +#define DIV_D OPC_3R(0x44) +#define MOD_D OPC_3R(0x45) +#define DIV_DU OPC_3R(0x46) +#define MOD_DU OPC_3R(0x47) + +/* Bit-shift instructions */ +#define SLL_W OPC_3R(0x2e) +#define SRL_W OPC_3R(0x2f) +#define SRA_W OPC_3R(0x30) +#define SLL_D OPC_3R(0x31) +#define SRL_D OPC_3R(0x32) +#define SRA_D OPC_3R(0x33) +#define ROTR_W OPC_3R(0x36) +#define ROTR_D OPC_3R(0x37) +#define SLLI_W OPC_3R(0x81) +#define SLLI_D ((sljit_ins)(0x41) << 16) +#define SRLI_W OPC_3R(0x89) +#define SRLI_D ((sljit_ins)(0x45) << 16) +#define SRAI_W OPC_3R(0x91) +#define SRAI_D ((sljit_ins)(0x49) << 16) +#define ROTRI_W OPC_3R(0x99) +#define ROTRI_D ((sljit_ins)(0x4d) << 16) + +/* Bit-manipulation instructions */ +#define CLO_W OPC_2R(0x4) +#define CLZ_W OPC_2R(0x5) +#define CTO_W OPC_2R(0x6) +#define CTZ_W OPC_2R(0x7) +#define CLO_D OPC_2R(0x8) +#define CLZ_D OPC_2R(0x9) +#define CTO_D OPC_2R(0xa) +#define CTZ_D OPC_2R(0xb) +#define REVB_2H OPC_2R(0xc) +#define REVB_4H OPC_2R(0xd) +#define REVB_2W OPC_2R(0xe) +#define REVB_D OPC_2R(0xf) +#define REVH_2W OPC_2R(0x10) +#define REVH_D OPC_2R(0x11) +#define BITREV_4B OPC_2R(0x12) +#define BITREV_8B OPC_2R(0x13) +#define BITREV_W OPC_2R(0x14) +#define BITREV_D OPC_2R(0x15) +#define EXT_W_H OPC_2R(0x16) +#define EXT_W_B OPC_2R(0x17) +#define BSTRINS_W (0x1 << 22 | 1 << 21) +#define BSTRPICK_W (0x1 << 22 | 1 << 21 | 1 << 15) +#define BSTRINS_D (0x2 
<< 22) +#define BSTRPICK_D (0x3 << 22) + +/* Branch instructions */ +#define BEQZ OPC_1RI21(0x10) +#define BNEZ OPC_1RI21(0x11) +#define JIRL OPC_2RI16(0x13) +#define B OPC_I26(0x14) +#define BL OPC_I26(0x15) +#define BEQ OPC_2RI16(0x16) +#define BNE OPC_2RI16(0x17) +#define BLT OPC_2RI16(0x18) +#define BGE OPC_2RI16(0x19) +#define BLTU OPC_2RI16(0x1a) +#define BGEU OPC_2RI16(0x1b) + +/* Memory access instructions */ +#define LD_B OPC_2RI12(0xa0) +#define LD_H OPC_2RI12(0xa1) +#define LD_W OPC_2RI12(0xa2) +#define LD_D OPC_2RI12(0xa3) + +#define ST_B OPC_2RI12(0xa4) +#define ST_H OPC_2RI12(0xa5) +#define ST_W OPC_2RI12(0xa6) +#define ST_D OPC_2RI12(0xa7) + +#define LD_BU OPC_2RI12(0xa8) +#define LD_HU OPC_2RI12(0xa9) +#define LD_WU OPC_2RI12(0xaa) + +#define LDX_B OPC_3R(0x7000) +#define LDX_H OPC_3R(0x7008) +#define LDX_W OPC_3R(0x7010) +#define LDX_D OPC_3R(0x7018) + +#define STX_B OPC_3R(0x7020) +#define STX_H OPC_3R(0x7028) +#define STX_W OPC_3R(0x7030) +#define STX_D OPC_3R(0x7038) + +#define LDX_BU OPC_3R(0x7040) +#define LDX_HU OPC_3R(0x7048) +#define LDX_WU OPC_3R(0x7050) + +#define PRELD OPC_2RI12(0xab) + +/* Atomic memory access instructions */ +#define LL_W OPC_2RI14(0x20) +#define SC_W OPC_2RI14(0x21) +#define LL_D OPC_2RI14(0x22) +#define SC_D OPC_2RI14(0x23) + +/* LoongArch V1.10 Instructions */ +#define AMCAS_B OPC_3R(0x70B0) +#define AMCAS_H OPC_3R(0x70B1) +#define AMCAS_W OPC_3R(0x70B2) +#define AMCAS_D OPC_3R(0x70B3) + +/* Other instructions */ +#define BREAK OPC_3R(0x54) +#define DBGCALL OPC_3R(0x55) +#define SYSCALL OPC_3R(0x56) + +/* Basic Floating-Point Instructions */ +/* Floating-Point Arithmetic Operation Instructions */ +#define FADD_S OPC_3R(0x201) +#define FADD_D OPC_3R(0x202) +#define FSUB_S OPC_3R(0x205) +#define FSUB_D OPC_3R(0x206) +#define FMUL_S OPC_3R(0x209) +#define FMUL_D OPC_3R(0x20a) +#define FDIV_S OPC_3R(0x20d) +#define FDIV_D OPC_3R(0x20e) +#define FCMP_COND_S OPC_4R(0xc1) +#define FCMP_COND_D OPC_4R(0xc2) +#define 
FCOPYSIGN_S OPC_3R(0x225)
+#define FCOPYSIGN_D OPC_3R(0x226)
+#define FSEL OPC_4R(0xd0)
+#define FABS_S OPC_2R(0x4501)
+#define FABS_D OPC_2R(0x4502)
+#define FNEG_S OPC_2R(0x4505)
+#define FNEG_D OPC_2R(0x4506)
+#define FMOV_S OPC_2R(0x4525)
+#define FMOV_D OPC_2R(0x4526)
+
+/* Floating-Point Conversion Instructions */
+#define FCVT_S_D OPC_2R(0x4646)
+#define FCVT_D_S OPC_2R(0x4649)
+#define FTINTRZ_W_S OPC_2R(0x46a1)
+#define FTINTRZ_W_D OPC_2R(0x46a2)
+#define FTINTRZ_L_S OPC_2R(0x46a9)
+#define FTINTRZ_L_D OPC_2R(0x46aa)
+#define FFINT_S_W OPC_2R(0x4744)
+#define FFINT_S_L OPC_2R(0x4746)
+#define FFINT_D_W OPC_2R(0x4748)
+#define FFINT_D_L OPC_2R(0x474a)
+
+/* Floating-Point Move Instructions */
+#define FMOV_S OPC_2R(0x4525) /* NOTE(review): FMOV_S and FMOV_D repeat the identical definitions from the arithmetic group above */
+#define FMOV_D OPC_2R(0x4526)
+#define MOVGR2FR_W OPC_2R(0x4529)
+#define MOVGR2FR_D OPC_2R(0x452a)
+#define MOVGR2FRH_W OPC_2R(0x452b)
+#define MOVFR2GR_S OPC_2R(0x452d)
+#define MOVFR2GR_D OPC_2R(0x452e)
+#define MOVFRH2GR_S OPC_2R(0x452f)
+#define MOVGR2FCSR OPC_2R(0x4530)
+#define MOVFCSR2GR OPC_2R(0x4532)
+#define MOVFR2CF OPC_2R(0x4534)
+#define MOVCF2FR OPC_2R(0x4535)
+#define MOVGR2CF OPC_2R(0x4536)
+#define MOVCF2GR OPC_2R(0x4537)
+
+/* Floating-Point Branch Instructions */
+#define BCEQZ OPC_I26(0x12)
+#define BCNEZ OPC_I26(0x12) /* NOTE(review): expands to exactly the same word as BCEQZ; the two branches presumably differ in an extra encoding bit that is missing here -- verify against the ISA manual before using this macro */
+
+/* Floating-Point Common Memory Access Instructions */
+#define FLD_S OPC_2RI12(0xac)
+#define FLD_D OPC_2RI12(0xae)
+#define FST_S OPC_2RI12(0xad)
+#define FST_D OPC_2RI12(0xaf)
+
+#define FLDX_S OPC_3R(0x7060)
+#define FLDX_D OPC_3R(0x7068)
+#define FSTX_S OPC_3R(0x7070)
+#define FSTX_D OPC_3R(0x7078)
+
+/* Vector Instructions */
+
+/* Vector Arithmetic Instructions */
+#define VOR_V OPC_3R(0xe24d)
+#define VXOR_V OPC_3R(0xe24e)
+#define VAND_V OPC_3R(0xe24c)
+#define VMSKLTZ OPC_2R(0x1ca710)
+
+/* Vector Memory Access Instructions */
+#define VLD OPC_2RI12(0xb0)
+#define VST OPC_2RI12(0xb1)
+#define XVLD OPC_2RI12(0xb2)
+#define XVST OPC_2RI12(0xb3)
+#define VSTELM OPC_2RI8(0xc40)
+
+/* Vector
Float Conversion Instructions */ +#define VFCVTL_D_S OPC_2R(0x1ca77c) + +/* Vector Bit Manipulate Instructions */ +#define VSLLWIL OPC_2R(0x1cc200) + +/* Vector Move And Shuffle Instructions */ +#define VLDREPL OPC_2R(0xc0000) +#define VINSGR2VR OPC_2R(0x1cbac0) +#define VPICKVE2GR_U OPC_2R(0x1cbce0) +#define VREPLGR2VR OPC_2R(0x1ca7c0) +#define VREPLVE OPC_3R(0xe244) +#define VREPLVEI OPC_2R(0x1cbde0) +#define XVPERMI OPC_2RI8(0x1dfa) + +#define I12_MAX (0x7ff) +#define I12_MIN (-0x800) +#define BRANCH16_MAX (0x7fff << 2) +#define BRANCH16_MIN (-(0x8000 << 2)) +#define BRANCH21_MAX (0xfffff << 2) +#define BRANCH21_MIN (-(0x100000 << 2)) +#define JUMP_MAX (0x1ffffff << 2) +#define JUMP_MIN (-(0x2000000 << 2)) +#define JIRL_MAX (0x7fff << 2) +#define JIRL_MIN (-(0x8000 << 2)) + +#define S32_MAX (0x7fffffffl) +#define S32_MIN (-0x80000000l) +#define S52_MAX (0x7ffffffffffffl) + +#define INST(inst, type) ((sljit_ins)((type & SLJIT_32) ? inst##_W : inst##_D)) + +/* LoongArch CPUCFG register for feature detection */ +#define LOONGARCH_CFG2 0x02 +#define LOONGARCH_CFG2_LAMCAS (1 << 28) + +static sljit_u32 cfg2_feature_list = 0; + +/* According to Software Development and Build Convention for LoongArch Architectures, ++ the status of LSX and LASX extension must be checked through HWCAP */ +#include + +#define LOONGARCH_HWCAP_LSX (1 << 4) +#define LOONGARCH_HWCAP_LASX (1 << 5) + +static sljit_u32 hwcap_feature_list = 0; + +/* Feature type */ +#define GET_CFG2 0 +#define GET_HWCAP 1 + +static SLJIT_INLINE sljit_u32 get_cpu_features(sljit_u32 feature_type) + { + if (cfg2_feature_list == 0) + __asm__ ("cpucfg %0, %1" : "+&r"(cfg2_feature_list) : "r"(LOONGARCH_CFG2)); + if (hwcap_feature_list == 0) + hwcap_feature_list = (sljit_u32)getauxval(AT_HWCAP); + + return feature_type ? 
hwcap_feature_list : cfg2_feature_list;
+ }
+/* Appends one instruction word to the compiler buffer and counts it in
+ * compiler->size. Fails (through FAIL_IF) when ensure_buf returns NULL. */
+static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
+{
+	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+	FAIL_IF(!ptr);
+	*ptr = ins;
+	compiler->size++;
+	return SLJIT_SUCCESS;
+}
+/* Chooses the encoding for one jump while the code is being copied.
+ *
+ * On entry jump->addr points at the jump's placeholder word in the output.
+ * Depending on the distance / target address the function sets one of
+ * PATCH_B, PATCH_J, PATCH_REL32, PATCH_ABS32 or PATCH_ABS52 in jump->flags
+ * (none for the full-length form), and for conditional jumps ORs a 16-bit
+ * immediate into the preceding word (inst[-1]). The word at inst[0] is moved
+ * to the end of the reserved sequence. Rewritable jumps always get the
+ * full-length form.
+ *
+ * Returns the position of the last word occupied by the jump.
+ */
+static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset)
+{
+	sljit_sw diff;
+	sljit_uw target_addr;
+	sljit_ins *inst;
+
+	inst = (sljit_ins *)jump->addr;
+
+	if (jump->flags & SLJIT_REWRITABLE_JUMP)
+		goto exit;
+
+	if (jump->flags & JUMP_ADDR)
+		target_addr = jump->u.target;
+	else {
+		SLJIT_ASSERT(jump->u.label != NULL);
+		target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
+	}
+
+	diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset; /* byte distance from inst to the target */
+
+	if (jump->flags & IS_COND) {
+		diff += SSIZE_OF(ins); /* measure from the conditional branch one word earlier */
+
+		if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) {
+			/* the conditional branch can reach the target itself: flip opcode bit 26, clear its offset field, and drop the placeholder */
+			inst--;
+			inst[0] = (inst[0] & 0xfc0003ff) ^ 0x4000000;
+			jump->flags |= PATCH_B;
+			jump->addr = (sljit_uw)inst;
+			return inst;
+		}
+
+		diff -= SSIZE_OF(ins);
+	}
+
+	if (diff >= JUMP_MIN && diff <= JUMP_MAX) {
+		if (jump->flags & IS_COND) {
+			inst[-1] |= (sljit_ins)IMM_I16(2);
+		}
+
+		jump->flags |= PATCH_J;
+		return inst;
+	}
+
+	if (diff >= S32_MIN && diff <= S32_MAX) {
+		if (jump->flags & IS_COND)
+			inst[-1] |= (sljit_ins)IMM_I16(3);
+
+		jump->flags |= PATCH_REL32;
+		inst[1] = inst[0];
+		return inst + 1;
+	}
+
+	if (target_addr <= (sljit_uw)S32_MAX) {
+		if (jump->flags & IS_COND)
+			inst[-1] |= (sljit_ins)IMM_I16(3);
+
+		jump->flags |= PATCH_ABS32;
+		inst[1] = inst[0];
+		return inst + 1;
+	}
+
+	if (target_addr <= S52_MAX) {
+		if (jump->flags & IS_COND)
+			inst[-1] |= (sljit_ins)IMM_I16(4);
+
+		jump->flags |= PATCH_ABS52;
+		inst[2] = inst[0];
+		return inst + 2;
+	}
+
+exit: /* full-length form: three address words followed by the original word */
+	if (jump->flags & IS_COND)
+		inst[-1] |= (sljit_ins)IMM_I16(5);
+	inst[3] = inst[0];
+	return inst + 3;
+}
+
+static SLJIT_INLINE sljit_sw mov_addr_get_length(struct
sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) /* returns how many words beyond the first the address load at code_ptr needs (1, 2 or 3) and records the matching PATCH_* flag */
+{
+	sljit_uw addr;
+	sljit_sw diff;
+	SLJIT_UNUSED_ARG(executable_offset);
+
+	SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT));
+	if (jump->flags & JUMP_ADDR)
+		addr = jump->u.target;
+	else
+		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);
+
+	diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
+	/* each assert below checks that the size reserved earlier (stored above JUMP_SIZE_SHIFT) covers the form chosen here */
+	if (diff >= S32_MIN && diff <= S32_MAX) {
+		SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
+		jump->flags |= PATCH_REL32;
+		return 1;
+	}
+
+	if (addr <= S32_MAX) {
+		SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
+		jump->flags |= PATCH_ABS32;
+		return 1;
+	}
+
+	if (addr <= S52_MAX) {
+		SLJIT_ASSERT(jump->flags >= ((sljit_uw)2 << JUMP_SIZE_SHIFT));
+		jump->flags |= PATCH_ABS52;
+		return 2;
+	}
+
+	SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT));
+	return 3;
+}
+/* Writes the final address-loading instruction sequence for a jump or
+ * mov_addr at jump->addr, according to the PATCH_* flag chosen earlier.
+ * For mov_addr records the destination register index is read from the
+ * placeholder word; jumps load into TMP_REG1 and have the low address bits
+ * folded into the JIRL that follows.
+ */
+static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw executable_offset)
+{
+	sljit_uw flags = jump->flags;
+	sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
+	sljit_ins *ins = (sljit_ins*)jump->addr;
+	sljit_u32 reg = (flags & JUMP_MOV_ADDR) ?
*ins : TMP_REG1; + SLJIT_UNUSED_ARG(executable_offset); + + if (flags & PATCH_REL32) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset); + + SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX); + + if ((addr & 0x800) != 0) + addr += 0x1000; + + ins[0] = PCADDU12I | RD(reg) | IMM_I20(addr); + + if (!(flags & JUMP_MOV_ADDR)) { + SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL); + ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2); + } else + ins[1] = ADDI_D | RD(reg) | RJ(reg) | IMM_I12(addr); + return; + } + + if (flags & PATCH_ABS32) { + SLJIT_ASSERT(addr <= S32_MAX); + ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5); + } else if (flags & PATCH_ABS52) { + ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5); + ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5); + ins += 1; + } else { + ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5); + ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5); + ins[2] = LU52I_D | RD(reg) | RJ(reg) | IMM_I12(addr >> 52); + ins += 2; + } + + if (!(flags & JUMP_MOV_ADDR)) { + SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL); + ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2); + } else + ins[1] = ORI | RD(reg) | RJ(reg) | IMM_I12(addr); +} + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + 
if (next_min_addr == next_const_addr) {
+			const_->addr -= size_reduce;
+			const_ = const_->next;
+			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
+			continue;
+		}
+
+		if (next_min_addr != next_jump_addr)
+			continue;
+
+		jump->addr -= size_reduce;
+		if (!(jump->flags & JUMP_MOV_ADDR)) {
+			total_size = JUMP_MAX_SIZE; /* start from the longest form and shrink when the target allows */
+
+			if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
+				if (jump->flags & JUMP_ADDR) {
+					if (jump->u.target <= S32_MAX)
+						total_size = 2;
+					else if (jump->u.target <= S52_MAX)
+						total_size = 3;
+				} else {
+					/* Unit size: instruction. */
+					diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
+
+					if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH16_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH16_MIN / SSIZE_OF(ins)))
+						total_size = 0;
+					else if (diff >= (JUMP_MIN / SSIZE_OF(ins)) && diff <= (JUMP_MAX / SSIZE_OF(ins)))
+						total_size = 1;
+					else if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
+						total_size = 2;
+				}
+			}
+
+			size_reduce += JUMP_MAX_SIZE - total_size;
+			jump->flags |= total_size << JUMP_SIZE_SHIFT; /* remembered for the code generation pass */
+		} else {
+			total_size = 3;
+
+			if (!(jump->flags & JUMP_ADDR)) {
+				/* Real size minus 1. Unit size: instruction.
*/ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins))) + total_size = 1; + } else if (jump->u.target < S32_MAX) + total_size = 1; + else if (jump->u.target <= S52_MAX) + total_size = 2; + + size_reduce += 3 - total_size; + jump->flags |= total_size << JUMP_SIZE_SHIFT; + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + SLJIT_NEXT_DEFINE_TYPES; + sljit_sw executable_offset; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + + reduce_code_size(compiler); + + code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset); + PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_min_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + + /* These structures are ordered by their address. 
*/ + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code, executable_offset); + SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); + } else { + word_count += jump->flags >> JUMP_SIZE_SHIFT; + addr = (sljit_uw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = addr; + } + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + } + + SLJIT_GET_NEXT_MIN(); + } + code_ptr++; + word_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->u.addr = (sljit_uw)code_ptr; + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + if (!(jump->flags & (PATCH_B | PATCH_J)) || (jump->flags & JUMP_MOV_ADDR)) { + load_addr_to_reg(jump, executable_offset); + break; + } + + addr = (jump->flags & JUMP_ADDR) ? 
jump->u.target : jump->u.label->u.addr;
+			buf_ptr = (sljit_ins *)jump->addr;
+			addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); /* addr is now the byte distance from the instruction being patched */
+
+			if (jump->flags & PATCH_B) { /* short conditional branch: OR the word offset into its 16-bit field */
+				SLJIT_ASSERT((sljit_sw)addr >= BRANCH16_MIN && (sljit_sw)addr <= BRANCH16_MAX);
+				buf_ptr[0] |= (sljit_ins)IMM_I16(addr >> 2);
+				break;
+			}
+
+			SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX);
+			if (jump->flags & IS_CALL) /* PATCH_J: replace the word with BL for calls, B otherwise */
+				buf_ptr[0] = BL | (sljit_ins)IMM_I26(addr >> 2);
+			else
+				buf_ptr[0] = B | (sljit_ins)IMM_I26(addr >> 2);
+		} while (0);
+		jump = jump->next;
+	}
+
+	compiler->error = SLJIT_ERR_COMPILED;
+	compiler->executable_offset = executable_offset;
+	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
+
+	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); /* switch to the executable alias before flushing and returning */
+	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
+	SLJIT_CACHE_FLUSH(code, code_ptr);
+	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
+	return code;
+}
+/* Reports whether the CPU feature feature_type is available: non-zero when it
+ * is, 0 when it is not or the feature is unknown to this backend. For the
+ * LASX, SIMD and ATOMIC queries the non-zero value is the raw masked
+ * capability bit rather than 1.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+	switch (feature_type)
+	{
+	case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+		return (SLJIT_IS_FPU_AVAILABLE) != 0;
+#else
+		/* Available by default.
*/
+		return 1;
+#endif
+
+	case SLJIT_HAS_LASX: /* LASX and LSX come from the HWCAP bits */
+		return (LOONGARCH_HWCAP_LASX & get_cpu_features(GET_HWCAP));
+
+	case SLJIT_HAS_SIMD:
+		return (LOONGARCH_HWCAP_LSX & get_cpu_features(GET_HWCAP));
+
+	case SLJIT_HAS_ATOMIC: /* reported only when CPUCFG word 2 has the LAMCAS bit */
+		return (LOONGARCH_CFG2_LAMCAS & get_cpu_features(GET_CFG2));
+
+	case SLJIT_HAS_CLZ:
+	case SLJIT_HAS_CTZ:
+	case SLJIT_HAS_REV:
+	case SLJIT_HAS_ROT:
+	case SLJIT_HAS_PREFETCH:
+	case SLJIT_HAS_COPY_F32:
+	case SLJIT_HAS_COPY_F64:
+		return 1;
+
+	default:
+		return 0;
+	}
+}
+/* Always returns 0 for every comparison type on this backend. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
+{
+	SLJIT_UNUSED_ARG(type);
+
+	return 0;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+/* Creates an index in data_transfer_insts array. */
+#define LOAD_DATA	0x01
+#define WORD_DATA	0x00
+#define BYTE_DATA	0x02
+#define HALF_DATA	0x04
+#define INT_DATA	0x06
+#define SIGNED_DATA	0x08
+/* Separates integer and floating point registers */
+#define GPR_REG		0x0f
+#define DOUBLE_DATA	0x10
+#define SINGLE_DATA	0x12
+
+#define MEM_MASK	0x1f
+
+#define ARG_TEST	0x00020
+#define ALT_KEEP_CACHE	0x00040
+#define CUMULATIVE_OP	0x00080
+#define IMM_OP		0x00100
+#define MOVE_OP		0x00200
+#define SRC2_IMM	0x00400
+
+#define UNUSED_DEST	0x00800
+#define REG_DEST	0x01000
+#define REG1_SOURCE	0x02000
+#define REG2_SOURCE	0x04000
+#define SLOW_SRC1	0x08000
+#define SLOW_SRC2	0x10000
+#define SLOW_DEST	0x20000
+#define MEM_USE_TMP2	0x40000
+
+#define STACK_STORE	ST_D
+#define STACK_LOAD	LD_D
+/* Emits the shortest of four instruction sequences that loads the constant
+ * imm into dst_r: a single ADDI.D from the zero register when imm fits in 12
+ * signed bits, LU12I.W + ORI for 32-bit values, plus LU32I.D for 52-bit
+ * values, plus LU52I.D for anything larger.
+ */
+static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm)
+{
+	if (imm <= I12_MAX && imm >= I12_MIN)
+		return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(TMP_ZERO) | IMM_I12(imm));
+
+	if (imm <= 0x7fffffffl && imm >= -0x80000000l) {
+		FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
+		return push_inst(compiler, ORI
| RD(dst_r) | RJ(dst_r) | IMM_I12(imm)); + } else if (imm <= 0x7ffffffffffffl && imm >= -0x8000000000000l) { + FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5))); + FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm))); + return push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5)); + } + FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5))); + FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm))); + FAIL_IF(push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5))); + return push_inst(compiler, LU52I_D | RD(dst_r) | RJ(dst_r) | IMM_I12(imm >> 52)); +} + +#define STACK_MAX_DISTANCE (-I12_MIN) + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw); + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 i, tmp, offset; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); + + local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; + compiler->local_size = local_size; + + if (local_size <= STACK_MAX_DISTANCE) { + /* Frequent case. 
*/ + FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size))); + offset = local_size - SSIZE_OF(sw); + local_size = 0; + } else { + FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(STACK_MAX_DISTANCE))); + local_size -= STACK_MAX_DISTANCE; + + if (local_size > STACK_MAX_DISTANCE) + FAIL_IF(load_immediate(compiler, TMP_REG1, local_size)); + offset = STACK_MAX_DISTANCE - SSIZE_OF(sw); + } + + FAIL_IF(push_inst(compiler, STACK_STORE | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset))); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset))); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset))); + } + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset))); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset))); + } + + if (local_size > STACK_MAX_DISTANCE) + FAIL_IF(push_inst(compiler, SUB_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG1))); + else if (local_size > 0) + FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size))); + + if (options & SLJIT_ENTER_REG_ARG) + return SLJIT_SUCCESS; + + arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; + tmp = SLJIT_R0; + + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_S0 - saved_arg_count) | RJ(tmp) | IMM_I12(0))); + saved_arg_count++; + } + tmp++; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + return 
SLJIT_SUCCESS;
+}
+
+#undef STACK_MAX_DISTANCE
+/* Records the frame layout without emitting any instruction: applies the
+   same local_size computation as sljit_emit_enter (register save areas,
+   SLJIT_LOCALS_OFFSET, rounding up to a multiple of 16) and stores the
+   result in compiler->local_size.  Returns SLJIT_SUCCESS unless
+   CHECK_ERROR / CHECK leave early. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
+	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
+	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+
+	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
+	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
+
+	compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
+
+	return SLJIT_SUCCESS;
+}
+
+#define STACK_MAX_DISTANCE (-I12_MIN - 16) /* 16 less than the value sljit_emit_enter uses */
+/* Emits the stack unwinding part of a return.
+   A frame larger than STACK_MAX_DISTANCE is first shrunk so that exactly
+   STACK_MAX_DISTANCE bytes remain.  The return address (skipped when
+   is_return_to is non-zero) and the integer / float registers are then
+   reloaded in the same order and with the same offset stepping that
+   sljit_emit_enter used to store them, and a final ADDI_D releases the
+   remaining bytes.  Returns the push_inst() status of that ADDI_D, or
+   leaves early through FAIL_IF. */
+static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
+{
+	sljit_s32 i, tmp, offset;
+	sljit_s32 local_size = compiler->local_size;
+
+	if (local_size > STACK_MAX_DISTANCE) {
+		local_size -= STACK_MAX_DISTANCE;
+
+		if (local_size > STACK_MAX_DISTANCE) {
+			FAIL_IF(load_immediate(compiler, TMP_REG2, local_size));
+			FAIL_IF(push_inst(compiler, ADD_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG2)));
+		} else
+			FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size)));
+
+		local_size = STACK_MAX_DISTANCE; /* what the final ADDI_D still has to release */
+	}
+
+	SLJIT_ASSERT(local_size > 0);
+
+	offset = local_size - SSIZE_OF(sw);
+	if (!is_return_to)
+		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));
+
+	tmp = SLJIT_S0 - compiler->saveds;
+	for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
+		offset -= SSIZE_OF(sw);
+		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
+	}
+
+	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+		offset -= SSIZE_OF(sw);
+		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
+	}
+
+	tmp = SLJIT_FS0 - compiler->fsaveds;
+	for (i = SLJIT_FS0; i > tmp; i--) {
+		offset -= SSIZE_OF(f64);
+		FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
+	}
+
+	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
+		offset -= SSIZE_OF(f64);
+		FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
+	}
+
+	return push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size));
+}
+
+#undef STACK_MAX_DISTANCE
+/* Emits a return without a value: releases the stack frame (reloading
+   the return address), then emits JIRL through RETURN_ADDR_REG with
+   TMP_ZERO as the link register. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_return_void(compiler));
+
+	FAIL_IF(emit_stack_frame_release(compiler, 0));
+	return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
+}
+/* Releases the stack frame and jumps to the address given by src / srcw.
+   The target is first moved into TMP_REG1 when it is a memory operand or
+   one of the registers in [SLJIT_FIRST_SAVED_REG, SLJIT_S0 - kept saveds]
+   (presumably because emit_stack_frame_release reloads those registers).
+   The return address is not reloaded (is_return_to == 1). */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
+	sljit_s32 src, sljit_sw srcw)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
+
+	if (src & SLJIT_MEM) {
+		ADJUST_LOCAL_OFFSET(src, srcw);
+		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
+		src = TMP_REG1;
+		srcw = 0;
+	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
+		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
+		src = TMP_REG1;
+		srcw = 0;
+	}
+
+	FAIL_IF(emit_stack_frame_release(compiler, 1));
+
+	SLJIT_SKIP_CHECKS(compiler);
+	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+/* Load / store opcodes taking a base register and an immediate offset,
+   indexed by (flags & MEM_MASK).  The label in front of each entry reads
+   signedness (u/s), size (w/b/h/i) and store/load (s/l); the last four
+   entries are the double and single precision float transfers. */
+static const sljit_ins data_transfer_insts[16 + 4] = {
+/* u w s */ ST_D /* st.d */,
+/* u w l */ LD_D /* ld.d */,
+/* u b s */ ST_B /* st.b */,
+/* u b l */ LD_BU /* ld.bu */,
+/* u h s */ ST_H /* st.h */,
+/* u h l */ LD_HU /* ld.hu */,
+/* u i s */ ST_W /* st.w */,
+/* u i l */ LD_WU /* ld.wu */,
+
+/* s w s */ ST_D /* st.d */,
+/* s w l */ LD_D /* ld.d */,
+/* s b s */ ST_B /* st.b */,
+/* s b l */ LD_B /* ld.b */,
+/* s h s */ ST_H /* st.h */,
+/* s h l */ LD_H /* ld.h */,
+/* s i s */ ST_W /* st.w */,
+/* s i l */ LD_W /* ld.w */,
+
+/* d s */ FST_D /* fst.d */,
+/* d l */ FLD_D /* fld.d */,
+/* s s */ FST_S /* fst.s */,
+/* s l */ FLD_S /* fld.s */,
+};
+/* Same layout as data_transfer_insts, holding the register + register
+   (indexed) form of every transfer. */
+static const sljit_ins data_transfer_insts_x[16 + 4] = {
+/* u w s */ STX_D /* stx.d */,
+/* u w l */ LDX_D /* ldx.d */,
+/* u b s */ STX_B /* stx.b */,
+/* u b l */ LDX_BU /* ldx.bu */,
+/* u h s */ STX_H /* stx.h */,
+/* u h l */ LDX_HU /* ldx.hu */,
+/* u i s */ STX_W /* stx.w */,
+/* u i l */ LDX_WU /* ldx.wu */,
+
+/* s w s */ STX_D /* stx.d */,
+/* s w l */ LDX_D /* ldx.d */,
+/* s b s */ STX_B /* stx.b */,
+/* s b l */ LDX_B /* ldx.b */,
+/* s h s */ STX_H /* stx.h */,
+/* s h l */ LDX_H /* ldx.h */,
+/* s i s */ STX_W /* stx.w */,
+/* s i l */ LDX_W /* ldx.w */,
+
+/* d s */ FSTX_D /* fstx.d */,
+/* d l */ FLDX_D /* fldx.d */,
+/* s s */ FSTX_S /* fstx.s */,
+/* s l */ FLDX_S /* fldx.s */,
+};
+/* Encodes and pushes exactly one load / store instruction.
+   flags & MEM_MASK selects the opcode and whether `reg` is encoded as an
+   integer (RD) or float (FRD) register.  If `arg` carries an offset
+   register the indexed form is used and argw must be 0; otherwise argw is
+   encoded through IMM_I12 together with the base register. */
+static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
+{
+	sljit_ins ins;
+	sljit_s32 base = arg & REG_MASK;
+
+	SLJIT_ASSERT(arg & SLJIT_MEM);
+
+	if (arg & OFFS_REG_MASK) {
+		sljit_s32 offs = OFFS_REG(arg);
+
+		SLJIT_ASSERT(!argw);
+		ins = data_transfer_insts_x[flags & MEM_MASK] |
+			((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) |
+			RJ(base) | RK(offs);
+	} else {
+		SLJIT_ASSERT(argw <= 0xfff && argw >= I12_MIN); /* upper bound is 0xfff, not I12_MAX: getput_arg passes argw & 0xfff */
+
+		ins = data_transfer_insts[flags & MEM_MASK] |
+			((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) |
+			RJ(base) | IMM_I12(argw);
+	}
+	return push_inst(compiler, ins);
+}
+
+/* Can perform an operation using at most 1 instruction. 
*/
+static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
+{
+	SLJIT_ASSERT(arg & SLJIT_MEM);
+
+	/* argw == 0 (ldx/stx rd, rj, rk) can be used.
+	 * argw in [-2048, 2047] (ld/st rd, rj, imm) can be used.
+	 * Return value: 0 when the fast path does not apply, 1 when it applies
+	 * and ARG_TEST was set (nothing emitted), -1 after the instruction has
+	 * been pushed, or the error code FAIL_IF returns. */
+	if (!argw || (!(arg & OFFS_REG_MASK) && (argw <= I12_MAX && argw >= I12_MIN))) {
+		/* Works for both absolute and relative addresses. */
+		if (SLJIT_UNLIKELY(flags & ARG_TEST))
+			return 1;
+
+		FAIL_IF(push_mem_inst(compiler, flags, reg, arg, argw));
+		return -1;
+	}
+	return 0;
+}
+
+/* Rounds argw to the nearest multiple of 0x1000 (ties go up), so that
+   argw - TO_ARGW_HI(argw) lies in [-0x800, 0x7ff]. */
+#define TO_ARGW_HI(argw) (((argw) & ~0xfff) + (((argw) & 0x800) ? 0x1000 : 0))
+
+/* See getput_arg below.
+   Note: can_cache is called only for binary operators.
+   Returns 1 when both operands use the same base (no offset register) and
+   their offsets are either within a 12 bit signed distance of each other
+   or share the same TO_ARGW_HI() value; returns 0 otherwise. */
+static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
+{
+	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
+
+	if (arg & OFFS_REG_MASK)
+		return 0;
+
+	if (arg == next_arg) {
+		if (((next_argw - argw) <= I12_MAX && (next_argw - argw) >= I12_MIN)
+				|| TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw))
+			return 1;
+		return 0;
+	}
+
+	return 0;
+}
+
+/* Emit the necessary instructions. See can_cache above.
+
+   Performs the memory access described by arg / argw when it needs more
+   than one instruction.  TMP_REG3 holds a cached address part that is
+   described by compiler->cache_arg / compiler->cache_argw:
+     cache_arg == SLJIT_MEM : TMP_REG3 holds the constant cache_argw;
+     cache_arg == arg       : TMP_REG3 holds base + cache_argw.
+   next_arg / next_argw describe the following memory access (or anything
+   without SLJIT_MEM if there is none) and only steer what gets cached.
+   With MEM_USE_TMP2 the final address is built in TMP_REG2 instead of
+   TMP_REG1.  Returns the push status of the access instruction. */
+static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
+{
+	sljit_s32 base = arg & REG_MASK;
+	sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1;
+	sljit_sw offset;
+
+	SLJIT_ASSERT(arg & SLJIT_MEM);
+	if (!(next_arg & SLJIT_MEM)) {
+		next_arg = 0;
+		next_argw = 0;
+	}
+
+	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+		sljit_s32 offs_r = OFFS_REG(arg);
+
+		argw &= 0x3;
+
+		if (SLJIT_UNLIKELY(argw)) {
+			/* The scaled index is built in TMP_REG3, which is also the
+			   address cache register.  Forget the cached address first,
+			   otherwise a later access that matches cache_arg / cache_argw
+			   would use the shifted index as if it were that address. */
+			compiler->cache_arg = 0;
+			compiler->cache_argw = 0;
+
+			FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(offs_r) | IMM_I12(argw)));
+			offs_r = TMP_REG3;
+		}
+		/* With a zero shift TMP_REG3 was never written, so the offset
+		   register itself is the index. */
+		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, offs_r), 0);
+	}
+
+	/* TMP_REG3 already holds base + cache_argw and the distance fits. */
+	if (compiler->cache_arg == arg && argw - compiler->cache_argw <= I12_MAX && argw - compiler->cache_argw >= I12_MIN)
+		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), argw - compiler->cache_argw);
+
+	if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= I12_MAX) && (argw - compiler->cache_argw >= I12_MIN)) {
+		/* TMP_REG3 holds a constant close enough to argw. */
+		offset = argw - compiler->cache_argw;
+	} else {
+		sljit_sw argw_hi=TO_ARGW_HI(argw);
+		compiler->cache_arg = SLJIT_MEM;
+
+		if (next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN && argw_hi != TO_ARGW_HI(next_argw)) {
+			/* Caching the exact offset keeps the next access reachable,
+			   caching the rounded value would not. */
+			FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+			compiler->cache_argw = argw;
+			offset = 0;
+		} else {
+			FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi));
+			compiler->cache_argw = argw_hi;
+			offset = argw & 0xfff;
+			argw = argw_hi;
+		}
+	}
+
+	if (!base)
+		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
+
+	if (arg == next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN) {
+		/* The next access uses the same base: fold the base into the
+		   cached value so that access needs a single instruction. */
+		compiler->cache_arg = arg;
+		FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base)));
+		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
+	}
+
+	if (!offset)
+		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
+
+	FAIL_IF(push_inst(compiler, ADD_D | RD(tmp_r) | RJ(TMP_REG3) | RK(base)));
+	return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), offset);
+}
+
+/* Performs one memory access without using the address cache.
+   The single instruction form is tried first.  Otherwise the scaled index
+   or the full offset is materialized in a temporary register (`reg`
+   itself for integer loads, TMP_REG1 in every other case) and the indexed
+   form is used.
+   NOTE(review): for integer loads the temporary is `reg`, so `reg` must
+   differ from the base register of `arg` - verify against the callers. */
+static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
+{
+	sljit_s32 base = arg & REG_MASK;
+	sljit_s32 tmp_r = TMP_REG1;
+
+	if (getput_arg_fast(compiler, flags, reg, arg, argw))
+		return compiler->error;
+
+	if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
+		tmp_r = reg;
+
+	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+		argw &= 0x3;
+
+		if (SLJIT_UNLIKELY(argw))
+			FAIL_IF(push_inst(compiler, SLLI_D | RD(tmp_r) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
+		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
+	} else {
+		FAIL_IF(load_immediate(compiler, tmp_r, argw));
+
+		if (base != 0)
+			return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
+		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), 0);
+	}
+}
+
+/* Like emit_op_mem, but falls back to the caching getput_arg and passes
+   arg2 / arg2w on as the description of the following memory access. */
+static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
+{
+	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+		return compiler->error;
+	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+}
+
+#define IMM_EXTEND(v) (IMM_I12((op & SLJIT_32) ? 
(v) : (32 + (v)))) + +/* andi/ori/xori are zero-extended */ +#define EMIT_LOGICAL(op_imm, op_reg) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_Z) {\ + FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(src2))); \ + FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); \ + } \ + if (!(flags & UNUSED_DEST)) { \ + if (dst == src1) { \ + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(TMP_ZERO) | IMM_I12(src2))); \ + FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(TMP_REG1))); \ + } else { \ + FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(TMP_ZERO) | IMM_I12(src2))); \ + FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(dst))); \ + } \ + } \ + } else { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2))); \ + } \ + while (0) + +#define EMIT_SHIFT(imm, reg) \ + op_imm = (imm); \ + op_reg = (reg) + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) +{ + sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg; + sljit_ins op_imm, op_reg; + sljit_ins word_size = ((op & SLJIT_32) ? 
32 : 64); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src2) | IMM_I12(0)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | RD(dst) | RJ(src2) | IMM_I12(0xff)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, EXT_W_B | RD(dst) | RJ(src2)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(src2) | (15 << 16)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, EXT_W_H | RD(dst) | RJ(src2)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U32: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(src2) | (31 << 16)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S32: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, SLLI_W | RD(dst) | RJ(src2) | IMM_I12(0)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + return push_inst(compiler, INST(CLZ, 
op) | RD(dst) | RJ(src2)); + + case SLJIT_CTZ: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + return push_inst(compiler, INST(CTZ, op) | RD(dst) | RJ(src2)); + + case SLJIT_REV: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + return push_inst(compiler, ((op & SLJIT_32) ? REVB_2W : REVB_D) | RD(dst) | RJ(src2)); + + case SLJIT_REV_S16: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2))); + return push_inst(compiler, EXT_W_H | RD(dst) | RJ(dst)); + + case SLJIT_REV_U16: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2))); + return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16)); + + case SLJIT_REV_S32: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1); + FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2))); + return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0)); + + case SLJIT_REV_U32: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1); + FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2))); + return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16)); + + case SLJIT_ADD: + /* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */ + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0))); + else { + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(-1))); + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); + } + } else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2))); + + /* Only the zero flag is needed. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(src2))); + } else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + + if (is_overflow || carry_src_r != 0) { + if (src1 != dst) + carry_src_r = (sljit_s32)src1; + else if (src2 != dst) + carry_src_r = (sljit_s32)src2; + else { + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(0))); + carry_src_r = OTHER_FLAG; + } + } + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src1) | RK(src2))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ + if (is_overflow || carry_src_r != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(src2))); + else + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(carry_src_r))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0))); + FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31))); + return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG)); + + case SLJIT_ADDC: + carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(src1) | IMM_I12(src2))); + } else { + if (carry_src_r != 0) { + if (src1 != dst) + carry_src_r = (sljit_s32)src1; + else if (src2 != dst) + carry_src_r = (sljit_s32)src2; + else { + FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0))); + carry_src_r = 
EQUAL_FLAG; + } + } + + FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(src1) | RK(src2))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ + if (carry_src_r != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(src2))); + else + FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(dst) | RK(carry_src_r))); + } + + FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(dst) | RK(OTHER_FLAG))); + + if (carry_src_r == 0) + return SLJIT_SUCCESS; + + /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */ + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG))); + /* Set carry flag. */ + return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(EQUAL_FLAG)); + + case SLJIT_SUB: + if ((flags & SRC2_IMM) && src2 == I12_MIN) { + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_handled = 0; + + if (flags & SRC2_IMM) { + if (GET_FLAG_TYPE(op) == SLJIT_LESS) { + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2))); + is_handled = 1; + } else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) { + FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2))); + is_handled = 1; + } + } + + if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + is_handled = 1; + + if (flags & SRC2_IMM) { + reg = (src1 == TMP_REG1) ? 
TMP_REG2 : TMP_REG1; + FAIL_IF(push_inst(compiler, ADDI_D | RD(reg) | RJ(TMP_ZERO) | IMM_I12(src2))); + src2 = reg; + flags &= ~SRC2_IMM; + } + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_LESS: + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2))); + break; + case SLJIT_GREATER: + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src2) | RK(src1))); + break; + case SLJIT_SIG_LESS: + FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src1) | RK(src2))); + break; + case SLJIT_SIG_GREATER: + FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src2) | RK(src1))); + break; + } + } + + if (is_handled) { + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2))); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)); + } else { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)); + } + return SLJIT_SUCCESS; + } + + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0))); + else { + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-1))); + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); + } + } else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2))); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2))); + + /* Only the zero flag is needed. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2))); + } else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2))); + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0))); + FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31))); + return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG)); + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == I12_MIN) { + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2))); + + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2))); + } else { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + + FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2))); + } + + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RJ(dst) | RK(OTHER_FLAG))); + + FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(dst) | RK(OTHER_FLAG))); + + if (!is_carry) + return SLJIT_SUCCESS; + + return push_inst(compiler, OR 
| RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(TMP_REG1)); + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & SRC2_IMM)); + + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) + return push_inst(compiler, INST(MUL, op) | RD(dst) | RJ(src1) | RK(src2)); + + if (op & SLJIT_32) { + FAIL_IF(push_inst(compiler, MUL_D | RD(OTHER_FLAG) | RJ(src1) | RK(src2))); + FAIL_IF(push_inst(compiler, MUL_W | RD(dst) | RJ(src1) | RK(src2))); + return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG)); + } + + FAIL_IF(push_inst(compiler, MULH_D | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + FAIL_IF(push_inst(compiler, MUL_D | RD(dst) | RJ(src1) | RK(src2))); + FAIL_IF(push_inst(compiler, SRAI_D | RD(OTHER_FLAG) | RJ(dst) | IMM_I12((63)))); + return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(OTHER_FLAG)); + + case SLJIT_AND: + EMIT_LOGICAL(ANDI, AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(ORI, OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + EMIT_LOGICAL(XORI, XOR); + return SLJIT_SUCCESS; + + case SLJIT_SHL: + case SLJIT_MSHL: + if (op & SLJIT_32) { + EMIT_SHIFT(SLLI_W, SLL_W); + } else { + EMIT_SHIFT(SLLI_D, SLL_D); + } + break; + + case SLJIT_LSHR: + case SLJIT_MLSHR: + if (op & SLJIT_32) { + EMIT_SHIFT(SRLI_W, SRL_W); + } else { + EMIT_SHIFT(SRLI_D, SRL_D); + } + break; + + case SLJIT_ASHR: + case SLJIT_MASHR: + if (op & SLJIT_32) { + EMIT_SHIFT(SRAI_W, SRA_W); + } else { + EMIT_SHIFT(SRAI_D, SRA_D); + } + break; + + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & SRC2_IMM) { + SLJIT_ASSERT(src2 != 0); + + if (GET_OPCODE(op) == SLJIT_ROTL) + src2 = word_size - src2; + return push_inst(compiler, INST(ROTRI, op) | RD(dst) | RJ(src1) | IMM_I12(src2)); + } + + if (src2 == TMP_ZERO) { + if (dst != src1) + return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(0)); + return SLJIT_SUCCESS; + } + + if (GET_OPCODE(op) == SLJIT_ROTL) { + FAIL_IF(push_inst(compiler, INST(SUB, op)| RD(OTHER_FLAG) | RJ(TMP_ZERO) | RK(src2))); + src2 = 
OTHER_FLAG; + } + return push_inst(compiler, INST(ROTR, op) | RD(dst) | RJ(src1) | RK(src2)); + + default: + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + } + + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2))); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, op_imm | RD(dst) | RJ(src1) | IMM_I12(src2)); + } + + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2)); +} + +#undef IMM_EXTEND + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; + sljit_sw src2_r = 0; + sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2; + + if (!(flags & ALT_KEEP_CACHE)) { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + } + + if (dst == 0) { + SLJIT_ASSERT(HAS_FLAGS(op)); + flags |= UNUSED_DEST; + dst = TMP_REG2; + } else if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (flags & MOVE_OP) + src2_tmp_reg = dst_r; + } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) + flags |= SLOW_DEST; + + if (flags & IMM_OP) { + if (src2 == SLJIT_IMM && src2w != 0 && src2w <= I12_MAX && src2w >= I12_MIN) { + flags |= SRC2_IMM; + src2_r = src2w; + } else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. 
*/
+			src1 = src2;
+			src1w = src2w;
+			src2 = SLJIT_IMM;
+			/* src2w = src2_r unneeded. */
+		}
+	}
+
+	/* Source 1: used in place when it is a register, loaded into TMP_REG1
+	   otherwise; a zero immediate is replaced by TMP_ZERO. */
+	if (FAST_IS_REG(src1)) {
+		src1_r = src1;
+		flags |= REG1_SOURCE;
+	} else if (src1 == SLJIT_IMM) {
+		if (src1w) {
+			FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
+			src1_r = TMP_REG1;
+		}
+		else
+			src1_r = TMP_ZERO;
+	} else {
+		if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
+			FAIL_IF(compiler->error);
+		else
+			flags |= SLOW_SRC1; /* loaded further below through getput_arg */
+		src1_r = TMP_REG1;
+	}
+
+	/* Source 2: same scheme as source 1, using src2_tmp_reg as the
+	   temporary; nothing is loaded when SRC2_IMM was set above. */
+	if (FAST_IS_REG(src2)) {
+		src2_r = src2;
+		flags |= REG2_SOURCE;
+		if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
+			dst_r = (sljit_s32)src2_r; /* move to a non-register dst: use the source register as the result */
+	} else if (src2 == SLJIT_IMM) {
+		if (!(flags & SRC2_IMM)) {
+			if (src2w) {
+				FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w));
+				src2_r = src2_tmp_reg;
+			} else {
+				src2_r = TMP_ZERO;
+				if (flags & MOVE_OP) {
+					if (dst & SLJIT_MEM)
+						dst_r = 0; /* NOTE(review): presumably register 0 is TMP_ZERO, so the store below writes zero - confirm */
+					else
+						op = SLJIT_MOV;
+				}
+			}
+		}
+	} else {
+		if (getput_arg_fast(compiler, flags | LOAD_DATA, src2_tmp_reg, src2, src2w))
+			FAIL_IF(compiler->error);
+		else
+			flags |= SLOW_SRC2;
+
+		src2_r = src2_tmp_reg;
+	}
+
+	/* Perform the loads that did not fit into one instruction.  Each
+	   getput_arg call also receives the memory operand accessed next, and
+	   with two slow sources the order is chosen from what can_cache
+	   reports for the operand pairs. */
+	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+		SLJIT_ASSERT(src2_r == TMP_REG2);
+		if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) {
+			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw));
+		} else {
+			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
+			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+		}
+	}
+	else if (flags & SLOW_SRC1)
+		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+	else if (flags & SLOW_SRC2)
+		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((src1_r == TMP_REG1) ? MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, dst, dstw));
+
+	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+
+	/* Store the result when the destination is a memory operand. */
+	if (dst & SLJIT_MEM) {
+		if (!(flags & SLOW_DEST)) {
+			getput_arg_fast(compiler, flags, dst_r, dst, dstw);
+			return compiler->error;
+		}
+		return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
+	}
+
+	return SLJIT_SUCCESS;
+}
+/* Emits an operation without explicit operands.
+     SLJIT_BREAKPOINT              : BREAK
+     SLJIT_NOP                     : ANDI with TMP_ZERO as source and destination
+     SLJIT_LMUL_UW / SLJIT_LMUL_SW : R1 = MULH_DU / MULH_D of R0 and R1,
+                                     R0 = MUL_D of R0 and the old R1
+     SLJIT_DIVMOD_UW / _SW         : R0 = R0 / R1, R1 = old R0 % R1
+     SLJIT_DIV_UW / _SW            : R0 = R0 / R1, R1 is not written
+     SLJIT_ENDBR,
+     SLJIT_SKIP_FRAMES_BEFORE_RETURN : nothing is emitted
+   The division cases select the W or D instruction from op & SLJIT_32.
+   Returns the push_inst() status; SLJIT_ERR_UNSUPPORTED follows
+   SLJIT_UNREACHABLE() for any other opcode. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_op0(compiler, op));
+
+	switch (GET_OPCODE(op)) {
+	case SLJIT_BREAKPOINT:
+		return push_inst(compiler, BREAK);
+	case SLJIT_NOP:
+		return push_inst(compiler, ANDI | RD(TMP_ZERO) | RJ(TMP_ZERO) | IMM_I12(0));
+	case SLJIT_LMUL_UW:
+		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0))); /* keep R1, it is overwritten next */
+		FAIL_IF(push_inst(compiler, MULH_DU | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
+		return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
+	case SLJIT_LMUL_SW:
+		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0))); /* keep R1, it is overwritten next */
+		FAIL_IF(push_inst(compiler, MULH_D | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
+		return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
+	case SLJIT_DIVMOD_UW:
+		FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0))); /* keep the dividend for the MOD */
+		FAIL_IF(push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
+		return push_inst(compiler, ((op & SLJIT_32)? MOD_WU: MOD_DU) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
+	case SLJIT_DIVMOD_SW:
+		FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0))); /* keep the dividend for the MOD */
+		FAIL_IF(push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
+		return push_inst(compiler, INST(MOD, op) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
+	case SLJIT_DIV_UW:
+		return push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
+	case SLJIT_DIV_SW:
+		return push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
+	case SLJIT_ENDBR:
+	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
+		return SLJIT_SUCCESS;
+	}
+
+	SLJIT_UNREACHABLE();
+	return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst, sljit_sw dstw,
+	sljit_s32 src, sljit_sw srcw)
+{
+	sljit_s32 flags = 0;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+	ADJUST_LOCAL_OFFSET(src, srcw);
+
+	if (op & SLJIT_32)
+		flags = INT_DATA | SIGNED_DATA;
+
+	switch (GET_OPCODE(op)) {
+	case SLJIT_MOV:
+	case SLJIT_MOV_P:
+		return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw);
+
+	case SLJIT_MOV_U32:
+		return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);
+
+	case SLJIT_MOV_S32:
+	/* Logical operators have no W variant, so sign extended input is necessary for them. */
+	case SLJIT_MOV32:
+		return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);
+
+	case SLJIT_MOV_U8:
+		return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
+
+	case SLJIT_MOV_S8:
+		return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
+
+	case SLJIT_MOV_U16:
+		return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
+
+	case SLJIT_MOV_S16:
+		return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? 
(sljit_s16)srcw : srcw); + + case SLJIT_CLZ: + case SLJIT_CTZ: + case SLJIT_REV: + return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw); + + case SLJIT_REV_U16: + case SLJIT_REV_S16: + return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw); + + case SLJIT_REV_U32: + case SLJIT_REV_S32: + return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (op & SLJIT_32) { + flags |= INT_DATA | SIGNED_DATA; + if (src1 == SLJIT_IMM) + src1w = (sljit_s32)src1w; + if (src2 == SLJIT_IMM) + src2w = (sljit_s32)src2w; + } + + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + compiler->status_flags_state = 0; + return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + case SLJIT_ROTL: + case SLJIT_ROTR: + if (src2 == SLJIT_IMM) { + if (op & SLJIT_32) + src2w 
&= 0x1f; + else + src2w &= 0x3f; + } + + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ /* Variant of sljit_emit_op2 without a destination: validates the arguments, then forwards with dst = 0, dstw = 0. */ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ /* Only SLJIT_MULADD is handled here: the product of src1 and src2 is staged in TMP_REG2 and then added to dst_reg; any other opcode emits nothing. */ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, INST(ADD, op) | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG2)); /* accumulate with the same SLJIT_32 width as the multiply above instead of always using ADD_D */ + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1_reg, + sljit_s32 src2_reg, + sljit_s32 src3, sljit_sw src3w) +{ + sljit_s32 is_left; + sljit_ins ins1, ins2, ins3; + sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64; + + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w)); + + is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL); + + if (src1_reg == src2_reg) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, (is_left ? 
SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w); + } + + ADJUST_LOCAL_OFFSET(src3, src3w); + + if (src3 == SLJIT_IMM) { + src3w &= bit_length - 1; + + if (src3w == 0) + return SLJIT_SUCCESS; + + if (is_left) { + ins1 = INST(SLLI, op) | IMM_I12(src3w); + src3w = bit_length - src3w; + ins2 = INST(SRLI, op) | IMM_I12(src3w); + } else { + ins1 = INST(SRLI, op) | IMM_I12(src3w); + src3w = bit_length - src3w; + ins2 = INST(SLLI, op) | IMM_I12(src3w); + } + + FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg))); + FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg))); + return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1)); + } + + if (src3 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w)); + src3 = TMP_REG2; + } else if (dst_reg == src3) { + FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(src3) | IMM_I12(0))); /* copy the shift amount before dst_reg is overwritten; the emit result is now checked like every other push_inst here */ + src3 = TMP_REG2; + } + + if (is_left) { + ins1 = INST(SLL, op); + ins2 = INST(SRLI, op); + ins3 = INST(SRL, op); + } else { + ins1 = INST(SRL, op); + ins2 = INST(SLLI, op); + ins3 = INST(SLL, op); + } + + FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg) | RK(src3))); + + if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) { + FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg) | IMM_I12(1))); + FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RJ(src3) | IMM_I12((sljit_ins)bit_length - 1))); + src2_reg = TMP_REG1; + } else + FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | RK(src3))); + + FAIL_IF(push_inst(compiler, ins3 | RD(TMP_REG1) | RJ(src2_reg) | RK(TMP_REG2))); + return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 base = src & REG_MASK; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + 
ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, ADDI_D | RD(RETURN_ADDR_REG) | RJ(src) | IMM_I12(0))); + else + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw)); + + return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + if (SLJIT_UNLIKELY(src & OFFS_REG_MASK)) { + srcw &= 0x3; + if (SLJIT_UNLIKELY(srcw)) + FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(src)) | IMM_I12(srcw))); + FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(srcw ? TMP_REG1 : OFFS_REG(src)))); /* with a zero shift TMP_REG1 was never written, so add the offset register itself */ + } else { + if (base && srcw <= I12_MAX && srcw >= I12_MIN) + return push_inst(compiler,PRELD | RJ(base) | IMM_I12(srcw)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + if (base != 0) + FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1))); + } + return push_inst(compiler, PRELD | RD(0) | RJ(TMP_REG1)); /* the full address has been formed in TMP_REG1 by one of the branches above */ + } + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + switch (op) { + case SLJIT_FAST_ENTER: + if (FAST_IS_REG(dst)) + return push_inst(compiler, ADDI_D | RD(dst) | RJ(RETURN_ADDR_REG) | IMM_I12(0)); + + SLJIT_ASSERT(RETURN_ADDR_REG == TMP_REG2); + break; + case SLJIT_GET_RETURN_ADDRESS: + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw))); + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256) + return -1; + + return freg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + SLJIT_UNUSED_ARG(size); + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ +#define SET_COND(cond) (sljit_ins)(cond << 15) + +#define COND_CUN SET_COND(0x8) /* UN */ +#define COND_CEQ SET_COND(0x4) /* EQ */ +#define COND_CUEQ SET_COND(0xc) /* UN EQ */ +#define COND_CLT SET_COND(0x2) /* LT */ +#define COND_CULT SET_COND(0xa) /* UN LT */ +#define COND_CLE SET_COND(0x6) /* LT EQ */ +#define COND_CULE SET_COND(0xe) /* UN LT EQ */ +#define COND_CNE SET_COND(0x10) /* GT LT */ +#define COND_CUNE SET_COND(0x18) /* UN GT LT */ +#define COND_COR SET_COND(0x14) /* GT LT EQ */ + +#define FINST(inst, type) (sljit_ins)((type & SLJIT_32) ? 
inst##_S : inst##_D) +#define FCD(cd) (sljit_ins)(cd & 0x7) +#define FCJ(cj) (sljit_ins)((cj & 0x7) << 5) +#define FCA(ca) (sljit_ins)((ca & 0x7) << 15) +#define F_OTHER_FLAG 1 + +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7)) + +/* Convert to integer, rounding toward zero. */ +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inst; + sljit_u32 word_data = 0; /* 1 for the word (SW) result, 0 for the S32 result */ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + + switch (GET_OPCODE(op)) + { + case SLJIT_CONV_SW_FROM_F64: + word_data = 1; + inst = FINST(FTINTRZ_L, op); + break; + case SLJIT_CONV_S32_FROM_F64: + inst = FINST(FTINTRZ_W, op); + break; + default: + inst = BREAK; + SLJIT_UNREACHABLE(); + } + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, inst | FRD(TMP_FREG1) | FRJ(src))); + FAIL_IF(push_inst(compiler, (word_data ? MOVFR2GR_D : MOVFR2GR_S) | RD(dst_r) | FRJ(TMP_FREG1))); /* FINST() tests the SLJIT_32 bit, which a 0/1 word_data never has, so the old form always picked the _D move; choose the move width from word_data explicitly */ + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, word_data ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inst; + sljit_u32 word_data = 0; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + switch (GET_OPCODE(op)) + { + case SLJIT_CONV_F64_FROM_SW: + word_data = 1; + inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L); + break; + case SLJIT_CONV_F64_FROM_S32: + inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W); + break; + default: + inst = BREAK; + SLJIT_UNREACHABLE(); + } + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, (word_data ? 
WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); + src = TMP_REG1; + } else if (src == SLJIT_IMM) { + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src))); + FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r))); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + return sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inst; + sljit_u32 word_data = 0; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + switch (GET_OPCODE(op)) + { + case SLJIT_CONV_F64_FROM_UW: + word_data = 1; + inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L); + break; + case SLJIT_CONV_F64_FROM_U32: + inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W); + break; + default: + inst = BREAK; + SLJIT_UNREACHABLE(); + } + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); + src = TMP_REG1; + } else if (src == SLJIT_IMM) { + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) + srcw = (sljit_u32)srcw; + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + + if (!word_data) + FAIL_IF(push_inst(compiler, SRLI_W | RD(src) | RJ(src) | IMM_I12(0))); + + FAIL_IF(push_inst(compiler, BLT | RJ(src) | RD(TMP_ZERO) | IMM_I16(4))); + + FAIL_IF(push_inst(compiler, (word_data ? 
MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src))); + FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r))); + FAIL_IF(push_inst(compiler, B | IMM_I26(7))); + + FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG2) | RJ(src) | IMM_I12(1))); + FAIL_IF(push_inst(compiler, (word_data ? SRLI_D : SRLI_W) | RD(TMP_REG1) | RJ(src) | IMM_I12(1))); + FAIL_IF(push_inst(compiler, OR | RD(TMP_REG1) | RJ(TMP_REG1) | RK(TMP_REG2))); + FAIL_IF(push_inst(compiler, INST(MOVGR2FR, (!word_data)) | FRD(dst_r) | RJ(TMP_REG1))); + FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r))); + FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(dst_r) | FRK(dst_r))); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(OTHER_FLAG))); + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2))); + break; + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2))); + break; + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1))); + break; + case SLJIT_UNORDERED_OR_GREATER: + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | 
FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1))); + break; + case SLJIT_UNORDERED_OR_LESS: + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2))); + break; + case SLJIT_UNORDERED_OR_EQUAL: + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2))); + break; + default: /* SLJIT_UNORDERED */ + FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUN | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2))); + } + return push_inst(compiler, MOVCF2GR | RD(OTHER_FLAG) | FCJ(F_OTHER_FLAG)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_32; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (!(dst & SLJIT_MEM)) + FAIL_IF(push_inst(compiler, FINST(FMOV, op) | FRD(dst_r) | FRJ(src))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, FINST(FNEG, op) | FRD(dst_r) | FRJ(src))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, FINST(FABS, op) | FRD(dst_r) | FRJ(src))); + break; + case SLJIT_CONV_F64_FROM_F32: + /* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */ + FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 
FCVT_D_S : FCVT_S_D) | FRD(dst_r) | FRJ(src))); + op ^= SLJIT_32; + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r, flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= SLOW_SRC1; + } + + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= SLOW_SRC2; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + } else { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + + if (flags & 
SLOW_SRC1) + src1 = TMP_FREG1; + if (flags & SLOW_SRC2) + src2 = TMP_FREG2; + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(src1) | FRK(src2))); + break; + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, FINST(FSUB, op) | FRD(dst_r) | FRJ(src1) | FRK(src2))); + break; + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, FINST(FMUL, op) | FRD(dst_r) | FRJ(src1) | FRK(src2))); + break; + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, FINST(FDIV, op) | FRD(dst_r) | FRJ(src1) | FRK(src2))); + break; + } + + if (dst_r != dst) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 reg; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src2, src2w, 0, 0)); + src2 = TMP_FREG1; + } + + if (src1 & SLJIT_MEM) { + reg = (dst_freg == src2) ? 
TMP_FREG1 : dst_freg; + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, reg, src1, src1w, 0, 0)); + src1 = reg; + } + + return push_inst(compiler, FINST(FCOPYSIGN, op) | FRD(dst_freg) | FRJ(src1) | FRK(src2)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + union { + sljit_s32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, MOVGR2FR_W | RJ(TMP_ZERO) | FRD(freg)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm)); + return push_inst(compiler, MOVGR2FR_W | RJ(TMP_REG1) | FRD(freg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_sw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, MOVGR2FR_D | RJ(TMP_ZERO) | FRD(freg)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm)); + return push_inst(compiler, MOVGR2FR_D | RJ(TMP_REG1) | FRD(freg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) + inst = ((op & SLJIT_32) ? MOVGR2FR_W : MOVGR2FR_D) | FRD(freg) | RJ(reg); + else + inst = ((op & SLJIT_32) ? 
MOVFR2GR_S : MOVFR2GR_D) | RD(reg) | FRJ(freg); + return push_inst(compiler, inst); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +static sljit_ins get_jump_instruction(sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_ATOMIC_NOT_STORED: + return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO); + case SLJIT_NOT_EQUAL: + case SLJIT_ATOMIC_STORED: + return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO); + case SLJIT_LESS: + case SLJIT_GREATER: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER: + case SLJIT_OVERFLOW: + case SLJIT_CARRY: + return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO); + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_NOT_OVERFLOW: + case SLJIT_NOT_CARRY: + return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO); + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_UNORDERED: + return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO); + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_F_LESS_EQUAL: + 
case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED: + return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO); + default: + /* Not conditional branch. */ + return 0; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_ins inst; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + inst = get_jump_instruction(type); + + if (inst != 0) { + PTR_FAIL_IF(push_inst(compiler, inst)); + jump->flags |= IS_COND; + } + + jump->addr = compiler->size; + inst = JIRL | RJ(TMP_REG1) | IMM_I16(0); + + if (type >= SLJIT_FAST_CALL) { + jump->flags |= IS_CALL; + inst |= RD(RETURN_ADDR_REG); + } + + PTR_FAIL_IF(push_inst(compiler, inst)); + + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + struct sljit_jump *jump; + sljit_s32 flags; + sljit_ins inst; + sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? 
TMP_REG1 : TMP_REG2; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + + if (src1 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0)); + src2 = src2_tmp_reg; + } + + if (src1 == SLJIT_IMM) { + if (src1w != 0) { + PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1 = TMP_REG1; + } + else + src1 = TMP_ZERO; + } + + if (src2 == SLJIT_IMM) { + if (src2w != 0) { + PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w)); + src2 = src2_tmp_reg; + } + else + src2 = TMP_ZERO; + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND)); + type &= 0xff; + + switch (type) { + case SLJIT_EQUAL: + inst = BNE | RJ(src1) | RD(src2); + break; + case SLJIT_NOT_EQUAL: + inst = BEQ | RJ(src1) | RD(src2); + break; + case SLJIT_LESS: + inst = BGEU | RJ(src1) | RD(src2); + break; + case SLJIT_GREATER_EQUAL: + inst = BLTU | RJ(src1) | RD(src2); + break; + case SLJIT_GREATER: + inst = BGEU | RJ(src2) | RD(src1); + break; + case SLJIT_LESS_EQUAL: + inst = BLTU | RJ(src2) | RD(src1); + break; + case SLJIT_SIG_LESS: + inst = BGE | RJ(src1) | RD(src2); + break; + case SLJIT_SIG_GREATER_EQUAL: + inst = BLT | RJ(src1) | RD(src2); + break; + case SLJIT_SIG_GREATER: + inst = BGE | RJ(src2) | RD(src1); + break; + case SLJIT_SIG_LESS_EQUAL: + inst = BLT | RJ(src2) | RD(src1); + break; + default: + inst = BREAK; + SLJIT_UNREACHABLE(); + } + + PTR_FAIL_IF(push_inst(compiler, inst)); + + jump->addr = compiler->size; + 
PTR_FAIL_IF(push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0))); + + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + + if (src != SLJIT_IMM) { + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + return push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(src) | IMM_I12(0)); + } + + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0)); + jump->u.target = (sljit_uw)srcw; + + jump->addr = compiler->size; + FAIL_IF(push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0))); + + /* Maximum number of instructions required for generating a constant. 
*/ + compiler->size += JUMP_MAX_SIZE - 1; + + return SLJIT_SUCCESS; +} +/* Emits an indirect call to src. A memory operand is loaded into TMP_REG1 first. With SLJIT_CALL_RETURN a target held in a saved register inside the range tested below is copied to TMP_REG1, the stack frame is released, and the transfer is emitted as SLJIT_JUMP (presumably because releasing the frame restores saved registers; confirm in emit_stack_frame_release). The transfer itself is delegated to sljit_emit_ijump() after SLJIT_SKIP_CHECKS. arg_types is only passed to the checker. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0))); + src = TMP_REG1; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP; + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} +/* Produces the value of condition `type` and either stores it in dst (when the opcode is below SLJIT_ADD) or passes it to emit_op() as the second source of the arithmetic opcode, with dst as the first. Integer conditions read EQUAL_FLAG / OTHER_FLAG; an inverted condition is obtained with XORI ... 1. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 src_r, dst_r, invert; + sljit_s32 saved_op = op; + sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? 
dst : TMP_REG2; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw)); + + if (type < SLJIT_F_EQUAL) { + src_r = OTHER_FLAG; + invert = type & 0x1; + + switch (type) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL: + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1))); + src_r = dst_r; + break; + case SLJIT_ATOMIC_STORED: + case SLJIT_ATOMIC_NOT_STORED: + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1))); + src_r = dst_r; + invert ^= 0x1; + break; + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) { + src_r = OTHER_FLAG; + break; + } + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1))); + src_r = dst_r; + invert ^= 0x1; + break; + } + } else { + invert = 0; + src_r = OTHER_FLAG; + + switch (type) { + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_F_LESS_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED: + invert = 1; + break; + } + } + + if (invert) { + FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RJ(src_r) | IMM_I12(1))); + src_r = dst_r; + } + + if (op < SLJIT_ADD) { + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_type, src_r, dst, dstw); + + if (src_r != dst_r) + return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(src_r) | IMM_I12(0)); + return SLJIT_SUCCESS; + } + + mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE; + + if (dst & SLJIT_MEM) + return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0); + return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + sljit_ins *ptr; + sljit_uw size; + sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(dst_reg) | IMM_I12(0))); + + if ((src1 & REG_MASK) == dst_reg) + src1 = (src1 & ~REG_MASK) | TMP_REG1; + + if (OFFS_REG(src1) == dst_reg) + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); + } + + FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src2_reg) | IMM_I12(0))); + } + } + + size = compiler->size; + + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w)); + } else if (src1 == SLJIT_IMM) { + if (type & SLJIT_32) + src1w = (sljit_s32)src1w; + FAIL_IF(load_immediate(compiler, dst_reg, src1w)); + } else + FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src1) | IMM_I12(0))); + + *ptr = get_jump_instruction(type & ~SLJIT_32) | IMM_I16(compiler->size - size); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_s32 invert = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if ((type & ~SLJIT_32) == SLJIT_EQUAL || (type & ~SLJIT_32) == SLJIT_NOT_EQUAL) { + if ((type & ~SLJIT_32) == SLJIT_EQUAL) + invert = 1; + FAIL_IF(push_inst(compiler, MOVGR2CF 
| FCD(F_OTHER_FLAG) | RJ(EQUAL_FLAG))); + } else { + if (get_jump_instruction(type & ~SLJIT_32) == (BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO))) + invert = 1; + FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(OTHER_FLAG))); + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src1, src1w)); + if (invert) + return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(TMP_FREG2) | FRK(src2_freg) | FCA(F_OTHER_FLAG)); + return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(TMP_FREG2) | FCA(F_OTHER_FLAG)); + } else { + if (invert) + return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src1) | FRK(src2_freg) | FCA(F_OTHER_FLAG)); + return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(src1) | FCA(F_OTHER_FLAG)); + } +} + +#undef FLOAT_DATA + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + memw &= 0x3; + + if (SLJIT_UNLIKELY(memw != 0)) { + FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(mem)) | IMM_I12(memw))); + FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK))); + } else + FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(mem & REG_MASK) | RK(OFFS_REG(mem)))); + + mem = TMP_REG1; + memw = 0; + } else if (memw > I12_MAX - SSIZE_OF(sw) || memw < I12_MIN) { + if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) { + FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw))); + memw &= 0xfff; + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, memw)); + memw = 0; + } + + if (mem & REG_MASK) + FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK))); + 
+ mem = TMP_REG1; + } else { + mem &= REG_MASK; + memw &= 0xfff; + } + + SLJIT_ASSERT((memw >= 0 && memw <= I12_MAX - SSIZE_OF(sw)) || (memw > I12_MAX && memw <= 0xfff)); + + if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) { + FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff)); + return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw); + } + + flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0); + + FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw)); + return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff); +} + +#undef TO_ARGW_HI + +static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw) +{ + sljit_s32 mem = *mem_ptr; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + *mem_ptr = TMP_REG3; + FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(mem)) | IMM_I12(memw & 0x3))); + return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(mem & REG_MASK)); + } + + if (!(mem & REG_MASK)) { + *mem_ptr = TMP_REG3; + return load_immediate(compiler, TMP_REG3, memw); + } + + mem &= REG_MASK; + + if (memw == 0) { + *mem_ptr = mem; + return SLJIT_SUCCESS; + } + + *mem_ptr = TMP_REG3; + + FAIL_IF(load_immediate(compiler, TMP_REG3, memw)); + return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(mem)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && 
!(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (!(srcdst & SLJIT_MEM)) { + if (type & SLJIT_SIMD_STORE) + ins = FRD(srcdst) | FRJ(freg) | FRK(freg); + else + ins = FRD(freg) | FRJ(srcdst) | FRK(srcdst); + + if (reg_size == 5) + ins |= VOR_V | (sljit_ins)1 << 26; + else + ins |= VOR_V; + + return push_inst(compiler, ins); + } + + ins = (type & SLJIT_SIMD_STORE) ? VST : VLD; + + if (reg_size == 5) + ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD; + + if (FAST_IS_REG(srcdst) && srcdst >= 0 && (srcdstw >= I12_MIN && srcdstw <= I12_MAX)) + return push_inst(compiler, ins | FRD(freg) | RJ((sljit_u8)srcdst) | IMM_I12(srcdstw)); + else { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + return push_inst(compiler, ins | FRD(freg) | RJ(srcdst) | IMM_I12(0)); + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + + if (reg_size == 5) + ins = (sljit_ins)1 << 25; + + return push_inst(compiler, VLDREPL | ins | FRD(freg) | RJ(src) | (sljit_ins)1 << (23 - elem_size)); + } + + if (reg_size == 5) + ins = (sljit_ins)1 << 26; + + if (type & SLJIT_SIMD_FLOAT) { + if (src == SLJIT_IMM) + return push_inst(compiler, VREPLGR2VR | ins | FRD(freg) | RJ(TMP_ZERO) | 
(sljit_ins)elem_size << 10); + + FAIL_IF(push_inst(compiler, VREPLVE | ins | FRD(freg) | FRJ(src) | RK(TMP_ZERO) | (sljit_ins)elem_size << 15)); + + if (reg_size == 5) { + ins = (sljit_ins)(0x44 << 10); + return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg)); + } + + return SLJIT_SUCCESS; + } + + ins |= VREPLGR2VR | (sljit_ins)elem_size << 10; + + if (src == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); + src = TMP_REG2; + } + + return push_inst(compiler, ins | FRD(freg) | RJ(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (type & SLJIT_SIMD_LANE_ZERO) { + ins = (reg_size == 5) ? 
((sljit_ins)1 << 26) : 0; + + if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { + FAIL_IF(push_inst(compiler, VOR_V | ins | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg))); + srcdst = TMP_FREG1; + srcdstw = 0; + } + + FAIL_IF(push_inst(compiler, VXOR_V | ins | FRD(freg) | FRJ(freg) | FRK(freg))); + } + + if (srcdst & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + if (reg_size == 5) + ins = (sljit_ins)1 << 25; + + if (type & SLJIT_SIMD_STORE) { + ins |= (sljit_ins)lane_index << 18 | (sljit_ins)(1 << (23 - elem_size)); + return push_inst(compiler, VSTELM | ins | FRD(freg) | RJ(srcdst)); + } else { + emit_op_mem(compiler, (elem_size == 3 ? WORD_DATA : (elem_size == 2 ? INT_DATA : (elem_size == 1 ? HALF_DATA : BYTE_DATA))) | LOAD_DATA, TMP_REG1, srcdst | SLJIT_MEM, 0); + srcdst = TMP_REG1; + ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10; + + if (reg_size == 5) { + if (elem_size < 2) { + FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg))); + if (lane_index >= (2 << (3 - elem_size))) { + FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1))); + FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size))))); + return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2)); + } else { + FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index))); + return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18)); + } + } else + ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26; + } + + return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)); + } + } + + if (type & SLJIT_SIMD_FLOAT) { + ins = (reg_size == 5) ? 
(sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26 : (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10; + + if (type & SLJIT_SIMD_STORE) { + FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(freg) | IMM_V(lane_index))); + return push_inst(compiler, VINSGR2VR | ins | FRD(srcdst) | RJ(TMP_REG1) | IMM_V(0)); + } else { + FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(srcdst) | IMM_V(0))); + return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(TMP_REG1) | IMM_V(lane_index)); + } + } + + if (srcdst == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw)); + srcdst = TMP_REG1; + } + + if (type & SLJIT_SIMD_STORE) { + ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10; + + if (type & SLJIT_SIMD_LANE_SIGNED) + ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18)); + else + ins |= VPICKVE2GR_U; + + if (reg_size == 5) { + if (elem_size < 2) { + if (lane_index >= (2 << (3 - elem_size))) { + if (type & SLJIT_SIMD_LANE_SIGNED) + ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18)); + else + ins |= VPICKVE2GR_U; + + FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg))); + FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1))); + return push_inst(compiler, ins | RD(srcdst) | FRJ(TMP_FREG1) | IMM_V(lane_index % (2 << (3 - elem_size)))); + } + } else { + ins ^= (sljit_ins)1 << (15 - elem_size); + ins |= (sljit_ins)1 << 26; + } + } + + return push_inst(compiler, ins | RD(srcdst) | FRJ(freg) | IMM_V(lane_index)); + } else { + ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10; + + if (reg_size == 5) { + if (elem_size < 2) { + FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg))); + if (lane_index >= (2 << (3 - elem_size))) { + FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1))); + FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | 
RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size))))); + return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2)); + } else { + FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index))); + return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18)); + } + } else + ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26; + } + + return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)); + } + + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10; + + if (reg_size == 5) { + FAIL_IF(push_inst(compiler, VREPLVEI | (sljit_ins)1 << 26 | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index % (2 << (3 - elem_size))))); + + ins = (src_lane_index < (2 << (3 - elem_size))) ? 
(sljit_ins)(0x44 << 10) : (sljit_ins)(0xee << 10); + + return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg)); + } + + return push_inst(compiler, VREPLVEI | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + ins = (type & SLJIT_SIMD_STORE) ? VST : VLD; + + if (reg_size == 5) + ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD; + + if (FAST_IS_REG(src) && src >= 0 && (srcw >= I12_MIN && srcw <= I12_MAX)) + FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(srcw))); + else { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(0))); + } + src = freg; + } + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size != 2 || elem2_size != 3) + return SLJIT_ERR_UNSUPPORTED; + + ins = 0; + if (reg_size == 5) { + ins = (sljit_ins)1 << 26; + FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16))); + } + + return push_inst(compiler, VFCVTL_D_S | ins | FRD(freg) | FRJ(src)); + } + + ins = (type & SLJIT_SIMD_EXTEND_SIGNED) ? 
VSLLWIL : (VSLLWIL | (sljit_ins)1 << 18); + + if (reg_size == 5) + ins |= (sljit_ins)1 << 26; + + do { + if (reg_size == 5) + FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16))); + + FAIL_IF(push_inst(compiler, ins | ((sljit_ins)1 << (13 + elem_size)) | FRD(freg) | FRJ(src))); + src = freg; + } while (++elem_size < elem2_size); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + + if (reg_size == 5) + ins = (sljit_ins)1 << 26; + + FAIL_IF(push_inst(compiler, VMSKLTZ | ins | (sljit_ins)(elem_size << 10) | FRD(TMP_FREG1) | FRJ(freg))); + + FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x3c << 10) | RD(dst_r) | FRJ(TMP_FREG1))); + + if (reg_size == 5) { + FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x38 << 10) | ins | RD(TMP_REG3) | FRJ(TMP_FREG1) | IMM_V(2))); + FAIL_IF(push_inst(compiler, SLLI_W | RD(TMP_REG3) | RJ(TMP_REG3) | IMM_I12(2 << (3 - elem_size)))); + FAIL_IF(push_inst(compiler, OR | RD(dst_r) | RJ(dst_r) | RK(TMP_REG3))); + } + + if (dst_r == TMP_REG2) + return emit_op_mem(compiler, ((type & SLJIT_32) ? 
INT_DATA : WORD_DATA), TMP_REG2, dst, dstw); + + return SLJIT_SUCCESS; +} +/* Emits one bitwise vector instruction - VAND_V, VOR_V or VXOR_V, chosen by the SIMD opcode in type - with dst_freg as destination and src1_freg / src2_freg as sources. reg_size must be 4 or 5; 5 additionally requires the LASX hwcap bit and sets bit 26 of the encoding. Float requests are accepted only for elem_size 2 or 3. SLJIT_SIMD_TEST returns SLJIT_SUCCESS without emitting code. NOTE(review): the switch has no default, so any other opcode leaves ins == 0; confirm the checker rejects such values. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + + if (reg_size != 5 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX)) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (SLJIT_SIMD_GET_OPCODE(type)) { + case SLJIT_SIMD_OP2_AND: + ins = VAND_V; + break; + case SLJIT_SIMD_OP2_OR: + ins = VOR_V; + break; + case SLJIT_SIMD_OP2_XOR: + ins = VXOR_V; + break; + } + + if (reg_size == 5) + ins |= (sljit_ins)1 << 26; + + return push_inst(compiler, ins | FRD(dst_freg) | FRJ(src1_freg) | FRK(src2_freg)); +} +/* Emits a single load from offset 0 of mem_reg into dst_reg. The opcode in op selects the instruction: LD_BU, LD_HU, LD_W or LD_WU for the U8, U16, MOV32 and U32 moves, LD_D for everything else. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, + sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + switch(GET_OPCODE(op)) { + case SLJIT_MOV_U8: + ins = LD_BU; + break; + case SLJIT_MOV_U16: + ins = LD_HU; + break; + case SLJIT_MOV32: + ins = LD_W; + break; + case SLJIT_MOV_U32: + ins = LD_WU; + break; + default: + ins = LD_D; + break; + } + + return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg) | IMM_I12(0)); +} +/* Emits an AMCAS_B / AMCAS_H / AMCAS_W / AMCAS_D instruction (chosen by the opcode in op) with RJ = mem_reg and RK = src_reg. Without SLJIT_SET_ATOMIC_STORED it operates directly on temp_reg. With the flag, temp_reg is first copied to TMP_REG1, which the instruction then uses; afterwards the result (narrowed with BSTRPICK_D for the unsigned sub-word opcodes) is XOR-ed with temp_reg and SLTUI ... 1 leaves the outcome in EQUAL_FLAG. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, + sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_ins ins = 0; + sljit_ins unsign = 0; + sljit_s32 tmp = 
temp_reg; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_U8: + ins = AMCAS_B; + unsign = BSTRPICK_D | (7 << 16); + break; + case SLJIT_MOV_U16: + ins = AMCAS_H; + unsign = BSTRPICK_D | (15 << 16); + break; + case SLJIT_MOV32: + ins = AMCAS_W; + break; + case SLJIT_MOV_U32: + ins = AMCAS_W; + unsign = BSTRPICK_D | (31 << 16); + break; + default: + ins = AMCAS_D; + break; + } + + if (op & SLJIT_SET_ATOMIC_STORED) { + FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(temp_reg) | RK(TMP_ZERO))); + tmp = TMP_REG1; + } + FAIL_IF(push_inst(compiler, ins | RD(tmp) | RJ(mem_reg) | RK(src_reg))); + if (!(op & SLJIT_SET_ATOMIC_STORED)) + return SLJIT_SUCCESS; + + if (unsign) + FAIL_IF(push_inst(compiler, unsign | RD(tmp) | RJ(tmp))); + + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(tmp) | RK(temp_reg))); + return push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(EQUAL_FLAG) | IMM_I12(1)); +} +/* Loads init_value into dst with a fixed four-instruction sequence (LU12I_W, LU32I_D, LU52I_D, ORI) that is never shortened; this is the layout sljit_set_jump_addr() asserts when it patches the value later. last_ins is unused. */ +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins) +{ + SLJIT_UNUSED_ARG(last_ins); + + FAIL_IF(push_inst(compiler, LU12I_W | RD(dst) | (sljit_ins)(((init_value & 0xffffffff) >> 12) << 5))); + FAIL_IF(push_inst(compiler, LU32I_D | RD(dst) | (sljit_ins)(((init_value >> 32) & 0xfffff) << 5))); + FAIL_IF(push_inst(compiler, LU52I_D | RD(dst) | RJ(dst) | (sljit_ins)(IMM_I12(init_value >> 52)))); + return push_inst(compiler, ORI | RD(dst) | RJ(dst) | IMM_I12(init_value)); +} +/* Patches the four-instruction constant sequence at addr so that it yields new_target: bits 12-31 go into LU12I_W, bits 32-51 into LU32I_D, bits 52-63 into LU52I_D, and the low 12 bits into the last instruction, which is either ORI (low 12 bits as is) or JIRL (low 12 bits shifted right by two). The masks keep the opcode and register fields. The four words are bracketed by SLJIT_UPDATE_WX_FLAGS(..., 0) and (..., 1), and SLJIT_CACHE_FLUSH is applied to the address adjusted by executable_offset. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins*)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0); + + SLJIT_ASSERT((inst[0] & OPC_1RI20(0x7f)) == LU12I_W); + inst[0] = (inst[0] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target & 0xffffffff) >> 12) << 5); + + SLJIT_ASSERT((inst[1] & 
OPC_1RI20(0x7f)) == LU32I_D); + inst[1] = (inst[1] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target >> 32) & 0xfffff) << 5); + + SLJIT_ASSERT((inst[2] & OPC_2RI12(0x3ff)) == LU52I_D); + inst[2] = (inst[2] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target >> 52); + + SLJIT_ASSERT((inst[3] & OPC_2RI12(0x3ff)) == ORI || (inst[3] & OPC_2RI16(0x3f)) == JIRL); + if ((inst[3] & OPC_2RI12(0x3ff)) == ORI) + inst[3] = (inst[3] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target); + else + inst[3] = (inst[3] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I12((new_target & 0xfff) >> 2); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1); + + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 4); +} +/* Registers a sljit_const record and emits the patchable emit_const() sequence for init_value. The value is built in dst when dst is a register, otherwise in TMP_REG2 and then stored to the memory operand as a word. Returns the record; the CHECK_PTR / PTR_FAIL_IF macros bail out early on failure. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, 0)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + + return const_; +} +/* Registers a mov_addr jump record, pushes one placeholder word that holds only the destination register number, and reserves JUMP_MAX_SIZE - 1 further instruction slots in compiler->size (presumably rewritten into the real address load during code generation; confirm there). A memory destination is written from TMP_REG2 afterwards. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_jump *jump; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); + + compiler->size += JUMP_MAX_SIZE - 1; + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} diff --git a/src/sljit/sljitNativeMIPS_32.c b/src/sljit/sljitNativeMIPS_32.c new file mode 100644 index 0000000..91153e5 --- /dev/null +++ b/src/sljit/sljitNativeMIPS_32.c @@ -0,0 +1,472 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* mips 32-bit arch dependent functions. */ + +static sljit_s32 emit_copysign(struct sljit_compiler *compiler, sljit_s32 op, + sljit_sw src1, sljit_sw src2, sljit_sw dst) +{ + int is_32 = (op & SLJIT_32); + sljit_ins mfhc = MFC1, mthc = MTC1; + sljit_ins src1_r = FS(src1), src2_r = FS(src2), dst_r = FS(dst); + + if (!is_32) { + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + mfhc = MFHC1; + mthc = MTHC1; + break; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + src1_r |= (1 << 11); + src2_r |= (1 << 11); + dst_r |= (1 << 11); + break; + } + } + + FAIL_IF(push_inst(compiler, mfhc | T(TMP_REG1) | src1_r, DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, mfhc | T(TMP_REG2) | src2_r, DR(TMP_REG2))); + if (!is_32 && src1 != dst) + FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(src1) | FD(dst), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + else + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + FAIL_IF(push_inst(compiler, XOR | T(TMP_REG1) | D(TMP_REG2) | S(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SRL | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SLL | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, XOR | T(TMP_REG2) | D(TMP_REG1) | S(TMP_REG1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, mthc | T(TMP_REG1) | dst_r, 
MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + if (mthc == MTC1) + return push_inst(compiler, NOP, UNMOVABLE_INS); +#endif /* MIPS III */ + return SLJIT_SUCCESS; +} +/* Loads imm into dst_ar with the shortest sequence: a single ORI when no bit above the low 16 is set, a single ADDIU when imm is negative and not below SIMM_MIN, otherwise LUI with the upper half followed by ORI only when the low 16 bits are non-zero. dst_ar is used directly with TA(); callers in this file pass DR(reg). */ +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm) +{ + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + if (imm < 0 && imm >= SIMM_MIN) + return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar)); + return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS; +} +/* Loads init_value into dst with an unconditional LUI + ORI pair (both instructions are emitted even when a half is zero); sljit_set_jump_addr() in this file asserts exactly that pair when it rewrites the value. */ +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst))); + return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); +} +/* Sets freg to the double value. The two 32-bit halves are read through a union whose field order follows SLJIT_LITTLE_ENDIAN; a non-zero half is loaded into TMP_REG1 (lo) or TMP_REG2 (hi), a zero half uses TA(0) instead. The low half is moved with MTC1; the high half with MTHC1 or with a second MTC1 to FS(freg) | (1 << 11), depending on CPU_FEATURE_FR. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + struct { +#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN + sljit_s32 lo; + sljit_s32 hi; +#else /* !SLJIT_LITTLE_ENDIAN */ + sljit_s32 hi; + sljit_s32 lo; +#endif /* SLJIT_LITTLE_ENDIAN */ + } bin; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.bin.lo != 0) + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), u.bin.lo)); + if (u.bin.hi != 0) + FAIL_IF(load_immediate(compiler, DR(TMP_REG2), u.bin.hi)); + + FAIL_IF(push_inst(compiler, MTC1 | (u.bin.lo != 0 ? T(TMP_REG1) : TA(0)) | FS(freg), MOVABLE_INS)); + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + return push_inst(compiler, MTHC1 | (u.bin.hi != 0 ? 
T(TMP_REG2) : TA(0)) | FS(freg), MOVABLE_INS); +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + FAIL_IF(push_inst(compiler, MTC1 | (u.bin.hi != 0 ? T(TMP_REG2) : TA(0)) | FS(freg) | (1 << 11), MOVABLE_INS)); /* without CPU_FEATURE_FR the hi word goes to the register selected by setting the lowest bit of the FS field */ + break; + } +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ /* Emits a raw copy between the float register freg and the general register reg (or a register pair). SLJIT_COPY_TO_F64 copies towards freg (MTC1 / MTHC1); every other opcode copies from freg (MFC1 / MFHC1). */ + sljit_s32 reg2 = 0; + sljit_ins inst = FS(freg); + sljit_ins mthc = MTC1, mfhc = MFC1; + int is_32 = (op & SLJIT_32); + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + op = GET_OPCODE(op); + if (reg & REG_PAIR_MASK) { /* register pair: the second register is transferred here with plain MTC1 / MFC1, the first one by the common code below */ + reg2 = REG_PAIR_SECOND(reg); + reg = REG_PAIR_FIRST(reg); + + inst |= T(reg2); + + if (op == SLJIT_COPY_TO_F64) + FAIL_IF(push_inst(compiler, MTC1 | inst, MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, MFC1 | inst, DR(reg2))); + + inst = FS(freg) | (1 << 11); /* default for the first register: FS field with its lowest bit set */ +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + if (cpu_feature_list & CPU_FEATURE_FR) { + mthc = MTHC1; + mfhc = MFHC1; + inst = FS(freg); + } +#endif /* SLJIT_MIPS_REV >= 2 */ + } + + inst |= T(reg); + if (!is_32 && !reg2) { /* 64-bit operation with a single general register: only the other 32-bit half is transferred */ + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + mthc = MTHC1; + mfhc = MFHC1; + break; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + inst |= (1 << 11); + break; + } + } + + if (op == SLJIT_COPY_TO_F64) + FAIL_IF(push_inst(compiler, mthc | inst, MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, mfhc | inst, DR(reg))); + +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + if (mthc == MTC1 || mfhc == MFC1) + return push_inst(compiler, NOP, UNMOVABLE_INS); +#endif /* MIPS III */ + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ /* Patches the two instructions at addr so that their 16-bit immediates hold new_target. */ + sljit_ins *inst = 
(sljit_ins *)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + SLJIT_ASSERT((inst[0] & 0xffe00000) == LUI && (inst[1] & 0xfc000000) == ORI); /* the patched pair must be LUI followed by ORI */ + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff); /* upper 16 bits of new_target go into inst[0], lower 16 bits into inst[1]; the opcode halves are kept */ + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); /* flush is issued for the address adjusted by executable_offset */ +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ /* A constant is patched exactly like a jump target. */ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr, sljit_u32 *extra_space) +{ /* Emits the instructions that place the call arguments described by arg_types. On entry *extra_space carries the call type flags (only SLJIT_CALL_RETURN is tested); on exit it holds the number of stack bytes reserved (0 if none). The last argument instruction is not emitted but returned through ins_ptr. */ + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 types = 0; + sljit_ins prev_ins = NOP; + sljit_ins ins = NOP; + sljit_u8 offsets[4]; /* one entry per argument: its byte offset, or the marker 254 / 255 */ + sljit_u8 *offsets_ptr = offsets; +#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN + sljit_ins f64_hi = TA(7), f64_lo = TA(6); +#else + sljit_ins f64_hi = TA(6), f64_lo = TA(7); +#endif /* SLJIT_LITTLE_ENDIAN */ + + SLJIT_ASSERT(reg_map[TMP_REG2] == 4 && freg_map[TMP_FREG1] == 12); + + arg_types >>= SLJIT_ARG_SHIFT; /* skips the first entry of arg_types (presumably the return type - confirm) */ + + /* See ABI description in sljit_emit_enter. 
*/ + + while (arg_types) { /* first pass: compute the offset of every argument and collect the types in reverse order */ + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + *offsets_ptr = (sljit_u8)offset; + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) { /* f64 starts on an 8-byte boundary: skip one word */ + offset += sizeof(sljit_sw); + *offsets_ptr = (sljit_u8)offset; + } + + if (word_arg_count == 0 && float_arg_count <= 1) + *offsets_ptr = (sljit_u8)(254 + float_arg_count); /* marker 254 (first) or 255 (second) replaces the offset of a float argument that no word argument precedes */ + + offset += sizeof(sljit_f64); + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + if (word_arg_count == 0 && float_arg_count <= 1) + *offsets_ptr = (sljit_u8)(254 + float_arg_count); + + offset += sizeof(sljit_f32); + float_arg_count++; + break; + default: + offset += sizeof(sljit_sw); + word_arg_count++; + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + offsets_ptr++; + } + + /* Stack is aligned to 16 bytes. */ + SLJIT_ASSERT(offset <= 8 * sizeof(sljit_sw)); + + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { /* the arguments extend past the first four words and stack space has to be reserved for them */ + if (is_tail_call) { + offset = (offset + sizeof(sljit_sw) + 15) & ~(sljit_uw)0xf; /* one extra word, rounded up to a multiple of 16 */ + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset, &prev_ins)); + *extra_space = offset; + } else { + FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP))); + *extra_space = 16; + } + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, 0, &prev_ins)); + *extra_space = 0; + } + + while (types) { /* second pass: arguments are handled from last to first, matching the reversed order stored in types */ + --offsets_ptr; + + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (*offsets_ptr < 4 * sizeof(sljit_sw)) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + + /* Must be preceded by at least one other argument, + * and its starting offset must be 8 because of alignment. 
*/ + SLJIT_ASSERT((*offsets_ptr >> 2) == 2); + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + prev_ins = MFHC1 | f64_hi | FS(float_arg_count); + break; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + prev_ins = MFC1 | f64_hi | FS(float_arg_count) | (1 << 11); + break; + } + ins = MFC1 | f64_lo | FS(float_arg_count); /* the f64 is split over absolute registers 6 and 7; f64_hi / f64_lo select which one receives which half */ + } else if (*offsets_ptr < 254) + ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr); /* real offset beyond the first four words: store relative to SLJIT_SP */ + else if (*offsets_ptr == 254) + ins = MOV_fmt(FMT_D) | FS(SLJIT_FR0) | FD(TMP_FREG1); /* marker 254: move SLJIT_FR0 to TMP_FREG1; marker 255 produces no instruction */ + + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F32: + if (*offsets_ptr < 4 * sizeof (sljit_sw)) + ins = MFC1 | TA(4 + (*offsets_ptr >> 2)) | FS(float_arg_count); + else if (*offsets_ptr < 254) + ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr); + else if (*offsets_ptr == 254) + ins = MOV_fmt(FMT_S) | FS(SLJIT_FR0) | FD(TMP_FREG1); + + float_arg_count--; + break; + default: + if (*offsets_ptr >= 4 * sizeof (sljit_sw)) + ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(*offsets_ptr); + else if ((*offsets_ptr >> 2) != word_arg_count - 1) + ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (*offsets_ptr >> 2)); /* the word slot index differs from the word argument index: move to absolute register 4 plus slot index */ + else if (*offsets_ptr == 0) + ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4); + + word_arg_count--; + break; + } + + if (ins != NOP) { /* keep exactly one instruction pending: emit the previous one and remember the new one */ + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + prev_ins = ins; + ins = NOP; + } + + types >>= SLJIT_ARG_SHIFT; + } + + *ins_ptr = prev_ins; /* the last pending instruction (or NOP) is handed to the caller unemitted */ + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ /* Emits a call through PIC_ADDR_REG (a tail call when SLJIT_CALL_RETURN is set) and returns the jump record describing it. */ + struct sljit_jump *jump; + sljit_u32 extra_space = 0; + sljit_ins ins = NOP; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & 
SLJIT_REWRITABLE_JUMP); + + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + extra_space = (sljit_u32)type; /* call_with_args tests SLJIT_CALL_RETURN in this value and overwrites it with the stack space it reserved */ + PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space)); + } else if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25); + + if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + + if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) { /* a tail call that reserved stack space is still emitted as JALR, because the code after the call below has to run */ + jump->flags |= IS_JAL; + + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + jump->flags |= IS_CALL; + + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + } else + PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); /* the pending instruction (NOP if there is none) is emitted directly after the jump */ + + /* Maximum number of instructions required for generating a constant. */ + compiler->size += 2; + + if (extra_space == 0) + return jump; + + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, + SLJIT_MEM1(SLJIT_SP), (sljit_sw)(extra_space - sizeof(sljit_sw)))); /* reloads RETURN_ADDR_REG from the last word of the reserved area */ + + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + + PTR_FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(extra_space), + (type & SLJIT_CALL_RETURN) ? 
UNMOVABLE_INS : DR(SLJIT_SP))); /* releases the reserved stack space; for a tail call this is emitted right after the JR above */ + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ /* Emits an indirect call whose target comes from src / srcw (memory, immediate or register). Except on the SLJIT_CALL_REG_ARG path, the target is moved to PIC_ADDR_REG before the jump. */ + sljit_u32 extra_space = (sljit_u32)type; + sljit_ins ins; /* NOTE(review): not initialised here, while the 64-bit variant of this function uses = NOP; every read below follows a call that receives &ins - confirm that emit_stack_frame_release always stores to it */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { /* memory operand: load the target address into PIC_ADDR_REG first */ + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + src = PIC_ADDR_REG; + srcw = 0; + } + + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { /* register-argument call: no argument shuffling, finished as a plain indirect jump */ + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { /* target sits in a saved register: copy it to PIC_ADDR_REG before the frame release (presumably because the release restores saved registers - confirm) */ + FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + src = PIC_ADDR_REG; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + + if (ins != NOP) + FAIL_IF(push_inst(compiler, ins, MOVABLE_INS)); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); + } + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25); + + if (src == SLJIT_IMM) + FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); + else if (src != PIC_ADDR_REG) + FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + + FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space)); /* extra_space goes in holding the type flags and comes back holding the reserved stack size */ + + /* Register input. 
*/ + if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) + FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); /* JALR is also used for a tail call that reserved stack space, because the code below has to run after the call */ + else + FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); /* the pending instruction from call_with_args (NOP if there is none) is emitted directly after the jump */ + + if (extra_space == 0) + return SLJIT_SUCCESS; + + if (type & SLJIT_CALL_RETURN) + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, + SLJIT_MEM1(SLJIT_SP), (sljit_sw)(extra_space - sizeof(sljit_sw)))); /* reloads RETURN_ADDR_REG from the last word of the reserved area */ + + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + + return push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(extra_space), + (type & SLJIT_CALL_RETURN) ? UNMOVABLE_INS : DR(SLJIT_SP)); /* releases the reserved stack space; for a tail call this is emitted right after the JR above */ +} diff --git a/src/sljit/sljitNativeMIPS_64.c b/src/sljit/sljitNativeMIPS_64.c new file mode 100644 index 0000000..b9f03a7 --- /dev/null +++ b/src/sljit/sljitNativeMIPS_64.c @@ -0,0 +1,387 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* mips 64-bit arch dependent functions. */ + +static sljit_s32 emit_copysign(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_s32 src2, sljit_s32 dst) +{ /* Emits the copysign sequence: src1 and src2 are moved to TMP_REG1 / TMP_REG2, the top bit of the src2 value is merged into the src1 value with XOR and shifts, and the result is moved to dst. SELECT_OP picks between the two listed opcodes (presumably by operand width - confirm against its definition). */ + FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | T(TMP_REG1) | FS(src1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | T(TMP_REG2) | FS(src2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG2) | T(TMP_REG1) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2))); /* the right shift followed by the matching left shift leaves only the top bit of the XOR result */ + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | T(TMP_REG2) | D(TMP_REG1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SELECT_OP(DMTC1, MTC1) | T(TMP_REG1) | FS(dst), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + if (!(op & SLJIT_32)) + return push_inst(compiler, NOP, UNMOVABLE_INS); +#endif /* MIPS III */ + return SLJIT_SUCCESS; +} + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm) +{ /* Loads the 64-bit constant imm into the absolute register dst_ar, choosing the shortest of several instruction sequences. */ + sljit_s32 shift = 32; + sljit_s32 shift2; + sljit_s32 inv = 0; + sljit_ins ins; + sljit_uw uimm; + + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); /* only the low 16 bits are set: a single ORI */ + + if (imm < 0 && imm >= SIMM_MIN) + return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | 
IMM(imm), dst_ar); + + if (imm <= 0x7fffffffl && imm >= -0x80000000l) { /* value fits in a signed 32-bit range: LUI, then ORI if the low 16 bits are non-zero */ + FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar)); + return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS; + } + + /* Zero extended number. */ + uimm = (sljit_uw)imm; + if (imm < 0) { /* negative value: search on the complement, undone again below when inv is set */ + uimm = ~(sljit_uw)imm; + inv = 1; + } + + while (!(uimm & 0xff00000000000000l)) { /* shift uimm left by 8, then 4, then 2 bits while its top bits are clear; shift shrinks by the same amount */ + shift -= 8; + uimm <<= 8; + } + + if (!(uimm & 0xf000000000000000l)) { + shift -= 4; + uimm <<= 4; + } + + if (!(uimm & 0xc000000000000000l)) { + shift -= 2; + uimm <<= 2; + } + + if ((sljit_sw)uimm < 0) { /* top bit set: step back by one so that the two top bits become 01, as asserted next */ + uimm >>= 1; + shift += 1; + } + SLJIT_ASSERT(((uimm & 0xc000000000000000l) == 0x4000000000000000l) && (shift > 0) && (shift <= 32)); + + if (inv) + uimm = ~uimm; + + FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(uimm >> 48), dst_ar)); + if (uimm & 0x0000ffff00000000l) + FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 32), dst_ar)); /* the top 32 bits of the normalised value are now in dst_ar */ + + imm &= (1l << shift) - 1; + if (!(imm & ~0xffff)) { /* the remaining low bits fit in 16 bits: one shift, then ORI if they are non-zero */ + ins = (shift == 32) ? DSLL32 : DSLL; + if (shift < 32) + ins |= SH_IMM(shift); + FAIL_IF(push_inst(compiler, ins | TA(dst_ar) | DA(dst_ar), dst_ar)); + return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar); + } + + /* Double shifts needs to be performed. 
*/ + uimm <<= 32; + shift2 = shift - 16; + + while (!(uimm & 0xf000000000000000l)) { /* second normalisation: shift uimm left until its top bit is set, tracking the amount in shift2 */ + shift2 -= 4; + uimm <<= 4; + } + + if (!(uimm & 0xc000000000000000l)) { + shift2 -= 2; + uimm <<= 2; + } + + if (!(uimm & 0x8000000000000000l)) { + shift2--; + uimm <<= 1; + } + + SLJIT_ASSERT((uimm & 0x8000000000000000l) && (shift2 > 0) && (shift2 <= 16)); + + FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift - shift2), dst_ar)); + FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 48), dst_ar)); + FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift2), dst_ar)); /* shift, OR in the next 16-bit group, shift again; the two shift amounts add up to shift */ + + imm &= (1l << shift2) - 1; + return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar); +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) +{ /* Always emits a fixed six-instruction sequence that builds init_value 16 bits at a time. The immediates sit in instructions 0, 1, 3 and 5, the same indices that sljit_set_jump_addr rewrites. */ + FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 48), DR(dst))); + FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value >> 32), DR(dst))); + FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst))); + FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value >> 16), DR(dst))); + FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ /* Emits code that sets freg to the f64 constant value; the bit pattern is read through the union as one machine word. */ + union { + sljit_sw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) { /* all-zero bit pattern: move absolute register 0 directly, no temporary needed */ + FAIL_IF(push_inst(compiler, DMTC1 | TA(0) | FS(freg), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + return SLJIT_SUCCESS; + } + + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), u.imm)); + FAIL_IF(push_inst(compiler, 
DMTC1 | T(TMP_REG1) | FS(freg), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ /* Emits a raw copy between the float register freg and the general register reg. SLJIT_COPY_TO_F64 copies towards freg; every other opcode copies from freg. */ + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + inst = T(reg) | FS(freg); + + if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) + FAIL_IF(push_inst(compiler, SELECT_OP(DMTC1, MTC1) | inst, MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | inst, DR(reg))); + +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + if (!(op & SLJIT_32)) + return push_inst(compiler, NOP, UNMOVABLE_INS); +#endif /* MIPS III */ + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ /* Patches the six-instruction sequence at addr so that its four 16-bit immediates hold new_target. */ + sljit_ins *inst = (sljit_ins *)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 0); + inst[0] = (inst[0] & 0xffff0000) | ((sljit_ins)(new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((sljit_ins)(new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((sljit_ins)(new_target >> 16) & 0xffff); + inst[5] = (inst[5] & 0xffff0000) | ((sljit_ins)new_target & 0xffff); /* instructions 2 and 4 are left untouched; only the low 16 bits of 0, 1, 3 and 5 are replaced */ + SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 6); /* flush is issued for the address adjusted by executable_offset */ +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ /* A constant is patched exactly like a jump target. */ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr) +{ /* Emits the register moves that place the call arguments described by arg_types. *ins_ptr supplies an already pending instruction on entry and receives the last unemitted one on exit. */ + sljit_s32 arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_ins prev_ins = 
*ins_ptr; + sljit_ins ins = NOP; + + SLJIT_ASSERT(reg_map[TMP_REG2] == 4 && freg_map[TMP_FREG1] == 12); + + arg_types >>= SLJIT_ARG_SHIFT; /* skips the first entry of arg_types (presumably the return type - confirm) */ + + while (arg_types) { /* first pass: count the arguments and collect their types in reverse order */ + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: + arg_count++; + float_arg_count++; + break; + default: + arg_count++; + word_arg_count++; + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + while (types) { /* second pass: arguments are handled from last to first; the counters double as the current positions */ + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (arg_count != float_arg_count) + ins = MOV_fmt(FMT_D) | FS(float_arg_count) | FD(arg_count); /* overall position differs from the position among float arguments: move to the register indexed by the overall position */ + else if (arg_count == 1) + ins = MOV_fmt(FMT_D) | FS(SLJIT_FR0) | FD(TMP_FREG1); /* first argument: SLJIT_FR0 to TMP_FREG1, whose freg_map value is 12 per the assert above */ + arg_count--; + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F32: + if (arg_count != float_arg_count) + ins = MOV_fmt(FMT_S) | FS(float_arg_count) | FD(arg_count); + else if (arg_count == 1) + ins = MOV_fmt(FMT_S) | FS(SLJIT_FR0) | FD(TMP_FREG1); + arg_count--; + float_arg_count--; + break; + default: + if (arg_count != word_arg_count) + ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count); + else if (arg_count == 1) + ins = DADDU | S(SLJIT_R0) | TA(0) | DA(4); /* first argument: SLJIT_R0 to absolute register 4 */ + arg_count--; + word_arg_count--; + break; + } + + if (ins != NOP) { /* keep exactly one instruction pending: emit the previous one and remember the new one */ + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + prev_ins = ins; + ins = NOP; + } + + types >>= SLJIT_ARG_SHIFT; + } + + *ins_ptr = prev_ins; /* the last pending instruction (or NOP) is handed to the caller unemitted */ + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ /* Emits a call through PIC_ADDR_REG (a tail call when SLJIT_CALL_RETURN is set) and returns the jump record describing it. */ + struct sljit_jump *jump; + sljit_ins ins = NOP; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(emit_stack_frame_release(compiler, 
0, &ins)); + + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); /* call_with_args continues from the instruction left pending in ins */ + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25); + + if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + + if (!(type & SLJIT_CALL_RETURN)) { /* normal call: JALR with RETURN_ADDR_REG; tail call: plain JR */ + jump->flags |= IS_JAL; + + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + jump->flags |= IS_CALL; + + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + } else + PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); /* the pending instruction (NOP if there is none) is emitted directly after the jump */ + + /* Maximum number of instructions required for generating a constant. */ + compiler->size += 6; + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ /* Emits an indirect call whose target comes from src / srcw (memory, immediate or register). Except on the SLJIT_CALL_REG_ARG path, the target is moved to PIC_ADDR_REG before the jump. */ + sljit_ins ins = NOP; + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { /* memory operand: load the target address into PIC_ADDR_REG first */ + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + src = PIC_ADDR_REG; + srcw = 0; + } + + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { /* register-argument call: no argument shuffling, finished as a plain indirect jump */ + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { /* target sits in a saved register: copy it to PIC_ADDR_REG before the frame release (presumably because the release restores saved registers - confirm) */ + FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + src = PIC_ADDR_REG; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + + if (ins != NOP) + FAIL_IF(push_inst(compiler, ins, MOVABLE_INS)); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); + } + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG1); + + if (src == SLJIT_IMM) + FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); + else if (src != PIC_ADDR_REG) + FAIL_IF(push_inst(compiler, DADDU | 
S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + + if (type & SLJIT_CALL_RETURN) + FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + + FAIL_IF(call_with_args(compiler, arg_types, &ins)); /* call_with_args continues from the instruction left pending in ins and returns the new pending one */ + + /* Register input. */ + if (!(type & SLJIT_CALL_RETURN)) + FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + return push_inst(compiler, ins, UNMOVABLE_INS); /* the pending instruction (NOP if there is none) is emitted directly after the jump */ +} diff --git a/src/sljit/sljitNativeMIPS_common.c b/src/sljit/sljitNativeMIPS_common.c new file mode 100644 index 0000000..88eb30b --- /dev/null +++ b/src/sljit/sljitNativeMIPS_common.c @@ -0,0 +1,4259 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Latest MIPS architecture. */ + +#ifdef HAVE_PRCTL +#include <sys/prctl.h> +#endif + +#if !defined(__mips_hard_float) || defined(__mips_single_float) +/* Disable automatic detection, covers both -msoft-float and -mno-float */ +#define SLJIT_IS_FPU_AVAILABLE 0 +#endif + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return "MIPS32-R6" SLJIT_CPUINFO; +#else /* !SLJIT_CONFIG_MIPS_32 */ + return "MIPS64-R6" SLJIT_CPUINFO; +#endif /* SLJIT_CONFIG_MIPS_32 */ + +#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 5) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return "MIPS32-R5" SLJIT_CPUINFO; +#else /* !SLJIT_CONFIG_MIPS_32 */ + return "MIPS64-R5" SLJIT_CPUINFO; +#endif /* SLJIT_CONFIG_MIPS_32 */ + +#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return "MIPS32-R2" SLJIT_CPUINFO; +#else /* !SLJIT_CONFIG_MIPS_32 */ + return "MIPS64-R2" SLJIT_CPUINFO; +#endif /* SLJIT_CONFIG_MIPS_32 */ + +#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return "MIPS32-R1" SLJIT_CPUINFO; +#else /* !SLJIT_CONFIG_MIPS_32 */ + return "MIPS64-R1" SLJIT_CPUINFO; +#endif /* SLJIT_CONFIG_MIPS_32 */ + +#else /* SLJIT_MIPS_REV < 1 */ + return "MIPS III" SLJIT_CPUINFO; +#endif /* 
SLJIT_MIPS_REV >= 6 */ +} + +/* Length of an instruction word + Both for mips-32 and mips-64 */ +typedef sljit_u32 sljit_ins; + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) + +/* For position independent code, t9 must contain the function address. */ +#define PIC_ADDR_REG TMP_REG1 + +/* Floating point status register. */ +#define FCSR_REG 31 +/* Return address register. */ +#define RETURN_ADDR_REG 31 + +/* Flags are kept in volatile registers. */ +#define EQUAL_FLAG 3 +#define OTHER_FLAG 1 + +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { + 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 25, 4, 31, 3, 1 +}; + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) +#define TMP_FREG3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + +static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3) << 1) + 1] = { + 0, + 0, 14, 2, 4, 6, 8, 18, 30, 28, 26, 24, 22, 20, + 12, 10, 16, + 1, 15, 3, 5, 7, 9, 19, 31, 29, 27, 25, 23, 21, + 13, 11, 17 +}; + +#else /* !SLJIT_CONFIG_MIPS_32 */ + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { + 0, 0, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1, 2, 3, 4, 5, 6, 7, 8, 9, 31, 30, 29, 28, 27, 26, 25, 24, 12, 11, 10 +}; + +#endif /* SLJIT_CONFIG_MIPS_32 */ + +/* --------------------------------------------------------------------- */ +/* Instruction forms */ +/* --------------------------------------------------------------------- */ + +#define S(s) ((sljit_ins)reg_map[s] << 21) +#define T(t) ((sljit_ins)reg_map[t] << 16) +#define D(d) ((sljit_ins)reg_map[d] << 11) +#define FT(t) ((sljit_ins)freg_map[t] << 16) +#define FS(s) ((sljit_ins)freg_map[s] << 11) +#define FD(d) ((sljit_ins)freg_map[d] << 6) +/* Absolute registers. 
*/ +#define SA(s) ((sljit_ins)(s) << 21) +#define TA(t) ((sljit_ins)(t) << 16) +#define DA(d) ((sljit_ins)(d) << 11) +#define IMM(imm) ((sljit_ins)(imm) & 0xffff) +#define SH_IMM(imm) ((sljit_ins)(imm) << 6) + +#define DR(dr) (reg_map[dr]) +#define FR(dr) (freg_map[dr]) +#define HI(opcode) ((sljit_ins)(opcode) << 26) +#define LO(opcode) ((sljit_ins)(opcode)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +/* CMP.cond.fmt */ +/* S = (20 << 21) D = (21 << 21) */ +#define CMP_FMT_S (20 << 21) +#endif /* SLJIT_MIPS_REV >= 6 */ +/* S = (16 << 21) D = (17 << 21) */ +#define FMT_S (16 << 21) +#define FMT_D (17 << 21) + +#define ABS_S (HI(17) | FMT_S | LO(5)) +#define ADD_S (HI(17) | FMT_S | LO(0)) +#define ADDIU (HI(9)) +#define ADDU (HI(0) | LO(33)) +#define AND (HI(0) | LO(36)) +#define ANDI (HI(12)) +#define B (HI(4)) +#define BAL (HI(1) | (17 << 16)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define BC1EQZ (HI(17) | (9 << 21) | FT(TMP_FREG3)) +#define BC1NEZ (HI(17) | (13 << 21) | FT(TMP_FREG3)) +#else /* SLJIT_MIPS_REV < 6 */ +#define BC1F (HI(17) | (8 << 21)) +#define BC1T (HI(17) | (8 << 21) | (1 << 16)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define BEQ (HI(4)) +#define BGEZ (HI(1) | (1 << 16)) +#define BGTZ (HI(7)) +#define BLEZ (HI(6)) +#define BLTZ (HI(1) | (0 << 16)) +#define BNE (HI(5)) +#define BREAK (HI(0) | LO(13)) +#define CFC1 (HI(17) | (2 << 21)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define C_EQ_S (HI(17) | CMP_FMT_S | LO(2)) +#define C_OLE_S (HI(17) | CMP_FMT_S | LO(6)) +#define C_OLT_S (HI(17) | CMP_FMT_S | LO(4)) +#define C_UEQ_S (HI(17) | CMP_FMT_S | LO(3)) +#define C_ULE_S (HI(17) | CMP_FMT_S | LO(7)) +#define C_ULT_S (HI(17) | CMP_FMT_S | LO(5)) +#define C_UN_S (HI(17) | CMP_FMT_S | LO(1)) +#define C_FD (FD(TMP_FREG3)) +#else /* SLJIT_MIPS_REV < 6 */ +#define C_EQ_S (HI(17) | FMT_S | LO(50)) +#define C_OLE_S (HI(17) | FMT_S | LO(54)) +#define C_OLT_S (HI(17) | FMT_S | LO(52)) +#define C_UEQ_S (HI(17) | FMT_S | 
LO(51)) +#define C_ULE_S (HI(17) | FMT_S | LO(55)) +#define C_ULT_S (HI(17) | FMT_S | LO(53)) +#define C_UN_S (HI(17) | FMT_S | LO(49)) +#define C_FD (0) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define CVT_S_S (HI(17) | FMT_S | LO(32)) +#define DADDIU (HI(25)) +#define DADDU (HI(0) | LO(45)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define DDIV (HI(0) | (2 << 6) | LO(30)) +#define DDIVU (HI(0) | (2 << 6) | LO(31)) +#define DMOD (HI(0) | (3 << 6) | LO(30)) +#define DMODU (HI(0) | (3 << 6) | LO(31)) +#define DIV (HI(0) | (2 << 6) | LO(26)) +#define DIVU (HI(0) | (2 << 6) | LO(27)) +#define DMUH (HI(0) | (3 << 6) | LO(28)) +#define DMUHU (HI(0) | (3 << 6) | LO(29)) +#define DMUL (HI(0) | (2 << 6) | LO(28)) +#define DMULU (HI(0) | (2 << 6) | LO(29)) +#else /* SLJIT_MIPS_REV < 6 */ +#define DDIV (HI(0) | LO(30)) +#define DDIVU (HI(0) | LO(31)) +#define DIV (HI(0) | LO(26)) +#define DIVU (HI(0) | LO(27)) +#define DMULT (HI(0) | LO(28)) +#define DMULTU (HI(0) | LO(29)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define DIV_S (HI(17) | FMT_S | LO(3)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#define DINSU (HI(31) | LO(6)) +#endif /* SLJIT_MIPS_REV >= 2 */ +#define DMFC1 (HI(17) | (1 << 21)) +#define DMTC1 (HI(17) | (5 << 21)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#define DROTR (HI(0) | (1 << 21) | LO(58)) +#define DROTR32 (HI(0) | (1 << 21) | LO(62)) +#define DROTRV (HI(0) | (1 << 6) | LO(22)) +#define DSBH (HI(31) | (2 << 6) | LO(36)) +#define DSHD (HI(31) | (5 << 6) | LO(36)) +#endif /* SLJIT_MIPS_REV >= 2 */ +#define DSLL (HI(0) | LO(56)) +#define DSLL32 (HI(0) | LO(60)) +#define DSLLV (HI(0) | LO(20)) +#define DSRA (HI(0) | LO(59)) +#define DSRA32 (HI(0) | LO(63)) +#define DSRAV (HI(0) | LO(23)) +#define DSRL (HI(0) | LO(58)) +#define DSRL32 (HI(0) | LO(62)) +#define DSRLV (HI(0) | LO(22)) +#define DSUBU (HI(0) | LO(47)) +#define J (HI(2)) +#define JAL (HI(3)) +#define JALR (HI(0) | LO(9)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) 
+#define JR (HI(0) | LO(9)) +#else /* SLJIT_MIPS_REV < 6 */ +#define JR (HI(0) | LO(8)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define LD (HI(55)) +#define LDL (HI(26)) +#define LDR (HI(27)) +#define LDC1 (HI(53)) +#define LUI (HI(15)) +#define LW (HI(35)) +#define LWL (HI(34)) +#define LWR (HI(38)) +#define LWC1 (HI(49)) +#define MFC1 (HI(17)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#define MFHC1 (HI(17) | (3 << 21)) +#endif /* SLJIT_MIPS_REV >= 2 */ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define MOD (HI(0) | (3 << 6) | LO(26)) +#define MODU (HI(0) | (3 << 6) | LO(27)) +#else /* SLJIT_MIPS_REV < 6 */ +#define MFHI (HI(0) | LO(16)) +#define MFLO (HI(0) | LO(18)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define MTC1 (HI(17) | (4 << 21)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#define MTHC1 (HI(17) | (7 << 21)) +#endif /* SLJIT_MIPS_REV >= 2 */ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define MUH (HI(0) | (3 << 6) | LO(24)) +#define MUHU (HI(0) | (3 << 6) | LO(25)) +#define MUL (HI(0) | (2 << 6) | LO(24)) +#define MULU (HI(0) | (2 << 6) | LO(25)) +#else /* SLJIT_MIPS_REV < 6 */ +#define MULT (HI(0) | LO(24)) +#define MULTU (HI(0) | LO(25)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define MUL_S (HI(17) | FMT_S | LO(2)) +#define NEG_S (HI(17) | FMT_S | LO(7)) +#define NOP (HI(0) | LO(0)) +#define NOR (HI(0) | LO(39)) +#define OR (HI(0) | LO(37)) +#define ORI (HI(13)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#define ROTR (HI(0) | (1 << 21) | LO(2)) +#define ROTRV (HI(0) | (1 << 6) | LO(6)) +#endif /* SLJIT_MIPS_REV >= 2 */ +#define SD (HI(63)) +#define SDL (HI(44)) +#define SDR (HI(45)) +#define SDC1 (HI(61)) +#define SLT (HI(0) | LO(42)) +#define SLTI (HI(10)) +#define SLTIU (HI(11)) +#define SLTU (HI(0) | LO(43)) +#define SLL (HI(0) | LO(0)) +#define SLLV (HI(0) | LO(4)) +#define SRL (HI(0) | LO(2)) +#define SRLV (HI(0) | LO(6)) +#define SRA (HI(0) | LO(3)) +#define SRAV (HI(0) | LO(7)) +#define SUB_S (HI(17) | FMT_S | 
LO(1)) +#define SUBU (HI(0) | LO(35)) +#define SW (HI(43)) +#define SWL (HI(42)) +#define SWR (HI(46)) +#define SWC1 (HI(57)) +#define TRUNC_W_S (HI(17) | FMT_S | LO(13)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#define WSBH (HI(31) | (2 << 6) | LO(32)) +#endif /* SLJIT_MIPS_REV >= 2 */ +#define XOR (HI(0) | LO(38)) +#define XORI (HI(14)) + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) +#define CLZ (HI(28) | LO(32)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define DCLZ (LO(18)) +#else /* SLJIT_MIPS_REV < 6 */ +#define DCLZ (HI(28) | LO(36)) +#define MOVF (HI(0) | (0 << 16) | LO(1)) +#define MOVF_S (HI(17) | FMT_S | (0 << 16) | LO(17)) +#define MOVN (HI(0) | LO(11)) +#define MOVN_S (HI(17) | FMT_S | LO(19)) +#define MOVT (HI(0) | (1 << 16) | LO(1)) +#define MOVT_S (HI(17) | FMT_S | (1 << 16) | LO(17)) +#define MOVZ (HI(0) | LO(10)) +#define MOVZ_S (HI(17) | FMT_S | LO(18)) +#define MUL (HI(28) | LO(2)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define PREF (HI(51)) +#define PREFX (HI(19) | LO(15)) +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#define SEB (HI(31) | (16 << 6) | LO(32)) +#define SEH (HI(31) | (24 << 6) | LO(32)) +#endif /* SLJIT_MIPS_REV >= 2 */ +#endif /* SLJIT_MIPS_REV >= 1 */ + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define ADDU_W ADDU +#define ADDIU_W ADDIU +#define SLL_W SLL +#define SRA_W SRA +#define SUBU_W SUBU +#define STORE_W SW +#define LOAD_W LW +#else +#define ADDU_W DADDU +#define ADDIU_W DADDIU +#define SLL_W DSLL +#define SRA_W DSRA +#define SUBU_W DSUBU +#define STORE_W SD +#define LOAD_W LD +#endif + +#define MOV_fmt(f) (HI(17) | f | LO(6)) + +#define SIMM_MAX (0x7fff) +#define SIMM_MIN (-0x8000) +#define UIMM_MAX (0xffff) + +#define CPU_FEATURE_DETECTED (1 << 0) +#define CPU_FEATURE_FPU (1 << 1) +#define CPU_FEATURE_FP64 (1 << 2) +#define CPU_FEATURE_FR (1 << 3) + +static sljit_u32 cpu_feature_list = 0; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + && (defined 
SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + +static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32) +{ + if (compiler->scratches == -1) + return 0; + + if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0)) + fr -= SLJIT_F64_SECOND(0); + + return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches)) + || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0) + || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)); +} + +#endif /* SLJIT_CONFIG_MIPS_32 && SLJIT_ARGUMENT_CHECKS */ + +static void get_cpu_features(void) +{ +#if !defined(SLJIT_IS_FPU_AVAILABLE) && defined(__GNUC__) + sljit_u32 fir = 0; +#endif /* !SLJIT_IS_FPU_AVAILABLE && __GNUC__ */ + sljit_u32 feature_list = CPU_FEATURE_DETECTED; + +#if defined(SLJIT_IS_FPU_AVAILABLE) +#if SLJIT_IS_FPU_AVAILABLE + feature_list |= CPU_FEATURE_FPU; +#if SLJIT_IS_FPU_AVAILABLE == 64 + feature_list |= CPU_FEATURE_FP64; +#endif /* SLJIT_IS_FPU_AVAILABLE == 64 */ +#endif /* SLJIT_IS_FPU_AVAILABLE */ +#elif defined(__GNUC__) + __asm__ ("cfc1 %0, $0" : "=r"(fir)); + if ((fir & (0x3 << 16)) == (0x3 << 16)) + feature_list |= CPU_FEATURE_FPU; + +#if (defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64) \ + && (!defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV < 2) + if ((feature_list & CPU_FEATURE_FPU)) + feature_list |= CPU_FEATURE_FP64; +#else /* SLJIT_CONFIG_MIPS32 || SLJIT_MIPS_REV >= 2 */ + if ((fir & (1 << 22))) + feature_list |= CPU_FEATURE_FP64; +#endif /* SLJIT_CONFIG_MIPS_64 && SLJIT_MIPS_REV < 2 */ +#endif /* SLJIT_IS_FPU_AVAILABLE */ + + if ((feature_list & CPU_FEATURE_FPU) && (feature_list & CPU_FEATURE_FP64)) { +#if defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32 +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 6 + feature_list |= CPU_FEATURE_FR; +#elif defined(SLJIT_DETECT_FR) && SLJIT_DETECT_FR == 0 +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 5 + feature_list |= CPU_FEATURE_FR; +#endif /* 
SLJIT_MIPS_REV >= 5 */ +#else + sljit_s32 flag = -1; +#ifdef PR_GET_FP_MODE + /* Ask prctl for the FP mode whenever the headers define PR_GET_FP_MODE. */ + flag = prctl(PR_GET_FP_MODE); + + if (flag > 0) + feature_list |= CPU_FEATURE_FR; +#endif /* PR_GET_FP_MODE */ +#if ((defined(SLJIT_DETECT_FR) && SLJIT_DETECT_FR == 2) \ + || (!defined(PR_GET_FP_MODE) && (!defined(SLJIT_DETECT_FR) || SLJIT_DETECT_FR >= 1))) \ + && (defined(__GNUC__) && (defined(__mips) && __mips >= 2)) + if (flag < 0) { + /* Declared inside the probe so it exists in every configuration that compiles the asm. */ + sljit_f64 zero = 0.0; + + /* flag is loaded into $f17, zero is loaded through $f16, then $f17 is stored back into flag. */ + __asm__ (".set oddspreg\n" + "lwc1 $f17, %0\n" + "ldc1 $f16, %1\n" + "swc1 $f17, %0\n" + : "+m" (flag) : "m" (zero) : "$f16", "$f17"); + if (flag) + feature_list |= CPU_FEATURE_FR; + } +#endif /* (!PR_GET_FP_MODE || (PR_GET_FP_MODE && SLJIT_DETECT_FR == 2)) && __GNUC__ */ +#endif /* SLJIT_MIPS_REV >= 6 */ +#else /* !SLJIT_CONFIG_MIPS_32 */ + /* StatusFR=1 is the only mode supported by the code in MIPS64 */ + feature_list |= CPU_FEATURE_FR; +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + + cpu_feature_list = feature_list; +} + +/* dest_reg is the absolute name of the register + Useful for reordering instructions in the delay slot. 
*/ +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 delay_slot) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + SLJIT_ASSERT(delay_slot == MOVABLE_INS || delay_slot >= UNMOVABLE_INS + || (sljit_ins)delay_slot == ((ins >> 11) & 0x1f) + || (sljit_ins)delay_slot == ((ins >> 16) & 0x1f)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + compiler->delay_slot = delay_slot; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_ins invert_branch(sljit_uw flags) +{ + if (flags & IS_BIT26_COND) + return (1 << 26); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + if (flags & IS_BIT23_COND) + return (1 << 23); +#endif /* SLJIT_MIPS_REV >= 6 */ + return (1 << 16); +} + +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_sw diff; + sljit_uw target_addr; + sljit_ins *inst; + sljit_ins saved_inst; + + inst = (sljit_ins *)jump->addr; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) + goto exit; +#else + if (jump->flags & SLJIT_REWRITABLE_JUMP) + goto exit; +#endif + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->u.label != NULL); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + } + + if (jump->flags & IS_COND) + inst--; + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (jump->flags & IS_CALL) + goto preserve_addr; +#endif + + /* B instructions. */ + if (jump->flags & IS_MOVABLE) { + diff = ((sljit_sw)target_addr - (sljit_sw)inst - executable_offset) >> 2; + if (diff <= SIMM_MAX && diff >= SIMM_MIN) { + jump->flags |= PATCH_B; + + if (!(jump->flags & IS_COND)) { + inst[0] = inst[-1]; + inst[-1] = (jump->flags & IS_JAL) ? 
BAL : B; + jump->addr -= sizeof(sljit_ins); + return inst; + } + saved_inst = inst[0]; + inst[0] = inst[-1]; + inst[-1] = saved_inst ^ invert_branch(jump->flags); + jump->addr -= 2 * sizeof(sljit_ins); + return inst; + } + } else { + diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1) - executable_offset) >> 2; + if (diff <= SIMM_MAX && diff >= SIMM_MIN) { + jump->flags |= PATCH_B; + + if (!(jump->flags & IS_COND)) { + inst[0] = (jump->flags & IS_JAL) ? BAL : B; + /* Keep inst[1] */ + return inst + 1; + } + inst[0] ^= invert_branch(jump->flags); + inst[1] = NOP; + jump->addr -= sizeof(sljit_ins); + return inst + 1; + } + } + + if (jump->flags & IS_COND) { + if ((jump->flags & IS_MOVABLE) && (target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + 2 * sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { + jump->flags |= PATCH_J; + saved_inst = inst[0]; + inst[0] = inst[-1]; + inst[-1] = (saved_inst & 0xffff0000) | 3; + inst[1] = J; + inst[2] = NOP; + return inst + 2; + } + else if ((target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { + jump->flags |= PATCH_J; + inst[0] = (inst[0] & 0xffff0000) | 3; + inst[1] = NOP; + inst[2] = J; + inst[3] = NOP; + jump->addr += sizeof(sljit_ins); + return inst + 3; + } + } + else { + /* J instructions: chosen when target_addr equals the address compared below in every bit above the low 28. */ + if ((jump->flags & IS_MOVABLE) && (target_addr & ~(sljit_uw)0xfffffff) == (jump->addr & ~(sljit_uw)0xfffffff)) { + jump->flags |= PATCH_J; + inst[0] = inst[-1]; + inst[-1] = (jump->flags & IS_JAL) ? JAL : J; + jump->addr -= sizeof(sljit_ins); + return inst; + } + + if ((target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { + jump->flags |= PATCH_J; + inst[0] = (jump->flags & IS_JAL) ? 
JAL : J; + /* Keep inst[1] */ + return inst + 1; + } + } + + if (jump->flags & IS_COND) + inst++; + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +preserve_addr: + if (target_addr <= 0x7fffffff) { + jump->flags |= PATCH_ABS32; + if (jump->flags & IS_COND) + inst[-1] -= 4; + + inst[2] = inst[0]; + inst[3] = inst[1]; + return inst + 3; + } + if (target_addr <= 0x7fffffffffffl) { + jump->flags |= PATCH_ABS48; + if (jump->flags & IS_COND) + inst[-1] -= 2; + + inst[4] = inst[0]; + inst[5] = inst[1]; + return inst + 5; + } +#endif + +exit: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + inst[2] = inst[0]; + inst[3] = inst[1]; + return inst + 3; +#else + inst[6] = inst[0]; + inst[7] = inst[1]; + return inst + 7; +#endif +} + +#ifdef __GNUC__ +static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_ptr) +{ + SLJIT_CACHE_FLUSH(code, code_ptr); +} +#endif + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_uw addr; + SLJIT_UNUSED_ARG(executable_offset); + + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + if (addr < 0x80000000l) { + jump->flags |= PATCH_ABS32; + return 1; + } + + if (addr < 0x800000000000l) { + jump->flags |= PATCH_ABS48; + return 3; + } + + return 5; +} + +#endif /* SLJIT_CONFIG_MIPS_64 */ + +static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump) +{ + sljit_uw flags = jump->flags; + sljit_ins *ins = (sljit_ins*)jump->addr; + sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; + sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? 
*ins : PIC_ADDR_REG; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + ins[0] = LUI | T(reg) | IMM(addr >> 16); +#else /* !SLJIT_CONFIG_MIPS_32 */ + if (flags & PATCH_ABS32) { + SLJIT_ASSERT(addr < 0x80000000l); + ins[0] = LUI | T(reg) | IMM(addr >> 16); + } + else if (flags & PATCH_ABS48) { + SLJIT_ASSERT(addr < 0x800000000000l); + ins[0] = LUI | T(reg) | IMM(addr >> 32); + ins[1] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); + ins[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); + ins += 2; + } + else { + ins[0] = LUI | T(reg) | IMM(addr >> 48); + ins[1] = ORI | S(reg) | T(reg) | IMM((addr >> 32) & 0xffff); + ins[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); + ins[3] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); + ins[4] = DSLL | T(reg) | D(reg) | SH_IMM(16); + ins += 4; + } +#endif /* SLJIT_CONFIG_MIPS_32 */ + + ins[1] = ORI | S(reg) | T(reg) | IMM(addr & 0xffff); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + SLJIT_NEXT_DEFINE_TYPES; + sljit_sw executable_offset; + sljit_uw addr; + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_min_addr == word_count) { + SLJIT_ASSERT(!label || 
label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + + /* These structures are ordered by their address. */ + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + word_count += 2; +#else /* !SLJIT_CONFIG_MIPS_32 */ + word_count += 6; +#endif /* SLJIT_CONFIG_MIPS_32 */ + jump->addr = (sljit_uw)(code_ptr - 1); + code_ptr = detect_jump_type(jump, code, executable_offset); + } else { + jump->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + code_ptr += 1; + word_count += 1; +#else /* !SLJIT_CONFIG_MIPS_32 */ + code_ptr += mov_addr_get_length(jump, code, executable_offset); + word_count += 5; +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + } + + SLJIT_GET_NEXT_MIN(); + } + code_ptr++; + word_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + /* Store the executable address, the same translation applied to labels resolved inside the copy loop. */ + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_ADDR) ? 
jump->u.target : jump->u.label->u.addr; + buf_ptr = (sljit_ins *)jump->addr; + + if (jump->flags & PATCH_B) { + addr = (sljit_uw)((sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) - sizeof(sljit_ins)) >> 2); + SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((sljit_ins)addr & 0xffff); + break; + } + if (jump->flags & PATCH_J) { + SLJIT_ASSERT((addr & ~(sljit_uw)0xfffffff) + == (((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)); + buf_ptr[0] |= (sljit_ins)(addr >> 2) & 0x03ffffff; + break; + } + + load_addr_to_reg(jump); + } while (0); + + jump = jump->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + +#ifndef __GNUC__ + SLJIT_CACHE_FLUSH(code, code_ptr); +#else + /* GCC workaround for invalid code generation with -O2. 
*/ + sljit_cache_flush(code, code_ptr); +#endif + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + && (!defined(SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE) + case SLJIT_HAS_F64_AS_F32_PAIR: + if (!cpu_feature_list) + get_cpu_features(); + + return (cpu_feature_list & CPU_FEATURE_FR) != 0; +#endif /* SLJIT_CONFIG_MIPS_32 && SLJIT_IS_FPU_AVAILABLE */ + case SLJIT_HAS_FPU: + if (!cpu_feature_list) + get_cpu_features(); + + return (cpu_feature_list & CPU_FEATURE_FPU) != 0; + case SLJIT_HAS_ZERO_REGISTER: + case SLJIT_HAS_COPY_F32: + case SLJIT_HAS_COPY_F64: + return 1; +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CMOV: + case SLJIT_HAS_PREFETCH: + return 1; + + case SLJIT_HAS_CTZ: + return 2; +#endif /* SLJIT_MIPS_REV >= 1 */ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + case SLJIT_HAS_REV: + case SLJIT_HAS_ROT: + return 1; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + return 0; + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + SLJIT_UNUSED_ARG(type); + return 0; +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* Creates an index in data_transfer_insts array. 
*/ +#define LOAD_DATA 0x01 +#define WORD_DATA 0x00 +#define BYTE_DATA 0x02 +#define HALF_DATA 0x04 +#define INT_DATA 0x06 +#define SIGNED_DATA 0x08 +/* Separates integer and floating point registers */ +#define GPR_REG 0x0f +#define DOUBLE_DATA 0x10 +#define SINGLE_DATA 0x12 + +#define MEM_MASK 0x1f + +#define ARG_TEST 0x00020 +#define ALT_KEEP_CACHE 0x00040 +#define CUMULATIVE_OP 0x00080 +#define LOGICAL_OP 0x00100 +#define IMM_OP 0x00200 +#define MOVE_OP 0x00400 +#define SRC2_IMM 0x00800 + +#define UNUSED_DEST 0x01000 +#define REG_DEST 0x02000 +#define REG1_SOURCE 0x04000 +#define REG2_SOURCE 0x08000 +#define SLOW_SRC1 0x10000 +#define SLOW_SRC2 0x20000 +#define SLOW_DEST 0x40000 + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw); +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr); + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define SELECT_OP(d, w) (w) +#else +#define SELECT_OP(d, w) (!(op & SLJIT_32) ? 
(d) : (w)) +#endif + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#include "sljitNativeMIPS_32.c" +#else +#include "sljitNativeMIPS_64.c" +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_ins base; + sljit_s32 i, tmp, offset; + sljit_s32 arg_count, word_arg_count, float_arg_count; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + local_size += SSIZE_OF(sw); + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); + } + + local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; +#else + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); + local_size = (local_size + SLJIT_LOCALS_OFFSET + 31) & ~0x1f; +#endif + compiler->local_size = local_size; + + offset = 0; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (!(options & SLJIT_ENTER_REG_ARG)) { + tmp = arg_types >> SLJIT_ARG_SHIFT; + arg_count = 0; + + while (tmp) { + offset = arg_count; + if ((tmp & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) { + if ((arg_count & 0x1) != 0) + arg_count++; + arg_count++; + } + + arg_count++; + tmp >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = (sljit_uw)arg_count << 2; + offset = (offset >= 4) ? (offset << 2) : 0; + } +#endif /* SLJIT_CONFIG_MIPS_32 */ + + if (local_size + offset <= -SIMM_MIN) { + /* Frequent case. 
*/ + FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP))); + base = S(SLJIT_SP); + offset = local_size - SSIZE_OF(sw); + } else { + FAIL_IF(load_immediate(compiler, OTHER_FLAG, local_size)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | TA(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP))); + base = S(TMP_REG1); + offset = -SSIZE_OF(sw); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + local_size = 0; +#endif + } + + FAIL_IF(push_inst(compiler, STORE_W | base | TA(RETURN_ADDR_REG) | IMM(offset), UNMOVABLE_INS)); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STORE_W | base | T(i) | IMM(offset), MOVABLE_INS)); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STORE_W | base | T(i) | IMM(offset), MOVABLE_INS)); + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + /* This alignment is valid because offset is not used after storing FPU regs. */ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, SDC1 | base | FT(i) | IMM(offset), MOVABLE_INS)); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, SDC1 | base | FT(i) | IMM(offset), MOVABLE_INS)); + } + + if (options & SLJIT_ENTER_REG_ARG) + return SLJIT_SUCCESS; + + arg_types >>= SLJIT_ARG_SHIFT; + arg_count = 0; + word_arg_count = 0; + float_arg_count = 0; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + /* The first maximum two floating point arguments are passed in floating point + registers if no integer argument precedes them. 
The first 16 byte data is + passed in four integer registers, the rest is placed onto the stack. + The floating point registers are also part of the first 16 byte data, so + their corresponding integer registers are not used when they are present. */ + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + if ((arg_count & 0x1) != 0) + arg_count++; + + if (word_arg_count == 0 && float_arg_count <= 2) { + if (float_arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_fmt(FMT_D) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + } else if (arg_count < 4) { + FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS)); + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + FAIL_IF(push_inst(compiler, MTHC1 | TA(5 + arg_count) | FS(float_arg_count), MOVABLE_INS)); + break; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + FAIL_IF(push_inst(compiler, MTC1 | TA(5 + arg_count) | FS(float_arg_count) | (1 << 11), MOVABLE_INS)); + break; + } + } else + FAIL_IF(push_inst(compiler, LDC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS)); + arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + + if (word_arg_count == 0 && float_arg_count <= 2) { + if (float_arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + } else if (arg_count < 4) + FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, LWC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS)); + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count != arg_count + 1 || arg_count == 0) + tmp = word_arg_count; + else + break; + + if (arg_count < 4) + 
FAIL_IF(push_inst(compiler, ADDU_W | SA(4 + arg_count) | TA(0) | D(tmp), DR(tmp))); + else + FAIL_IF(push_inst(compiler, LW | base | T(tmp) | IMM(local_size + (arg_count << 2)), DR(tmp))); + break; + } + arg_count++; + arg_types >>= SLJIT_ARG_SHIFT; + } + + SLJIT_ASSERT(compiler->args_size == (sljit_uw)arg_count << 2); +#else /* !SLJIT_CONFIG_MIPS_32 */ + while (arg_types) { + arg_count++; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + if (arg_count != float_arg_count) + FAIL_IF(push_inst(compiler, MOV_fmt(FMT_D) | FS(arg_count) | FD(float_arg_count), MOVABLE_INS)); + else if (arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_fmt(FMT_D) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + if (arg_count != float_arg_count) + FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(arg_count) | FD(float_arg_count), MOVABLE_INS)); + else if (arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count != arg_count || word_arg_count <= 1) + tmp = word_arg_count; + else + break; + + FAIL_IF(push_inst(compiler, ADDU_W | SA(3 + arg_count) | TA(0) | D(tmp), DR(tmp))); + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } +#endif /* SLJIT_CONFIG_MIPS_32 */ + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += 
GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + local_size += SSIZE_OF(sw); + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); + } + + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; +#else + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 31) & ~0x1f; +#endif + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr) +{ + sljit_s32 local_size, i, tmp, offset; + sljit_s32 load_return_addr = (frame_size == 0); + sljit_s32 scratches = compiler->scratches; + sljit_s32 saveds = compiler->saveds; + sljit_s32 fsaveds = compiler->fsaveds; + sljit_s32 fscratches = compiler->fscratches; + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + + SLJIT_ASSERT(frame_size == 1 || (frame_size & 0xf) == 0); + frame_size &= ~0xf; + + local_size = compiler->local_size; + + tmp = GET_SAVED_REGISTERS_SIZE(scratches, saveds - kept_saveds_count, 1); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((tmp & SSIZE_OF(sw)) != 0) + tmp += SSIZE_OF(sw); + tmp += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); + } +#else + tmp += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); +#endif + + if (local_size <= SIMM_MAX) { + if (local_size < frame_size) { + FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(local_size - frame_size), DR(SLJIT_SP))); + local_size = frame_size; + } + } else { + if (tmp < frame_size) + tmp = frame_size; + + FAIL_IF(load_immediate(compiler, DR(TMP_REG2), local_size - tmp)); + FAIL_IF(push_inst(compiler, ADDU_W | 
S(SLJIT_SP) | T(TMP_REG2) | D(SLJIT_SP), DR(SLJIT_SP))); + local_size = tmp; + } + + SLJIT_ASSERT(local_size >= frame_size); + + offset = local_size - SSIZE_OF(sw); + if (load_return_addr) + FAIL_IF(push_inst(compiler, LOAD_W | S(SLJIT_SP) | TA(RETURN_ADDR_REG) | IMM(offset), RETURN_ADDR_REG)); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - kept_saveds_count; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, LOAD_W | S(SLJIT_SP) | T(i) | IMM(offset), MOVABLE_INS)); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, LOAD_W | S(SLJIT_SP) | T(i) | IMM(offset), MOVABLE_INS)); + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + /* This alignment is valid because offset is not used after storing FPU regs. */ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LDC1 | S(SLJIT_SP) | FT(i) | IMM(offset), MOVABLE_INS)); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LDC1 | S(SLJIT_SP) | FT(i) | IMM(offset), MOVABLE_INS)); + } + + if (local_size > frame_size) + *ins_ptr = ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(local_size - frame_size); + else + *ins_ptr = NOP; + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + /* Propagate failures: ins is written only after every restore instruction was pushed. */ + FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + + FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + return push_inst(compiler, ins, UNMOVABLE_INS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, 
src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + src = PIC_ADDR_REG; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + src = PIC_ADDR_REG; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1, &ins)); + + if (src != SLJIT_IMM) { + FAIL_IF(push_inst(compiler, JR | S(src), UNMOVABLE_INS)); + return push_inst(compiler, ins, UNMOVABLE_INS); + } + + if (ins != NOP) + FAIL_IF(push_inst(compiler, ins, MOVABLE_INS)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define ARCH_32_64(a, b) a +#else +#define ARCH_32_64(a, b) b +#endif + +static const sljit_ins data_transfer_insts[16 + 4] = { +/* u w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */), +/* u w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */), +/* u b s */ HI(40) /* sb */, +/* u b l */ HI(36) /* lbu */, +/* u h s */ HI(41) /* sh */, +/* u h l */ HI(37) /* lhu */, +/* u i s */ HI(43) /* sw */, +/* u i l */ ARCH_32_64(HI(35) /* lw */, HI(39) /* lwu */), + +/* s w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */), +/* s w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */), +/* s b s */ HI(40) /* sb */, +/* s b l */ HI(32) /* lb */, +/* s h s */ HI(41) /* sh */, +/* s h l */ HI(33) /* lh */, +/* s i s */ HI(43) /* sw */, +/* s i l */ HI(35) /* lw */, + +/* d s */ HI(61) /* sdc1 */, +/* d l */ HI(53) /* ldc1 */, +/* s s */ HI(57) /* swc1 */, +/* s l */ HI(49) /* lwc1 */, +}; + +#undef ARCH_32_64 + +/* reg_ar is an absolute register! 
*/ + +/* Can perform an operation using at most 1 instruction. */ +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) +{ + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) { + /* Works for both absoulte and relative addresses. */ + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(arg & REG_MASK) + | TA(reg_ar) | IMM(argw), ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? reg_ar : MOVABLE_INS)); + return -1; + } + return 0; +} + +#define TO_ARGW_HI(argw) (((argw) & ~0xffff) + (((argw) & 0x8000) ? 0x10000 : 0)) + +/* See getput_arg below. + Note: can_cache is called only for binary operators. */ +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); + + /* Simple operation except for updates. */ + if (arg & OFFS_REG_MASK) { + argw &= 0x3; + next_argw &= 0x3; + if (argw && argw == next_argw && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK))) + return 1; + return 0; + } + + if (arg == next_arg) { + if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) + || TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw)) + return 1; + return 0; + } + + return 0; +} + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + sljit_s32 tmp_ar, base, delay_slot; + sljit_sw offset, argw_hi; + + SLJIT_ASSERT(arg & SLJIT_MEM); + if (!(next_arg & SLJIT_MEM)) { + next_arg = 0; + next_argw = 0; + } + + /* Since tmp can be the same as base or offset registers, + * these might be unavailable after modifying tmp. 
*/ + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) { + tmp_ar = reg_ar; + delay_slot = reg_ar; + } + else { + tmp_ar = DR(TMP_REG1); + delay_slot = MOVABLE_INS; + } + base = arg & REG_MASK; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + /* Using the cache. */ + if (argw == compiler->cache_argw) { + if (arg == compiler->cache_arg) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + + if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + } + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + } + } + + if (SLJIT_UNLIKELY(argw)) { + compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | D(TMP_REG3) | SH_IMM(argw), DR(TMP_REG3))); + } + + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); + tmp_ar = DR(TMP_REG3); + } + else + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? 
OFFS_REG(arg) : TMP_REG3) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + } + + if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar) | IMM(argw - compiler->cache_argw), delay_slot); + + if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) { + offset = argw - compiler->cache_argw; + } else { + compiler->cache_arg = SLJIT_MEM; + + argw_hi = TO_ARGW_HI(argw); + + if (next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN && argw_hi != TO_ARGW_HI(next_argw)) { + FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw)); + compiler->cache_argw = argw; + offset = 0; + } else { + FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw_hi)); + compiler->cache_argw = argw_hi; + offset = argw & 0xffff; + argw = argw_hi; + } + } + + if (!base) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar) | IMM(offset), delay_slot); + + if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) { + compiler->cache_arg = arg; + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | D(TMP_REG3), DR(TMP_REG3))); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar) | IMM(offset), delay_slot); + } + + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar) | IMM(offset), delay_slot); +} + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) +{ + sljit_s32 tmp_ar, base, delay_slot; + + if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) + return 
compiler->error; + + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) { + tmp_ar = reg_ar; + delay_slot = reg_ar; + } + else { + tmp_ar = DR(TMP_REG1); + delay_slot = MOVABLE_INS; + } + base = arg & REG_MASK; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if (SLJIT_UNLIKELY(argw)) { + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | DA(tmp_ar) | SH_IMM(argw), tmp_ar)); + FAIL_IF(push_inst(compiler, ADDU_W | SA(tmp_ar) | T(base) | DA(tmp_ar), tmp_ar)); + } + else + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + } + + FAIL_IF(load_immediate(compiler, tmp_ar, TO_ARGW_HI(argw))); + + if (base != 0) + FAIL_IF(push_inst(compiler, ADDU_W | SA(tmp_ar) | T(base) | DA(tmp_ar), tmp_ar)); + + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar) | IMM(argw), delay_slot); +} + +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +#define EMIT_LOGICAL(op_imm, op_reg) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \ + } \ + else { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_reg | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_reg | S(src1) | T(src2) | D(dst), DR(dst))); \ + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + +#define EMIT_SHIFT(dimm, dimm32, imm, dv, v) \ + 
op_imm = (imm); \ + op_v = (v); + +#else /* !SLJIT_CONFIG_MIPS_32 */ + + +#define EMIT_SHIFT(dimm, dimm32, imm, dv, v) \ + op_dimm = (dimm); \ + op_dimm32 = (dimm32); \ + op_imm = (imm); \ + op_dv = (dv); \ + op_v = (v); + +#endif /* SLJIT_CONFIG_MIPS_32 */ + +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV < 1) + +static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) +{ + sljit_s32 is_clz = (GET_OPCODE(op) == SLJIT_CLZ); +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + sljit_ins word_size = (op & SLJIT_32) ? 32 : 64; +#else /* !SLJIT_CONFIG_MIPS_64 */ + sljit_ins word_size = 32; +#endif /* SLJIT_CONFIG_MIPS_64 */ + + /* The TMP_REG2 is the next value. */ + if (src != TMP_REG2) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG2) | TA(0) | IMM(is_clz ? 13 : 14), UNMOVABLE_INS)); + /* The OTHER_FLAG is the counter. Delay slot. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(word_size), OTHER_FLAG)); + + if (!is_clz) { + FAIL_IF(push_inst(compiler, ANDI | S(TMP_REG2) | T(TMP_REG1) | IMM(1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, BNE | S(TMP_REG1) | TA(0) | IMM(11), UNMOVABLE_INS)); + } else + FAIL_IF(push_inst(compiler, BLTZ | S(TMP_REG2) | TA(0) | IMM(11), UNMOVABLE_INS)); + + /* Delay slot. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(0), OTHER_FLAG)); + + /* The TMP_REG1 is the next shift. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(TMP_REG1) | IMM(word_size), DR(TMP_REG1))); + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(TMP_REG2) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL, SRL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), DR(TMP_REG1))); + + FAIL_IF(push_inst(compiler, (is_clz ? 
SELECT_OP(DSRLV, SRLV) : SELECT_OP(DSLLV, SLLV)) | S(TMP_REG1) | TA(EQUAL_FLAG) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, BNE | S(TMP_REG2) | TA(0) | IMM(-4), UNMOVABLE_INS)); + /* Delay slot. */ + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(TMP_REG1) | T(TMP_REG2) | IMM(-1), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, (is_clz ? SELECT_OP(DSRLV, SRLV) : SELECT_OP(DSLLV, SLLV)) | S(TMP_REG2) | TA(EQUAL_FLAG) | D(TMP_REG2), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG2) | TA(0) | IMM(-7), UNMOVABLE_INS)); + /* Delay slot. */ + FAIL_IF(push_inst(compiler, OR | SA(OTHER_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG)); + + return push_inst(compiler, SELECT_OP(DADDU, ADDU) | SA(OTHER_FLAG) | TA(0) | D(dst), DR(dst)); +} + +#endif /* SLJIT_MIPS_REV < 1 */ + +static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) +{ +#if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64 + int is_32 = (op & SLJIT_32); +#endif /* SLJIT_CONFIG_MIPS_64 */ + + op = GET_OPCODE(op); +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64 + if (!is_32 && (op == SLJIT_REV)) { + FAIL_IF(push_inst(compiler, DSBH | T(src) | D(dst), DR(dst))); + return push_inst(compiler, DSHD | T(dst) | D(dst), DR(dst)); + } + if (op != SLJIT_REV && src != TMP_REG2) { + FAIL_IF(push_inst(compiler, SLL | T(src) | D(TMP_REG1), DR(TMP_REG1))); + src = TMP_REG1; + } +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, WSBH | T(src) | D(dst), DR(dst))); + FAIL_IF(push_inst(compiler, ROTR | T(dst) | D(dst) | SH_IMM(16), DR(dst))); +#if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64 + if (op == SLJIT_REV_U32 && dst != TMP_REG2 && dst != TMP_REG3) + FAIL_IF(push_inst(compiler, DINSU | T(dst) | SA(0) | (31 << 11), DR(dst))); +#endif /* SLJIT_CONFIG_MIPS_64 */ +#else /* SLJIT_MIPS_REV < 2 */ +#if 
(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (!is_32) { + FAIL_IF(push_inst(compiler, DSRL32 | T(src) | D(TMP_REG1) | SH_IMM(0), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, ORI | SA(0) | TA(OTHER_FLAG) | 0xffff, OTHER_FLAG)); + FAIL_IF(push_inst(compiler, DSLL32 | T(src) | D(dst) | SH_IMM(0), DR(dst))); + FAIL_IF(push_inst(compiler, DSLL32 | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(0), OTHER_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst))); + + FAIL_IF(push_inst(compiler, DSRL | T(dst) | D(TMP_REG1) | SH_IMM(16), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, ORI | SA(OTHER_FLAG) | TA(OTHER_FLAG) | 0xffff, OTHER_FLAG)); + FAIL_IF(push_inst(compiler, AND | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + FAIL_IF(push_inst(compiler, AND | S(TMP_REG1) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, DSLL | TA(OTHER_FLAG) | DA(EQUAL_FLAG) | SH_IMM(8), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst))); + FAIL_IF(push_inst(compiler, XOR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst))); + + FAIL_IF(push_inst(compiler, DSRL | T(dst) | D(TMP_REG1) | SH_IMM(8), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, AND | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + FAIL_IF(push_inst(compiler, AND | S(TMP_REG1) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(8), DR(dst))); + return push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst)); + } + + if (op != SLJIT_REV && src != TMP_REG2) { + FAIL_IF(push_inst(compiler, SLL | T(src) | D(TMP_REG2) | SH_IMM(0), DR(TMP_REG2))); + src = TMP_REG2; + } +#endif /* SLJIT_CONFIG_MIPS_64 */ + + FAIL_IF(push_inst(compiler, SRL | T(src) | D(TMP_REG1) | SH_IMM(16), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, LUI | TA(OTHER_FLAG) | 0xff, OTHER_FLAG)); + 
FAIL_IF(push_inst(compiler, SLL | T(src) | D(dst) | SH_IMM(16), DR(dst))); + FAIL_IF(push_inst(compiler, ORI | SA(OTHER_FLAG) | TA(OTHER_FLAG) | 0xff, OTHER_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst))); + + FAIL_IF(push_inst(compiler, SRL | T(dst) | D(TMP_REG1) | SH_IMM(8), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, AND | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + FAIL_IF(push_inst(compiler, AND | S(TMP_REG1) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SLL | T(dst) | D(dst) | SH_IMM(8), DR(dst))); + FAIL_IF(push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst))); + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (op == SLJIT_REV_U32 && dst != TMP_REG2 && dst != TMP_REG3) { + FAIL_IF(push_inst(compiler, DSLL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst))); + FAIL_IF(push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst))); + } +#endif /* SLJIT_CONFIG_MIPS_64 */ +#endif /* SLJIT_MIPS_REV >= 2 */ + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) +{ +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 +#if defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32 + FAIL_IF(push_inst(compiler, WSBH | T(src) | D(dst), DR(dst))); +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(push_inst(compiler, DSBH | T(src) | D(dst), DR(dst))); +#endif /* SLJIT_CONFIG_MIPS_32 */ + if (GET_OPCODE(op) == SLJIT_REV_U16) + return push_inst(compiler, ANDI | S(dst) | T(dst) | 0xffff, DR(dst)); + else + return push_inst(compiler, SEH | T(dst) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 2 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL, SRL) | T(src) | D(TMP_REG1) | SH_IMM(8), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | T(src) | D(dst) | SH_IMM(24), DR(dst))); + FAIL_IF(push_inst(compiler, ANDI | S(TMP_REG1) | T(TMP_REG1) | 0xff, DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, 
(GET_OPCODE(op) == SLJIT_REV_U16 ? SELECT_OP(DSRL32, SRL) : SELECT_OP(DSRA32, SRA)) | T(dst) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ +} + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) +{ + sljit_s32 is_overflow, is_carry, carry_src_ar, is_handled, reg; + sljit_ins op_imm, op_v; +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + sljit_ins ins, op_dimm, op_dimm32, op_dv; +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 2 */ + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ +#else /* !SLJIT_CONFIG_MIPS_32 */ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + if (op & SLJIT_32) + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, DSRA32 | 
T(dst) | D(dst) | SH_IMM(24), DR(dst)); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 2 */ + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ +#else /* !SLJIT_CONFIG_MIPS_32 */ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + if (op & SLJIT_32) + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + case SLJIT_MOV_U32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + if (dst == src2) + return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); + return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); + } + 
SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; +#endif /* SLJIT_CONFIG_MIPS_64 */ + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + return push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 6 */ + return push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 6 */ + case SLJIT_CTZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(0) | T(src2) | D(TMP_REG1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, AND | S(src2) | T(TMP_REG1) | D(dst), DR(dst))); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(dst) | D(dst), DR(dst))); +#else /* SLJIT_MIPS_REV < 6 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(dst) | T(dst) | D(dst), DR(dst))); +#endif /* SLJIT_MIPS_REV >= 6 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(TMP_REG1) | IMM(SELECT_OP(-64, -32)), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(SELECT_OP(26, 27)), DR(TMP_REG1))); + return push_inst(compiler, XOR | S(dst) | T(TMP_REG1) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 1 */ + case SLJIT_CLZ: + case SLJIT_CTZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return emit_clz_ctz(compiler, op, dst, src2); +#endif /* SLJIT_MIPS_REV >= 1 */ + + case SLJIT_REV: + case SLJIT_REV_U32: + case SLJIT_REV_S32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & 
SRC2_IMM) && src2 != TMP_REG1 && dst != TMP_REG1); + return emit_rev(compiler, op, dst, src2); + + case SLJIT_REV_U16: + case SLJIT_REV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return emit_rev16(compiler, op, dst, src2); + + case SLJIT_ADD: + /* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */ + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + carry_src_ar = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (is_overflow || carry_src_ar != 0) { + if (src1 != dst) + carry_src_ar = DR(src1); + else if (src2 != dst) + carry_src_ar = DR(src2); + else { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | TA(0) | DA(OTHER_FLAG), OTHER_FLAG)); + carry_src_ar = OTHER_FLAG; + } + } + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. 
*/ + if (is_overflow || carry_src_ar != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTIU | S(dst) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + else + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(carry_src_ar) | DA(OTHER_FLAG), OTHER_FLAG)); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + return push_inst(compiler, XOR | S(TMP_REG1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_ADDC: + carry_src_ar = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); + } else { + if (carry_src_ar != 0) { + if (src1 != dst) + carry_src_ar = DR(src1); + else if (src2 != dst) + carry_src_ar = DR(src2); + else { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + carry_src_ar = EQUAL_FLAG; + } + } + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ + if (carry_src_ar != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTIU | S(dst) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + else + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(carry_src_ar) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + + if (carry_src_ar == 0) + return SLJIT_SUCCESS; + + /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */ + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + /* Set carry flag. 
*/ + return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_SUB: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_handled = 0; + + if (flags & SRC2_IMM) { + if (GET_FLAG_TYPE(op) == SLJIT_LESS) { + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) { + FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + } + + if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + is_handled = 1; + + if (flags & SRC2_IMM) { + reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(reg) | IMM(src2), DR(reg))); + src2 = reg; + flags &= ~SRC2_IMM; + } + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_LESS: + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + break; + case SLJIT_GREATER: + FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + break; + case SLJIT_SIG_LESS: + FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + break; + case SLJIT_SIG_GREATER: + FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + break; + } + } + + if (is_handled) { + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)); + } + else { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return 
push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)); + } + return SLJIT_SUCCESS; + } + + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + + /* Only the zero flag is needed. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + return push_inst(compiler, XOR | S(TMP_REG1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + + if (!is_carry) + return SLJIT_SUCCESS; + + return push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & SRC2_IMM)); + + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)); +#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) +#if (defined 
SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); +#else /* !SLJIT_CONFIG_MIPS_32 */ + if (op & SLJIT_32) + return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); + FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS)); + return push_inst(compiler, MFLO | D(dst), DR(dst)); +#endif /* SLJIT_CONFIG_MIPS_32 */ +#else /* SLJIT_MIPS_REV < 1 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); + return push_inst(compiler, MFLO | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 6 */ + } + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + FAIL_IF(push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst))); + FAIL_IF(push_inst(compiler, SELECT_OP(DMUH, MUH) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); +#else /* SLJIT_MIPS_REV < 6 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); +#endif /* SLJIT_MIPS_REV >= 6 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG)); + return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_AND: + EMIT_LOGICAL(ANDI, AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(ORI, OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + if (!(flags & LOGICAL_OP)) { + SLJIT_ASSERT((flags & SRC2_IMM) && src2 == -1); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | D(dst), DR(dst))); + return SLJIT_SUCCESS; + } + EMIT_LOGICAL(XORI, XOR); + return SLJIT_SUCCESS; + + case SLJIT_SHL: + case SLJIT_MSHL: + EMIT_SHIFT(DSLL, DSLL32, SLL, DSLLV, SLLV); + 
break; + + case SLJIT_LSHR: + case SLJIT_MLSHR: + EMIT_SHIFT(DSRL, DSRL32, SRL, DSRLV, SRLV); + break; + + case SLJIT_ASHR: + case SLJIT_MASHR: + EMIT_SHIFT(DSRA, DSRA32, SRA, DSRAV, SRAV); + break; + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + case SLJIT_ROTL: + if ((flags & SRC2_IMM) || src2 == 0) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + src2 = -src2 & 0x1f; +#else /* !SLJIT_CONFIG_MIPS_32 */ + src2 = -src2 & ((op & SLJIT_32) ? 0x1f : 0x3f); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } else { + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(0) | T(src2) | D(TMP_REG2), DR(TMP_REG2))); + src2 = TMP_REG2; + } + /* fallthrough */ + + case SLJIT_ROTR: + EMIT_SHIFT(DROTR, DROTR32, ROTR, DROTRV, ROTRV); + break; +#else /* SLJIT_MIPS_REV < 1 */ + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & SRC2_IMM) { + SLJIT_ASSERT(src2 != 0); +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (!(op & SLJIT_32)) { + if (GET_OPCODE(op) == SLJIT_ROTL) + op_imm = ((src2 < 32) ? DSLL : DSLL32); + else + op_imm = ((src2 < 32) ? DSRL : DSRL32); + + FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(OTHER_FLAG) | (((sljit_ins)src2 & 0x1f) << 6), OTHER_FLAG)); + + src2 = 64 - src2; + if (GET_OPCODE(op) == SLJIT_ROTL) + op_imm = ((src2 < 32) ? DSRL : DSRL32); + else + op_imm = ((src2 < 32) ? DSLL : DSLL32); + + FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | (((sljit_ins)src2 & 0x1f) << 6), DR(dst))); + return push_inst(compiler, OR | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)); + } +#endif /* SLJIT_CONFIG_MIPS_64 */ + + op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? SLL : SRL; + FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(OTHER_FLAG) | ((sljit_ins)src2 << 6), OTHER_FLAG)); + + src2 = 32 - src2; + op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? 
SRL : SLL; + FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | (((sljit_ins)src2 & 0x1f) << 6), DR(dst))); + return push_inst(compiler, OR | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)); + } + + if (src2 == 0) { + if (dst != src1) + return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | TA(0) | D(dst), DR(dst)); + return SLJIT_SUCCESS; + } + + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(0) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (!(op & SLJIT_32)) { + op_v = (GET_OPCODE(op) == SLJIT_ROTL) ? DSLLV : DSRLV; + FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + op_v = (GET_OPCODE(op) == SLJIT_ROTL) ? DSRLV : DSLLV; + FAIL_IF(push_inst(compiler, op_v | SA(EQUAL_FLAG) | T(src1) | D(dst), DR(dst))); + return push_inst(compiler, OR | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)); + } +#endif /* SLJIT_CONFIG_MIPS_64 */ + + op_v = (GET_OPCODE(op) == SLJIT_ROTL) ? SLLV : SRLV; + FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + op_v = (GET_OPCODE(op) == SLJIT_ROTL) ? 
SRLV : SLLV; + FAIL_IF(push_inst(compiler, op_v | SA(EQUAL_FLAG) | T(src1) | D(dst), DR(dst))); + return push_inst(compiler, OR | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ + + default: + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if ((flags & SRC2_IMM) || src2 == 0) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst)); + } + + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst)); +#else /* !SLJIT_CONFIG_MIPS_32 */ + if ((flags & SRC2_IMM) || src2 == 0) { + if (src2 >= 32) { + SLJIT_ASSERT(!(op & SLJIT_32)); + ins = op_dimm32; + src2 -= 32; + } + else + ins = (op & SLJIT_32) ? op_imm : op_dimm; + + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst)); + } + + ins = (op & SLJIT_32) ? 
op_v : op_dv; + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst)); +#endif /* SLJIT_CONFIG_MIPS_32 */ +} + /* CHECK_IMM(flags, srcw): true when srcw can be used as an inline immediate, i.e. it lies in [SIMM_MIN, SIMM_MAX] for non-logical operations, or has no bits set outside UIMM_MAX when LOGICAL_OP is set. */ +#define CHECK_IMM(flags, srcw) \ + ((!((flags) & LOGICAL_OP) && ((srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)) \ + || (((flags) & LOGICAL_OP) && !((srcw) & ~UIMM_MAX))) + /* emit_op: common driver for one integer operation. It classifies dst, src1 and src2 (register, immediate or memory), brings the sources into registers or an inline immediate, calls emit_single_op() and finally stores the result when dst is a memory operand. Returns SLJIT_SUCCESS or the error propagated through FAIL_IF / compiler->error. */ +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; + sljit_sw src2_r = 0; + sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2; + /* Clear cache_arg / cache_argw unless the caller passed ALT_KEEP_CACHE. */ + if (!(flags & ALT_KEEP_CACHE)) { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + } + /* Destination: dst == 0 means only the flags are wanted (UNUSED_DEST); a register destination is written directly; a memory destination that fails the fast-path test is marked SLOW_DEST. */ + if (dst == 0) { + SLJIT_ASSERT(HAS_FLAGS(op)); + flags |= UNUSED_DEST; + dst = TMP_REG2; + } + else if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (flags & MOVE_OP) + src2_tmp_reg = dst_r; + } + else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw)) + flags |= SLOW_DEST; + /* With IMM_OP a non-zero immediate that passes CHECK_IMM stays inline as src2 (SRC2_IMM); for CUMULATIVE_OP an immediate src1 may take that role by swapping the two sources. */ + if (flags & IMM_OP) { + if (src2 == SLJIT_IMM && src2w != 0 && CHECK_IMM(flags, src2w)) { + flags |= SRC2_IMM; + src2_r = src2w; + } else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && CHECK_IMM(flags, src1w)) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. */ + } + } + + /* Source 1.
*/ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } + else if (src1 == SLJIT_IMM) { + if (src1w) { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); + src1_r = TMP_REG1; + } + else + /* NOTE(review): a zero immediate is not loaded; index 0 presumably maps to the hardwired zero register - confirm against reg_map. */ src1_r = 0; + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + src1_r = TMP_REG1; + } + + /* Source 2. */ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + /* Move with a non-register destination: the source register itself is used as the value to store. */ if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) + dst_r = (sljit_s32)src2_r; + } + else if (src2 == SLJIT_IMM) { + if (!(flags & SRC2_IMM)) { + if (src2w) { + FAIL_IF(load_immediate(compiler, DR(src2_tmp_reg), src2w)); + src2_r = src2_tmp_reg; + } + else { + src2_r = 0; + /* Moving a zero immediate: a memory destination is stored from register index 0, a register destination falls back to a plain SLJIT_MOV. */ if (flags & MOVE_OP) { + if (dst & SLJIT_MEM) + dst_r = 0; + else + op = SLJIT_MOV; + } + } + } + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(src2_tmp_reg), src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + src2_r = src2_tmp_reg; + } + /* Slow-path loads. Each getput_arg() call also receives the operand that is accessed next (apparently as a caching hint - confirm); when both sources are slow the load order is chosen with can_cache(). */ + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + SLJIT_ASSERT(src2_r == TMP_REG2); + if ((flags & SLOW_DEST) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(src2_tmp_reg), src2, src2w, dst, dstw)); + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + /* Write the result back when the destination is a memory operand. */ + if (dst & SLJIT_MEM) { + if (!(flags & 
SLOW_DEST)) { + getput_arg_fast(compiler, flags, DR(dst_r), dst, dstw); + return compiler->error; + } + return getput_arg(compiler, flags, DR(dst_r), dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +#undef CHECK_IMM + /* sljit_emit_op0: emits an operation without explicit operands: breakpoint, nop, the wide multiplies and the division / modulo operations, which all work on the fixed registers SLJIT_R0 and SLJIT_R1. SLJIT_ENDBR and SLJIT_SKIP_FRAMES_BEFORE_RETURN emit nothing. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + sljit_s32 int_op = op & SLJIT_32; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, BREAK, UNMOVABLE_INS); + case SLJIT_NOP: + return push_inst(compiler, NOP, UNMOVABLE_INS); + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: /* R0 * R1: R0 receives the MUL / MFLO result and R1 the MUH / MFHI result (low and high half of the product per the MIPS ISA). */ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMULU : DMUL) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMUHU : DMUH) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); +#else /* !SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? MULU : MUL) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? MUHU : MUH) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); +#endif /* SLJIT_CONFIG_MIPS_64 */ + /* Both results were computed into temporaries because each instruction reads the original R0 and R1; now copy them to R0 / R1. */ FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | TA(0) | D(SLJIT_R0), DR(SLJIT_R0))); + return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_R1), DR(SLJIT_R1)); +#else /* SLJIT_MIPS_REV < 6 */ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#else /* !SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? 
MULTU : MULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); +#endif /* SLJIT_MIPS_REV >= 6 */ + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + /* The assert below guarantees that (op | 0x2) == SLJIT_DIV_UW holds for both unsigned opcodes, so that single test selects the unsigned instruction. The quotient goes to R0; the remainder is copied to R1 only for opcodes below SLJIT_DIV_UW (the DIVMOD variants). */ SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (int_op) { + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? MODU : MOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); + } + else { + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DMODU : DMOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); + } +#else /* !SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? MODU : MOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | TA(0) | D(SLJIT_R0), DR(SLJIT_R0))); + return (op >= SLJIT_DIV_UW) ? 
SLJIT_SUCCESS : push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_R1), DR(SLJIT_R1)); +#else /* SLJIT_MIPS_REV < 6 */ +#if !(defined SLJIT_MIPS_REV) + /* NOTE(review): two NOPs precede the divide only when SLJIT_MIPS_REV is undefined, presumably for a HI/LO hazard on old cores - confirm. */ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* !SLJIT_MIPS_REV */ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (int_op) + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#else /* !SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#endif /* SLJIT_CONFIG_MIPS_64 */ + /* Quotient from LO to R0; the remainder is read from HI into R1 only for opcodes below SLJIT_DIV_UW. */ FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return (op >= SLJIT_DIV_UW) ? SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); +#endif /* SLJIT_MIPS_REV >= 6 */ + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + /* emit_prefetch: emits a prefetch for the memory operand src / srcw. Without an index register it uses PREF when srcw fits in [SIMM_MIN, SIMM_MAX], otherwise PREFX with srcw loaded into TMP_REG1. With an index register it uses PREFX with that register, shifted left by the low two bits of srcw when they are non-zero. Only compiled when SLJIT_MIPS_REV >= 1. */ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) +static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + if (!(src & OFFS_REG_MASK)) { + if (srcw <= SIMM_MAX && srcw >= SIMM_MIN) + return push_inst(compiler, PREF | S(src & REG_MASK) | IMM(srcw), MOVABLE_INS); + + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + return push_inst(compiler, PREFX | S(src & REG_MASK) | T(TMP_REG1), MOVABLE_INS); + } + /* Indexed form: only the low two bits of srcw are used, as the shift applied to the index register. */ + srcw &= 0x3; + + if (SLJIT_UNLIKELY(srcw != 0)) { + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(src)) | D(TMP_REG1) | SH_IMM(srcw), DR(TMP_REG1))); + return push_inst(compiler, PREFX | S(src & REG_MASK) | T(TMP_REG1), MOVABLE_INS); + } + + return push_inst(compiler, PREFX | S(src & REG_MASK) | T(OFFS_REG(src)), MOVABLE_INS); +} +#endif /* SLJIT_MIPS_REV >= 1 */ + /* sljit_emit_op1: emits a single-source operation (the move variants, CLZ, CTZ and the byte-reversal operations) by forwarding to emit_op(). */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 flags = 0; + /* Single-source operations: every case below forwards to emit_op() with TMP_REG1 passed as src1 and the real operand passed as src2. */ + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + /* On 64-bit targets a 32-bit operation uses signed 32-bit data flags; these are only passed on by the CLZ / CTZ / REV cases. */ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (op & SLJIT_32) + flags = INT_DATA | SIGNED_DATA; +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: +#endif + case SLJIT_MOV_P: /* Word-sized moves; on 32-bit targets the 32-bit move variants are word moves as well. */ + return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); + /* For the narrower moves below, an immediate source is first truncated to the width and signedness of the move by the cast. */ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + case SLJIT_MOV_U32: + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw); + + case SLJIT_MOV_S32: + case SLJIT_MOV32: + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); +#endif + + case SLJIT_MOV_U8: + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); + + case SLJIT_MOV_S8: + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw); + + case SLJIT_MOV_U16: + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); + + case SLJIT_MOV_S16: + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? 
(sljit_s16)srcw : srcw); + + case SLJIT_CLZ: + case SLJIT_CTZ: + case SLJIT_REV: + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_REV_U16: + case SLJIT_REV_S16: + return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_REV_U32: + case SLJIT_REV_S32: /* Always emitted as a 32-bit operation: SLJIT_32 is forced on. */ + return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + /* sljit_emit_op2: emits a two-source integer operation. Per opcode it selects the flags passed to emit_op(): CUMULATIVE_OP where emit_op() may swap the two sources, LOGICAL_OP for the bitwise operations, IMM_OP where an inline immediate may be used. It also records status_flags_state for add / sub / mul. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + /* 32-bit operation on a 64-bit target: use signed 32-bit data and narrow immediate operands to their signed 32-bit value. */ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (op & SLJIT_32) { + flags |= INT_DATA | SIGNED_DATA; + if (src1 == SLJIT_IMM) + src1w = (sljit_s32)src1w; + if (src2 == SLJIT_IMM) + src2w = (sljit_s32)src2w; + } +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: /* No IMM_OP here: emit_single_op() asserts that SRC2_IMM is clear for SLJIT_MUL, so immediates are loaded into a register. */ + compiler->status_flags_state = 0; + return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_XOR: + /* XOR with the immediate -1 is passed on without LOGICAL_OP; emit_single_op() emits it as a NOR of the other source with itself. */ if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1)) { + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + /* fallthrough */ + case SLJIT_AND: + case SLJIT_OR: + return emit_op(compiler, op, flags | 
CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + case SLJIT_ROTL: + case SLJIT_ROTR: /* Immediate shift / rotate counts are masked to the operand width: 5 bits for 32-bit operations, 6 bits for 64-bit ones. */ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (src2 == SLJIT_IMM) + src2w &= 0x1f; +#else + if (src2 == SLJIT_IMM) { + if (op & SLJIT_32) + src2w &= 0x1f; + else + src2w &= 0x3f; + } +#endif + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + /* sljit_emit_op2u: two-source operation without a destination. After its own argument check it calls sljit_emit_op2() with dst == 0 and dstw == 0, which emit_op() handles as UNUSED_DEST. SLJIT_SKIP_CHECKS presumably disables the repeated check inside sljit_emit_op2() - confirm. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w); +} + /* SELECT_OP3(op, src2w, D, D32, W): opcode of an immediate shift with the count src2w placed at bit 6. On 64-bit targets W is used for SLJIT_32 operations, D for counts below 32 and D32 otherwise, with the count masked to 5 bits; on 32-bit targets it is always W. */ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#define SELECT_OP3(op, src2w, D, D32, W) (((op & SLJIT_32) ? (W) : ((src2w) < 32) ? 
(D) : (D32)) | (((sljit_ins)src2w & 0x1f) << 6)) +#else /* !SLJIT_CONFIG_MIPS_64 */ +#define SELECT_OP3(op, src2w, D, D32, W) ((W) | ((sljit_ins)(src2w) << 6)) +#endif /* SLJIT_CONFIG_MIPS_64 */ + /* sljit_emit_op2r: operation whose destination register is also an input. Only SLJIT_MULADD is handled: the product of src1 and src2 is computed into TMP_REG2 through sljit_emit_op2() and then added to dst_reg. Any other opcode emits nothing and returns SLJIT_SUCCESS. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst_reg) | T(TMP_REG2) | D(dst_reg), DR(dst_reg)); + } + + return SLJIT_SUCCESS; +} + /* sljit_emit_shift_into: shifts src1_reg by the count in src3 and fills the vacated bits from src2_reg (src2_reg is shifted the opposite way by the complementary count and ORed in); the result is written to dst_reg. When src1_reg == src2_reg the operation is emitted as a rotate through sljit_emit_op2(). */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1_reg, + sljit_s32 src2_reg, + sljit_s32 src3, sljit_sw src3w) +{ + sljit_s32 is_left; + sljit_ins ins1, ins2, ins3; +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64; +#else /* !SLJIT_CONFIG_MIPS_64 */ + sljit_s32 inp_flags = WORD_DATA | LOAD_DATA; + sljit_sw bit_length = 32; +#endif /* SLJIT_CONFIG_MIPS_64 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w)); + + is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL); + /* Same register on both sides: this is a rotate. */ + if (src1_reg == src2_reg) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, (is_left ? 
SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w); + } + + ADJUST_LOCAL_OFFSET(src3, src3w); + /* Immediate count: two immediate shifts (count and bit_length minus count) combined with OR. */ + if (src3 == SLJIT_IMM) { + src3w &= bit_length - 1; + /* NOTE(review): a zero count emits nothing, so dst_reg is not written even when it differs from src1_reg - confirm that callers expect this. */ + if (src3w == 0) + return SLJIT_SUCCESS; + + if (is_left) { + ins1 = SELECT_OP3(op, src3w, DSLL, DSLL32, SLL); + src3w = bit_length - src3w; + ins2 = SELECT_OP3(op, src3w, DSRL, DSRL32, SRL); + } else { + ins1 = SELECT_OP3(op, src3w, DSRL, DSRL32, SRL); + src3w = bit_length - src3w; + ins2 = SELECT_OP3(op, src3w, DSLL, DSLL32, SLL); + } + /* NOTE(review): dst_reg is written before src2_reg is read, so the two presumably must be different registers - confirm that the argument check enforces it. */ + FAIL_IF(push_inst(compiler, ins1 | T(src1_reg) | D(dst_reg), DR(dst_reg))); + FAIL_IF(push_inst(compiler, ins2 | T(src2_reg) | D(TMP_REG1), DR(TMP_REG1))); + return push_inst(compiler, OR | S(dst_reg) | T(TMP_REG1) | D(dst_reg), DR(dst_reg)); + } + /* Variable count: bring it into a register. When the count lives in dst_reg it is copied to TMP_REG2 first, because dst_reg is overwritten before the count is used a second time. */ + if (src3 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG2), src3, src3w)); + src3 = TMP_REG2; + } else if (dst_reg == src3) { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src3) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + src3 = TMP_REG2; + } + /* ins2 shifts src1 in the requested direction, ins3 shifts src2 the opposite way by a variable count, ins1 is the opposite-direction immediate shift used for the one-bit pre-shift. */ + if (is_left) { + ins1 = SELECT_OP(DSRL, SRL); + ins2 = SELECT_OP(DSLLV, SLLV); + ins3 = SELECT_OP(DSRLV, SRLV); + } else { + ins1 = SELECT_OP(DSLL, SLL); + ins2 = SELECT_OP(DSRLV, SRLV); + ins3 = SELECT_OP(DSLLV, SLLV); + } + + FAIL_IF(push_inst(compiler, ins2 | S(src3) | T(src1_reg) | D(dst_reg), DR(dst_reg))); + /* Count may be zero: src2 is pre-shifted by one bit and then shifted by (count XOR (bit_length - 1)), which adds up to bit_length minus count without ever needing a shift by the full width. Otherwise the negated count is used directly (this relies on the variable shift instructions using only the low bits of the count). */ + if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) { + FAIL_IF(push_inst(compiler, ins1 | T(src2_reg) | D(TMP_REG1) | (1 << 6), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XORI | S(src3) | T(TMP_REG2) | ((sljit_ins)bit_length - 1), DR(TMP_REG2))); + src2_reg = TMP_REG1; + } else + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(0) | T(src3) | D(TMP_REG2), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, ins3 | S(TMP_REG2) | T(src2_reg) | D(TMP_REG1), DR(TMP_REG1))); + return push_inst(compiler, OR | S(dst_reg) | T(TMP_REG1) | D(dst_reg), DR(dst_reg)); +} + +#undef SELECT_OP3 + /* sljit_emit_op_src: emits an operation that only reads a source operand: SLJIT_FAST_RETURN and the prefetch hints. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + /* Bring the target address into RETURN_ADDR_REG (register copy or memory load), then jump through it; the JR is followed by a NOP (presumably filling the branch delay slot). */ if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(RETURN_ADDR_REG), RETURN_ADDR_REG)); + else + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw)); + + FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + return push_inst(compiler, NOP, UNMOVABLE_INS); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: /* All four hints map to the same emit_prefetch() call; they emit nothing unless SLJIT_MIPS_REV >= 1. */ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + return emit_prefetch(compiler, src, srcw); +#else /* SLJIT_MIPS_REV < 1 */ + return SLJIT_SUCCESS; +#endif /* SLJIT_MIPS_REV >= 1 */ + } + + return SLJIT_SUCCESS; +} + /* sljit_emit_op_dst: emits an operation that only writes a destination. SLJIT_FAST_ENTER copies or stores RETURN_ADDR_REG to dst; SLJIT_GET_RETURN_ADDRESS loads a word from the stack at offset local_size - SSIZE_OF(sw) and delivers it to dst. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 dst_ar = RETURN_ADDR_REG; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + switch (op) { + case SLJIT_FAST_ENTER: + if (FAST_IS_REG(dst)) + return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), UNMOVABLE_INS); + break; + case SLJIT_GET_RETURN_ADDRESS: + /* Load straight into dst when it is a register, otherwise into TMP_REG2 for the store below. */ dst_ar = DR(FAST_IS_REG(dst) ? 
dst : TMP_REG2); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_ar, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw))); + break; + } + /* Memory destination: store the value held in dst_ar (RETURN_ADDR_REG by default, or the register loaded above). */ + if (dst & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw)); + /* NOTE(review): presumably prevents the store from being moved into a later delay slot - confirm. */ + if (op == SLJIT_FAST_ENTER) + compiler->delay_slot = UNMOVABLE_INS; + } + + return SLJIT_SUCCESS; +} + /* sljit_get_register_index: returns the machine register number of an SLJIT register: reg_map[reg] for SLJIT_GP_REGISTER, FR(reg) for SLJIT_FLOAT_REGISTER and -1 for any other type. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type != SLJIT_FLOAT_REGISTER) + return -1; + + return FR(reg); +} + /* sljit_emit_op_custom: emits one caller-supplied instruction word unchanged. size is only passed to the argument check. NOTE(review): instruction is dereferenced as a sljit_ins pointer, so it must point to suitably aligned storage. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + SLJIT_UNUSED_ARG(size); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + /* FLOAT_DATA(op): DOUBLE_DATA with the SLJIT_32 bit of op moved down by 7 positions and ORed in. FMT(op): FMT_S with bit 21 added when the SLJIT_32 bit of op is clear (SLJIT_32 is bit 8, shifted left by 13). */ +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7)) +#define FMT(op) (FMT_S | (~(sljit_ins)op & SLJIT_32) << (21 - (5 + 3))) + /* sljit_emit_fop1_conv_sw_from_f64: converts a floating point source to an integer. The value is truncated into TMP_FREG1 and then either moved to a general register or stored to memory. On 64-bit targets bit 21 of flags is set for SLJIT_CONV_SW_FROM_F64; it is ORed into the move and, shifted down, toggles a bit of the TRUNC opcode (presumably selecting the 64-bit forms - confirm against the opcode table). */ +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + sljit_u32 flags = 0; +#else + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21; +#endif + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS)); + + if (FAST_IS_REG(dst)) { + FAIL_IF(push_inst(compiler, MFC1 | flags | T(dst) | 
FS(TMP_FREG1), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + return SLJIT_SUCCESS; + } + /* Memory destination: store the integer result directly from TMP_FREG1, as DOUBLE_DATA when flags is set and SINGLE_DATA otherwise. */ + return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, FR(TMP_FREG1), dst, dstw, 0, 0); +} + /* sljit_emit_fop1_conv_f64_from_sw: converts a signed integer (register, immediate or memory) to floating point. The integer is placed in TMP_FREG1, either by loading it from memory straight into the float register or by MTC1 from a general register, and a CVT instruction then writes dst_r. On 64-bit targets bit 21 of flags is set for SLJIT_CONV_F64_FROM_SW. */ +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + sljit_u32 flags = 0; +#else + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)) << 21; +#endif + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) + FAIL_IF(emit_op_mem2(compiler, (flags ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw)); + else { + if (src == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; +#endif + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + src = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + } + /* The low opcode bit is set when SLJIT_32 is clear in op (SLJIT_32 is bit 8, shifted down by 8); presumably this selects the double-precision result - confirm against the CVT encodings. */ + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + /* dst_r is TMP_FREG1 whenever dst is not a register, so the store below writes the converted value. */ + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + /* sljit_emit_fop1_conv_f64_from_uw: converts an unsigned integer to floating point using the signed conversion instructions, with extra code for inputs whose top bit is set. */ +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + sljit_u32 flags = 0; +#else + sljit_u32 flags = 1 << 21; +#endif + sljit_s32 dst_r = FAST_IS_REG(dst) 
? dst : TMP_FREG1; + /* Bring a memory or immediate source into TMP_REG1. A 32-bit unsigned source is loaded with INT_DATA (no SIGNED_DATA) or narrowed with the (sljit_u32) cast. */ + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW ? WORD_DATA : INT_DATA) | LOAD_DATA, DR(TMP_REG1), src, srcw, dst, dstw)); + src = TMP_REG1; + } else if (src == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) + srcw = (sljit_u32)srcw; +#endif + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + src = TMP_REG1; + } + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + /* 64-bit target, 32-bit unsigned source: zero-extend a register source (shift left then logical right by 32) so that it is non-negative as a 64-bit integer, then use the plain signed conversion. */ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) { + if (src != TMP_REG1) { + FAIL_IF(push_inst(compiler, DSLL32 | T(src) | D(TMP_REG1) | SH_IMM(0), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, DSRL32 | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(0), DR(TMP_REG1))); + } + + FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0); + return SLJIT_SUCCESS; + } +#else /* !SLJIT_CONFIG_MIPS_64 */ + /* 32-bit target, double result: convert the value with bit 31 cleared, then add 2^31 back when the original value had bit 31 set. */ if (!(op & SLJIT_32)) { + FAIL_IF(push_inst(compiler, SLL | T(src) | D(TMP_REG2) | SH_IMM(1), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SRL | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(1), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG2) | FS(TMP_FREG1), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | 1 | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + /* Skip the fix-up when bit 31 was clear. The offset is one larger when the NOP inside the fix-up is emitted (same preprocessor condition). */ +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1) + FAIL_IF(push_inst(compiler, BGEZ | S(src) | 5, UNMOVABLE_INS)); +#else /* SLJIT_MIPS_REV > 1 */ + 
FAIL_IF(push_inst(compiler, BGEZ | S(src) | 4, UNMOVABLE_INS)); +#endif /* SLJIT_MIPS_REV <= 1 */ + /* Build 2^31 as a double in TMP_FREG2: high word 0x41e00000, low word zero. The high word is written with MTHC1, or with MTC1 to the register selected by the extra (1 << 11) bit. */ + FAIL_IF(push_inst(compiler, LUI | T(TMP_REG2) | IMM(0x41e0), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, MTC1 | TA(0) | FS(TMP_FREG2), UNMOVABLE_INS)); + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + FAIL_IF(push_inst(compiler, MTHC1 | T(TMP_REG2) | FS(TMP_FREG2), UNMOVABLE_INS)); + break; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(TMP_FREG2) | (1 << 11), UNMOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + break; + } + FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(TMP_FREG2) | FS(dst_r) | FD(dst_r), UNMOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0); + return SLJIT_SUCCESS; + } +#endif /* SLJIT_CONFIG_MIPS_64 */ + /* Common path. A value that is non-negative as a signed integer is converted directly. Otherwise BLTZ jumps to the second sequence, which converts (src >> 1) | (src & 1) and doubles the result. The hard-coded offsets of BLTZ and BEQ count the instructions they skip, so they must follow the same condition as the hazard NOPs of this path, which are emitted only when SLJIT_MIPS_REV is undefined. They previously also used the larger offsets for SLJIT_MIPS_REV <= 1, where no NOP is emitted, making both branches land one instruction too far. */ +#if !defined(SLJIT_MIPS_REV) + FAIL_IF(push_inst(compiler, BLTZ | S(src) | 5, UNMOVABLE_INS)); +#else /* SLJIT_MIPS_REV defined */ + FAIL_IF(push_inst(compiler, BLTZ | S(src) | 4, UNMOVABLE_INS)); +#endif /* !SLJIT_MIPS_REV */ + /* Saves bit 0 of src in TMP_REG2; this instruction directly follows the branch, so it is expected to execute on both paths (delay slot). */ FAIL_IF(push_inst(compiler, ANDI | S(src) | T(TMP_REG2) | IMM(1), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* !SLJIT_MIPS_REV */ + + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + /* Unconditional branch (BEQ with both register fields zero) over the second sequence. */ +#if !defined(SLJIT_MIPS_REV) + FAIL_IF(push_inst(compiler, BEQ | 6, UNMOVABLE_INS)); +#else /* SLJIT_MIPS_REV defined */ + FAIL_IF(push_inst(compiler, BEQ | 5, UNMOVABLE_INS)); +#endif /* !SLJIT_MIPS_REV */ + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + 
FAIL_IF(push_inst(compiler, DSRL | T(src) | D(TMP_REG1) | SH_IMM(1), DR(TMP_REG1))); +#else /* !SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, SRL | T(src) | D(TMP_REG1) | SH_IMM(1), DR(TMP_REG1))); +#endif /* SLJIT_CONFIG_MIPS_64 */ + /* Second sequence of the unsigned conversion: TMP_REG1 = (src >> 1) | (bit 0 saved in TMP_REG2), presumably so that the lost bit still takes part in rounding; the value is converted and then doubled by adding dst_r to itself. */ + FAIL_IF(push_inst(compiler, OR | S(TMP_REG1) | T(TMP_REG2) | D(TMP_REG1), DR(TMP_REG1))); + + FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) + /* NOTE(review): the hard-coded branch offsets emitted earlier in this function must account for this conditional NOP. */ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* !SLJIT_MIPS_REV */ + + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(dst_r) | FS(dst_r) | FD(dst_r), UNMOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + /* sljit_emit_fop1_cmp: emits a floating point compare of src1 and src2. Memory operands are first loaded into TMP_FREG1 / TMP_FREG2; the compare instruction is chosen from the flag type of op. */ +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins inst; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + /* NOTE(review): each pair of flag types below shares one compare instruction, and for one member of the pair the instruction tests the opposite condition (for example SLJIT_F_NOT_EQUAL uses C_UEQ_S). Presumably the code that consumes the flag inverts its test for those types - confirm. */ + switch (GET_FLAG_TYPE(op)) { + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + inst = C_EQ_S; + break; + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + inst = C_UEQ_S; + break; + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + inst = C_OLT_S; + break; + case SLJIT_F_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS: + inst = C_ULT_S; + break; + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + inst = C_ULE_S; + break; + case SLJIT_F_LESS_EQUAL: + case SLJIT_UNORDERED_OR_GREATER: + inst = C_OLE_S; + break; + default: + SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_UNORDERED); 
+ inst = C_UN_S; + break; + } + return push_inst(compiler, inst | FMT(op) | FT(src2) | FS(src1) | C_FD, UNMOVABLE_INS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_32; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(dst_r), src, srcw, dst, dstw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (!(dst & SLJIT_MEM)) + FAIL_IF(push_inst(compiler, MOV_fmt(FMT(op)) | FS(src) | FD(dst_r), MOVABLE_INS)); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, NEG_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, ABS_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_CONV_F64_FROM_F32: + /* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */ + FAIL_IF(push_inst(compiler, CVT_S_S | (sljit_ins)((op & SLJIT_32) ? 
1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS)); + op ^= SLJIT_32; + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(dst_r), dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r, flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= SLOW_SRC1; + } + + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= SLOW_SRC2; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, dst, dstw)); + } else { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), 
src2, src2w, dst, dstw)); + + if (flags & SLOW_SRC1) + src1 = TMP_FREG1; + if (flags & SLOW_SRC2) + src2 = TMP_FREG2; + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, SUB_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, MUL_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, DIV_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_COPYSIGN_F64: + return emit_copysign(compiler, op, src1, src2, dst_r); + } + + if (dst_r == TMP_FREG2) /* the result was produced in TMP_FREG2: store it to dst/dstw */ + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG2), dst, dstw, 0, 0)); + + return SLJIT_SUCCESS; +} +/* Sets freg to the 32 bit floating point constant 'value' by moving its raw bit pattern from an integer register with MTC1. Returns an sljit error code. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + union { + sljit_s32 imm; + sljit_f32 value; + } u; /* gives access to the bits of the float as a 32 bit integer */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) /* all bits zero: move from register number 0, no immediate load needed */ + return push_inst(compiler, MTC1 | TA(0) | FS(freg), MOVABLE_INS); + + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), u.imm)); /* otherwise build the bit pattern in TMP_REG1 first */ + return push_inst(compiler, MTC1 | T(TMP_REG1) | FS(freg), MOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ +/* Returns a label record for the current value of compiler->size; the previous label is returned again when it was created at the same size. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); +
set_label(label, compiler); + compiler->delay_slot = UNMOVABLE_INS; + return label; +} + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define BRANCH_LENGTH 4 +#else +#define BRANCH_LENGTH 8 +#endif + +#define BR_Z(src) \ + inst = BEQ | SA(src) | TA(0) | BRANCH_LENGTH; \ + flags = IS_BIT26_COND; \ + delay_check = src; + +#define BR_NZ(src) \ + inst = BNE | SA(src) | TA(0) | BRANCH_LENGTH; \ + flags = IS_BIT26_COND; \ + delay_check = src; + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + +#define BR_T() \ + inst = BC1NEZ; \ + flags = IS_BIT23_COND; \ + delay_check = FCSR_FCC; +#define BR_F() \ + inst = BC1EQZ; \ + flags = IS_BIT23_COND; \ + delay_check = FCSR_FCC; + +#else /* SLJIT_MIPS_REV < 6 */ + +#define BR_T() \ + inst = BC1T | BRANCH_LENGTH; \ + flags = IS_BIT16_COND; \ + delay_check = FCSR_FCC; +#define BR_F() \ + inst = BC1F | BRANCH_LENGTH; \ + flags = IS_BIT16_COND; \ + delay_check = FCSR_FCC; + +#endif /* SLJIT_MIPS_REV >= 6 */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_ins inst; + sljit_u32 flags = 0; + sljit_s32 delay_check = UNMOVABLE_INS; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + switch (type) { + case SLJIT_EQUAL: + BR_NZ(EQUAL_FLAG); + break; + case SLJIT_NOT_EQUAL: + BR_Z(EQUAL_FLAG); + break; + case SLJIT_LESS: + case SLJIT_GREATER: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER: + case SLJIT_OVERFLOW: + case SLJIT_CARRY: + BR_Z(OTHER_FLAG); + break; + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_NOT_OVERFLOW: + case SLJIT_NOT_CARRY: + BR_NZ(OTHER_FLAG); + break; + case SLJIT_F_NOT_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case 
SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED: + BR_T(); + break; + case SLJIT_F_EQUAL: + case SLJIT_F_LESS: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_UNORDERED: + BR_F(); + break; + default: + /* Not conditional branch. */ + inst = 0; + break; + } + + jump->flags |= flags; + if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != delay_check)) + jump->flags |= IS_MOVABLE; + + if (inst) + PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS)); + + if (type <= SLJIT_JUMP) + PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + else { + jump->flags |= IS_JAL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + } + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + + /* Maximum number of instructions required for generating a constant. 
*/ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->size += 2; +#else + compiler->size += 6; +#endif + return jump; +} + +#define RESOLVE_IMM1() \ + if (src1 == SLJIT_IMM) { \ + if (src1w) { \ + PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); \ + src1 = TMP_REG1; \ + } \ + else \ + src1 = 0; \ + } + +#define RESOLVE_IMM2() \ + if (src2 == SLJIT_IMM) { \ + if (src2w) { \ + PTR_FAIL_IF(load_immediate(compiler, DR(src2_tmp_reg), src2w)); \ + src2 = src2_tmp_reg; \ + } \ + else \ + src2 = 0; \ + } + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + struct sljit_jump *jump; + sljit_s32 flags; + sljit_ins inst; + sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + flags = WORD_DATA | LOAD_DATA; +#else /* !SLJIT_CONFIG_MIPS_32 */ + flags = ((type & SLJIT_32) ? 
INT_DATA : WORD_DATA) | LOAD_DATA; +#endif /* SLJIT_CONFIG_MIPS_32 */ + + if (src1 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(src2_tmp_reg), src2, src2w, 0, 0)); + src2 = src2_tmp_reg; + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type <= SLJIT_NOT_EQUAL) { + RESOLVE_IMM1(); + RESOLVE_IMM2(); + jump->flags |= IS_BIT26_COND; + if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2))) + jump->flags |= IS_MOVABLE; + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(src1) | T(src2) | BRANCH_LENGTH, UNMOVABLE_INS)); + } else if (type >= SLJIT_SIG_LESS && ((src1 == SLJIT_IMM && src1w == 0) || (src2 == SLJIT_IMM && src2w == 0))) { + inst = NOP; + if (src1 == SLJIT_IMM && src1w == 0) { + RESOLVE_IMM2(); + switch (type) { + case SLJIT_SIG_LESS: + inst = BLEZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_SIG_GREATER_EQUAL: + inst = BGTZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_SIG_GREATER: + inst = BGEZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_SIG_LESS_EQUAL: + inst = BLTZ; + jump->flags |= IS_BIT16_COND; + break; + } + src1 = src2; + } + else { + RESOLVE_IMM1(); + switch (type) { + case SLJIT_SIG_LESS: + inst = BGEZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_SIG_GREATER_EQUAL: + inst = BLTZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_SIG_GREATER: + inst = BLEZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_SIG_LESS_EQUAL: + inst = BGTZ; + jump->flags |= IS_BIT26_COND; + break; + } + } + PTR_FAIL_IF(push_inst(compiler, inst | S(src1) | BRANCH_LENGTH, UNMOVABLE_INS)); + } + else { + if (type == 
SLJIT_LESS || type == SLJIT_GREATER_EQUAL || type == SLJIT_SIG_LESS || type == SLJIT_SIG_GREATER_EQUAL) { + RESOLVE_IMM1(); + if (src2 == SLJIT_IMM && src2w <= SIMM_MAX && src2w >= SIMM_MIN) + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1))); + else { + RESOLVE_IMM2(); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTU : SLT) | S(src1) | T(src2) | D(TMP_REG1), DR(TMP_REG1))); + } + type = (type == SLJIT_LESS || type == SLJIT_SIG_LESS) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL; + } + else { + RESOLVE_IMM2(); + if (src1 == SLJIT_IMM && src1w <= SIMM_MAX && src1w >= SIMM_MIN) + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1))); + else { + RESOLVE_IMM1(); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTU : SLT) | S(src2) | T(src1) | D(TMP_REG1), DR(TMP_REG1))); + } + type = (type == SLJIT_GREATER || type == SLJIT_SIG_GREATER) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL; + } + + jump->flags |= IS_BIT26_COND; + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | BRANCH_LENGTH, UNMOVABLE_INS)); + } + + PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + + /* Maximum number of instructions required for generating a constant. 
*/ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->size += 2; +#else + compiler->size += 6; +#endif + return jump; +} + +#undef RESOLVE_IMM1 +#undef RESOLVE_IMM2 + +#undef BRANCH_LENGTH +#undef BR_Z +#undef BR_NZ +#undef BR_T +#undef BR_F +/* Emits an indirect jump (JR) or call (JALR) whose target is given by src/srcw: an immediate address, a memory operand or a register. Returns an sljit error code. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump = NULL; /* stays NULL unless the target is an immediate */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + + if (src == SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0)); + jump->u.target = (sljit_uw)srcw; /* the fixed target address is recorded in the jump record */ + + if (compiler->delay_slot != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + + src = PIC_ADDR_REG; /* the jump instruction emitted below goes through PIC_ADDR_REG */ + } else if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); /* load the target address from memory */ + src = PIC_ADDR_REG; + } + + if (type <= SLJIT_JUMP) /* plain jump uses JR; every other type uses JALR with RETURN_ADDR_REG as destination */ + FAIL_IF(push_inst(compiler, JR | S(src), UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, JALR | S(src) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + + if (jump != NULL) { + jump->addr = compiler->size; /* NOTE(review): presumably the position patched when the target constant is generated - confirm */ + + /* Maximum number of instructions required for generating a constant. */ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->size += 2; +#else + compiler->size += 6; +#endif + } + + return push_inst(compiler, NOP, UNMOVABLE_INS); /* NOP directly after the jump instruction (presumably its delay slot) */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 src_ar, dst_ar, invert; + sljit_s32 saved_op = op; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + sljit_s32 mem_type = WORD_DATA; +#else + sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ?
(INT_DATA | SIGNED_DATA) : WORD_DATA; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, DR(TMP_REG1), dst, dstw, dst, dstw)); + + if (type < SLJIT_F_EQUAL) { + src_ar = OTHER_FLAG; + invert = type & 0x1; + + switch (type) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL: + FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + break; + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) { + src_ar = OTHER_FLAG; + break; + } + FAIL_IF(push_inst(compiler, SLTIU | SA(OTHER_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + invert ^= 0x1; + break; + } + } else { + invert = 0; + + switch (type) { + case SLJIT_F_NOT_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED: + invert = 1; + break; + } + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + FAIL_IF(push_inst(compiler, MFC1 | TA(dst_ar) | FS(TMP_FREG3), dst_ar)); +#else /* SLJIT_MIPS_REV < 6 */ + FAIL_IF(push_inst(compiler, CFC1 | TA(dst_ar) | DA(FCSR_REG), dst_ar)); +#endif /* SLJIT_MIPS_REV >= 6 */ + FAIL_IF(push_inst(compiler, SRL | TA(dst_ar) | DA(dst_ar) | SH_IMM(23), dst_ar)); + FAIL_IF(push_inst(compiler, ANDI | SA(dst_ar) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + } + + if (invert) { + FAIL_IF(push_inst(compiler, XORI | SA(src_ar) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + } + + if 
(op < SLJIT_ADD) { + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_type, src_ar, dst, dstw); + + if (src_ar != dst_ar) + return push_inst(compiler, ADDU_W | SA(src_ar) | TA(0) | DA(dst_ar), dst_ar); + return SLJIT_SUCCESS; + } + + /* OTHER_FLAG cannot be specified as src2 argument at the moment. */ + if (DR(TMP_REG2) != src_ar) + FAIL_IF(push_inst(compiler, ADDU_W | SA(src_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + + mem_type |= CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE; + + if (dst & SLJIT_MEM) + return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); + return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, TMP_REG2, 0); +} + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + +static sljit_ins get_select_cc(sljit_s32 type, sljit_s32 is_float) +{ + switch (type & ~SLJIT_32) { + case SLJIT_EQUAL: + return (is_float ? MOVZ_S : MOVZ) | TA(EQUAL_FLAG); + case SLJIT_NOT_EQUAL: + return (is_float ? MOVN_S : MOVN) | TA(EQUAL_FLAG); + case SLJIT_LESS: + case SLJIT_GREATER: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER: + case SLJIT_OVERFLOW: + case SLJIT_CARRY: + return (is_float ? MOVN_S : MOVN) | TA(OTHER_FLAG); + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_NOT_OVERFLOW: + case SLJIT_NOT_CARRY: + return (is_float ? MOVZ_S : MOVZ) | TA(OTHER_FLAG); + case SLJIT_F_EQUAL: + case SLJIT_F_LESS: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_UNORDERED: + return is_float ? 
MOVT_S : MOVT; + case SLJIT_F_NOT_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED: + return is_float ? MOVF_S : MOVF; + default: + SLJIT_UNREACHABLE(); + return (is_float ? MOVZ_S : MOVZ) | TA(OTHER_FLAG); + } +} + +#endif /* SLJIT_MIPS_REV >= 1 */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + sljit_ins mov_ins = (type & SLJIT_32) ? ADDU : DADDU; +#else /* !SLJIT_CONFIG_MIPS_64 */ + sljit_s32 inp_flags = WORD_DATA | LOAD_DATA; + sljit_ins mov_ins = ADDU; +#endif /* SLJIT_CONFIG_MIPS_64 */ + +#if !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + struct sljit_label *label; + struct sljit_jump *jump; +#endif /* !(SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + ADJUST_LOCAL_OFFSET(src1, src1w); + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG1), src1, src1w)); + src1 = TMP_REG1; + } else if (src1 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (type & SLJIT_32) + src1w = (sljit_s32)src1w; +#endif + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); + src1 = TMP_REG1; + } + + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + type ^= 0x1; + } else + FAIL_IF(push_inst(compiler, mov_ins | S(src2_reg) | TA(0) | D(dst_reg), DR(dst_reg))); + } + + return 
push_inst(compiler, get_select_cc(type, 0) | S(src1) | D(dst_reg), DR(dst_reg)); + +#else /* SLJIT_MIPS_REV < 1 || SLJIT_MIPS_REV >= 6 */ + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + FAIL_IF(push_inst(compiler, ADDU_W | S(dst_reg) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + + if ((src1 & REG_MASK) == dst_reg) + src1 = (src1 & ~REG_MASK) | TMP_REG1; + + if (OFFS_REG(src1) == dst_reg) + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); + } + + FAIL_IF(push_inst(compiler, mov_ins | S(src2_reg) | TA(0) | D(dst_reg), DR(dst_reg))); + } + } + + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1); + FAIL_IF(!jump); + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, DR(dst_reg), src1, src1w)); + } else if (src1 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (type & SLJIT_32) + src1w = (sljit_s32)src1w; +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(load_immediate(compiler, DR(dst_reg), src1w)); + } else + FAIL_IF(push_inst(compiler, mov_ins | S(src1) | TA(0) | D(dst_reg), DR(dst_reg))); + + SLJIT_SKIP_CHECKS(compiler); + label = sljit_emit_label(compiler); + FAIL_IF(!label); + + sljit_set_label(jump, label); + return SLJIT_SUCCESS; +#endif /* SLJIT_MIPS_REV >= 1 */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ +#if !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + struct sljit_label *label; + struct sljit_jump *jump; +#endif /* !(SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_freg != src2_freg) { + if (dst_freg == src1) { + src1 = src2_freg; + src1w = 0; 
+ type ^= 0x1; + } else + FAIL_IF(push_inst(compiler, MOV_fmt(FMT(type)) | FS(src2_freg) | FD(dst_freg), MOVABLE_INS)); + } + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, FR(TMP_FREG2), src1, src1w)); + src1 = TMP_FREG2; + } + + return push_inst(compiler, get_select_cc(type, 1) | FMT(type) | FS(src1) | FD(dst_freg), MOVABLE_INS); + +#else /* SLJIT_MIPS_REV < 1 || SLJIT_MIPS_REV >= 6 */ + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1); + FAIL_IF(!jump); + + if (src1 & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, FR(dst_freg), src1, src1w)); + else + FAIL_IF(push_inst(compiler, MOV_fmt(FMT(type)) | FS(src1) | FD(dst_freg), MOVABLE_INS)); + + SLJIT_SKIP_CHECKS(compiler); + label = sljit_emit_label(compiler); + FAIL_IF(!label); + + sljit_set_label(jump, label); + return SLJIT_SUCCESS; +#endif /* SLJIT_MIPS_REV >= 1 */ +} + +#undef FLOAT_DATA +#undef FMT +/* Rewrites the memory operand (*mem, *memw) into a base register field plus an immediate offset that does not exceed max_offset, emitting address computations into TMP_REG1 when needed. Returns an sljit error code. */ +static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s16 max_offset) +{ + sljit_s32 arg = *mem; + sljit_sw argw = *memw; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { /* base + offset register form */ + argw &= 0x3; /* the low two bits are used as the shift amount of the offset register */ + + if (SLJIT_UNLIKELY(argw)) { + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | D(TMP_REG1) | SH_IMM(argw), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG1) | T(arg & REG_MASK) | D(TMP_REG1), DR(TMP_REG1))); + } else + FAIL_IF(push_inst(compiler, ADDU_W | S(arg & REG_MASK) | T(OFFS_REG(arg)) | D(TMP_REG1), DR(TMP_REG1))); + + *mem = TMP_REG1; /* the complete address is now in TMP_REG1 */ + *memw = 0; + + return SLJIT_SUCCESS; + } + + if (argw <= max_offset && argw >= SIMM_MIN) { /* offset is already in range: nothing is emitted */ + *mem = arg & REG_MASK; + return SLJIT_SUCCESS; + } + + *mem = TMP_REG1; + + if ((sljit_s16)argw > max_offset) { /* even the low 16 bits are above max_offset: put the whole offset into TMP_REG1 */ + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), argw)); + *memw = 0; + } else { /* keep the low 16 bits as the immediate; TO_ARGW_HI presumably yields the matching upper part - confirm */ + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), 
TO_ARGW_HI(argw))); + *memw = (sljit_s16)argw; + } + + if ((arg & REG_MASK) == 0) /* no base register: TMP_REG1 alone is the base */ + return SLJIT_SUCCESS; + + return push_inst(compiler, ADDU_W | S(TMP_REG1) | T(arg & REG_MASK) | D(TMP_REG1), DR(TMP_REG1)); +} +/* Immediate fields for the paired left/right partial access instructions (LWL/LWR, SWL/SWR, LDL/LDR, SDL/SDR) and for byte-wise 16 bit accesses; which byte offset each half addresses depends on the endianness. */ +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define IMM_LEFT(memw) IMM((memw) + SSIZE_OF(sw) - 1) +#define IMM_RIGHT(memw) IMM(memw) +#define IMM_32_LEFT(memw) IMM((memw) + SSIZE_OF(s32) - 1) +#define IMM_32_RIGHT(memw) IMM(memw) +#define IMM_F64_FIRST_LEFT(memw) IMM((memw) + SSIZE_OF(s32) - 1) +#define IMM_F64_FIRST_RIGHT(memw) IMM(memw) +#define IMM_F64_SECOND_LEFT(memw) IMM((memw) + SSIZE_OF(f64) - 1) +#define IMM_F64_SECOND_RIGHT(memw) IMM((memw) + SSIZE_OF(s32)) +#define IMM_16_FIRST(memw) IMM((memw) + 1) +#define IMM_16_SECOND(memw) IMM(memw) +#else /* !SLJIT_LITTLE_ENDIAN */ +#define IMM_LEFT(memw) IMM(memw) +#define IMM_RIGHT(memw) IMM((memw) + SSIZE_OF(sw) - 1) +#define IMM_32_LEFT(memw) IMM(memw) +#define IMM_32_RIGHT(memw) IMM((memw) + SSIZE_OF(s32) - 1) +#define IMM_F64_FIRST_LEFT(memw) IMM((memw) + SSIZE_OF(s32)) +#define IMM_F64_FIRST_RIGHT(memw) IMM((memw) + SSIZE_OF(f64) - 1) +#define IMM_F64_SECOND_LEFT(memw) IMM(memw) +#define IMM_F64_SECOND_RIGHT(memw) IMM((memw) + SSIZE_OF(s32) - 1) +#define IMM_16_FIRST(memw) IMM(memw) +#define IMM_16_SECOND(memw) IMM((memw) + 1) +#endif /* SLJIT_LITTLE_ENDIAN */ +/* Non-zero when 'type' carries an alignment flag that makes sljit_emit_mem use the left/right instruction pairs for register pairs. */ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define MEM_CHECK_UNALIGNED(type) ((type) & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16)) +#else /* !SLJIT_CONFIG_MIPS_32 */ +#define MEM_CHECK_UNALIGNED(type) ((type) & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)) +#endif /* SLJIT_CONFIG_MIPS_32 */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 op = type & 0xff; + sljit_s32 flags = 0; + sljit_ins ins; +#if !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + sljit_ins
ins_right; +#endif /* !(SLJIT_MIPS_REV >= 6) */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (reg & REG_PAIR_MASK) { + ADJUST_LOCAL_OFFSET(mem, memw); + +#if !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + if (MEM_CHECK_UNALIGNED(type)) { + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - (2 * SSIZE_OF(sw) - 1))); + + if (!(type & SLJIT_MEM_STORE) && (mem == REG_PAIR_FIRST(reg) || mem == REG_PAIR_SECOND(reg))) { + FAIL_IF(push_inst(compiler, ADDU_W | S(mem) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + mem = TMP_REG1; + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + ins = ((type & SLJIT_MEM_STORE) ? SWL : LWL) | S(mem); + ins_right = ((type & SLJIT_MEM_STORE) ? SWR : LWR) | S(mem); +#else /* !SLJIT_CONFIG_MIPS_32 */ + ins = ((type & SLJIT_MEM_STORE) ? SDL : LDL) | S(mem); + ins_right = ((type & SLJIT_MEM_STORE) ? SDR : LDR) | S(mem); +#endif /* SLJIT_CONFIG_MIPS_32 */ + + FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_FIRST(reg)) | IMM_LEFT(memw), DR(REG_PAIR_FIRST(reg)))); + FAIL_IF(push_inst(compiler, ins_right | T(REG_PAIR_FIRST(reg)) | IMM_RIGHT(memw), DR(REG_PAIR_FIRST(reg)))); + FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_SECOND(reg)) | IMM_LEFT(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg)))); + return push_inst(compiler, ins_right | T(REG_PAIR_SECOND(reg)) | IMM_RIGHT(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg))); + } +#endif /* !(SLJIT_MIPS_REV >= 6) */ + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - SSIZE_OF(sw))); + + ins = ((type & SLJIT_MEM_STORE) ? 
STORE_W : LOAD_W) | S(mem); + + if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) { + FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_SECOND(reg)) | IMM(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg)))); + return push_inst(compiler, ins | T(REG_PAIR_FIRST(reg)) | IMM(memw), DR(REG_PAIR_FIRST(reg))); + } + + FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_FIRST(reg)) | IMM(memw), DR(REG_PAIR_FIRST(reg)))); + return push_inst(compiler, ins | T(REG_PAIR_SECOND(reg)) | IMM(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg))); + } + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); +#else /* !(SLJIT_MIPS_REV >= 6) */ + ADJUST_LOCAL_OFFSET(mem, memw); + + switch (op) { + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + flags = BYTE_DATA; + if (!(type & SLJIT_MEM_STORE)) + flags |= LOAD_DATA; + + if (op == SLJIT_MOV_S8) + flags |= SIGNED_DATA; + + return emit_op_mem(compiler, flags, DR(reg), mem, memw); + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - 1)); + SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, SRA_W | T(reg) | D(TMP_REG2) | SH_IMM(8), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(TMP_REG2) | IMM_16_FIRST(memw), MOVABLE_INS)); + return push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(reg) | IMM_16_SECOND(memw), MOVABLE_INS); + } + + flags = BYTE_DATA | LOAD_DATA; + + if (op == SLJIT_MOV_S16) + flags |= SIGNED_DATA; + + FAIL_IF(push_inst(compiler, data_transfer_insts[flags] | S(mem) | T(TMP_REG2) | IMM_16_FIRST(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA | LOAD_DATA] | S(mem) | T(reg) | IMM_16_SECOND(memw), DR(reg))); + FAIL_IF(push_inst(compiler, SLL_W | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(8), DR(TMP_REG2))); + return push_inst(compiler, OR | S(reg) | T(TMP_REG2) | D(reg), DR(reg)); 
+ + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (type & SLJIT_MEM_ALIGNED_32) { + flags = WORD_DATA; + if (!(type & SLJIT_MEM_STORE)) + flags |= LOAD_DATA; + + return emit_op_mem(compiler, flags, DR(reg), mem, memw); + } +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - 7)); + SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, SDL | S(mem) | T(reg) | IMM_LEFT(memw), MOVABLE_INS)); + return push_inst(compiler, SDR | S(mem) | T(reg) | IMM_RIGHT(memw), MOVABLE_INS); + } + + if (mem == reg) { + FAIL_IF(push_inst(compiler, ADDU_W | S(mem) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + mem = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, LDL | S(mem) | T(reg) | IMM_LEFT(memw), DR(reg))); + return push_inst(compiler, LDR | S(mem) | T(reg) | IMM_RIGHT(memw), DR(reg)); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - 3)); + SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(reg) | IMM_32_LEFT(memw), MOVABLE_INS)); + return push_inst(compiler, SWR | S(mem) | T(reg) | IMM_32_RIGHT(memw), MOVABLE_INS); + } + + if (mem == reg) { + FAIL_IF(push_inst(compiler, ADDU_W | S(mem) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + mem = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(reg) | IMM_32_LEFT(memw), DR(reg))); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return push_inst(compiler, LWR | S(mem) | T(reg) | IMM_32_RIGHT(memw), DR(reg)); +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(reg) | IMM_32_RIGHT(memw), DR(reg))); + + if (op != SLJIT_MOV_U32) + return SLJIT_SUCCESS; + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + return push_inst(compiler, DINSU | T(reg) | SA(0) | (31 << 11), DR(reg)); +#else /* SLJIT_MIPS_REV < 2 */ + 
FAIL_IF(push_inst(compiler, DSLL32 | T(reg) | D(reg) | SH_IMM(0), DR(reg))); + return push_inst(compiler, DSRL32 | T(reg) | D(reg) | SH_IMM(0), DR(reg)); +#endif /* SLJIT_MIPS_REV >= 2 */ +#endif /* SLJIT_CONFIG_MIPS_32 */ +#endif /* SLJIT_MIPS_REV >= 6 */ +} + +#if !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +/* Floating point load / store (SLJIT_MEM_STORE in type) built from LWL/LWR, SWL/SWR (LDL/LDR, SDL/SDR on 64 bit) pairs; the value is moved through TMP_REG2. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - ((type & SLJIT_32) ? 3 : 7))); /* Inner parentheses are required: '-' binds tighter than '?:', otherwise this parses as (SIMM_MAX - (type & SLJIT_32)) ? 3 : 7. Same limits as sljit_emit_mem: SIMM_MAX - 3 for 32 bit, SIMM_MAX - 7 for 64 bit data. */ + SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); + + if (type & SLJIT_MEM_STORE) { + if (type & SLJIT_32) { + FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2))); +#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM_32_LEFT(memw), MOVABLE_INS)); + return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM_32_RIGHT(memw), MOVABLE_INS); + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2))); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_LEFT(memw), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_RIGHT(memw), MOVABLE_INS)); + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + FAIL_IF(push_inst(compiler, MFHC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2))); + break; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg) | (1 << 11), DR(TMP_REG2))); +#if 
!defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + break; + } + + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_LEFT(memw), MOVABLE_INS)); + return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_RIGHT(memw), MOVABLE_INS); +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(push_inst(compiler, DMFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2))); +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + FAIL_IF(push_inst(compiler, SDL | S(mem) | T(TMP_REG2) | IMM_LEFT(memw), MOVABLE_INS)); + return push_inst(compiler, SDR | S(mem) | T(TMP_REG2) | IMM_RIGHT(memw), MOVABLE_INS); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + + if (type & SLJIT_32) { + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM_32_LEFT(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM_32_RIGHT(memw), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS)); +#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_LEFT(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_RIGHT(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS)); + + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_LEFT(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_RIGHT(memw), DR(TMP_REG2))); + switch (cpu_feature_list & CPU_FEATURE_FR) { +#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2 + case CPU_FEATURE_FR: + return push_inst(compiler, MTHC1 | T(TMP_REG2) | FS(freg), 
MOVABLE_INS); +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg) | (1 << 11), MOVABLE_INS)); + break; + } +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(push_inst(compiler, LDL | S(mem) | T(TMP_REG2) | IMM_LEFT(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LDR | S(mem) | T(TMP_REG2) | IMM_RIGHT(memw), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, DMTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS)); +#endif /* SLJIT_CONFIG_MIPS_32 */ +#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1 + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* MIPS III */ + return SLJIT_SUCCESS; +} + +#endif /* !SLJIT_MIPS_REV || SLJIT_MIPS_REV < 6 */ + +#undef IMM_16_SECOND +#undef IMM_16_FIRST +#undef IMM_F64_SECOND_RIGHT +#undef IMM_F64_SECOND_LEFT +#undef IMM_F64_FIRST_RIGHT +#undef IMM_F64_FIRST_LEFT +#undef IMM_32_RIGHT +#undef IMM_32_LEFT +#undef IMM_RIGHT +#undef IMM_LEFT +#undef MEM_CHECK_UNALIGNED + +#undef TO_ARGW_HI + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + PTR_FAIL_IF(emit_const(compiler, dst_r, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, DR(TMP_REG2), dst, dstw)); + + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_jump *jump; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r, UNMOVABLE_INS)); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->size += 1; +#else + compiler->size += 5; +#endif + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, DR(TMP_REG2), dst, dstw)); + + return jump; +} diff --git a/src/sljit/sljitNativePPC_32.c b/src/sljit/sljitNativePPC_32.c new file mode 100644 index 0000000..2352fad --- /dev/null +++ b/src/sljit/sljitNativePPC_32.c @@ -0,0 +1,485 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ppc 32-bit arch dependent functions. */ + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) +{ + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm)); + + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; +} + +/* Simplified mnemonics: clrlwi. 
*/ +#define INS_CLEAR_LEFT(dst, src, from) \ + (RLWINM | S(src) | A(dst) | RLWI_MBE(from, 31)) + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) +{ + sljit_u32 imm; + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1); + if (dst != src2) + return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); + } + else if ((flags & REG_DEST) && op == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S16) + return push_inst(compiler, EXTSH | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, CNTLZW | S(src2) | A(dst)); + + case SLJIT_CTZ: + SLJIT_ASSERT(src1 == TMP_REG1); + FAIL_IF(push_inst(compiler, NEG | D(TMP_REG1) | A(src2))); + FAIL_IF(push_inst(compiler, AND | S(src2) | A(dst) | B(TMP_REG1))); + FAIL_IF(push_inst(compiler, CNTLZW | S(dst) | A(dst))); + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG1) | A(dst) | IMM(-32))); + /* The highest bits are set, if dst < 32, zero otherwise. 
*/ + FAIL_IF(push_inst(compiler, SRWI(27) | S(TMP_REG1) | A(TMP_REG1))); + return push_inst(compiler, XOR | S(dst) | A(dst) | B(TMP_REG1)); + + case SLJIT_ADD: + if (flags & ALT_FORM1) { + /* Setting XER SO is not enough, CR SO is also needed. */ + return push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + } + + if (flags & ALT_FORM2) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + + if (flags & ALT_FORM3) + return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + + imm = compiler->imm; + + if (flags & ALT_FORM4) { + FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((imm >> 16) & 0xffff) + ((imm >> 15) & 0x1)))); + src1 = dst; + } + + return push_inst(compiler, ADDI | D(dst) | A(src1) | (imm & 0xffff)); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); + } + SLJIT_ASSERT(!(flags & ALT_FORM4)); + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); + if (flags & ALT_FORM5) + return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2)); + + case SLJIT_ADDC: + return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); + + case SLJIT_SUB: + if (flags & ALT_FORM1) { + if (flags & ALT_FORM2) { + FAIL_IF(push_inst(compiler, CMPLI | CRD(0) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMPL | CRD(0) | A(src1) | B(src2))); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM2) { + if (flags & ALT_FORM3) { + FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm)); + if (!(flags & 
ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2))); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM3) { + /* Setting XER SO is not enough, CR SO is also needed. */ + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); + } + + if (flags & ALT_FORM4) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); + } + + if (!(flags & ALT_SET_FLAGS)) { + SLJIT_ASSERT(src1 != TMP_ZERO); + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM5) + return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); + + case SLJIT_SUBC: + return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); + + case SLJIT_MUL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); + } + return push_inst(compiler, MULLW | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1)); + + case SLJIT_AND: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm); + } + return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_OR: + if (flags & 
ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(imm))); + return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(imm >> 16)); + } + return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_XOR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(imm))); + return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(imm >> 16)); + } + if (flags & ALT_FORM4) { + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); + } + return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_SHL: + case SLJIT_MSHL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm & 0x1f; + return push_inst(compiler, SLWI(imm) | RC(flags) | S(src1) | A(dst)); + } + + if (op == SLJIT_MSHL) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f)); + src2 = TMP_REG2; + } + + return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_LSHR: + case SLJIT_MLSHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm & 0x1f; + /* Since imm can be 0, SRWI() cannot be used. 
*/ + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | RLWI_SH((32 - imm) & 0x1f) | RLWI_MBE(imm, 31)); + } + + if (op == SLJIT_MLSHR) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f)); + src2 = TMP_REG2; + } + + return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ASHR: + case SLJIT_MASHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm & 0x1f; + return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (imm << 11)); + } + + if (op == SLJIT_MASHR) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f)); + src2 = TMP_REG2; + } + + return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + if (op == SLJIT_ROTR) + imm = (sljit_u32)(-(sljit_s32)imm); + + imm &= 0x1f; + return push_inst(compiler, RLWINM | S(src1) | A(dst) | RLWI_SH(imm) | RLWI_MBE(0, 31)); + } + + if (op == SLJIT_ROTR) { + FAIL_IF(push_inst(compiler, SUBFIC | D(TMP_REG2) | A(src2) | 0)); + src2 = TMP_REG2; + } + + return push_inst(compiler, RLWNM | S(src1) | A(dst) | B(src2) | RLWI_MBE(0, 31)); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + sljit_s32 invert_sign = 1; + + if (src == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ (sljit_sw)0x80000000)); + src = TMP_REG1; + invert_sign = 0; + } else if (!FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + /* First, a special double precision floating point value is constructed: + (2^53 + (src xor (2^31))) + The upper 32 bits of this number is a constant, and the lower 32 bits + is simply the value of the source argument. The xor 2^31 operation adds + 0x80000000 to the source argument, which moves it into the 0 - 0xffffffff + range. Finally we substract 2^53 + 2^31 to get the converted value. */ + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330)); + if (invert_sign) + FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000)); + FAIL_IF(push_inst(compiler, STW | S(TMP_REG2) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI)); + FAIL_IF(push_inst(compiler, STW | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, STW | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG2) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + + FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2))); + + if (op & SLJIT_32) + FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (src == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } else if (!FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + /* First, a special double precision floating point value is constructed: + (2^53 + src) + The upper 32 bits of this number is a constant, and the lower 32 bits + is simply the value of the source argument. Finally we substract 2^53 + to get the converted value. */ + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330)); + FAIL_IF(push_inst(compiler, STW | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); + FAIL_IF(push_inst(compiler, STW | S(TMP_REG2) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI)); + + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, STW | S(TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG2) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + + FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2))); + + if (op & SLJIT_32) + FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_s32 imm[2]; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm[0] != 0) + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0])); + if (u.imm[1] != 0) + FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1])); + + /* Saved in the same endianness. */ + FAIL_IF(push_inst(compiler, STW | S(u.imm[0] != 0 ? TMP_REG1 : TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, STW | S(u.imm[1] != 0 ? 
TMP_REG2 : TMP_ZERO) | A(SLJIT_SP) | (TMP_MEM_OFFSET + sizeof(sljit_s32)))); + return push_inst(compiler, LFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ + sljit_s32 reg2 = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + if (op & SLJIT_32) { + if (op == SLJIT_COPY32_TO_F32) { + FAIL_IF(push_inst(compiler, STW | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + return push_inst(compiler, LFS | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET); + } + + FAIL_IF(push_inst(compiler, STFS | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + return push_inst(compiler, LWZ | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET); + } + + if (reg & REG_PAIR_MASK) { + reg2 = REG_PAIR_SECOND(reg); + reg = REG_PAIR_FIRST(reg); + } + + if (op == SLJIT_COPY_TO_F64) { + FAIL_IF(push_inst(compiler, STW | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI)); + + if (reg2 != 0) + FAIL_IF(push_inst(compiler, STW | S(reg2) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); + else + FAIL_IF(push_inst(compiler, STFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); + + return push_inst(compiler, LFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET); + } + + FAIL_IF(push_inst(compiler, STFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + + if (reg2 != 0) + FAIL_IF(push_inst(compiler, LWZ | S(reg2) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO)); + + return push_inst(compiler, LWZ | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins *)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + SLJIT_ASSERT((inst[0] & 0xfc1f0000) == ADDIS && (inst[1] & 0xfc000000) == ORI); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff); + 
SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} diff --git a/src/sljit/sljitNativePPC_64.c b/src/sljit/sljitNativePPC_64.c new file mode 100644 index 0000000..b3cf9d0 --- /dev/null +++ b/src/sljit/sljitNativePPC_64.c @@ -0,0 +1,719 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ppc 64-bit arch dependent functions. 
*/ + +#if defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM) +#define ASM_SLJIT_CLZ(src, dst) \ + __asm__ volatile ( "cntlzd %0, %1" : "=r"(dst) : "r"(src) ) +#elif defined(__xlc__) +#error "Please enable GCC syntax for inline assembly statements" +#else +#error "Must implement count leading zeroes" +#endif + +/* Computes SLDI(63 - shift). */ +#define PUSH_SLDI_NEG(reg, shift) \ + push_inst(compiler, RLDICR | S(reg) | A(reg) | RLDI_SH(63 - shift) | RLDI_ME(shift)) + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) +{ + sljit_uw tmp; + sljit_uw shift; + sljit_uw tmp2; + sljit_uw shift2; + + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + + if (((sljit_uw)imm >> 16) == 0) + return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm)); + + if (imm <= 0x7fffffffl && imm >= -0x80000000l) { + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; + } + + if (((sljit_uw)imm >> 32) == 0) { + FAIL_IF(push_inst(compiler, ORIS | S(TMP_ZERO) | A(reg) | IMM(imm >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; + } + + /* Count leading zeroes. */ + tmp = (sljit_uw)((imm >= 0) ? imm : ~imm); + ASM_SLJIT_CLZ(tmp, shift); + SLJIT_ASSERT(shift > 0); + shift--; + tmp = ((sljit_uw)imm << shift); + + if ((tmp & ~0xffff000000000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); + shift += 15; + return PUSH_SLDI_NEG(reg, shift); + } + + if ((tmp & ~0xffffffff00000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32))); + shift += 31; + return PUSH_SLDI_NEG(reg, shift); + } + + /* Cut out the 16 bit from immediate. 
*/ + shift += 15; + tmp2 = (sljit_uw)imm & (((sljit_uw)1 << (63 - shift)) - 1); + + if (tmp2 <= 0xffff) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); + FAIL_IF(PUSH_SLDI_NEG(reg, shift)); + return push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)tmp2); + } + + if (tmp2 <= 0xffffffff) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(PUSH_SLDI_NEG(reg, shift)); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS; + } + + ASM_SLJIT_CLZ(tmp2, shift2); + tmp2 <<= shift2; + + if ((tmp2 & ~0xffff000000000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); + shift2 += 15; + shift += (63 - shift2); + FAIL_IF(PUSH_SLDI_NEG(reg, shift)); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 48))); + return PUSH_SLDI_NEG(reg, shift2); + } + + /* The general version. */ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)((sljit_uw)imm >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32))); + FAIL_IF(PUSH_SLDI_NEG(reg, 31)); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)); +} + +#undef PUSH_SLDI_NEG + +#define CLRLDI(dst, src, n) \ + (RLDICL | S(src) | A(dst) | RLDI_SH(0) | RLDI_MB(n)) + +/* Sign extension for integer operations. 
*/ +#define UN_EXTS() \ + if ((flags & (ALT_SIGN_EXT | REG2_SOURCE)) == (ALT_SIGN_EXT | REG2_SOURCE)) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \ + src2 = TMP_REG2; \ + } + +#define BIN_EXTS() \ + if (flags & ALT_SIGN_EXT) { \ + if (flags & REG1_SOURCE) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \ + src1 = TMP_REG1; \ + } \ + if (flags & REG2_SOURCE) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \ + src2 = TMP_REG2; \ + } \ + } + +#define BIN_IMM_EXTS() \ + if ((flags & (ALT_SIGN_EXT | REG1_SOURCE)) == (ALT_SIGN_EXT | REG1_SOURCE)) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \ + src1 = TMP_REG1; \ + } + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) +{ + sljit_u32 imm; + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1); + if (dst != src2) + return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S32) + return push_inst(compiler, EXTSW | S(src2) | A(dst)); + return push_inst(compiler, CLRLDI(dst, src2, 32)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + return push_inst(compiler, CLRLDI(dst, src2, 56)); + } + else if ((flags & REG_DEST) && op == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & 
(REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S16) + return push_inst(compiler, EXTSH | S(src2) | A(dst)); + return push_inst(compiler, CLRLDI(dst, src2, 48)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, ((flags & ALT_FORM1) ? CNTLZW : CNTLZD) | S(src2) | A(dst)); + + case SLJIT_CTZ: + SLJIT_ASSERT(src1 == TMP_REG1); + FAIL_IF(push_inst(compiler, NEG | D(TMP_REG1) | A(src2))); + FAIL_IF(push_inst(compiler, AND | S(src2) | A(dst) | B(TMP_REG1))); + FAIL_IF(push_inst(compiler, ((flags & ALT_FORM1) ? CNTLZW : CNTLZD) | S(dst) | A(dst))); + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG1) | A(dst) | IMM((flags & ALT_FORM1) ? -32 : -64))); + /* The highest bits are set, if dst < bit width, zero otherwise. */ + FAIL_IF(push_inst(compiler, ((flags & ALT_FORM1) ? SRWI(27) : SRDI(58)) | S(TMP_REG1) | A(TMP_REG1))); + return push_inst(compiler, XOR | S(dst) | A(dst) | B(TMP_REG1)); + + case SLJIT_ADD: + if (flags & ALT_FORM1) { + if (flags & ALT_SIGN_EXT) { + FAIL_IF(push_inst(compiler, SLDI(32) | S(src1) | A(TMP_REG1))); + src1 = TMP_REG1; + FAIL_IF(push_inst(compiler, SLDI(32) | S(src2) | A(TMP_REG2))); + src2 = TMP_REG2; + } + /* Setting XER SO is not enough, CR SO is also needed. */ + FAIL_IF(push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2))); + if (flags & ALT_SIGN_EXT) + return push_inst(compiler, SRDI(32) | S(dst) | A(dst)); + return SLJIT_SUCCESS; + } + + if (flags & ALT_FORM2) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. 
*/ + SLJIT_ASSERT(src2 == TMP_REG2); + + if (flags & ALT_FORM3) + return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + + imm = compiler->imm; + + if (flags & ALT_FORM4) { + FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((imm >> 16) & 0xffff) + ((imm >> 15) & 0x1)))); + src1 = dst; + } + + return push_inst(compiler, ADDI | D(dst) | A(src1) | (imm & 0xffff)); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + BIN_IMM_EXTS(); + return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM4) { + if (flags & ALT_FORM5) + FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm)); + else + FAIL_IF(push_inst(compiler, ADD | D(dst) | A(src1) | B(src2))); + return push_inst(compiler, CMPI | A(dst) | 0); + } + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); + BIN_EXTS(); + if (flags & ALT_FORM5) + return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2)); + + case SLJIT_ADDC: + BIN_EXTS(); + return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); + + case SLJIT_SUB: + if (flags & ALT_FORM1) { + if (flags & ALT_FORM2) { + FAIL_IF(push_inst(compiler, CMPLI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMPL | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM2) { + if (flags & ALT_FORM3) { + FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 
0 : 1)) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM3) { + if (flags & ALT_SIGN_EXT) { + if (src1 != TMP_ZERO) { + FAIL_IF(push_inst(compiler, SLDI(32) | S(src1) | A(TMP_REG1))); + src1 = TMP_REG1; + } + if (src2 != TMP_ZERO) { + FAIL_IF(push_inst(compiler, SLDI(32) | S(src2) | A(TMP_REG2))); + src2 = TMP_REG2; + } + } + + /* Setting XER SO is not enough, CR SO is also needed. */ + if (src1 != TMP_ZERO) + FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1))); + else + FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2))); + + if (flags & ALT_SIGN_EXT) + return push_inst(compiler, SRDI(32) | S(dst) | A(dst)); + return SLJIT_SUCCESS; + } + + if (flags & ALT_FORM4) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. 
*/ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); + } + + if (!(flags & ALT_SET_FLAGS)) { + SLJIT_ASSERT(src1 != TMP_ZERO); + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + BIN_EXTS(); + if (flags & ALT_FORM5) + return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); + + case SLJIT_SUBC: + BIN_EXTS(); + return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); + + case SLJIT_MUL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); + } + BIN_EXTS(); + if (flags & ALT_FORM2) + return push_inst(compiler, MULLW | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, MULLD | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1)); + + case SLJIT_AND: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm); + } + return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_OR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(imm))); + return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(imm >> 16)); + } + return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | 
B(src2)); + + case SLJIT_XOR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(imm))); + return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(imm >> 16)); + } + if (flags & ALT_FORM4) { + SLJIT_ASSERT(src1 == TMP_REG1); + UN_EXTS(); + return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); + } + return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_SHL: + case SLJIT_MSHL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + if (flags & ALT_FORM2) { + imm &= 0x1f; + return push_inst(compiler, SLWI(imm) | RC(flags) | S(src1) | A(dst)); + } + + imm &= 0x3f; + return push_inst(compiler, SLDI(imm) | RC(flags) | S(src1) | A(dst)); + } + + if (op == SLJIT_MSHL) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | ((flags & ALT_FORM2) ? 0x1f : 0x3f))); + src2 = TMP_REG2; + } + + return push_inst(compiler, ((flags & ALT_FORM2) ? SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_LSHR: + case SLJIT_MLSHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + if (flags & ALT_FORM2) { + imm &= 0x1f; + /* Since imm can be 0, SRWI() cannot be used. */ + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | RLWI_SH((32 - imm) & 0x1f) | RLWI_MBE(imm, 31)); + } + + imm &= 0x3f; + /* Since imm can be 0, SRDI() cannot be used. */ + return push_inst(compiler, RLDICL | RC(flags) | S(src1) | A(dst) | RLDI_SH((64 - imm) & 0x3f) | RLDI_MB(imm)); + } + + if (op == SLJIT_MLSHR) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | ((flags & ALT_FORM2) ? 
0x1f : 0x3f))); + src2 = TMP_REG2; + } + + return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ASHR: + case SLJIT_MASHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + if (flags & ALT_FORM2) { + imm &= 0x1f; + return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (imm << 11)); + } + + imm &= 0x3f; + return push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | RLDI_SH(imm)); + } + + if (op == SLJIT_MASHR) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | ((flags & ALT_FORM2) ? 0x1f : 0x3f))); + src2 = TMP_REG2; + } + + return push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + if (op == SLJIT_ROTR) + imm = (sljit_u32)(-(sljit_s32)imm); + + if (flags & ALT_FORM2) { + imm &= 0x1f; + return push_inst(compiler, RLWINM | S(src1) | A(dst) | RLWI_SH(imm) | RLWI_MBE(0, 31)); + } + + imm &= 0x3f; + return push_inst(compiler, RLDICL | S(src1) | A(dst) | RLDI_SH(imm)); + } + + if (op == SLJIT_ROTR) { + FAIL_IF(push_inst(compiler, SUBFIC | D(TMP_REG2) | A(src2) | 0)); + src2 = TMP_REG2; + } + + return push_inst(compiler, ((flags & ALT_FORM2) ? 
(RLWNM | RLWI_MBE(0, 31)) : (RLDCL | RLDI_MB(0))) | S(src1) | A(dst) | B(src2)); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} +/* Repositions word (non-float) call arguments. For every word argument whose overall position (arg_count) differs from its index among the word arguments (word_arg_count), a register copy from register index word_arg_count to register index arg_count is emitted, last argument first. If *src names a register index that such a copy overwrites, it is copied to TMP_CALL_REG beforehand and *src is updated. src may be NULL. Returns SLJIT_SUCCESS unless a push_inst() call fails (FAIL_IF). */ +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +{ + sljit_s32 arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 reg = 0; + + if (src) + reg = *src & REG_MASK; + + arg_types >>= SLJIT_ARG_SHIFT; + /* Pass 1: count the arguments and build the reversed type list in types. */ + while (arg_types) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: + arg_count++; + break; + default: + arg_count++; + word_arg_count++; + + if (arg_count != word_arg_count && arg_count == reg) { + FAIL_IF(push_inst(compiler, OR | S(reg) | A(TMP_CALL_REG) | B(reg))); + *src = TMP_CALL_REG; + } + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + /* Pass 2: visit the arguments in reverse order and emit the register copies. */ + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: + arg_count--; + break; + default: + if (arg_count != word_arg_count) + FAIL_IF(push_inst(compiler, OR | S(word_arg_count) | A(arg_count) | B(word_arg_count))); + + arg_count--; + word_arg_count--; + break; + } + + types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} +/* Emits a fixed five-instruction sequence that builds the 64-bit init_value in reg 16 bits at a time: ADDIS (bits 63..48), ORI (bits 47..32), shift left by 32, ORIS (bits 31..16), ORI (bits 15..0). The length does not depend on the value. */ +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value >> 32))); + FAIL_IF(push_inst(compiler, SLDI(32) | S(reg) | A(reg))); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(init_value >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); +} +/* Emits a signed integer to floating point conversion. For SLJIT_CONV_F64_FROM_S32 the source is first sign-extended into TMP_REG1 (EXTSW, or a signed INT_DATA load); immediates are materialized with load_immediate(). A register source reaches TMP_FREG1 through the TMP_MEM_OFFSET stack slot (STD then LFD), a memory source is loaded as DOUBLE_DATA. FCFID converts, FRSP follows when op has SLJIT_32, and the result is stored from TMP_FREG1 when dst is memory. */ +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (src == SLJIT_IMM) { + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) { + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1))); + else + FAIL_IF(emit_op_mem(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + if (FAST_IS_REG(src)) { + FAIL_IF(push_inst(compiler, STD | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + } else + FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1)); + + FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1))); + + if (op & SLJIT_32) + FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) { + if (src == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_u32)srcw)); + src = TMP_REG1; + } else { + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, CLRLDI(TMP_REG1, src, 32))); + else + FAIL_IF(emit_op_mem(compiler, INT_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, STD | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1))); + } else { + if (src == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } else if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, CMPI | CRD(0 | 1) | A(src) | 0)); + FAIL_IF(push_inst(compiler, BCx | (12 << 21) | (0 << 16) | 20)); + FAIL_IF(push_inst(compiler, STD | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1))); + FAIL_IF(push_inst(compiler, Bx | ((op & SLJIT_32) ? 36 : 32))); + + if (op & SLJIT_32) + FAIL_IF(push_inst(compiler, RLWINM | S(src) | A(TMP_REG2) | RLWI_SH(10) | RLWI_MBE(10, 21))); + else + FAIL_IF(push_inst(compiler, ANDI | S(src) | A(TMP_REG2) | 0x1)); + + /* Shift right. 
*/ + FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(63) | RLDI_MB(1))); + + if (op & SLJIT_32) + FAIL_IF(push_inst(compiler, RLDICR | S(TMP_REG1) | A(TMP_REG1) | RLDI_SH(0) | RLDI_ME(53))); + + FAIL_IF(push_inst(compiler, OR | S(TMP_REG1) | A(TMP_REG1) | B(TMP_REG2))); + + FAIL_IF(push_inst(compiler, STD | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1))); + FAIL_IF(push_inst(compiler, FADD | FD(dst_r) | FA(dst_r) | FB(dst_r))); + } + + if (op & SLJIT_32) + FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); + return SLJIT_SUCCESS; +} +/* Loads the double constant value into freg: its 64-bit pattern is stored to the TMP_MEM_OFFSET stack slot (from TMP_REG1 after load_immediate(), or from TMP_ZERO when the pattern is 0) and read back with LFD. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_sw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm != 0) + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm)); + + FAIL_IF(push_inst(compiler, STD | S(u.imm != 0 ? TMP_REG1 : TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + return push_inst(compiler, LFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET); +} +/* Transfers a value between general register reg and float register freg through the TMP_MEM_OFFSET stack slot: SLJIT_COPY_TO_F64 stores reg and loads freg, any other opcode stores freg and loads reg. SLJIT_32 in op selects the 32-bit store/load instructions. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) { + FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? STW : STD) | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + return push_inst(compiler, ((op & SLJIT_32) ? LFS : LFD) | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET); + } + + FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? STFS : STFD) | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + return push_inst(compiler, ((op & SLJIT_32) ? 
LWZ : LD) | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET); +} +/* Re-targets the five-instruction address load located at addr: the low 16 bits of instructions 0, 1, 3 and 4 receive bits 63..48, 47..32, 31..16 and 15..0 of new_target (instruction 2 is left untouched). The writes are bracketed by SLJIT_UPDATE_WX_FLAGS and followed by SLJIT_CACHE_FLUSH on the executable-offset view of the same five words. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins*)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0); + inst[0] = (inst[0] & 0xffff0000u) | ((sljit_ins)(new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000u) | ((sljit_ins)(new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000u) | ((sljit_ins)(new_target >> 16) & 0xffff); + inst[4] = (inst[4] & 0xffff0000u) | ((sljit_ins)new_target & 0xffff); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 5); +} diff --git a/src/sljit/sljitNativePPC_common.c b/src/sljit/sljitNativePPC_common.c new file mode 100644 index 0000000..1f17d90 --- /dev/null +++ b/src/sljit/sljitNativePPC_common.c @@ -0,0 +1,3161 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ + return "PowerPC" SLJIT_CPUINFO; +} + +/* Length of an instruction word. + Both for ppc-32 and ppc-64. */ +typedef sljit_u32 sljit_ins; + +#if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_PPC_STACK_FRAME_V2 1 +#endif + +#ifdef _AIX +#include +#endif + +#if (defined _CALL_ELF && _CALL_ELF == 2) +#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1 +#endif + +#if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) + +static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) +{ +#ifdef _AIX + _sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from)); +#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM) +# if defined(_ARCH_PWR) || defined(_ARCH_PWR2) + /* Cache flush for POWER architecture. */ + while (from < to) { + __asm__ volatile ( + "clf 0, %0\n" + "dcs\n" + : : "r"(from) + ); + from++; + } + __asm__ volatile ( "ics" ); +# elif defined(_ARCH_COM) && !defined(_ARCH_PPC) +# error "Cache flush is not implemented for PowerPC/POWER common mode." +# else + /* Cache flush for PowerPC architecture. 
*/ + while (from < to) { + __asm__ volatile ( + "dcbf 0, %0\n" + "sync\n" + "icbi 0, %0\n" + : : "r"(from) + ); + from++; + } + __asm__ volatile ( "isync" ); +# endif +# ifdef __xlc__ +# warning "This file may fail to compile if -qfuncsect is used" +# endif +#elif defined(__xlc__) +#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc" +#else +#error "This platform requires a cache flush implementation." +#endif /* _AIX */ +} + +#endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */ +/* Register indexes of the temporaries; they follow the SLJIT_NUMBER_OF_REGISTERS public indexes and are translated through reg_map like any other index. */ +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_ZERO (SLJIT_NUMBER_OF_REGISTERS + 4) + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) +#define TMP_CALL_REG (SLJIT_NUMBER_OF_REGISTERS + 5) +#else +#define TMP_CALL_REG TMP_REG1 +#endif + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) +/* reg_map and freg_map translate SLJIT register indexes (array position) into the machine register numbers that the D/S/A/B/C and FD/FS/FA/FB/FC macros below shift into instruction fields. */ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { + 0, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 9, 10, 31, 12 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 0, 13 +}; + +/* --------------------------------------------------------------------- */ +/* Instruction forms */ +/* --------------------------------------------------------------------- */ +#define D(d) ((sljit_ins)reg_map[d] << 21) +#define S(s) ((sljit_ins)reg_map[s] << 21) +#define A(a) ((sljit_ins)reg_map[a] << 16) +#define B(b) ((sljit_ins)reg_map[b] << 11) +#define C(c) ((sljit_ins)reg_map[c] << 6) +#define FD(fd) ((sljit_ins)freg_map[fd] << 21) +#define FS(fs) ((sljit_ins)freg_map[fs] << 21) +#define FA(fa) ((sljit_ins)freg_map[fa] << 16) +#define FB(fb) ((sljit_ins)freg_map[fb] << 11) +#define FC(fc) ((sljit_ins)freg_map[fc] 
<< 6) +#define IMM(imm) ((sljit_ins)(imm) & 0xffff) +#define CRD(d) ((sljit_ins)(d) << 21) + +/* Instruction bit sections. + OE and Rc flag (see ALT_SET_FLAGS). */ +#define OE(flags) ((flags) & ALT_SET_FLAGS) +/* Rc flag (see ALT_SET_FLAGS). */ +#define RC(flags) ((sljit_ins)((flags) & ALT_SET_FLAGS) >> 10) +#define HI(opcode) ((sljit_ins)(opcode) << 26) +#define LO(opcode) ((sljit_ins)(opcode) << 1) + +#define ADD (HI(31) | LO(266)) +#define ADDC (HI(31) | LO(10)) +#define ADDE (HI(31) | LO(138)) +#define ADDI (HI(14)) +#define ADDIC (HI(13)) +#define ADDIS (HI(15)) +#define ADDME (HI(31) | LO(234)) +#define AND (HI(31) | LO(28)) +#define ANDI (HI(28)) +#define ANDIS (HI(29)) +#define Bx (HI(18)) +#define BCx (HI(16)) +#define BCCTR (HI(19) | LO(528) | (3 << 11)) +#define BLR (HI(19) | LO(16) | (0x14 << 21)) +#if defined(_ARCH_PWR10) && _ARCH_PWR10 +#define BRD (HI(31) | LO(187)) +#endif /* POWER10 */ +#define CNTLZD (HI(31) | LO(58)) +#define CNTLZW (HI(31) | LO(26)) +#define CMP (HI(31) | LO(0)) +#define CMPI (HI(11)) +#define CMPL (HI(31) | LO(32)) +#define CMPLI (HI(10)) +#define CROR (HI(19) | LO(449)) +#define DCBT (HI(31) | LO(278)) +#define DIVD (HI(31) | LO(489)) +#define DIVDU (HI(31) | LO(457)) +#define DIVW (HI(31) | LO(491)) +#define DIVWU (HI(31) | LO(459)) +#define EXTSB (HI(31) | LO(954)) +#define EXTSH (HI(31) | LO(922)) +#define EXTSW (HI(31) | LO(986)) +#define FABS (HI(63) | LO(264)) +#define FADD (HI(63) | LO(21)) +#define FADDS (HI(59) | LO(21)) +#define FCFID (HI(63) | LO(846)) +#define FCMPU (HI(63) | LO(0)) +#define FCTIDZ (HI(63) | LO(815)) +#define FCTIWZ (HI(63) | LO(15)) +#define FDIV (HI(63) | LO(18)) +#define FDIVS (HI(59) | LO(18)) +#define FMR (HI(63) | LO(72)) +#define FMUL (HI(63) | LO(25)) +#define FMULS (HI(59) | LO(25)) +#define FNEG (HI(63) | LO(40)) +#define FRSP (HI(63) | LO(12)) +#define FSUB (HI(63) | LO(20)) +#define FSUBS (HI(59) | LO(20)) +#define LD (HI(58) | 0) +#define LFD (HI(50)) +#define LFS (HI(48)) +#if 
defined(_ARCH_PWR7) && _ARCH_PWR7 +#define LDBRX (HI(31) | LO(532)) +#endif /* POWER7 */ +#define LHBRX (HI(31) | LO(790)) +#define LWBRX (HI(31) | LO(534)) +#define LWZ (HI(32)) +#define MFCR (HI(31) | LO(19)) +#define MFLR (HI(31) | LO(339) | 0x80000) +#define MFXER (HI(31) | LO(339) | 0x10000) +#define MTCTR (HI(31) | LO(467) | 0x90000) +#define MTLR (HI(31) | LO(467) | 0x80000) +#define MTXER (HI(31) | LO(467) | 0x10000) +#define MULHD (HI(31) | LO(73)) +#define MULHDU (HI(31) | LO(9)) +#define MULHW (HI(31) | LO(75)) +#define MULHWU (HI(31) | LO(11)) +#define MULLD (HI(31) | LO(233)) +#define MULLI (HI(7)) +#define MULLW (HI(31) | LO(235)) +#define NEG (HI(31) | LO(104)) +#define NOP (HI(24)) +#define NOR (HI(31) | LO(124)) +#define OR (HI(31) | LO(444)) +#define ORI (HI(24)) +#define ORIS (HI(25)) +#define RLDCL (HI(30) | LO(8)) +#define RLDICL (HI(30) | LO(0 << 1)) +#define RLDICR (HI(30) | LO(1 << 1)) +#define RLDIMI (HI(30) | LO(3 << 1)) +#define RLWIMI (HI(20)) +#define RLWINM (HI(21)) +#define RLWNM (HI(23)) +#define SLD (HI(31) | LO(27)) +#define SLW (HI(31) | LO(24)) +#define SRAD (HI(31) | LO(794)) +#define SRADI (HI(31) | LO(413 << 1)) +#define SRAW (HI(31) | LO(792)) +#define SRAWI (HI(31) | LO(824)) +#define SRD (HI(31) | LO(539)) +#define SRW (HI(31) | LO(536)) +#define STD (HI(62) | 0) +#if defined(_ARCH_PWR7) && _ARCH_PWR7 +#define STDBRX (HI(31) | LO(660)) +#endif /* POWER7 */ +#define STDU (HI(62) | 1) +#define STDUX (HI(31) | LO(181)) +#define STFD (HI(54)) +#define STFIWX (HI(31) | LO(983)) +#define STFS (HI(52)) +#define STHBRX (HI(31) | LO(918)) +#define STW (HI(36)) +#define STWBRX (HI(31) | LO(662)) +#define STWU (HI(37)) +#define STWUX (HI(31) | LO(183)) +#define SUBF (HI(31) | LO(40)) +#define SUBFC (HI(31) | LO(8)) +#define SUBFE (HI(31) | LO(136)) +#define SUBFIC (HI(8)) +#define XOR (HI(31) | LO(316)) +#define XORI (HI(26)) +#define XORIS (HI(27)) + +#define SIMM_MAX (0x7fff) +#define SIMM_MIN (-0x8000) +#define UIMM_MAX (0xffff) + 
+/* Shift helpers. */ +#define RLWI_SH(sh) ((sljit_ins)(sh) << 11) +#define RLWI_MBE(mb, me) (((sljit_ins)(mb) << 6) | ((sljit_ins)(me) << 1)) +#define RLDI_SH(sh) ((((sljit_ins)(sh) & 0x1f) << 11) | (((sljit_ins)(sh) & 0x20) >> 4)) +#define RLDI_MB(mb) ((((sljit_ins)(mb) & 0x1f) << 6) | ((sljit_ins)(mb) & 0x20)) +#define RLDI_ME(me) RLDI_MB(me) + +#define SLWI(shift) (RLWINM | RLWI_SH(shift) | RLWI_MBE(0, 31 - (shift))) +#define SLDI(shift) (RLDICR | RLDI_SH(shift) | RLDI_ME(63 - (shift))) +/* shift > 0 */ +#define SRWI(shift) (RLWINM | RLWI_SH(32 - (shift)) | RLWI_MBE((shift), 31)) +#define SRDI(shift) (RLDICL | RLDI_SH(64 - (shift)) | RLDI_MB(shift)) + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#define SLWI_W(shift) SLWI(shift) +#define TMP_MEM_OFFSET (2 * sizeof(sljit_sw)) +#else /* !SLJIT_CONFIG_PPC_32 */ +#define SLWI_W(shift) SLDI(shift) +#define TMP_MEM_OFFSET (6 * sizeof(sljit_sw)) +#endif /* SLJIT_CONFIG_PPC_32 */ + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define TMP_MEM_OFFSET_LO (TMP_MEM_OFFSET) +#define TMP_MEM_OFFSET_HI (TMP_MEM_OFFSET + sizeof(sljit_s32)) +#define LWBRX_FIRST_REG S(TMP_REG1) +#define LWBRX_SECOND_REG S(dst) +#else /* !SLJIT_LITTLE_ENDIAN */ +#define TMP_MEM_OFFSET_LO (TMP_MEM_OFFSET + sizeof(sljit_s32)) +#define TMP_MEM_OFFSET_HI (TMP_MEM_OFFSET) +#define LWBRX_FIRST_REG S(dst) +#define LWBRX_SECOND_REG S(TMP_REG1) +#endif /* SLJIT_LITTLE_ENDIAN */ + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func) +{ + sljit_uw* ptrs; + + if (func_ptr) + *func_ptr = (void*)context; + + ptrs = (sljit_uw*)func; + context->addr = addr ? 
addr : ptrs[0]; + context->r2 = ptrs[1]; + context->r11 = ptrs[2]; +} +#endif +/* Appends one instruction word to the compiler buffer obtained from ensure_buf() and increments compiler->size; fails through FAIL_IF when ensure_buf() returns NULL. */ +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} +/* Chooses the encoding for jump at code_ptr. Sets PATCH_B (plus PATCH_ABS_B for an absolute target) when a single branch, or an inverted conditional branch followed by Bx, can reach the target; on ppc-64 sets PATCH_ABS32 or PATCH_ABS48 when a shorter address load suffices; otherwise keeps the full-length sequence. Rewritable jumps always keep the full length. Returns a pointer to the last instruction word the jump occupies. */ +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_sw diff; + sljit_uw target_addr; + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) + goto exit; +#else + if (jump->flags & SLJIT_REWRITABLE_JUMP) + goto exit; +#endif + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->u.label != NULL); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + } + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (jump->flags & IS_CALL) + goto keep_address; +#endif + + diff = (sljit_sw)target_addr - (sljit_sw)code_ptr - executable_offset; + + if (jump->flags & IS_COND) { + if (diff <= 0x7fff && diff >= -0x8000) { + jump->flags |= PATCH_B; + return code_ptr; + } + if (target_addr <= 0xffff) { /* NOTE(review): the conditional-branch displacement is a sign-extended 16-bit field in the Power ISA, so absolute targets 0x8000..0xffff look unreachable with this form; confirm this bound. */ + jump->flags |= PATCH_B | PATCH_ABS_B; + return code_ptr; + } + + diff -= SSIZE_OF(ins); + } + + if (diff <= 0x01ffffff && diff >= -0x02000000) { + jump->flags |= PATCH_B; + } else if (target_addr <= 0x01ffffff) { + jump->flags |= PATCH_B | PATCH_ABS_B; + } + + if (jump->flags & PATCH_B) { + if (!(jump->flags & IS_COND)) + return code_ptr; + /* Conditional jump beyond the 16-bit range: word 0 becomes a conditional branch with displacement 8 (the XOR with 8 << 21 flips one condition-field bit, presumably inverting the test) and word 1 an unconditional Bx that is patched later. */ + code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001); + code_ptr[1] = Bx; + jump->addr += sizeof(sljit_ins); + jump->flags -= IS_COND; + return code_ptr + 1; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined 
SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) +keep_address: +#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ + if (target_addr < 0x80000000l) { + jump->flags |= PATCH_ABS32; + code_ptr[2] = MTCTR | S(TMP_CALL_REG); + code_ptr[3] = code_ptr[0]; + return code_ptr + 3; + } + + if (target_addr < 0x800000000000l) { + jump->flags |= PATCH_ABS48; + code_ptr[4] = MTCTR | S(TMP_CALL_REG); + code_ptr[5] = code_ptr[0]; + return code_ptr + 5; + } +#endif /* SLJIT_CONFIG_PPC_64 */ + +exit: +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + code_ptr[2] = MTCTR | S(TMP_CALL_REG); + code_ptr[3] = code_ptr[0]; +#else /* !SLJIT_CONFIG_PPC_32 */ + code_ptr[5] = MTCTR | S(TMP_CALL_REG); + code_ptr[6] = code_ptr[0]; +#endif /* SLJIT_CONFIG_PPC_32 */ + return code_ptr + JUMP_MAX_SIZE - 1; +} + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +/* Returns how many instruction words beyond the first a mov_addr sequence needs for its resolved address, and records the chosen form in jump->flags: 1 with PATCH_ABS32 (address below 0x80000000), 3 with PATCH_ABS48 (address below 0x800000000000), otherwise 4 for the full 64-bit load. The assertions check that the size stored above JUMP_SIZE_SHIFT is large enough for the chosen form. */ +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_uw addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT(jump->flags < ((sljit_uw)5 << JUMP_SIZE_SHIFT)); + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + if (addr < 0x80000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS32; + return 1; + } + + if (addr < 0x800000000000l) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS48; + return 3; + } + + SLJIT_ASSERT(jump->flags >= ((sljit_uw)4 << JUMP_SIZE_SHIFT)); + return 4; +} + +#endif /* SLJIT_CONFIG_PPC_64 */ +/* Writes the final instruction words for a resolved jump or mov_addr at jump->addr. The PATCH_* flags set earlier select a relative or absolute branch (PATCH_B, PATCH_ABS_B) or an address load into the target register: two instructions on ppc-32; on ppc-64 two (PATCH_ABS32), four (PATCH_ABS48) or five instructions. */ +static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset) +{ + sljit_uw flags = jump->flags; + sljit_uw addr = (flags & JUMP_ADDR) ? 
jump->u.target : jump->u.label->u.addr; + sljit_ins *ins = (sljit_ins*)jump->addr; + sljit_s32 reg; + SLJIT_UNUSED_ARG(executable_offset); + + if (flags & PATCH_B) { + if (flags & IS_COND) { + if (!(flags & PATCH_ABS_B)) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset); + SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); + ins[0] = BCx | ((sljit_ins)addr & 0xfffc) | (ins[0] & 0x03ff0001); + } else { + SLJIT_ASSERT(addr <= 0xffff); + ins[0] = BCx | ((sljit_ins)addr & 0xfffc) | 0x2 | ((*ins) & 0x03ff0001); + } + return; + } + + if (!(flags & PATCH_ABS_B)) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset); + SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); + ins[0] = Bx | ((sljit_ins)addr & 0x03fffffc) | (ins[0] & 0x1); + } else { + SLJIT_ASSERT(addr <= 0x03ffffff); + ins[0] = Bx | ((sljit_ins)addr & 0x03fffffc) | 0x2 | (ins[0] & 0x1); + } + return; + } + + reg = (flags & JUMP_MOV_ADDR) ? (sljit_s32)ins[0] : TMP_CALL_REG; + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + ins[0] = ADDIS | D(reg) | A(0) | IMM(addr >> 16); + ins[1] = ORI | S(reg) | A(reg) | IMM(addr); +#else /* !SLJIT_CONFIG_PPC_32 */ + + /* The TMP_ZERO cannot be used because it is restored for tail calls. 
*/ + if (flags & PATCH_ABS32) { + SLJIT_ASSERT(addr < 0x80000000l); + ins[0] = ADDIS | D(reg) | A(0) | IMM(addr >> 16); + ins[1] = ORI | S(reg) | A(reg) | IMM(addr); + return; + } + + if (flags & PATCH_ABS48) { + SLJIT_ASSERT(addr < 0x800000000000l); + ins[0] = ADDIS | D(reg) | A(0) | IMM(addr >> 32); + ins[1] = ORI | S(reg) | A(reg) | IMM(addr >> 16); + ins[2] = SLDI(16) | S(reg) | A(reg); + ins[3] = ORI | S(reg) | A(reg) | IMM(addr); + return; + } + + ins[0] = ADDIS | D(reg) | A(0) | IMM(addr >> 48); + ins[1] = ORI | S(reg) | A(reg) | IMM(addr >> 32); + ins[2] = SLDI(32) | S(reg) | A(reg); + ins[3] = ORIS | S(reg) | A(reg) | IMM(addr >> 16); + ins[4] = ORI | S(reg) | A(reg) | IMM(addr); +#endif /* SLJIT_CONFIG_PPC_32 */ +} + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE - 1; + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { + if (jump->flags & JUMP_ADDR) { + if (jump->u.target <= 0x01ffffff) + total_size = 1 - 1; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + else if (jump->u.target < 0x80000000l) + total_size = 4 - 1; + else if (jump->u.target < 0x800000000000l) + total_size = 6 - 1; 
+#endif /* SLJIT_CONFIG_PPC_64 */ + } else { + /* Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if (jump->flags & IS_COND) { + if (diff <= (0x7fff / SSIZE_OF(ins)) && diff >= (-0x8000 / SSIZE_OF(ins))) + total_size = 1 - 1; + else if ((diff - 1) <= (0x01ffffff / SSIZE_OF(ins)) && (diff - 1) >= (-0x02000000 / SSIZE_OF(ins))) + total_size = 2 - 1; + } else if (diff <= (0x01ffffff / SSIZE_OF(ins)) && diff >= (-0x02000000 / SSIZE_OF(ins))) + total_size = 1 - 1; + } + } + + size_reduce += (JUMP_MAX_SIZE - 1) - total_size; + jump->flags |= total_size << JUMP_SIZE_SHIFT; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + } else { + total_size = (sljit_uw)4 << JUMP_SIZE_SHIFT; + + if (jump->flags & JUMP_ADDR) { + if (jump->u.target < 0x80000000l) { + total_size = (sljit_uw)1 << JUMP_SIZE_SHIFT; + size_reduce += 3; + } else if (jump->u.target < 0x800000000000l) { + total_size = (sljit_uw)3 << JUMP_SIZE_SHIFT; + size_reduce += 1; + } + } + jump->flags |= total_size; +#endif /* SLJIT_CONFIG_PPC_64 */ + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + SLJIT_NEXT_DEFINE_TYPES; + sljit_sw executable_offset; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + + reduce_code_size(compiler); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + /* add to compiler->size additional instruction space to hold the trampoline and padding */ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + compiler->size += (compiler->size & 0x1) + (sizeof(struct 
sljit_function_context) / sizeof(sljit_ins)); +#else + compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins)); +#endif +#endif + code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset); + PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_min_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + + /* These structures are ordered by their address. */ + if (next_min_addr == next_label_size) { + /* Just recording the address. */ + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + word_count += jump->flags >> JUMP_SIZE_SHIFT; + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); + SLJIT_ASSERT(((sljit_uw)code_ptr - jump->addr <= (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); + } else { + jump->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + word_count += jump->flags >> JUMP_SIZE_SHIFT; + code_ptr += mov_addr_get_length(jump, code, executable_offset); +#else /* !SLJIT_CONFIG_PPC_64 */ + word_count++; + code_ptr++; +#endif /* SLJIT_CONFIG_PPC_64 */ + } + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { + const_->addr = 
(sljit_uw)code_ptr; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + } + + SLJIT_GET_NEXT_MIN(); + } + code_ptr++; + word_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)(compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)))); +#else + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); +#endif + + jump = compiler->jumps; + while (jump) { + generate_jump_or_mov_addr(jump, executable_offset); + jump = jump->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (((sljit_sw)code_ptr) & 0x4) + code_ptr++; +#endif + sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_uw)code, (void*)sljit_generate_code); +#endif + + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins) + sizeof(struct sljit_function_context); + return code_ptr; +#else + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); + return code; +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + 
return (SLJIT_IS_FPU_AVAILABLE) != 0; +#else + /* Available by default. */ + return 1; +#endif + case SLJIT_HAS_REV: +#if defined(_ARCH_PWR10) && _ARCH_PWR10 + return 1; +#else /* !POWER10 */ + return 2; +#endif /* POWER10 */ + /* A saved register is set to a zero value. */ + case SLJIT_HAS_ZERO_REGISTER: + case SLJIT_HAS_CLZ: + case SLJIT_HAS_ROT: + case SLJIT_HAS_PREFETCH: + return 1; + + case SLJIT_HAS_CTZ: + return 2; + + default: + return 0; + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + switch (type) { + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED_LESS_EQUAL: + return 1; + } + + return 0; +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* inp_flags: */ + +/* Creates an index in data_transfer_insts array. */ +#define LOAD_DATA 0x01 +#define INDEXED 0x02 +#define SIGNED_DATA 0x04 + +#define WORD_DATA 0x00 +#define BYTE_DATA 0x08 +#define HALF_DATA 0x10 +#define INT_DATA 0x18 +/* Separates integer and floating point registers */ +#define GPR_REG 0x1f +#define DOUBLE_DATA 0x20 + +#define MEM_MASK 0x7f + +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 6)) + +/* Other inp_flags. */ + +/* Integer opertion and set flags -> requires exts on 64 bit systems. */ +#define ALT_SIGN_EXT 0x000100 +/* This flag affects the RC() and OERC() macros. */ +#define ALT_SET_FLAGS 0x000400 +#define ALT_FORM1 0x001000 +#define ALT_FORM2 0x002000 +#define ALT_FORM3 0x004000 +#define ALT_FORM4 0x008000 +#define ALT_FORM5 0x010000 + +/* Source and destination is register. */ +#define REG_DEST 0x000001 +#define REG1_SOURCE 0x000002 +#define REG2_SOURCE 0x000004 +/* +ALT_SIGN_EXT 0x000100 +ALT_SET_FLAGS 0x000200 +ALT_FORM1 0x001000 +... 
+ALT_FORM5 0x010000 */ + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg); + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#include "sljitNativePPC_32.c" +#else +#include "sljitNativePPC_64.c" +#endif + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#define STACK_STORE STW +#define STACK_LOAD LWZ +#else +#define STACK_STORE STD +#define STACK_LOAD LD +#endif + +#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) +#define LR_SAVE_OFFSET (2 * SSIZE_OF(sw)) +#else +#define LR_SAVE_OFFSET SSIZE_OF(sw) +#endif + +#define STACK_MAX_DISTANCE (0x8000 - SSIZE_OF(sw) - LR_SAVE_OFFSET) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 i, tmp, base, offset; + sljit_s32 word_arg_count = 0; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 arg_count = 0; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 0) + + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); + + if (!(options & SLJIT_ENTER_REG_ARG)) + local_size += SSIZE_OF(sw); + + local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; + compiler->local_size = local_size; + + FAIL_IF(push_inst(compiler, MFLR | D(0))); + + base = SLJIT_SP; + offset = local_size; + + if (local_size <= STACK_MAX_DISTANCE) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | 
IMM(-local_size))); +#else + FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); +#endif + } else { + base = TMP_REG1; + FAIL_IF(push_inst(compiler, OR | S(SLJIT_SP) | A(TMP_REG1) | B(SLJIT_SP))); + FAIL_IF(load_immediate(compiler, TMP_REG2, -local_size)); +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(TMP_REG2))); +#else + FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(TMP_REG2))); +#endif + local_size = 0; + offset = 0; + } + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, STFD | FS(i) | A(base) | IMM(offset))); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, STFD | FS(i) | A(base) | IMM(offset))); + } + + if (!(options & SLJIT_ENTER_REG_ARG)) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(base) | IMM(offset))); + } + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(base) | IMM(offset))); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(base) | IMM(offset))); + } + + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(base) | IMM(local_size + LR_SAVE_OFFSET))); + + if (options & SLJIT_ENTER_REG_ARG) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0)); + + arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; + + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + do { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (arg_count != word_arg_count) + 
tmp = SLJIT_R0 + word_arg_count; + else + break; + + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + arg_count) | A(tmp) | B(SLJIT_R0 + arg_count))); + } while (0); +#else + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + word_arg_count) | A(SLJIT_S0 - saved_arg_count) | B(SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } +#endif + word_arg_count++; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + arg_count++; +#endif + arg_types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 0) + + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); + + if (!(options & SLJIT_ENTER_REG_ARG)) + local_size += SSIZE_OF(sw); + + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to) +{ + sljit_s32 i, tmp, base, offset; + sljit_s32 local_size = compiler->local_size; + + SLJIT_ASSERT(TMP_CALL_REG != TMP_REG2); + + base = SLJIT_SP; + if (local_size > STACK_MAX_DISTANCE) { + base = TMP_REG2; + if (local_size > 2 * STACK_MAX_DISTANCE + LR_SAVE_OFFSET) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(base) | A(SLJIT_SP) | IMM(0))); + local_size = 0; + } else { + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(SLJIT_SP) | IMM(local_size - STACK_MAX_DISTANCE))); + local_size = STACK_MAX_DISTANCE; + } + } + + 
offset = local_size; + if (!is_return_to) + FAIL_IF(push_inst(compiler, STACK_LOAD | S(0) | A(base) | IMM(offset + LR_SAVE_OFFSET))); + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LFD | FS(i) | A(base) | IMM(offset))); + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LFD | FS(i) | A(base) | IMM(offset))); + } + + if (!(compiler->options & SLJIT_ENTER_REG_ARG)) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(TMP_ZERO) | A(base) | IMM(offset))); + } + + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset))); + } + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset))); + } + + if (!is_return_to) + push_inst(compiler, MTLR | S(0)); + + if (local_size > 0) + return push_inst(compiler, ADDI | D(SLJIT_SP) | A(base) | IMM(local_size)); + + SLJIT_ASSERT(base == TMP_REG2); + return push_inst(compiler, OR | S(base) | A(SLJIT_SP) | B(base)); +} + +#undef STACK_STORE +#undef STACK_LOAD + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + return push_inst(compiler, BLR); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG)); + src = TMP_CALL_REG; + 
srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); + src = TMP_CALL_REG; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +/* s/l - store/load (1 bit) + i/x - immediate/indexed form + u/s - signed/unsigned (1 bit) + w/b/h/i - word/byte/half/int allowed (2 bit) + + Some opcodes are repeated (e.g. store signed / unsigned byte is the same instruction). */ + +/* 64 bit only: [reg+imm] must be aligned to 4 bytes. */ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define INT_ALIGNED 0x10000 +#endif + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#define ARCH_32_64(a, b) a +#define INST_CODE_AND_DST(inst, flags, reg) \ + ((sljit_ins)(inst) | (sljit_ins)(((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) +#else +#define ARCH_32_64(a, b) b +#define INST_CODE_AND_DST(inst, flags, reg) \ + (((sljit_ins)(inst) & ~(sljit_ins)INT_ALIGNED) | (sljit_ins)(((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) +#endif + +static const sljit_ins data_transfer_insts[64 + 16] = { + +/* -------- Integer -------- */ + +/* Word. 
*/ + +/* w u i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */), +/* w u i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */), +/* w u x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), +/* w u x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), + +/* w s i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */), +/* w s i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */), +/* w s x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), +/* w s x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), + +/* Byte. */ + +/* b u i s */ HI(38) /* stb */, +/* b u i l */ HI(34) /* lbz */, +/* b u x s */ HI(31) | LO(215) /* stbx */, +/* b u x l */ HI(31) | LO(87) /* lbzx */, + +/* b s i s */ HI(38) /* stb */, +/* b s i l */ HI(34) /* lbz */ /* EXTS_REQ */, +/* b s x s */ HI(31) | LO(215) /* stbx */, +/* b s x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */, + +/* Half. */ + +/* h u i s */ HI(44) /* sth */, +/* h u i l */ HI(40) /* lhz */, +/* h u x s */ HI(31) | LO(407) /* sthx */, +/* h u x l */ HI(31) | LO(279) /* lhzx */, + +/* h s i s */ HI(44) /* sth */, +/* h s i l */ HI(42) /* lha */, +/* h s x s */ HI(31) | LO(407) /* sthx */, +/* h s x l */ HI(31) | LO(343) /* lhax */, + +/* Int. 
*/ + +/* i u i s */ HI(36) /* stw */, +/* i u i l */ HI(32) /* lwz */, +/* i u x s */ HI(31) | LO(151) /* stwx */, +/* i u x l */ HI(31) | LO(23) /* lwzx */, + +/* i s i s */ HI(36) /* stw */, +/* i s i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */), +/* i s x s */ HI(31) | LO(151) /* stwx */, +/* i s x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */), + +/* -------- Floating point -------- */ + +/* d i s */ HI(54) /* stfd */, +/* d i l */ HI(50) /* lfd */, +/* d x s */ HI(31) | LO(727) /* stfdx */, +/* d x l */ HI(31) | LO(599) /* lfdx */, + +/* s i s */ HI(52) /* stfs */, +/* s i l */ HI(48) /* lfs */, +/* s x s */ HI(31) | LO(663) /* stfsx */, +/* s x l */ HI(31) | LO(535) /* lfsx */, +}; + +static const sljit_ins updated_data_transfer_insts[64] = { + +/* -------- Integer -------- */ + +/* Word. */ + +/* w u i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */), +/* w u i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */), +/* w u x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), +/* w u x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), + +/* w s i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */), +/* w s i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */), +/* w s x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), +/* w s x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), + +/* Byte. */ + +/* b u i s */ HI(39) /* stbu */, +/* b u i l */ HI(35) /* lbzu */, +/* b u x s */ HI(31) | LO(247) /* stbux */, +/* b u x l */ HI(31) | LO(119) /* lbzux */, + +/* b s i s */ HI(39) /* stbu */, +/* b s i l */ 0 /* no such instruction */, +/* b s x s */ HI(31) | LO(247) /* stbux */, +/* b s x l */ 0 /* no such instruction */, + +/* Half. 
*/ + +/* h u i s */ HI(45) /* sthu */, +/* h u i l */ HI(41) /* lhzu */, +/* h u x s */ HI(31) | LO(439) /* sthux */, +/* h u x l */ HI(31) | LO(311) /* lhzux */, + +/* h s i s */ HI(45) /* sthu */, +/* h s i l */ HI(43) /* lhau */, +/* h s x s */ HI(31) | LO(439) /* sthux */, +/* h s x l */ HI(31) | LO(375) /* lhaux */, + +/* Int. */ + +/* i u i s */ HI(37) /* stwu */, +/* i u i l */ HI(33) /* lwzu */, +/* i u x s */ HI(31) | LO(183) /* stwux */, +/* i u x l */ HI(31) | LO(55) /* lwzux */, + +/* i s i s */ HI(37) /* stwu */, +/* i s i l */ ARCH_32_64(HI(33) /* lwzu */, 0 /* no such instruction */), +/* i s x s */ HI(31) | LO(183) /* stwux */, +/* i s x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */), + +/* -------- Floating point -------- */ + +/* d i s */ HI(55) /* stfdu */, +/* d i l */ HI(51) /* lfdu */, +/* d x s */ HI(31) | LO(759) /* stfdux */, +/* d x l */ HI(31) | LO(631) /* lfdux */, + +/* s i s */ HI(53) /* stfsu */, +/* s i l */ HI(49) /* lfsu */, +/* s x s */ HI(31) | LO(695) /* stfsux */, +/* s x l */ HI(31) | LO(567) /* lfsux */, +}; + +#undef ARCH_32_64 + +/* Simple cases, (no caching is required). */ +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) +{ + sljit_ins inst; + sljit_s32 offs_reg; + + /* Should work when (arg & REG_MASK) == 0. 
*/ + SLJIT_ASSERT(A(0) == 0); + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + offs_reg = OFFS_REG(arg); + + if (argw != 0) { + FAIL_IF(push_inst(compiler, SLWI_W(argw) | S(OFFS_REG(arg)) | A(tmp_reg))); + offs_reg = tmp_reg; + } + + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + SLJIT_ASSERT(!(inst & INT_ALIGNED)); +#endif /* SLJIT_CONFIG_PPC_64 */ + + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(offs_reg)); + } + + inst = data_transfer_insts[inp_flags & MEM_MASK]; + arg &= REG_MASK; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((inst & INT_ALIGNED) && (argw & 0x3) != 0) { + FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg)); + } +#endif /* SLJIT_CONFIG_PPC_64 */ + + if (argw <= SIMM_MAX && argw >= SIMM_MIN) + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | IMM(argw)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (argw <= 0x7fff7fffl && argw >= -0x80000000l) { +#endif /* SLJIT_CONFIG_PPC_64 */ + FAIL_IF(push_inst(compiler, ADDIS | D(tmp_reg) | A(arg) | IMM((argw + 0x8000) >> 16))); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_reg) | IMM(argw)); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + } + + FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg)); +#endif /* SLJIT_CONFIG_PPC_64 */ +} + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw 
src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + result goes to TMP_REG2, so put result can use TMP_REG1. */ + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; + sljit_s32 src2_r; + sljit_s32 src2_tmp_reg = (!(input_flags & ALT_SIGN_EXT) && GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2; + sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS); + + /* Destination check. */ + if (FAST_IS_REG(dst)) { + dst_r = dst; + /* The REG_DEST is only used by SLJIT_MOV operations, although + * it is set for op2 operations with unset destination. */ + flags |= REG_DEST; + + if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) + src2_tmp_reg = dst_r; + } + + /* Source 2. */ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P) + dst_r = src2_r; + } else if (src2 == SLJIT_IMM) { + src2_r = TMP_ZERO; + if (src2w != 0) { + FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w)); + src2_r = src2_tmp_reg; + } + } else { + FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, src2_tmp_reg, src2, src2w, TMP_REG1)); + src2_r = src2_tmp_reg; + } + + /* Source 1. 
*/ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } else if (src1 == SLJIT_IMM) { + src1_r = TMP_ZERO; + if (src1w != 0) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } + } else { + FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); + src1_r = TMP_REG1; + } + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + return emit_op_mem(compiler, input_flags, dst_r, dst, dstw, TMP_REG1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 int_op = op & SLJIT_32; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + case SLJIT_NOP: + return push_inst(compiler, NOP); + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); +#else + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); +#endif + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) : (op == SLJIT_DIVMOD_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (int_op ? 
MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); +#else + FAIL_IF(push_inst(compiler, (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); +#endif + return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1)); + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + return push_inst(compiler, (int_op ? (op == SLJIT_DIV_UW ? DIVWU : DIVW) : (op == SLJIT_DIV_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); +#else + return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); +#endif + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 mem, offs_reg, inp_flags; + sljit_sw memw; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 is_32 = op & SLJIT_32; + + op = GET_OPCODE(op); +#endif /* SLJIT_CONFIG_PPC_64 */ + + if (!((dst | src) & SLJIT_MEM)) { + /* Both are registers. 
*/ + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { + if (src == dst) { + FAIL_IF(push_inst(compiler, RLWIMI | S(dst) | A(dst) | RLWI_SH(16) | RLWI_MBE(8, 15))); + FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | RLWI_SH(24) | RLWI_MBE(16, 31))); + } else { + FAIL_IF(push_inst(compiler, RLWINM | S(src) | A(dst) | RLWI_SH(8) | RLWI_MBE(16, 23))); + FAIL_IF(push_inst(compiler, RLWIMI | S(src) | A(dst) | RLWI_SH(24) | RLWI_MBE(24, 31))); + } + + if (op == SLJIT_REV_U16) + return SLJIT_SUCCESS; + return push_inst(compiler, EXTSH | S(dst) | A(dst)); + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (!is_32) { +#if defined(_ARCH_PWR10) && _ARCH_PWR10 + return push_inst(compiler, BRD | S(src) | A(dst)); +#else /* !POWER10 */ + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(0) | IMM(TMP_MEM_OFFSET_HI))); + FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(32) | RLDI_MB(32))); + FAIL_IF(push_inst(compiler, STWBRX | S(src) | A(SLJIT_SP) | B(TMP_REG2))); + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(0) | IMM(TMP_MEM_OFFSET_LO))); + FAIL_IF(push_inst(compiler, STWBRX | S(TMP_REG1) | A(SLJIT_SP) | B(TMP_REG2))); + return push_inst(compiler, LD | D(dst) | A(SLJIT_SP) | TMP_MEM_OFFSET); +#endif /* POWER10 */ + } +#endif /* SLJIT_CONFIG_PPC_64 */ + + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(0) | IMM(TMP_MEM_OFFSET))); + FAIL_IF(push_inst(compiler, STWBRX | S(src) | A(SLJIT_SP) | B(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWZ | D(dst) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op == SLJIT_REV_S32) + return push_inst(compiler, EXTSW | S(dst) | A(dst)); +#endif /* SLJIT_CONFIG_PPC_64 */ + return SLJIT_SUCCESS; + } + + mem = src; + memw = srcw; + + if (dst & SLJIT_MEM) { + mem = dst; + memw = dstw; + + if (src & SLJIT_MEM) { + inp_flags = HALF_DATA | LOAD_DATA; + + if (op != SLJIT_REV_U16 && op != SLJIT_REV_S16) { +#if (defined SLJIT_CONFIG_PPC_64 && 
SLJIT_CONFIG_PPC_64) + inp_flags = (is_32 ? INT_DATA : WORD_DATA) | LOAD_DATA; +#else /* !SLJIT_CONFIG_PPC_64 */ + inp_flags = WORD_DATA | LOAD_DATA; +#endif /* SLJIT_CONFIG_PPC_64 */ + } + + FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG1, src, srcw, TMP_REG2)); + src = TMP_REG1; + } + } + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + offs_reg = OFFS_REG(mem); + mem &= REG_MASK; + memw &= 0x3; + + if (memw != 0) { + FAIL_IF(push_inst(compiler, SLWI_W(memw) | S(offs_reg) | A(TMP_REG2))); + offs_reg = TMP_REG2; + } +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + } else if (memw > 0x7fff7fffl || memw < -0x80000000l) { + FAIL_IF(load_immediate(compiler, TMP_REG2, memw)); + offs_reg = TMP_REG2; + mem &= REG_MASK; +#endif /* SLJIT_CONFIG_PPC_64 */ + } else { + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(mem & REG_MASK) | IMM(memw))); + if (memw > SIMM_MAX || memw < SIMM_MIN) + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(TMP_REG2) | IMM((memw + 0x8000) >> 16))); + + mem = 0; + offs_reg = TMP_REG2; + } + + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { + if (dst & SLJIT_MEM) + return push_inst(compiler, STHBRX | S(src) | A(mem) | B(offs_reg)); + + FAIL_IF(push_inst(compiler, LHBRX | S(dst) | A(mem) | B(offs_reg))); + + if (op == SLJIT_REV_U16) + return SLJIT_SUCCESS; + return push_inst(compiler, EXTSH | S(dst) | A(dst)); + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (!is_32) { + if (dst & SLJIT_MEM) { +#if defined(_ARCH_PWR7) && _ARCH_PWR7 + return push_inst(compiler, STDBRX | S(src) | A(mem) | B(offs_reg)); +#else /* !POWER7 */ +#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN + FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(32) | RLDI_MB(32))); + FAIL_IF(push_inst(compiler, STWBRX | S(TMP_REG1) | A(mem) | B(offs_reg))); + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(offs_reg) | IMM(SSIZE_OF(s32)))); + return push_inst(compiler, STWBRX | S(src) | A(mem) | B(TMP_REG2)); +#else /* 
/* Helper for the typed move cases of sljit_emit_op1. When src is an immediate
   the move is emitted as a plain SLJIT_MOV and the constant is narrowed with
   type_cast; otherwise the typed move is emitted and srcw is passed through
   unchanged. Uses compiler, flags, dst, dstw, src and srcw from the scope
   where it is expanded. */
#define EMIT_MOV(type, type_flags, type_cast) \
	emit_op(compiler, (src == SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? type_cast srcw : srcw)

/* Emits a single source operand operation: the move family, CLZ / CTZ and
   the byte reversal (REV) family.

   compiler   compiler instance receiving the instructions
   op         opcode combined with flag bits (SLJIT_32, status flag requests)
   dst, dstw  destination operand
   src, srcw  source operand

   Returns the result of the emitter that handled the opcode, or
   SLJIT_SUCCESS when nothing had to be emitted or the opcode is not
   listed in the switch below. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;
	/* Keep the flag bits: op is reduced to the bare opcode below. */
	sljit_s32 op_flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	op = GET_OPCODE(op);

	/* Move TMP_ZERO into XER before an overflow tracking operation
	   (presumably to clear stale overflow state - confirm against MTXER). */
	if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW)
		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));

	/* A register moved onto itself needs no code unless the move also
	   narrows or extends the value. */
	if (op <= SLJIT_MOV_P && FAST_IS_REG(src) && src == dst) {
		if (!TYPE_CAST_NEEDED(op))
			return SLJIT_SUCCESS;
	}

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	if (op_flags & SLJIT_32) {
		if (op <= SLJIT_MOV_P) {
			/* 32 bit moves: a signed load from memory is turned into an
			   unsigned one, an unsigned immediate into a signed one. */
			if (src & SLJIT_MEM) {
				if (op == SLJIT_MOV_S32)
					op = SLJIT_MOV_U32;
			}
			else if (src == SLJIT_IMM) {
				if (op == SLJIT_MOV_U32)
					op = SLJIT_MOV_S32;
			}
		}
		else {
			/* Most operations expect sign extended arguments. */
			flags |= INT_DATA | SIGNED_DATA;
			if (HAS_FLAGS(op_flags))
				flags |= ALT_SIGN_EXT;
		}
	}
#endif

	switch (op) {
	case SLJIT_MOV:
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* On 32 bit targets the 32 bit moves are handled as word sized moves. */
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
#endif
	case SLJIT_MOV_P:
		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	case SLJIT_MOV_U32:
		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32));

	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32));
#endif

	case SLJIT_MOV_U8:
		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA, (sljit_u8));

	case SLJIT_MOV_S8:
		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, (sljit_s8));

	case SLJIT_MOV_U16:
		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA, (sljit_u16));

	case SLJIT_MOV_S16:
		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16));

	case SLJIT_CLZ:
	case SLJIT_CTZ:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		/* ALT_FORM1 marks the 32 bit variant of the bit count. */
		if (op_flags & SLJIT_32)
			flags |= ALT_FORM1;
#endif /* SLJIT_CONFIG_PPC_64 */
		return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		/* The 32 bit reversals are always passed on as 32 bit operations. */
		op |= SLJIT_32;
#endif /* SLJIT_CONFIG_PPC_64 */
		/* fallthrough */
	case SLJIT_REV:
	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		/* Restore the SLJIT_32 bit that GET_OPCODE() stripped from op. */
		op |= (op_flags & SLJIT_32);
#endif /* SLJIT_CONFIG_PPC_64 */
		return emit_rev(compiler, op, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;
}

#undef EMIT_MOV
/* Emits a two source operand integer operation (add, sub, mul, logical
   operations, shifts and rotates).

   compiler       compiler instance receiving the instructions
   op             opcode combined with flag bits (SLJIT_32, flag requests)
   dst, dstw      destination operand
   src1, src1w    first source operand
   src2, src2w    second source operand

   Most of the body selects an instruction form: when one operand is an
   immediate that fits a form tested by the TEST_*_IMM macros, the constant
   is stored in compiler->imm, TMP_REG2 is passed in place of that operand
   and emit_op() is told which form to use through the ALT_FORMn bits.
   The order of the tests decides which form wins, so it must be preserved.

   Returns the result of emit_op(), or SLJIT_SUCCESS for an opcode that is
   not listed in the switch. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	if (op & SLJIT_32) {
		/* Most operations expect sign extended arguments. */
		flags |= INT_DATA | SIGNED_DATA;
		/* Immediates are truncated to 32 bits and sign extended here. */
		if (src1 == SLJIT_IMM)
			src1w = (sljit_s32)(src1w);
		if (src2 == SLJIT_IMM)
			src2w = (sljit_s32)(src2w);
		if (HAS_FLAGS(op))
			flags |= ALT_SIGN_EXT;
	}
#endif
	/* Move TMP_ZERO into XER before an overflow tracking operation
	   (presumably to clear stale overflow state - confirm against MTXER). */
	if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		/* Recorded so that later flag consumers (see get_bo_bi_flags) know
		   the flags were produced by an addition. */
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;

		if (TEST_ADD_FORM1(op))
			return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);

		/* No flags requested: try the immediate forms, cheapest first.
		   Addition is commutative, so either operand may be the constant. */
		if (!HAS_FLAGS(op) && (src1 == SLJIT_IMM || src2 == SLJIT_IMM)) {
			/* Signed 16 bit constant. */
			if (TEST_SL_IMM(src2, src2w)) {
				compiler->imm = (sljit_ins)src2w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SL_IMM(src1, src1w)) {
				compiler->imm = (sljit_ins)src1w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
			/* Constant whose low 16 bits are zero: only the high half is kept. */
			if (TEST_SH_IMM(src2, src2w)) {
				compiler->imm = (sljit_ins)(src2w >> 16) & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SH_IMM(src1, src1w)) {
				compiler->imm = (sljit_ins)(src1w >> 16) & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
			/* Range between -1 and -32768 is covered above. */
			if (TEST_ADD_IMM(src2, src2w)) {
				compiler->imm = (sljit_ins)src2w & 0xffffffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_ADD_IMM(src1, src1w)) {
				compiler->imm = (sljit_ins)src1w & 0xffffffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		/* 32 bit addition that must set the zero flag on a 64 bit target. */
		if ((op & (SLJIT_32 | SLJIT_SET_Z)) == (SLJIT_32 | SLJIT_SET_Z)) {
			if (TEST_SL_IMM(src2, src2w)) {
				compiler->imm = (sljit_ins)src2w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SL_IMM(src1, src1w)) {
				compiler->imm = (sljit_ins)src1w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
			return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
		}
#endif
		/* Flags requested: only the signed 16 bit immediate form is tried. */
		if (HAS_FLAGS(op)) {
			if (TEST_SL_IMM(src2, src2w)) {
				compiler->imm = (sljit_ins)src2w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SL_IMM(src1, src1w)) {
				compiler->imm = (sljit_ins)src1w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		/* Generic form; ALT_FORM5 is added when the carry flag is requested. */
		return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == SLJIT_CARRY) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_ADDC:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
		return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SUB:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;

		/* Flag types in the SLJIT_LESS .. SLJIT_LESS_EQUAL range. */
		if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) {
			/* dst == TMP_REG1 is what sljit_emit_op2u() passes, i.e. only
			   the flags are wanted. */
			if (dst == TMP_REG1) {
				if (TEST_UL_IMM(src2, src2w)) {
					compiler->imm = (sljit_ins)src2w & 0xffff;
					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
				}
				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
			}

			if (src2 == SLJIT_IMM && src2w >= 0 && src2w <= (SIMM_MAX + 1)) {
				compiler->imm = (sljit_ins)src2w;
				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w);
		}

		/* Flags only result with a flag type up to SLJIT_SIG_LESS_EQUAL. */
		if (dst == TMP_REG1 && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
			if (TEST_SL_IMM(src2, src2w)) {
				compiler->imm = (sljit_ins)src2w & 0xffff;
				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w);
		}

		if (TEST_SUB_FORM2(op)) {
			/* The range is symmetric (-SIMM_MAX .. SIMM_MAX), presumably so
			   the negated constant is encodable as well - TODO confirm. */
			if (src2 == SLJIT_IMM && src2w >= -SIMM_MAX && src2w <= SIMM_MAX) {
				compiler->imm = (sljit_ins)src2w & 0xffff;
				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
		}

		if (TEST_SUB_FORM3(op))
			return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w);

		/* Subtracting a constant is emitted as adding its negation. */
		if (TEST_SL_IMM(src2, -src2w)) {
			compiler->imm = (sljit_ins)(-src2w) & 0xffff;
			return emit_op(compiler, SLJIT_ADD, flags | (!HAS_FLAGS(op) ? ALT_FORM2 : ALT_FORM3), dst, dstw, src1, src1w, TMP_REG2, 0);
		}

		/* Constant first operand: note that src2 is passed as the register
		   operand and the constant comes from compiler->imm. */
		if (TEST_SL_IMM(src1, src1w) && !(op & SLJIT_SET_Z)) {
			compiler->imm = (sljit_ins)src1w & 0xffff;
			return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
		}

		if (!HAS_FLAGS(op)) {
			if (TEST_SH_IMM(src2, -src2w)) {
				compiler->imm = (sljit_ins)((-src2w) >> 16) & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			/* Range between -1 and -32768 is covered above. */
			if (TEST_ADD_IMM(src2, -src2w)) {
				compiler->imm = (sljit_ins)-src2w;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
		}

		/* We know ALT_SIGN_EXT is set if it is an SLJIT_32 on 64 bit systems. */
		return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == SLJIT_CARRY) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SUBC:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
		return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_MUL:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		/* ALT_FORM2 marks the 32 bit multiplication. */
		if (op & SLJIT_32)
			flags |= ALT_FORM2;
#endif
		if (!HAS_FLAGS(op)) {
			/* Signed 16 bit constant on either side (commutative). */
			if (TEST_SL_IMM(src2, src2w)) {
				compiler->imm = (sljit_ins)src2w & 0xffff;
				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SL_IMM(src1, src1w)) {
				compiler->imm = (sljit_ins)src1w & 0xffff;
				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		else
			/* NOTE(review): when the flag type is SLJIT_OVERFLOW the same
			   MTXER was already emitted before the switch. */
			FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
		return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_XOR:
		/* XOR with -1: handed to emit_op() as a one operand form
		   (ALT_FORM4) applied to the other operand. */
		if (src2 == SLJIT_IMM && src2w == -1) {
			return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM4, dst, dstw, TMP_REG1, 0, src1, src1w);
		}
		if (src1 == SLJIT_IMM && src1w == -1) {
			return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM4, dst, dstw, TMP_REG1, 0, src2, src2w);
		}
		/* fallthrough */
	case SLJIT_AND:
	case SLJIT_OR:
		/* Commutative unsigned operations. */
		/* AND may use the 16 bit immediate forms even when flags are
		   requested; OR and XOR only when they are not. */
		if (!HAS_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
			/* Constant that fits the low 16 bits. */
			if (TEST_UL_IMM(src2, src2w)) {
				compiler->imm = (sljit_ins)src2w;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_UL_IMM(src1, src1w)) {
				compiler->imm = (sljit_ins)src1w;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
			/* Constant with only bits 16..31 set. */
			if (TEST_UH_IMM(src2, src2w)) {
				compiler->imm = (sljit_ins)(src2w >> 16) & 0xffff;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_UH_IMM(src1, src1w)) {
				compiler->imm = (sljit_ins)(src1w >> 16) & 0xffff;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		if (!HAS_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
			/* Unlike or and xor, the and resets unwanted bits as well. */
			if (TEST_UI_IMM(src2, src2w)) {
				compiler->imm = (sljit_ins)src2w;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_UI_IMM(src1, src1w)) {
				compiler->imm = (sljit_ins)src1w;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SHL:
	case SLJIT_MSHL:
	case SLJIT_LSHR:
	case SLJIT_MLSHR:
	case SLJIT_ASHR:
	case SLJIT_MASHR:
	case SLJIT_ROTL:
	case SLJIT_ROTR:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		/* ALT_FORM2 marks the 32 bit shift / rotate. */
		if (op & SLJIT_32)
			flags |= ALT_FORM2;
#endif
		/* Constant shift amount: passed through compiler->imm (ALT_FORM1). */
		if (src2 == SLJIT_IMM) {
			compiler->imm = (sljit_ins)src2w;
			return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
		}
		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}

/* Variant of sljit_emit_op2() without a destination operand: the arguments
   are checked here, then the operation is emitted with TMP_REG1 as the
   destination and the argument checks of sljit_emit_op2() skipped. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
/* Emits a double register shift: dst_reg receives src1_reg shifted by src3,
   and the bits shifted in are taken from src2_reg.

   compiler     compiler instance receiving the instructions
   op           SLJIT_SHL / SLJIT_MSHL / SLJIT_LSHR / SLJIT_MLSHR, optionally
                combined with SLJIT_32
   dst_reg      destination register
   src1_reg     register that is shifted
   src2_reg     register supplying the bits shifted in
   src3, src3w  shift amount (immediate, register or memory operand)

   Returns SLJIT_SUCCESS or the error reported by an emitter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_right;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
#else /* !SLJIT_CONFIG_PPC_64 */
	sljit_s32 inp_flags = WORD_DATA | LOAD_DATA;
	sljit_sw bit_length = 32;
#endif /* SLJIT_CONFIG_PPC_64 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);

	/* Shifting a register into itself is a rotation. */
	if (src1_reg == src2_reg) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	if (src3 == SLJIT_IMM) {
		/* Constant amounts are always reduced modulo the operand width. */
		src3w &= bit_length - 1;

		/* NOTE(review): nothing is emitted for a zero amount, so dst_reg
		   is left untouched even when it differs from src1_reg. */
		if (src3w == 0)
			return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		if (!(op & SLJIT_32)) {
			if (is_right) {
				FAIL_IF(push_inst(compiler, SRDI(src3w) | S(src1_reg) | A(dst_reg)));
				/* Rotate src2_reg left by (64 - src3w) and insert it into dst_reg. */
				return push_inst(compiler, RLDIMI | S(src2_reg) | A(dst_reg) | RLDI_SH(64 - src3w) | RLDI_MB(0));
			}

			FAIL_IF(push_inst(compiler, SLDI(src3w) | S(src1_reg) | A(dst_reg)));
			/* Computes SRDI(64 - src3w) of src2_reg into TMP_REG1. */
			FAIL_IF(push_inst(compiler, RLDICL | S(src2_reg) | A(TMP_REG1) | RLDI_SH(src3w) | RLDI_MB(64 - src3w)));
			return push_inst(compiler, OR | S(dst_reg) | A(dst_reg) | B(TMP_REG1));
		}
#endif /* SLJIT_CONFIG_PPC_64 */

		if (is_right) {
			FAIL_IF(push_inst(compiler, SRWI(src3w) | S(src1_reg) | A(dst_reg)));
			/* Insert the rotated src2_reg into bit positions 0 .. src3w - 1
			   (the mask given to RLWI_MBE). */
			return push_inst(compiler, RLWIMI | S(src2_reg) | A(dst_reg) | RLWI_SH(32 - src3w) | RLWI_MBE(0, src3w - 1));
		}

		FAIL_IF(push_inst(compiler, SLWI(src3w) | S(src1_reg) | A(dst_reg)));
		return push_inst(compiler, RLWIMI | S(src2_reg) | A(dst_reg) | RLWI_SH(src3w) | RLWI_MBE(32 - src3w, 31));
	}

	/* A shift amount in memory is loaded into TMP_REG2 first. */
	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w, TMP_REG2));
		src3 = TMP_REG2;
	}

	/* Variable amount. The src2_reg part is shifted the opposite way in two
	   steps, by 1 and then by (src3 ^ mask); for an amount within
	   0 .. mask the XOR equals mask - src3, so the total is width - src3
	   without ever issuing a single shift by the full width. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	if (!(op & SLJIT_32)) {
		/* The amount is masked for the M* opcodes, and also copied to
		   TMP_REG2 when dst_reg == src3 because the first shift below
		   overwrites dst_reg while the amount is still needed. */
		if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR || dst_reg == src3) {
			FAIL_IF(push_inst(compiler, ANDI | S(src3) | A(TMP_REG2) | 0x3f));
			src3 = TMP_REG2;
		}

		FAIL_IF(push_inst(compiler, (is_right ? SRD : SLD) | S(src1_reg) | A(dst_reg) | B(src3)));
		FAIL_IF(push_inst(compiler, (is_right ? SLDI(1) : SRDI(1)) | S(src2_reg) | A(TMP_REG1)));
		FAIL_IF(push_inst(compiler, XORI | S(src3) | A(TMP_REG2) | 0x3f));
		FAIL_IF(push_inst(compiler, (is_right ? SLD : SRD) | S(TMP_REG1) | A(TMP_REG1) | B(TMP_REG2)));
		return push_inst(compiler, OR | S(dst_reg) | A(dst_reg) | B(TMP_REG1));
	}
#endif /* SLJIT_CONFIG_PPC_64 */

	/* Same sequence with the 32 bit instructions and a 0x1f mask. */
	if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR || dst_reg == src3) {
		FAIL_IF(push_inst(compiler, ANDI | S(src3) | A(TMP_REG2) | 0x1f));
		src3 = TMP_REG2;
	}

	FAIL_IF(push_inst(compiler, (is_right ? SRW : SLW) | S(src1_reg) | A(dst_reg) | B(src3)));
	FAIL_IF(push_inst(compiler, (is_right ? SLWI(1) : SRWI(1)) | S(src2_reg) | A(TMP_REG1)));
	FAIL_IF(push_inst(compiler, XORI | S(src3) | A(TMP_REG2) | 0x1f));
	FAIL_IF(push_inst(compiler, (is_right ? SLW : SRW) | S(TMP_REG1) | A(TMP_REG1) | B(TMP_REG2)));
	return push_inst(compiler, OR | S(dst_reg) | A(dst_reg) | B(TMP_REG1));
}

/* Emits a DCBT instruction for the memory operand (src, srcw). DCBT takes
   two register fields, so a non-zero displacement or a scaled index is
   first materialised in TMP_REG1. */
static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	if (!(src & OFFS_REG_MASK)) {
		/* Base register only, no displacement. */
		if (srcw == 0 && (src & REG_MASK))
			return push_inst(compiler, DCBT | A(0) | B(src & REG_MASK));

		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		/* Works with SLJIT_MEM0() case as well. */
		return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
	}

	/* Base + (index << shift): the low two bits of srcw hold the shift. */
	srcw &= 0x3;

	if (srcw == 0)
		return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src)));

	FAIL_IF(push_inst(compiler, SLWI_W(srcw) | S(OFFS_REG(src)) | A(TMP_REG1)));
	return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
}

/* Emits an operation that only has a source operand: fast return and the
   prefetch hints. SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN and opcodes not
   listed emit nothing and return SLJIT_SUCCESS. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		/* Put the return address into the link register (through
		   TMP_REG2 when it is not already in a register), then BLR. */
		if (FAST_IS_REG(src))
			FAIL_IF(push_inst(compiler, MTLR | S(src)));
		else {
			FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG2));
			FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
		}

		return push_inst(compiler, BLR);
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		return SLJIT_SUCCESS;
	/* All prefetch levels are emitted the same way. */
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		return emit_prefetch(compiler, src, srcw);
	}

	return SLJIT_SUCCESS;
}
/* Maps an sljit register to its machine register number: reg_map[] for
   SLJIT_GP_REGISTER, freg_map[] for SLJIT_FLOAT_REGISTER, and -1 for any
   other register type. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));

	if (type == SLJIT_GP_REGISTER)
		return reg_map[reg];

	if (type != SLJIT_FLOAT_REGISTER)
		return -1;

	return freg_map[reg];
}

/* Appends one caller supplied instruction word, read from `instruction`.
   `size` is only passed to the argument check. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	SLJIT_UNUSED_ARG(size);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return push_inst(compiler, *(sljit_ins*)instruction);
}

/* --------------------------------------------------------------------- */
/*  Floating point operators                                             */
/* --------------------------------------------------------------------- */

/* Chooses the single precision opcode when SLJIT_32 is set in op,
   the double precision opcode otherwise. */
#define SELECT_FOP(op, single, double) ((sljit_ins)((op & SLJIT_32) ? single : double))

/* Converts a floating point value to an integer (FCTIWZ / FCTIDZ) and
   writes the result to dst. The converted value is produced in TMP_FREG1
   and moved to its destination through memory: the temporary stack slot
   at TMP_MEM_OFFSET for a register destination, or the destination itself
   when dst is a memory operand. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	if (src & SLJIT_MEM) {
		/* We can ignore the temporary data store on the stack from caching point of view. */
		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1));
		src = TMP_FREG1;
	}

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	op = GET_OPCODE(op);
	FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));

	/* Word sized result: the whole double sized value is transferred. */
	if (op == SLJIT_CONV_SW_FROM_F64) {
		if (FAST_IS_REG(dst)) {
			FAIL_IF(push_inst(compiler, STFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
			return push_inst(compiler, LD | S(dst) | A(SLJIT_SP) | TMP_MEM_OFFSET);
		}
		return emit_op_mem(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, TMP_REG1);
	}
#else /* !SLJIT_CONFIG_PPC_64 */
	FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
#endif /* SLJIT_CONFIG_PPC_64 */

	/* 32 bit result from here on, stored with STFIWX. That instruction
	   only has a register + register address form, so the offset is
	   placed in a register first. */
	if (FAST_IS_REG(dst)) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, TMP_MEM_OFFSET));
		FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
		return push_inst(compiler, LWZ | S(dst) | A(SLJIT_SP) | TMP_MEM_OFFSET);
	}

	SLJIT_ASSERT(dst & SLJIT_MEM);

	/* From here dstw is reused to hold the register number placed in the
	   B field of the final STFIWX. */
	if (dst & OFFS_REG_MASK) {
		/* Base + (index << shift): the low two bits of dstw hold the shift. */
		dstw &= 0x3;
		if (dstw) {
			FAIL_IF(push_inst(compiler, SLWI_W(dstw) | S(OFFS_REG(dst)) | A(TMP_REG1)));
			dstw = TMP_REG1;
		} else
			dstw = OFFS_REG(dst);
	}
	else {
		if ((dst & REG_MASK) && !dstw) {
			/* Base register without displacement: the base moves to the
			   B field and the A field becomes 0. */
			dstw = dst & REG_MASK;
			dst = 0;
		} else {
			/* This works regardless of whether we have SLJIT_MEM1 or SLJIT_MEM0. */
			FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
			dstw = TMP_REG1;
		}
	}

	return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
}

/* Emits a floating point comparison (FCMPU) whose result goes to the
   condition register field selected by CRD(4). Memory operands are loaded
   into TMP_FREG1 / TMP_FREG2 first. For the three UNORDERED_OR_* flag types
   handled below, one extra CROR merges bit 4 + 3 (the bit that
   get_bo_bi_flags tests for SLJIT_UNORDERED) into the equal, less or
   greater bit, so a later branch needs to test a single bit only. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, TMP_REG1));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG2));
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2)));

	switch (GET_FLAG_TYPE(op)) {
	case SLJIT_UNORDERED_OR_EQUAL:
		return push_inst(compiler, CROR | ((4 + 2) << 21) | ((4 + 2) << 16) | ((4 + 3) << 11));
	case SLJIT_UNORDERED_OR_LESS:
		return push_inst(compiler, CROR | ((4 + 0) << 21) | ((4 + 0) << 16) | ((4 + 3) << 11));
	case SLJIT_UNORDERED_OR_GREATER:
		return push_inst(compiler, CROR | ((4 + 1) << 21) | ((4 + 1) << 16) | ((4 + 3) << 11));
	}

	return SLJIT_SUCCESS;
}
/* Emits a two operand floating point operation: add, sub, mul, div or
   copysign, in single precision when SLJIT_32 is set in op and in double
   precision otherwise.

   compiler       compiler instance receiving the instructions
   op             opcode, optionally combined with SLJIT_32
   dst, dstw      destination operand
   src1, src1w    first source operand
   src2, src2w    second source operand

   Returns SLJIT_SUCCESS or the error reported by an emitter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* The result is computed in TMP_FREG2 when dst is not a register. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;

	/* Memory sources are loaded into the temporary float registers. */
	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, TMP_REG1));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG1));
		src2 = TMP_FREG2;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL uses FC for src2 */));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
		break;
	case SLJIT_COPYSIGN_F64:
		/* Store src2 in the temporary stack slot and read it back into
		   TMP_REG1 as an integer, so its sign can be tested with an
		   integer compare. */
		FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? STFS : STFD) | FS(src2) | A(SLJIT_SP) | TMP_MEM_OFFSET));
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
		/* Only one 32 bit word is loaded; for a double it is the one at
		   TMP_MEM_OFFSET_HI. */
		FAIL_IF(push_inst(compiler, LWZ | S(TMP_REG1) | A(SLJIT_SP) | ((op & SLJIT_32) ? TMP_MEM_OFFSET : TMP_MEM_OFFSET_HI)));
#else /* !SLJIT_CONFIG_PPC_32 */
		FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? LWZ : LD) | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
#endif /* SLJIT_CONFIG_PPC_32 */
		/* dst_r = |src1| */
		FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src1)));
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
		FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(TMP_REG1) | 0));
#else /* !SLJIT_CONFIG_PPC_32 */
		/* The extra 1 passed to CRD() for the double precision case
		   presumably selects the 64 bit compare - TODO confirm. */
		FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((op & SLJIT_32) ? 0 : 1)) | A(TMP_REG1) | 0));
#endif /* SLJIT_CONFIG_PPC_32 */
		/* Conditional branch with displacement 8, i.e. over the FNEG below.
		   It uses the same BO / BI pair that get_bo_bi_flags returns for
		   SLJIT_GREATER_EQUAL, so the negation is skipped when the loaded
		   sign word is not negative. */
		FAIL_IF(push_inst(compiler, BCx | (4 << 21) | (0 << 16) | 8));
		/* NOTE(review): this case returns without the memory store done
		   after the switch; dst is presumably always a register for
		   this opcode - confirm against check_sljit_emit_fop2. */
		return push_inst(compiler, FNEG | FD(dst_r) | FB(dst_r));
	}

	/* Write the result back when the destination is a memory operand. */
	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, TMP_REG1));

	return SLJIT_SUCCESS;
}

#undef SELECT_FOP
TMP_REG1 : TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET)); + return push_inst(compiler, LFS | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, sljit_s32 type) +{ + switch (type) { + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (4 << 21) | (2 << 16); + /* fallthrough */ + + case SLJIT_EQUAL: + return (12 << 21) | (2 << 16); + + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (12 << 21) | (2 << 16); + /* fallthrough */ + + case SLJIT_NOT_EQUAL: + return (4 << 21) | (2 << 16); + + case SLJIT_LESS: + case SLJIT_SIG_LESS: + return (12 << 21) | (0 << 16); + + case SLJIT_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + return (4 << 21) | (0 << 16); + + case SLJIT_GREATER: + case SLJIT_SIG_GREATER: + return (12 << 21) | (1 << 16); + + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + return (4 << 21) | (1 << 16); + + case SLJIT_OVERFLOW: + return (12 << 21) | (3 << 16); + + case SLJIT_NOT_OVERFLOW: + return (4 << 21) | (3 << 16); + + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_LESS: + return (12 << 21) | ((4 + 0) << 16); + + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return (4 << 21) | ((4 + 
0) << 16); + + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + return (12 << 21) | ((4 + 1) << 16); + + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return (4 << 21) | ((4 + 1) << 16); + + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + return (12 << 21) | ((4 + 2) << 16); + + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + return (4 << 21) | ((4 + 2) << 16); + + case SLJIT_UNORDERED: + return (12 << 21) | ((4 + 3) << 16); + + case SLJIT_ORDERED: + return (4 << 21) | ((4 + 3) << 16); + + default: + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG); + return (20 << 21); + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_ins bo_bi_flags; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + bo_bi_flags = get_bo_bi_flags(compiler, type & 0xff); + if (!bo_bi_flags) + return NULL; + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, (sljit_u32)type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if ((type | 0x1) == SLJIT_NOT_CARRY) + PTR_FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG2) | A(TMP_ZERO) | B(TMP_ZERO))); + + /* In PPC, we don't need to touch the arguments. */ + if (type < SLJIT_JUMP) + jump->flags |= IS_COND; +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL) + jump->flags |= IS_CALL; +#endif + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0))); + + /* Maximum number of instructions required for generating a constant. 
*/ + compiler->size += JUMP_MAX_SIZE - 1; + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + SLJIT_UNUSED_ARG(arg_types); + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); +#endif + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump = NULL; + sljit_s32 src_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + + if (src == SLJIT_IMM) { + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR); + jump->u.target = (sljit_uw)srcw; + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL) + jump->flags |= IS_CALL; +#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ + + jump->addr = compiler->size; + FAIL_IF(push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0))); + + /* Maximum number of instructions required for generating a constant. 
*/ + compiler->size += JUMP_MAX_SIZE - 1; + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(src)) { +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL && src != TMP_CALL_REG) { + FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); + src_r = TMP_CALL_REG; + } else + src_r = src; +#else /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ + src_r = src; +#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ + } else { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG)); + src_r = TMP_CALL_REG; + } + + FAIL_IF(push_inst(compiler, MTCTR | S(src_r))); + return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(arg_types); + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG)); + src = TMP_CALL_REG; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); + src = TMP_CALL_REG; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + FAIL_IF(call_with_args(compiler, arg_types, &src)); +#endif + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 reg, invert; + sljit_u32 bit, 
from_xer; + sljit_s32 saved_op = op; + sljit_sw saved_dstw = dstw; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 input_flags = ((op & SLJIT_32) || op == SLJIT_MOV32) ? INT_DATA : WORD_DATA; +#else + sljit_s32 input_flags = WORD_DATA; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2; + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG1)); + + invert = 0; + bit = 0; + from_xer = 0; + + switch (type) { + case SLJIT_LESS: + case SLJIT_SIG_LESS: + break; + + case SLJIT_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + invert = 1; + break; + + case SLJIT_GREATER: + case SLJIT_SIG_GREATER: + bit = 1; + break; + + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + bit = 1; + invert = 1; + break; + + case SLJIT_EQUAL: + bit = 2; + break; + + case SLJIT_NOT_EQUAL: + bit = 2; + invert = 1; + break; + + case SLJIT_OVERFLOW: + from_xer = 1; + bit = 1; + break; + + case SLJIT_NOT_OVERFLOW: + from_xer = 1; + bit = 1; + invert = 1; + break; + + case SLJIT_CARRY: + from_xer = 1; + bit = 2; + invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) != 0; + break; + + case SLJIT_NOT_CARRY: + from_xer = 1; + bit = 2; + invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) != 0; + break; + + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_LESS: + bit = 4 + 0; + break; + + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + bit = 4 + 0; + invert = 1; + break; + + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + bit = 4 + 1; + break; + + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + bit = 4 + 1; + invert = 1; + break; + + case 
SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + bit = 4 + 2; + break; + + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + bit = 4 + 2; + invert = 1; + break; + + case SLJIT_UNORDERED: + bit = 4 + 3; + break; + + case SLJIT_ORDERED: + bit = 4 + 3; + invert = 1; + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + + FAIL_IF(push_inst(compiler, (from_xer ? MFXER : MFCR) | D(reg))); + /* Simplified mnemonics: extrwi. */ + FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | RLWI_SH(1 + bit) | RLWI_MBE(31, 31))); + + if (invert) + FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1)); + + if (op < SLJIT_ADD) { + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_op_mem(compiler, input_flags, reg, dst, dstw, TMP_REG1); + } + + SLJIT_SKIP_CHECKS(compiler); + + if (dst & SLJIT_MEM) + return sljit_emit_op2(compiler, saved_op, dst, saved_dstw, TMP_REG1, 0, TMP_REG2, 0); + return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + sljit_ins *ptr; + sljit_uw size; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 inp_flags = ((type & SLJIT_32) ? 
INT_DATA : WORD_DATA) | LOAD_DATA; +#else /* !SLJIT_CONFIG_PPC_64 */ + sljit_s32 inp_flags = WORD_DATA | LOAD_DATA; +#endif /* SLJIT_CONFIG_PPC_64 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + FAIL_IF(push_inst(compiler, OR | S(dst_reg) | A(TMP_REG1) | B(dst_reg))); + + if ((src1 & REG_MASK) == dst_reg) + src1 = (src1 & ~REG_MASK) | TMP_REG1; + + if (OFFS_REG(src1) == dst_reg) + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); + } + + FAIL_IF(push_inst(compiler, OR | S(src2_reg) | A(dst_reg) | B(src2_reg))); + } + } + + if (((type & ~SLJIT_32) | 0x1) == SLJIT_NOT_CARRY) + FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG2) | A(TMP_ZERO) | B(TMP_ZERO))); /* scratch result goes to TMP_REG2 (as in sljit_emit_jump): TMP_REG1 may hold the saved dst_reg that src1 is addressed through */ + + size = compiler->size; + + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w, TMP_REG1)); + } else if (src1 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (type & SLJIT_32) + src1w = (sljit_s32)src1w; /* 32-bit select: load only the sign-extended low word of the immediate */ +#endif /* SLJIT_CONFIG_PPC_64 */ + FAIL_IF(load_immediate(compiler, dst_reg, src1w)); + } else + FAIL_IF(push_inst(compiler, OR | S(src1) | A(dst_reg) | B(src1))); + + *ptr = BCx | get_bo_bi_flags(compiler, (type ^ 0x1) & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 2); /* fill the reserved slot with a branch on the inverted condition that skips the src1 move emitted above */ + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_ins *ptr; + sljit_uw size; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + 
if (dst_freg != src2_freg) { + if (dst_freg == src1) { + src1 = src2_freg; + src1w = 0; + type ^= 0x1; + } else + FAIL_IF(push_inst(compiler, FMR | FD(dst_freg) | FB(src2_freg))); + } + + if (((type & ~SLJIT_32) | 0x1) == SLJIT_NOT_CARRY) + FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO))); + + size = compiler->size; + + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + + if (src1 & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, dst_freg, src1, src1w, TMP_REG1)); + else + FAIL_IF(push_inst(compiler, FMR | FD(dst_freg) | FB(src1))); + + *ptr = BCx | get_bo_bi_flags(compiler, (type ^ 0x1) & ~SLJIT_32) | (sljit_ins)((compiler->size - size) << 2); + return SLJIT_SUCCESS; +} + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + +#define EMIT_MEM_LOAD_IMM(inst, mem, memw) \ + ((sljit_s16)(memw) > SIMM_MAX - SSIZE_OF(sw)) + +#else /* !SLJIT_CONFIG_PPC_32 */ + +#define EMIT_MEM_LOAD_IMM(inst, mem, memw) \ + ((((inst) & INT_ALIGNED) && ((memw) & 0x3) != 0) \ + || ((sljit_s16)(memw) > SIMM_MAX - SSIZE_OF(sw)) \ + || ((memw) > 0x7fff7fffl || (memw) < -0x80000000l)) \ + +#endif /* SLJIT_CONFIG_PPC_32 */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + ADJUST_LOCAL_OFFSET(mem, memw); + + inst = data_transfer_insts[WORD_DATA | ((type & SLJIT_MEM_STORE) ? 
0 : LOAD_DATA)]; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + memw &= 0x3; + + if (memw != 0) { + FAIL_IF(push_inst(compiler, SLWI_W(memw) | S(OFFS_REG(mem)) | A(TMP_REG1))); + FAIL_IF(push_inst(compiler, ADD | D(TMP_REG1) | A(TMP_REG1) | B(mem & REG_MASK))); + } else + FAIL_IF(push_inst(compiler, ADD | D(TMP_REG1) | A(mem & REG_MASK) | B(OFFS_REG(mem)))); + + mem = TMP_REG1; + memw = 0; + } else { + if (EMIT_MEM_LOAD_IMM(inst, mem, memw)) { + if ((mem & REG_MASK) != 0) { + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_ADD, TMP_REG1, 0, mem & REG_MASK, 0, SLJIT_IMM, memw)); + } else + FAIL_IF(load_immediate(compiler, TMP_REG1, memw)); + + memw = 0; + mem = TMP_REG1; + } else if (memw > SIMM_MAX || memw < SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(mem & REG_MASK) | IMM((memw + 0x8000) >> 16))); + + memw &= 0xffff; + mem = TMP_REG1; + } else { + memw &= 0xffff; + mem &= REG_MASK; + } + } + + SLJIT_ASSERT((memw >= 0 && memw <= SIMM_MAX - SSIZE_OF(sw)) || (memw >= 0x8000 && memw <= 0xffff)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + inst &= (sljit_ins)~INT_ALIGNED; +#endif /* SLJIT_CONFIG_PPC_64 */ + + if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) { + FAIL_IF(push_inst(compiler, inst | D(REG_PAIR_SECOND(reg)) | A(mem) | IMM(memw + SSIZE_OF(sw)))); + return push_inst(compiler, inst | D(REG_PAIR_FIRST(reg)) | A(mem) | IMM(memw)); + } + + FAIL_IF(push_inst(compiler, inst | D(REG_PAIR_FIRST(reg)) | A(mem) | IMM(memw))); + return push_inst(compiler, inst | D(REG_PAIR_SECOND(reg)) | A(mem) | IMM(memw + SSIZE_OF(sw))); +} + +#undef EMIT_MEM_LOAD_IMM + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 mem_flags; + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw)); + + if (type & SLJIT_MEM_POST) + return 
SLJIT_ERR_UNSUPPORTED; + + switch (type & 0xff) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: +#endif + mem_flags = WORD_DATA; + break; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + case SLJIT_MOV_U32: + case SLJIT_MOV32: + mem_flags = INT_DATA; + break; + + case SLJIT_MOV_S32: + mem_flags = INT_DATA; + + if (!(type & SLJIT_MEM_STORE) && !(type & SLJIT_32)) { + if (mem & OFFS_REG_MASK) + mem_flags |= SIGNED_DATA; + else + return SLJIT_ERR_UNSUPPORTED; + } + break; +#endif + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + mem_flags = BYTE_DATA; + break; + + case SLJIT_MOV_U16: + mem_flags = HALF_DATA; + break; + + case SLJIT_MOV_S16: + mem_flags = HALF_DATA | SIGNED_DATA; + break; + + default: + SLJIT_UNREACHABLE(); + mem_flags = WORD_DATA; + break; + } + + if (!(type & SLJIT_MEM_STORE)) + mem_flags |= LOAD_DATA; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + if (memw != 0) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + inst = updated_data_transfer_insts[mem_flags | INDEXED]; + FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, 0, reg) | A(mem & REG_MASK) | B(OFFS_REG(mem)))); + } + else { + if (memw > SIMM_MAX || memw < SIMM_MIN) + return SLJIT_ERR_UNSUPPORTED; + + inst = updated_data_transfer_insts[mem_flags]; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((inst & INT_ALIGNED) && (memw & 0x3) != 0) + return SLJIT_ERR_UNSUPPORTED; +#endif + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, 0, reg) | A(mem & REG_MASK) | IMM(memw))); + } + + if ((mem_flags & LOAD_DATA) && (type & 0xff) == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(reg) | A(reg)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + 
sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 mem_flags; + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw)); + + if (type & SLJIT_MEM_POST) + return SLJIT_ERR_UNSUPPORTED; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + if (memw != 0) + return SLJIT_ERR_UNSUPPORTED; + } + else { + if (memw > SIMM_MAX || memw < SIMM_MIN) + return SLJIT_ERR_UNSUPPORTED; + } + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + mem_flags = FLOAT_DATA(type); + + if (!(type & SLJIT_MEM_STORE)) + mem_flags |= LOAD_DATA; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + inst = updated_data_transfer_insts[mem_flags | INDEXED]; + return push_inst(compiler, INST_CODE_AND_DST(inst, DOUBLE_DATA, freg) | A(mem & REG_MASK) | B(OFFS_REG(mem))); + } + + inst = updated_data_transfer_insts[mem_flags]; + return push_inst(compiler, INST_CODE_AND_DST(inst, DOUBLE_DATA, freg) | A(mem & REG_MASK) | IMM(memw)); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + PTR_FAIL_IF(emit_const(compiler, dst_r, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, dst_r, dst, dstw, TMP_REG1)); + + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_jump *jump; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + compiler->size++; +#else + compiler->size += 4; +#endif + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} diff --git a/src/sljit/sljitNativeRISCV_32.c b/src/sljit/sljitNativeRISCV_32.c new file mode 100644 index 0000000..396c956 --- /dev/null +++ b/src/sljit/sljitNativeRISCV_32.c @@ -0,0 +1,142 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r) +{ + SLJIT_UNUSED_ARG(tmp_r); + + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)); + + if (imm & 0x800) + imm += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); + + if ((imm & 0xfff) == 0) + return SLJIT_SUCCESS; + + return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_s32 imm[2]; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm[0] != 0) + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0], TMP_REG3)); + if (u.imm[1] != 0) + FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1], TMP_REG3)); + + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-16))); + FAIL_IF(push_inst(compiler, SW | RS1(SLJIT_SP) | RS2(u.imm[0] != 0 ? TMP_REG1 : TMP_ZERO) | (8 << 7))); + FAIL_IF(push_inst(compiler, SW | RS1(SLJIT_SP) | RS2(u.imm[1] != 0 ? 
TMP_REG2 : TMP_ZERO) | (12 << 7))); + FAIL_IF(push_inst(compiler, FLD | FRD(freg) | RS1(SLJIT_SP) | IMM_I(8))); + return push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(16)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ + sljit_ins inst; + sljit_s32 reg2 = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + if (op & SLJIT_32) { + if (op == SLJIT_COPY32_TO_F32) + inst = FMV_W_X | RS1(reg) | FRD(freg); + else + inst = FMV_X_W | FRS1(freg) | RD(reg); + + return push_inst(compiler, inst); + } + + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-16))); + + if (reg & REG_PAIR_MASK) { + reg2 = REG_PAIR_SECOND(reg); + reg = REG_PAIR_FIRST(reg); + } + + if (op == SLJIT_COPY_TO_F64) { + if (reg2 != 0) + FAIL_IF(push_inst(compiler, SW | RS1(SLJIT_SP) | RS2(reg2) | (8 << 7))); + else + FAIL_IF(push_inst(compiler, FSW | RS1(SLJIT_SP) | FRS2(freg) | (8 << 7))); + + FAIL_IF(push_inst(compiler, SW | RS1(SLJIT_SP) | RS2(reg) | (12 << 7))); + FAIL_IF(push_inst(compiler, FLD | FRD(freg) | RS1(SLJIT_SP) | IMM_I(8))); + } else { + FAIL_IF(push_inst(compiler, FSD | RS1(SLJIT_SP) | FRS2(freg) | (8 << 7))); + + if (reg2 != 0) + FAIL_IF(push_inst(compiler, FMV_X_W | FRS1(freg) | RD(reg2))); + + FAIL_IF(push_inst(compiler, LW | RD(reg) | RS1(SLJIT_SP) | IMM_I(12))); + } + + return push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(16)); +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins) +{ + if ((init_value & 0x800) != 0) + init_value += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(dst) | (sljit_ins)(init_value & ~0xfff))); + return push_inst(compiler, last_ins | RS1(dst) | IMM_I(init_value)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins 
*inst = (sljit_ins*)addr; + SLJIT_UNUSED_ARG(executable_offset); + + if ((new_target & 0x800) != 0) + new_target += 0x1000; + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0); + + SLJIT_ASSERT((inst[0] & 0x7f) == LUI); + inst[0] = (inst[0] & 0xfff) | (sljit_ins)((sljit_sw)new_target & ~0xfff); + SLJIT_ASSERT((inst[1] & 0x707f) == ADDI || (inst[1] & 0x707f) == JALR); + inst[1] = (inst[1] & 0xfffff) | IMM_I(new_target); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 5); +} diff --git a/src/sljit/sljitNativeRISCV_64.c b/src/sljit/sljitNativeRISCV_64.c new file mode 100644 index 0000000..7fcf2c5 --- /dev/null +++ b/src/sljit/sljitNativeRISCV_64.c @@ -0,0 +1,222 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r) +{ + sljit_sw high; + + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)); + + if (imm <= 0x7fffffffl && imm >= S32_MIN) { + if (imm > S32_MAX) { + SLJIT_ASSERT((imm & 0x800) != 0); + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); + return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); + } + + if ((imm & 0x800) != 0) + imm += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); + + if ((imm & 0xfff) == 0) + return SLJIT_SUCCESS; + + return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); + } + + /* Trailing zeroes could be used to produce shifted immediates. 
*/ + + if (imm <= 0x7ffffffffffl && imm >= -0x80000000000l) { + high = imm >> 12; + + if (imm & 0x800) + high = ~high; + + if (high > S32_MAX) { + SLJIT_ASSERT((high & 0x800) != 0); + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); + FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(high))); + } else { + if ((high & 0x800) != 0) + high += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(high & ~0xfff))); + + if ((high & 0xfff) != 0) + FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(high))); + } + + FAIL_IF(push_inst(compiler, SLLI | RD(dst_r) | RS1(dst_r) | IMM_I(12))); + + if ((imm & 0xfff) != 0) + return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); + + return SLJIT_SUCCESS; + } + + SLJIT_ASSERT(dst_r != tmp_r); + + high = imm >> 32; + imm = (sljit_s32)imm; + + if ((imm & 0x80000000l) != 0) + high = ~high; + + if (high <= 0x7ffff && high >= -0x80000) { + FAIL_IF(push_inst(compiler, LUI | RD(tmp_r) | (sljit_ins)(high << 12))); + high = 0x1000; + } else { + if ((high & 0x800) != 0) + high += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(tmp_r) | (sljit_ins)(high & ~0xfff))); + high &= 0xfff; + } + + if (imm <= SIMM_MAX && imm >= SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm))); + imm = 0; + } else if (imm > S32_MAX) { + SLJIT_ASSERT((imm & 0x800) != 0); + + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); + imm = 0x1000 | (imm & 0xfff); + } else { + if ((imm & 0x800) != 0) + imm += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); + imm &= 0xfff; + } + + if ((high & 0xfff) != 0) + FAIL_IF(push_inst(compiler, ADDI | RD(tmp_r) | RS1(tmp_r) | IMM_I(high))); + + if (imm & 0x1000) + FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm))); + else if (imm != 0) + FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm))); + + 
FAIL_IF(push_inst(compiler, SLLI | RD(tmp_r) | RS1(tmp_r) | IMM_I((high & 0x1000) ? 20 : 32))); + return push_inst(compiler, XOR | RD(dst_r) | RS1(dst_r) | RS2(tmp_r)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_sw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, FMV_W_X | (1 << 25) | RS1(TMP_ZERO) | FRD(freg)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm, TMP_REG3)); + return push_inst(compiler, FMV_W_X | (1 << 25) | RS1(TMP_REG1) | FRD(freg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) + inst = FMV_W_X | RS1(reg) | FRD(freg); + else + inst = FMV_X_W | FRS1(freg) | RD(reg); + + if (!(op & SLJIT_32)) + inst |= (sljit_ins)1 << 25; + + return push_inst(compiler, inst); +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins) +{ + sljit_sw high; + + if ((init_value & 0x800) != 0) + init_value += 0x1000; + + high = init_value >> 32; + + if ((init_value & 0x80000000l) != 0) + high = ~high; + + if ((high & 0x800) != 0) + high += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff))); + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high))); + FAIL_IF(push_inst(compiler, LUI | RD(dst) | (sljit_ins)(init_value & ~0xfff))); + FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(32))); + FAIL_IF(push_inst(compiler, XOR | RD(dst) | RS1(dst) | RS2(TMP_REG3))); + return push_inst(compiler, last_ins | RS1(dst) | IMM_I(init_value)); +} + 
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ /* Re-targets the instruction sequence at addr by rewriting the immediates of inst[0], inst[1], inst[2] and inst[5]. */ + sljit_ins *inst = (sljit_ins*)addr; + sljit_sw high; + SLJIT_UNUSED_ARG(executable_offset); + + if ((new_target & 0x800) != 0) + new_target += 0x1000; + + high = (sljit_sw)new_target >> 32; + + if ((new_target & 0x80000000l) != 0) + high = ~high; + + if ((high & 0x800) != 0) + high += 0x1000; + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 0); /* inst[5] is patched below, so the range spans six words; NOTE(review): assumes the end pointer is exclusive - confirm */ + + SLJIT_ASSERT((inst[0] & 0x7f) == LUI); + inst[0] = (inst[0] & 0xfff) | (sljit_ins)(high & ~0xfff); + SLJIT_ASSERT((inst[1] & 0x707f) == ADDI); + inst[1] = (inst[1] & 0xfffff) | IMM_I(high); + SLJIT_ASSERT((inst[2] & 0x7f) == LUI); + inst[2] = (inst[2] & 0xfff) | (sljit_ins)((sljit_sw)new_target & ~0xfff); + SLJIT_ASSERT((inst[5] & 0x707f) == ADDI || (inst[5] & 0x707f) == JALR); + inst[5] = (inst[5] & 0xfffff) | IMM_I(new_target); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 1); + + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 6); +} diff --git a/src/sljit/sljitNativeRISCV_common.c b/src/sljit/sljitNativeRISCV_common.c new file mode 100644 index 0000000..d86100a --- /dev/null +++ b/src/sljit/sljitNativeRISCV_common.c @@ -0,0 +1,3121 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + return "RISC-V-32" SLJIT_CPUINFO; +#else /* !SLJIT_CONFIG_RISCV_32 */ + return "RISC-V-64" SLJIT_CPUINFO; +#endif /* SLJIT_CONFIG_RISCV_32 */ +} + +/* Length of an instruction word + Both for riscv-32 and riscv-64 */ +typedef sljit_u32 sljit_ins; + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_ZERO 0 + +/* Flags are kept in volatile registers. 
*/ +#define EQUAL_FLAG (SLJIT_NUMBER_OF_REGISTERS + 5) +#define RETURN_ADDR_REG TMP_REG2 +#define OTHER_FLAG (SLJIT_NUMBER_OF_REGISTERS + 6) + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) + +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { + 0, 10, 11, 12, 13, 14, 15, 16, 17, 29, 30, 31, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 9, 8, 2, 6, 1, 7, 5, 28 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 10, 11, 12, 13, 14, 15, 16, 17, 2, 3, 4, 5, 6, 7, 28, 29, 30, 31, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 9, 8, 0, 1, +}; + +/* --------------------------------------------------------------------- */ +/* Instruction forms */ +/* --------------------------------------------------------------------- */ + +#define RD(rd) ((sljit_ins)reg_map[rd] << 7) +#define RS1(rs1) ((sljit_ins)reg_map[rs1] << 15) +#define RS2(rs2) ((sljit_ins)reg_map[rs2] << 20) +#define FRD(rd) ((sljit_ins)freg_map[rd] << 7) +#define FRS1(rs1) ((sljit_ins)freg_map[rs1] << 15) +#define FRS2(rs2) ((sljit_ins)freg_map[rs2] << 20) +#define IMM_I(imm) ((sljit_ins)(imm) << 20) +#define IMM_S(imm) ((((sljit_ins)(imm) & 0xfe0) << 20) | (((sljit_ins)(imm) & 0x1f) << 7)) + +/* Represents funct(i) parts of the instructions. 
*/ +#define OPC(o) ((sljit_ins)(o)) +#define F3(f) ((sljit_ins)(f) << 12) +#define F12(f) ((sljit_ins)(f) << 20) +#define F7(f) ((sljit_ins)(f) << 25) + +#define ADD (F7(0x0) | F3(0x0) | OPC(0x33)) +#define ADDI (F3(0x0) | OPC(0x13)) +#define AND (F7(0x0) | F3(0x7) | OPC(0x33)) +#define ANDI (F3(0x7) | OPC(0x13)) +#define AUIPC (OPC(0x17)) +#define BEQ (F3(0x0) | OPC(0x63)) +#define BNE (F3(0x1) | OPC(0x63)) +#define BLT (F3(0x4) | OPC(0x63)) +#define BGE (F3(0x5) | OPC(0x63)) +#define BLTU (F3(0x6) | OPC(0x63)) +#define BGEU (F3(0x7) | OPC(0x63)) +#define DIV (F7(0x1) | F3(0x4) | OPC(0x33)) +#define DIVU (F7(0x1) | F3(0x5) | OPC(0x33)) +#define EBREAK (F12(0x1) | F3(0x0) | OPC(0x73)) +#define FADD_S (F7(0x0) | F3(0x7) | OPC(0x53)) +#define FDIV_S (F7(0xc) | F3(0x7) | OPC(0x53)) +#define FEQ_S (F7(0x50) | F3(0x2) | OPC(0x53)) +#define FLD (F3(0x3) | OPC(0x7)) +#define FLE_S (F7(0x50) | F3(0x0) | OPC(0x53)) +#define FLT_S (F7(0x50) | F3(0x1) | OPC(0x53)) +/* These conversion opcodes are partly defined. 
*/ +#define FCVT_S_D (F7(0x20) | OPC(0x53)) +#define FCVT_S_W (F7(0x68) | OPC(0x53)) +#define FCVT_S_WU (F7(0x68) | F12(0x1) | OPC(0x53)) +#define FCVT_W_S (F7(0x60) | F3(0x1) | OPC(0x53)) +#define FMUL_S (F7(0x8) | F3(0x7) | OPC(0x53)) +#define FMV_X_W (F7(0x70) | F3(0x0) | OPC(0x53)) +#define FMV_W_X (F7(0x78) | F3(0x0) | OPC(0x53)) +#define FSD (F3(0x3) | OPC(0x27)) +#define FSGNJ_S (F7(0x10) | F3(0x0) | OPC(0x53)) +#define FSGNJN_S (F7(0x10) | F3(0x1) | OPC(0x53)) +#define FSGNJX_S (F7(0x10) | F3(0x2) | OPC(0x53)) +#define FSUB_S (F7(0x4) | F3(0x7) | OPC(0x53)) +#define FSW (F3(0x2) | OPC(0x27)) +#define JAL (OPC(0x6f)) +#define JALR (F3(0x0) | OPC(0x67)) +#define LD (F3(0x3) | OPC(0x3)) +#define LUI (OPC(0x37)) +#define LW (F3(0x2) | OPC(0x3)) +#define MUL (F7(0x1) | F3(0x0) | OPC(0x33)) +#define MULH (F7(0x1) | F3(0x1) | OPC(0x33)) +#define MULHU (F7(0x1) | F3(0x3) | OPC(0x33)) +#define OR (F7(0x0) | F3(0x6) | OPC(0x33)) +#define ORI (F3(0x6) | OPC(0x13)) +#define REM (F7(0x1) | F3(0x6) | OPC(0x33)) +#define REMU (F7(0x1) | F3(0x7) | OPC(0x33)) +#define SD (F3(0x3) | OPC(0x23)) +#define SLL (F7(0x0) | F3(0x1) | OPC(0x33)) +#define SLLI (IMM_I(0x0) | F3(0x1) | OPC(0x13)) +#define SLT (F7(0x0) | F3(0x2) | OPC(0x33)) +#define SLTI (F3(0x2) | OPC(0x13)) +#define SLTU (F7(0x0) | F3(0x3) | OPC(0x33)) +#define SLTUI (F3(0x3) | OPC(0x13)) +#define SRL (F7(0x0) | F3(0x5) | OPC(0x33)) +#define SRLI (IMM_I(0x0) | F3(0x5) | OPC(0x13)) +#define SRA (F7(0x20) | F3(0x5) | OPC(0x33)) +#define SRAI (IMM_I(0x400) | F3(0x5) | OPC(0x13)) +#define SUB (F7(0x20) | F3(0x0) | OPC(0x33)) +#define SW (F3(0x2) | OPC(0x23)) +#define XOR (F7(0x0) | F3(0x4) | OPC(0x33)) +#define XORI (F3(0x4) | OPC(0x13)) + +#define SIMM_MAX (0x7ff) +#define SIMM_MIN (-0x800) +#define BRANCH_MAX (0xfff) +#define BRANCH_MIN (-0x1000) +#define JUMP_MAX (0xfffff) +#define JUMP_MIN (-0x100000) + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) +#define S32_MAX (0x7ffff7ffl) +#define S32_MIN 
(-0x80000000l) +#define S44_MAX (0x7fffffff7ffl) +#define S52_MAX (0x7ffffffffffffl) +#endif + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_imm_s_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_sw imm) +{ + return push_inst(compiler, ins | IMM_S(imm)); +} + +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_sw diff; + sljit_uw target_addr; + sljit_ins *inst; + + inst = (sljit_ins *)jump->addr; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + goto exit; + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->u.label != NULL); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + } + + diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset; + + if (jump->flags & IS_COND) { + diff += SSIZE_OF(ins); + + if (diff >= BRANCH_MIN && diff <= BRANCH_MAX) { + inst--; + inst[0] = (inst[0] & 0x1fff07f) ^ 0x1000; + jump->flags |= PATCH_B; + jump->addr = (sljit_uw)inst; + return inst; + } + + diff -= SSIZE_OF(ins); + } + + if (diff >= JUMP_MIN && diff <= JUMP_MAX) { + if (jump->flags & IS_COND) { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + inst[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7; +#else + inst[-1] -= (sljit_ins)(5 * sizeof(sljit_ins)) << 7; +#endif + } + + jump->flags |= PATCH_J; + return inst; + } + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (diff >= S32_MIN && diff <= S32_MAX) { + if (jump->flags & IS_COND) + inst[-1] -= (sljit_ins)(4 * sizeof(sljit_ins)) << 7; + + jump->flags |= PATCH_REL32; + inst[1] = inst[0]; + return inst + 1; + } + + if (target_addr <= (sljit_uw)S32_MAX) { + if (jump->flags & IS_COND) + inst[-1] -= (sljit_ins)(4 * 
sizeof(sljit_ins)) << 7; + + jump->flags |= PATCH_ABS32; + inst[1] = inst[0]; + return inst + 1; + } + + if (target_addr <= S44_MAX) { + if (jump->flags & IS_COND) + inst[-1] -= (sljit_ins)(2 * sizeof(sljit_ins)) << 7; + + jump->flags |= PATCH_ABS44; + inst[3] = inst[0]; + return inst + 3; + } + + if (target_addr <= S52_MAX) { + if (jump->flags & IS_COND) + inst[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7; + + jump->flags |= PATCH_ABS52; + inst[4] = inst[0]; + return inst + 4; + } +#endif + +exit: +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + inst[1] = inst[0]; + return inst + 1; +#else + inst[5] = inst[0]; + return inst + 5; +#endif +} + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + +static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT)); + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + if (diff >= S32_MIN && diff <= S32_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_REL32; + return 1; + } + + if (addr <= S32_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS32; + return 1; + } + + if (addr <= S44_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS44; + return 3; + } + + if (addr <= S52_MAX) { + SLJIT_ASSERT(jump->flags >= ((sljit_uw)4 << JUMP_SIZE_SHIFT)); + jump->flags |= PATCH_ABS52; + return 4; + } + + SLJIT_ASSERT(jump->flags >= ((sljit_uw)5 << JUMP_SIZE_SHIFT)); + return 5; +} + +#endif /* SLJIT_CONFIG_RISCV_64 */ + +static SLJIT_INLINE void load_addr_to_reg(struct 
sljit_jump *jump, sljit_sw executable_offset) +{ + sljit_uw flags = jump->flags; + sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; + sljit_ins *ins = (sljit_ins*)jump->addr; + sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : TMP_REG1; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_sw high; +#endif + SLJIT_UNUSED_ARG(executable_offset); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (flags & PATCH_REL32) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset); + + SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX); + + if ((addr & 0x800) != 0) + addr += 0x1000; + + ins[0] = AUIPC | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + + if (!(flags & JUMP_MOV_ADDR)) { + SLJIT_ASSERT((ins[1] & 0x707f) == JALR); + ins[1] = (ins[1] & 0xfffff) | IMM_I(addr); + } else + ins[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr); + return; + } +#endif + + if ((addr & 0x800) != 0) + addr += 0x1000; + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + ins[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); +#else /* !SLJIT_CONFIG_RISCV_32 */ + + if (flags & PATCH_ABS32) { + SLJIT_ASSERT(addr <= S32_MAX); + ins[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + } else if (flags & PATCH_ABS44) { + high = (sljit_sw)addr >> 12; + SLJIT_ASSERT((sljit_uw)high <= 0x7fffffff); + + if (high > S32_MAX) { + SLJIT_ASSERT((high & 0x800) != 0); + ins[0] = LUI | RD(reg) | (sljit_ins)0x80000000u; + ins[1] = XORI | RD(reg) | RS1(reg) | IMM_I(high); + } else { + if ((high & 0x800) != 0) + high += 0x1000; + + ins[0] = LUI | RD(reg) | (sljit_ins)(high & ~0xfff); + ins[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(high); + } + + ins[2] = SLLI | RD(reg) | RS1(reg) | IMM_I(12); + ins += 2; + } else { + high = (sljit_sw)addr >> 32; + + if ((addr & 0x80000000l) != 0) + high = ~high; + + if (flags & PATCH_ABS52) { + SLJIT_ASSERT(addr <= S52_MAX); + ins[0] = LUI | RD(TMP_REG3) | 
(sljit_ins)(high << 12); + } else { + if ((high & 0x800) != 0) + high += 0x1000; + ins[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff); + ins[1] = ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high); + ins++; + } + + ins[1] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + ins[2] = SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I((flags & PATCH_ABS52) ? 20 : 32); + ins[3] = XOR | RD(reg) | RS1(reg) | RS2(TMP_REG3); + ins += 3; + } +#endif /* !SLJIT_CONFIG_RISCV_32 */ + + if (!(flags & JUMP_MOV_ADDR)) { + SLJIT_ASSERT((ins[1] & 0x707f) == JALR); + ins[1] = (ins[1] & 0xfffff) | IMM_I(addr); + } else + ins[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr); +} + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + SLJIT_NEXT_DEFINE_TYPES; + sljit_uw total_size; + sljit_uw size_reduce = 0; + sljit_sw diff; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + + while (1) { + SLJIT_GET_NEXT_MIN(); + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_const_addr) { + const_->addr -= size_reduce; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + continue; + } + + if (next_min_addr != next_jump_addr) + continue; + + jump->addr -= size_reduce; + if (!(jump->flags & JUMP_MOV_ADDR)) { + total_size = JUMP_MAX_SIZE; + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { + if (jump->flags & JUMP_ADDR) { +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (jump->u.target <= S32_MAX) + total_size = 2; + else if (jump->u.target <= S44_MAX) + total_size = 4; + else if (jump->u.target <= S52_MAX) + total_size = 5; +#endif /* SLJIT_CONFIG_RISCV_64 */ + } else { + /* Unit size: instruction. 
*/ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH_MIN / SSIZE_OF(ins))) + total_size = 0; + else if (diff >= (JUMP_MIN / SSIZE_OF(ins)) && diff <= (JUMP_MAX / SSIZE_OF(ins))) + total_size = 1; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + else if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins))) + total_size = 2; +#endif /* SLJIT_CONFIG_RISCV_64 */ + } + } + + size_reduce += JUMP_MAX_SIZE - total_size; + jump->flags |= total_size << JUMP_SIZE_SHIFT; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + } else { + total_size = 5; + + if (!(jump->flags & JUMP_ADDR)) { + /* Real size minus 1. Unit size: instruction. */ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + + if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins))) + total_size = 1; + } else if (jump->u.target < S32_MAX) + total_size = 1; + else if (jump->u.target < S44_MAX) + total_size = 3; + else if (jump->u.target <= S52_MAX) + total_size = 4; + + size_reduce += 5 - total_size; + jump->flags |= total_size << JUMP_SIZE_SHIFT; +#endif /* !SLJIT_CONFIG_RISCV_64 */ + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + SLJIT_NEXT_DEFINE_TYPES; + sljit_sw executable_offset; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + + reduce_code_size(compiler); + + code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), 
options, exec_allocator_data, &executable_offset); + PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_min_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + + /* These structures are ordered by their address. */ + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_jump_addr) { + if (!(jump->flags & JUMP_MOV_ADDR)) { + word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code, executable_offset); + SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); + } else { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + word_count += 1; + jump->addr = (sljit_uw)code_ptr; + code_ptr += 1; +#else /* !SLJIT_CONFIG_RISCV_32 */ + word_count += jump->flags >> JUMP_SIZE_SHIFT; + addr = (sljit_uw)code_ptr; + code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset); + jump->addr = addr; +#endif /* SLJIT_CONFIG_RISCV_32 */ + } + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + } + + SLJIT_GET_NEXT_MIN(); + } + code_ptr++; + word_count++; + } 
while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); /* Same executable-address translation as for labels resolved inside the copy loop. */ + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + if (!(jump->flags & (PATCH_B | PATCH_J)) || (jump->flags & JUMP_MOV_ADDR)) { + load_addr_to_reg(jump, executable_offset); + break; + } + + addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; + buf_ptr = (sljit_ins *)jump->addr; + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); + + if (jump->flags & PATCH_B) { + SLJIT_ASSERT((sljit_sw)addr >= BRANCH_MIN && (sljit_sw)addr <= BRANCH_MAX); + addr = ((addr & 0x800) >> 4) | ((addr & 0x1e) << 7) | ((addr & 0x7e0) << 20) | ((addr & 0x1000) << 19); + buf_ptr[0] |= (sljit_ins)addr; + break; + } + + SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX); + addr = (addr & 0xff000) | ((addr & 0x800) << 9) | ((addr & 0x7fe) << 20) | ((addr & 0x100000) << 11); + buf_ptr[0] = JAL | RD((jump->flags & IS_CALL) ? 
RETURN_ADDR_REG : TMP_ZERO) | (sljit_ins)addr; + } while (0); + + jump = jump->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return (SLJIT_IS_FPU_AVAILABLE) != 0; +#elif defined(__riscv_float_abi_soft) + return 0; +#else + return 1; +#endif /* SLJIT_IS_FPU_AVAILABLE */ + case SLJIT_HAS_ZERO_REGISTER: + case SLJIT_HAS_COPY_F32: +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + case SLJIT_HAS_COPY_F64: +#endif /* !SLJIT_CONFIG_RISCV_64 */ + return 1; + default: + return 0; + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + switch (type) { + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return 2; + + case SLJIT_UNORDERED: + case SLJIT_ORDERED: + return 1; + } + + return 0; +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* Creates an index in data_transfer_insts array. 
*/ +#define LOAD_DATA 0x01 +#define WORD_DATA 0x00 +#define BYTE_DATA 0x02 +#define HALF_DATA 0x04 +#define INT_DATA 0x06 +#define SIGNED_DATA 0x08 +/* Separates integer and floating point registers */ +#define GPR_REG 0x0f +#define DOUBLE_DATA 0x10 +#define SINGLE_DATA 0x12 + +#define MEM_MASK 0x1f + +#define ARG_TEST 0x00020 +#define ALT_KEEP_CACHE 0x00040 +#define CUMULATIVE_OP 0x00080 +#define IMM_OP 0x00100 +#define MOVE_OP 0x00200 +#define SRC2_IMM 0x00400 + +#define UNUSED_DEST 0x00800 +#define REG_DEST 0x01000 +#define REG1_SOURCE 0x02000 +#define REG2_SOURCE 0x04000 +#define SLOW_SRC1 0x08000 +#define SLOW_SRC2 0x10000 +#define SLOW_DEST 0x20000 +#define MEM_USE_TMP2 0x40000 + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#define STACK_STORE SW +#define STACK_LOAD LW +#else +#define STACK_STORE SD +#define STACK_LOAD LD +#endif + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#include "sljitNativeRISCV_32.c" +#else +#include "sljitNativeRISCV_64.c" +#endif + +#define STACK_MAX_DISTANCE (-SIMM_MIN) + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw); + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 i, tmp, offset; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + 
local_size += SSIZE_OF(sw); + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); + } +#else + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); +#endif + local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; + compiler->local_size = local_size; + + if (local_size <= STACK_MAX_DISTANCE) { + /* Frequent case. */ + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-local_size))); + offset = local_size - SSIZE_OF(sw); + local_size = 0; + } else { + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(STACK_MAX_DISTANCE))); + local_size -= STACK_MAX_DISTANCE; + + if (local_size > STACK_MAX_DISTANCE) + FAIL_IF(load_immediate(compiler, TMP_REG1, local_size, TMP_REG3)); + offset = STACK_MAX_DISTANCE - SSIZE_OF(sw); + } + + FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(RETURN_ADDR_REG), offset)); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(i), offset)); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(i), offset)); + } + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + /* This alignment is valid because offset is not used after storing FPU regs. 
*/ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_imm_s_inst(compiler, FSD | RS1(SLJIT_SP) | FRS2(i), offset)); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_imm_s_inst(compiler, FSD | RS1(SLJIT_SP) | FRS2(i), offset)); + } + + if (local_size > STACK_MAX_DISTANCE) + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RS1(SLJIT_SP) | RS2(TMP_REG1))); + else if (local_size > 0) + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-local_size))); + + if (options & SLJIT_ENTER_REG_ARG) + return SLJIT_SUCCESS; + + arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; + tmp = SLJIT_R0; + + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_S0 - saved_arg_count) | RS1(tmp) | IMM_I(0))); + saved_arg_count++; + } + tmp++; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#undef STACK_MAX_DISTANCE + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + local_size += SSIZE_OF(sw); + local_size += 
GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); + } +#else + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); +#endif + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; + + return SLJIT_SUCCESS; +} + +#define STACK_MAX_DISTANCE (-SIMM_MIN - 16) + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to) +{ + sljit_s32 i, tmp, offset; + sljit_s32 local_size = compiler->local_size; + + if (local_size > STACK_MAX_DISTANCE) { + local_size -= STACK_MAX_DISTANCE; + + if (local_size > STACK_MAX_DISTANCE) { + FAIL_IF(load_immediate(compiler, TMP_REG2, local_size, TMP_REG3)); + FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RS1(SLJIT_SP) | RS2(TMP_REG2))); + } else + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(local_size))); + + local_size = STACK_MAX_DISTANCE; + } + + SLJIT_ASSERT(local_size > 0); + + offset = local_size - SSIZE_OF(sw); + if (!is_return_to) + FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RS1(SLJIT_SP) | IMM_I(offset))); + + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RS1(SLJIT_SP) | IMM_I(offset))); + } + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RS1(SLJIT_SP) | IMM_I(offset))); + } + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + /* This alignment is valid because offset is not used after storing FPU regs. 
*/ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, FLD | FRD(i) | RS1(SLJIT_SP) | IMM_I(offset))); + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, FLD | FRD(i) | RS1(SLJIT_SP) | IMM_I(offset))); + } + + return push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(local_size)); +} + +#undef STACK_MAX_DISTANCE + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + return push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(RETURN_ADDR_REG) | IMM_I(0)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(src) | IMM_I(0))); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#define ARCH_32_64(a, b) a +#else +#define ARCH_32_64(a, b) b +#endif + +static const sljit_ins data_transfer_insts[16 + 4] = { +/* u w s */ 
ARCH_32_64(F3(0x2) | OPC(0x23) /* sw */, F3(0x3) | OPC(0x23) /* sd */), +/* u w l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x3) | OPC(0x3) /* ld */), +/* u b s */ F3(0x0) | OPC(0x23) /* sb */, +/* u b l */ F3(0x4) | OPC(0x3) /* lbu */, +/* u h s */ F3(0x1) | OPC(0x23) /* sh */, +/* u h l */ F3(0x5) | OPC(0x3) /* lhu */, +/* u i s */ F3(0x2) | OPC(0x23) /* sw */, +/* u i l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x6) | OPC(0x3) /* lwu */), + +/* s w s */ ARCH_32_64(F3(0x2) | OPC(0x23) /* sw */, F3(0x3) | OPC(0x23) /* sd */), +/* s w l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x3) | OPC(0x3) /* ld */), +/* s b s */ F3(0x0) | OPC(0x23) /* sb */, +/* s b l */ F3(0x0) | OPC(0x3) /* lb */, +/* s h s */ F3(0x1) | OPC(0x23) /* sh */, +/* s h l */ F3(0x1) | OPC(0x3) /* lh */, +/* s i s */ F3(0x2) | OPC(0x23) /* sw */, +/* s i l */ F3(0x2) | OPC(0x3) /* lw */, + +/* d s */ F3(0x3) | OPC(0x27) /* fsd */, +/* d l */ F3(0x3) | OPC(0x7) /* fld */, +/* s s */ F3(0x2) | OPC(0x27) /* fsw */, +/* s l */ F3(0x2) | OPC(0x7) /* flw */, +}; + +#undef ARCH_32_64 + +static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 base, sljit_sw offset) +{ + sljit_ins ins; + + SLJIT_ASSERT(FAST_IS_REG(base) && offset <= 0xfff && offset >= SIMM_MIN); + + ins = data_transfer_insts[flags & MEM_MASK] | RS1(base); + if (flags & LOAD_DATA) + ins |= ((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) | IMM_I(offset); + else + ins |= ((flags & MEM_MASK) <= GPR_REG ? RS2(reg) : FRS2(reg)) | IMM_S(offset); + + return push_inst(compiler, ins); +} + +/* Can perform an operation using at most 1 instruction. */ +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) { + /* Works for both absolute and relative addresses. 
*/ + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + + FAIL_IF(push_mem_inst(compiler, flags, reg, arg & REG_MASK, argw)); + return -1; + } + return 0; +} + +#define TO_ARGW_HI(argw) (((argw) & ~0xfff) + (((argw) & 0x800) ? 0x1000 : 0)) + +/* See getput_arg below. + Note: can_cache is called only for binary operators. */ +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); + + /* Simple operation except for updates. */ + if (arg & OFFS_REG_MASK) { + argw &= 0x3; + next_argw &= 0x3; + if (argw && argw == next_argw && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK))) + return 1; + return 0; + } + + if (arg == next_arg) { + if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) + || TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw)) + return 1; + return 0; + } + + return 0; +} + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + sljit_s32 base = arg & REG_MASK; + sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1; + sljit_sw offset, argw_hi; + + SLJIT_ASSERT(arg & SLJIT_MEM); + if (!(next_arg & SLJIT_MEM)) { + next_arg = 0; + next_argw = 0; + } + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + /* Using the cache. 
*/ + if (argw == compiler->cache_argw) { + if (arg == compiler->cache_arg) + return push_mem_inst(compiler, flags, reg, TMP_REG3, 0); + + if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(TMP_REG3) | RS2(base))); + return push_mem_inst(compiler, flags, reg, TMP_REG3, 0); + } + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(TMP_REG3))); + return push_mem_inst(compiler, flags, reg, tmp_r, 0); + } + } + + if (SLJIT_UNLIKELY(argw)) { + compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG3) | RS1(OFFS_REG(arg)) | IMM_I(argw))); + } + + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(base) | RS2(!argw ? OFFS_REG(arg) : TMP_REG3))); + tmp_r = TMP_REG3; + } + else + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(!argw ? 
OFFS_REG(arg) : TMP_REG3))); + return push_mem_inst(compiler, flags, reg, tmp_r, 0); + } + /* From here on the operand has no offset register: the branch above always returns. */ + if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) + return push_mem_inst(compiler, flags, reg, TMP_REG3, argw - compiler->cache_argw); + + if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= SIMM_MAX) && (argw - compiler->cache_argw >= SIMM_MIN)) { + offset = argw - compiler->cache_argw; + } else { + compiler->cache_arg = SLJIT_MEM; + + argw_hi = TO_ARGW_HI(argw); + + if (next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN && argw_hi != TO_ARGW_HI(next_argw)) { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw, tmp_r)); + compiler->cache_argw = argw; + offset = 0; + } else { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi, tmp_r)); + compiler->cache_argw = argw_hi; + offset = argw & 0xfff; + argw = argw_hi; + } + } + + if (!base) + return push_mem_inst(compiler, flags, reg, TMP_REG3, offset); + + if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) { + compiler->cache_arg = arg; + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(TMP_REG3) | RS2(base))); + return push_mem_inst(compiler, flags, reg, TMP_REG3, offset); + } + + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(TMP_REG3) | RS2(base))); + return push_mem_inst(compiler, flags, reg, tmp_r, offset); +} + /* Single-operand memory access that does not use the compiler->cache_arg bookkeeping. Tries getput_arg_fast first; otherwise builds the address in tmp_r (reg itself for loads whose MEM_MASK value is at most GPR_REG, TMP_REG1 otherwise) and issues the access with offset argw & 0xfff (0 in the offset-register form). */ +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + sljit_s32 base = arg & REG_MASK; + sljit_s32 tmp_r = TMP_REG1; + + if (getput_arg_fast(compiler, flags, reg, arg, argw)) + return compiler->error; + + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) + tmp_r = reg; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if (SLJIT_UNLIKELY(argw)) { + FAIL_IF(push_inst(compiler, SLLI | RD(tmp_r) | RS1(OFFS_REG(arg)) | IMM_I(argw))); + FAIL_IF(push_inst(compiler, ADD | 
RD(tmp_r) | RS1(tmp_r) | RS2(base))); + } + else + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(OFFS_REG(arg)))); + + argw = 0; + } else { + FAIL_IF(load_immediate(compiler, tmp_r, TO_ARGW_HI(argw), TMP_REG3)); + + if (base != 0) + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(tmp_r) | RS2(base))); + } + + return push_mem_inst(compiler, flags, reg, tmp_r, argw & 0xfff); +} + /* Memory access for (arg1, arg1w) that tries getput_arg_fast first and otherwise calls getput_arg with (arg2, arg2w) as the operand accessed next. */ +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + /* On non-RV32 builds WORD expands to a local variable named word and IMM_EXTEND reads a local named op, so both can only be used in functions that declare those names. */ +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#define WORD 0 +#define WORD_32 0 +#define IMM_EXTEND(v) (IMM_I(v)) +#else /* !SLJIT_CONFIG_RISCV_32 */ +#define WORD word +#define WORD_32 0x08 +#define IMM_EXTEND(v) (IMM_I((op & SLJIT_32) ? (v) : (32 + (v)))) +#endif /* SLJIT_CONFIG_RISCV_32 */ + /* Emits the instruction sequence for SLJIT_CLZ or SLJIT_CTZ of src into dst. Scratch registers: OTHER_FLAG (result counter, preset to word_size), TMP_REG2 (current value), TMP_REG1 (current shift amount) and EQUAL_FLAG (copy of the value taken before each shift). */ +static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) +{ + sljit_s32 is_clz = (GET_OPCODE(op) == SLJIT_CLZ); +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; + sljit_ins word_size = (op & SLJIT_32) ? 32 : 64; +#else /* !SLJIT_CONFIG_RISCV_64 */ + sljit_ins word_size = 32; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + SLJIT_ASSERT(WORD == 0 || WORD == 0x8); + + /* The OTHER_FLAG is the counter. */ + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(OTHER_FLAG) | RS1(TMP_ZERO) | IMM_I(word_size))); + + /* The TMP_REG2 is the next value. */ + if (src != TMP_REG2) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG2) | RS1(src) | IMM_I(0))); + + FAIL_IF(push_inst(compiler, BEQ | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)((is_clz ? 
4 : 5) * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20))); + + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(OTHER_FLAG) | RS1(TMP_ZERO) | IMM_I(0))); + if (!is_clz) { + FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG1) | RS1(TMP_REG2) | IMM_I(1))); + FAIL_IF(push_inst(compiler, BNE | RS1(TMP_REG1) | RS2(TMP_ZERO) | ((sljit_ins)(2 * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20))); + } else + FAIL_IF(push_inst(compiler, BLT | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)(2 * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20))); + + /* The TMP_REG1 is the next shift. */ + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG1) | RS1(TMP_ZERO) | IMM_I(word_size))); + + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(TMP_REG2) | IMM_I(0))); + FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_I(1))); + + FAIL_IF(push_inst(compiler, (is_clz ? SRL : SLL) | WORD | RD(TMP_REG2) | RS1(EQUAL_FLAG) | RS2(TMP_REG1))); + FAIL_IF(push_inst(compiler, BNE | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)0xfe000e80 - ((2 * SSIZE_OF(ins)) << 7)))); + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG2) | RS1(TMP_REG1) | IMM_I(-1))); + FAIL_IF(push_inst(compiler, (is_clz ? 
SRL : SLL) | WORD | RD(TMP_REG2) | RS1(EQUAL_FLAG) | RS2(TMP_REG2))); + FAIL_IF(push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1))); + FAIL_IF(push_inst(compiler, BEQ | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)0xfe000e80 - ((5 * SSIZE_OF(ins)) << 7)))); + + return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(OTHER_FLAG) | IMM_I(0)); +} + /* Emits a byte order reversal of src into dst. On RV64 without SLJIT_32 the 64-bit sequence swaps the 32-bit halves, then 16-bit groups, then bytes, with OTHER_FLAG as the mask register and TMP_REG1 / EQUAL_FLAG as scratch; in every other case the shorter 32-bit sequence after the #endif is used. */ +static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) +{ + SLJIT_UNUSED_ARG(op); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (!(op & SLJIT_32)) { + FAIL_IF(push_inst(compiler, LUI | RD(OTHER_FLAG) | 0x10000)); + FAIL_IF(push_inst(compiler, SRLI | RD(TMP_REG1) | RS1(src) | IMM_I(32))); + FAIL_IF(push_inst(compiler, ADDI | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | IMM_I(0xfff))); + FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(src) | IMM_I(32))); + FAIL_IF(push_inst(compiler, SLLI | RD(EQUAL_FLAG) | RS1(OTHER_FLAG) | IMM_I(32))); + FAIL_IF(push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1))); + FAIL_IF(push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(EQUAL_FLAG))); + + FAIL_IF(push_inst(compiler, SRLI | RD(TMP_REG1) | RS1(dst) | IMM_I(16))); + FAIL_IF(push_inst(compiler, AND | RD(dst) | RS1(dst) | RS2(OTHER_FLAG))); + FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(OTHER_FLAG))); + FAIL_IF(push_inst(compiler, SLLI | RD(EQUAL_FLAG) | RS1(OTHER_FLAG) | IMM_I(8))); + FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(dst) | IMM_I(16))); + FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(EQUAL_FLAG))); + FAIL_IF(push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1))); + + FAIL_IF(push_inst(compiler, SRLI | RD(TMP_REG1) | RS1(dst) | IMM_I(8))); + FAIL_IF(push_inst(compiler, AND | RD(dst) | RS1(dst) | RS2(OTHER_FLAG))); + FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(OTHER_FLAG))); + FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(dst) | 
IMM_I(8))); + return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1)); + } +#endif /* SLJIT_CONFIG_RISCV_64 */ + /* 32-bit sequence: swap the 16-bit halves, then the bytes within each half, with OTHER_FLAG as the byte mask. */ + FAIL_IF(push_inst(compiler, SRLI | WORD_32 | RD(TMP_REG1) | RS1(src) | IMM_I(16))); + FAIL_IF(push_inst(compiler, LUI | RD(OTHER_FLAG) | 0xff0000)); + FAIL_IF(push_inst(compiler, SLLI | WORD_32 | RD(dst) | RS1(src) | IMM_I(16))); + FAIL_IF(push_inst(compiler, ORI | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | IMM_I(0xff))); + FAIL_IF(push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1))); + + FAIL_IF(push_inst(compiler, SRLI | WORD_32 | RD(TMP_REG1) | RS1(dst) | IMM_I(8))); + FAIL_IF(push_inst(compiler, AND | RD(dst) | RS1(dst) | RS2(OTHER_FLAG))); + FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(OTHER_FLAG))); + FAIL_IF(push_inst(compiler, SLLI | WORD_32 | RD(dst) | RS1(dst) | IMM_I(8))); + return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1)); +} + /* Emits a swap of the two low bytes of src into dst. The former low byte is moved to bits 8..15 and extended with SRLI (zero) for SLJIT_REV_U16 or SRAI (sign) otherwise; the former second byte, isolated in TMP_REG1, is ORed into bits 0..7. */ +static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) +{ +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; + sljit_ins word_size = (op & SLJIT_32) ? 32 : 64; +#else /* !SLJIT_CONFIG_RISCV_64 */ + sljit_ins word_size = 32; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(src) | IMM_I(8))); + FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src) | IMM_I(word_size - 8))); + FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_I(0xff))); + FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U16 ? 
SRLI : SRAI) | WORD | RD(dst) | RS1(dst) | IMM_I(word_size - 16))); + return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1)); +} + /* EMIT_LOGICAL emits a bitwise operation in its immediate (op_imm) or register (op_reg) form: once into EQUAL_FLAG when SLJIT_SET_Z is requested and once into dst unless UNUSED_DEST is set. It relies on the locals compiler, op, flags, dst, src1 and src2 of the expanding function. */ +#define EMIT_LOGICAL(op_imm, op_reg) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_imm | RD(dst) | RS1(src1) | IMM_I(src2))); \ + } \ + else { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RS1(src1) | RS2(src2))); \ + } + /* EMIT_SHIFT only records the immediate and register opcodes of a shift in the locals op_imm / op_reg; the instructions are emitted by the code after the switch in emit_single_op. */ +#define EMIT_SHIFT(imm, reg) \ + op_imm = (imm); \ + op_reg = (reg); + /* Emits the instructions of one operation whose operands are already in registers (src2 is an immediate value when SRC2_IMM is set in flags). The zero-flag source is written to EQUAL_FLAG when SLJIT_SET_Z is requested; carry, overflow and comparison results are written to OTHER_FLAG. */ +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) +{ + sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg; + sljit_ins op_imm, op_reg; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + SLJIT_ASSERT(WORD == 0 || WORD == 0x8); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, ADDI | RD(dst) | RS1(src2) | IMM_I(0)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | RD(dst) | RS1(src2) | IMM_I(0xff)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(24))); + return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | 
IMM_EXTEND(24)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16))); + return push_inst(compiler, SRLI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16))); + return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + case SLJIT_MOV_U32: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(src2) | IMM_I(32))); + return push_inst(compiler, SRLI | RD(dst) | RS1(dst) | IMM_I(32)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S32: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ADDI | 0x8 | RD(dst) | RS1(src2) | IMM_I(0)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + case SLJIT_CLZ: + case SLJIT_CTZ: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + return emit_clz_ctz(compiler, op, dst, src2); + + case SLJIT_REV: + case SLJIT_REV_S32: +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + case SLJIT_REV_U32: +#endif /* SLJIT_CONFIG_RISCV_32 */ + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + return emit_rev(compiler, op, dst, src2); + + case SLJIT_REV_U16: + case 
SLJIT_REV_S16: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); + return emit_rev16(compiler, op, dst, src2); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + case SLJIT_REV_U32: + SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1); + FAIL_IF(emit_rev(compiler, op, dst, src2)); + if (dst == TMP_REG2) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(dst) | IMM_I(32))); + return push_inst(compiler, SRLI | RD(dst) | RS1(dst) | IMM_I(32)); +#endif /* SLJIT_CONFIG_RISCV_64 */ + + case SLJIT_ADD: + /* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */ + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); + else + FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1))); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADD | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + + if (is_overflow || carry_src_r != 0) { + if (src1 != dst) + carry_src_r = (sljit_s32)src1; + else if (src2 != dst) + carry_src_r = (sljit_s32)src2; + else { + FAIL_IF(push_inst(compiler, ADDI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(0))); + carry_src_r = OTHER_FLAG; + } + } + + /* Only the zero flag is needed. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ + if (is_overflow || carry_src_r != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(src2))); + else + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(dst) | RS2(carry_src_r))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0))); + FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_EXTEND(31))); + return push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(TMP_REG1) | RS2(OTHER_FLAG)); + + case SLJIT_ADDC: + carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); + } else { + if (carry_src_r != 0) { + if (src1 != dst) + carry_src_r = (sljit_s32)src1; + else if (src2 != dst) + carry_src_r = (sljit_s32)src2; + else { + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); + carry_src_r = EQUAL_FLAG; + } + } + + FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ + if (carry_src_r != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(src2))); + else + FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RS1(dst) | RS2(carry_src_r))); + } + + FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(dst) | RS2(OTHER_FLAG))); + + if (carry_src_r == 0) + return SLJIT_SUCCESS; + + /* Set OTHER_FLAG to (dst == 0) && (OTHER_FLAG == 1), i.e. whether adding the incoming carry wrapped around. */ + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(dst) | RS2(OTHER_FLAG))); + /* Set carry flag. 
*/ + return push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(EQUAL_FLAG)); + + case SLJIT_SUB: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_handled = 0; + + if (flags & SRC2_IMM) { + if (GET_FLAG_TYPE(op) == SLJIT_LESS) { + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); + is_handled = 1; + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) { + FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); + is_handled = 1; + } + } + + if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + is_handled = 1; + + if (flags & SRC2_IMM) { + reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(push_inst(compiler, ADDI | RD(reg) | RS1(TMP_ZERO) | IMM_I(src2))); + src2 = reg; + flags &= ~SRC2_IMM; + } + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_LESS: + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + break; + case SLJIT_GREATER: + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src2) | RS2(src1))); + break; + case SLJIT_SIG_LESS: + FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + break; + case SLJIT_SIG_GREATER: + FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(src2) | RS2(src1))); + break; + } + } + + if (is_handled) { + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2))); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2)); + } + else { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2)); + } + return SLJIT_SUCCESS; + } + + is_overflow = 
GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); + else + FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1))); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2))); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + + /* Only the zero flag is needed. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0))); + FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_EXTEND(31))); + return push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(TMP_REG1) | RS2(OTHER_FLAG)); + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY; + + if (flags & SRC2_IMM) { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); + + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2))); + } + else { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + + FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2))); + } + + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RS1(dst) | RS2(OTHER_FLAG))); + + FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(dst) | RS2(OTHER_FLAG))); + + if (!is_carry) + return SLJIT_SUCCESS; + + return push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(EQUAL_FLAG) | RS2(TMP_REG1)); + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & SRC2_IMM)); + + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) + return push_inst(compiler, MUL | WORD | RD(dst) | RS1(src1) | RS2(src2)); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (word) { + FAIL_IF(push_inst(compiler, MUL | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + FAIL_IF(push_inst(compiler, MUL | 0x8 | RD(dst) | RS1(src1) | RS2(src2))); + return push_inst(compiler, SUB | RD(OTHER_FLAG) | RS1(dst) | 
RS2(OTHER_FLAG)); + } +#endif /* SLJIT_CONFIG_RISCV_64 */ + + FAIL_IF(push_inst(compiler, MULH | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + FAIL_IF(push_inst(compiler, MUL | RD(dst) | RS1(src1) | RS2(src2))); +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + FAIL_IF(push_inst(compiler, SRAI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(31))); +#else /* !SLJIT_CONFIG_RISCV_32 */ + FAIL_IF(push_inst(compiler, SRAI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(63))); +#endif /* SLJIT_CONFIG_RISCV_32 */ + return push_inst(compiler, SUB | RD(OTHER_FLAG) | RS1(EQUAL_FLAG) | RS2(OTHER_FLAG)); + + case SLJIT_AND: + EMIT_LOGICAL(ANDI, AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(ORI, OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + EMIT_LOGICAL(XORI, XOR); + return SLJIT_SUCCESS; + + case SLJIT_SHL: + case SLJIT_MSHL: + EMIT_SHIFT(SLLI, SLL); + break; + + case SLJIT_LSHR: + case SLJIT_MLSHR: + EMIT_SHIFT(SRLI, SRL); + break; + + case SLJIT_ASHR: + case SLJIT_MASHR: + EMIT_SHIFT(SRAI, SRA); + break; + + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & SRC2_IMM) { + SLJIT_ASSERT(src2 != 0); + + op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? SLLI : SRLI; + FAIL_IF(push_inst(compiler, op_imm | WORD | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + src2 = ((op & SLJIT_32) ? 32 : 64) - src2; +#else /* !SLJIT_CONFIG_RISCV_64 */ + src2 = 32 - src2; +#endif /* SLJIT_CONFIG_RISCV_64 */ + op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? SRLI : SLLI; + FAIL_IF(push_inst(compiler, op_imm | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); + return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)); + } + + if (src2 == TMP_ZERO) { + if (dst != src1) + return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(0)); + return SLJIT_SUCCESS; + } + + FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(TMP_ZERO) | RS2(src2))); + op_reg = (GET_OPCODE(op) == SLJIT_ROTL) ? 
SLL : SRL; + FAIL_IF(push_inst(compiler, op_reg | WORD | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + op_reg = (GET_OPCODE(op) == SLJIT_ROTL) ? SRL : SLL; + FAIL_IF(push_inst(compiler, op_reg | WORD | RD(dst) | RS1(src1) | RS2(EQUAL_FLAG))); + return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)); + + default: + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + } + /* Only the shift cases (EMIT_SHIFT followed by break) reach this point; every other case returns inside the switch. */ + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, op_imm | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, op_imm | WORD | RD(dst) | RS1(src1) | IMM_I(src2)); + } + + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, op_reg | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, op_reg | WORD | RD(dst) | RS1(src1) | RS2(src2)); +} + +#undef IMM_EXTEND + /* Brings the operands of op into registers (src2 stays an immediate when IMM_OP is set and the value is non-zero and within SIMM_MIN..SIMM_MAX), calls emit_single_op, and stores the result when dst is a memory operand. dst == 0 means the result is unused (UNUSED_DEST). The address cache is reset first unless ALT_KEEP_CACHE is set in flags. */ +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; + sljit_sw src2_r = 0; + sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? 
TMP_REG1 : TMP_REG2; + + if (!(flags & ALT_KEEP_CACHE)) { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + } + + if (dst == 0) { + SLJIT_ASSERT(HAS_FLAGS(op)); + flags |= UNUSED_DEST; + dst = TMP_REG2; + } + else if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (flags & MOVE_OP) + src2_tmp_reg = dst_r; + } + else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) + flags |= SLOW_DEST; + + if (flags & IMM_OP) { + if (src2 == SLJIT_IMM && src2w != 0 && src2w <= SIMM_MAX && src2w >= SIMM_MIN) { + flags |= SRC2_IMM; + src2_r = src2w; + } + else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= SIMM_MAX && src1w >= SIMM_MIN) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. */ + } + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } else if (src1 == SLJIT_IMM) { + if (src1w) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); + src1_r = TMP_REG1; + } + else + src1_r = TMP_ZERO; + } else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + src1_r = TMP_REG1; + } + + /* Source 2. 
*/ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) + dst_r = (sljit_s32)src2_r; + } else if (src2 == SLJIT_IMM) { + if (!(flags & SRC2_IMM)) { + if (src2w) { + FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w, TMP_REG3)); + src2_r = src2_tmp_reg; + } else { + src2_r = TMP_ZERO; + if (flags & MOVE_OP) { + if (dst & SLJIT_MEM) + dst_r = 0; + else + op = SLJIT_MOV; + } + } + } + } else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, src2_tmp_reg, src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + src2_r = src2_tmp_reg; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + SLJIT_ASSERT(src2_r == TMP_REG2); + if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw)); + } else { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((src1_r == TMP_REG1) ? 
MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, dst, dstw)); + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + /* Write dst_r back when the destination is a memory operand: one instruction if the fast path applies, getput_arg otherwise. */ + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, flags, dst_r, dst, dstw); + return compiler->error; + } + return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + /* Emits a zero-operand operation: breakpoint, nop, and the long multiply / divide / remainder forms, which read and write SLJIT_R0 and SLJIT_R1 with TMP_REG1 as scratch. SLJIT_ENDBR and SLJIT_SKIP_FRAMES_BEFORE_RETURN emit nothing. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; + + SLJIT_ASSERT(word == 0 || word == 0x8); +#endif /* SLJIT_CONFIG_RISCV_64 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + switch (GET_OPCODE(op)) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, EBREAK); + case SLJIT_NOP: + return push_inst(compiler, ADDI | RD(TMP_ZERO) | RS1(TMP_ZERO) | IMM_I(0)); + case SLJIT_LMUL_UW: + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R1) | IMM_I(0))); + FAIL_IF(push_inst(compiler, MULHU | RD(SLJIT_R1) | RS1(SLJIT_R0) | RS2(SLJIT_R1))); + return push_inst(compiler, MUL | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(TMP_REG1)); + case SLJIT_LMUL_SW: + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R1) | IMM_I(0))); + FAIL_IF(push_inst(compiler, MULH | RD(SLJIT_R1) | RS1(SLJIT_R0) | RS2(SLJIT_R1))); + return push_inst(compiler, MUL | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(TMP_REG1)); + case SLJIT_DIVMOD_UW: + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R0) | IMM_I(0))); + FAIL_IF(push_inst(compiler, DIVU | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1))); + return push_inst(compiler, REMU | WORD | RD(SLJIT_R1) | RS1(TMP_REG1) | RS2(SLJIT_R1)); + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R0) | IMM_I(0))); + FAIL_IF(push_inst(compiler, DIV | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1))); + return push_inst(compiler, REM | WORD | RD(SLJIT_R1) | 
RS1(TMP_REG1) | RS2(SLJIT_R1)); + case SLJIT_DIV_UW: + return push_inst(compiler, DIVU | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)); + case SLJIT_DIV_SW: + return push_inst(compiler, DIV | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)); + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + /* Emits a one-source operation (moves, clz / ctz, byte reversals) by forwarding to emit_op with TMP_ZERO as the first source. Immediate sources of the narrow moves are first truncated to the moved type. On RV32 the 32-bit moves are handled as plain SLJIT_MOV. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (op & SLJIT_32) + flags = INT_DATA | SIGNED_DATA; +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: +#endif + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + case SLJIT_MOV_U32: + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw); + + case SLJIT_MOV_S32: + /* Logical operators have no W variant, so sign extended input is necessary for them. */ + case SLJIT_MOV32: + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw); +#endif + + case SLJIT_MOV_U8: + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw); + + case SLJIT_MOV_S8: + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? 
(sljit_s8)srcw : srcw); + + case SLJIT_MOV_U16: + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw); + + case SLJIT_MOV_S16: + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw); + + case SLJIT_CLZ: + case SLJIT_CTZ: + case SLJIT_REV: + return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw); + + case SLJIT_REV_U16: + case SLJIT_REV_S16: + return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw); + + case SLJIT_REV_U32: + case SLJIT_REV_S32: + return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (op & SLJIT_32) { + flags |= INT_DATA | SIGNED_DATA; + if (src1 == SLJIT_IMM) + src1w = (sljit_s32)src1w; + if (src2 == SLJIT_IMM) + src2w = (sljit_s32)src2w; + } +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + compiler->status_flags_state = 0; + return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, 
src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + case SLJIT_ROTL: + case SLJIT_ROTR: + if (src2 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + src2w &= 0x1f; +#else /* !SLJIT_CONFIG_RISCV_32 */ + if (op & SLJIT_32) + src2w &= 0x1f; + else + src2w &= 0x3f; +#endif /* SLJIT_CONFIG_RISCV_32 */ + } + + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + SLJIT_ASSERT(WORD == 0 || WORD == 0x8); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, ADD | WORD | RD(dst_reg) | RS1(dst_reg) | RS2(TMP_REG2)); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + 
sljit_s32 dst_reg, + sljit_s32 src1_reg, + sljit_s32 src2_reg, + sljit_s32 src3, sljit_sw src3w) +{ + sljit_s32 is_left; + sljit_ins ins1, ins2, ins3; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5; + sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64; +#else /* !SLJIT_CONFIG_RISCV_64 */ + sljit_s32 inp_flags = WORD_DATA | LOAD_DATA; + sljit_sw bit_length = 32; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + SLJIT_ASSERT(WORD == 0 || WORD == 0x8); + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w)); + + is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL); + + if (src1_reg == src2_reg) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w); + } + + ADJUST_LOCAL_OFFSET(src3, src3w); + + if (src3 == SLJIT_IMM) { + src3w &= bit_length - 1; + + if (src3w == 0) + return SLJIT_SUCCESS; + + if (is_left) { + ins1 = SLLI | WORD | IMM_I(src3w); + src3w = bit_length - src3w; + ins2 = SRLI | WORD | IMM_I(src3w); + } else { + ins1 = SRLI | WORD | IMM_I(src3w); + src3w = bit_length - src3w; + ins2 = SLLI | WORD | IMM_I(src3w); + } + + FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RS1(src1_reg))); + FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RS1(src2_reg))); + return push_inst(compiler, OR | RD(dst_reg) | RS1(dst_reg) | RS2(TMP_REG1)); + } + + if (src3 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w)); + src3 = TMP_REG2; + } else if (dst_reg == src3) { + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG2) | RS1(src3) | IMM_I(0))); /* The first shift below writes dst_reg, so keep the shift amount in TMP_REG2; a failed emit must abort like every other push_inst here. */ + src3 = TMP_REG2; + } + + if (is_left) { + ins1 = SLL; + ins2 = SRLI; + ins3 = SRL; + } else { + ins1 = SRL; + ins2 = SLLI; + ins3 = SLL; + } + + FAIL_IF(push_inst(compiler, ins1 | WORD | RD(dst_reg) | 
RS1(src1_reg) | RS2(src3))); + + if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) { + FAIL_IF(push_inst(compiler, ins2 | WORD | RD(TMP_REG1) | RS1(src2_reg) | IMM_I(1))); + FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RS1(src3) | IMM_I((sljit_ins)bit_length - 1))); + src2_reg = TMP_REG1; + } else + FAIL_IF(push_inst(compiler, SUB | WORD | RD(TMP_REG2) | RS1(TMP_ZERO) | RS2(src3))); + + FAIL_IF(push_inst(compiler, ins3 | WORD | RD(TMP_REG1) | RS1(src2_reg) | RS2(TMP_REG2))); + return push_inst(compiler, OR | RD(dst_reg) | RS1(dst_reg) | RS2(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, ADDI | RD(RETURN_ADDR_REG) | RS1(src) | IMM_I(0))); + else + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw)); + + return push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(RETURN_ADDR_REG) | IMM_I(0)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + switch (op) { + case SLJIT_FAST_ENTER: + if (FAST_IS_REG(dst)) + return push_inst(compiler, ADDI | RD(dst) | RS1(RETURN_ADDR_REG) | IMM_I(0)); + + SLJIT_ASSERT(RETURN_ADDR_REG == TMP_REG2); + break; + case SLJIT_GET_RETURN_ADDRESS: + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw))); + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type != SLJIT_FLOAT_REGISTER) + return -1; + + return freg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + SLJIT_UNUSED_ARG(size); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7)) +#define FMT(op) ((sljit_ins)((op & SLJIT_32) ^ SLJIT_32) << 17) + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +# define flags (sljit_u32)0 +#else + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21; +#endif + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, FCVT_W_S | FMT(op) | flags | RD(dst_r) | FRS1(src))); + + /* Store the integer value from a VFP register. 
*/ + if (dst & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + return emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0); +#else + return emit_op_mem2(compiler, flags ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0); +#endif + } + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +# undef flags +#endif +} + +static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); +#else /* SLJIT_CONFIG_RISCV_32 */ + FAIL_IF(emit_op_mem2(compiler, ((ins & (1 << 21)) ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); +#endif /* !SLJIT_CONFIG_RISCV_32 */ + src = TMP_REG1; + } else if (src == SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw, TMP_REG3)); + src = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, ins | FRD(dst_r) | RS1(src))); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, DOUBLE_DATA | ((sljit_s32)(~ins >> 24) & 0x2), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins = FCVT_S_W | FMT(op); + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if (op & SLJIT_32) + ins |= F3(0x7); +#else /* !SLJIT_CONFIG_RISCV_32 */ + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) + ins |= (1 << 21); + else if (src == SLJIT_IMM) + srcw = (sljit_s32)srcw; + + if (op != SLJIT_CONV_F64_FROM_S32) + ins |= F3(0x7); +#endif /* SLJIT_CONFIG_RISCV_32 */ + + return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw); +} + 
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins = FCVT_S_WU | FMT(op); + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if (op & SLJIT_32) + ins |= F3(0x7); +#else /* !SLJIT_CONFIG_RISCV_32 */ + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW) + ins |= (1 << 21); + else if (src == SLJIT_IMM) + srcw = (sljit_u32)srcw; + + if (op != SLJIT_CONV_F64_FROM_S32) + ins |= F3(0x7); +#endif /* SLJIT_CONFIG_RISCV_32 */ + + return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins inst; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2); + break; + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + inst = FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2); + break; + case SLJIT_ORDERED_GREATER: + inst = FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src2) | FRS2(src1); + break; + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + inst = FLE_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2); + break; + case SLJIT_UNORDERED_OR_LESS: + inst = FLE_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src2) | FRS2(src1); + break; + case SLJIT_UNORDERED_OR_EQUAL: + FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2))); + FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(TMP_REG1) | FRS1(src2) | FRS2(src1))); 
+ inst = OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1); + break; + default: /* SLJIT_UNORDERED */ + if (src1 == src2) { + inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src1); + break; + } + FAIL_IF(push_inst(compiler, FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src1))); + FAIL_IF(push_inst(compiler, FEQ_S | FMT(op) | RD(TMP_REG1) | FRS1(src2) | FRS2(src2))); + inst = AND | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1); + break; + } + + return push_inst(compiler, inst); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_32; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (!(dst & SLJIT_MEM)) + FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, FSGNJN_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, FSGNJX_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src))); + break; + case SLJIT_CONV_F64_FROM_F32: + /* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */ + FAIL_IF(push_inst(compiler, FCVT_S_D | ((op & SLJIT_32) ? 
(1 << 25) : ((1 << 20) | F3(7))) | FRD(dst_r) | FRS1(src))); + op ^= SLJIT_32; + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r, flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= SLOW_SRC1; + } + + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= SLOW_SRC2; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + } else { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + + 
if (flags & SLOW_SRC1) + src1 = TMP_FREG1; + if (flags & SLOW_SRC2) + src2 = TMP_FREG2; + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, FADD_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2))); + break; + + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, FSUB_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2))); + break; + + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, FMUL_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2))); + break; + + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, FDIV_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2))); + break; + + case SLJIT_COPYSIGN_F64: + return push_inst(compiler, FSGNJ_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)); + } + + if (dst_r != dst) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + union { + sljit_s32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, FMV_W_X | RS1(TMP_ZERO) | FRD(freg)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm, TMP_REG3)); + return push_inst(compiler, FMV_W_X | RS1(TMP_REG1) | FRD(freg)); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return 
label; +} + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#define BRANCH_LENGTH ((sljit_ins)(3 * sizeof(sljit_ins)) << 7) +#else +#define BRANCH_LENGTH ((sljit_ins)(7 * sizeof(sljit_ins)) << 7) +#endif + +static sljit_ins get_jump_instruction(sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + return BNE | RS1(EQUAL_FLAG) | RS2(TMP_ZERO); + case SLJIT_NOT_EQUAL: + return BEQ | RS1(EQUAL_FLAG) | RS2(TMP_ZERO); + case SLJIT_LESS: + case SLJIT_GREATER: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER: + case SLJIT_OVERFLOW: + case SLJIT_CARRY: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + case SLJIT_ORDERED_GREATER: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED: + return BEQ | RS1(OTHER_FLAG) | RS2(TMP_ZERO); + break; + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_NOT_OVERFLOW: + case SLJIT_NOT_CARRY: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED: + return BNE | RS1(OTHER_FLAG) | RS2(TMP_ZERO); + default: + /* Not conditional branch. 
*/ + return 0; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_ins inst; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + inst = get_jump_instruction(type); + + if (inst != 0) { + PTR_FAIL_IF(push_inst(compiler, inst | BRANCH_LENGTH)); + jump->flags |= IS_COND; + } + + jump->addr = compiler->size; + inst = JALR | RS1(TMP_REG1) | IMM_I(0); + + if (type >= SLJIT_FAST_CALL) { + jump->flags |= IS_CALL; + inst |= RD(RETURN_ADDR_REG); + } + + PTR_FAIL_IF(push_inst(compiler, inst)); + + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + struct sljit_jump *jump; + sljit_s32 flags; + sljit_ins inst; + sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? 
TMP_REG1 : TMP_REG2; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + flags = WORD_DATA | LOAD_DATA; +#else /* !SLJIT_CONFIG_RISCV_32 */ + flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; +#endif /* SLJIT_CONFIG_RISCV_32 */ + + if (src1 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0)); + src2 = src2_tmp_reg; + } + + if (src1 == SLJIT_IMM) { + if (src1w != 0) { + PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); + src1 = TMP_REG1; + } + else + src1 = TMP_ZERO; + } + + if (src2 == SLJIT_IMM) { + if (src2w != 0) { + PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w, TMP_REG3)); + src2 = src2_tmp_reg; + } + else + src2 = TMP_ZERO; + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND)); + type &= 0xff; + + switch (type) { + case SLJIT_EQUAL: + inst = BNE | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_NOT_EQUAL: + inst = BEQ | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_LESS: + inst = BGEU | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_GREATER_EQUAL: + inst = BLTU | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_GREATER: + inst = BGEU | RS1(src2) | RS2(src1) | BRANCH_LENGTH; + break; + case SLJIT_LESS_EQUAL: + inst = BLTU | RS1(src2) | RS2(src1) | BRANCH_LENGTH; + break; + case SLJIT_SIG_LESS: + inst = BGE | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_SIG_GREATER_EQUAL: + inst = BLT | RS1(src1) | RS2(src2) | 
BRANCH_LENGTH; + break; + case SLJIT_SIG_GREATER: + inst = BGE | RS1(src2) | RS2(src1) | BRANCH_LENGTH; + break; + case SLJIT_SIG_LESS_EQUAL: + inst = BLT | RS1(src2) | RS2(src1) | BRANCH_LENGTH; + break; + } + + PTR_FAIL_IF(push_inst(compiler, inst)); + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0))); + + /* Maximum number of instructions required for generating a constant. */ + compiler->size += JUMP_MAX_SIZE - 1; + return jump; +} + +#undef BRANCH_LENGTH + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + + if (src != SLJIT_IMM) { + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + return push_inst(compiler, JALR | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RS1(src) | IMM_I(0)); + } + + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0)); + jump->u.target = (sljit_uw)srcw; + + jump->addr = compiler->size; + FAIL_IF(push_inst(compiler, JALR | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0))); + + /* Maximum number of instructions required for generating a constant. 
*/ + compiler->size += JUMP_MAX_SIZE - 1; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(src) | IMM_I(0))); + src = TMP_REG1; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP; + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 src_r, dst_r, invert; + sljit_s32 saved_op = op; +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + sljit_s32 mem_type = WORD_DATA; +#else + sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? 
dst : TMP_REG2; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw)); + + if (type < SLJIT_F_EQUAL) { + src_r = OTHER_FLAG; + invert = type & 0x1; + + switch (type) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL: + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RS1(EQUAL_FLAG) | IMM_I(1))); + src_r = dst_r; + break; + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) { + src_r = OTHER_FLAG; + break; + } + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RS1(OTHER_FLAG) | IMM_I(1))); + src_r = dst_r; + invert ^= 0x1; + break; + } + } else { + invert = 0; + src_r = OTHER_FLAG; + + switch (type) { + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ + case SLJIT_F_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED: + invert = 1; + break; + } + } + + if (invert) { + FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(src_r) | IMM_I(1))); + src_r = dst_r; + } + + if (op < SLJIT_ADD) { + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_type, src_r, dst, dstw); + + if (src_r != dst_r) + return push_inst(compiler, ADDI | RD(dst_r) | RS1(src_r) | IMM_I(0)); + return SLJIT_SUCCESS; + } + + mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE; + + if (dst & SLJIT_MEM) + return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0); + return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + sljit_ins *ptr; + 
sljit_uw size; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (sljit_ins)(type & SLJIT_32) >> 5; + sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; +#else /* !SLJIT_CONFIG_RISCV_64 */ + sljit_s32 inp_flags = WORD_DATA | LOAD_DATA; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + SLJIT_ASSERT(WORD == 0 || WORD == 0x8); + + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(dst_reg) | IMM_I(0))); + + if ((src1 & REG_MASK) == dst_reg) + src1 = (src1 & ~REG_MASK) | TMP_REG1; + + if (OFFS_REG(src1) == dst_reg) + src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); + } + + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst_reg) | RS1(src2_reg) | IMM_I(0))); + } + } + + size = compiler->size; + + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w)); + } else if (src1 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (word) + src1w = (sljit_s32)src1w; +#endif /* SLJIT_CONFIG_RISCV_64 */ + FAIL_IF(load_immediate(compiler, dst_reg, src1w, TMP_REG1)); + } else + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst_reg) | RS1(src1) | IMM_I(0))); + + size = compiler->size - size; + *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((size & 0x7) << 9) | (sljit_ins)((size >> 3) << 25); + return SLJIT_SUCCESS; +} + +#undef WORD + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_ins *ptr; + sljit_uw size; + + 
CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_freg != src2_freg) { + if (dst_freg == src1) { + src1 = src2_freg; + src1w = 0; + type ^= 0x1; + } else + FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(type) | FRD(dst_freg) | FRS1(src2_freg) | FRS2(src2_freg))); + } + + size = compiler->size; + + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + compiler->size++; + + if (src1 & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, dst_freg, src1, src1w)); + else + FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(type) | FRD(dst_freg) | FRS1(src1) | FRS2(src1))); + + size = compiler->size - size; + *ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((size & 0x7) << 9) | (sljit_ins)((size >> 3) << 25); + return SLJIT_SUCCESS; +} + +#undef FLOAT_DATA +#undef FMT + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + memw &= 0x3; + + if (SLJIT_UNLIKELY(memw != 0)) { + FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG1) | RS1(OFFS_REG(mem)) | IMM_I(memw))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(mem & REG_MASK))); + } else + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(mem & REG_MASK) | RS2(OFFS_REG(mem)))); + + mem = TMP_REG1; + memw = 0; + } else if (memw > SIMM_MAX - SSIZE_OF(sw) || memw < SIMM_MIN) { + if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) { + FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw), TMP_REG3)); + memw &= 0xfff; + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, memw, TMP_REG3)); + memw = 0; + } + + if (mem 
& REG_MASK) + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(mem & REG_MASK))); + + mem = TMP_REG1; + } else { + mem &= REG_MASK; + memw &= 0xfff; + } + + SLJIT_ASSERT((memw >= 0 && memw <= SIMM_MAX - SSIZE_OF(sw)) || (memw > SIMM_MAX && memw <= 0xfff)); + + if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) { + FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), mem, (memw + SSIZE_OF(sw)) & 0xfff)); + return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), mem, memw); + } + + flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0); + + FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), mem, memw)); + return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), mem, (memw + SSIZE_OF(sw)) & 0xfff); +} + +#undef TO_ARGW_HI + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, ADDI | RD(dst_r))); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_jump *jump; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + compiler->size += 1; +#else /* !SLJIT_CONFIG_RISCV_32 */ + compiler->size += 5; +#endif /* SLJIT_CONFIG_RISCV_32 */ + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} diff --git a/src/sljit/sljitNativeS390X.c b/src/sljit/sljitNativeS390X.c new file mode 100644 index 0000000..99e8463 --- /dev/null +++ b/src/sljit/sljitNativeS390X.c @@ -0,0 +1,4526 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#ifdef __ARCH__ +#define ENABLE_STATIC_FACILITY_DETECTION 1 +#else +#define ENABLE_STATIC_FACILITY_DETECTION 0 +#endif +#define ENABLE_DYNAMIC_FACILITY_DETECTION 1 + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ + return "s390x" SLJIT_CPUINFO; +} + +/* Instructions are stored as 64 bit values regardless their size. */ +typedef sljit_uw sljit_ins; + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) + +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1, 14 +}; + +/* there are also a[2-15] available, but they are slower to access and + * their use is limited as mundaym explained: + * https://github.com/zherczeg/sljit/pull/91#discussion_r486895689 + */ + +/* General Purpose Registers [0-15]. */ +typedef sljit_uw sljit_gpr; + +/* + * WARNING + * the following code is non standard and should be improved for + * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based + * registers because r0 and r1 are the ABI recommended volatiles. 
+ * there is a gpr() function that maps sljit to physical register numbers + * that should be used instead of the usual index into reg_map[] and + * will be retired ASAP (TODO: carenas) + */ + +static const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */ +static const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */ +static const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */ +static const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */ +static const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */ +static const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */ +static const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */ +static const sljit_gpr r7 = 7; /* reg_map[6] */ +static const sljit_gpr r8 = 8; /* reg_map[7] */ +static const sljit_gpr r9 = 9; /* reg_map[8] */ +static const sljit_gpr r10 = 10; /* reg_map[9] */ +static const sljit_gpr r11 = 11; /* reg_map[10] */ +static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */ +static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */ +static const sljit_gpr r14 = 14; /* reg_map[0]: return address */ +static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */ + +/* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */ +/* TODO(carenas): r12 might conflict in PIC code, reserve? */ +/* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp + * like we do know might be faster though, reserve? + */ + +/* TODO(carenas): should be named TMP_REG[1-2] for consistency */ +#define tmp0 r0 +#define tmp1 r1 + +/* When reg cannot be unused. */ +#define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP) + +/* Link register. 
*/
static const sljit_gpr link_r = 14;     /* r14 */

#define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

/* Maps SLJIT floating point register indexes to hardware register numbers. */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
	0, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, 1
};

/* RnA(r): place an already mapped (hardware) register number r at bit
   offset n of an instruction word. */
#define R0A(r) (r)
#define R4A(r) ((r) << 4)
#define R8A(r) ((r) << 8)
#define R12A(r) ((r) << 12)
#define R16A(r) ((r) << 16)
#define R20A(r) ((r) << 20)
#define R28A(r) ((r) << 28)
#define R32A(r) ((r) << 32)
#define R36A(r) ((r) << 36)

/* R0(r): map an SLJIT register index through reg_map[]. */
#define R0(r) ((sljit_ins)reg_map[r])

/* Fn(r): map an SLJIT float register index through freg_map[] and place
   the result at bit offset n. */
#define F0(r) ((sljit_ins)freg_map[r])
#define F4(r) (R4A((sljit_ins)freg_map[r]))
#define F12(r) (R12A((sljit_ins)freg_map[r]))
#define F20(r) (R20A((sljit_ins)freg_map[r]))
#define F28(r) (R28A((sljit_ins)freg_map[r]))
#define F32(r) (R32A((sljit_ins)freg_map[r]))
#define F36(r) (R36A((sljit_ins)freg_map[r]))

struct sljit_s390x_const {
	struct sljit_const const_; /* must be first */
	sljit_sw init_value;       /* required to build literal pool */
};

/* Convert SLJIT register to hardware register. */
static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
{
	/* The index must lie inside reg_map[]. */
	SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
	return reg_map[r];
}

/* Appends one instruction word to the compiler's buffer.
   Returns SLJIT_SUCCESS; FAIL_IF handles a NULL result from ensure_buf().
   compiler->size is advanced by 1, 2 or 3 depending on the highest
   non-zero 16-bit group of ins (presumably counting the 2, 4 or 6 byte
   encoding in halfwords -- confirm against the code generator). */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ibuf);
	*ibuf = ins;

	/* No instruction pattern may use more than 48 bits. */
	SLJIT_ASSERT(ins <= 0xffffffffffffL);

	compiler->size++;
	/* Bits 32-47 set: counted once more here ... */
	if (ins & 0xffff00000000L)
		compiler->size++;

	/* ... and any of bits 16-47 set: counted once more. */
	if (ins & 0xffffffff0000L)
		compiler->size++;

	return SLJIT_SUCCESS;
}

/* True when the current flags were produced by an add or a subtract that
   was not issued as a compare. */
#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
	(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \
		&& !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))

/* Map the given type to a 4-bit condition code mask.
*/ +static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) { + const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */ + const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */ + const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */ + const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */ + + switch (type) { + case SLJIT_EQUAL: + if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) { + sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state); + if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL) + return cc0; + if (type == SLJIT_OVERFLOW) + return (cc0 | cc3); + return (cc0 | cc2); + } + /* fallthrough */ + + case SLJIT_ATOMIC_STORED: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + return cc0; + + case SLJIT_NOT_EQUAL: + if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) { + sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state); + if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL) + return (cc1 | cc2 | cc3); + if (type == SLJIT_OVERFLOW) + return (cc1 | cc2); + return (cc1 | cc3); + } + /* fallthrough */ + + case SLJIT_UNORDERED_OR_NOT_EQUAL: + return (cc1 | cc2 | cc3); + + case SLJIT_LESS: + case SLJIT_ATOMIC_NOT_STORED: + return cc1; + + case SLJIT_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return (cc0 | cc2 | cc3); + + case SLJIT_GREATER: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE) + return cc2; + return cc3; + + case SLJIT_LESS_EQUAL: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE) + return (cc0 | cc1); + return (cc0 | cc1 | cc2); + + case SLJIT_SIG_LESS: + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + return cc1; + + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (cc2 | cc3); + /* fallthrough */ + + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + return (cc0 | cc1); + + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + 
return (cc0 | cc1); + /* fallthrough */ + + case SLJIT_SIG_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + /* Overflow is considered greater, see SLJIT_SUB. */ + return cc2 | cc3; + + case SLJIT_SIG_GREATER_EQUAL: + return (cc0 | cc2 | cc3); + + case SLJIT_OVERFLOW: + if (compiler->status_flags_state & SLJIT_SET_Z) + return (cc2 | cc3); + /* fallthrough */ + + case SLJIT_UNORDERED: + return cc3; + + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & SLJIT_SET_Z) + return (cc0 | cc1); + /* fallthrough */ + + case SLJIT_ORDERED: + return (cc0 | cc1 | cc2); + + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return (cc1 | cc2); + + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + return cc2; + + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + return (cc0 | cc2); + + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return (cc0 | cc1 | cc3); + + case SLJIT_UNORDERED_OR_EQUAL: + return (cc0 | cc3); + + case SLJIT_UNORDERED_OR_LESS: + return (cc1 | cc3); + } + + SLJIT_UNREACHABLE(); + return (sljit_u8)-1; +} + +/* Facility to bit index mappings. + Note: some facilities share the same bit index. */ +typedef sljit_uw facility_bit; +#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7 +#define FAST_LONG_DISPLACEMENT_FACILITY 19 +#define EXTENDED_IMMEDIATE_FACILITY 21 +#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34 +#define DISTINCT_OPERAND_FACILITY 45 +#define HIGH_WORD_FACILITY 45 +#define POPULATION_COUNT_FACILITY 45 +#define LOAD_STORE_ON_CONDITION_1_FACILITY 45 +#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49 +#define LOAD_STORE_ON_CONDITION_2_FACILITY 53 +#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58 +#define VECTOR_FACILITY 129 +#define VECTOR_ENHANCEMENTS_1_FACILITY 135 + +/* Report whether a facility is known to be present due to the compiler + settings. This function should always be compiled to a constant + value given a constant argument. 
*/ +static SLJIT_INLINE int have_facility_static(facility_bit x) +{ +#if ENABLE_STATIC_FACILITY_DETECTION + switch (x) { + case FAST_LONG_DISPLACEMENT_FACILITY: + return (__ARCH__ >= 6 /* z990 */); + case EXTENDED_IMMEDIATE_FACILITY: + case STORE_FACILITY_LIST_EXTENDED_FACILITY: + return (__ARCH__ >= 7 /* z9-109 */); + case GENERAL_INSTRUCTION_EXTENSION_FACILITY: + return (__ARCH__ >= 8 /* z10 */); + case DISTINCT_OPERAND_FACILITY: + return (__ARCH__ >= 9 /* z196 */); + case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY: + return (__ARCH__ >= 10 /* zEC12 */); + case LOAD_STORE_ON_CONDITION_2_FACILITY: + case VECTOR_FACILITY: + return (__ARCH__ >= 11 /* z13 */); + case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY: + case VECTOR_ENHANCEMENTS_1_FACILITY: + return (__ARCH__ >= 12 /* z14 */); + default: + SLJIT_UNREACHABLE(); + } +#endif + return 0; +} + +static SLJIT_INLINE unsigned long get_hwcap() +{ + static unsigned long hwcap = 0; + if (SLJIT_UNLIKELY(!hwcap)) { + hwcap = getauxval(AT_HWCAP); + SLJIT_ASSERT(hwcap != 0); + } + return hwcap; +} + +static SLJIT_INLINE int have_stfle() +{ + if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY)) + return 1; + + return (get_hwcap() & HWCAP_S390_STFLE); +} + +/* Report whether the given facility is available. This function always + performs a runtime check. 
*/ +static int have_facility_dynamic(facility_bit x) +{ +#if ENABLE_DYNAMIC_FACILITY_DETECTION + static struct { + sljit_uw bits[4]; + } cpu_features; + size_t size = sizeof(cpu_features); + const sljit_uw word_index = x >> 6; + const sljit_uw bit_index = ((1UL << 63) >> (x & 63)); + + SLJIT_ASSERT(x < size * 8); + if (SLJIT_UNLIKELY(!have_stfle())) + return 0; + + if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) { + __asm__ __volatile__ ( + "lgr %%r0, %0;" + "stfle 0(%1);" + /* outputs */: + /* inputs */: "d" ((size / 8) - 1), "a" (&cpu_features) + /* clobbers */: "r0", "cc", "memory" + ); + SLJIT_ASSERT(cpu_features.bits[0] != 0); + } + return (cpu_features.bits[word_index] & bit_index) != 0; +#else + return 0; +#endif +} + +#define HAVE_FACILITY(name, bit) \ +static SLJIT_INLINE int name() \ +{ \ + static int have = -1; \ + /* Static check first. May allow the function to be optimized away. */ \ + if (have_facility_static(bit)) \ + have = 1; \ + else if (SLJIT_UNLIKELY(have < 0)) \ + have = have_facility_dynamic(bit) ? 
1 : 0; \ +\ + return have; \ +} + +HAVE_FACILITY(have_eimm, EXTENDED_IMMEDIATE_FACILITY) +HAVE_FACILITY(have_ldisp, FAST_LONG_DISPLACEMENT_FACILITY) +HAVE_FACILITY(have_genext, GENERAL_INSTRUCTION_EXTENSION_FACILITY) +HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY) +HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY) +HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY) +#undef HAVE_FACILITY + +#define is_u12(d) (0 <= (d) && (d) <= 0x00000fffL) +#define is_u32(d) (0 <= (d) && (d) <= 0xffffffffL) + +#define CHECK_SIGNED(v, bitlen) \ + ((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1))) + +#define is_s8(d) CHECK_SIGNED((d), 8) +#define is_s16(d) CHECK_SIGNED((d), 16) +#define is_s20(d) CHECK_SIGNED((d), 20) +#define is_s32(d) ((d) == (sljit_s32)(d)) + +static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d) +{ + sljit_uw dh, dl; + + SLJIT_ASSERT(is_s20(d)); + + dh = (d >> 12) & 0xff; + dl = ((sljit_uw)d << 8) & 0xfff00; + return (dh | dl) << 8; +} + +/* TODO(carenas): variadic macro is not strictly needed */ +#define SLJIT_S390X_INSTRUCTION(op, ...) \ +static SLJIT_INLINE sljit_ins op(__VA_ARGS__) + +/* RR form instructions. */ +#define SLJIT_S390X_RR(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ +{ \ + return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \ +} + +/* AND */ +SLJIT_S390X_RR(nr, 0x1400) + +/* BRANCH AND SAVE */ +SLJIT_S390X_RR(basr, 0x0d00) + +/* BRANCH ON CONDITION */ +SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? 
*/ + +/* DIVIDE */ +SLJIT_S390X_RR(dr, 0x1d00) + +/* EXCLUSIVE OR */ +SLJIT_S390X_RR(xr, 0x1700) + +/* LOAD */ +SLJIT_S390X_RR(lr, 0x1800) + +/* LOAD COMPLEMENT */ +SLJIT_S390X_RR(lcr, 0x1300) + +/* OR */ +SLJIT_S390X_RR(or, 0x1600) + +#undef SLJIT_S390X_RR + +/* RRE form instructions */ +#define SLJIT_S390X_RRE(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ +{ \ + return (pattern) | R4A(dst) | R0A(src); \ +} + +/* AND */ +SLJIT_S390X_RRE(ngr, 0xb9800000) + +/* DIVIDE LOGICAL */ +SLJIT_S390X_RRE(dlr, 0xb9970000) +SLJIT_S390X_RRE(dlgr, 0xb9870000) + +/* DIVIDE SINGLE */ +SLJIT_S390X_RRE(dsgr, 0xb90d0000) + +/* EXCLUSIVE OR */ +SLJIT_S390X_RRE(xgr, 0xb9820000) + +/* LOAD */ +SLJIT_S390X_RRE(lgr, 0xb9040000) +SLJIT_S390X_RRE(lgfr, 0xb9140000) + +/* LOAD BYTE */ +SLJIT_S390X_RRE(lbr, 0xb9260000) +SLJIT_S390X_RRE(lgbr, 0xb9060000) + +/* LOAD COMPLEMENT */ +SLJIT_S390X_RRE(lcgr, 0xb9030000) + +/* LOAD HALFWORD */ +SLJIT_S390X_RRE(lhr, 0xb9270000) +SLJIT_S390X_RRE(lghr, 0xb9070000) + +/* LOAD LOGICAL */ +SLJIT_S390X_RRE(llgfr, 0xb9160000) + +/* LOAD LOGICAL CHARACTER */ +SLJIT_S390X_RRE(llcr, 0xb9940000) +SLJIT_S390X_RRE(llgcr, 0xb9840000) + +/* LOAD LOGICAL HALFWORD */ +SLJIT_S390X_RRE(llhr, 0xb9950000) +SLJIT_S390X_RRE(llghr, 0xb9850000) + +/* MULTIPLY LOGICAL */ +SLJIT_S390X_RRE(mlgr, 0xb9860000) + +/* MULTIPLY SINGLE */ +SLJIT_S390X_RRE(msgfr, 0xb91c0000) + +/* OR */ +SLJIT_S390X_RRE(ogr, 0xb9810000) + +/* SUBTRACT */ +SLJIT_S390X_RRE(sgr, 0xb9090000) + +#undef SLJIT_S390X_RRE + +/* RI-a form instructions */ +#define SLJIT_S390X_RIA(name, pattern, imm_type) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ +{ \ + return (pattern) | R20A(reg) | (imm & 0xffff); \ +} + +/* ADD HALFWORD IMMEDIATE */ +SLJIT_S390X_RIA(aghi, 0xa70b0000, sljit_s16) + +/* LOAD HALFWORD IMMEDIATE */ +SLJIT_S390X_RIA(lhi, 0xa7080000, sljit_s16) +SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16) + +/* LOAD LOGICAL IMMEDIATE */ +SLJIT_S390X_RIA(llihh, 
0xa50c0000, sljit_u16) +SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16) +SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16) +SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16) + +/* MULTIPLY HALFWORD IMMEDIATE */ +SLJIT_S390X_RIA(mhi, 0xa70c0000, sljit_s16) +SLJIT_S390X_RIA(mghi, 0xa70d0000, sljit_s16) + +/* OR IMMEDIATE */ +SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16) + +#undef SLJIT_S390X_RIA + +/* RIL-a form instructions (requires extended immediate facility) */ +#define SLJIT_S390X_RILA(name, pattern, imm_type) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ +{ \ + SLJIT_ASSERT(have_eimm()); \ + return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \ +} + +/* ADD IMMEDIATE */ +SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32) + +/* ADD IMMEDIATE HIGH */ +SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */ + +/* AND IMMEDIATE */ +SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32) + +/* EXCLUSIVE OR IMMEDIATE */ +SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32) + +/* INSERT IMMEDIATE */ +SLJIT_S390X_RILA(iihf, 0xc00800000000, sljit_u32) +SLJIT_S390X_RILA(iilf, 0xc00900000000, sljit_u32) + +/* LOAD IMMEDIATE */ +SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32) + +/* LOAD LOGICAL IMMEDIATE */ +SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32) +SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32) + +/* SUBTRACT LOGICAL IMMEDIATE */ +SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32) + +#undef SLJIT_S390X_RILA + +/* RX-a form instructions */ +#define SLJIT_S390X_RXA(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \ +{ \ + SLJIT_ASSERT((d & 0xfff) == d); \ +\ + return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \ +} + +/* LOAD */ +SLJIT_S390X_RXA(l, 0x58000000) + +/* LOAD ADDRESS */ +SLJIT_S390X_RXA(la, 0x41000000) + +/* LOAD HALFWORD */ +SLJIT_S390X_RXA(lh, 0x48000000) + +/* MULTIPLY SINGLE */ +SLJIT_S390X_RXA(ms, 0x71000000) + +/* STORE */ 
+SLJIT_S390X_RXA(st, 0x50000000) + +/* STORE CHARACTER */ +SLJIT_S390X_RXA(stc, 0x42000000) + +/* STORE HALFWORD */ +SLJIT_S390X_RXA(sth, 0x40000000) + +#undef SLJIT_S390X_RXA + +/* RXY-a instructions */ +#define SLJIT_S390X_RXYA(name, pattern, cond) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \ +{ \ + SLJIT_ASSERT(cond); \ +\ + return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \ +} + +/* LOAD */ +SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp()) +SLJIT_S390X_RXYA(lg, 0xe30000000004, 1) +SLJIT_S390X_RXYA(lgf, 0xe30000000014, 1) + +/* LOAD BYTE */ +SLJIT_S390X_RXYA(lb, 0xe30000000076, have_ldisp()) +SLJIT_S390X_RXYA(lgb, 0xe30000000077, have_ldisp()) + +/* LOAD HALFWORD */ +SLJIT_S390X_RXYA(lhy, 0xe30000000078, have_ldisp()) +SLJIT_S390X_RXYA(lgh, 0xe30000000015, 1) + +/* LOAD LOGICAL */ +SLJIT_S390X_RXYA(llgf, 0xe30000000016, 1) + +/* LOAD LOGICAL CHARACTER */ +SLJIT_S390X_RXYA(llc, 0xe30000000094, have_eimm()) +SLJIT_S390X_RXYA(llgc, 0xe30000000090, 1) + +/* LOAD LOGICAL HALFWORD */ +SLJIT_S390X_RXYA(llh, 0xe30000000095, have_eimm()) +SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1) + +/* MULTIPLY SINGLE */ +SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp()) +SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1) + +/* STORE */ +SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp()) +SLJIT_S390X_RXYA(stg, 0xe30000000024, 1) + +/* STORE CHARACTER */ +SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp()) + +/* STORE HALFWORD */ +SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp()) + +#undef SLJIT_S390X_RXYA + +/* RSY-a instructions */ +#define SLJIT_S390X_RSYA(name, pattern, cond) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \ +{ \ + SLJIT_ASSERT(cond); \ +\ + return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \ +} + +/* LOAD MULTIPLE */ +SLJIT_S390X_RSYA(lmg, 0xeb0000000004, 1) + +/* SHIFT LEFT LOGICAL */ +SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1) + +/* SHIFT RIGHT SINGLE */ 
+SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1) + +/* STORE MULTIPLE */ +SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1) + +#undef SLJIT_S390X_RSYA + +/* RIE-f instructions (require general-instructions-extension facility) */ +#define SLJIT_S390X_RIEF(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \ +{ \ + sljit_ins i3, i4, i5; \ +\ + SLJIT_ASSERT(have_genext()); \ + i3 = (sljit_ins)start << 24; \ + i4 = (sljit_ins)end << 16; \ + i5 = (sljit_ins)rot << 8; \ +\ + return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \ +} + +/* ROTATE THEN AND SELECTED BITS */ +/* SLJIT_S390X_RIEF(rnsbg, 0xec0000000054) */ + +/* ROTATE THEN EXCLUSIVE OR SELECTED BITS */ +/* SLJIT_S390X_RIEF(rxsbg, 0xec0000000057) */ + +/* ROTATE THEN OR SELECTED BITS */ +SLJIT_S390X_RIEF(rosbg, 0xec0000000056) + +/* ROTATE THEN INSERT SELECTED BITS */ +/* SLJIT_S390X_RIEF(risbg, 0xec0000000055) */ +/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */ + +/* ROTATE THEN INSERT SELECTED BITS HIGH */ +SLJIT_S390X_RIEF(risbhg, 0xec000000005d) + +/* ROTATE THEN INSERT SELECTED BITS LOW */ +/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */ + +#undef SLJIT_S390X_RIEF + +/* RRF-c instructions (require load/store-on-condition 1 facility) */ +#define SLJIT_S390X_RRFC(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \ +{ \ + sljit_ins m3; \ +\ + SLJIT_ASSERT(have_lscond1()); \ + m3 = (sljit_ins)(mask & 0xf) << 12; \ +\ + return (pattern) | m3 | R4A(dst) | R0A(src); \ +} + +/* LOAD HALFWORD IMMEDIATE ON CONDITION */ +SLJIT_S390X_RRFC(locr, 0xb9f20000) +SLJIT_S390X_RRFC(locgr, 0xb9e20000) + +#undef SLJIT_S390X_RRFC + +/* RIE-g instructions (require load/store-on-condition 2 facility) */ +#define SLJIT_S390X_RIEG(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \ +{ \ + sljit_ins m3, i2; \ +\ + SLJIT_ASSERT(have_lscond2()); \ + m3 = (sljit_ins)(mask & 0xf) << 
32; \ + i2 = (sljit_ins)(imm & 0xffffL) << 16; \ +\ + return (pattern) | R36A(reg) | m3 | i2; \ +} + +/* LOAD HALFWORD IMMEDIATE ON CONDITION */ +SLJIT_S390X_RIEG(lochi, 0xec0000000042) +SLJIT_S390X_RIEG(locghi, 0xec0000000046) + +#undef SLJIT_S390X_RIEG + +#define SLJIT_S390X_RILB(name, pattern, cond) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \ +{ \ + SLJIT_ASSERT(cond); \ +\ + return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \ +} + +/* BRANCH RELATIVE AND SAVE LONG */ +SLJIT_S390X_RILB(brasl, 0xc00500000000, 1) + +/* LOAD ADDRESS RELATIVE LONG */ +SLJIT_S390X_RILB(larl, 0xc00000000000, 1) + +/* LOAD RELATIVE LONG */ +SLJIT_S390X_RILB(lgrl, 0xc40800000000, have_genext()) + +#undef SLJIT_S390X_RILB + +SLJIT_S390X_INSTRUCTION(br, sljit_gpr target) +{ + return 0x07f0 | target; +} + +SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target) +{ + sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20; + sljit_ins ri2 = (sljit_ins)target & 0xffff; + return 0xa7040000L | m1 | ri2; +} + +SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target) +{ + sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36; + sljit_ins ri2 = (sljit_ins)target & 0xffffffff; + return 0xc00400000000L | m1 | ri2; +} + +SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src) +{ + SLJIT_ASSERT(have_eimm()); + return 0xb9830000 | R8A(dst) | R0A(src); +} + +/* INSERT PROGRAM MASK */ +SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst) +{ + return 0xb2220000 | R4A(dst); +} + +/* SET PROGRAM MASK */ +SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst) +{ + return 0x0400 | R4A(dst); +} + +/* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */ +SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) +{ + return risbhg(dst, src, start, 0x8 | end, rot); +} + +#undef SLJIT_S390X_INSTRUCTION + +static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r) +{ + /* Condition codes: bits 18 and 19. 
+ Transformation: + 0 (zero and no overflow) : unchanged + 1 (non-zero and no overflow) : unchanged + 2 (zero and overflow) : decreased by 1 + 3 (non-zero and overflow) : decreased by 1 if non-zero */ + FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1))); + FAIL_IF(push_inst(compiler, ipm(tmp1))); + FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r))); + FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3))); + FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000))); + FAIL_IF(push_inst(compiler, spm(tmp1))); + return SLJIT_SUCCESS; +} + +/* load 64-bit immediate into register without clobbering flags */ +static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v) +{ + /* 4 byte instructions */ + if (is_s16(v)) + return push_inst(compiler, lghi(target, (sljit_s16)v)); + + if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0) + return push_inst(compiler, llill(target, (sljit_u16)v)); + + if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0) + return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16))); + + if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0) + return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32))); + + if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0) + return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48))); + + if (is_s32(v)) + return push_inst(compiler, lgfi(target, (sljit_s32)v)); + + if (((sljit_uw)v >> 32) == 0) + return push_inst(compiler, llilf(target, (sljit_u32)v)); + + if (((sljit_uw)v << 32) == 0) + return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32))); + + FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v))); + return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32))); +} + +struct addr { + sljit_gpr base; + sljit_gpr index; + sljit_s32 offset; +}; + +/* transform memory operand into D(X,B) form with a signed 20-bit offset */ +static sljit_s32 make_addr_bxy(struct sljit_compiler 
*compiler, + struct addr *addr, sljit_s32 mem, sljit_sw off, + sljit_gpr tmp /* clobbered, must not be r0 */) +{ + sljit_gpr base = r0; + sljit_gpr index = r0; + + SLJIT_ASSERT(tmp != r0); + if (mem & REG_MASK) + base = gpr(mem & REG_MASK); + + if (mem & OFFS_REG_MASK) { + index = gpr(OFFS_REG(mem)); + if (off != 0) { + /* shift and put the result into tmp */ + SLJIT_ASSERT(0 <= off && off < 64); + FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0))); + index = tmp; + off = 0; /* clear offset */ + } + } + else if (!is_s20(off)) { + FAIL_IF(push_load_imm_inst(compiler, tmp, off)); + index = tmp; + off = 0; /* clear offset */ + } + addr->base = base; + addr->index = index; + addr->offset = (sljit_s32)off; + return SLJIT_SUCCESS; +} + +/* transform memory operand into D(X,B) form with an unsigned 12-bit offset */ +static sljit_s32 make_addr_bx(struct sljit_compiler *compiler, + struct addr *addr, sljit_s32 mem, sljit_sw off, + sljit_gpr tmp /* clobbered, must not be r0 */) +{ + sljit_gpr base = r0; + sljit_gpr index = r0; + + SLJIT_ASSERT(tmp != r0); + if (mem & REG_MASK) + base = gpr(mem & REG_MASK); + + if (mem & OFFS_REG_MASK) { + index = gpr(OFFS_REG(mem)); + if (off != 0) { + /* shift and put the result into tmp */ + SLJIT_ASSERT(0 <= off && off < 64); + FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0))); + index = tmp; + off = 0; /* clear offset */ + } + } + else if (!is_u12(off)) { + FAIL_IF(push_load_imm_inst(compiler, tmp, off)); + index = tmp; + off = 0; /* clear offset */ + } + addr->base = base; + addr->index = index; + addr->offset = (sljit_s32)off; + return SLJIT_SUCCESS; +} + +#define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base) +#define WHEN(cond, r, i1, i2, addr) \ + (cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr) + +/* May clobber tmp1. 
*/ +static sljit_s32 load_store_op(struct sljit_compiler *compiler, sljit_gpr reg, + sljit_s32 mem, sljit_sw memw, + sljit_s32 is_32bit, const sljit_ins* forms) +{ + struct addr addr; + + SLJIT_ASSERT(mem & SLJIT_MEM); + + if (is_32bit && ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw))) { + FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1)); + return push_inst(compiler, forms[0] | R20A(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset); + } + + FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1)); + return push_inst(compiler, (is_32bit ? forms[1] : forms[2]) | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); +} + +static const sljit_ins load_forms[3] = { + 0x58000000 /* l */, + 0xe30000000058 /* ly */, + 0xe30000000004 /* lg */ +}; + +static const sljit_ins store_forms[3] = { + 0x50000000 /* st */, + 0xe30000000050 /* sty */, + 0xe30000000024 /* stg */ +}; + +static const sljit_ins load_halfword_forms[3] = { + 0x48000000 /* lh */, + 0xe30000000078 /* lhy */, + 0xe30000000015 /* lgh */ +}; + +/* May clobber tmp1. */ +static SLJIT_INLINE sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r, + sljit_s32 src, sljit_sw srcw, + sljit_s32 is_32bit) +{ + return load_store_op(compiler, dst_r, src, srcw, is_32bit, load_forms); +} + +/* May clobber tmp1. */ +static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r, + sljit_s32 src, sljit_sw srcw, + sljit_s32 is_32bit) +{ + struct addr addr; + sljit_ins ins; + + SLJIT_ASSERT(src & SLJIT_MEM); + + FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1)); + + ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */; + return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); +} + +/* May clobber tmp1. 
*/ +static SLJIT_INLINE sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 is_32bit) +{ + return load_store_op(compiler, src_r, dst, dstw, is_32bit, store_forms); +} + +#undef WHEN + +static sljit_s32 emit_move(struct sljit_compiler *compiler, + sljit_gpr dst_r, + sljit_s32 src, sljit_sw srcw) +{ + sljit_gpr src_r; + + SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK)); + + if (src == SLJIT_IMM) + return push_load_imm_inst(compiler, dst_r, srcw); + + if (src & SLJIT_MEM) + return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0); + + src_r = gpr(src & REG_MASK); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r)); +} + +static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_gpr dst_r = tmp0; + sljit_gpr src_r = tmp1; + sljit_s32 needs_move = 1; + + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); + + if (dst == src1) + needs_move = 0; + else if (dst == src2) { + dst_r = tmp0; + needs_move = 2; + } + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (FAST_IS_REG(src2)) + src_r = gpr(src2); + else + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + + FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r))); + + if (needs_move != 2) + return SLJIT_SUCCESS; + + dst_r = gpr(dst & REG_MASK); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); +} + +static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w) +{ + sljit_gpr dst_r = FAST_IS_REG(dst) ? 
gpr(dst) : tmp0; + sljit_gpr src_r = tmp1; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1); + else + FAIL_IF(emit_move(compiler, tmp1, src1, src1w)); + + return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)); +} + +static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr src1_r = tmp0; + sljit_gpr src2_r = tmp1; + + if (FAST_IS_REG(src1)) + src1_r = gpr(src1); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if (FAST_IS_REG(src2)) + src2_r = gpr(src2); + else + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + + return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r)); +} + +typedef enum { + RI_A, + RIL_A, +} emit_ril_type; + +static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_sw src2w, + emit_ril_type type) +{ + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); + + if (dst == src1) + needs_move = 0; + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (type == RIL_A) + return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff)); + return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff)); +} + +static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_sw src2w) +{ + sljit_gpr dst_r = FAST_IS_REG(dst) ? 
gpr(dst) : tmp0; + sljit_gpr src_r = tmp0; + + if (!FAST_IS_REG(src1)) + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + else + src_r = gpr(src1 & REG_MASK); + + return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16); +} + +typedef enum { + RX_A, + RXY_A, +} emit_rx_type; + +static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w, + emit_rx_type type) +{ + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + sljit_gpr base, index; + + SLJIT_ASSERT(src2 & SLJIT_MEM); + + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); + + if (dst == src1) + needs_move = 0; + else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) { + dst_r = tmp0; + needs_move = 2; + } + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + base = gpr(src2 & REG_MASK); + index = tmp0; + + if (src2 & OFFS_REG_MASK) { + index = gpr(OFFS_REG(src2)); + + if (src2w != 0) { + FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0))); + src2w = 0; + index = tmp1; + } + } else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w)); + + if (src2 & REG_MASK) + index = tmp1; + else + base = tmp1; + src2w = 0; + } + + if (type == RX_A) + ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w; + else + ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w); + + FAIL_IF(push_inst(compiler, ins)); + + if (needs_move != 2) + return SLJIT_SUCCESS; + + dst_r = gpr(dst); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? 
lr(dst_r, tmp0) : lgr(dst_r, tmp0)); +} + +/* Emit the instruction `ins` with the low 8 bits of srcw placed in bits 32-39 + * and (dst, dstw) as its memory operand. The emitted form only carries a base + * register and a signed 20-bit displacement, so an index register or a wider + * offset is first folded into tmp1. May clobber tmp1. */ +static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, sljit_sw dstw, + sljit_sw srcw) +{ + sljit_gpr dst_r = tmp1; + + SLJIT_ASSERT(dst & SLJIT_MEM); + + if (dst & OFFS_REG_MASK) { + sljit_gpr index = gpr(OFFS_REG(dst)); + + if ((dstw & 0x3) != 0) { + /* scale the operand's index register (not the still unset tmp1) */ + FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0))); + index = tmp1; + } + + /* tmp1 = base register + (scaled) index register */ + FAIL_IF(push_inst(compiler, la(tmp1, 0, index, gpr(dst & REG_MASK)))); + dstw = 0; + } + else if (!is_s20(dstw)) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw)); + + /* tmp1 = base register + dstw; without a base tmp1 already is the address */ + if (dst & REG_MASK) + FAIL_IF(push_inst(compiler, la(tmp1, 0, tmp1, gpr(dst & REG_MASK)))); + + dstw = 0; + } + else + dst_r = gpr(dst & REG_MASK); + + return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw)); +} + +struct ins_forms { + sljit_ins op_r; + sljit_ins op_gr; + sljit_ins op_rk; + sljit_ins op_grk; + sljit_ins op; + sljit_ins op_y; + sljit_ins op_g; +}; + +static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 mode = compiler->mode; + sljit_ins ins, ins_k; + + if ((src1 | src2) & SLJIT_MEM) { + sljit_ins ins12, ins20; + + if (mode & SLJIT_32) { + ins12 = forms->op; + ins20 = forms->op_y; + } + else { + ins12 = 0; + ins20 = forms->op_g; + } + + if (ins12 && ins20) { + /* Extra instructions needed for address computation can be executed independently. */ + if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM) + || ((src1 & OFFS_REG_MASK) ? 
(src1w & 0x3) == 0 : is_s20(src1w)))) { + if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w)) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + + if (src1 & SLJIT_MEM) { + if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w)) + return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A); + + return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A); + } + } + else if (ins12 || ins20) { + emit_rx_type rx_type; + + if (ins12) { + rx_type = RX_A; + ins = ins12; + } + else { + rx_type = RXY_A; + ins = ins20; + } + + if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM) + || ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w))))) + return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type); + + if (src1 & SLJIT_MEM) + return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type); + } + } + + if (mode & SLJIT_32) { + ins = forms->op_r; + ins_k = forms->op_rk; + } + else { + ins = forms->op_gr; + ins_k = forms->op_grk; + } + + SLJIT_ASSERT(ins != 0 || ins_k != 0); + + if (ins && FAST_IS_REG(dst)) { + if (dst == src1) + return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w); + + if (dst == src2) + return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w); + } + + if (ins_k == 0) + return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w); + + return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w); +} + +static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 mode = compiler->mode; + sljit_ins ins; + + if (src2 & SLJIT_MEM) { + sljit_ins ins12, ins20; + + if (mode & SLJIT_32) { + ins12 = forms->op; + ins20 = forms->op_y; + } + else { + ins12 = 0; + ins20 = forms->op_g; + } + + if (ins12 && ins20) { + if ((src2 & 
OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w)) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + else if (ins12) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + else if (ins20) + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + + ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk; + + if (ins == 0 || (FAST_IS_REG(dst) && dst == src1)) + return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w); + + return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + sljit_sw executable_offset; + sljit_uw ins_size = compiler->size << 1; + sljit_uw pool_size = 0; /* literal pool */ + sljit_uw pad_size; + sljit_uw half_count; + SLJIT_NEXT_DEFINE_TYPES; + struct sljit_memory_fragment *buf; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_u16 *code; + sljit_u16 *code_ptr; + sljit_uw *pool, *pool_ptr; + sljit_ins ins; + sljit_sw source, offset; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + jump = compiler->jumps; + while (jump != NULL) { + if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) { + /* encoded: */ + /* brasl %r14, (or brcl , ) */ + /* replace with: */ + /* lgrl %r1, */ + /* bras %r14, %r1 (or bcr , %r1) */ + pool_size += sizeof(*pool); + if (!(jump->flags & JUMP_MOV_ADDR)) + ins_size += 2; + } + jump = jump->next; + } + + const_ = compiler->consts; + while (const_) { + pool_size += sizeof(*pool); + const_ = const_->next; + } + + /* pad code size to 8 bytes so is accessible with half word offsets */ + /* the literal pool needs to be doubleword aligned */ + 
pad_size = ((ins_size + 7UL) & ~7UL) - ins_size; + SLJIT_ASSERT(pad_size < 8UL); + + /* allocate target buffer */ + code = (sljit_u16*)allocate_executable_memory(ins_size + pad_size + pool_size, options, exec_allocator_data, &executable_offset); + PTR_FAIL_WITH_EXEC_IF(code); + code_ptr = code; + + /* TODO(carenas): pool is optional, and the ABI recommends it to + * be created before the function code, instead of + * globally; if generated code is too big could + * need offsets bigger than 32bit words and asser() + */ + pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size); + pool_ptr = pool; + buf = compiler->buf; + half_count = 0; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + SLJIT_NEXT_INIT_TYPES(); + SLJIT_GET_NEXT_MIN(); + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 3); + do { + ins = *buf_ptr++; + + if (next_min_addr == half_count) { + SLJIT_ASSERT(!label || label->size >= half_count); + SLJIT_ASSERT(!jump || jump->addr >= half_count); + SLJIT_ASSERT(!const_ || const_->addr >= half_count); + + if (next_min_addr == next_label_size) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr == next_jump_addr) { + if (SLJIT_UNLIKELY(jump->flags & JUMP_MOV_ADDR)) { + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + jump->addr = (sljit_uw)pool_ptr; + + /* store target into pool */ + offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + pool_ptr++; + + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); + ins |= (sljit_ins)offset & 0xffffffff; + } else if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR)) { + sljit_ins arg; + + jump->addr = (sljit_uw)pool_ptr; + + /* load address into tmp1 */ + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + offset = 
(sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); + + code_ptr[0] = (sljit_u16)(0xc408 | R4A(tmp1) /* lgrl */); + code_ptr[1] = (sljit_u16)(offset >> 16); + code_ptr[2] = (sljit_u16)offset; + code_ptr += 3; + pool_ptr++; + + /* branch to tmp1 */ + arg = (ins >> 36) & 0xf; + if (((ins >> 32) & 0xf) == 4) { + /* brcl -> bcr */ + ins = bcr(arg, tmp1); + } else { + SLJIT_ASSERT(((ins >> 32) & 0xf) == 5); + /* brasl -> basr */ + ins = basr(arg, tmp1); + } + + /* Adjust half_count. */ + half_count += 2; + } else + jump->addr = (sljit_uw)code_ptr; + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } else if (next_min_addr == next_const_addr) { + /* update instruction with relative address of constant */ + source = (sljit_sw)code_ptr; + offset = (sljit_sw)pool_ptr - source; + + SLJIT_ASSERT(!(offset & 0x1)); + offset >>= 1; /* halfword (not byte) offset */ + SLJIT_ASSERT(is_s32(offset)); + + ins |= (sljit_ins)offset & 0xffffffff; + + /* update address */ + const_->addr = (sljit_uw)pool_ptr; + + /* store initial value into pool and update pool address */ + *(pool_ptr++) = (sljit_uw)(((struct sljit_s390x_const*)const_)->init_value); + + /* move to next constant */ + const_ = const_->next; + next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_); + } + + SLJIT_GET_NEXT_MIN(); + } + + if (ins & 0xffff00000000L) { + *code_ptr++ = (sljit_u16)(ins >> 32); + half_count++; + } + + if (ins & 0xffffffff0000L) { + *code_ptr++ = (sljit_u16)(ins >> 16); + half_count++; + } + + *code_ptr++ = (sljit_u16)ins; + half_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (next_label_size == half_count) { + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code + (ins_size >> 1) == code_ptr); + 
SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr); + + jump = compiler->jumps; + while (jump != NULL) { + offset = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr); + + if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) { + /* Store jump target into pool. */ + *(sljit_uw*)(jump->addr) = (sljit_uw)offset; + } else { + code_ptr = (sljit_u16*)jump->addr; + offset -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + /* offset must be halfword aligned */ + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */ + + code_ptr[1] = (sljit_u16)(offset >> 16); + code_ptr[2] = (sljit_u16)offset; + } + jump = jump->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = ins_size; + if (pool_size) + compiler->executable_size += (pad_size + pool_size); + + code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + /* TODO(mundaym): implement all */ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return (SLJIT_IS_FPU_AVAILABLE) != 0; +#else + return 1; +#endif /* SLJIT_IS_FPU_AVAILABLE */ + + case SLJIT_HAS_CLZ: + case SLJIT_HAS_REV: + case SLJIT_HAS_ROT: + case SLJIT_HAS_PREFETCH: + case SLJIT_HAS_COPY_F32: + case SLJIT_HAS_COPY_F64: + case SLJIT_HAS_SIMD: + case SLJIT_HAS_ATOMIC: + return 1; + + case SLJIT_HAS_CTZ: + return 2; + + case SLJIT_HAS_CMOV: + return have_lscond1() ? 
1 : 0; + } + return 0; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + SLJIT_UNUSED_ARG(type); + return 0; +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); + sljit_s32 offset, i, tmp; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + /* Saved registers are stored in callee allocated save area. */ + SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13); + + offset = 2 * SSIZE_OF(sw); + if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) { + if (saved_arg_count == 0) { + FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15))); + offset += 9 * SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15))); + offset += (8 - saved_arg_count) * SSIZE_OF(sw); + } + } else { + if (scratches == SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else if (scratches > SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15))); + offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw); + } + + if (saved_arg_count == 0) { + if (saveds == 0) { + FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15))); + offset += (saveds 
+ 1) * SSIZE_OF(sw); + } + } else if (saveds > saved_arg_count) { + if (saveds == saved_arg_count + 1) { + FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15))); + offset += (saveds - saved_arg_count) * SSIZE_OF(sw); + } + } + } + + if (saved_arg_count > 0) { + FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf; + compiler->local_size = local_size; + + if (is_s20(-local_size)) + FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size))); + else + FAIL_IF(push_inst(compiler, 0xc20400000000 /* slgfi */ | R36A(r15) | (sljit_ins)local_size)); + + if (options & SLJIT_ENTER_REG_ARG) + return SLJIT_SUCCESS; + + arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; + tmp = 0; + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp)))); + saved_arg_count++; + } + tmp++; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + 
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf; + return SLJIT_SUCCESS; +} + +/* Emit the frame teardown: add compiler->local_size back to r15, then reload + * the saved general purpose and floating point registers from offsets + * starting at 2 * SSIZE_OF(sw) relative to r15. + * last_reg is the upper end of the reloaded register range; r14 is only + * reloaded when last_reg == r14, but its slot is always skipped over. */ +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg) +{ + sljit_s32 offset, i, tmp; + sljit_s32 local_size = compiler->local_size; + sljit_s32 saveds = compiler->saveds; + sljit_s32 scratches = compiler->scratches; + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + + /* pick the shortest encoding that can hold local_size */ + if (is_u12(local_size)) + FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size)); + else if (is_s20(local_size)) + FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size))); + else + FAIL_IF(push_inst(compiler, 0xc20a00000000 /* algfi */ | R36A(r15) | (sljit_ins)local_size)); + + offset = 2 * SSIZE_OF(sw); + if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) { + if (kept_saveds_count == 0) { + FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15))); + offset += 9 * SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15))); + offset += (8 - kept_saveds_count) * SSIZE_OF(sw); + } + } else { + if (scratches == SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else if (scratches > SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15))); + offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw); + } + + if (kept_saveds_count == 0) { + if (saveds == 0) { + if (last_reg == r14) + FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else if (saveds == 1 && last_reg == r13) { + 
FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15))); + offset += 2 * SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15))); + offset += (saveds + 1) * SSIZE_OF(sw); + } + } else if (saveds > kept_saveds_count) { + if (saveds == kept_saveds_count + 1) { + FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15))); + offset += (saveds - kept_saveds_count) * SSIZE_OF(sw); + } + } + } + + if (kept_saveds_count > 0) { + if (last_reg == r14) + FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler, r14)); + return push_inst(compiler, br(r14)); /* return */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */)); + src = TMP_REG2; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src)))); + src = TMP_REG2; + srcw = 
0; + } + + FAIL_IF(emit_stack_frame_release(compiler, r13)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + sljit_gpr arg0 = gpr(SLJIT_R0); + sljit_gpr arg1 = gpr(SLJIT_R1); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op) | (op & SLJIT_32); + switch (op) { + case SLJIT_BREAKPOINT: + /* The following invalid instruction is emitted by gdb. */ + return push_inst(compiler, 0x0001 /* 2-byte trap */); + case SLJIT_NOP: + return push_inst(compiler, 0x0700 /* 2-byte nop */); + case SLJIT_LMUL_UW: + FAIL_IF(push_inst(compiler, mlgr(arg0, arg0))); + break; + case SLJIT_LMUL_SW: + /* signed multiplication from: */ + /* Hacker's Delight, Second Edition: Chapter 8-3. 
*/ + FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0))); + FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0))); + FAIL_IF(push_inst(compiler, ngr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, ngr(tmp1, arg0))); + + /* unsigned multiplication */ + FAIL_IF(push_inst(compiler, mlgr(arg0, arg0))); + + FAIL_IF(push_inst(compiler, sgr(arg0, tmp0))); + FAIL_IF(push_inst(compiler, sgr(arg0, tmp1))); + break; + case SLJIT_DIV_U32: + case SLJIT_DIVMOD_U32: + /* unsigned dividend: the high half in tmp0 is zero */ + FAIL_IF(push_inst(compiler, lhi(tmp0, 0))); + FAIL_IF(push_inst(compiler, lr(tmp1, arg0))); + FAIL_IF(push_inst(compiler, dlr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */ + if (op == SLJIT_DIVMOD_U32) + return push_inst(compiler, lr(arg1, tmp0)); /* remainder */ + + return SLJIT_SUCCESS; + case SLJIT_DIV_S32: + case SLJIT_DIVMOD_S32: + /* dr divides the signed 64-bit value held in tmp0:tmp1, so the high + * half must be the sign extension of the dividend: a zero high half + * would turn a negative dividend into a large positive one */ + FAIL_IF(push_inst(compiler, lr(tmp0, arg0))); + FAIL_IF(push_inst(compiler, 0x8a000000 /* sra */ | R20A(tmp0) | 31)); + FAIL_IF(push_inst(compiler, lr(tmp1, arg0))); + FAIL_IF(push_inst(compiler, dr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */ + if (op == SLJIT_DIVMOD_S32) + return push_inst(compiler, lr(arg1, tmp0)); /* remainder */ + + return SLJIT_SUCCESS; + case SLJIT_DIV_UW: + case SLJIT_DIVMOD_UW: + FAIL_IF(push_inst(compiler, lghi(tmp0, 0))); + FAIL_IF(push_inst(compiler, lgr(tmp1, arg0))); + FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */ + if (op == SLJIT_DIVMOD_UW) + return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */ + + return SLJIT_SUCCESS; + case SLJIT_DIV_SW: + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst(compiler, lgr(tmp1, arg0))); + FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */ + if (op == SLJIT_DIVMOD_SW) + return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */ + + return SLJIT_SUCCESS; + case SLJIT_ENDBR: + return SLJIT_SUCCESS; + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + default: + SLJIT_UNREACHABLE(); + } + /* 
swap result registers */ + FAIL_IF(push_inst(compiler, lgr(tmp0, arg0))); + FAIL_IF(push_inst(compiler, lgr(arg0, arg1))); + return push_inst(compiler, lgr(arg1, tmp0)); +} + +static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r) +{ + sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ); + + if ((op & SLJIT_32) && src_r != tmp0) { + FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r))); + src_r = tmp0; + } + + if (is_ctz) { + FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r))); + + if (src_r == tmp0) + FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1))); + else + FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r))); + + src_r = tmp0; + } + + FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r))); + + if (is_ctz) + FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16))); + + if (op & SLJIT_32) { + if (!is_ctz && dst_r != tmp0) + return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16)); + + FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32)); + } + + if (is_ctz) + FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8))); + + if (dst_r == tmp0) + return SLJIT_SUCCESS; + + return push_inst(compiler, ((op & SLJIT_32) ? 
0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0)); +} + +static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + struct addr addr; + sljit_gpr reg; + sljit_ins ins; + sljit_s32 opcode = GET_OPCODE(op); + sljit_s32 is_16bit = (opcode == SLJIT_REV_U16 || opcode == SLJIT_REV_S16); + + if (dst & SLJIT_MEM) { + if (src & SLJIT_MEM) { + FAIL_IF(load_store_op(compiler, tmp0, src, srcw, op & SLJIT_32, is_16bit ? load_halfword_forms : load_forms)); + reg = tmp0; + } else + reg = gpr(src); + + FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1)); + + if (is_16bit) + ins = 0xe3000000003f /* strvh */; + else + ins = (op & SLJIT_32) ? 0xe3000000003e /* strv */ : 0xe3000000002f /* strvg */; + + return push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); + } + + reg = gpr(dst); + + if (src & SLJIT_MEM) { + FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1)); + + if (is_16bit) + ins = 0xe3000000001f /* lrvh */; + else + ins = (op & SLJIT_32) ? 0xe3000000001e /* lrv */ : 0xe3000000000f /* lrvg */; + + FAIL_IF(push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset))); + + if (opcode == SLJIT_REV) + return SLJIT_SUCCESS; + + if (is_16bit) { + if (op & SLJIT_32) + ins = (opcode == SLJIT_REV_U16) ? 0xb9950000 /* llhr */ : 0xb9270000 /* lhr */; + else + ins = (opcode == SLJIT_REV_U16) ? 0xb9850000 /* llghr */ : 0xb9070000 /* lghr */; + } else + ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */; + + return push_inst(compiler, ins | R4A(reg) | R0A(reg)); + } + + ins = (op & SLJIT_32) ? 0xb91f0000 /* lrvr */ : 0xb90f0000 /* lrvgr */; + FAIL_IF(push_inst(compiler, ins | R4A(reg) | R0A(gpr(src)))); + + if (opcode == SLJIT_REV) + return SLJIT_SUCCESS; + + if (!is_16bit) { + ins = (opcode == SLJIT_REV_U32) ? 
0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */; + return push_inst(compiler, ins | R4A(reg) | R0A(reg)); + } + + if (op & SLJIT_32) { + ins = (opcode == SLJIT_REV_U16) ? 0x88000000 /* srl */ : 0x8a000000 /* sra */; + return push_inst(compiler, ins | R20A(reg) | 16); + } + + ins = (opcode == SLJIT_REV_U16) ? 0xeb000000000c /* srlg */ : 0xeb000000000a /* srag */; + return push_inst(compiler, ins | R36A(reg) | R32A(reg) | (48 << 16)); +} + +/* LEVAL will be defined later with different parameters as needed */ +#define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins; + struct addr mem; + sljit_gpr dst_r; + sljit_gpr src_r; + sljit_s32 opcode = GET_OPCODE(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) { + /* LOAD REGISTER */ + if (FAST_IS_REG(dst) && FAST_IS_REG(src)) { + dst_r = gpr(dst); + src_r = gpr(src); + switch (opcode | (op & SLJIT_32)) { + /* 32-bit */ + case SLJIT_MOV32_U8: + ins = llcr(dst_r, src_r); + break; + case SLJIT_MOV32_S8: + ins = lbr(dst_r, src_r); + break; + case SLJIT_MOV32_U16: + ins = llhr(dst_r, src_r); + break; + case SLJIT_MOV32_S16: + ins = lhr(dst_r, src_r); + break; + case SLJIT_MOV32: + if (dst_r == src_r) + return SLJIT_SUCCESS; + ins = lr(dst_r, src_r); + break; + /* 64-bit */ + case SLJIT_MOV_U8: + ins = llgcr(dst_r, src_r); + break; + case SLJIT_MOV_S8: + ins = lgbr(dst_r, src_r); + break; + case SLJIT_MOV_U16: + ins = llghr(dst_r, src_r); + break; + case SLJIT_MOV_S16: + ins = lghr(dst_r, src_r); + break; + case SLJIT_MOV_U32: + ins = llgfr(dst_r, src_r); + break; + case SLJIT_MOV_S32: + ins = lgfr(dst_r, src_r); + break; + case SLJIT_MOV: + case SLJIT_MOV_P: + if (dst_r == src_r) + 
return SLJIT_SUCCESS; + ins = lgr(dst_r, src_r); + break; + default: + ins = 0; + SLJIT_UNREACHABLE(); + break; + } + FAIL_IF(push_inst(compiler, ins)); + return SLJIT_SUCCESS; + } + /* LOAD IMMEDIATE */ + if (FAST_IS_REG(dst) && src == SLJIT_IMM) { + switch (opcode) { + case SLJIT_MOV_U8: + srcw = (sljit_sw)((sljit_u8)(srcw)); + break; + case SLJIT_MOV_S8: + srcw = (sljit_sw)((sljit_s8)(srcw)); + break; + case SLJIT_MOV_U16: + srcw = (sljit_sw)((sljit_u16)(srcw)); + break; + case SLJIT_MOV_S16: + srcw = (sljit_sw)((sljit_s16)(srcw)); + break; + case SLJIT_MOV_U32: + srcw = (sljit_sw)((sljit_u32)(srcw)); + break; + case SLJIT_MOV_S32: + case SLJIT_MOV32: + srcw = (sljit_sw)((sljit_s32)(srcw)); + break; + } + return push_load_imm_inst(compiler, gpr(dst), srcw); + } + /* LOAD */ + /* TODO(carenas): avoid reg being defined later */ + #define LEVAL(i) EVAL(i, reg, mem) + if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) { + sljit_gpr reg = gpr(dst); + + FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1)); + /* TODO(carenas): convert all calls below to LEVAL */ + switch (opcode | (op & SLJIT_32)) { + case SLJIT_MOV32_U8: + ins = llc(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV32_S8: + ins = lb(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV32_U16: + ins = llh(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV32_S16: + ins = WHEN2(is_u12(mem.offset), lh, lhy); + break; + case SLJIT_MOV32: + ins = WHEN2(is_u12(mem.offset), l, ly); + break; + case SLJIT_MOV_U8: + ins = LEVAL(llgc); + break; + case SLJIT_MOV_S8: + ins = lgb(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV_U16: + ins = LEVAL(llgh); + break; + case SLJIT_MOV_S16: + ins = lgh(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV_U32: + ins = LEVAL(llgf); + break; + case SLJIT_MOV_S32: + ins = lgf(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV_P: + case SLJIT_MOV: + ins = lg(reg, mem.offset, mem.index, mem.base); + 
break; + default: + ins = 0; + SLJIT_UNREACHABLE(); + break; + } + FAIL_IF(push_inst(compiler, ins)); + return SLJIT_SUCCESS; + } + /* STORE and STORE IMMEDIATE */ + if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) { + struct addr mem; + sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0; + + if (src == SLJIT_IMM) { + /* TODO(mundaym): MOVE IMMEDIATE? */ + FAIL_IF(push_load_imm_inst(compiler, reg, srcw)); + } + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + switch (opcode) { + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + return push_inst(compiler, + WHEN2(is_u12(mem.offset), stc, stcy)); + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + return push_inst(compiler, + WHEN2(is_u12(mem.offset), sth, sthy)); + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + return push_inst(compiler, + WHEN2(is_u12(mem.offset), st, sty)); + case SLJIT_MOV_P: + case SLJIT_MOV: + FAIL_IF(push_inst(compiler, LEVAL(stg))); + return SLJIT_SUCCESS; + default: + SLJIT_UNREACHABLE(); + } + } + #undef LEVAL + /* MOVE CHARACTERS */ + if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) { + struct addr mem; + FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1)); + switch (opcode) { + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + FAIL_IF(push_inst(compiler, + EVAL(llgc, tmp0, mem))); + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + return push_inst(compiler, + EVAL(stcy, tmp0, mem)); + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + FAIL_IF(push_inst(compiler, + EVAL(llgh, tmp0, mem))); + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + return push_inst(compiler, + EVAL(sthy, tmp0, mem)); + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + FAIL_IF(push_inst(compiler, + EVAL(ly, tmp0, mem))); + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + return push_inst(compiler, + EVAL(sty, tmp0, mem)); + case SLJIT_MOV_P: + case SLJIT_MOV: + FAIL_IF(push_inst(compiler, + EVAL(lg, tmp0, mem))); + FAIL_IF(make_addr_bxy(compiler, &mem, dst, 
dstw, tmp1)); + FAIL_IF(push_inst(compiler, + EVAL(stg, tmp0, mem))); + return SLJIT_SUCCESS; + default: + SLJIT_UNREACHABLE(); + } + } + SLJIT_UNREACHABLE(); + } + + SLJIT_ASSERT(src != SLJIT_IMM); + + dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; + src_r = FAST_IS_REG(src) ? gpr(src) : tmp0; + + compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); + + /* TODO(mundaym): optimize loads and stores */ + switch (opcode) { + case SLJIT_CLZ: + case SLJIT_CTZ: + if (src & SLJIT_MEM) + FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32)); + + FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r)); + break; + case SLJIT_REV_U32: + case SLJIT_REV_S32: + op |= SLJIT_32; + /* fallthrough */ + case SLJIT_REV: + case SLJIT_REV_U16: + case SLJIT_REV_S16: + return sljit_emit_rev(compiler, op, dst, dstw, src, srcw); + default: + SLJIT_UNREACHABLE(); + } + + if (dst & SLJIT_MEM) + return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32); + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE int is_commutative(sljit_s32 op) +{ + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + case SLJIT_MUL: + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return 1; + } + return 0; +} + +static const struct ins_forms add_forms = { + 0x1a00, /* ar */ + 0xb9080000, /* agr */ + 0xb9f80000, /* ark */ + 0xb9e80000, /* agrk */ + 0x5a000000, /* a */ + 0xe3000000005a, /* ay */ + 0xe30000000008, /* ag */ +}; + +static const struct ins_forms logical_add_forms = { + 0x1e00, /* alr */ + 0xb90a0000, /* algr */ + 0xb9fa0000, /* alrk */ + 0xb9ea0000, /* algrk */ + 0x5e000000, /* al */ + 0xe3000000005e, /* aly */ + 0xe3000000000a, /* alg */ +}; + +static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW; + int sets_zero_overflow = (op & (SLJIT_SET_Z | 
VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); + const struct ins_forms *forms; + sljit_ins ins; + + if (src2 == SLJIT_IMM) { + if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { + if (sets_overflow) + ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + else + ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + return emit_siy(compiler, ins, dst, dstw, src2w); + } + + if (is_s16(src2w)) { + if (sets_overflow) + ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + else + ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w)); + goto done; + } + + if (!sets_overflow) { + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; + } + if (is_u32(-src2w)) { + FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A)); + goto done; + } + } + else if ((op & SLJIT_32) || is_s32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; + } + } + + forms = sets_overflow ? &add_forms : &logical_add_forms; + FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w)); + +done: + if (sets_zero_overflow) + FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0)); + + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); + + return SLJIT_SUCCESS; +} + +static const struct ins_forms sub_forms = { + 0x1b00, /* sr */ + 0xb9090000, /* sgr */ + 0xb9f90000, /* srk */ + 0xb9e90000, /* sgrk */ + 0x5b000000, /* s */ + 0xe3000000005b, /* sy */ + 0xe30000000009, /* sg */ +}; + +static const struct ins_forms logical_sub_forms = { + 0x1f00, /* slr */ + 0xb90b0000, /* slgr */ + 0xb9fb0000, /* slrk */ + 0xb9eb0000, /* slgrk */ + 0x5f000000, /* sl */ + 0xe3000000005f, /* sly */ + 0xe3000000000b, /* slg */ +}; + +static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 flag_type = GET_FLAG_TYPE(op); + int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW); + int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); + const struct ins_forms *forms; + sljit_ins ins; + + if (dst == TMP_REG2 && flag_type <= SLJIT_SIG_LESS_EQUAL) { + int compare_signed = flag_type >= SLJIT_SIG_LESS; + + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE; + + if (src2 == SLJIT_IMM) { + if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) { + if ((op & SLJIT_32) || is_s32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */; + return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); + } + } + else { + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 
0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */; + return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); + } + if (is_s16(src2w)) + return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w); + } + } + else if (src2 & SLJIT_MEM) { + if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) { + ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */; + return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A); + } + + if (compare_signed) + ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */; + else + ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */; + return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A); + } + + if (compare_signed) + ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */; + else + ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */; + return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w); + } + + if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) { + ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */; + FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w)); + goto done; + } + + if (src2 == SLJIT_IMM) { + sljit_sw neg_src2w = -src2w; + + if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) { + if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { + if (sets_signed) + ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + else + ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + return emit_siy(compiler, ins, dst, dstw, neg_src2w); + } + + if (is_s16(neg_src2w)) { + if (sets_signed) + ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + else + ins = (op & SLJIT_32) ? 
0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w)); + goto done; + } + } + + if (!sets_signed) { + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; + } + if (is_u32(neg_src2w)) { + FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A)); + goto done; + } + } + else if ((op & SLJIT_32) || is_s32(neg_src2w)) { + ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A)); + goto done; + } + } + + forms = sets_signed ? &sub_forms : &logical_sub_forms; + FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w)); + +done: + if (sets_signed) { + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + + if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) { + /* In case of overflow, the sign bit of the two source operands must be different, and + - the first operand is greater if the sign bit of the result is set + - the first operand is less if the sign bit of the result is not set + The -result operation sets the corrent sign, because the result cannot be zero. + The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */ + FAIL_IF(push_inst(compiler, brc(0xe, (op & SLJIT_32) ? (2 + 1) : (2 + 2)))); + FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? 
lcr(tmp1, dst_r) : lcgr(tmp1, dst_r))); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(update_zero_overflow(compiler, op, dst_r)); + } + + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); + + return SLJIT_SUCCESS; +} + +static const struct ins_forms multiply_forms = { + 0xb2520000, /* msr */ + 0xb90c0000, /* msgr */ + 0xb9fd0000, /* msrkc */ + 0xb9ed0000, /* msgrkc */ + 0x71000000, /* ms */ + 0xe30000000051, /* msy */ + 0xe3000000000c, /* msg */ +}; + +static const struct ins_forms multiply_overflow_forms = { + 0, + 0, + 0xb9fd0000, /* msrkc */ + 0xb9ed0000, /* msgrkc */ + 0, + 0xe30000000053, /* msc */ + 0xe30000000083, /* msgc */ +}; + +static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins ins; + + if (HAS_FLAGS(op)) { + /* if have_misc2 fails, this operation should be emulated. 32 bit emulation: + FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r))); + FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r))); + if (dst_r != tmp0) { + FAIL_IF(push_inst(compiler, lr(dst_r, tmp0))); + } + FAIL_IF(push_inst(compiler, aih(tmp0, 1))); + FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U))); + FAIL_IF(push_inst(compiler, ipm(tmp1))); + FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */ + + return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w); + } + + if (src2 == SLJIT_IMM) { + if (is_s16(src2w)) { + ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */; + return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A); + } + + if (is_s32(src2w)) { + ins = (op & SLJIT_32) ? 
0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */; + return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A); + } + } + + return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w); +} + +static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_uw imm, sljit_s32 count16) +{ + sljit_s32 mode = compiler->mode; + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + + if (IS_GPR_REG(dst)) { + dst_r = gpr(dst & REG_MASK); + if (dst == src1) + needs_move = 0; + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (type == SLJIT_AND) { + if (!(mode & SLJIT_32)) + FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32))); + return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff)); + } + else if (type == SLJIT_OR) { + if (count16 >= 3) { + FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32))); + return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff)); + } + + if (count16 >= 2) { + if ((imm & 0x00000000ffffffffull) == 0) + return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)); + if ((imm & 0xffffffff00000000ull) == 0) + return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff)); + } + + if ((imm & 0xffff000000000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48))); + if ((imm & 0x0000ffff00000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff))); + if ((imm & 0x00000000ffff0000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff))); + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff)); + return SLJIT_SUCCESS; + } + + if 
((imm & 0xffffffff00000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32))); + if ((imm & 0x00000000ffffffffull) != 0 || imm == 0) + return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff)); + return SLJIT_SUCCESS; +} + +static const struct ins_forms bitwise_and_forms = { + 0x1400, /* nr */ + 0xb9800000, /* ngr */ + 0xb9f40000, /* nrk */ + 0xb9e40000, /* ngrk */ + 0x54000000, /* n */ + 0xe30000000054, /* ny */ + 0xe30000000080, /* ng */ +}; + +static const struct ins_forms bitwise_or_forms = { + 0x1600, /* or */ + 0xb9810000, /* ogr */ + 0xb9f60000, /* ork */ + 0xb9e60000, /* ogrk */ + 0x56000000, /* o */ + 0xe30000000056, /* oy */ + 0xe30000000081, /* og */ +}; + +static const struct ins_forms bitwise_xor_forms = { + 0x1700, /* xr */ + 0xb9820000, /* xgr */ + 0xb9f70000, /* xrk */ + 0xb9e70000, /* xgrk */ + 0x57000000, /* x */ + 0xe30000000057, /* xy */ + 0xe30000000082, /* xg */ +}; + +static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 type = GET_OPCODE(op); + const struct ins_forms *forms; + + if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == TMP_REG2))) { + sljit_s32 count16 = 0; + sljit_uw imm = (sljit_uw)src2w; + + if (op & SLJIT_32) + imm &= 0xffffffffull; + + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + count16++; + if ((imm & 0x00000000ffff0000ull) != 0) + count16++; + if ((imm & 0x0000ffff00000000ull) != 0) + count16++; + if ((imm & 0xffff000000000000ull) != 0) + count16++; + + if (type == SLJIT_AND && dst == TMP_REG2 && count16 == 1) { + sljit_gpr src_r = tmp1; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp1, src1, src1w)); + + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm); + if 
((imm & 0x00000000ffff0000ull) != 0) + return push_inst(compiler, 0xa7000000 /* tmlh */ | R20A(src_r) | (imm >> 16)); + if ((imm & 0x0000ffff00000000ull) != 0) + return push_inst(compiler, 0xa7030000 /* tmhl */ | R20A(src_r) | (imm >> 32)); + return push_inst(compiler, 0xa7020000 /* tmhh */ | R20A(src_r) | (imm >> 48)); + } + + if (!(op & SLJIT_SET_Z)) + return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16); + } + + if (type == SLJIT_AND) + forms = &bitwise_and_forms; + else if (type == SLJIT_OR) + forms = &bitwise_or_forms; + else + forms = &bitwise_xor_forms; + + return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w); +} + +static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 type = GET_OPCODE(op); + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr src_r = tmp0; + sljit_gpr base_r = tmp0; + sljit_ins imm = 0; + sljit_ins ins; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if (src2 != SLJIT_IMM) { + if (FAST_IS_REG(src2)) + base_r = gpr(src2); + else { + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + base_r = tmp1; + } + + if ((op & SLJIT_32) && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) { + if (base_r != tmp1) { + FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(base_r) | (59 << 24) | (1 << 23) | (63 << 16))); + base_r = tmp1; + } else + FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f)); + } + } else + imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 
0x1f : 0x3f)); + + if ((op & SLJIT_32) && dst_r == src_r) { + if (type == SLJIT_SHL || type == SLJIT_MSHL) + ins = 0x89000000 /* sll */; + else if (type == SLJIT_LSHR || type == SLJIT_MLSHR) + ins = 0x88000000 /* srl */; + else + ins = 0x8a000000 /* sra */; + + FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm)); + } else { + if (type == SLJIT_SHL || type == SLJIT_MSHL) + ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */; + else if (type == SLJIT_LSHR || type == SLJIT_MLSHR) + ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */; + else + ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */; + + FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16))); + } + + if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR) + return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)); + + return SLJIT_SUCCESS; +} + +static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr src_r = tmp0; + sljit_gpr base_r = tmp0; + sljit_ins imm = 0; + sljit_ins ins; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if (src2 != SLJIT_IMM) { + if (FAST_IS_REG(src2)) + base_r = gpr(src2); + else { + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + base_r = tmp1; + } + } + + if (GET_OPCODE(op) == SLJIT_ROTR) { + if (src2 != SLJIT_IMM) { + ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */; + FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r))); + base_r = tmp1; + } else + src2w = -src2w; + } + + if (src2 == SLJIT_IMM) + imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f)); + + ins = (op & SLJIT_32) ? 
0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */; + return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)); +} + +static const struct ins_forms addc_forms = { + 0xb9980000, /* alcr */ + 0xb9880000, /* alcgr */ + 0, + 0, + 0, + 0xe30000000098, /* alc */ + 0xe30000000088, /* alcg */ +}; + +static const struct ins_forms subc_forms = { + 0xb9990000, /* slbr */ + 0xb9890000, /* slbgr */ + 0, + 0, + 0, + 0xe30000000099, /* slb */ + 0xe30000000089, /* slbg */ +}; + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->mode = op & SLJIT_32; + compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); + + if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) { + src1 ^= src2; + src2 ^= src1; + src1 ^= src2; + + src1w ^= src2w; + src2w ^= src1w; + src1w ^= src2w; + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD; + return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ADDC: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD; + FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w)); + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); + return SLJIT_SUCCESS; + case SLJIT_SUB: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB; + return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUBC: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB; + FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w)); + if (dst & SLJIT_MEM) + 
return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); + return SLJIT_SUCCESS; + case SLJIT_MUL: + FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w)); + break; + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w)); + break; + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w)); + break; + case SLJIT_ROTL: + case SLJIT_ROTR: + FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w)); + break; + } + + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_reg = (GET_OPCODE(op) == SLJIT_SUB || GET_OPCODE(op) == SLJIT_AND) ? TMP_REG2 : TMP_REG1; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), 0 /* tmp0 */, 0, src1, src1w, src2, src2w)); + return push_inst(compiler, ((op & SLJIT_32) ? 
0x1a00 /* ar */ : 0xb9080000 /* agr */) | R4A(gpr(dst_reg)) | R0A(tmp0)); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1_reg, + sljit_s32 src2_reg, + sljit_s32 src3, sljit_sw src3w) +{ + sljit_s32 is_right; + sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64; + sljit_gpr dst_r = gpr(dst_reg); + sljit_gpr src1_r = gpr(src1_reg); + sljit_gpr src2_r = gpr(src2_reg); + sljit_gpr src3_r = tmp1; + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w)); + + is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR); + + if (src1_reg == src2_reg) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w); + } + + ADJUST_LOCAL_OFFSET(src3, src3w); + + if (src3 == SLJIT_IMM) { + src3w &= bit_length - 1; + + if (src3w == 0) + return SLJIT_SUCCESS; + + if (op & SLJIT_32) { + if (dst_r == src1_r) { + ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */; + FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | (sljit_ins)src3w)); + } else { + ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */; + FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16))); + } + } else { + ins = is_right ? 
0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */; + FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16))); + } + + ins = 0xec0000000055 /* risbg */; + + if (is_right) { + src3w = bit_length - src3w; + ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src3w) << 16) | ((sljit_ins)src3w << 8); + } else + ins |= ((sljit_ins)(64 - src3w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)(src3w + 64 - bit_length) << 8); + + return push_inst(compiler, ins | R36A(dst_r) | R32A(src2_r)); + } + + if (!(src3 & SLJIT_MEM)) { + src3_r = gpr(src3); + + if (dst_r == src3_r) { + FAIL_IF(push_inst(compiler, 0x1800 /* lr */ | R4A(tmp1) | R0A(src3_r))); + src3_r = tmp1; + } + } else + FAIL_IF(load_word(compiler, tmp1, src3, src3w, op & SLJIT_32)); + + if (op & SLJIT_32) { + if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) { + if (src3_r != tmp1) { + FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src3_r) | (59 << 24) | (1 << 23) | (63 << 16))); + src3_r = tmp1; + } else + FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f)); + } + + if (dst_r == src1_r) { + ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */; + FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(src3_r))); + } else { + ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */; + FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r))); + } + + if (src3_r != tmp1) { + FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f)); + FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src3_r))); + } else + FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f)); + + ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */; + FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1) | (0x1 << 16))); + + return push_inst(compiler, 0x1600 /* or */ | R4A(dst_r) | R0A(tmp0)); + } + + ins = is_right ? 
0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */; + FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r))); + + ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */; + + if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) { + if (src3_r != tmp1) + FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f)); + + FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | (0x1 << 16))); + src2_r = tmp0; + + if (src3_r != tmp1) + FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src3_r))); + else + FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f)); + } else + FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src3_r))); + + FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1))); + return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(dst_r) | R0A(tmp0)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + sljit_gpr src_r; + struct addr addr; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + src_r = FAST_IS_REG(src) ? 
gpr(src) : tmp1; + if (src & SLJIT_MEM) + FAIL_IF(load_word(compiler, tmp1, src, srcw, 0)); + + return push_inst(compiler, br(src_r)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1)); + return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); + default: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_gpr dst_r = link_r; + sljit_s32 size; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + switch (op) { + case SLJIT_FAST_ENTER: + if (FAST_IS_REG(dst)) + return push_inst(compiler, lgr(gpr(dst), link_r)); + break; + case SLJIT_GET_RETURN_ADDRESS: + dst_r = FAST_IS_REG(dst) ? 
gpr(dst) : tmp0; + + size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 2); + FAIL_IF(load_word(compiler, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, 0)); + break; + } + + if (dst & SLJIT_MEM) + return store_word(compiler, dst_r, dst, dstw, 0); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return (sljit_s32)gpr(reg); + + if (type != SLJIT_FLOAT_REGISTER) + return -1; + + return (sljit_s32)freg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size); + return push_inst(compiler, ins); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#define FLOAT_LOAD 0 +#define FLOAT_STORE 1 + +static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + struct addr addr; + sljit_ins ins; + + SLJIT_ASSERT(mem & SLJIT_MEM); + + if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) { + FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1)); + + if (op & FLOAT_STORE) + ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */; + else + ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */; + + return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset); + } + + FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1)); + + if (op & FLOAT_STORE) + ins = (op & SLJIT_32) ? 
0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */; + else + ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */; + + return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); +} + +static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins, + sljit_s32 reg, + sljit_s32 src, sljit_sw srcw) +{ + struct addr addr; + + if (!(src & SLJIT_MEM)) + return push_inst(compiler, ins_r | F4(reg) | F0(src)); + + FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); + return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; + sljit_ins ins; + + if (src & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + /* M3 is set to 5 */ + if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) + ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */; + else + ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */; + + FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src))); + + if (dst & SLJIT_MEM) + return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64); + + return SLJIT_SUCCESS; +} + +static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (src == SLJIT_IMM) { + FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw)); + src = (sljit_s32)tmp0; + } + else if (src & SLJIT_MEM) { + FAIL_IF(load_word(compiler, tmp0, src, srcw, ins & 0x100000)); + src = (sljit_s32)tmp0; + } + + FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src))); + + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | ((ins & 0x10000) ? 0 : SLJIT_32), TMP_FREG1, dst, dstw); + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins; + + if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) + ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */; + else + ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */; + + return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins; + + if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) + srcw = (sljit_u32)srcw; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW) + ins = (op & SLJIT_32) ? 0xb3a00000 /* celgbr */ : 0xb3a10000 /* cdlgbr */; + else + ins = (op & SLJIT_32) ? 
0xb3900000 /* celfbr */ : 0xb3910000 /* cdlfbr */; + + return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins ins_r, ins; + + if (src1 & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + if (op & SLJIT_32) { + ins_r = 0xb3090000 /* cebr */; + ins = 0xed0000000009 /* ceb */; + } else { + ins_r = 0xb3190000 /* cdbr */; + ins = 0xed0000000019 /* cdb */; + } + + return emit_float(compiler, ins_r, ins, src1, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + sljit_ins ins; + + CHECK_ERROR(); + + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (op == SLJIT_CONV_F64_FROM_F32) + FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw)); + else { + if (src & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (FAST_IS_REG(dst)) { + if (dst == src) + return SLJIT_SUCCESS; + + ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */; + break; + } + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw); + case SLJIT_CONV_F64_FROM_F32: + /* Only SLJIT_CONV_F32_FROM_F64. */ + ins = 0xb3440000 /* ledbr */; + break; + case SLJIT_NEG_F64: + ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */; + break; + default: + SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64); + ins = (op & SLJIT_32) ? 
0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */; + break; + } + + FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src))); + } + + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); + + return SLJIT_SUCCESS; +} + +#define FLOAT_MOV(op, dst_r, src_r) \ + (((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r)) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r = TMP_FREG1; + sljit_ins ins_r, ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + do { + if (FAST_IS_REG(dst)) { + dst_r = dst; + + if (dst == src1) + break; + + if (dst == src2) { + if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) { + src2 = src1; + src2w = src1w; + src1 = dst; + break; + } + + FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2))); + src2 = TMP_FREG1; + } + } + + if (src1 & SLJIT_MEM) + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w)); + else + FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1))); + } while (0); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */; + ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */; + break; + case SLJIT_SUB_F64: + ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */; + ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */; + break; + case SLJIT_MUL_F64: + ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */; + ins = (op & SLJIT_32) ? 
0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */; + break; + default: + SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64); + ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */; + ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */; + break; + } + + FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w)); + + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 reg; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (src2 & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src2, src2w)); + src2 = TMP_FREG1; + } + + if (src1 & SLJIT_MEM) { + reg = (dst_freg == src2) ? 
TMP_FREG1 : dst_freg; + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), reg, src1, src1w)); + src1 = reg; + } + + return push_inst(compiler, 0xb3720000 /* cpsdr */ | F12(src2) | F4(dst_freg) | F0(src1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + union { + sljit_s32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32)))); + return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_sw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm)); + return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ + sljit_gpr gen_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + gen_r = gpr(reg); + + if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) { + if (op & SLJIT_32) { + FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(gen_r) | (32 << 16))); + gen_r = tmp0; + } + + return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(gen_r)); + } + + FAIL_IF(push_inst(compiler, 0xb3cd0000 /* lgdr */ | R4A(gen_r) | F0(freg))); + + if (!(op & SLJIT_32)) + return SLJIT_SUCCESS; + + return push_inst(compiler, 0xeb000000000c /* srlg */ | R36A(gen_r) | R32A(gen_r) | (32 << 16)); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* 
--------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + /* record jump */ + jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + jump->addr = compiler->size; + + /* emit jump instruction */ + type &= 0xff; + if (type >= SLJIT_FAST_CALL) + PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0))); + else + PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0))); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, r14)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + sljit_gpr src_r = FAST_IS_REG(src) ? 
gpr(src) : tmp1; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + + if (src == SLJIT_IMM) { + SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */ + FAIL_IF(push_load_imm_inst(compiler, src_r, srcw)); + } + else if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */)); + } + + /* emit jump instruction */ + if (type >= SLJIT_FAST_CALL) + return push_inst(compiler, basr(link_r, src_r)); + + return push_inst(compiler, br(src_r)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(arg_types); + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + SLJIT_ASSERT(gpr(TMP_REG2) == tmp1); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */)); + src = TMP_REG2; + srcw = 0; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src)))); + src = TMP_REG2; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, r14)); + type = SLJIT_JUMP; + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_gpr dst_r = FAST_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0; + sljit_gpr loc_r = tmp1; + sljit_u8 mask = get_cc(compiler, type); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + + switch (GET_OPCODE(op)) { + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + compiler->status_flags_state = op & SLJIT_SET_Z; + + /* dst is also source operand */ + if (dst & SLJIT_MEM) + FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32)); + + break; + case SLJIT_MOV32: + op |= SLJIT_32; + /* fallthrough */ + case SLJIT_MOV: + /* can write straight into destination */ + loc_r = dst_r; + break; + default: + SLJIT_UNREACHABLE(); + } + + /* TODO(mundaym): fold into cmov helper function? */ + #define LEVAL(i) i(loc_r, 1, mask) + if (have_lscond2()) { + FAIL_IF(push_load_imm_inst(compiler, loc_r, 0)); + FAIL_IF(push_inst(compiler, + WHEN2(op & SLJIT_32, lochi, locghi))); + } else { + FAIL_IF(push_load_imm_inst(compiler, loc_r, 1)); + FAIL_IF(push_inst(compiler, brc(mask, 2 + 2))); + FAIL_IF(push_load_imm_inst(compiler, loc_r, 0)); + } + #undef LEVAL + + /* apply bitwise op and set condition codes */ + switch (GET_OPCODE(op)) { + #define LEVAL(i) i(dst_r, loc_r) + case SLJIT_AND: + FAIL_IF(push_inst(compiler, + WHEN2(op & SLJIT_32, nr, ngr))); + break; + case SLJIT_OR: + FAIL_IF(push_inst(compiler, + WHEN2(op & SLJIT_32, or, ogr))); + break; + case SLJIT_XOR: + FAIL_IF(push_inst(compiler, + WHEN2(op & SLJIT_32, xr, xgr))); + break; + #undef LEVAL + } + + /* store result to memory if required */ + if (dst & SLJIT_MEM) + return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32)); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + sljit_ins mask; + sljit_gpr src_r; + sljit_gpr dst_r = gpr(dst_reg); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, 
src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_reg != src2_reg) { + if (src1 == dst_reg) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else { + if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + FAIL_IF(load_word(compiler, dst_r, src1, src1w, type & SLJIT_32)); + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else + FAIL_IF(push_inst(compiler, ((type & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(gpr(src2_reg)))); + } + } + + mask = get_cc(compiler, type & ~SLJIT_32); + + if (src1 & SLJIT_MEM) { + if (src1 & OFFS_REG_MASK) { + src_r = gpr(OFFS_REG(src1)); + + if (src1w != 0) { + FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(src_r) | ((sljit_ins)(src1w & 0x3) << 16))); + src_r = tmp1; + } + + FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(src_r) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK)))); + src_r = tmp1; + src1w = 0; + } else if (!is_s20(src1w)) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w)); + + if (src1 & REG_MASK) + FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp1) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK)))); + + src_r = tmp1; + src1w = 0; + } else + src_r = gpr(src1 & REG_MASK); + + ins = (type & SLJIT_32) ? 0xeb00000000f2 /* loc */ : 0xeb00000000e2 /* locg */; + return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w)); + } + + if (src1 == SLJIT_IMM) { + if (type & SLJIT_32) + src1w = (sljit_s32)src1w; + + if (have_lscond2() && is_s16(src1w)) { + ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */; + return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16); + } + + FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w)); + src_r = tmp1; + } else + src_r = gpr(src1); + + ins = (type & SLJIT_32) ? 
0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */; + return push_inst(compiler, ins | (mask << 12) | R4A(dst_r) | R0A(src_r)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_ins ins; + struct sljit_label *label; + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (dst_freg != src2_freg) { + if (dst_freg == src1) { + src1 = src2_freg; + src1w = 0; + type ^= 0x1; + } else { + ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */; + FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src2_freg))); + } + } + + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1); + FAIL_IF(!jump); + + if (!(src1 & SLJIT_MEM)) { + ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */; + FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src1))); + } else + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (type & SLJIT_32), dst_freg, src1, src1w)); + + SLJIT_SKIP_CHECKS(compiler); + label = sljit_emit_label(compiler); + FAIL_IF(!label); + + sljit_set_label(jump, label); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_ins ins, reg1, reg2, base, offs = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + ADJUST_LOCAL_OFFSET(mem, memw); + + base = gpr(mem & REG_MASK); + reg1 = gpr(REG_PAIR_FIRST(reg)); + reg2 = gpr(REG_PAIR_SECOND(reg)); + + if (mem & OFFS_REG_MASK) { + memw &= 0x3; + offs = gpr(OFFS_REG(mem)); + + if (memw != 0) { + FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | 
R32A(offs) | ((sljit_ins)memw << 16))); + offs = tmp1; + } else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) { + FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk: tmp1 = base + offs (64 bit) */ | R12A(offs) | R4A(tmp1) | R0A(base))); + base = tmp1; + offs = 0; + } + + memw = 0; + } else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, memw)); + + if (base == 0) + base = tmp1; + else + offs = tmp1; + + memw = 0; + } + + if (offs == 0 && reg2 == (reg1 + 1)) { + ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */; + return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw)); + } + + ins = ((type & SLJIT_MEM_STORE) ? 0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base); + + if (!(type & SLJIT_MEM_STORE) && (base == reg1 || offs == reg1)) { /* reg1 takes part in the address: load reg2 first so the address stays intact */ + FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)))); + return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)); + } + + FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw))); + return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type); + struct addr addr; + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if 
(!(srcdst & SLJIT_MEM)) { + if (type & SLJIT_SIMD_STORE) + ins = F36(srcdst) | F32(freg); + else + ins = F36(freg) | F32(srcdst); + + return push_inst(compiler, 0xe70000000056 /* vlr */ | ins); + } + + FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1)); + ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset); + + if (alignment >= 4) + ins |= 4 << 12; + else if (alignment == 3) + ins |= 3 << 12; + + return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 0xe7000000000e /* vst */ : 0xe70000000006 /* vl */) | ins); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + struct addr addr; + sljit_gpr reg; + sljit_sw sign_ext; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); + return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(freg) + | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12)); + } + + if (type & SLJIT_SIMD_FLOAT) { + if (src == SLJIT_IMM) + return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)); + + return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src) | ((sljit_ins)elem_size << 12)); + } + + if (src == SLJIT_IMM) { + sign_ext = 0x10000; + + switch (elem_size) { + case 0: + srcw &= 0xff; + sign_ext = (sljit_s8)srcw; + break; + case 1: + srcw &= 0xffff; + sign_ext = (sljit_s16)srcw; + break; + case 2: + if ((sljit_s32)srcw == (sljit_s16)srcw) { + srcw &= 0xffff; + 
sign_ext = (sljit_s16)srcw; + } else + srcw &= 0xffffffff; + break; + default: + if (srcw == (sljit_s16)srcw) { + srcw &= 0xffff; + sign_ext = (sljit_s16)srcw; + } + break; + } + + if (sign_ext != 0x10000) { + if (sign_ext == 0 || sign_ext == -1) + return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg) + | (sign_ext == 0 ? 0 : ((sljit_ins)0xffff << 16))); + + return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(freg) + | ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12)); + } + + FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw)); /* immediate does not fit vrepi: go through tmp0, propagating emit errors */ + reg = tmp0; + } else + reg = gpr(src); + + FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ((sljit_ins)elem_size << 12))); + return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(freg) | ((sljit_ins)elem_size << 12)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + struct addr addr; + sljit_gpr reg; + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (srcdst & SLJIT_MEM) { + FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1)); + ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset); + } + + if (type & SLJIT_SIMD_LANE_ZERO) { + if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1)) + return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 12)); + + if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { + 
FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(freg))); + srcdst = TMP_FREG1; + srcdstw = 0; + } + + FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg))); + } + + if (srcdst & SLJIT_MEM) { + switch (elem_size) { + case 0: + ins |= 0xe70000000000 /* vleb */; + break; + case 1: + ins |= 0xe70000000001 /* vleh */; + break; + case 2: + ins |= 0xe70000000003 /* vlef */; + break; + default: + ins |= 0xe70000000002 /* vleg */; + break; + } + + /* Convert to vsteb - vsteg */ + if (type & SLJIT_SIMD_STORE) + ins |= 0x8; + + return push_inst(compiler, ins | ((sljit_ins)lane_index << 12)); + } + + if (type & SLJIT_SIMD_FLOAT) { + if (type & SLJIT_SIMD_STORE) + return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(freg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12)); + + if (elem_size == 3) { + if (lane_index == 0) + ins = F32(srcdst) | F28(freg) | (1 << 12); + else + ins = F32(freg) | F28(srcdst); + + return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(freg) | ins); + } + + FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12))); + return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12)); + } + + if (srcdst == SLJIT_IMM) { + switch (elem_size) { + case 0: + ins = 0xe70000000040 /* vleib */; + srcdstw &= 0xff; + break; + case 1: + ins = 0xe70000000041 /* vleih */; + srcdstw &= 0xffff; + break; + case 2: + if ((sljit_s32)srcdstw == (sljit_s16)srcdstw) { + srcdstw &= 0xffff; + ins = 0xe70000000043 /* vleif */; + } else + srcdstw &= 0xffffffff; + break; + default: + if (srcdstw == (sljit_s16)srcdstw) { + srcdstw &= 0xffff; + ins = 0xe70000000042 /* vleig */; + } + break; + } + + if (ins != 0) + return push_inst(compiler, ins | F36(freg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12)); + + FAIL_IF(push_load_imm_inst(compiler, tmp0, srcdstw)); /* no vlei* form fits: go through tmp0, propagating emit errors */ + reg = tmp0; + } else 
+ reg = gpr(srcdst); + + ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12); + + if (!(type & SLJIT_SIMD_STORE)) + return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ins); + + FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(freg) | ins)); + + if (!(type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3) + return SLJIT_SUCCESS; + + switch (elem_size) { + case 0: + ins = 0xb9060000 /* lgbr */; + break; + case 1: + ins = 0xb9070000 /* lghr */; + break; + default: + ins = 0xb9140000 /* lgfr */; + break; + } + + return push_inst(compiler, ins | R4A(reg) | R0A(reg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src) + | ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + struct addr addr; + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2) + return 
SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); + ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset); + + switch (elem2_size - elem_size) { + case 1: + ins |= 0xe70000000002 /* vleg */; + break; + case 2: + ins |= 0xe70000000003 /* vlef */; + break; + default: + ins |= 0xe70000000001 /* vleh */; + break; + } + + FAIL_IF(push_inst(compiler, ins)); + src = freg; + } + + if (type & SLJIT_SIMD_FLOAT) { + FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(freg) | F32(src) | (2 << 12))); + FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(freg) | F32(freg) | (32 << 16) | (3 << 12))); + return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(freg) | F32(freg) | (2 << 12)); + } + + ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(freg); + + do { + FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12))); + src = freg; + } while (++elem_size < elem2_size); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_gpr dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (elem_size) { /* each constant is the vbperm bit-selector pattern for this element size; loads are FAIL_IF-checked like every other emit here */ + case 0: + FAIL_IF(push_load_imm_inst(compiler, tmp0, (sljit_sw)0x4048505860687078)); + FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)0x0008101820283038)); + FAIL_IF(push_inst(compiler, 0xe70000000062 /* vlvgp */ | F36(TMP_FREG1) | R32A(tmp1) | 
R28A(tmp0))); + break; + case 1: + FAIL_IF(push_load_imm_inst(compiler, tmp0, (sljit_sw)0x0010203040506070)); + break; + case 2: + FAIL_IF(push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808000204060)); + break; + default: + FAIL_IF(push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808080800040)); + break; + } + + if (elem_size != 0) + FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12))); + + FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(freg) | F28(TMP_FREG1))); + + dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; + FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1) + | (elem_size == 0 ? ((3 << 16) | (1 << 12)) : (7 << 16)))); + + if (dst_r == tmp0) + return store_word(compiler, tmp0, dst, dstw, type & SLJIT_32); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + + if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (SLJIT_SIMD_GET_OPCODE(type)) { + case SLJIT_SIMD_OP2_AND: + ins = 0xe70000000068 /* vn */; + break; + case SLJIT_SIMD_OP2_OR: + ins = 0xe7000000006a /* vo */; + break; + case SLJIT_SIMD_OP2_XOR: + ins = 0xe7000000006d /* vx */; + break; + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + return push_inst(compiler, ins | F36(dst_freg) | F32(src1_freg) | F28(src2_freg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, 
+ sljit_s32 mem_reg) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_ins mask; + sljit_gpr tmp_r = gpr(temp_reg); + sljit_gpr mem_r = gpr(mem_reg); + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r)); + case SLJIT_MOV_U8: + mask = 0xff; + break; + case SLJIT_MOV_U16: + mask = 0xffff; + break; + default: + return push_inst(compiler, 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r)); + } + + /* tmp0 = (src_reg ^ tmp_r) & mask */ + FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | mask)); + FAIL_IF(push_inst(compiler, 0xb9e70000 /* xgrk */ | R4A(tmp0) | R0A(gpr(src_reg)) | R12A(tmp_r))); + FAIL_IF(push_inst(compiler, 0xa7090000 /* lghi */ | R20A(tmp_r) | 0xfffc)); + FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp0) | R0A(tmp1))); + + /* tmp0 = tmp0 << (((mem_r ^ 0x3) & 0x3) << 3) */ + FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | (sljit_ins)((mask == 0xff) ? 
0x18 : 0x10))); + FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp_r) | R0A(mem_r))); + FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp1) | R32A(mem_r) | (59 << 24) | (60 << 16) | (3 << 8))); + FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(tmp0) | R28A(tmp1))); + + /* Already computed: tmp_r = mem_r & ~0x3 */ + + FAIL_IF(push_inst(compiler, 0x58000000 /* l */ | R20A(tmp1) | R12A(tmp_r))); + FAIL_IF(push_inst(compiler, 0x1700 /* x */ | R4A(tmp0) | R0A(tmp1))); + return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp1) | R16A(tmp0) | R12A(tmp_r)); +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +/* On s390x we build a literal pool to hold constants. This has two main + advantages: + + 1. we only need one instruction in the instruction stream (LGRL) + 2. we can store 64 bit addresses and use 32 bit offsets + + To retrofit the extra information needed to build the literal pool we + add a new sljit_s390x_const struct that contains the initial value but + can still be cast to a sljit_const. */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_s390x_const *const_; + sljit_gpr dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + + const_ = (struct sljit_s390x_const*)ensure_abuf(compiler, + sizeof(struct sljit_s390x_const)); + PTR_FAIL_IF(!const_); + set_const((struct sljit_const*)const_, compiler); + const_->init_value = init_value; + + dst_r = FAST_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0; + if (have_genext()) + PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0))); + else { + PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0))); + PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1))); + } + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */)); + + return (struct sljit_const*)const_; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + /* Update the constant pool. */ + sljit_uw *ptr = (sljit_uw *)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0); + *ptr = new_target; + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1); + SLJIT_CACHE_FLUSH(ptr, ptr + 1); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_jump *jump; + sljit_gpr dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_mov_addr(jump, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + + if (have_genext()) + PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0))); + else { + PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0))); + PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1))); + } + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0)); + + return jump; +} + +/* TODO(carenas): EVAL probably should move up or be refactored */ +#undef WHEN2 +#undef EVAL + +#undef tmp1 +#undef tmp0 + +/* TODO(carenas): undef other macros that spill like is_u12? 
*/ diff --git a/src/sljit/sljitNativeX86_32.c b/src/sljit/sljitNativeX86_32.c new file mode 100644 index 0000000..59ea04a --- /dev/null +++ b/src/sljit/sljitNativeX86_32.c @@ -0,0 +1,1685 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* x86 32-bit arch dependent functions. 
*/ + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm) +{ + sljit_u8 *inst; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw)); + FAIL_IF(!inst); + INC_SIZE(1 + sizeof(sljit_sw)); + *inst++ = opcode; + sljit_unaligned_store_sw(inst, imm); + return SLJIT_SUCCESS; +} + +/* Size contains the flags as well. */ +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size, + /* The register or immediate operand. */ + sljit_s32 a, sljit_sw imma, + /* The general operand (not immediate). */ + sljit_s32 b, sljit_sw immb) +{ + sljit_u8 *inst; + sljit_u8 *buf_ptr; + sljit_u8 reg_map_b; + sljit_uw flags = size; + sljit_uw inst_size; + + /* Both cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); + /* Size flags not allowed for typed instructions. */ + SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); + /* Both size flags cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); + /* SSE2 and immediate is not possible. */ + SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2)); + SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) + & ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0); + SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT); + + size &= 0xf; + /* The mod r/m byte is always present. */ + inst_size = size + 1; + + if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) + inst_size++; + + /* Calculate size of b. */ + if (b & SLJIT_MEM) { + if (!(b & REG_MASK)) + inst_size += sizeof(sljit_sw); + else { + if (immb != 0 && !(b & OFFS_REG_MASK)) { + /* Immediate operand. 
*/ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_s8); + else + inst_size += sizeof(sljit_sw); + } else if (reg_map[b & REG_MASK] == 5) { + /* Swap registers if possible. */ + if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_map[OFFS_REG(b)] != 5) + b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK); + else + inst_size += sizeof(sljit_s8); + } + + if (reg_map[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK)) + b |= TO_OFFS_REG(SLJIT_SP); + + if (b & OFFS_REG_MASK) + inst_size += 1; /* SIB byte. */ + } + } + + /* Calculate size of a. */ + if (a == SLJIT_IMM) { + if (flags & EX86_BIN_INS) { + if (imma <= 127 && imma >= -128) { + inst_size += 1; + flags |= EX86_BYTE_ARG; + } else + inst_size += 4; + } else if (flags & EX86_SHIFT_INS) { + SLJIT_ASSERT(imma <= 0x1f); + if (imma != 1) { + inst_size++; + flags |= EX86_BYTE_ARG; + } + } else if (flags & EX86_BYTE_ARG) + inst_size++; + else if (flags & EX86_HALF_ARG) + inst_size += sizeof(short); + else + inst_size += sizeof(sljit_sw); + } else + SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!inst); + + /* Encoding the byte. */ + INC_SIZE(inst_size); + if (flags & EX86_PREF_F2) + *inst++ = 0xf2; + else if (flags & EX86_PREF_F3) + *inst++ = 0xf3; + else if (flags & EX86_PREF_66) + *inst++ = 0x66; + + buf_ptr = inst + size; + + /* Encode mod/rm byte. */ + if (!(flags & EX86_SHIFT_INS)) { + if ((flags & EX86_BIN_INS) && a == SLJIT_IMM) + *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; + + if (a == SLJIT_IMM) + *buf_ptr = 0; + else if (!(flags & EX86_SSE2_OP1)) + *buf_ptr = U8(reg_map[a] << 3); + else + *buf_ptr = U8(freg_map[a] << 3); + } else { + if (a == SLJIT_IMM) { + if (imma == 1) + *inst = GROUP_SHIFT_1; + else + *inst = GROUP_SHIFT_N; + } else + *inst = GROUP_SHIFT_CL; + *buf_ptr = 0; + } + + if (!(b & SLJIT_MEM)) { + *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? 
reg_map[b] : freg_map[b])); + buf_ptr++; + } else if (b & REG_MASK) { + reg_map_b = reg_map[b & REG_MASK]; + + if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { + if (immb != 0 || reg_map_b == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr |= 0x40; + else + *buf_ptr |= 0x80; + } + + if (!(b & OFFS_REG_MASK)) + *buf_ptr++ |= reg_map_b; + else { + buf_ptr[0] |= 0x04; + buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3)); + buf_ptr += 2; + } + + if (immb != 0 || reg_map_b == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr++ = U8(immb); /* 8 bit displacement. */ + else { + sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_sw); + } + } + } else { + if (reg_map_b == 5) + *buf_ptr |= 0x40; + + buf_ptr[0] |= 0x04; + buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6)); + buf_ptr += 2; + + if (reg_map_b == 5) + *buf_ptr++ = 0; + } + } else { + *buf_ptr++ |= 0x05; + sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_sw); + } + + if (a == SLJIT_IMM) { + if (flags & EX86_BYTE_ARG) + *buf_ptr = U8(imma); + else if (flags & EX86_HALF_ARG) + sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma); + else if (!(flags & EX86_SHIFT_INS)) + sljit_unaligned_store_sw(buf_ptr, imma); + } + + return inst; +} + +static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op, + /* The first and second register operand. */ + sljit_s32 a, sljit_s32 v, + /* The general operand (not immediate). 
*/ + sljit_s32 b, sljit_sw immb) +{ + sljit_u8 *inst; + sljit_u8 vex = 0; + sljit_u8 vex_m = 0; + sljit_uw size; + + SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) + & ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0); + + if (op & VEX_OP_0F38) + vex_m = 0x2; + else if (op & VEX_OP_0F3A) + vex_m = 0x3; + + if (op & VEX_W) { + if (vex_m == 0) + vex_m = 0x1; + + vex |= 0x80; + } + + if (op & EX86_PREF_66) + vex |= 0x1; + else if (op & EX86_PREF_F2) + vex |= 0x3; + else if (op & EX86_PREF_F3) + vex |= 0x2; + + op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3); + + if (op & VEX_256) + vex |= 0x4; + + vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3)); + + size = op & ~(sljit_uw)0xff; + size |= (vex_m == 0) ? 3 : 4; + + inst = emit_x86_instruction(compiler, size, a, 0, b, immb); + FAIL_IF(!inst); + + if (vex_m == 0) { + inst[0] = 0xc5; + inst[1] = U8(vex | 0x80); + inst[2] = U8(op); + return SLJIT_SUCCESS; + } + + inst[0] = 0xc4; + inst[1] = U8(vex_m | 0xe0); + inst[2] = vex; + inst[3] = U8(op); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Enter / return */ +/* --------------------------------------------------------------------- */ + +static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset) +{ + sljit_uw type = jump->flags >> TYPE_SHIFT; + + if (type == SLJIT_JUMP) { + *code_ptr++ = JMP_i32; + } else if (type >= SLJIT_FAST_CALL) { + *code_ptr++ = CALL_i32; + } else { + *code_ptr++ = GROUP_0F; + *code_ptr++ = get_jump_code(type); + } + + jump->addr = (sljit_uw)code_ptr; + + if (jump->flags & JUMP_ADDR) + sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset)); + else + jump->flags |= PATCH_MW; + code_ptr += 4; + + return code_ptr; +} + +#define ENTER_TMP_TO_R4 0x00001 +#define ENTER_TMP_TO_S 0x00002 + +SLJIT_API_FUNC_ATTRIBUTE 
sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 word_arg_count, saved_arg_count, float_arg_count; + sljit_s32 size, args_size, types, status; + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(options); + sljit_u8 *inst; +#ifdef _WIN32 + sljit_s32 r2_offset = -1; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + /* Emit ENDBR32 at function entry if needed. */ + FAIL_IF(emit_endbranch(compiler)); + + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + + arg_types >>= SLJIT_ARG_SHIFT; + word_arg_count = 0; + status = 0; + + if (options & SLJIT_ENTER_REG_ARG) { + args_size = 3 * SSIZE_OF(sw); + + while (arg_types) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + word_arg_count++; + if (word_arg_count >= 4) + status |= ENTER_TMP_TO_R4; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = 0; + } else { + types = arg_types; + saved_arg_count = 0; + float_arg_count = 0; + args_size = SSIZE_OF(sw); + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size)); + args_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size)); + args_size += SSIZE_OF(f32); + break; + default: + word_arg_count++; + + if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG)) + saved_arg_count++; + + if (word_arg_count == 4) { + if (types & SLJIT_ARG_TYPE_SCRATCH_REG) { + status |= ENTER_TMP_TO_R4; + arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT); + 
} else if (saved_arg_count == 4) { + status |= ENTER_TMP_TO_S; + arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT); + } + } + + args_size += SSIZE_OF(sw); + break; + } + types >>= SLJIT_ARG_SHIFT; + } + + args_size -= SSIZE_OF(sw); + compiler->args_size = args_size; + } + + size = (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - kept_saveds_count; + if (!(options & SLJIT_ENTER_REG_ARG)) + size++; + + if (size != 0) { + inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1)); + FAIL_IF(!inst); + + INC_SIZE((sljit_uw)size); + + if (!(options & SLJIT_ENTER_REG_ARG)) + PUSH_REG(reg_map[TMP_REG1]); + + if ((saveds > 2 && kept_saveds_count <= 2) || scratches > 9) + PUSH_REG(reg_map[SLJIT_S2]); + if ((saveds > 1 && kept_saveds_count <= 1) || scratches > 10) + PUSH_REG(reg_map[SLJIT_S1]); + if ((saveds > 0 && kept_saveds_count == 0) || scratches > 11) + PUSH_REG(reg_map[SLJIT_S0]); + + size *= SSIZE_OF(sw); + } + + if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size); + + size += SSIZE_OF(sw); + + local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + size + 0xf) & ~0xf) - size; + compiler->local_size = local_size; + + word_arg_count = 0; + saved_arg_count = 0; + args_size = size; + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + args_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + args_size += SSIZE_OF(f32); + break; + default: + word_arg_count++; + SLJIT_ASSERT(word_arg_count <= 3 || (word_arg_count == 4 && !(status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)))); + + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { +#ifdef _WIN32 + if (word_arg_count == 3 && local_size > 4 * 4096) + r2_offset = local_size + args_size; + else +#endif + EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); + + } else { + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); + saved_arg_count++; + 
} + + args_size += SSIZE_OF(sw); + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0); + +#ifdef _WIN32 + SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096); + + if (local_size > 4096) { + if (local_size <= 4 * 4096) { + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096); + + if (local_size > 2 * 4096) + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); + if (local_size > 3 * 4096) + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); + } + else { + if (options & SLJIT_ENTER_REG_ARG) { + SLJIT_ASSERT(r2_offset == -1); + + inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 1)); + FAIL_IF(!inst); + INC_SIZE(1); + PUSH_REG(reg_map[SLJIT_R2]); + + local_size -= SSIZE_OF(sw); + r2_offset = local_size; + } + + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12); + + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096); + BINARY_IMM32(SUB, 4096, SLJIT_SP, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + + INC_SIZE(2); + inst[0] = LOOP_i8; + inst[1] = (sljit_u8)-16; + local_size &= 0xfff; + } + } + + if (local_size > 0) { + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size); + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); + } + + if (r2_offset != -1) + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset); + +#else /* !_WIN32 */ + + SLJIT_ASSERT(local_size > 0); + + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); + +#endif /* _WIN32 */ + + size = SLJIT_LOCALS_OFFSET_BASE - SSIZE_OF(sw); + kept_saveds_count = SLJIT_R3 - kept_saveds_count; + + while (saved_arg_count > 3) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, kept_saveds_count, 0); + kept_saveds_count++; + size -= SSIZE_OF(sw); + saved_arg_count--; + } + + if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) { + if (status & ENTER_TMP_TO_R4) + size = 2 * SSIZE_OF(sw); + + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct 
sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 args_size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + arg_types >>= SLJIT_ARG_SHIFT; + args_size = 0; + + if (!(options & SLJIT_ENTER_REG_ARG)) { + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + args_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + args_size += SSIZE_OF(f32); + break; + default: + args_size += SSIZE_OF(sw); + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + } + + compiler->args_size = args_size; + + /* [esp+0] for saving temporaries and for function calls. */ + + saveds = (1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw); + + /* Saving ebp. */ + if (!(options & SLJIT_ENTER_REG_ARG)) + saveds += SSIZE_OF(sw); + + compiler->local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + saveds + 0xf) & ~0xf) - saveds; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to) +{ + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + sljit_s32 local_size, saveds; + sljit_uw size; + sljit_u8 *inst; + + size = (sljit_uw)((compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? 
compiler->saveds : 3) - kept_saveds_count); + + local_size = compiler->local_size; + + if (!(compiler->options & SLJIT_ENTER_REG_ARG)) + size++; + else if (is_return_to && size == 0) { + local_size += SSIZE_OF(sw); + is_return_to = 0; + } + + if (local_size > 0) + BINARY_IMM32(ADD, local_size, SLJIT_SP, 0); + + if (size == 0) + return SLJIT_SUCCESS; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); + + saveds = compiler->saveds; + + if ((saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) + POP_REG(reg_map[SLJIT_S0]); + if ((saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) + POP_REG(reg_map[SLJIT_S1]); + if ((saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9) + POP_REG(reg_map[SLJIT_S2]); + + if (!(compiler->options & SLJIT_ENTER_REG_ARG)) + POP_REG(reg_map[TMP_REG1]); + + if (is_return_to) + BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + SLJIT_ASSERT(compiler->args_size >= 0); + SLJIT_ASSERT(compiler->local_size > 0); + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + + return emit_byte(compiler, RET_near); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 src_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) { + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + + src_r = (compiler->options & SLJIT_ENTER_REG_ARG) ? 
TMP_REG1 : SLJIT_R1; + + EMIT_MOV(compiler, src_r, 0, src, srcw); + src = src_r; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Call / return instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_s32 call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) +{ + sljit_sw stack_size = 0; + sljit_s32 word_arg_count = 0; + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + stack_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + stack_size += SSIZE_OF(f32); + break; + default: + word_arg_count++; + stack_size += SSIZE_OF(sw); + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (word_arg_count_ptr) + *word_arg_count_ptr = word_arg_count; + + if (stack_size <= 4 * SSIZE_OF(sw)) + return 0; + + return ((stack_size - (4 * SSIZE_OF(sw)) + 0xf) & ~0xf); +} + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, + sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 keep_tmp1) +{ + sljit_s32 float_arg_count = 0, arg4_reg = 0, arg_offset; + sljit_u8 *inst; + + if (word_arg_count >= 4) { + arg4_reg = SLJIT_R0; + + if (!keep_tmp1) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw)); + arg4_reg = TMP_REG1; + } + } + + if (stack_size > 0) + BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0); + + arg_offset = 0; + word_arg_count = 0; + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count)); + arg_offset += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + 
FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count)); + arg_offset += SSIZE_OF(f32); + break; + default: + word_arg_count++; + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), arg_offset, (word_arg_count >= 4) ? arg4_reg : word_arg_count, 0); + + if (word_arg_count == 1 && arg4_reg == SLJIT_R0) + EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw) + stack_size); + + arg_offset += SSIZE_OF(sw); + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, + sljit_s32 arg_types, sljit_s32 stack_size) +{ + sljit_u8 *inst; + sljit_s32 single; + + if (stack_size > 0) + BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0); + + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) + return SLJIT_SUCCESS; + + single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + INC_SIZE(3); + inst[0] = single ? FSTPS : FSTPD; + inst[1] = (0x03 << 3) | 0x04; + inst[2] = (0x04 << 3) | reg_map[SLJIT_SP]; + + return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0); +} + +static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, + sljit_s32 *extra_space, sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_sw args_size, saved_regs_size; + sljit_sw types, word_arg_count, float_arg_count; + sljit_sw stack_size, prev_stack_size, min_size, offset; + sljit_sw word_arg4_offset; + sljit_u8 r2_offset = 0; + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + sljit_u8* inst; + + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + + saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? 
compiler->saveds : 3) - kept_saveds_count) * SSIZE_OF(sw); + + word_arg_count = 0; + float_arg_count = 0; + arg_types >>= SLJIT_ARG_SHIFT; + types = 0; + args_size = 0; + + while (arg_types != 0) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + args_size += SSIZE_OF(f64); + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + args_size += SSIZE_OF(f32); + float_arg_count++; + break; + default: + word_arg_count++; + args_size += SSIZE_OF(sw); + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (args_size <= compiler->args_size) { + *extra_space = 0; + stack_size = args_size + SSIZE_OF(sw) + saved_regs_size; + + offset = stack_size + compiler->local_size; + + if (src != SLJIT_IMM && src != SLJIT_R0) { + if (word_arg_count >= 1) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0); + r2_offset = sizeof(sljit_sw); + } + EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw); + } + + while (types != 0) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + offset -= SSIZE_OF(f64); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F32: + offset -= SSIZE_OF(f32); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + default: + switch (word_arg_count) { + case 1: + offset -= SSIZE_OF(sw); + if (r2_offset != 0) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } else + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0); + break; + case 2: + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0); + break; + case 3: + offset -= SSIZE_OF(sw); + break; + case 4: + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw)); + EMIT_MOV(compiler, 
SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + break; + } + word_arg_count--; + break; + } + types >>= SLJIT_ARG_SHIFT; + } + + return emit_stack_frame_release(compiler, 0); + } + + stack_size = args_size + SSIZE_OF(sw); + + if (word_arg_count >= 1 && src != SLJIT_IMM && src != SLJIT_R0) { + r2_offset = SSIZE_OF(sw); + stack_size += SSIZE_OF(sw); + } + + if (word_arg_count >= 3) + stack_size += SSIZE_OF(sw); + + prev_stack_size = SSIZE_OF(sw) + saved_regs_size; + min_size = prev_stack_size + compiler->local_size; + + word_arg4_offset = 2 * SSIZE_OF(sw); + + if (stack_size > min_size) { + BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0); + if (src == SLJIT_MEM1(SLJIT_SP)) + srcw += stack_size - min_size; + word_arg4_offset += stack_size - min_size; + } + else + stack_size = min_size; + + if (word_arg_count >= 3) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0); + + if (word_arg_count >= 4) + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset); + } + + if (src != SLJIT_IMM && src != SLJIT_R0) { + if (word_arg_count >= 1) { + SLJIT_ASSERT(r2_offset == sizeof(sljit_sw)); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0); + } + EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw); + } + + /* Restore saved registers. */ + offset = stack_size - 2 * SSIZE_OF(sw); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset); + + if (compiler->saveds > 2 || compiler->scratches > 9) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset); + } + if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset); + } + if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset); + } + + /* Copy fourth argument and return address. 
*/ + offset = stack_size - SSIZE_OF(sw); + *extra_space = args_size; + + if (word_arg_count >= 4) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } + + while (types != 0) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + offset -= SSIZE_OF(f64); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F32: + offset -= SSIZE_OF(f32); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + default: + switch (word_arg_count) { + case 1: + offset -= SSIZE_OF(sw); + if (r2_offset != 0) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } else + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0); + break; + case 2: + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0); + break; + case 3: + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + break; + } + word_arg_count--; + break; + } + types >>= SLJIT_ARG_SHIFT; + } + + SLJIT_ASSERT(offset >= 0); + + if (offset == 0) + return SLJIT_SUCCESS; + + BINARY_IMM32(ADD, offset, SLJIT_SP, 0); + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space) +{ + /* Called when stack consumption cannot be reduced to 0. 
*/ + sljit_u8 *inst; + + BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0); + return emit_byte(compiler, RET_near); +} + +static sljit_s32 tail_call_reg_arg_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_s32 word_arg_count = 0; + sljit_s32 kept_saveds_count, offset; + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) + word_arg_count++; + + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (word_arg_count < 4) + return SLJIT_SUCCESS; + + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw)); + + kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + offset = compiler->local_size + 3 * SSIZE_OF(sw); + + if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) + offset += SSIZE_OF(sw); + if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) + offset += SSIZE_OF(sw); + if ((compiler->saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9) + offset += SSIZE_OF(sw); + + return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + struct sljit_jump *jump; + sljit_sw stack_size = 0; + sljit_s32 word_arg_count; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + if (type & SLJIT_CALL_RETURN) { + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + PTR_FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types)); + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP)); + } + + stack_size = type; + PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0)); + + SLJIT_SKIP_CHECKS(compiler); + + if (stack_size == 0) + return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP)); + + jump 
= sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size)); + return jump; + } + + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); + } + + stack_size = call_get_stack_size(arg_types, &word_arg_count); + PTR_FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, 0)); + + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size)); + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_sw stack_size = 0; + sljit_s32 word_arg_count; + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (type & SLJIT_CALL_RETURN) { + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types)); + + if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) { + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); + } + + stack_size = type; + FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw)); + + if (src != SLJIT_IMM) { + src = SLJIT_R0; + srcw = 0; + } + + SLJIT_SKIP_CHECKS(compiler); + + if (stack_size == 0) + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); + + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + return emit_tail_call_end(compiler, stack_size); + } + + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, 
srcw); + } + + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (src & SLJIT_MEM) { + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + srcw = 0; + } + + stack_size = call_get_stack_size(arg_types, &word_arg_count); + FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, src == TMP_REG1)); + + if (stack_size > 0 && src == SLJIT_MEM1(SLJIT_SP)) + srcw += stack_size; + + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + return post_call_with_args(compiler, arg_types, stack_size); +} + +static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + + if (compiler->options & SLJIT_ENTER_REG_ARG) { + if (src == SLJIT_FR0) + return SLJIT_SUCCESS; + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw); + } + + if (FAST_IS_REG(src)) { + FAIL_IF(emit_sse2_store(compiler, op & SLJIT_32, SLJIT_MEM1(SLJIT_SP), 0, src)); + + src = SLJIT_MEM1(SLJIT_SP); + srcw = 0; + } else { + ADJUST_LOCAL_OFFSET(src, srcw); + } + + inst = emit_x86_instruction(compiler, 1 | EX86_SSE2_OP1, 0, 0, src, srcw); + *inst = (op & SLJIT_32) ? FLDS : FLDL; + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + sljit_u8 *inst; + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + /* Unused dest is possible here. */ + if (FAST_IS_REG(dst)) + return emit_byte(compiler, U8(POP_r + reg_map[dst])); + + /* Memory. 
*/ + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst = POP_rm; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) +{ + sljit_u8 *inst; + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (FAST_IS_REG(src)) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1); + FAIL_IF(!inst); + + INC_SIZE(1 + 1); + PUSH_REG(reg_map[src]); + } + else { + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + inst[0] = GROUP_FF; + inst[1] |= PUSH_rm; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + } + + RET(); + return SLJIT_SUCCESS; +} + +static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 options = compiler->options; + sljit_s32 saveds = compiler->saveds; + sljit_s32 scratches = compiler->scratches; + + saveds = ((scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw); + + /* Saving ebp. 
/*
 * Emits a conditional select: dst_reg receives src1 when the condition in
 * type holds and src2_reg otherwise.
 *
 * The code first loads the "otherwise" value into the destination and then
 * conditionally overwrites it. Whenever the two sources are exchanged to make
 * this possible, the lowest bit of type is toggled so that the inverted
 * condition is tested.
 *
 * compiler    the compiler instance emitting the code
 * type        condition code; the SLJIT_32 bit is cleared before use
 * dst_reg     destination register (may be rewritten to a stack slot by
 *             CHECK_EXTRA_REGS, in which case TMP_REG1 is the working register)
 * src1/src1w  first source operand (register, memory or immediate)
 * src2_reg    second source register
 *
 * Returns SLJIT_SUCCESS or the first error reported by an emitter.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_s32 dst = dst_reg;
	sljit_sw dstw = 0;
	sljit_sw src2w = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* May replace any of the three operands (and its offset) in place. */
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);

	type &= ~SLJIT_32;

	if (dst & SLJIT_MEM) {
		/* Destination is in memory: compute the result in TMP_REG1. */
		if (src1 == SLJIT_IMM || (!(src1 & SLJIT_MEM) && (src2_reg & SLJIT_MEM))) {
			/* Preload src1 and select src2 under the inverted condition. */
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = src2_reg;
			src1w = src2w;
			type ^= 0x1;
		} else
			EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);

		dst_reg = TMP_REG1;
	} else {
		if (dst_reg != src2_reg) {
			if (dst_reg == src1) {
				/* dst already holds src1: only src2 has to be moved conditionally. */
				src1 = src2_reg;
				src1w = src2w;
				type ^= 0x1;
			} else if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
				/* Writing dst first would change the address of src1, so
				   src1 is loaded first and the roles are exchanged. */
				EMIT_MOV(compiler, dst_reg, 0, src1, src1w);
				src1 = src2_reg;
				src1w = src2w;
				type ^= 0x1;
			} else
				EMIT_MOV(compiler, dst_reg, 0, src2_reg, src2w);
		}
	}

	/* The CMOV form is skipped when src1 is an immediate and TMP_REG1 is
	   already the working register, since the immediate would need TMP_REG1. */
	if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && (src1 != SLJIT_IMM || dst_reg != TMP_REG1)) {
		if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}

		/* Opcode is derived from the jump code of the condition minus 0x40. */
		FAIL_IF(emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w));
	} else
		FAIL_IF(emit_cmov_generic(compiler, type, dst_reg, src1, src1w));

	/* Write the result back when the real destination is in memory. */
	if (dst & SLJIT_MEM)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
MOV_rm_r : MOV_r_rm; + inst[1] = 0x44 | U8(reg_map[reg] << 3); + inst[2] = U8(memw << 6) | U8(reg_map[OFFS_REG(mem)] << 3) | reg_map[mem & REG_MASK]; + inst[3] = sizeof(sljit_sw); + } else if (type & SLJIT_MEM_STORE) { + EMIT_MOV(compiler, mem, memw, reg, 0); + } else { + EMIT_MOV(compiler, reg, 0, mem, memw); + } + + if (!(mem & OFFS_REG_MASK)) + memw += next; + + if (!(type & SLJIT_MEM_STORE) && offset != -1) + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + sljit_u8 *inst, *jump_inst1, *jump_inst2; + sljit_uw size1, size2; + + /* Binary representation of 0x80000000. */ + static const sljit_f64 f64_high_bit = (sljit_f64)0x80000000ul; + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (!(op & SLJIT_32)) { + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0); + FAIL_IF(!inst); + inst[1] |= ROL; + + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0); + FAIL_IF(!inst); + inst[1] |= SHR; + + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_PREF_F2 | EX86_SSE2_OP1, dst_r, TMP_REG1, 0)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = U8(get_jump_code(SLJIT_NOT_CARRY) - 0x10); + + size1 = compiler->size; + FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_PREF_F2 | EX86_SSE2, dst_r, SLJIT_MEM0(), (sljit_sw)&f64_high_bit)); + + inst[1] = U8(compiler->size - size1); + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, 0, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; + } + + if (!FAST_IS_REG(src)) { + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + } + + BINARY_IMM32(CMP, 0, src, 0); + + inst = 
(sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = JL_i8; + jump_inst1 = inst; + + size1 = compiler->size; + + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = JMP_i8; + jump_inst2 = inst; + + size2 = compiler->size; + + jump_inst1[1] = U8(size2 - size1); + + if (src != TMP_REG1) + EMIT_MOV(compiler, TMP_REG1, 0, src, 0); + + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0); + FAIL_IF(!inst); + inst[1] |= SHR; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = JNC_i8; + jump_inst1 = inst; + + size1 = compiler->size; + + BINARY_IMM32(OR, 1, TMP_REG1, 0); + jump_inst1[1] = U8(compiler->size - size1); + + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0)); + FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0)); + + jump_inst2[1] = U8(compiler->size - size2); + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + sljit_u8 *inst; + union { + sljit_s32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm != 0) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + + inst[0] = GROUP_66; + inst[1] = GROUP_0F; + + if (u.imm == 0) { + inst[2] = PXOR_x_xm; + inst[3] = U8(freg_map[freg] | (freg_map[freg] << 3) | MOD_REG); + } else { + inst[2] = MOVD_x_rm; + inst[3] = U8(reg_map[TMP_REG1] | (freg_map[freg] << 3) | MOD_REG); + } + + return SLJIT_SUCCESS; +} 
/*
 * Loads a double precision constant into the float register freg on a
 * 32 bit target.
 *
 * The constant is viewed as two 32 bit words, imm[0] and imm[1] (the low and
 * high half respectively on little-endian x86). The shortest sequence is
 * chosen depending on which halves are zero, whether both halves are equal,
 * and whether SSE4.1 is available.
 *
 * Returns SLJIT_SUCCESS, or an error code when emitting fails.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
	sljit_u8 *inst;
	union {
		sljit_s32 imm[2];
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

	if (u.imm[0] == 0) {
		/* All bits zero: clear the register by xor-ing it with itself. */
		if (u.imm[1] == 0)
			return emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0);

		/* Only the second word is set; it is shuffled into place below. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);
	} else {
		SLJIT_ASSERT(cpu_feature_list != 0);

		/* Two different non-zero words and no SSE4.1 (PINSRD is unavailable):
		   build the value in the two words at [SP] and load it from memory. */
		if (!(cpu_feature_list & CPU_FEATURE_SSE41) && u.imm[1] != 0 && u.imm[0] != u.imm[1]) {
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, u.imm[0]);
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_IMM, u.imm[1]);

			return emit_groupf(compiler, MOVLPD_x_m | EX86_SSE2, freg, SLJIT_MEM1(SLJIT_SP), 0);
		}

		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[0]);
	}

	/* Move TMP_REG1 into the lowest 32 bits of freg. */
	FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, TMP_REG1, 0));

	/* The first word was loaded and the second word is zero: done. */
	if (u.imm[1] == 0)
		return SLJIT_SUCCESS;

	if (u.imm[0] == 0) {
		/* freg currently holds imm[1] in its lowest word. SHUFPS freg, freg
		   with selector 0x51 puts word 0 into word 1 and word 1 (zero after
		   the MOVD) into word 0, i.e. moves the value to the second word. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		inst[0] = GROUP_0F;
		inst[1] = SHUFPS_x_xm;
		inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[freg]);
		inst[3] = 0x51;
		return SLJIT_SUCCESS;
	}

	if (u.imm[0] != u.imm[1]) {
		/* Different words: insert the second one at element index 1.
		   The index is the immediate byte emitted after the instruction. */
		SLJIT_ASSERT(cpu_feature_list & CPU_FEATURE_SSE41);
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);

		FAIL_IF(emit_groupf_ext(compiler, PINSRD_x_rm_i8 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, TMP_REG1, 0));
		return emit_byte(compiler, 1);
	}

	/* Both words are equal: UNPCKLPS freg, freg duplicates the lowest word
	   into the second one. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);

	inst[0] = GROUP_0F;
	inst[1] = UNPCKLPS_x_xm;
	inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[freg]);
	return SLJIT_SUCCESS;
}
sljit_s32 reg) +{ + sljit_u8 *inst; + sljit_s32 reg2; + sljit_sw regw, reg2w; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + regw = 0; + reg2 = 0; + reg2w = 0; + + SLJIT_ASSERT(cpu_feature_list != 0); + + if (!(op & SLJIT_32) && (cpu_feature_list & CPU_FEATURE_SSE41)) { + if (reg & REG_PAIR_MASK) { + reg2 = REG_PAIR_FIRST(reg); + reg = REG_PAIR_SECOND(reg); + + CHECK_EXTRA_REGS(reg, regw, (void)0); + + FAIL_IF(emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x) + | EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw)); + } else + reg2 = reg; + + CHECK_EXTRA_REGS(reg2, reg2w, (void)0); + + FAIL_IF(emit_groupf_ext(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? PINSRD_x_rm_i8 : PEXTRD_rm_x_i8) + | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, reg2, reg2w)); + return emit_byte(compiler, 1); + } + + if (reg & REG_PAIR_MASK) { + reg2 = REG_PAIR_SECOND(reg); + reg = REG_PAIR_FIRST(reg); + + if (reg == reg2) + reg = 0; + + CHECK_EXTRA_REGS(reg2, reg2w, (void)0); + } + + CHECK_EXTRA_REGS(reg, regw, (void)0); + + if (op & SLJIT_32) + return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x) + | EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw); + + if (op == SLJIT_COPY_FROM_F64) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + + inst[0] = GROUP_66; + inst[1] = GROUP_0F; + inst[2] = PSHUFD_x_xm; + inst[3] = U8(MOD_REG | (TMP_FREG << 3) | freg_map[freg]); + inst[4] = 1; + } else if (reg != 0) + FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw)); + + if (reg2 != 0) + FAIL_IF(emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? 
MOVD_x_rm : MOVD_rm_x) + | EX86_PREF_66 | EX86_SSE2_OP1, freg, reg2, reg2w)); + + if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + INC_SIZE(3); + + inst[0] = GROUP_0F; + inst[1] = UNPCKLPS_x_xm; + inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[reg == 0 ? freg : TMP_FREG]); + } else + FAIL_IF(emit_groupf(compiler, MOVD_rm_x | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw)); + + return SLJIT_SUCCESS; +} + +static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler) +{ + sljit_sw size; + + /* Don't adjust shadow stack if it isn't enabled. */ + if (!cpu_has_shadow_stack()) + return SLJIT_SUCCESS; + + SLJIT_ASSERT(compiler->args_size >= 0); + SLJIT_ASSERT(compiler->local_size > 0); + + size = compiler->local_size; + size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw); + + return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size); +} diff --git a/src/sljit/sljitNativeX86_64.c b/src/sljit/sljitNativeX86_64.c new file mode 100644 index 0000000..1ab7929 --- /dev/null +++ b/src/sljit/sljitNativeX86_64.c @@ -0,0 +1,1398 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* x86 64-bit arch dependent functions. */ + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) +{ + sljit_u8 *inst; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw)); + FAIL_IF(!inst); + INC_SIZE(2 + sizeof(sljit_sw)); + inst[0] = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); + inst[1] = U8(MOV_r_i32 | reg_lmap[reg]); + sljit_unaligned_store_sw(inst + 2, imm); + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm) +{ + sljit_u8 *inst; + sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + length); + FAIL_IF(!inst); + INC_SIZE(length); + if (rex) + *inst++ = rex; + *inst++ = opcode; + sljit_unaligned_store_s32(inst, (sljit_s32)imm); + return SLJIT_SUCCESS; +} + +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size, + /* The register or immediate operand. 
*/ + sljit_s32 a, sljit_sw imma, + /* The general operand (not immediate). */ + sljit_s32 b, sljit_sw immb) +{ + sljit_u8 *inst; + sljit_u8 *buf_ptr; + sljit_u8 rex = 0; + sljit_u8 reg_lmap_b; + sljit_uw flags = size; + sljit_uw inst_size; + + /* The immediate operand must be 32 bit. */ + SLJIT_ASSERT(a != SLJIT_IMM || compiler->mode32 || IS_HALFWORD(imma)); + /* Both cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); + /* Size flags not allowed for typed instructions. */ + SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); + /* Both size flags cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); + /* SSE2 and immediate is not possible. */ + SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2)); + SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) + & ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0); + SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT); + + size &= 0xf; + /* The mod r/m byte is always present. */ + inst_size = size + 1; + + if (!compiler->mode32 && !(flags & EX86_NO_REXW)) + rex |= REX_W; + else if (flags & EX86_REX) + rex |= REX; + + if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) + inst_size++; + + /* Calculate size of b. */ + if (b & SLJIT_MEM) { + if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) { + PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb)); + immb = 0; + if (b & REG_MASK) + b |= TO_OFFS_REG(TMP_REG2); + else + b |= TMP_REG2; + } + + if (!(b & REG_MASK)) + inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */ + else { + if (immb != 0 && !(b & OFFS_REG_MASK)) { + /* Immediate operand. 
*/ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_s8); + else + inst_size += sizeof(sljit_s32); + } else if (reg_lmap[b & REG_MASK] == 5) { + /* Swap registers if possible. */ + if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5) + b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK); + else + inst_size += sizeof(sljit_s8); + } + + if (reg_map[b & REG_MASK] >= 8) + rex |= REX_B; + + if (reg_lmap[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK)) + b |= TO_OFFS_REG(SLJIT_SP); + + if (b & OFFS_REG_MASK) { + inst_size += 1; /* SIB byte. */ + if (reg_map[OFFS_REG(b)] >= 8) + rex |= REX_X; + } + } + } else if (!(flags & EX86_SSE2_OP2)) { + if (reg_map[b] >= 8) + rex |= REX_B; + } else if (freg_map[b] >= 8) + rex |= REX_B; + + if ((flags & EX86_VEX_EXT) && (rex & 0x3)) { + SLJIT_ASSERT(size == 2); + size++; + inst_size++; + } + + if (a == SLJIT_IMM) { + if (flags & EX86_BIN_INS) { + if (imma <= 127 && imma >= -128) { + inst_size += 1; + flags |= EX86_BYTE_ARG; + } else + inst_size += 4; + } else if (flags & EX86_SHIFT_INS) { + SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f)); + if (imma != 1) { + inst_size++; + flags |= EX86_BYTE_ARG; + } + } else if (flags & EX86_BYTE_ARG) + inst_size++; + else if (flags & EX86_HALF_ARG) + inst_size += sizeof(short); + else + inst_size += sizeof(sljit_s32); + } else { + SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); + /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */ + if (!(flags & EX86_SSE2_OP1)) { + if (reg_map[a] >= 8) + rex |= REX_R; + } + else if (freg_map[a] >= 8) + rex |= REX_R; + } + + if (rex) + inst_size++; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!inst); + + /* Encoding prefixes. */ + INC_SIZE(inst_size); + if (flags & EX86_PREF_F2) + *inst++ = 0xf2; + else if (flags & EX86_PREF_F3) + *inst++ = 0xf3; + else if (flags & EX86_PREF_66) + *inst++ = 0x66; + + /* Rex is always the last prefix. 
*/ + if (rex) + *inst++ = rex; + + buf_ptr = inst + size; + + /* Encode mod/rm byte. */ + if (!(flags & EX86_SHIFT_INS)) { + if ((flags & EX86_BIN_INS) && a == SLJIT_IMM) + *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; + + if (a == SLJIT_IMM) + *buf_ptr = 0; + else if (!(flags & EX86_SSE2_OP1)) + *buf_ptr = U8(reg_lmap[a] << 3); + else + *buf_ptr = U8(freg_lmap[a] << 3); + } else { + if (a == SLJIT_IMM) { + if (imma == 1) + *inst = GROUP_SHIFT_1; + else + *inst = GROUP_SHIFT_N; + } else + *inst = GROUP_SHIFT_CL; + *buf_ptr = 0; + } + + if (!(b & SLJIT_MEM)) { + *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmap[b] : freg_lmap[b])); + buf_ptr++; + } else if (b & REG_MASK) { + reg_lmap_b = reg_lmap[b & REG_MASK]; + + if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { + if (immb != 0 || reg_lmap_b == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr |= 0x40; + else + *buf_ptr |= 0x80; + } + + if (!(b & OFFS_REG_MASK)) + *buf_ptr++ |= reg_lmap_b; + else { + buf_ptr[0] |= 0x04; + buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3)); + buf_ptr += 2; + } + + if (immb != 0 || reg_lmap_b == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr++ = U8(immb); /* 8 bit displacement. */ + else { + sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_s32); + } + } + } else { + if (reg_lmap_b == 5) + *buf_ptr |= 0x40; + + buf_ptr[0] |= 0x04; + buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6)); + buf_ptr += 2; + + if (reg_lmap_b == 5) + *buf_ptr++ = 0; + } + } else { + buf_ptr[0] |= 0x04; + buf_ptr[1] = 0x25; + buf_ptr += 2; + sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. 
/*
 * Emits a VEX encoded instruction.
 *
 * The instruction is first generated by emit_x86_instruction with a forced
 * REX prefix and placeholder opcode bytes. The REX byte and the placeholders
 * are then overwritten with either the two byte (0xc5) or the three byte
 * (0xc4) VEX prefix followed by the opcode.
 *
 * compiler  the compiler instance emitting the code
 * op        opcode in the low 8 bits combined with EX86_* and VEX_* flags
 * a         first register operand (passed on as the register operand)
 * v         second register operand, stored inverted in the prefix
 * b, immb   the general (register or memory) operand
 *
 * Returns SLJIT_SUCCESS, or an error code when emitting fails.
 */
static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op,
	/* The first and second register operand. */
	sljit_s32 a, sljit_s32 v,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	/* Prefix byte holding W, the inverted v register, the 256 bit flag and
	   the 66/F2/F3 selector. */
	sljit_u8 vex = 0;
	/* Opcode map selector; zero means the two byte form can be used. */
	sljit_u8 vex_m = 0;
	sljit_uw size;

	/* At most one of the 66 / F2 / F3 prefix flags may be set. */
	SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
		& ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);

	/* Force a REX byte so that there is always a byte at inst[-1] to patch. */
	op |= EX86_REX;

	if (op & VEX_OP_0F38)
		vex_m = 0x2;
	else if (op & VEX_OP_0F3A)
		vex_m = 0x3;

	/* The W bit exists only in the three byte form, so a map is selected. */
	if ((op & VEX_W) || ((op & VEX_AUTO_W) && !compiler->mode32)) {
		if (vex_m == 0)
			vex_m = 0x1;

		vex |= 0x80;
	}

	/* The legacy prefix is expressed by the two lowest bits of the prefix. */
	if (op & EX86_PREF_66)
		vex |= 0x1;
	else if (op & EX86_PREF_F2)
		vex |= 0x3;
	else if (op & EX86_PREF_F3)
		vex |= 0x2;

	/* Cleared so that emit_x86_instruction does not emit a legacy prefix. */
	op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3);

	if (op & VEX_256)
		vex |= 0x4;

	/* The second register operand is stored bit-inverted in bits 3..6. */
	vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3));

	/* Keep the flag bits and request 2 opcode bytes (two byte form, which
	   emit_x86_instruction may extend by one) or 3 (three byte form). */
	size = op & ~(sljit_uw)0xff;
	size |= (vex_m == 0) ? (EX86_VEX_EXT | 2) : 3;

	inst = emit_x86_instruction(compiler, size, a, 0, b, immb);
	FAIL_IF(!inst);

	SLJIT_ASSERT((inst[-1] & 0xf0) == REX);

	/* If X or B is present in REX prefix. */
	if (vex_m == 0 && inst[-1] & 0x3)
		vex_m = 0x1;

	if (vex_m == 0) {
		/* Two byte form: the inverted REX.R bit becomes bit 7. */
		vex |= U8(((inst[-1] >> 2) ^ 0x1) << 7);

		inst[-1] = 0xc5;
		inst[0] = vex;
		inst[1] = U8(op);
		return SLJIT_SUCCESS;
	}

	/* Three byte form: inverted R, X and B bits go above the map selector. */
	vex_m |= U8((inst[-1] ^ 0x7) << 5);
	inst[-1] = 0xc4;
	inst[0] = vex_m;
	inst[1] = vex;
	inst[2] = U8(op);
	return SLJIT_SUCCESS;
}
/*
 * Shrinks a previously emitted "load address into register" instruction once
 * the target address is known.
 *
 * code_ptr points just past a REX_W + MOV_r_i32 instruction carrying a full
 * machine-word immediate. Depending on the target address the instruction is
 * kept as is, rewritten into an LEA with a 32 bit displacement, or reduced to
 * a move with a 32 bit immediate.
 *
 * jump               the record describing the address load; its flags
 *                    receive PATCH_MW or PATCH_MD when a later patch is needed
 * code_ptr           position right after the emitted instruction
 * code               start of the generated code (used for label targets)
 * executable_offset  offset applied through SLJIT_ADD_EXEC_OFFSET
 *
 * Returns the new end of the instruction.
 */
static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
{
	sljit_uw addr;
	sljit_sw diff;
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) <= 10);
	if (jump->flags & JUMP_ADDR)
		addr = jump->u.target;
	else
		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + jump->u.label->size;

	if (addr > 0xffffffffl) {
		/* The address does not fit into 32 bits. */
		diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

		if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN) {
			/* Reachable with a signed 32 bit displacement: turn the move
			   into an LEA, which is three bytes shorter. */
			SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 7);
			code_ptr -= SSIZE_OF(s32) - 1;

			SLJIT_ASSERT((code_ptr[-3 - SSIZE_OF(s32)] & 0xf8) == REX_W);
			SLJIT_ASSERT((code_ptr[-2 - SSIZE_OF(s32)] & 0xf8) == MOV_r_i32);

			/* The register moves from the opcode byte into the reg field of
			   a mod r/m byte, so REX bit 0 (B) is moved to bit 2 (R). */
			code_ptr[-3 - SSIZE_OF(s32)] = U8(REX_W | ((code_ptr[-3 - SSIZE_OF(s32)] & 0x1) << 2));
			/* mod r/m: low register bits in the reg field, r/m = 5. */
			code_ptr[-1 - SSIZE_OF(s32)] = U8(((code_ptr[-2 - SSIZE_OF(s32)] & 0x7) << 3) | 0x5);
			code_ptr[-2 - SSIZE_OF(s32)] = LEA_r_m;

			jump->flags |= PATCH_MW;
			return code_ptr;
		}

		/* Keep the full-width immediate form. */
		jump->flags |= PATCH_MD;
		return code_ptr;
	}

	/* The address fits into 32 bits: step back to the start of the move. */
	code_ptr -= 2 + sizeof(sljit_uw);

	SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
	SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);

	if ((code_ptr[0] & 0x07) != 0) {
		/* Other REX bits are in use: keep the prefix but clear bit 0x08 (W). */
		SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 6);
		code_ptr[0] = U8(code_ptr[0] & ~0x08);
		code_ptr += 2 + sizeof(sljit_s32);
	} else {
		/* The prefix is not needed: the opcode byte takes its place. */
		SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 5);
		code_ptr[0] = code_ptr[1];
		code_ptr += 1 + sizeof(sljit_s32);
	}

	return code_ptr;
}
fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_uw size; + sljit_s32 word_arg_count = 0; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); + sljit_s32 saved_regs_size, tmp, i; +#ifdef _WIN64 + sljit_s32 saved_float_regs_size; + sljit_s32 saved_float_regs_offset = 0; + sljit_s32 float_arg_count = 0; +#endif /* _WIN64 */ + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + if (options & SLJIT_ENTER_REG_ARG) + arg_types = 0; + + /* Emit ENDBR64 at function entry if needed. */ + FAIL_IF(emit_endbranch(compiler)); + + compiler->mode32 = 0; + + /* Including the return address saved by the call instruction. */ + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + PUSH_REG(reg_lmap[i]); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + size = reg_map[i] >= 8 ? 
2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + PUSH_REG(reg_lmap[i]); + } + +#ifdef _WIN64 + local_size += SLJIT_LOCALS_OFFSET; + saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg); + + if (saved_float_regs_size > 0) { + saved_float_regs_offset = ((local_size + 0xf) & ~0xf); + local_size = saved_float_regs_offset + saved_float_regs_size; + } +#else /* !_WIN64 */ + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0); +#endif /* _WIN64 */ + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + tmp = 0; +#ifndef _WIN64 + switch (word_arg_count) { + case 0: + tmp = SLJIT_R2; + break; + case 1: + tmp = SLJIT_R1; + break; + case 2: + tmp = TMP_REG1; + break; + default: + tmp = SLJIT_R3; + break; + } +#else /* !_WIN64 */ + switch (word_arg_count + float_arg_count) { + case 0: + tmp = SLJIT_R3; + break; + case 1: + tmp = SLJIT_R1; + break; + case 2: + tmp = SLJIT_R2; + break; + default: + tmp = TMP_REG1; + break; + } +#endif /* _WIN64 */ + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { + if (tmp != SLJIT_R0 + word_arg_count) + EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0); + } else { + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0); + saved_arg_count++; + } + word_arg_count++; + } else { +#ifdef _WIN64 + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + float_arg_count++; + if (float_arg_count != float_arg_count + word_arg_count) + FAIL_IF(emit_sse2_load(compiler, (arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32, + float_arg_count, float_arg_count + word_arg_count, 0)); +#endif /* _WIN64 */ + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size; + compiler->local_size = local_size; + +#ifdef _WIN64 + if (local_size > 0) { + if (local_size <= 4 * 4096) { + if (local_size > 4096) + 
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096); + if (local_size > 2 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); + if (local_size > 3 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12); + + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096); + BINARY_IMM32(SUB, 4096, SLJIT_SP, 0); + BINARY_IMM32(SUB, 1, TMP_REG1, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + + INC_SIZE(2); + inst[0] = JNE_i8; + inst[1] = (sljit_u8)-21; + local_size &= 0xfff; + } + + if (local_size > 0) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); + } +#endif /* _WIN64 */ + + if (local_size > 0) + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); + +#ifdef _WIN64 + if (saved_float_regs_size > 0) { + compiler->mode32 = 1; + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset)); + saved_float_regs_offset += 16; + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset)); + saved_float_regs_offset += 16; + } + } +#endif /* _WIN64 */ + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 saved_regs_size; +#ifdef _WIN64 + sljit_s32 saved_float_regs_size; +#endif /* _WIN64 */ + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + +#ifdef _WIN64 + local_size += 
SLJIT_LOCALS_OFFSET; + saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg); + + if (saved_float_regs_size > 0) + local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size; +#else /* !_WIN64 */ + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0); +#endif /* _WIN64 */ + + /* Including the return address saved by the call instruction. */ + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); + compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to) +{ + sljit_uw size; + sljit_s32 local_size, i, tmp; + sljit_u8 *inst; +#ifdef _WIN64 + sljit_s32 saved_float_regs_offset; + sljit_s32 fscratches = compiler->fscratches; + sljit_s32 fsaveds = compiler->fsaveds; +#endif /* _WIN64 */ + +#ifdef _WIN64 + saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg); + + if (saved_float_regs_offset > 0) { + compiler->mode32 = 1; + saved_float_regs_offset = (compiler->local_size - saved_float_regs_offset) & ~0xf; + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset)); + saved_float_regs_offset += 16; + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset)); + saved_float_regs_offset += 16; + } + + compiler->mode32 = 0; + } +#endif /* _WIN64 */ + + local_size = compiler->local_size; + + if (is_return_to && compiler->scratches < SLJIT_FIRST_SAVED_REG && (compiler->saveds == SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + local_size += SSIZE_OF(sw); + is_return_to = 0; + } + + if (local_size > 0) + BINARY_IMM32(ADD, local_size, SLJIT_SP, 0); + + tmp = compiler->scratches; + for (i = 
SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); + } + + tmp = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + for (i = SLJIT_S0 + 1 - compiler->saveds; i <= tmp; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); + } + + if (is_return_to) + BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + compiler->mode32 = 0; + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + return emit_byte(compiler, RET_near); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + compiler->mode32 = 0; + + if ((src & SLJIT_MEM) || (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) { + ADJUST_LOCAL_OFFSET(src, srcw); + + EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); + src = TMP_REG2; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Call / return instructions */ +/* --------------------------------------------------------------------- */ + +#ifndef _WIN64 + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr) +{ + sljit_s32 src = src_ptr ? 
(*src_ptr) : 0; + sljit_s32 word_arg_count = 0; + + SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2); + SLJIT_ASSERT(!(src & SLJIT_MEM)); + + /* Remove return value. */ + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) + word_arg_count++; + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (word_arg_count == 0) + return SLJIT_SUCCESS; + + if (word_arg_count >= 3) { + if (src == SLJIT_R2) + *src_ptr = TMP_REG1; + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0); + } + + return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0); +} + +#else + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr) +{ + sljit_s32 src = src_ptr ? (*src_ptr) : 0; + sljit_s32 arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 data_trandfer = 0; + static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 }; + + SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9); + SLJIT_ASSERT(!(src & SLJIT_MEM)); + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: + arg_count++; + float_arg_count++; + + if (arg_count != float_arg_count) + data_trandfer = 1; + break; + default: + arg_count++; + word_arg_count++; + + if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) { + data_trandfer = 1; + + if (src == word_arg_regs[arg_count]) { + EMIT_MOV(compiler, TMP_REG2, 0, src, 0); + *src_ptr = TMP_REG2; + } + } + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (!data_trandfer) + return SLJIT_SUCCESS; + + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (arg_count != float_arg_count) + 
FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0)); + arg_count--; + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F32: + if (arg_count != float_arg_count) + FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0)); + arg_count--; + float_arg_count--; + break; + default: + if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) + EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0); + arg_count--; + word_arg_count--; + break; + } + + types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#endif + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + compiler->mode32 = 0; + + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + compiler->mode32 = 0; + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); + src = TMP_REG2; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); + src = TMP_REG2; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + } + + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + FAIL_IF(call_with_args(compiler, arg_types, &src)); + + if (type & SLJIT_CALL_RETURN) + type = SLJIT_JUMP; + + 
SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + sljit_u8 *inst; + + if (FAST_IS_REG(dst)) { + if (reg_map[dst] < 8) + return emit_byte(compiler, U8(POP_r + reg_lmap[dst])); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + *inst++ = REX_B; + POP_REG(reg_lmap[dst]); + return SLJIT_SUCCESS; + } + + /* REX_W is not necessary (src is not immediate). */ + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst = POP_rm; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) +{ + sljit_u8 *inst; + + if (FAST_IS_REG(src)) { + if (reg_map[src] < 8) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1); + FAIL_IF(!inst); + + INC_SIZE(1 + 1); + PUSH_REG(reg_lmap[src]); + } + else { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1); + FAIL_IF(!inst); + + INC_SIZE(2 + 1); + *inst++ = REX_B; + PUSH_REG(reg_lmap[src]); + } + } + else { + /* REX_W is not necessary (src is not immediate). 
*/ + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + inst[0] = GROUP_FF; + inst[1] |= PUSH_rm; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + } + + RET(); + return SLJIT_SUCCESS; +} + +static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 saved_regs_size; + + compiler->mode32 = 0; + saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0); + return emit_mov(compiler, dst, dstw, SLJIT_MEM1(SLJIT_SP), compiler->local_size + saved_regs_size); +} + +/* --------------------------------------------------------------------- */ +/* Other operations */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + compiler->mode32 = type & SLJIT_32; + type &= ~SLJIT_32; + + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + EMIT_MOV(compiler, dst_reg, 0, src1, src1w); + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else + EMIT_MOV(compiler, dst_reg, 0, src2_reg, 0); + } + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) { + if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REG2, 0, src1, src1w); + src1 = TMP_REG2; + src1w = 0; + } + + return emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w); + } + + return emit_cmov_generic(compiler, type, dst_reg, src1, src1w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, 
sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_u8* inst; + sljit_s32 i, next, reg_idx; + sljit_u8 regs[2]; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + ADJUST_LOCAL_OFFSET(mem, memw); + + compiler->mode32 = 0; + + if ((mem & REG_MASK) == 0) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw); + + mem = SLJIT_MEM1(TMP_REG1); + memw = 0; + } else if (!(mem & OFFS_REG_MASK) && ((memw < HALFWORD_MIN) || (memw > HALFWORD_MAX - SSIZE_OF(sw)))) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw); + + mem = SLJIT_MEM2(mem & REG_MASK, TMP_REG1); + memw = 0; + } + + regs[0] = U8(REG_PAIR_FIRST(reg)); + regs[1] = U8(REG_PAIR_SECOND(reg)); + + next = SSIZE_OF(sw); + + if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) { + if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) { + /* Base and offset cannot be TMP_REG1. */ + EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0); + + if (regs[1] == OFFS_REG(mem)) + next = -SSIZE_OF(sw); + + mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); + } else { + next = -SSIZE_OF(sw); + + if (!(mem & OFFS_REG_MASK)) + memw += SSIZE_OF(sw); + } + } + + for (i = 0; i < 2; i++) { + reg_idx = next > 0 ? i : (i ^ 0x1); + reg = regs[reg_idx]; + + if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) { + inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 5)); + FAIL_IF(!inst); + + INC_SIZE(5); + + inst[0] = U8(REX_W | ((reg_map[reg] >= 8) ? REX_R : 0) | ((reg_map[mem & REG_MASK] >= 8) ? REX_B : 0) | ((reg_map[OFFS_REG(mem)] >= 8) ? REX_X : 0)); + inst[1] = (type & SLJIT_MEM_STORE) ? 
MOV_rm_r : MOV_r_rm; + inst[2] = 0x44 | U8(reg_lmap[reg] << 3); + inst[3] = U8(memw << 6) | U8(reg_lmap[OFFS_REG(mem)] << 3) | reg_lmap[mem & REG_MASK]; + inst[4] = sizeof(sljit_sw); + } else if (type & SLJIT_MEM_STORE) { + EMIT_MOV(compiler, mem, memw, reg, 0); + } else { + EMIT_MOV(compiler, reg, 0, mem, memw); + } + + if (!(mem & OFFS_REG_MASK)) + memw += next; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_s32 dst_r; + + compiler->mode32 = 0; + + if (src == SLJIT_IMM) { + if (FAST_IS_REG(dst)) { + if (!sign || ((sljit_u32)srcw <= 0x7fffffff)) + return emit_do_imm32(compiler, reg_map[dst] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[dst]), srcw); + + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; + } + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + compiler->mode32 = 0; + return SLJIT_SUCCESS; + } + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) + dst_r = src; + else { + if (sign) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOVSXD_r_rm; + } else { + compiler->mode32 = 1; + EMIT_MOV(compiler, dst_r, 0, src, srcw); + compiler->mode32 = 0; + } + } + + if (dst & SLJIT_MEM) { + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + compiler->mode32 = 0; + } + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG; + sljit_u8 *inst, *jump_inst1, *jump_inst2; + sljit_uw size1, size2; + + compiler->mode32 = 0; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) { + if (src != SLJIT_IMM) { + compiler->mode32 = 1; + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + compiler->mode32 = 0; + } else + FAIL_IF(emit_do_imm32(compiler, reg_map[TMP_REG1] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[TMP_REG1]), srcw)); + + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0)); + + compiler->mode32 = 1; + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; + } + + if (!FAST_IS_REG(src)) { + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + } + + BINARY_IMM32(CMP, 0, src, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = JL_i8; + jump_inst1 = inst; + + size1 = compiler->size; + + compiler->mode32 = 0; + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = JMP_i8; + jump_inst2 = inst; + + size2 = compiler->size; + + jump_inst1[1] = U8(size2 - size1); + + if (src != TMP_REG1) + EMIT_MOV(compiler, TMP_REG1, 0, src, 0); + + EMIT_MOV(compiler, TMP_REG2, 0, src, 0); + + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0); + FAIL_IF(!inst); + inst[1] |= SHR; + + compiler->mode32 = 1; + BINARY_IMM32(AND, 1, TMP_REG2, 0); + + compiler->mode32 = 0; + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG2, 0); + FAIL_IF(!inst); + inst[0] = OR_r_rm; + + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0)); + compiler->mode32 = 1; + FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0)); + + jump_inst2[1] = U8(compiler->size - size2); + + 
if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +static sljit_s32 sljit_emit_fset(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_u8 rex, sljit_s32 is_zero) +{ + sljit_u8 *inst; + sljit_u32 size; + + if (is_zero) { + rex = freg_map[freg] >= 8 ? (REX_R | REX_B) : 0; + } else { + if (freg_map[freg] >= 8) + rex |= REX_R; + if (reg_map[TMP_REG1] >= 8) + rex |= REX_B; + } + + size = (rex != 0) ? 5 : 4; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + + *inst++ = GROUP_66; + if (rex != 0) + *inst++ = rex; + inst[0] = GROUP_0F; + + if (is_zero) { + inst[1] = PXOR_x_xm; + inst[2] = U8(freg_lmap[freg] | (freg_lmap[freg] << 3) | MOD_REG); + } else { + inst[1] = MOVD_x_rm; + inst[2] = U8(reg_lmap[TMP_REG1] | (freg_lmap[freg] << 3) | MOD_REG); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + union { + sljit_s32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm != 0) { + compiler->mode32 = 1; + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm); + } + + return sljit_emit_fset(compiler, freg, 0, u.imm == 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_sw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm != 0) { + compiler->mode32 = 0; + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm); + } + + return sljit_emit_fset(compiler, freg, REX_W, u.imm == 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ + sljit_u8 *inst; + sljit_u32 size; + sljit_u8 rex = 0; + + 
CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + if (!(op & SLJIT_32)) + rex = REX_W; + + if (freg_map[freg] >= 8) + rex |= REX_R; + + if (reg_map[reg] >= 8) + rex |= REX_B; + + size = (rex != 0) ? 5 : 4; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + + *inst++ = GROUP_66; + if (rex != 0) + *inst++ = rex; + inst[0] = GROUP_0F; + inst[1] = GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x; + inst[2] = U8(reg_lmap[reg] | (freg_lmap[freg] << 3) | MOD_REG); + + return SLJIT_SUCCESS; +} + +static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler) +{ + sljit_s32 tmp, size; + + /* Don't adjust shadow stack if it isn't enabled. */ + if (!cpu_has_shadow_stack()) + return SLJIT_SUCCESS; + + size = compiler->local_size; + tmp = compiler->scratches; + if (tmp >= SLJIT_FIRST_SAVED_REG) + size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * SSIZE_OF(sw); + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + if (SLJIT_S0 >= tmp) + size += (SLJIT_S0 - tmp + 1) * SSIZE_OF(sw); + + return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size); +} diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c new file mode 100644 index 0000000..ecb7e9b --- /dev/null +++ b/src/sljit/sljitNativeX86_common.c @@ -0,0 +1,5001 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ + return "x86" SLJIT_CPUINFO; +} + +/* + 32b register indexes: + 0 - EAX + 1 - ECX + 2 - EDX + 3 - EBX + 4 - ESP + 5 - EBP + 6 - ESI + 7 - EDI +*/ + +/* + 64b register indexes: + 0 - RAX + 1 - RCX + 2 - RDX + 3 - RBX + 4 - RSP + 5 - RBP + 6 - RSI + 7 - RDI + 8 - R8 - From now on REX prefix is required + 9 - R9 + 10 - R10 + 11 - R11 + 12 - R12 + 13 - R13 + 14 - R14 + 15 - R15 +*/ + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_FREG (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { + 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 5, 7, 6, 4, 3 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { + 0, 1, 2, 3, 4, 5, 6, 7, 0 +}; + +#define CHECK_EXTRA_REGS(p, w, do) \ + if (p >= SLJIT_R3 && p <= SLJIT_S3) { \ + w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \ + p = SLJIT_MEM1(SLJIT_SP); \ + do; \ + } + +#else /* SLJIT_CONFIG_X86_32 */ + +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) + +/* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present + Note: avoid to use r12 and r13 for memory addressing + therefore r12 is better to be a higher saved register. */ +#ifndef _WIN64 +/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9 +}; +/* low-map. reg_map & 0x7. */ +static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1 +}; +#else +/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10 +}; +/* low-map. reg_map & 0x7. 
*/ +static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2 +}; +#endif + +/* Args: xmm0-xmm3 */ +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { + 0, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4 +}; +/* low-map. freg_map & 0x7. */ +static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = { + 0, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 4 +}; + +#define REX_W 0x48 +#define REX_R 0x44 +#define REX_X 0x42 +#define REX_B 0x41 +#define REX 0x40 + +#ifndef _WIN64 +#define HALFWORD_MAX 0x7fffffffl +#define HALFWORD_MIN -0x80000000l +#else +#define HALFWORD_MAX 0x7fffffffll +#define HALFWORD_MIN -0x80000000ll +#endif + +#define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN) +#define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN) + +#define CHECK_EXTRA_REGS(p, w, do) + +#endif /* SLJIT_CONFIG_X86_32 */ + +#define U8(v) ((sljit_u8)(v)) + +/* Size flags for emit_x86_instruction: */ +#define EX86_BIN_INS ((sljit_uw)0x000010) +#define EX86_SHIFT_INS ((sljit_uw)0x000020) +#define EX86_BYTE_ARG ((sljit_uw)0x000040) +#define EX86_HALF_ARG ((sljit_uw)0x000080) +/* Size flags for both emit_x86_instruction and emit_vex_instruction: */ +#define EX86_REX ((sljit_uw)0x000100) +#define EX86_NO_REXW ((sljit_uw)0x000200) +#define EX86_PREF_66 ((sljit_uw)0x000400) +#define EX86_PREF_F2 ((sljit_uw)0x000800) +#define EX86_PREF_F3 ((sljit_uw)0x001000) +#define EX86_SSE2_OP1 ((sljit_uw)0x002000) +#define EX86_SSE2_OP2 ((sljit_uw)0x004000) +#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2) +#define EX86_VEX_EXT ((sljit_uw)0x008000) +/* Op flags for emit_vex_instruction: */ +#define VEX_OP_0F38 ((sljit_uw)0x010000) +#define VEX_OP_0F3A ((sljit_uw)0x020000) +#define VEX_SSE2_OPV ((sljit_uw)0x040000) +#define VEX_AUTO_W ((sljit_uw)0x080000) +#define VEX_W ((sljit_uw)0x100000) +#define VEX_256 ((sljit_uw)0x200000) + +#define EX86_SELECT_66(op) (((op) & SLJIT_32) ? 
0 : EX86_PREF_66) +#define EX86_SELECT_F2_F3(op) (((op) & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) + +/* --------------------------------------------------------------------- */ +/* Instruction forms */ +/* --------------------------------------------------------------------- */ + +#define ADD (/* BINARY */ 0 << 3) +#define ADD_EAX_i32 0x05 +#define ADD_r_rm 0x03 +#define ADD_rm_r 0x01 +#define ADDSD_x_xm 0x58 +#define ADC (/* BINARY */ 2 << 3) +#define ADC_EAX_i32 0x15 +#define ADC_r_rm 0x13 +#define ADC_rm_r 0x11 +#define AND (/* BINARY */ 4 << 3) +#define AND_EAX_i32 0x25 +#define AND_r_rm 0x23 +#define AND_rm_r 0x21 +#define ANDPD_x_xm 0x54 +#define BSR_r_rm (/* GROUP_0F */ 0xbd) +#define BSF_r_rm (/* GROUP_0F */ 0xbc) +#define BSWAP_r (/* GROUP_0F */ 0xc8) +#define CALL_i32 0xe8 +#define CALL_rm (/* GROUP_FF */ 2 << 3) +#define CDQ 0x99 +#define CMOVE_r_rm (/* GROUP_0F */ 0x44) +#define CMP (/* BINARY */ 7 << 3) +#define CMP_EAX_i32 0x3d +#define CMP_r_rm 0x3b +#define CMP_rm_r 0x39 +#define CMPS_x_xm 0xc2 +#define CMPXCHG_rm_r 0xb1 +#define CMPXCHG_rm8_r 0xb0 +#define CVTPD2PS_x_xm 0x5a +#define CVTPS2PD_x_xm 0x5a +#define CVTSI2SD_x_rm 0x2a +#define CVTTSD2SI_r_xm 0x2c +#define DIV (/* GROUP_F7 */ 6 << 3) +#define DIVSD_x_xm 0x5e +#define EXTRACTPS_x_xm 0x17 +#define FLDS 0xd9 +#define FLDL 0xdd +#define FSTPS 0xd9 +#define FSTPD 0xdd +#define INSERTPS_x_xm 0x21 +#define INT3 0xcc +#define IDIV (/* GROUP_F7 */ 7 << 3) +#define IMUL (/* GROUP_F7 */ 5 << 3) +#define IMUL_r_rm (/* GROUP_0F */ 0xaf) +#define IMUL_r_rm_i8 0x6b +#define IMUL_r_rm_i32 0x69 +#define JL_i8 0x7c +#define JE_i8 0x74 +#define JNC_i8 0x73 +#define JNE_i8 0x75 +#define JMP_i8 0xeb +#define JMP_i32 0xe9 +#define JMP_rm (/* GROUP_FF */ 4 << 3) +#define LEA_r_m 0x8d +#define LOOP_i8 0xe2 +#define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd) +#define MOV_r_rm 0x8b +#define MOV_r_i32 0xb8 +#define MOV_rm_r 0x89 +#define MOV_rm_i32 0xc7 +#define MOV_rm8_i8 0xc6 +#define MOV_rm8_r8 0x88 
+#define MOVAPS_x_xm 0x28 +#define MOVAPS_xm_x 0x29 +#define MOVD_x_rm 0x6e +#define MOVD_rm_x 0x7e +#define MOVDDUP_x_xm 0x12 +#define MOVDQA_x_xm 0x6f +#define MOVDQA_xm_x 0x7f +#define MOVHLPS_x_x 0x12 +#define MOVHPD_m_x 0x17 +#define MOVHPD_x_m 0x16 +#define MOVLHPS_x_x 0x16 +#define MOVLPD_m_x 0x13 +#define MOVLPD_x_m 0x12 +#define MOVMSKPS_r_x (/* GROUP_0F */ 0x50) +#define MOVQ_x_xm (/* GROUP_0F */ 0x7e) +#define MOVSD_x_xm 0x10 +#define MOVSD_xm_x 0x11 +#define MOVSHDUP_x_xm 0x16 +#define MOVSXD_r_rm 0x63 +#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe) +#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf) +#define MOVUPS_x_xm 0x10 +#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6) +#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7) +#define MUL (/* GROUP_F7 */ 4 << 3) +#define MULSD_x_xm 0x59 +#define NEG_rm (/* GROUP_F7 */ 3 << 3) +#define NOP 0x90 +#define NOT_rm (/* GROUP_F7 */ 2 << 3) +#define OR (/* BINARY */ 1 << 3) +#define OR_r_rm 0x0b +#define OR_EAX_i32 0x0d +#define OR_rm_r 0x09 +#define OR_rm8_r8 0x08 +#define ORPD_x_xm 0x56 +#define PACKSSWB_x_xm (/* GROUP_0F */ 0x63) +#define PAND_x_xm 0xdb +#define PCMPEQD_x_xm 0x76 +#define PINSRB_x_rm_i8 0x20 +#define PINSRW_x_rm_i8 0xc4 +#define PINSRD_x_rm_i8 0x22 +#define PEXTRB_rm_x_i8 0x14 +#define PEXTRW_rm_x_i8 0x15 +#define PEXTRD_rm_x_i8 0x16 +#define PMOVMSKB_r_x (/* GROUP_0F */ 0xd7) +#define PMOVSXBD_x_xm 0x21 +#define PMOVSXBQ_x_xm 0x22 +#define PMOVSXBW_x_xm 0x20 +#define PMOVSXDQ_x_xm 0x25 +#define PMOVSXWD_x_xm 0x23 +#define PMOVSXWQ_x_xm 0x24 +#define PMOVZXBD_x_xm 0x31 +#define PMOVZXBQ_x_xm 0x32 +#define PMOVZXBW_x_xm 0x30 +#define PMOVZXDQ_x_xm 0x35 +#define PMOVZXWD_x_xm 0x33 +#define PMOVZXWQ_x_xm 0x34 +#define POP_r 0x58 +#define POP_rm 0x8f +#define POPF 0x9d +#define POR_x_xm 0xeb +#define PREFETCH 0x18 +#define PSHUFB_x_xm 0x00 +#define PSHUFD_x_xm 0x70 +#define PSHUFLW_x_xm 0x70 +#define PSRLDQ_x 0x73 +#define PSLLD_x_i8 0x72 +#define PSLLQ_x_i8 0x73 +#define PUSH_i32 0x68 +#define PUSH_r 0x50 +#define PUSH_rm 
(/* GROUP_FF */ 6 << 3) +#define PUSHF 0x9c +#define PXOR_x_xm 0xef +#define ROL (/* SHIFT */ 0 << 3) +#define ROR (/* SHIFT */ 1 << 3) +#define RET_near 0xc3 +#define RET_i16 0xc2 +#define SBB (/* BINARY */ 3 << 3) +#define SBB_EAX_i32 0x1d +#define SBB_r_rm 0x1b +#define SBB_rm_r 0x19 +#define SAR (/* SHIFT */ 7 << 3) +#define SHL (/* SHIFT */ 4 << 3) +#define SHLD (/* GROUP_0F */ 0xa5) +#define SHRD (/* GROUP_0F */ 0xad) +#define SHR (/* SHIFT */ 5 << 3) +#define SHUFPS_x_xm 0xc6 +#define SUB (/* BINARY */ 5 << 3) +#define SUB_EAX_i32 0x2d +#define SUB_r_rm 0x2b +#define SUB_rm_r 0x29 +#define SUBSD_x_xm 0x5c +#define TEST_EAX_i32 0xa9 +#define TEST_rm_r 0x85 +#define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc) +#define UCOMISD_x_xm 0x2e +#define UNPCKLPD_x_xm 0x14 +#define UNPCKLPS_x_xm 0x14 +#define VBROADCASTSD_x_xm 0x19 +#define VBROADCASTSS_x_xm 0x18 +#define VEXTRACTF128_x_ym 0x19 +#define VEXTRACTI128_x_ym 0x39 +#define VINSERTF128_y_y_xm 0x18 +#define VINSERTI128_y_y_xm 0x38 +#define VPBROADCASTB_x_xm 0x78 +#define VPBROADCASTD_x_xm 0x58 +#define VPBROADCASTQ_x_xm 0x59 +#define VPBROADCASTW_x_xm 0x79 +#define VPERMPD_y_ym 0x01 +#define VPERMQ_y_ym 0x00 +#define XCHG_EAX_r 0x90 +#define XCHG_r_rm 0x87 +#define XOR (/* BINARY */ 6 << 3) +#define XOR_EAX_i32 0x35 +#define XOR_r_rm 0x33 +#define XOR_rm_r 0x31 +#define XORPD_x_xm 0x57 + +#define GROUP_0F 0x0f +#define GROUP_66 0x66 +#define GROUP_F3 0xf3 +#define GROUP_F7 0xf7 +#define GROUP_FF 0xff +#define GROUP_BINARY_81 0x81 +#define GROUP_BINARY_83 0x83 +#define GROUP_SHIFT_1 0xd1 +#define GROUP_SHIFT_N 0xc1 +#define GROUP_SHIFT_CL 0xd3 +#define GROUP_LOCK 0xf0 + +#define MOD_REG 0xc0 +#define MOD_DISP8 0x40 + +#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s)) + +#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r))) +#define POP_REG(r) (*inst++ = U8(POP_r + (r))) +#define RET() (*inst++ = RET_near) +#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0) + +#define SLJIT_INST_LABEL 
255 +#define SLJIT_INST_JUMP 254 +#define SLJIT_INST_MOV_ADDR 253 +#define SLJIT_INST_CONST 252 + +/* Multithreading does not affect these static variables, since they store + built-in CPU features. Therefore they can be overwritten by different threads + if they detect the CPU features at the same time. */ +#define CPU_FEATURE_DETECTED 0x001 +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +#define CPU_FEATURE_SSE2 0x002 +#endif +#define CPU_FEATURE_SSE41 0x004 +#define CPU_FEATURE_LZCNT 0x008 +#define CPU_FEATURE_TZCNT 0x010 +#define CPU_FEATURE_CMOV 0x020 +#define CPU_FEATURE_AVX 0x040 +#define CPU_FEATURE_AVX2 0x080 +#define CPU_FEATURE_OSXSAVE 0x100 + +static sljit_u32 cpu_feature_list = 0; + +#ifdef _WIN32_WCE +#include <cmnintrin.h> +#elif defined(_MSC_VER) && _MSC_VER >= 1400 +#include <intrin.h> +#endif + +/******************************************************/ +/* Unaligned-store functions */ +/******************************************************/ + +static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value) +{ + SLJIT_MEMCPY(addr, &value, sizeof(value)); +} + +static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value) +{ + SLJIT_MEMCPY(addr, &value, sizeof(value)); +} + +static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value) +{ + SLJIT_MEMCPY(addr, &value, sizeof(value)); +} + +/******************************************************/ +/* Utility functions */ +/******************************************************/ + +static void execute_cpu_id(sljit_u32 info[4]) +{ +#if defined(_MSC_VER) && _MSC_VER >= 1400 + + __cpuidex((int*)info, (int)info[0], (int)info[2]); + +#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__) + + /* AT&T syntax. 
*/ + __asm__ ( +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + "movl %0, %%esi\n" + "movl (%%esi), %%eax\n" + "movl 8(%%esi), %%ecx\n" + "pushl %%ebx\n" + "cpuid\n" + "movl %%eax, (%%esi)\n" + "movl %%ebx, 4(%%esi)\n" + "popl %%ebx\n" + "movl %%ecx, 8(%%esi)\n" + "movl %%edx, 12(%%esi)\n" +#else /* !SLJIT_CONFIG_X86_32 */ + "movq %0, %%rsi\n" + "movl (%%rsi), %%eax\n" + "movl 8(%%rsi), %%ecx\n" + "cpuid\n" + "movl %%eax, (%%rsi)\n" + "movl %%ebx, 4(%%rsi)\n" + "movl %%ecx, 8(%%rsi)\n" + "movl %%edx, 12(%%rsi)\n" +#endif /* SLJIT_CONFIG_X86_32 */ + : + : "r" (info) +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + : "memory", "eax", "ecx", "edx", "esi" +#else /* !SLJIT_CONFIG_X86_32 */ + : "memory", "rax", "rbx", "rcx", "rdx", "rsi" +#endif /* SLJIT_CONFIG_X86_32 */ + ); + +#else /* _MSC_VER < 1400 */ + + /* Intel syntax. */ + __asm { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + mov esi, info + mov eax, [esi] + mov ecx, [esi + 8] + cpuid + mov [esi], eax + mov [esi + 4], ebx + mov [esi + 8], ecx + mov [esi + 12], edx +#else /* !SLJIT_CONFIG_X86_32 */ + mov rsi, info + mov eax, [rsi] + mov ecx, [rsi + 8] + cpuid + mov [rsi], eax + mov [rsi + 4], ebx + mov [rsi + 8], ecx + mov [rsi + 12], edx +#endif /* SLJIT_CONFIG_X86_32 */ + } + +#endif /* _MSC_VER && _MSC_VER >= 1400 */ +} + +static sljit_u32 execute_get_xcr0_low(void) +{ + sljit_u32 xcr0; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + + xcr0 = (sljit_u32)_xgetbv(0); + +#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__) + + /* AT&T syntax. */ + __asm__ ( + "xorl %%ecx, %%ecx\n" + "xgetbv\n" + : "=a" (xcr0) + : +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + : "ecx", "edx" +#else /* !SLJIT_CONFIG_X86_32 */ + : "rcx", "rdx" +#endif /* SLJIT_CONFIG_X86_32 */ + ); + +#else /* _MSC_VER < 1400 */ + + /* Intel syntax. 
*/ + __asm { + mov ecx, 0 + xgetbv + mov xcr0, eax + } + +#endif /* _MSC_VER && _MSC_VER >= 1400 */ + return xcr0; +} + +static void get_cpu_features(void) +{ + sljit_u32 feature_list = CPU_FEATURE_DETECTED; + sljit_u32 info[4] = {0}; + sljit_u32 max_id; + + execute_cpu_id(info); + max_id = info[0]; + + if (max_id >= 7) { + info[0] = 7; + info[2] = 0; + execute_cpu_id(info); + + if (info[1] & 0x8) + feature_list |= CPU_FEATURE_TZCNT; + if (info[1] & 0x20) + feature_list |= CPU_FEATURE_AVX2; + } + + if (max_id >= 1) { + info[0] = 1; + execute_cpu_id(info); + + if (info[2] & 0x80000) + feature_list |= CPU_FEATURE_SSE41; + if (info[2] & 0x8000000) + feature_list |= CPU_FEATURE_OSXSAVE; + if (info[2] & 0x10000000) + feature_list |= CPU_FEATURE_AVX; +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + if (info[3] & 0x4000000) + feature_list |= CPU_FEATURE_SSE2; +#endif + if (info[3] & 0x8000) + feature_list |= CPU_FEATURE_CMOV; + } + + info[0] = 0x80000001; + execute_cpu_id(info); + + if (info[2] & 0x20) + feature_list |= CPU_FEATURE_LZCNT; + + if ((feature_list & CPU_FEATURE_OSXSAVE) && (execute_get_xcr0_low() & 0x4) == 0) + feature_list &= ~(sljit_u32)(CPU_FEATURE_AVX | CPU_FEATURE_AVX2); + + cpu_feature_list = feature_list; +} + +static sljit_u8 get_jump_code(sljit_uw type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_ATOMIC_STORED: + case SLJIT_F_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + return 0x84 /* je */; + + case SLJIT_NOT_EQUAL: + case SLJIT_ATOMIC_NOT_STORED: + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return 0x85 /* jne */; + + case SLJIT_LESS: + case SLJIT_CARRY: + case SLJIT_F_LESS: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED_OR_GREATER: + return 0x82 /* jc */; + + case SLJIT_GREATER_EQUAL: + case SLJIT_NOT_CARRY: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + return 0x83 /* jae */; + + case SLJIT_GREATER: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_LESS: + case 
SLJIT_ORDERED_GREATER: + return 0x87 /* jnbe */; + + case SLJIT_LESS_EQUAL: + case SLJIT_F_LESS_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return 0x86 /* jbe */; + + case SLJIT_SIG_LESS: + return 0x8c /* jl */; + + case SLJIT_SIG_GREATER_EQUAL: + return 0x8d /* jnl */; + + case SLJIT_SIG_GREATER: + return 0x8f /* jnle */; + + case SLJIT_SIG_LESS_EQUAL: + return 0x8e /* jle */; + + case SLJIT_OVERFLOW: + return 0x80 /* jo */; + + case SLJIT_NOT_OVERFLOW: + return 0x81 /* jno */; + + case SLJIT_UNORDERED: + case SLJIT_ORDERED_EQUAL: /* NaN. */ + return 0x8a /* jp */; + + case SLJIT_ORDERED: + case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not NaN. */ + return 0x8b /* jpo */; + } + return 0; +} + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset); +#else /* !SLJIT_CONFIG_X86_32 */ +static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr); +static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset); +#endif /* SLJIT_CONFIG_X86_32 */ + +static sljit_u8* detect_near_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) +{ + sljit_uw type = jump->flags >> TYPE_SHIFT; + sljit_s32 short_jump; + sljit_uw label_addr; + + if (jump->flags & JUMP_ADDR) + label_addr = jump->u.target - (sljit_uw)executable_offset; + else + label_addr = (sljit_uw)(code + jump->u.label->size); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((sljit_sw)(label_addr - (sljit_uw)(code_ptr + 6)) > HALFWORD_MAX || (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 5)) < HALFWORD_MIN) + return detect_far_jump_type(jump, code_ptr); +#endif /* SLJIT_CONFIG_X86_64 */ + + short_jump = (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) >= -0x80 && (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) <= 0x7f; + + 
if (type == SLJIT_JUMP) { + if (short_jump) + *code_ptr++ = JMP_i8; + else + *code_ptr++ = JMP_i32; + } else if (type > SLJIT_JUMP) { + short_jump = 0; + *code_ptr++ = CALL_i32; + } else if (short_jump) { + *code_ptr++ = U8(get_jump_code(type) - 0x10); + } else { + *code_ptr++ = GROUP_0F; + *code_ptr++ = get_jump_code(type); + } + + jump->addr = (sljit_uw)code_ptr; + + if (short_jump) { + jump->flags |= PATCH_MB; + code_ptr += sizeof(sljit_s8); + } else { + jump->flags |= PATCH_MW; + code_ptr += sizeof(sljit_s32); + } + + return code_ptr; +} + +static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset) +{ + sljit_uw flags = jump->flags; + sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr; + sljit_uw jump_addr = jump->addr; + SLJIT_UNUSED_ARG(executable_offset); + + if (SLJIT_UNLIKELY(flags & JUMP_MOV_ADDR)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr); +#else /* SLJIT_CONFIG_X86_32 */ + if (flags & PATCH_MD) { + SLJIT_ASSERT(addr > HALFWORD_MAX); + sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr); + return; + } + + if (flags & PATCH_MW) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset); + SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN); + } else { + SLJIT_ASSERT(addr <= HALFWORD_MAX); + } + sljit_unaligned_store_s32((void*)(jump_addr - sizeof(sljit_s32)), (sljit_s32)addr); +#endif /* !SLJIT_CONFIG_X86_32 */ + return; + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (SLJIT_UNLIKELY(flags & PATCH_MD)) { + SLJIT_ASSERT(!(flags & JUMP_ADDR)); + sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr); + return; + } +#endif /* SLJIT_CONFIG_X86_64 */ + + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset); + + if (flags & PATCH_MB) { + addr -= sizeof(sljit_s8); + 
SLJIT_ASSERT((sljit_sw)addr <= 0x7f && (sljit_sw)addr >= -0x80); + *(sljit_u8*)jump_addr = U8(addr); + return; + } else if (flags & PATCH_MW) { + addr -= sizeof(sljit_s32); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr); +#else /* !SLJIT_CONFIG_X86_32 */ + SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN); + sljit_unaligned_store_s32((void*)jump_addr, (sljit_s32)addr); +#endif /* SLJIT_CONFIG_X86_32 */ + } +} + +static void reduce_code_size(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + sljit_uw next_label_size; + sljit_uw next_jump_addr; + sljit_uw next_min_addr; + sljit_uw size_reduce = 0; + sljit_sw diff; + sljit_uw type; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + sljit_uw size_reduce_max; +#endif /* SLJIT_DEBUG */ + + label = compiler->labels; + jump = compiler->jumps; + + next_label_size = SLJIT_GET_NEXT_SIZE(label); + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + + while (1) { + next_min_addr = next_label_size; + if (next_jump_addr < next_min_addr) + next_min_addr = next_jump_addr; + + if (next_min_addr == SLJIT_MAX_ADDRESS) + break; + + if (next_min_addr == next_label_size) { + label->size -= size_reduce; + + label = label->next; + next_label_size = SLJIT_GET_NEXT_SIZE(label); + } + + if (next_min_addr != next_jump_addr) + continue; + + if (!(jump->flags & JUMP_MOV_ADDR)) { +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + size_reduce_max = size_reduce + (((jump->flags >> TYPE_SHIFT) < SLJIT_JUMP) ? CJUMP_MAX_SIZE : JUMP_MAX_SIZE); +#endif /* SLJIT_DEBUG */ + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) { + if (jump->flags & JUMP_ADDR) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (jump->u.target <= 0xffffffffl) + size_reduce += sizeof(sljit_s32); +#endif /* SLJIT_CONFIG_X86_64 */ + } else { + /* Unit size: instruction. 
*/ + diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce); + type = jump->flags >> TYPE_SHIFT; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (type == SLJIT_JUMP) { + if (diff <= 0x7f + 2 && diff >= -0x80 + 2) + size_reduce += JUMP_MAX_SIZE - 2; + else if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5) + size_reduce += JUMP_MAX_SIZE - 5; + } else if (type < SLJIT_JUMP) { + if (diff <= 0x7f + 2 && diff >= -0x80 + 2) + size_reduce += CJUMP_MAX_SIZE - 2; + else if (diff <= HALFWORD_MAX + 6 && diff >= HALFWORD_MIN + 6) + size_reduce += CJUMP_MAX_SIZE - 6; + } else { + if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5) + size_reduce += JUMP_MAX_SIZE - 5; + } +#else /* !SLJIT_CONFIG_X86_64 */ + if (type == SLJIT_JUMP) { + if (diff <= 0x7f + 2 && diff >= -0x80 + 2) + size_reduce += JUMP_MAX_SIZE - 2; + } else if (type < SLJIT_JUMP) { + if (diff <= 0x7f + 2 && diff >= -0x80 + 2) + size_reduce += CJUMP_MAX_SIZE - 2; + } +#endif /* SLJIT_CONFIG_X86_64 */ + } + } + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT; +#endif /* SLJIT_DEBUG */ +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + } else { +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + size_reduce_max = size_reduce + 10; +#endif /* SLJIT_DEBUG */ + + if (!(jump->flags & JUMP_ADDR)) { + diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce - 3); + + if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN) + size_reduce += 3; + } else if (jump->u.target <= 0xffffffffl) + size_reduce += (jump->flags & MOV_ADDR_HI) ? 
4 : 5; + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT; +#endif /* SLJIT_DEBUG */ +#endif /* SLJIT_CONFIG_X86_64 */ + } + + jump = jump->next; + next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump); + } + + compiler->size -= size_reduce; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data) +{ + struct sljit_memory_fragment *buf; + sljit_u8 *code; + sljit_u8 *code_ptr; + sljit_u8 *buf_ptr; + sljit_u8 *buf_end; + sljit_u8 len; + sljit_sw executable_offset; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + sljit_uw addr; +#endif /* SLJIT_DEBUG */ + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + + reduce_code_size(compiler); + + /* Second code generation pass. */ + code = (sljit_u8*)allocate_executable_memory(compiler->size, options, exec_allocator_data, &executable_offset); + PTR_FAIL_WITH_EXEC_IF(code); + + reverse_buf(compiler); + buf = compiler->buf; + + code_ptr = code; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + + do { + buf_ptr = buf->memory; + buf_end = buf_ptr + buf->used_size; + do { + len = *buf_ptr++; + SLJIT_ASSERT(len > 0); + if (len < SLJIT_INST_CONST) { + /* The code is already generated. 
*/ + SLJIT_MEMCPY(code_ptr, buf_ptr, len); + code_ptr += len; + buf_ptr += len; + } else { + switch (len) { + case SLJIT_INST_LABEL: + label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + break; + case SLJIT_INST_JUMP: +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + addr = (sljit_uw)code_ptr; +#endif /* SLJIT_DEBUG */ + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) + code_ptr = detect_near_jump_type(jump, code_ptr, code, executable_offset); + else { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + code_ptr = detect_far_jump_type(jump, code_ptr, executable_offset); +#else /* !SLJIT_CONFIG_X86_32 */ + code_ptr = detect_far_jump_type(jump, code_ptr); +#endif /* SLJIT_CONFIG_X86_32 */ + } + + SLJIT_ASSERT((sljit_uw)code_ptr - addr <= ((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f)); + jump = jump->next; + break; + case SLJIT_INST_MOV_ADDR: +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + code_ptr = generate_mov_addr_code(jump, code_ptr, code, executable_offset); +#endif /* SLJIT_CONFIG_X86_64 */ + jump->addr = (sljit_uw)code_ptr; + jump = jump->next; + break; + default: + SLJIT_ASSERT(len == SLJIT_INST_CONST); + const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); + const_ = const_->next; + break; + } + } + } while (buf_ptr < buf_end); + + SLJIT_ASSERT(buf_ptr == buf_end); + buf = buf->next; + } while (buf); + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr <= code + compiler->size); + + jump = compiler->jumps; + while (jump) { + generate_jump_or_mov_addr(jump, executable_offset); + jump = jump->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code); + + code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + + SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 
1); + return (void*)code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return (SLJIT_IS_FPU_AVAILABLE) != 0; +#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + if (cpu_feature_list == 0) + get_cpu_features(); + return (cpu_feature_list & CPU_FEATURE_SSE2) != 0; +#else /* SLJIT_DETECT_SSE2 */ + return 1; +#endif /* SLJIT_DETECT_SSE2 */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + case SLJIT_HAS_VIRTUAL_REGISTERS: + return 1; +#endif /* SLJIT_CONFIG_X86_32 */ + + case SLJIT_HAS_CLZ: + if (cpu_feature_list == 0) + get_cpu_features(); + + return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2; + + case SLJIT_HAS_CTZ: + if (cpu_feature_list == 0) + get_cpu_features(); + + return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2; + + case SLJIT_HAS_CMOV: + if (cpu_feature_list == 0) + get_cpu_features(); + return (cpu_feature_list & CPU_FEATURE_CMOV) != 0; + + case SLJIT_HAS_REV: + case SLJIT_HAS_ROT: + case SLJIT_HAS_PREFETCH: + case SLJIT_HAS_COPY_F32: + case SLJIT_HAS_COPY_F64: + case SLJIT_HAS_ATOMIC: + return 1; + +#if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE + case SLJIT_HAS_AVX: + if (cpu_feature_list == 0) + get_cpu_features(); + return (cpu_feature_list & CPU_FEATURE_AVX) != 0; + case SLJIT_HAS_AVX2: + if (cpu_feature_list == 0) + get_cpu_features(); + return (cpu_feature_list & CPU_FEATURE_AVX2) != 0; + case SLJIT_HAS_SIMD: + if (cpu_feature_list == 0) + get_cpu_features(); + return (cpu_feature_list & CPU_FEATURE_SSE41) != 0; +#endif /* SLJIT_IS_FPU_AVAILABLE */ + default: + return 0; + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + switch (type) { + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + return 2; + } + + return 0; +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* 
--------------------------------------------------------------------- */ + +#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode)) + +#define BINARY_IMM32(op_imm, immw, arg, argw) \ + do { \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); \ + } while (0) + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + do { \ + if (IS_HALFWORD(immw) || compiler->mode32) { \ + BINARY_IMM32(op_imm, immw, arg, argw); \ + } \ + else { \ + FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, immw)); \ + inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ + FAIL_IF(!inst); \ + *inst = (op_mr); \ + } \ + } while (0) + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw)) + +#else /* !SLJIT_CONFIG_X86_64 */ + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + BINARY_IMM32(op_imm, immw, arg, argw) + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) + +#endif /* SLJIT_CONFIG_X86_64 */ + +static sljit_s32 emit_byte(struct sljit_compiler *compiler, sljit_u8 byte) +{ + sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = byte; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_mov(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw); + +#define EMIT_MOV(compiler, dst, dstw, src, srcw) \ + FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); + +static sljit_s32 emit_groupf(struct sljit_compiler *compiler, + sljit_uw op, + sljit_s32 dst, sljit_s32 src, sljit_sw srcw); + +static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler, + sljit_uw op, + sljit_s32 dst, sljit_s32 src, sljit_sw 
srcw); + +static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src); + +static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw); + +static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw); + +static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler) +{ +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) + /* Emit endbr32/endbr64 when CET is enabled. */ + sljit_u8 *inst; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + inst[0] = GROUP_F3; + inst[1] = GROUP_0F; + inst[2] = 0x1e; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst[3] = 0xfb; +#else /* !SLJIT_CONFIG_X86_32 */ + inst[3] = 0xfa; +#endif /* SLJIT_CONFIG_X86_32 */ +#else /* !SLJIT_CONFIG_X86_CET */ + SLJIT_UNUSED_ARG(compiler); +#endif /* SLJIT_CONFIG_X86_CET */ + return SLJIT_SUCCESS; +} + +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) + +static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg) +{ + sljit_u8 *inst; + sljit_s32 size; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + size = 5; +#else + size = 4; +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + *inst++ = GROUP_F3; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + *inst++ = REX_W | (reg_map[reg] <= 7 ? 
0 : REX_B); +#endif + inst[0] = GROUP_0F; + inst[1] = 0x1e; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + inst[2] = U8(MOD_REG | (0x1 << 3) | reg_lmap[reg]); +#else + inst[2] = U8(MOD_REG | (0x1 << 3) | reg_map[reg]); +#endif + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg) +{ + sljit_u8 *inst; + sljit_s32 size; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + size = 5; +#else + size = 4; +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + *inst++ = GROUP_F3; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B); +#endif + inst[0] = GROUP_0F; + inst[1] = 0xae; + inst[2] = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7); + return SLJIT_SUCCESS; +} + +#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ + +static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void) +{ +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) + return _get_ssp() != 0; +#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */ + return 0; +#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ +} + +static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) + sljit_u8 *inst, *jz_after_cmp_inst; + sljit_uw size_jz_after_cmp_inst; + + sljit_uw size_before_rdssp_inst = compiler->size; + + /* Generate "RDSSP TMP_REG1". */ + FAIL_IF(emit_rdssp(compiler, TMP_REG1)); + + /* Load return address on shadow stack into TMP_REG1. */ + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0); + + /* Compare return address against TMP_REG1. */ + FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw)); + + /* Generate JZ to skip shadow stack adjustment when shadow + stack matches normal stack. 
*/ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + *inst++ = get_jump_code(SLJIT_EQUAL) - 0x10; + size_jz_after_cmp_inst = compiler->size; + jz_after_cmp_inst = inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* REX_W is not necessary. */ + compiler->mode32 = 1; +#endif + /* Load 1 into TMP_REG1. */ + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1); + + /* Generate "INCSSP TMP_REG1". */ + FAIL_IF(emit_incssp(compiler, TMP_REG1)); + + /* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = JMP_i8; + inst[1] = size_before_rdssp_inst - compiler->size; + + *jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst; +#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); +#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ + return SLJIT_SUCCESS; +} + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#include "sljitNativeX86_32.c" +#else +#include "sljitNativeX86_64.c" +#endif + +static sljit_s32 emit_mov(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + + if (FAST_IS_REG(src)) { + inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + return SLJIT_SUCCESS; + } + + if (src == SLJIT_IMM) { + if (FAST_IS_REG(dst)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); +#else + if (!compiler->mode32) { + if (NOT_HALFWORD(srcw)) + return emit_load_imm64(compiler, dst, srcw); + } + else + return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw); +#endif + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (!compiler->mode32 && NOT_HALFWORD(srcw)) { + /* Immediate to memory move. 
Only SLJIT_MOV operation copies + an immediate directly into memory so TMP_REG1 can be used. */ + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw)); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + return SLJIT_SUCCESS; + } +#endif + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; + } + if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; + return SLJIT_SUCCESS; + } + + /* Memory to memory move. Only SLJIT_MOV operation copies + data from memory to memory so TMP_REG1 can be used. */ + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_uw size; + + SLJIT_ASSERT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = U8(get_jump_code((sljit_uw)type ^ 0x1) - 0x10); + + size = compiler->size; + EMIT_MOV(compiler, dst_reg, 0, src, srcw); + + inst[1] = U8(compiler->size - size); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + sljit_u8 *inst; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_uw size; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + switch (GET_OPCODE(op)) { + case SLJIT_BREAKPOINT: + return emit_byte(compiler, INT3); + case SLJIT_NOP: + return emit_byte(compiler, NOP); + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case 
SLJIT_DIV_UW: + case SLJIT_DIV_SW: +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#ifdef _WIN64 + SLJIT_ASSERT( + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] == 2 + && reg_map[TMP_REG1] > 7); +#else + SLJIT_ASSERT( + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] < 7 + && reg_map[TMP_REG1] == 2); +#endif + compiler->mode32 = op & SLJIT_32; +#endif + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); + + op = GET_OPCODE(op); + if ((op | 0x2) == SLJIT_DIV_UW) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0); +#else + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); +#endif + FAIL_IF(!inst); + *inst = XOR_r_rm; + } + + if ((op | 0x2) == SLJIT_DIV_SW) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + FAIL_IF(emit_byte(compiler, CDQ)); +#else + if (!compiler->mode32) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = REX_W; + inst[1] = CDQ; + } else + FAIL_IF(emit_byte(compiler, CDQ)); +#endif + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = GROUP_F7; + inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); +#else /* !SLJIT_CONFIG_X86_32 */ +#ifdef _WIN64 + size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2; +#else /* !_WIN64 */ + size = (!compiler->mode32) ? 3 : 2; +#endif /* _WIN64 */ + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); +#ifdef _WIN64 + if (!compiler->mode32) + *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? 
REX_B : 0); + else if (op >= SLJIT_DIVMOD_UW) + *inst++ = REX_B; + inst[0] = GROUP_F7; + inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); +#else /* !_WIN64 */ + if (!compiler->mode32) + *inst++ = REX_W; + inst[0] = GROUP_F7; + inst[1] = MOD_REG | reg_map[SLJIT_R1]; +#endif /* _WIN64 */ +#endif /* SLJIT_CONFIG_X86_32 */ + switch (op) { + case SLJIT_LMUL_UW: + inst[1] |= MUL; + break; + case SLJIT_LMUL_SW: + inst[1] |= IMUL; + break; + case SLJIT_DIVMOD_UW: + case SLJIT_DIV_UW: + inst[1] |= DIV; + break; + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_SW: + inst[1] |= IDIV; + break; + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) + if (op <= SLJIT_DIVMOD_SW) + EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); +#else + if (op >= SLJIT_DIV_UW) + EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); +#endif + break; + case SLJIT_ENDBR: + return emit_endbranch(compiler); + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return skip_frames_before_return(compiler); + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_s32 dst_r; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + if (src == SLJIT_IMM) { + if (FAST_IS_REG(dst)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; +#endif + } + inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_i8; + return SLJIT_SUCCESS; + } + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + + if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (reg_map[src] >= 4) { + SLJIT_ASSERT(dst_r == TMP_REG1); + EMIT_MOV(compiler, TMP_REG1, 0, src, 0); + } else + dst_r = src; +#else + dst_r = src; +#endif + } else { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (FAST_IS_REG(src) && reg_map[src] >= 4) { + /* Both src and dst are registers. */ + SLJIT_ASSERT(FAST_IS_REG(dst)); + + if (src == dst && !sign) { + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0); + FAIL_IF(!inst); + *(inst + 1) |= AND; + return SLJIT_SUCCESS; + } + + EMIT_MOV(compiler, TMP_REG1, 0, src, 0); + src = TMP_REG1; + srcw = 0; + } +#endif /* !SLJIT_CONFIG_X86_32 */ + + /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */ + FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm8 : MOVZX_r_rm8, dst_r, src, srcw)); + } + + if (dst & SLJIT_MEM) { + inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; + } + + return SLJIT_SUCCESS; +} + /* emit_prefetch: emits a GROUP_0F PREFETCH instruction whose operand is src/srcw. op selects a value that is shifted left by 3 and OR-ed into the byte following the two opcode bytes: 1 for SLJIT_PREFETCH_L1, 2 for SLJIT_PREFETCH_L2, 3 for SLJIT_PREFETCH_L3; for any other op that byte is left exactly as emit_x86_instruction() produced it. Returns SLJIT_SUCCESS; FAIL_IF leaves early when emit_x86_instruction() yields a null pointer (presumably returning the compiler error code). NOTE(review): inst[2] is presumably the ModRM byte and the OR-ed value its reg field, i.e. the prefetch hint selector; confirm against the instruction set reference. */ +static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; /* NOTE(review): presumably suppresses the REX.W prefix, which a prefetch does not need; confirm in emit_x86_instruction(). */ +#endif + /* Second argument 2: two opcode bytes are filled in below (inst[0] and inst[1]). */ + inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw); + FAIL_IF(!inst); + inst[0] = GROUP_0F; + inst[1] = PREFETCH; + /* Select the hint; an op matching none of the three constants adds no bits. */ + if (op == SLJIT_PREFETCH_L1) + inst[2] |= (1 << 3); + else if (op == SLJIT_PREFETCH_L2) + inst[2] |= (2 << 3); + else if (op == SLJIT_PREFETCH_L3) + inst[2] |= (3 << 3); + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_s32 dst_r; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + if (src == SLJIT_IMM) { + if
(FAST_IS_REG(dst)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; +#endif + } + inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; + } + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) + dst_r = src; + else + FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm16 : MOVZX_r_rm16, dst_r, src, srcw)); + + if (dst & SLJIT_MEM) { + inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + } + + return SLJIT_SUCCESS; +} + /* emit_unary: emits a one-operand GROUP_F7 instruction; opcode is OR-ed into the byte that follows GROUP_F7. Three paths are taken depending on the operands: (1) dst and src name the same operand (same dst/src and same dstw/srcw): the instruction is applied to dst/dstw in place; (2) dst is a register: src is first copied into dst and the instruction is applied to dst; (3) otherwise: src is copied into TMP_REG1, the instruction is applied there, and TMP_REG1 is then stored to dst/dstw. Returns SLJIT_SUCCESS; FAIL_IF and EMIT_MOV leave early on failure (presumably returning the compiler error code). NOTE(review): the byte receiving opcode is presumably the ModRM byte, with opcode being the operation selector in its reg field (for example NOT or NEG); callers are outside this view, so confirm there. */ +static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + /* Path 1: operate in place, no copy needed. */ + if (dst == src && dstw == srcw) { + /* Same input and output */ + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + inst[0] = GROUP_F7; + inst[1] |= opcode; + return SLJIT_SUCCESS; + } + /* Path 2: register destination; load the source into it, then operate on the register. */ + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); + FAIL_IF(!inst); + inst[0] = GROUP_F7; + inst[1] |= opcode; + return SLJIT_SUCCESS; + } + /* Path 3: non-register destination that differs from the source; compute in TMP_REG1 and store the result. */ + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); + FAIL_IF(!inst); + inst[0] = GROUP_F7; + inst[1] |= opcode; + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +static const sljit_sw emit_clz_arg = 32 + 31; +static const sljit_sw emit_ctz_arg = 32; +#endif + +static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz, + sljit_s32 dst, sljit_sw dstw, + 
sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_s32 dst_r; + sljit_sw max; + + SLJIT_ASSERT(cpu_feature_list != 0); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) { + FAIL_IF(emit_groupf(compiler, (is_clz ? LZCNT_r_rm : TZCNT_r_rm) | EX86_PREF_F3, dst_r, src, srcw)); + + if (dst & SLJIT_MEM) + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + + FAIL_IF(emit_groupf(compiler, is_clz ? BSR_r_rm : BSF_r_rm, dst_r, src, srcw)); + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + max = is_clz ? (32 + 31) : 32; + + if (cpu_feature_list & CPU_FEATURE_CMOV) { + if (dst_r != TMP_REG1) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max); + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0); + } + else + inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg); + + FAIL_IF(!inst); + inst[0] = GROUP_0F; + inst[1] = CMOVE_r_rm; + } + else + FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max)); + + if (is_clz) { + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); + FAIL_IF(!inst); + *(inst + 1) |= XOR; + } +#else + if (is_clz) + max = compiler->mode32 ? (32 + 31) : (64 + 63); + else + max = compiler->mode32 ? 
32 : 64; + + if (cpu_feature_list & CPU_FEATURE_CMOV) { + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max); + FAIL_IF(emit_groupf(compiler, CMOVE_r_rm, dst_r, TMP_REG2, 0)); + } else + FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max)); + + if (is_clz) { + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0); + FAIL_IF(!inst); + *(inst + 1) |= XOR; + } +#endif + + if (dst & SLJIT_MEM) + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_bswap(struct sljit_compiler *compiler, + sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8 *inst; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + sljit_uw size; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_u8 rex = 0; +#else /* !SLJIT_CONFIG_X86_64 */ + sljit_s32 dst_is_ereg = op & SLJIT_32; +#endif /* SLJIT_CONFIG_X86_64 */ + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (op == SLJIT_REV_U32 || op == SLJIT_REV_S32) + compiler->mode32 = 1; +#else /* !SLJIT_CONFIG_X86_64 */ + op &= ~SLJIT_32; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (src != dst_r) { + /* Only the lower 16 bit is read for eregs. 
*/ + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) + FAIL_IF(emit_mov_half(compiler, 0, dst_r, 0, src, srcw)); + else + EMIT_MOV(compiler, dst_r, 0, src, srcw); + } + + size = 2; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (!compiler->mode32) + rex = REX_W; + + if (reg_map[dst_r] >= 8) + rex |= REX_B; + + if (rex != 0) + size++; +#endif /* SLJIT_CONFIG_X86_64 */ + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (rex != 0) + *inst++ = rex; + + inst[0] = GROUP_0F; + inst[1] = BSWAP_r | reg_lmap[dst_r]; +#else /* !SLJIT_CONFIG_X86_64 */ + inst[0] = GROUP_0F; + inst[1] = BSWAP_r | reg_map[dst_r]; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + size = compiler->mode32 ? 16 : 48; +#else /* !SLJIT_CONFIG_X86_64 */ + size = 16; +#endif /* SLJIT_CONFIG_X86_64 */ + + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, (sljit_sw)size, dst_r, 0); + FAIL_IF(!inst); + if (op == SLJIT_REV_U16) + inst[1] |= SHR; + else + inst[1] |= SAR; + } + + if (dst & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (dst_is_ereg) + op = SLJIT_REV; +#endif /* SLJIT_CONFIG_X86_32 */ + if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) + return emit_mov_half(compiler, 0, dst, dstw, TMP_REG1, 0); + + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (op == SLJIT_REV_S32) { + compiler->mode32 = 0; + inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); + FAIL_IF(!inst); + *inst = MOVSXD_r_rm; + } +#endif /* SLJIT_CONFIG_X86_64 */ + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_X86_32 && 
SLJIT_CONFIG_X86_32) + sljit_s32 dst_is_ereg = 0; +#else /* !SLJIT_CONFIG_X86_32 */ + sljit_s32 op_flags = GET_ALL_FLAGS(op); +#endif /* SLJIT_CONFIG_X86_32 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); + CHECK_EXTRA_REGS(src, srcw, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op_flags & SLJIT_32; +#endif /* SLJIT_CONFIG_X86_64 */ + + op = GET_OPCODE(op); + + if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (FAST_IS_REG(src) && src == dst) { + if (!TYPE_CAST_NEEDED(op)) + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (op_flags & SLJIT_32) { + if (src & SLJIT_MEM) { + if (op == SLJIT_MOV_S32) + op = SLJIT_MOV_U32; + } + else if (src == SLJIT_IMM) { + if (op == SLJIT_MOV_U32) + op = SLJIT_MOV_S32; + } + } +#endif /* SLJIT_CONFIG_X86_64 */ + + if (src == SLJIT_IMM) { + switch (op) { + case SLJIT_MOV_U8: + srcw = (sljit_u8)srcw; + break; + case SLJIT_MOV_S8: + srcw = (sljit_s8)srcw; + break; + case SLJIT_MOV_U16: + srcw = (sljit_u16)srcw; + break; + case SLJIT_MOV_S16: + srcw = (sljit_s16)srcw; + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case SLJIT_MOV_U32: + srcw = (sljit_u32)srcw; + break; + case SLJIT_MOV_S32: + srcw = (sljit_s32)srcw; + break; +#endif /* SLJIT_CONFIG_X86_64 */ + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg)) + return emit_mov(compiler, dst, dstw, src, srcw); +#endif /* SLJIT_CONFIG_X86_32 */ + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { + 
SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); + dst = TMP_REG1; + } +#endif /* SLJIT_CONFIG_X86_32 */ + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: +#endif /* SLJIT_CONFIG_X86_32 */ + EMIT_MOV(compiler, dst, dstw, src, srcw); + break; + case SLJIT_MOV_U8: + FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_S8: + FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_U16: + FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_S16: + FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw)); + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case SLJIT_MOV_U32: + FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_S32: + FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); + break; + case SLJIT_MOV32: + compiler->mode32 = 1; + EMIT_MOV(compiler, dst, dstw, src, srcw); + compiler->mode32 = 0; + break; +#endif /* SLJIT_CONFIG_X86_64 */ + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1) + return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0); +#endif /* SLJIT_CONFIG_X86_32 */ + return SLJIT_SUCCESS; + } + + switch (op) { + case SLJIT_CLZ: + case SLJIT_CTZ: + return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw); + case SLJIT_REV: + case SLJIT_REV_U16: + case SLJIT_REV_S16: + case SLJIT_REV_U32: + case SLJIT_REV_S32: +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (dst_is_ereg) + op |= SLJIT_32; +#endif /* SLJIT_CONFIG_X86_32 */ + return emit_bswap(compiler, op, dst, dstw, src, srcw); + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, + sljit_u32 op_types, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 
src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_u8 op_eax_imm = U8(op_types >> 24); + sljit_u8 op_rm = U8((op_types >> 16) & 0xff); + sljit_u8 op_mr = U8((op_types >> 8) & 0xff); + sljit_u8 op_imm = U8(op_types & 0xff); + + if (dst == src1 && dstw == src1w) { + if (src2 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src2w); + } + else { + BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); + } + } + else if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + else if (FAST_IS_REG(src2)) { + /* Special exception for sljit_emit_op_flags. */ + inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + return SLJIT_SUCCESS; + } + + /* Only for cumulative operations. 
*/ + if (dst == src2 && dstw == src2w) { + if (src1 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { +#else + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src1w); + } + else { + BINARY_IMM(op_imm, op_mr, src1w, dst, dstw); + } + } + else if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w); + FAIL_IF(!inst); + *inst = op_rm; + } + else if (FAST_IS_REG(src1)) { + inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + return SLJIT_SUCCESS; + } + + /* General version. */ + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + if (src2 == SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, dst, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + } + else { + /* This version requires less memory writing. 
*/ + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 == SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, + sljit_u32 op_types, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_u8 op_eax_imm = U8(op_types >> 24); + sljit_u8 op_rm = U8((op_types >> 16) & 0xff); + sljit_u8 op_mr = U8((op_types >> 8) & 0xff); + sljit_u8 op_imm = U8(op_types & 0xff); + + if (dst == src1 && dstw == src1w) { + if (src2 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src2w); + } + else { + BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); + } + } + else if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + else if (FAST_IS_REG(src2)) { + inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + return SLJIT_SUCCESS; + } + + /* General version. */ + if (FAST_IS_REG(dst) && dst != src2) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + if (src2 == SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, dst, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + } + else { + /* This version requires less memory writing. 
*/ + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 == SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_mul(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + /* Register destination. */ + if (dst_r == src1 && src2 != SLJIT_IMM) { + FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w)); + } else if (dst_r == src2 && src1 != SLJIT_IMM) { + FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src1, src1w)); + } else if (src1 == SLJIT_IMM) { + if (src2 == SLJIT_IMM) { + EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w); + src2 = dst_r; + src2w = 0; + } + + if (src1w <= 127 && src1w >= -128) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i8; + + FAIL_IF(emit_byte(compiler, U8(src1w))); + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + else { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + sljit_unaligned_store_sw(inst, src1w); + } +#else + else if (IS_HALFWORD(src1w)) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + sljit_unaligned_store_s32(inst, (sljit_s32)src1w); + } + else { + if (dst_r != src2) + EMIT_MOV(compiler, dst_r, 0, src2, src2w); + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); + FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0)); + } +#endif + } + else if (src2 == SLJIT_IMM) 
{ + /* Note: src1 is NOT immediate. */ + + if (src2w <= 127 && src2w >= -128) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i8; + + FAIL_IF(emit_byte(compiler, U8(src2w))); + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + else { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + sljit_unaligned_store_sw(inst, src2w); + } +#else + else if (IS_HALFWORD(src2w)) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + sljit_unaligned_store_s32(inst, (sljit_s32)src2w); + } else { + if (dst_r != src1) + EMIT_MOV(compiler, dst_r, 0, src1, src1w); + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); + FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0)); + } +#endif + } else { + /* Neither argument is immediate. */ + if (ADDRESSING_DEPENDS_ON(src2, dst_r)) + dst_r = TMP_REG1; + EMIT_MOV(compiler, dst_r, 0, src1, src1w); + FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w)); + } + + if (dst & SLJIT_MEM) + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_s32 dst_r, done = 0; + + /* These cases better be left to handled by normal way. */ + if (dst == src1 && dstw == src1w) + return SLJIT_ERR_UNSUPPORTED; + if (dst == src2 && dstw == src2w) + return SLJIT_ERR_UNSUPPORTED; + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + + if (FAST_IS_REG(src1)) { + if (FAST_IS_REG(src2)) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0); + FAIL_IF(!inst); + *inst = LEA_r_m; + done = 1; + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src2 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src2w))) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w); +#else + if (src2 == SLJIT_IMM) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); +#endif + FAIL_IF(!inst); + *inst = LEA_r_m; + done = 1; + } + } + else if (FAST_IS_REG(src2)) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src1 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src1w))) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w); +#else + if (src1 == SLJIT_IMM) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); +#endif + FAIL_IF(!inst); + *inst = LEA_r_m; + done = 1; + } + } + + if (done) { + if (dst_r == TMP_REG1) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + return SLJIT_ERR_UNSUPPORTED; +} + +static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(CMP_EAX_i32, src2w); + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(src1)) { + if (src2 == SLJIT_IMM) { + BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = CMP_r_rm; + } + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(src2) && src1 != SLJIT_IMM) { + inst 
= emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); + FAIL_IF(!inst); + *inst = CMP_rm_r; + return SLJIT_SUCCESS; + } + + if (src2 == SLJIT_IMM) { + if (src1 == SLJIT_IMM) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + src1 = TMP_REG1; + src1w = 0; + } + BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w); + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = CMP_r_rm; + } + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(TEST_EAX_i32, src2w); + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { +#else + if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128)) { +#endif + BINARY_EAX_IMM(TEST_EAX_i32, src1w); + return SLJIT_SUCCESS; + } + + if (src1 != SLJIT_IMM) { + if (src2 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src2w) || compiler->mode32) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); + FAIL_IF(!inst); + *inst = GROUP_F7; + } else { + FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, src2w)); + inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(src1) ? 
TMP_REG2 : TMP_REG1, 0, src1, src1w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); + FAIL_IF(!inst); + *inst = GROUP_F7; +#endif + return SLJIT_SUCCESS; + } + else if (FAST_IS_REG(src1)) { + inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + return SLJIT_SUCCESS; + } + } + + if (src2 != SLJIT_IMM) { + if (src1 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src1w) || compiler->mode32) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w); + FAIL_IF(!inst); + *inst = GROUP_F7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } +#else + inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w); + FAIL_IF(!inst); + *inst = GROUP_F7; +#endif + return SLJIT_SUCCESS; + } + else if (FAST_IS_REG(src2)) { + inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + return SLJIT_SUCCESS; + } + } + + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src2w) || compiler->mode32) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; +#endif + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_shift(struct sljit_compiler 
*compiler, + sljit_u8 mode, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_s32 mode32; +#endif + sljit_u8* inst; + + if (src2 == SLJIT_IMM || src2 == SLJIT_PREF_SHIFT_REG) { + if (dst == src1 && dstw == src1w) { + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); + FAIL_IF(!inst); + inst[1] |= mode; + return SLJIT_SUCCESS; + } + if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + inst[1] |= mode; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); + FAIL_IF(!inst); + inst[1] |= mode; + return SLJIT_SUCCESS; + } + + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + inst[1] |= mode; + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + + if (dst == SLJIT_PREF_SHIFT_REG) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + inst[1] |= mode; + return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + } + + if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) { + if (src1 != dst) + EMIT_MOV(compiler, dst, 0, src1, src1w); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + mode32 = compiler->mode32; + compiler->mode32 = 0; +#endif + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); +#if (defined 
SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = mode32; +#endif + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); + FAIL_IF(!inst); + inst[1] |= mode; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = mode32; +#endif + return SLJIT_SUCCESS; + } + + /* This case is complex since ecx itself may be used for + addressing, and this case must be supported as well. */ + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0); +#else /* !SLJIT_CONFIG_X86_32 */ + mode32 = compiler->mode32; + compiler->mode32 = 0; + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); + compiler->mode32 = mode32; +#endif /* SLJIT_CONFIG_X86_32 */ + + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + inst[1] |= mode; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0); +#else + compiler->mode32 = 0; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); + compiler->mode32 = mode32; +#endif /* SLJIT_CONFIG_X86_32 */ + + if (dst != TMP_REG1) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler, + sljit_u8 mode, sljit_s32 set_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* The CPU does not set flags if the shift count is 0. 
*/ + if (src2 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + src2w &= compiler->mode32 ? 0x1f : 0x3f; +#else /* !SLJIT_CONFIG_X86_64 */ + src2w &= 0x1f; +#endif /* SLJIT_CONFIG_X86_64 */ + if (src2w != 0) + return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); + + if (!set_flags) + return emit_mov(compiler, dst, dstw, src1, src1w); + /* OR dst, src, 0 */ + return emit_cum_binary(compiler, BINARY_OPCODE(OR), + dst, dstw, src1, src1w, SLJIT_IMM, 0); + } + + if (!set_flags) + return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); + + if (!FAST_IS_REG(dst)) + FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0)); + + FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w)); + + if (FAST_IS_REG(dst)) + return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2, src2w, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_32; +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + if (!HAS_FLAGS(op)) { + if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) + return compiler->error; + } + return emit_cum_binary(compiler, BINARY_OPCODE(ADD), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ADDC: + return emit_cum_binary(compiler, BINARY_OPCODE(ADC), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUB: + if (src1 == SLJIT_IMM && src1w == 0) + return 
emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w); + + if (!HAS_FLAGS(op)) { + if (src2 == SLJIT_IMM && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) + return compiler->error; + if (FAST_IS_REG(dst) && src2 == dst) { + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w)); + return emit_unary(compiler, NEG_rm, dst, 0, dst, 0); + } + } + + return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUBC: + return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_MUL: + return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_AND: + return emit_cum_binary(compiler, BINARY_OPCODE(AND), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_OR: + return emit_cum_binary(compiler, BINARY_OPCODE(OR), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_XOR: + if (!HAS_FLAGS(op)) { + if (src2 == SLJIT_IMM && src2w == -1) + return emit_unary(compiler, NOT_rm, dst, dstw, src1, src1w); + if (src1 == SLJIT_IMM && src1w == -1) + return emit_unary(compiler, NOT_rm, dst, dstw, src2, src2w); + } + + return emit_cum_binary(compiler, BINARY_OPCODE(XOR), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SHL: + case SLJIT_MSHL: + return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_LSHR: + case SLJIT_MLSHR: + return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ASHR: + case SLJIT_MASHR: + return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ROTL: + return emit_shift_with_flags(compiler, ROL, 0, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ROTR: + return emit_shift_with_flags(compiler, ROR, 0, + dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE 
sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 opcode = GET_OPCODE(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + if (opcode != SLJIT_SUB && opcode != SLJIT_AND) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); + } + + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2, src2w, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_32; +#endif + + if (opcode == SLJIT_SUB) + return emit_cmp_binary(compiler, src1, src1w, src2, src2w); + + return emit_test_binary(compiler, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_sw dstw = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + CHECK_EXTRA_REGS(dst_reg, dstw, (void)0); + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2, src2w, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_32; +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_MULADD: + FAIL_IF(emit_mul(compiler, TMP_REG1, 0, src1, src1w, src2, src2w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst_reg, dstw); + FAIL_IF(!inst); + *inst = ADD_rm_r; + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1_reg, + sljit_s32 src2_reg, + sljit_s32 src3, sljit_sw src3w) 
+{ + sljit_s32 is_rotate, is_left, move_src1; + sljit_u8* inst; + sljit_sw src1w = 0; + sljit_sw dstw = 0; + /* The whole register must be saved even for 32 bit operations. */ + sljit_u8 restore_ecx = 0; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_sw src2w = 0; + sljit_s32 restore_sp4 = 0; +#endif /* SLJIT_CONFIG_X86_32 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w)); + ADJUST_LOCAL_OFFSET(src3, src3w); + + CHECK_EXTRA_REGS(dst_reg, dstw, (void)0); + CHECK_EXTRA_REGS(src3, src3w, (void)0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_32; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (src3 == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + src3w &= 0x1f; +#else /* !SLJIT_CONFIG_X86_32 */ + src3w &= (op & SLJIT_32) ? 0x1f : 0x3f; +#endif /* SLJIT_CONFIG_X86_32 */ + + if (src3w == 0) + return SLJIT_SUCCESS; + } + + is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL); + + is_rotate = (src1_reg == src2_reg); + CHECK_EXTRA_REGS(src1_reg, src1w, (void)0); + CHECK_EXTRA_REGS(src2_reg, src2w, (void)0); + + if (is_rotate) + return emit_shift(compiler, is_left ? 
ROL : ROR, dst_reg, dstw, src1_reg, src1w, src3, src3w); + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (src2_reg & SLJIT_MEM) { + EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w); + src2_reg = TMP_REG1; + } +#endif /* SLJIT_CONFIG_X86_32 */ + + if (dst_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w); + src1_reg = TMP_REG1; + src1w = 0; +#else /* !SLJIT_CONFIG_X86_64 */ + if (src2_reg != TMP_REG1) { + EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w); + src1_reg = TMP_REG1; + src1w = 0; + } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) { + restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0; + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0); + EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w); + src1_reg = restore_sp4; + src1w = 0; + } else { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0); + restore_sp4 = src1_reg; + } +#endif /* SLJIT_CONFIG_X86_64 */ + + if (src3 != SLJIT_PREF_SHIFT_REG) + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w); + } else { + if (src2_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif /* SLJIT_CONFIG_X86_64 */ + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_32; +#endif /* SLJIT_CONFIG_X86_64 */ + src2_reg = TMP_REG1; + restore_ecx = 1; + } + + move_src1 = 0; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (dst_reg != src1_reg) { + if (dst_reg != src3) { + EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w); + src1_reg = dst_reg; + src1w = 0; + } else + move_src1 = 1; + } +#else /* !SLJIT_CONFIG_X86_64 */ + if (dst_reg & 
SLJIT_MEM) { + if (src2_reg != TMP_REG1) { + EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w); + src1_reg = TMP_REG1; + src1w = 0; + } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) { + restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0; + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0); + EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w); + src1_reg = restore_sp4; + src1w = 0; + } else { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0); + restore_sp4 = src1_reg; + } + } else if (dst_reg != src1_reg) { + if (dst_reg != src3) { + EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w); + src1_reg = dst_reg; + src1w = 0; + } else + move_src1 = 1; + } +#endif /* SLJIT_CONFIG_X86_64 */ + + if (src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) { + if (!restore_ecx) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); + compiler->mode32 = op & SLJIT_32; + restore_ecx = 1; +#else /* !SLJIT_CONFIG_X86_64 */ + if (src1_reg != TMP_REG1 && src2_reg != TMP_REG1) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); + restore_ecx = 1; + } else { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0); + restore_ecx = 2; + } +#endif /* SLJIT_CONFIG_X86_64 */ + } + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w); + } + + if (move_src1) { + EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w); + src1_reg = dst_reg; + src1w = 0; + } + } + + inst = emit_x86_instruction(compiler, 2, src2_reg, 0, src1_reg, src1w); + FAIL_IF(!inst); + inst[0] = GROUP_0F; + + if (src3 == SLJIT_IMM) { + inst[1] = U8((is_left ? SHLD : SHRD) - 1); + + /* Immediate argument is added separately. */ + FAIL_IF(emit_byte(compiler, U8(src3w))); + } else + inst[1] = U8(is_left ? 
SHLD : SHRD); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (restore_ecx) { + compiler->mode32 = 0; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + } + + if (src1_reg != dst_reg) { + compiler->mode32 = op & SLJIT_32; + return emit_mov(compiler, dst_reg, dstw, src1_reg, 0); + } +#else /* !SLJIT_CONFIG_X86_64 */ + if (restore_ecx) + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, restore_ecx == 1 ? TMP_REG1 : SLJIT_MEM1(SLJIT_SP), 0); + + if (src1_reg != dst_reg) + EMIT_MOV(compiler, dst_reg, dstw, src1_reg, 0); + + if (restore_sp4) + return emit_mov(compiler, restore_sp4, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32)); +#endif /* SLJIT_CONFIG_X86_32 */ + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + switch (op) { + case SLJIT_FAST_RETURN: + return emit_fast_return(compiler, src, srcw); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + /* Don't adjust shadow stack if it isn't enabled. 
*/ + if (!cpu_has_shadow_stack ()) + return SLJIT_SUCCESS; + return adjust_shadow_stack(compiler, src, srcw); + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + return emit_prefetch(compiler, op, src, srcw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + switch (op) { + case SLJIT_FAST_ENTER: + return emit_fast_enter(compiler, dst, dstw); + case SLJIT_GET_RETURN_ADDRESS: + return sljit_emit_get_return_address(compiler, dst, dstw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (reg >= SLJIT_R3 && reg <= SLJIT_R8) + return -1; +#endif /* SLJIT_CONFIG_X86_32 */ + return reg_map[reg]; + } + + if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256 && type != SLJIT_SIMD_REG_512) + return -1; + + return freg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + SLJIT_MEMCPY(inst, instruction, size); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +/* Alignment(3) + 4 * 16 bytes. 
*/ +static sljit_u32 sse2_data[3 + (4 * 4)]; +static sljit_u32 *sse2_buffer; + +static void init_compiler(void) +{ + get_cpu_features(); + + /* Align to 16 bytes. */ + sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf); + + /* Single precision constants (each constant is 16 byte long). */ + sse2_buffer[0] = 0x80000000; + sse2_buffer[4] = 0x7fffffff; + /* Double precision constants (each constant is 16 byte long). */ + sse2_buffer[8] = 0; + sse2_buffer[9] = 0x80000000; + sse2_buffer[12] = 0xffffffff; + sse2_buffer[13] = 0x7fffffff; +} + +static sljit_s32 emit_groupf(struct sljit_compiler *compiler, + sljit_uw op, + sljit_s32 dst, sljit_s32 src, sljit_sw srcw) +{ + sljit_u8 *inst = emit_x86_instruction(compiler, 2 | (op & ~(sljit_uw)0xff), dst, 0, src, srcw); + FAIL_IF(!inst); + inst[0] = GROUP_0F; + inst[1] = op & 0xff; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler, + sljit_uw op, + sljit_s32 dst, sljit_s32 src, sljit_sw srcw) +{ + sljit_u8 *inst; + + SLJIT_ASSERT((op & EX86_SSE2) && ((op & VEX_OP_0F38) || (op & VEX_OP_0F3A))); + + inst = emit_x86_instruction(compiler, 3 | (op & ~((sljit_uw)0xff | VEX_OP_0F38 | VEX_OP_0F3A)), dst, 0, src, srcw); + FAIL_IF(!inst); + inst[0] = GROUP_0F; + inst[1] = U8((op & VEX_OP_0F38) ? 0x38 : 0x3A); + inst[2] = op & 0xff; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw) +{ + return emit_groupf(compiler, MOVSD_x_xm | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, dst, src, srcw); +} + +static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src) +{ + return emit_groupf(compiler, MOVSD_xm_x | (single ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, src, dst, dstw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) + compiler->mode32 = 0; +#endif + + FAIL_IF(emit_groupf(compiler, CVTTSD2SI_r_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP2, dst_r, src, srcw)); + + if (dst & SLJIT_MEM) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + + CHECK_EXTRA_REGS(src, srcw, (void)0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) + compiler->mode32 = 0; +#endif + + if (src == SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; +#endif + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, srcw)); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + switch (GET_FLAG_TYPE(op)) { + case SLJIT_ORDERED_EQUAL: + /* Also: SLJIT_UNORDERED_OR_NOT_EQUAL */ + 
FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); + FAIL_IF(emit_groupf(compiler, CMPS_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, TMP_FREG, src2, src2w)); + + /* EQ */ + FAIL_IF(emit_byte(compiler, 0)); + + src1 = TMP_FREG; + src2 = TMP_FREG; + src2w = 0; + break; + + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_GREATER: + /* Also: SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_ORDERED_LESS_EQUAL */ + if (!FAST_IS_REG(src2)) { + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w)); + src2 = TMP_FREG; + } + + return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src2, src1, src1w); + } + + if (!FAST_IS_REG(src1)) { + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); + src1 = TMP_FREG; + } + + return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src1, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + sljit_u8 *inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + CHECK_ERROR(); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_MOV_F64) { + if (FAST_IS_REG(dst)) + return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw); + if (FAST_IS_REG(src)) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw)); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + } + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) { + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + if (FAST_IS_REG(src)) { + /* We overwrite the high bits of source. From SLJIT point of view, + this is not an issue. + Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */ + FAIL_IF(emit_groupf(compiler, UNPCKLPD_x_xm | ((op & SLJIT_32) ? 
EX86_PREF_66 : 0) | EX86_SSE2, src, src, 0)); + } else { + FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw)); + src = TMP_FREG; + } + + FAIL_IF(emit_groupf(compiler, CVTPD2PS_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, dst_r, src, 0)); + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(dst)) { + dst_r = (dst == src) ? TMP_FREG : dst; + + if (src & SLJIT_MEM) + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw)); + + FAIL_IF(emit_groupf(compiler, PCMPEQD_x_xm | EX86_PREF_66 | EX86_SSE2, dst_r, dst_r, 0)); + + inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP2, 0, 0, dst_r, 0); + inst[0] = GROUP_0F; + /* Same as PSRLD_x / PSRLQ_x */ + inst[1] = (op & SLJIT_32) ? PSLLD_x_i8 : PSLLQ_x_i8; + + if (GET_OPCODE(op) == SLJIT_ABS_F64) { + inst[2] |= 2 << 3; + FAIL_IF(emit_byte(compiler, 1)); + } else { + inst[2] |= 6 << 3; + FAIL_IF(emit_byte(compiler, ((op & SLJIT_32) ? 31 : 63))); + } + + if (dst_r != TMP_FREG) + dst_r = (src & SLJIT_MEM) ? TMP_FREG : src; + return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_NEG_F64 ? XORPD_x_xm : ANDPD_x_xm) | EX86_SSE2, dst, dst_r, 0); + } + + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw)); + + switch (GET_OPCODE(op)) { + case SLJIT_NEG_F64: + FAIL_IF(emit_groupf(compiler, XORPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8))); + break; + + case SLJIT_ABS_F64: + FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? 
sse2_buffer + 4 : sse2_buffer + 12))); + break; + } + + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + if (FAST_IS_REG(dst)) { + dst_r = dst; + if (dst == src1) + ; /* Do nothing here. */ + else if (dst == src2 && (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64)) { + /* Swap arguments. */ + src2 = src1; + src2w = src1w; + } else if (dst != src2) + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w)); + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); + } + } else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w)); + break; + + case SLJIT_SUB_F64: + FAIL_IF(emit_groupf(compiler, SUBSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w)); + break; + + case SLJIT_MUL_F64: + FAIL_IF(emit_groupf(compiler, MULSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w)); + break; + + case SLJIT_DIV_F64: + FAIL_IF(emit_groupf(compiler, DIVSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w)); + break; + } + + if (dst_r != dst) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, 
sljit_s32 op, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_uw pref; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + if (dst_freg == src1) { + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w)); + pref = EX86_SELECT_66(op) | EX86_SSE2; + FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, TMP_FREG, src1, src1w)); + FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8))); + return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, TMP_FREG, 0); + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); + src1 = TMP_FREG; + src1w = 0; + } + + if (dst_freg != src2) + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_freg, src2, src2w)); + + pref = EX86_SELECT_66(op) | EX86_SSE2; + FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w)); + FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? 
sse2_buffer : sse2_buffer + 8))); + return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + sljit_u8 *inst; + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + + inst = (sljit_u8*)ensure_buf(compiler, 1); + PTR_FAIL_IF(!inst); + inst[0] = SLJIT_INST_LABEL; + + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + sljit_u8 *inst; + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF_NULL(jump); + set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT))); + type &= 0xff; + + jump->addr = compiler->size; + /* Worst case size. */ + compiler->size += (type >= SLJIT_JUMP) ? 
JUMP_MAX_SIZE : CJUMP_MAX_SIZE; + inst = (sljit_u8*)ensure_buf(compiler, 1); + PTR_FAIL_IF_NULL(inst); + + inst[0] = SLJIT_INST_JUMP; + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + sljit_u8 *inst; + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (src == SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF_NULL(jump); + set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT))); + jump->u.target = (sljit_uw)srcw; + + jump->addr = compiler->size; + /* Worst case size. */ + compiler->size += JUMP_MAX_SIZE; + inst = (sljit_u8*)ensure_buf(compiler, 1); + FAIL_IF_NULL(inst); + + inst[0] = SLJIT_INST_JUMP; + } else { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* REX_W is not necessary (src is not immediate). */ + compiler->mode32 = 1; +#endif + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + inst[0] = GROUP_FF; + inst[1] = U8(inst[1] | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm)); + } + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_u8 *inst; + sljit_u8 cond_set; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_s32 reg; +#endif /* !SLJIT_CONFIG_X86_64 */ + /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */ + sljit_s32 dst_save = dst; + sljit_sw dstw_save = dstw; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + /* setcc = jcc + 0x10. 
*/ + cond_set = U8(get_jump_code((sljit_uw)type) + 0x10); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3); + FAIL_IF(!inst); + INC_SIZE(4 + 3); + /* Set low register to conditional flag. */ + inst[0] = (reg_map[TMP_REG1] <= 7) ? REX : REX_B; + inst[1] = GROUP_0F; + inst[2] = cond_set; + inst[3] = MOD_REG | reg_lmap[TMP_REG1]; + inst[4] = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B)); + inst[5] = OR_rm8_r8; + inst[6] = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]); + return SLJIT_SUCCESS; + } + + reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4); + FAIL_IF(!inst); + INC_SIZE(4 + 4); + /* Set low register to conditional flag. */ + inst[0] = (reg_map[reg] <= 7) ? REX : REX_B; + inst[1] = GROUP_0F; + inst[2] = cond_set; + inst[3] = MOD_REG | reg_lmap[reg]; + inst[4] = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); + /* The movzx instruction does not affect flags. */ + inst[5] = GROUP_0F; + inst[6] = MOVZX_r_rm8; + inst[7] = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]); + + if (reg != TMP_REG1) + return SLJIT_SUCCESS; + + if (GET_OPCODE(op) < SLJIT_ADD) { + compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV; + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); + +#else /* !SLJIT_CONFIG_X86_64 */ + SLJIT_ASSERT(reg_map[TMP_REG1] < 4); + + /* The SLJIT_CONFIG_X86_32 code path starts here. */ + if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst) && reg_map[dst] <= 4) { + /* Low byte is accessible. */ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3); + FAIL_IF(!inst); + INC_SIZE(3 + 3); + /* Set low byte to conditional flag. 
*/ + inst[0] = GROUP_0F; + inst[1] = cond_set; + inst[2] = U8(MOD_REG | reg_map[dst]); + + inst[3] = GROUP_0F; + inst[4] = MOVZX_r_rm8; + inst[5] = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]); + return SLJIT_SUCCESS; + } + + if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 2); + FAIL_IF(!inst); + INC_SIZE(3 + 2); + + /* Set low byte to conditional flag. */ + inst[0] = GROUP_0F; + inst[1] = cond_set; + inst[2] = U8(MOD_REG | reg_map[TMP_REG1]); + + inst[3] = OR_rm8_r8; + inst[4] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[dst]); + return SLJIT_SUCCESS; + } + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3); + FAIL_IF(!inst); + INC_SIZE(3 + 3); + /* Set low byte to conditional flag. */ + inst[0] = GROUP_0F; + inst[1] = cond_set; + inst[2] = U8(MOD_REG | reg_map[TMP_REG1]); + + inst[3] = GROUP_0F; + inst[4] = MOVZX_r_rm8; + inst[5] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[TMP_REG1]); + + if (GET_OPCODE(op) < SLJIT_ADD) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); +#endif /* SLJIT_CONFIG_X86_64 */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_u8* inst; + sljit_uw size; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (dst_freg != src2_freg) { + if (dst_freg == src1) { + src1 = src2_freg; + src1w = 0; + type ^= 0x1; + } else + FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src2_freg, 0)); + } + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); 
+ FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = U8(get_jump_code((sljit_uw)(type & ~SLJIT_32) ^ 0x1) - 0x10); + + size = compiler->size; + FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src1, src1w)); + + inst[1] = U8(compiler->size - size); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_uw op; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + + switch (reg_size) { + case 4: + op = EX86_SSE2; + break; + case 5: + if (!(cpu_feature_list & CPU_FEATURE_AVX2)) + return SLJIT_ERR_UNSUPPORTED; + op = EX86_SSE2 | VEX_256; + break; + default: + return SLJIT_ERR_UNSUPPORTED; + } + + if (!(srcdst & SLJIT_MEM)) + alignment = reg_size; + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size == 2 || elem_size == 3) { + op |= alignment >= reg_size ? MOVAPS_x_xm : MOVUPS_x_xm; + + if (elem_size == 3) + op |= EX86_PREF_66; + + if (type & SLJIT_SIMD_STORE) + op += 1; + } else + return SLJIT_ERR_UNSUPPORTED; + } else { + op |= ((type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm) + | (alignment >= reg_size ? 
EX86_PREF_66 : EX86_PREF_F3); + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if ((op & VEX_256) || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX))) + return emit_vex_instruction(compiler, op, freg, 0, srcdst, srcdstw); + + return emit_groupf(compiler, op, freg, srcdst, srcdstw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); + sljit_u8 *inst; + sljit_u8 opcode = 0; + sljit_uw op; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (!(type & SLJIT_SIMD_FLOAT)) { + CHECK_EXTRA_REGS(src, srcw, (void)0); + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if ((type & SLJIT_SIMD_FLOAT) ? 
(elem_size < 2 || elem_size > 3) : (elem_size > 2)) + return SLJIT_ERR_UNSUPPORTED; +#else /* !SLJIT_CONFIG_X86_32 */ + compiler->mode32 = 1; + + if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)) + return SLJIT_ERR_UNSUPPORTED; +#endif /* SLJIT_CONFIG_X86_32 */ + + if (reg_size != 4 && (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2))) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 5) + use_vex = 1; + + if (use_vex && src != SLJIT_IMM) { + op = 0; + + switch (elem_size) { + case 0: + if (cpu_feature_list & CPU_FEATURE_AVX2) + op = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + case 1: + if (cpu_feature_list & CPU_FEATURE_AVX2) + op = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + case 2: + if (type & SLJIT_SIMD_FLOAT) { + if ((cpu_feature_list & CPU_FEATURE_AVX2) || ((cpu_feature_list & CPU_FEATURE_AVX) && (src & SLJIT_MEM))) + op = VBROADCASTSS_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + } else if (cpu_feature_list & CPU_FEATURE_AVX2) + op = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + default: +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (!(type & SLJIT_SIMD_FLOAT)) { + if (cpu_feature_list & CPU_FEATURE_AVX2) + op = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + } +#endif /* SLJIT_CONFIG_X86_64 */ + + if (reg_size == 5) + op = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + } + + if (op != 0) { + if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (elem_size >= 3) + compiler->mode32 = 0; +#endif /* SLJIT_CONFIG_X86_64 */ + FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw)); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + src = freg; + srcw 
= 0; + } + + if (reg_size == 5) + op |= VEX_256; + + return emit_vex_instruction(compiler, op, freg, 0, src, srcw); + } + } + + if (type & SLJIT_SIMD_FLOAT) { + if (src == SLJIT_IMM) { + if (use_vex) + return emit_vex_instruction(compiler, XORPD_x_xm | (reg_size == 5 ? VEX_256 : 0) | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0); + + return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, freg, freg, 0); + } + + SLJIT_ASSERT(reg_size == 4); + + if (use_vex) { + if (elem_size == 3) + return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, src, srcw); + + SLJIT_ASSERT(!(src & SLJIT_MEM)); + FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0)); + return emit_byte(compiler, 0); + } + + if (elem_size == 2 && freg != src) { + FAIL_IF(emit_sse2_load(compiler, 1, freg, src, srcw)); + src = freg; + srcw = 0; + } + + op = (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2; + FAIL_IF(emit_groupf(compiler, op, freg, src, srcw)); + + if (elem_size == 2) + return emit_byte(compiler, 0); + return SLJIT_SUCCESS; + } + + if (src == SLJIT_IMM) { + if (elem_size == 0) { + srcw = (sljit_u8)srcw; + srcw |= srcw << 8; + srcw |= srcw << 16; + elem_size = 2; + } else if (elem_size == 1) { + srcw = (sljit_u16)srcw; + srcw |= srcw << 16; + elem_size = 2; + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (elem_size == 2 && (sljit_s32)srcw == -1) + srcw = -1; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (srcw == 0 || srcw == -1) { + if (use_vex) + return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0); + + return emit_groupf(compiler, (srcw == 0 ? 
PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, freg, freg, 0); + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (elem_size == 3) + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw)); + else +#endif /* SLJIT_CONFIG_X86_64 */ + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); + + src = TMP_REG1; + srcw = 0; + + } + + op = 2; + opcode = MOVD_x_rm; + + switch (elem_size) { + case 0: + if (!FAST_IS_REG(src)) { + opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */; + op = 3; + } + break; + case 1: + if (!FAST_IS_REG(src)) + opcode = PINSRW_x_rm_i8; + break; + case 2: + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case 3: + /* MOVQ */ + compiler->mode32 = 0; + break; +#endif /* SLJIT_CONFIG_X86_64 */ + } + + if (use_vex) { + if (opcode != MOVD_x_rm) { + op = (opcode == 0x3a) ? (PINSRB_x_rm_i8 | VEX_OP_0F3A) : opcode; + FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1 | VEX_SSE2_OPV, freg, freg, src, srcw)); + } else + FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw)); + } else { + inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw); + FAIL_IF(!inst); + inst[0] = GROUP_0F; + inst[1] = opcode; + + if (op == 3) { + SLJIT_ASSERT(opcode == 0x3a); + inst[2] = PINSRB_x_rm_i8; + } + } + + if (use_vex && elem_size >= 2) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + op = VPBROADCASTD_x_xm; +#else /* !SLJIT_CONFIG_X86_32 */ + op = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm; +#endif /* SLJIT_CONFIG_X86_32 */ + return emit_vex_instruction(compiler, op | ((reg_size == 5) ? 
VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0); + } + + SLJIT_ASSERT(reg_size == 4); + + if (opcode != MOVD_x_rm) + FAIL_IF(emit_byte(compiler, 0)); + + switch (elem_size) { + case 0: + if (use_vex) { + FAIL_IF(emit_vex_instruction(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0)); + return emit_vex_instruction(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, TMP_FREG, 0); + } + FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0)); + return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0); + case 1: + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, freg, 0)); + else + FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, freg, 0)); + FAIL_IF(emit_byte(compiler, 0)); + /* fallthrough */ + default: + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0)); + else + FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0)); + return emit_byte(compiler, 0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case 3: + compiler->mode32 = 1; + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0)); + else + FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0)); + return emit_byte(compiler, 0x44); +#endif /* SLJIT_CONFIG_X86_64 */ + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options 
& SLJIT_ENTER_USE_VEX); + sljit_u8 *inst; + sljit_u8 opcode = 0; + sljit_uw op; + sljit_s32 freg_orig = freg; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 srcdst_is_ereg = 0; + sljit_s32 srcdst_orig = 0; + sljit_sw srcdstw_orig = 0; +#endif /* SLJIT_CONFIG_X86_32 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size == 5) { + if (!(cpu_feature_list & CPU_FEATURE_AVX2)) + return SLJIT_ERR_UNSUPPORTED; + use_vex = 1; + } else if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2) + return SLJIT_ERR_UNSUPPORTED; +#else /* SLJIT_CONFIG_X86_32 */ + if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)) + return SLJIT_ERR_UNSUPPORTED; +#endif /* SLJIT_CONFIG_X86_32 */ + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#else /* !SLJIT_CONFIG_X86_64 */ + if (!(type & SLJIT_SIMD_FLOAT)) { + CHECK_EXTRA_REGS(srcdst, srcdstw, srcdst_is_ereg = 1); + + if ((type & SLJIT_SIMD_STORE) && ((srcdst_is_ereg && elem_size < 2) || (elem_size == 0 && (type & SLJIT_SIMD_LANE_SIGNED) && FAST_IS_REG(srcdst) && reg_map[srcdst] >= 4))) { + srcdst_orig = srcdst; + srcdstw_orig = srcdstw; + srcdst = TMP_REG1; + srcdstw = 0; + } + } +#endif /* SLJIT_CONFIG_X86_64 */ + + if (type & SLJIT_SIMD_LANE_ZERO) { + if (lane_index == 0) { + if (!(type & SLJIT_SIMD_FLOAT)) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (elem_size == 3) { + compiler->mode32 = 0; + elem_size = 2; + } +#endif /* SLJIT_CONFIG_X86_64 */ + if (srcdst == SLJIT_IMM) { + if (elem_size == 0) + srcdstw = (sljit_u8)srcdstw; + else if (elem_size == 1) + srcdstw = (sljit_u16)srcdstw; + + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw); 
+ srcdst = TMP_REG1; + srcdstw = 0; + elem_size = 2; + } + + if (elem_size == 2) { + if (use_vex) + return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw); + return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, srcdst, srcdstw); + } + } else if (srcdst & SLJIT_MEM) { + SLJIT_ASSERT(elem_size == 2 || elem_size == 3); + + if (use_vex) + return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, 0, srcdst, srcdstw); + return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, srcdst, srcdstw); + } else if (elem_size == 3) { + if (use_vex) + return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, 0, srcdst, 0); + return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, srcdst, 0); + } else if (use_vex) { + FAIL_IF(emit_vex_instruction(compiler, XORPD_x_xm | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0)); + return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F3 | EX86_SSE2 | VEX_SSE2_OPV, freg, TMP_FREG, srcdst, 0); + } + } + + if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) { + freg = TMP_FREG; + lane_index -= (1 << (4 - elem_size)); + } else if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, srcdst, srcdstw)); + else + FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw)); + srcdst = TMP_FREG; + srcdstw = 0; + } + + op = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0) + | ((type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm) | EX86_SSE2; + + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, op | (reg_size == 5 ? 
VEX_256 : 0) | VEX_SSE2_OPV, freg, freg, freg, 0)); + else + FAIL_IF(emit_groupf(compiler, op, freg, freg, 0)); + } else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) { + FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0)); + FAIL_IF(emit_byte(compiler, 1)); + + freg = TMP_FREG; + lane_index -= (1 << (4 - elem_size)); + } + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size == 3) { + if (srcdst & SLJIT_MEM) { + if (type & SLJIT_SIMD_STORE) + op = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x; + else + op = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m; + + /* VEX prefix clears upper bits of the target register. */ + if (use_vex && ((type & SLJIT_SIMD_STORE) || reg_size == 4 || freg == TMP_FREG)) + FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2 + | ((type & SLJIT_SIMD_STORE) ? 0 : VEX_SSE2_OPV), freg, (type & SLJIT_SIMD_STORE) ? 0 : freg, srcdst, srcdstw)); + else + FAIL_IF(emit_groupf(compiler, op | EX86_PREF_66 | EX86_SSE2, freg, srcdst, srcdstw)); + + /* In case of store, freg is not TMP_FREG. 
*/ + } else if (type & SLJIT_SIMD_STORE) { + if (lane_index == 1) { + if (use_vex) + return emit_vex_instruction(compiler, MOVHLPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, freg, 0); + return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, freg, 0); + } + if (use_vex) + return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, freg, 0); + return emit_sse2_load(compiler, 0, srcdst, freg, 0); + } else if (use_vex && (reg_size == 4 || freg == TMP_FREG)) { + if (lane_index == 1) + FAIL_IF(emit_vex_instruction(compiler, MOVLHPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, srcdst, 0)); + else + FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, srcdst, 0)); + } else { + if (lane_index == 1) + FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, freg, srcdst, 0)); + else + FAIL_IF(emit_sse2_load(compiler, 0, freg, srcdst, 0)); + } + } else if (type & SLJIT_SIMD_STORE) { + if (lane_index == 0) { + if (use_vex) + return emit_vex_instruction(compiler, ((srcdst & SLJIT_MEM) ? MOVSD_xm_x : MOVSD_x_xm) | EX86_PREF_F3 | EX86_SSE2 + | ((srcdst & SLJIT_MEM) ? 0 : VEX_SSE2_OPV), freg, ((srcdst & SLJIT_MEM) ? 
0 : freg), srcdst, srcdstw); + return emit_sse2_store(compiler, 1, srcdst, srcdstw, freg); + } + + if (srcdst & SLJIT_MEM) { + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, srcdst, srcdstw)); + else + FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw)); + return emit_byte(compiler, U8(lane_index)); + } + + if (use_vex) { + FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, freg, freg, 0)); + return emit_byte(compiler, U8(lane_index)); + } + + if (srcdst == freg) + op = SHUFPS_x_xm | EX86_SSE2; + else { + switch (lane_index) { + case 1: + op = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2; + break; + case 2: + op = MOVHLPS_x_x | EX86_SSE2; + break; + default: + SLJIT_ASSERT(lane_index == 3); + op = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2; + break; + } + } + + FAIL_IF(emit_groupf(compiler, op, srcdst, freg, 0)); + + op &= 0xff; + if (op == SHUFPS_x_xm || op == PSHUFD_x_xm) + return emit_byte(compiler, U8(lane_index)); + + return SLJIT_SUCCESS; + } else { + if (lane_index != 0 || (srcdst & SLJIT_MEM)) { + FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw)); + FAIL_IF(emit_byte(compiler, U8(lane_index << 4))); + } else + FAIL_IF(emit_sse2_store(compiler, 1, freg, 0, srcdst)); + } + + if (freg != TMP_FREG || (type & SLJIT_SIMD_STORE)) + return SLJIT_SUCCESS; + + SLJIT_ASSERT(reg_size == 5); + + if (type & SLJIT_SIMD_LANE_ZERO) { + FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0)); + return emit_byte(compiler, 0x4e); + } + + FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0)); + return emit_byte(compiler, 1); + } + + if (srcdst == SLJIT_IMM) { + 
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw); + srcdst = TMP_REG1; + srcdstw = 0; + } + + op = 3; + + switch (elem_size) { + case 0: + opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8; + break; + case 1: + if (!(type & SLJIT_SIMD_STORE)) { + op = 2; + opcode = PINSRW_x_rm_i8; + } else + opcode = PEXTRW_rm_x_i8; + break; + case 2: + opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8; + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case 3: + /* PINSRQ / PEXTRQ */ + opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8; + compiler->mode32 = 0; + break; +#endif /* SLJIT_CONFIG_X86_64 */ + } + + if (use_vex && (type & SLJIT_SIMD_STORE)) { + op = opcode | ((op == 3) ? VEX_OP_0F3A : 0); + FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | VEX_AUTO_W | EX86_SSE2_OP1 | VEX_SSE2_OPV, freg, 0, srcdst, srcdstw)); + } else { + inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw); + FAIL_IF(!inst); + inst[0] = GROUP_0F; + + if (op == 3) { + inst[1] = 0x3a; + inst[2] = opcode; + } else + inst[1] = opcode; + } + + FAIL_IF(emit_byte(compiler, U8(lane_index))); + + if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) { + if (freg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) { + SLJIT_ASSERT(reg_size == 5); + + if (type & SLJIT_SIMD_LANE_ZERO) { + FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0)); + return emit_byte(compiler, 0x4e); + } + + FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0)); + return emit_byte(compiler, 1); + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (srcdst_orig & SLJIT_MEM) + return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0); +#endif /* SLJIT_CONFIG_X86_32 */ + return SLJIT_SUCCESS; + } + 
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (elem_size >= 3) + return SLJIT_SUCCESS; + + compiler->mode32 = (type & SLJIT_32); + + op = 2; + + if (elem_size == 0) + op |= EX86_REX; + + if (elem_size == 2) { + if (type & SLJIT_32) + return SLJIT_SUCCESS; + + SLJIT_ASSERT(!(compiler->mode32)); + op = 1; + } + + inst = emit_x86_instruction(compiler, op, srcdst, 0, srcdst, 0); + FAIL_IF(!inst); + + if (op != 1) { + inst[0] = GROUP_0F; + inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16); + } else + inst[0] = MOVSXD_r_rm; +#else /* !SLJIT_CONFIG_X86_64 */ + if (elem_size >= 2) + return SLJIT_SUCCESS; + + FAIL_IF(emit_groupf(compiler, (elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16, + (srcdst_orig != 0 && FAST_IS_REG(srcdst_orig)) ? srcdst_orig : srcdst, srcdst, 0)); + + if (srcdst_orig & SLJIT_MEM) + return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0); +#endif /* SLJIT_CONFIG_X86_64 */ + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); + sljit_uw pref; + sljit_u8 byte; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 opcode3 = TMP_REG1; +#else /* !SLJIT_CONFIG_X86_32 */ + sljit_s32 opcode3 = SLJIT_S0; +#endif /* SLJIT_CONFIG_X86_32 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + SLJIT_ASSERT(reg_map[opcode3] == 3); + + if (reg_size == 5) { + if (!(cpu_feature_list & CPU_FEATURE_AVX2)) + return SLJIT_ERR_UNSUPPORTED; + use_vex = 1; + } else if (reg_size != 4) + 
return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_FLOAT) { + pref = 0; + byte = U8(src_lane_index); + + if (elem_size == 3) { + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 5) { + if (src_lane_index == 0) + return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0); + + FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + + byte = U8(byte | (byte << 2)); + return emit_byte(compiler, U8(byte | (byte << 4))); + } + + if (src_lane_index == 0) { + if (use_vex) + return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, src, 0); + return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, src, 0); + } + + /* Changes it to SHUFPD_x_xm. */ + pref = EX86_PREF_66; + } else if (elem_size != 2) + return SLJIT_ERR_UNSUPPORTED; + else if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 5) { + SLJIT_ASSERT(elem_size == 2); + + if (src_lane_index == 0) + return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0); + + FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + + byte = 0x44; + if (src_lane_index >= 4) { + byte = 0xee; + src_lane_index -= 4; + } + + FAIL_IF(emit_byte(compiler, byte)); + FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0)); + byte = U8(src_lane_index); + } else if (use_vex) { + FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0)); + } else { + if (freg != src) + FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, freg, src, 0)); + + FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, freg, freg, 0)); + } + + if (elem_size == 2) { + byte 
= U8(byte | (byte << 2)); + byte = U8(byte | (byte << 4)); + } else + byte = U8(byte | (byte << 1)); + + return emit_byte(compiler, U8(byte)); + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (elem_size == 0) { + if (reg_size == 5 && src_lane_index >= 16) { + FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa)); + src_lane_index &= 0x7; + src = freg; + } + + if (src_lane_index != 0 || (freg != src && (!(cpu_feature_list & CPU_FEATURE_AVX2) || !use_vex))) { + pref = 0; + + if ((src_lane_index & 0x3) == 0) { + pref = EX86_PREF_66; + byte = U8(src_lane_index >> 2); + } else if (src_lane_index < 8 && (src_lane_index & 0x1) == 0) { + pref = EX86_PREF_F2; + byte = U8(src_lane_index >> 1); + } else { + if (!use_vex) { + if (freg != src) + FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0)); + + FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, freg, 0)); + } else + FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, freg, src, 0)); + + FAIL_IF(emit_byte(compiler, U8(src_lane_index))); + } + + if (pref != 0) { + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, 0, src, 0)); + else + FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0)); + FAIL_IF(emit_byte(compiler, byte)); + } + + src = freg; + } + + if (use_vex && (cpu_feature_list & CPU_FEATURE_AVX2)) + return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? 
VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0); + + SLJIT_ASSERT(reg_size == 4); + FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0)); + return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0); + } + + if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && src_lane_index == 0 && elem_size <= 3) { + switch (elem_size) { + case 1: + pref = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + case 2: + pref = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + default: + pref = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2; + break; + } + + if (reg_size == 5) + pref |= VEX_256; + + return emit_vex_instruction(compiler, pref, freg, 0, src, 0); + } + + if (reg_size == 5) { + switch (elem_size) { + case 1: + byte = U8(src_lane_index & 0x3); + src_lane_index >>= 2; + pref = PSHUFLW_x_xm | VEX_256 | ((src_lane_index & 1) == 0 ? EX86_PREF_F2 : EX86_PREF_F3) | EX86_SSE2; + break; + case 2: + byte = U8(src_lane_index & 0x3); + src_lane_index >>= 1; + pref = PSHUFD_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2; + break; + case 3: + pref = 0; + break; + default: + FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + return emit_byte(compiler, U8(src_lane_index == 0 ? 
0x44 : 0xee)); + } + + if (pref != 0) { + FAIL_IF(emit_vex_instruction(compiler, pref, freg, 0, src, 0)); + byte = U8(byte | (byte << 2)); + FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4)))); + + if (src_lane_index == 0) + return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0); + + src = freg; + } + + FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0)); + byte = U8(src_lane_index); + byte = U8(byte | (byte << 2)); + return emit_byte(compiler, U8(byte | (byte << 4))); + } + + switch (elem_size) { + case 1: + byte = U8(src_lane_index & 0x3); + src_lane_index >>= 1; + pref = (src_lane_index & 2) == 0 ? EX86_PREF_F2 : EX86_PREF_F3; + + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, 0, src, 0)); + else + FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0)); + byte = U8(byte | (byte << 2)); + FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4)))); + + if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && pref == EX86_PREF_F2) + return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0); + + src = freg; + /* fallthrough */ + case 2: + byte = U8(src_lane_index); + byte = U8(byte | (byte << 2)); + break; + default: + byte = U8(src_lane_index << 1); + byte = U8(byte | (byte << 2) | 0x4); + break; + } + + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, src, 0)); + else + FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0)); + return emit_byte(compiler, U8(byte | (byte << 4))); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = 
SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); + sljit_u8 opcode; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (reg_size == 5) { + if (!(cpu_feature_list & CPU_FEATURE_AVX2)) + return SLJIT_ERR_UNSUPPORTED; + use_vex = 1; + } else if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_FLOAT) { + if (elem_size != 2 || elem2_size != 3) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (use_vex) + return emit_vex_instruction(compiler, CVTPS2PD_x_xm | ((reg_size == 5) ? VEX_256 : 0) | EX86_SSE2, freg, 0, src, srcw); + return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, freg, src, srcw); + } + + switch (elem_size) { + case 0: + if (elem2_size == 1) + opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBW_x_xm : PMOVZXBW_x_xm; + else if (elem2_size == 2) + opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBD_x_xm : PMOVZXBD_x_xm; + else if (elem2_size == 3) + opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBQ_x_xm : PMOVZXBQ_x_xm; + else + return SLJIT_ERR_UNSUPPORTED; + break; + case 1: + if (elem2_size == 2) + opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWD_x_xm : PMOVZXWD_x_xm; + else if (elem2_size == 3) + opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWQ_x_xm : PMOVZXWQ_x_xm; + else + return SLJIT_ERR_UNSUPPORTED; + break; + case 2: + if (elem2_size == 3) + opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? 
PMOVSXDQ_x_xm : PMOVZXDQ_x_xm; + else + return SLJIT_ERR_UNSUPPORTED; + break; + default: + return SLJIT_ERR_UNSUPPORTED; + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (use_vex) + return emit_vex_instruction(compiler, opcode | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, srcw); + return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX); + sljit_s32 dst_r; + sljit_uw op; + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)) + return SLJIT_ERR_UNSUPPORTED; + + if (reg_size == 4) { + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + op = EX86_PREF_66 | EX86_SSE2_OP2; + + switch (elem_size) { + case 1: + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, freg, 0)); + else + FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, freg, 0)); + freg = TMP_FREG; + break; + case 2: + op = EX86_SSE2_OP2; + break; + } + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + op |= (elem_size < 2) ? 
PMOVMSKB_r_x : MOVMSKPS_r_x; + + if (use_vex) + FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, freg, 0)); + else + FAIL_IF(emit_groupf(compiler, op, dst_r, freg, 0)); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = type & SLJIT_32; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (elem_size == 1) { + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 8, dst_r, 0); + FAIL_IF(!inst); + inst[1] |= SHR; + } + + if (dst_r == TMP_REG1) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + + return SLJIT_SUCCESS; + } + + if (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (elem_size == 1) { + FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0)); + FAIL_IF(emit_byte(compiler, 1)); + FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, TMP_FREG, 0)); + FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0)); + } else { + op = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2; + + if (elem_size == 0) + op = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2; + else if (elem_size == 3) + op |= EX86_PREF_66; + + FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, freg, 0)); + } + + if (dst_r == TMP_REG1) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = type & SLJIT_32; +#endif /* SLJIT_CONFIG_X86_64 */ + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src_freg) +{ + sljit_uw op = ((type & SLJIT_SIMD_FLOAT) ? 
MOVAPS_x_xm : MOVDQA_x_xm) | EX86_SSE2; + + SLJIT_ASSERT(SLJIT_SIMD_GET_REG_SIZE(type) == 4); + + if (!(type & SLJIT_SIMD_FLOAT) || SLJIT_SIMD_GET_ELEM_SIZE(type) == 3) + op |= EX86_PREF_66; + + return emit_groupf(compiler, op, dst_freg, src_freg, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_uw op = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif /* SLJIT_CONFIG_X86_64 */ + + if (reg_size == 5) { + if (!(cpu_feature_list & CPU_FEATURE_AVX2)) + return SLJIT_ERR_UNSUPPORTED; + } else if (reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + switch (SLJIT_SIMD_GET_OPCODE(type)) { + case SLJIT_SIMD_OP2_AND: + op = (type & SLJIT_SIMD_FLOAT) ? ANDPD_x_xm : PAND_x_xm; + + if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3) + op |= EX86_PREF_66; + break; + case SLJIT_SIMD_OP2_OR: + op = (type & SLJIT_SIMD_FLOAT) ? ORPD_x_xm : POR_x_xm; + + if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3) + op |= EX86_PREF_66; + break; + case SLJIT_SIMD_OP2_XOR: + op = (type & SLJIT_SIMD_FLOAT) ? 
XORPD_x_xm : PXOR_x_xm; + + if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3) + op |= EX86_PREF_66; + break; + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 5 || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX))) { + if (reg_size == 5) + op |= VEX_256; + + return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_freg, src1_freg, src2_freg, 0); + } + + if (dst_freg != src1_freg) { + if (dst_freg == src2_freg) + src2_freg = src1_freg; + else + FAIL_IF(emit_simd_mov(compiler, type, dst_freg, src1_freg)); + } + + FAIL_IF(emit_groupf(compiler, op | EX86_SSE2, dst_freg, src2_freg, 0)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_uw pref; + sljit_s32 free_reg = TMP_REG1; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_sw srcw = 0; + sljit_sw tempw = 0; +#endif /* SLJIT_CONFIG_X86_32 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + CHECK_EXTRA_REGS(src_reg, srcw, (void)0); + CHECK_EXTRA_REGS(temp_reg, tempw, (void)0); + + SLJIT_ASSERT(FAST_IS_REG(src_reg) || src_reg == SLJIT_MEM1(SLJIT_SP)); + SLJIT_ASSERT(FAST_IS_REG(temp_reg) || temp_reg == SLJIT_MEM1(SLJIT_SP)); + + op = GET_OPCODE(op); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if ((src_reg & SLJIT_MEM) || (op == SLJIT_MOV_U8 && reg_map[src_reg] >= 4)) { + /* Src is virtual register or its low byte is not accessible. 
*/ + SLJIT_ASSERT(src_reg != SLJIT_R1); + free_reg = src_reg; + + EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw); + src_reg = TMP_REG1; + + if (mem_reg == src_reg) + mem_reg = TMP_REG1; + } +#endif /* SLJIT_CONFIG_X86_32 */ + + if (temp_reg != SLJIT_R0) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; + + EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0); + EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, 0); + + if (src_reg == SLJIT_R0) + src_reg = free_reg; + if (mem_reg == SLJIT_R0) + mem_reg = free_reg; +#else /* !SLJIT_CONFIG_X86_64 */ + if (src_reg == TMP_REG1 && mem_reg == SLJIT_R0 && (free_reg & SLJIT_MEM)) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0); + EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0); + EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw); + + mem_reg = SLJIT_R1; + free_reg = SLJIT_R1; + } else { + EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0); + EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw); + + if (src_reg == SLJIT_R0) + src_reg = free_reg; + if (mem_reg == SLJIT_R0) + mem_reg = free_reg; + } +#endif /* SLJIT_CONFIG_X86_64 */ + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op != SLJIT_MOV && op != SLJIT_MOV_P; +#endif /* SLJIT_CONFIG_X86_64 */ + + /* Lock prefix. */ + FAIL_IF(emit_byte(compiler, GROUP_LOCK)); + + pref = 0; + if (op == SLJIT_MOV_U16) + pref = EX86_HALF_ARG | EX86_PREF_66; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (op == SLJIT_MOV_U8) + pref = EX86_REX; +#endif /* SLJIT_CONFIG_X86_64 */ + + FAIL_IF(emit_groupf(compiler, (op == SLJIT_MOV_U8 ? 
/*
 * Emits code that stores the address "SLJIT_SP + offset" into dst/dstw.
 *
 * Both the destination operand and the offset pass through
 * ADJUST_LOCAL_OFFSET before use (presumably to translate local-area
 * offsets into real stack offsets; see the macro definition).
 *
 * Returns SLJIT_SUCCESS or the error code of the failing emitter.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Addresses are computed with full width operands. */
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		/* The offset cannot be used as an immediate here: load it into
		   TMP_REG1 first and add the two registers with lea. */
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		/* In debug builds the lea is emitted from inside the assertion,
		   so the result must be read back from compiler->error. */
		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	/* A zero offset needs no address arithmetic, a plain move is enough. */
	if (offset != 0)
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}
/*
 * Emits a move of a not-yet-known address into dst/dstw and returns the
 * jump record that describes it (initialised by set_mov_addr). A zero
 * immediate is emitted as the value; presumably it is replaced with the
 * real address during code generation - confirm against the generator.
 *
 * Returns NULL if the compiler is already in an error state, or if an
 * allocation or an instruction emit fails.
 */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif /* SLJIT_CONFIG_X86_64 */

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	/* The 64 bit immediate load needs a register target; TMP_REG1 is
	   used when dst is not directly usable as one. */
	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	PTR_FAIL_IF(emit_load_imm64(compiler, reg, 0));
	/* Recorded after the load has been emitted, so addr holds the
	   compiler size just past the immediate load. */
	jump->addr = compiler->size;

	/* Flag targets whose machine register index is 8 or above. */
	if (reg_map[reg] >= 8)
		jump->flags |= MOV_ADDR_HI;
#else /* !SLJIT_CONFIG_X86_64 */
	PTR_FAIL_IF(emit_mov(compiler, dst, dstw, SLJIT_IMM, 0));
#endif /* SLJIT_CONFIG_X86_64 */

	/* Append a one byte SLJIT_INST_MOV_ADDR marker to the instruction
	   buffer right after the emitted move. */
	inst = (sljit_u8*)ensure_buf(compiler, 1);
	PTR_FAIL_IF(!inst);

	inst[0] = SLJIT_INST_MOV_ADDR;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* A memory destination receives the value from TMP_REG1 afterwards. */
	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_mov(compiler, dst, dstw, TMP_REG1, 0));
#endif /* SLJIT_CONFIG_X86_64 */

	return jump;
}
Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_label(struct sljit_jump *jump) +{ + return !(jump->flags & JUMP_ADDR) && (jump->u.label != NULL); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_target(struct sljit_jump *jump) +{ + return (jump->flags & JUMP_ADDR) != 0; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_is_mov_addr(struct sljit_jump *jump) +{ + return (jump->flags & JUMP_MOV_ADDR) != 0; +} + +#define SLJIT_SERIALIZE_DEBUG ((sljit_u16)0x1) + +struct sljit_serialized_compiler { + sljit_u32 signature; + sljit_u16 version; + sljit_u16 cpu_type; + + sljit_uw buf_segment_count; + sljit_uw label_count; + sljit_uw jump_count; + sljit_uw const_count; + + sljit_s32 options; + sljit_s32 scratches; + sljit_s32 saveds; + sljit_s32 fscratches; + sljit_s32 fsaveds; + sljit_s32 local_size; + sljit_uw size; + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + sljit_s32 status_flags_state; +#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 args_size; +#endif /* SLJIT_CONFIG_X86_32 */ + +#if ((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + sljit_uw args_size; +#endif /* (SLJIT_CONFIG_ARM_32 && __SOFTFP__) || SLJIT_CONFIG_MIPS_32 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + sljit_uw cpool_diff; + sljit_uw cpool_fill; + sljit_uw patches; +#endif /* SLJIT_CONFIG_ARM_V6 */ + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + sljit_s32 delay_slot; +#endif /* SLJIT_CONFIG_MIPS */ + +}; + +struct sljit_serialized_debug_info { + sljit_sw last_flags; + sljit_s32 last_return; + sljit_s32 logical_local_size; +}; + +struct sljit_serialized_label { + sljit_uw size; +}; + +struct sljit_serialized_jump { + sljit_uw addr; + sljit_uw flags; + sljit_uw value; +}; + +struct sljit_serialized_const { + sljit_uw addr; +}; + +#define SLJIT_SERIALIZE_ALIGN(v) (((v) + 
sizeof(sljit_uw) - 1) & ~(sljit_uw)(sizeof(sljit_uw) - 1)) +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define SLJIT_SERIALIZE_SIGNATURE 0x534c4a54 +#else /* !SLJIT_LITTLE_ENDIAN */ +#define SLJIT_SERIALIZE_SIGNATURE 0x544a4c53 +#endif /* SLJIT_LITTLE_ENDIAN */ +#define SLJIT_SERIALIZE_VERSION 1 + +SLJIT_API_FUNC_ATTRIBUTE sljit_uw* sljit_serialize_compiler(struct sljit_compiler *compiler, + sljit_s32 options, sljit_uw *size) +{ + sljit_uw serialized_size = sizeof(struct sljit_serialized_compiler); + struct sljit_memory_fragment *buf; + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_serialized_compiler *serialized_compiler; + struct sljit_serialized_label *serialized_label; + struct sljit_serialized_jump *serialized_jump; + struct sljit_serialized_const *serialized_const; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + struct sljit_serialized_debug_info *serialized_debug_info; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + sljit_uw counter, used_size; + sljit_u8 *result; + sljit_u8 *ptr; + SLJIT_UNUSED_ARG(options); + + if (size != NULL) + *size = 0; + + PTR_FAIL_IF(compiler->error); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (!(options & SLJIT_SERIALIZE_IGNORE_DEBUG)) + serialized_size += sizeof(struct sljit_serialized_debug_info); +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + serialized_size += SLJIT_SERIALIZE_ALIGN(compiler->cpool_fill * (sizeof(sljit_uw) + 1)); +#endif /* SLJIT_CONFIG_ARM_V6 */ + + /* Compute the size of the data. 
*/ + buf = compiler->buf; + while (buf != NULL) { + serialized_size += sizeof(sljit_uw) + SLJIT_SERIALIZE_ALIGN(buf->used_size); + buf = buf->next; + } + + serialized_size += compiler->label_count * sizeof(struct sljit_serialized_label); + + jump = compiler->jumps; + while (jump != NULL) { + serialized_size += sizeof(struct sljit_serialized_jump); + jump = jump->next; + } + + const_ = compiler->consts; + while (const_ != NULL) { + serialized_size += sizeof(struct sljit_serialized_const); + const_ = const_->next; + } + + result = (sljit_u8*)SLJIT_MALLOC(serialized_size, compiler->allocator_data); + PTR_FAIL_IF_NULL(result); + + if (size != NULL) + *size = serialized_size; + + ptr = result; + serialized_compiler = (struct sljit_serialized_compiler*)ptr; + ptr += sizeof(struct sljit_serialized_compiler); + + serialized_compiler->signature = SLJIT_SERIALIZE_SIGNATURE; + serialized_compiler->version = SLJIT_SERIALIZE_VERSION; + serialized_compiler->cpu_type = 0; + serialized_compiler->label_count = compiler->label_count; + serialized_compiler->options = compiler->options; + serialized_compiler->scratches = compiler->scratches; + serialized_compiler->saveds = compiler->saveds; + serialized_compiler->fscratches = compiler->fscratches; + serialized_compiler->fsaveds = compiler->fsaveds; + serialized_compiler->local_size = compiler->local_size; + serialized_compiler->size = compiler->size; + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + serialized_compiler->status_flags_state = compiler->status_flags_state; +#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + || ((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + serialized_compiler->args_size = compiler->args_size; +#endif /* SLJIT_CONFIG_X86_32 || (SLJIT_CONFIG_ARM_32 && __SOFTFP__) || SLJIT_CONFIG_MIPS_32 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && 
SLJIT_CONFIG_ARM_V6) + serialized_compiler->cpool_diff = compiler->cpool_diff; + serialized_compiler->cpool_fill = compiler->cpool_fill; + serialized_compiler->patches = compiler->patches; + + SLJIT_MEMCPY(ptr, compiler->cpool, compiler->cpool_fill * sizeof(sljit_uw)); + SLJIT_MEMCPY(ptr + compiler->cpool_fill * sizeof(sljit_uw), compiler->cpool_unique, compiler->cpool_fill); + ptr += SLJIT_SERIALIZE_ALIGN(compiler->cpool_fill * (sizeof(sljit_uw) + 1)); +#endif /* SLJIT_CONFIG_ARM_V6 */ + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + serialized_compiler->delay_slot = compiler->delay_slot; +#endif /* SLJIT_CONFIG_MIPS */ + + buf = compiler->buf; + counter = 0; + while (buf != NULL) { + used_size = buf->used_size; + *(sljit_uw*)ptr = used_size; + ptr += sizeof(sljit_uw); + SLJIT_MEMCPY(ptr, buf->memory, used_size); + ptr += SLJIT_SERIALIZE_ALIGN(used_size); + buf = buf->next; + counter++; + } + serialized_compiler->buf_segment_count = counter; + + label = compiler->labels; + while (label != NULL) { + serialized_label = (struct sljit_serialized_label*)ptr; + serialized_label->size = label->size; + ptr += sizeof(struct sljit_serialized_label); + label = label->next; + } + + jump = compiler->jumps; + counter = 0; + while (jump != NULL) { + serialized_jump = (struct sljit_serialized_jump*)ptr; + serialized_jump->addr = jump->addr; + serialized_jump->flags = jump->flags; + + if (jump->flags & JUMP_ADDR) + serialized_jump->value = jump->u.target; + else if (jump->u.label != NULL) + serialized_jump->value = jump->u.label->u.index; + else + serialized_jump->value = SLJIT_MAX_ADDRESS; + + ptr += sizeof(struct sljit_serialized_jump); + jump = jump->next; + counter++; + } + serialized_compiler->jump_count = counter; + + const_ = compiler->consts; + counter = 0; + while (const_ != NULL) { + serialized_const = (struct sljit_serialized_const*)ptr; + serialized_const->addr = const_->addr; + ptr += sizeof(struct sljit_serialized_const); + const_ = const_->next; + 
counter++; + } + serialized_compiler->const_count = counter; + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (!(options & SLJIT_SERIALIZE_IGNORE_DEBUG)) { + serialized_debug_info = (struct sljit_serialized_debug_info*)ptr; + serialized_debug_info->last_flags = compiler->last_flags; + serialized_debug_info->last_return = compiler->last_return; + serialized_debug_info->logical_local_size = compiler->logical_local_size; + serialized_compiler->cpu_type |= SLJIT_SERIALIZE_DEBUG; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + ptr += sizeof(struct sljit_serialized_debug_info); +#endif /* SLJIT_DEBUG */ + } +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + + SLJIT_ASSERT((sljit_uw)(ptr - result) == serialized_size); + return (sljit_uw*)result; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler *sljit_deserialize_compiler(sljit_uw* buffer, sljit_uw size, + sljit_s32 options, void *allocator_data) +{ + struct sljit_compiler *compiler; + struct sljit_serialized_compiler *serialized_compiler; + struct sljit_serialized_label *serialized_label; + struct sljit_serialized_jump *serialized_jump; + struct sljit_serialized_const *serialized_const; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + struct sljit_serialized_debug_info *serialized_debug_info; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + struct sljit_memory_fragment *buf; + struct sljit_memory_fragment *last_buf; + struct sljit_label *label; + struct sljit_label *last_label; + struct sljit_label **label_list = NULL; + struct sljit_jump *jump; + struct sljit_jump *last_jump; + struct sljit_const *const_; + struct sljit_const *last_const; + sljit_u8 *ptr = (sljit_u8*)buffer; + sljit_u8 *end = ptr + size; + sljit_uw i, used_size, aligned_size, label_count; + SLJIT_UNUSED_ARG(options); + + if (size < sizeof(struct sljit_serialized_compiler) || (size & (sizeof(sljit_uw) - 1)) != 0) + return NULL; 
+ + serialized_compiler = (struct sljit_serialized_compiler*)ptr; + + if (serialized_compiler->signature != SLJIT_SERIALIZE_SIGNATURE || serialized_compiler->version != SLJIT_SERIALIZE_VERSION) + return NULL; + + compiler = sljit_create_compiler(allocator_data); + PTR_FAIL_IF(compiler == NULL); + + compiler->label_count = serialized_compiler->label_count; + compiler->options = serialized_compiler->options; + compiler->scratches = serialized_compiler->scratches; + compiler->saveds = serialized_compiler->saveds; + compiler->fscratches = serialized_compiler->fscratches; + compiler->fsaveds = serialized_compiler->fsaveds; + compiler->local_size = serialized_compiler->local_size; + compiler->size = serialized_compiler->size; + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + compiler->status_flags_state = serialized_compiler->status_flags_state; +#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + || ((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->args_size = serialized_compiler->args_size; +#endif /* SLJIT_CONFIG_X86_32 || (SLJIT_CONFIG_ARM_32 && __SOFTFP__) || SLJIT_CONFIG_MIPS_32 */ + +#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) + used_size = serialized_compiler->cpool_fill; + aligned_size = SLJIT_SERIALIZE_ALIGN(used_size * (sizeof(sljit_uw) + 1)); + compiler->cpool_diff = serialized_compiler->cpool_diff; + compiler->cpool_fill = used_size; + compiler->patches = serialized_compiler->patches; + + if ((sljit_uw)(end - ptr) < aligned_size) + goto error; + + SLJIT_MEMCPY(compiler->cpool, ptr, used_size * sizeof(sljit_uw)); + SLJIT_MEMCPY(compiler->cpool_unique, ptr + used_size * sizeof(sljit_uw), used_size); + ptr += aligned_size; +#endif /* SLJIT_CONFIG_ARM_V6 */ + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + compiler->delay_slot = 
serialized_compiler->delay_slot; +#endif /* SLJIT_CONFIG_MIPS */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (!(serialized_compiler->cpu_type & SLJIT_SERIALIZE_DEBUG)) + goto error; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + + ptr += sizeof(struct sljit_serialized_compiler); + i = serialized_compiler->buf_segment_count; + last_buf = NULL; + while (i > 0) { + if ((sljit_uw)(end - ptr) < sizeof(sljit_uw)) + goto error; + + used_size = *(sljit_uw*)ptr; + aligned_size = SLJIT_SERIALIZE_ALIGN(used_size); + ptr += sizeof(sljit_uw); + + if ((sljit_uw)(end - ptr) < aligned_size) + goto error; + + if (last_buf == NULL) { + SLJIT_ASSERT(compiler->buf != NULL && compiler->buf->next == NULL); + buf = compiler->buf; + } else { + buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, allocator_data); + if (!buf) + goto error; + buf->next = NULL; + } + + buf->used_size = used_size; + SLJIT_MEMCPY(buf->memory, ptr, used_size); + + if (last_buf != NULL) + last_buf->next = buf; + last_buf = buf; + + ptr += aligned_size; + i--; + } + + last_label = NULL; + label_count = serialized_compiler->label_count; + if ((sljit_uw)(end - ptr) < label_count * sizeof(struct sljit_serialized_label)) + goto error; + + label_list = (struct sljit_label **)SLJIT_MALLOC(label_count * sizeof(struct sljit_label*), allocator_data); + if (label_list == NULL) + goto error; + + for (i = 0; i < label_count; i++) { + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + if (label == NULL) + goto error; + + serialized_label = (struct sljit_serialized_label*)ptr; + label->next = NULL; + label->u.index = i; + label->size = serialized_label->size; + + if (last_label != NULL) + last_label->next = label; + else + compiler->labels = label; + last_label = label; + + label_list[i] = label; + ptr += sizeof(struct sljit_serialized_label); + } + compiler->last_label = last_label; + + last_jump = NULL; + i = 
serialized_compiler->jump_count; + if ((sljit_uw)(end - ptr) < i * sizeof(struct sljit_serialized_jump)) + goto error; + + while (i > 0) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + if (jump == NULL) + goto error; + + serialized_jump = (struct sljit_serialized_jump*)ptr; + jump->next = NULL; + jump->addr = serialized_jump->addr; + jump->flags = serialized_jump->flags; + + if (!(serialized_jump->flags & JUMP_ADDR)) { + if (serialized_jump->value != SLJIT_MAX_ADDRESS) { + if (serialized_jump->value >= label_count) + goto error; + jump->u.label = label_list[serialized_jump->value]; + } else + jump->u.label = NULL; + } else + jump->u.target = serialized_jump->value; + + if (last_jump != NULL) + last_jump->next = jump; + else + compiler->jumps = jump; + last_jump = jump; + + ptr += sizeof(struct sljit_serialized_jump); + i--; + } + compiler->last_jump = last_jump; + + SLJIT_FREE(label_list, allocator_data); + label_list = NULL; + + last_const = NULL; + i = serialized_compiler->const_count; + if ((sljit_uw)(end - ptr) < i * sizeof(struct sljit_serialized_const)) + goto error; + + while (i > 0) { + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + if (const_ == NULL) + goto error; + + serialized_const = (struct sljit_serialized_const*)ptr; + const_->next = NULL; + const_->addr = serialized_const->addr; + + if (last_const != NULL) + last_const->next = const_; + else + compiler->consts = const_; + last_const = const_; + + ptr += sizeof(struct sljit_serialized_const); + i--; + } + compiler->last_const = last_const; + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if ((sljit_uw)(end - ptr) < sizeof(struct sljit_serialized_debug_info)) + goto error; + + serialized_debug_info = (struct sljit_serialized_debug_info*)ptr; + compiler->last_flags = (sljit_s32)serialized_debug_info->last_flags; + compiler->last_return = 
serialized_debug_info->last_return; + compiler->logical_local_size = serialized_debug_info->logical_local_size; +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ + + return compiler; + +error: + sljit_free_compiler(compiler); + if (label_list != NULL) + SLJIT_FREE(label_list, allocator_data); + return NULL; +} diff --git a/src/sljit/sljitUtils.c b/src/sljit/sljitUtils.c new file mode 100644 index 0000000..967593b --- /dev/null +++ b/src/sljit/sljitUtils.c @@ -0,0 +1,344 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* ------------------------------------------------------------------------ */ +/* Locks */ +/* ------------------------------------------------------------------------ */ + +/* Executable Allocator */ + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) \ + && !(defined SLJIT_WX_EXECUTABLE_ALLOCATOR && SLJIT_WX_EXECUTABLE_ALLOCATOR) +#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) +#define SLJIT_ALLOCATOR_LOCK() +#define SLJIT_ALLOCATOR_UNLOCK() +#elif !(defined _WIN32) +#include + +static pthread_mutex_t allocator_lock = PTHREAD_MUTEX_INITIALIZER; + +#define SLJIT_ALLOCATOR_LOCK() pthread_mutex_lock(&allocator_lock) +#define SLJIT_ALLOCATOR_UNLOCK() pthread_mutex_unlock(&allocator_lock) +#else /* windows */ +static HANDLE allocator_lock; + +static SLJIT_INLINE void allocator_grab_lock(void) +{ + HANDLE lock; + if (SLJIT_UNLIKELY(!InterlockedCompareExchangePointer(&allocator_lock, NULL, NULL))) { + lock = CreateMutex(NULL, FALSE, NULL); + if (InterlockedCompareExchangePointer(&allocator_lock, lock, NULL)) + CloseHandle(lock); + } + WaitForSingleObject(allocator_lock, INFINITE); +} + +#define SLJIT_ALLOCATOR_LOCK() allocator_grab_lock() +#define SLJIT_ALLOCATOR_UNLOCK() ReleaseMutex(allocator_lock) +#endif /* thread implementation */ +#endif /* SLJIT_EXECUTABLE_ALLOCATOR && !SLJIT_WX_EXECUTABLE_ALLOCATOR */ + +/* ------------------------------------------------------------------------ */ +/* Stack */ +/* ------------------------------------------------------------------------ */ + +#if ((defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) \ + && !(defined SLJIT_UTIL_SIMPLE_STACK_ALLOCATION && SLJIT_UTIL_SIMPLE_STACK_ALLOCATION)) \ + || ((defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) \ + && !((defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) \ + || (defined SLJIT_WX_EXECUTABLE_ALLOCATOR && SLJIT_WX_EXECUTABLE_ALLOCATOR))) + +#ifndef _WIN32 +/* Provides mmap function. 
*/ +#include +#include + +#ifndef MAP_ANON +#ifdef MAP_ANONYMOUS +#define MAP_ANON MAP_ANONYMOUS +#endif /* MAP_ANONYMOUS */ +#endif /* !MAP_ANON */ + +#ifndef MAP_ANON + +#include + +#ifdef O_CLOEXEC +#define SLJIT_CLOEXEC O_CLOEXEC +#else /* !O_CLOEXEC */ +#define SLJIT_CLOEXEC 0 +#endif /* O_CLOEXEC */ + +/* Some old systems do not have MAP_ANON. */ +static int dev_zero = -1; + +#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) + +static SLJIT_INLINE int open_dev_zero(void) +{ + dev_zero = open("/dev/zero", O_RDWR | SLJIT_CLOEXEC); + + return dev_zero < 0; +} + +#else /* !SLJIT_SINGLE_THREADED */ + +#include + +static pthread_mutex_t dev_zero_mutex = PTHREAD_MUTEX_INITIALIZER; + +static SLJIT_INLINE int open_dev_zero(void) +{ + pthread_mutex_lock(&dev_zero_mutex); + if (SLJIT_UNLIKELY(dev_zero < 0)) + dev_zero = open("/dev/zero", O_RDWR | SLJIT_CLOEXEC); + + pthread_mutex_unlock(&dev_zero_mutex); + return dev_zero < 0; +} + +#endif /* SLJIT_SINGLE_THREADED */ +#undef SLJIT_CLOEXEC +#endif /* !MAP_ANON */ +#endif /* !_WIN32 */ +#endif /* open_dev_zero */ + +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) \ + || (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + +#ifdef _WIN32 + +static SLJIT_INLINE sljit_uw get_page_alignment(void) { + SYSTEM_INFO si; + static sljit_uw sljit_page_align = 0; + if (!sljit_page_align) { + GetSystemInfo(&si); + sljit_page_align = (sljit_uw)si.dwPageSize - 1; + } + return sljit_page_align; +} + +#else + +#include + +static SLJIT_INLINE sljit_uw get_page_alignment(void) { + static sljit_uw sljit_page_align = 0; + + sljit_sw align; + + if (!sljit_page_align) { +#ifdef _SC_PAGESIZE + align = sysconf(_SC_PAGESIZE); +#else + align = getpagesize(); +#endif + /* Should never happen. 
*/ + if (align < 0) + align = 4096; + sljit_page_align = (sljit_uw)align - 1; + } + return sljit_page_align; +} + +#endif /* _WIN32 */ + +#endif /* get_page_alignment() */ + +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) + +#if (defined SLJIT_UTIL_SIMPLE_STACK_ALLOCATION && SLJIT_UTIL_SIMPLE_STACK_ALLOCATION) + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data) +{ + struct sljit_stack *stack; + void *ptr; + + SLJIT_UNUSED_ARG(allocator_data); + + if (start_size > max_size || start_size < 1) + return NULL; + + stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data); + if (stack == NULL) + return NULL; + + ptr = SLJIT_MALLOC(max_size, allocator_data); + if (ptr == NULL) { + SLJIT_FREE(stack, allocator_data); + return NULL; + } + + stack->min_start = (sljit_u8 *)ptr; + stack->end = stack->min_start + max_size; + stack->start = stack->end - start_size; + stack->top = stack->end; + return stack; +} + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data) +{ + SLJIT_UNUSED_ARG(allocator_data); + SLJIT_FREE((void*)stack->min_start, allocator_data); + SLJIT_FREE(stack, allocator_data); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start) +{ + if ((new_start < stack->min_start) || (new_start >= stack->end)) + return NULL; + stack->start = new_start; + return new_start; +} + +#else /* !SLJIT_UTIL_SIMPLE_STACK_ALLOCATION */ + +#ifdef _WIN32 + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data) +{ + SLJIT_UNUSED_ARG(allocator_data); + VirtualFree((void*)stack->min_start, 0, MEM_RELEASE); + SLJIT_FREE(stack, allocator_data); +} + +#else /* !_WIN32 */ + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data) +{ + 
SLJIT_UNUSED_ARG(allocator_data); + munmap((void*)stack->min_start, (size_t)(stack->end - stack->min_start)); + SLJIT_FREE(stack, allocator_data); +} + +#endif /* _WIN32 */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data) +{ + struct sljit_stack *stack; + void *ptr; + sljit_uw page_align; + + SLJIT_UNUSED_ARG(allocator_data); + + if (start_size > max_size || start_size < 1) + return NULL; + + stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data); + if (stack == NULL) + return NULL; + + /* Align max_size. */ + page_align = get_page_alignment(); + max_size = (max_size + page_align) & ~page_align; + +#ifdef _WIN32 + ptr = VirtualAlloc(NULL, max_size, MEM_RESERVE, PAGE_READWRITE); + if (!ptr) { + SLJIT_FREE(stack, allocator_data); + return NULL; + } + + stack->min_start = (sljit_u8 *)ptr; + stack->end = stack->min_start + max_size; + stack->start = stack->end; + + if (sljit_stack_resize(stack, stack->end - start_size) == NULL) { + sljit_free_stack(stack, allocator_data); + return NULL; + } +#else /* !_WIN32 */ +#ifdef MAP_ANON + ptr = mmap(NULL, max_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); +#else /* !MAP_ANON */ + if (SLJIT_UNLIKELY((dev_zero < 0) && open_dev_zero())) { + SLJIT_FREE(stack, allocator_data); + return NULL; + } + ptr = mmap(NULL, max_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0); +#endif /* MAP_ANON */ + if (ptr == MAP_FAILED) { + SLJIT_FREE(stack, allocator_data); + return NULL; + } + stack->min_start = (sljit_u8 *)ptr; + stack->end = stack->min_start + max_size; + stack->start = stack->end - start_size; +#endif /* _WIN32 */ + + stack->top = stack->end; + return stack; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start) +{ +#if defined _WIN32 || defined(POSIX_MADV_DONTNEED) + sljit_uw aligned_old_start; + sljit_uw aligned_new_start; + 
sljit_uw page_align; +#endif + + if ((new_start < stack->min_start) || (new_start >= stack->end)) + return NULL; + +#ifdef _WIN32 + page_align = get_page_alignment(); + + aligned_new_start = (sljit_uw)new_start & ~page_align; + aligned_old_start = ((sljit_uw)stack->start) & ~page_align; + if (aligned_new_start != aligned_old_start) { + if (aligned_new_start < aligned_old_start) { + if (!VirtualAlloc((void*)aligned_new_start, aligned_old_start - aligned_new_start, MEM_COMMIT, PAGE_READWRITE)) + return NULL; + } + else { + if (!VirtualFree((void*)aligned_old_start, aligned_new_start - aligned_old_start, MEM_DECOMMIT)) + return NULL; + } + } +#elif defined(POSIX_MADV_DONTNEED) + if (stack->start < new_start) { + page_align = get_page_alignment(); + + aligned_new_start = (sljit_uw)new_start & ~page_align; + aligned_old_start = ((sljit_uw)stack->start) & ~page_align; + + if (aligned_new_start > aligned_old_start) { + posix_madvise((void*)aligned_old_start, aligned_new_start - aligned_old_start, POSIX_MADV_DONTNEED); +#ifdef MADV_FREE + madvise((void*)aligned_old_start, aligned_new_start - aligned_old_start, MADV_FREE); +#endif /* MADV_FREE */ + } + } +#endif /* _WIN32 */ + + stack->start = new_start; + return new_start; +} + +#endif /* SLJIT_UTIL_SIMPLE_STACK_ALLOCATION */ + +#endif /* SLJIT_UTIL_STACK */ diff --git a/test-driver b/test-driver new file mode 100755 index 0000000..be73b80 --- /dev/null +++ b/test-driver @@ -0,0 +1,153 @@ +#! /bin/sh +# test-driver - basic testsuite driver script. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 2011-2021 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +# Make unconditional expansion of undefined variables an error. This +# helps a lot in preventing typo-related bugs. +set -u + +usage_error () +{ + echo "$0: $*" >&2 + print_usage >&2 + exit 2 +} + +print_usage () +{ + cat <"$log_file" +"$@" >>"$log_file" 2>&1 +estatus=$? + +if test $enable_hard_errors = no && test $estatus -eq 99; then + tweaked_estatus=1 +else + tweaked_estatus=$estatus +fi + +case $tweaked_estatus:$expect_failure in + 0:yes) col=$red res=XPASS recheck=yes gcopy=yes;; + 0:*) col=$grn res=PASS recheck=no gcopy=no;; + 77:*) col=$blu res=SKIP recheck=no gcopy=yes;; + 99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;; + *:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;; + *:*) col=$red res=FAIL recheck=yes gcopy=yes;; +esac + +# Report the test outcome and exit status in the logs, so that one can +# know whether the test passed or failed simply by looking at the '.log' +# file, without the need of also peaking into the corresponding '.trs' +# file (automake bug#11814). +echo "$res $test_name (exit status: $estatus)" >>"$log_file" + +# Report outcome to console. +echo "${col}${res}${std}: $test_name" + +# Register the test result, and other relevant metadata. 
+echo ":test-result: $res" > $trs_file +echo ":global-test-result: $res" >> $trs_file +echo ":recheck: $recheck" >> $trs_file +echo ":copy-in-global-log: $gcopy" >> $trs_file + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/testdata/grepbinary b/testdata/grepbinary new file mode 100644 index 0000000000000000000000000000000000000000..5efa1302049e4541d1acf53b63d6caaf5f9c439c GIT binary patch literal 45 zcmWH^NL45-%}mZ#NGi%N&r?Was8Gl%%`GTa$S+GRQYZmR=Ok8DDx~D6>u~`9Z_*E= literal 0 HcmV?d00001 diff --git a/testdata/grepfilelist b/testdata/grepfilelist new file mode 100644 index 0000000..dd73ec7 --- /dev/null +++ b/testdata/grepfilelist @@ -0,0 +1,3 @@ +testdata/grepinputv + +testdata/grepinputx diff --git a/testdata/grepinput b/testdata/grepinput new file mode 100644 index 0000000..1e2ceb4 --- /dev/null +++ b/testdata/grepinput @@ -0,0 +1,624 @@ +This is a file of miscellaneous text that is used as test data for checking +that the pcregrep command is working correctly. The file must be more than +24KiB long so that it needs more than a single read() call to process it. New +features should be added at the end, because some of the tests involve the +output of line numbers, and we don't want these to change. + +PATTERN at the start of a line. +In the middle of a line, PATTERN appears. + +This pattern is in lower case. + +Here follows a whole lot of stuff that makes the file over 24KiB long. + +------------------------------------------------------------------------------- +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. 
The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. 
+ +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. 
The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. 
The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. 
The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. 
The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. 
The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. 
The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. 
+ +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. 
The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. 
The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. 
The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. 
The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. 
The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. 
The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. 
+ +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. 
The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. 
The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. 
The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. + +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox +jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick +brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. +------------------------------------------------------------------------------- + +aaaaa0 +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +bbbbbb +cccccccccccccccccccccccccccccccccccccccccc +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +eeeee +aaaaa2 +ffffffffff + +This is a line before the binary zero. +This line contains a binary zero here >< for testing. +This is a line after the binary zero. + +ABOVE the elephant +ABOVE +ABOVE theatre +AB.VE +AB.VE the turtle + +010203040506 + +match 1: + a +match 2: + b +match 3: + c +match 4: + d +match 5: + e +Rhubarb +Custard Tart + +PUT NEW DATA ABOVE THIS LINE. +============================= + +Check up on PATTERN near the end. +This is the last line of this file. diff --git a/testdata/grepinput3 b/testdata/grepinput3 new file mode 100644 index 0000000..7409cfc --- /dev/null +++ b/testdata/grepinput3 @@ -0,0 +1,15 @@ +triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t2_txt s1_tag s_txt p_tag p_txt o_tag +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + +triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t5_txt s1_tag s_txt p_tag p_txt o_tag +o_txt + +triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt diff --git a/testdata/grepinput8 b/testdata/grepinput8 new file mode 100644 index 0000000..a5cfee3 --- /dev/null +++ b/testdata/grepinput8 @@ -0,0 +1,17 @@ +X one +X two X three X four X five +X six +X seven…X eight
X nine
X ten + +Before 111 +Before 222
Before 333…Match +After 111 +After 222
After 333 +And so on and so on +And so on and so on +Å¿ +ſſſſſ +ÁabcÁ KkK + +A1 +A1 diff --git a/testdata/grepinputC.bz2 b/testdata/grepinputC.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..457047ad79db1a54081ce7d7acb9c6e21a801d27 GIT binary patch literal 313 zcmV-90mlA9T4*^jL0KkKS!;yVs{jB#*MI;JKm{N`L;yeU|KKnITS1`GpQxUuN2#Mm zfB)%BK@n&rn7%IEaJII!85qAQC4) zatXr7o{Uma0&sCLS6CoposBnRD!bX%A>lmGbQglzso?4AjD6YtfL|gZ(?WQL`n2tg z^RjuIzHM9RwLuNM*zR3XUx%UF4ep*r#^^r^zvAvl LrwS4caGJGXhzXKq literal 0 HcmV?d00001 diff --git a/testdata/grepinputC.gz b/testdata/grepinputC.gz new file mode 100644 index 0000000000000000000000000000000000000000..c7ff390e08374563b11de5e5108ab79d9a54fd6d GIT binary patch literal 311 zcmV-70m%LziwFn|E$(6f17~t&aA|IEb#!<`0DV$HZp0uAy!#b?z-s@{TaP{D18iy% zs{~STlIHgvlT}ZvN>Q+3JYx?Gvo^*TxXD=ZCr3@fC9>vqGmfRQ&BE(gWva5WNwan* zdY39**AIi4)XZTDnsN4VRAskY2S@10o-lhG+_68#W#EAHxe0bESyLo@( zEaTTTN8Xl)RHrIg$c{obR}qS3s}yw?YWhP>cF80<%|=-r=xOS^c6c)=(`tg;E3ZIf zUqNuK4)0MX^1!)z0FrE{y8y>}W4KF0zwPgG+}oG_56>6iMyJs10>BIN1Ah*8`UO_| JRoNy2006TFl4k$_ literal 0 HcmV?d00001 diff --git a/testdata/grepinputM b/testdata/grepinputM new file mode 100644 index 0000000..9119e3d --- /dev/null +++ b/testdata/grepinputM @@ -0,0 +1,17 @@ +Data file for multiline tests of multiple matches. + +start end in between start +end and following +Other stuff + +start end in between start +end and following start +end other stuff + +start end in between start + +end + +** These two lines must be last. +start end in between start +end diff --git a/testdata/grepinputv b/testdata/grepinputv new file mode 100644 index 0000000..366d4fb --- /dev/null +++ b/testdata/grepinputv @@ -0,0 +1,9 @@ +The quick brown +fox jumps +over the lazy dog. +This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. 
+The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +A buried feline in the syndicate diff --git a/testdata/grepinputx b/testdata/grepinputx new file mode 100644 index 0000000..730cc8a --- /dev/null +++ b/testdata/grepinputx @@ -0,0 +1,43 @@ +This is a second file of input for the pcregrep tests. + +Here is the pattern again. + +Pattern +That time it was on a line by itself. + +To pat or not to pat, that is the question. + +complete pair +of lines + +That was a complete pair +of lines all by themselves. + +complete pair +of lines + +And there they were again, to check line numbers. + +one +two +three +four +five +six +seven +eight +nine +ten +eleven +twelve +thirteen +fourteen +fifteen +sixteen +seventeen +eighteen +nineteen +twenty + +This line contains pattern not on a line by itself. +This is the last line of this file. diff --git a/testdata/greplist b/testdata/greplist new file mode 100644 index 0000000..1434ae9 --- /dev/null +++ b/testdata/greplist @@ -0,0 +1,7 @@ +This is a file of patterns for testing the -f option. Don't include any blank +lines because they will match everything! This is no longer true, so have one. + +pattern +line by itself + +End of the list of patterns. diff --git a/testdata/grepnot.bz2 b/testdata/grepnot.bz2 new file mode 100644 index 0000000..730cc8a --- /dev/null +++ b/testdata/grepnot.bz2 @@ -0,0 +1,43 @@ +This is a second file of input for the pcregrep tests. + +Here is the pattern again. + +Pattern +That time it was on a line by itself. + +To pat or not to pat, that is the question. + +complete pair +of lines + +That was a complete pair +of lines all by themselves. + +complete pair +of lines + +And there they were again, to check line numbers. + +one +two +three +four +five +six +seven +eight +nine +ten +eleven +twelve +thirteen +fourteen +fifteen +sixteen +seventeen +eighteen +nineteen +twenty + +This line contains pattern not on a line by itself. +This is the last line of this file. 
diff --git a/testdata/grepoutput b/testdata/grepoutput new file mode 100644 index 0000000..d9233c2 --- /dev/null +++ b/testdata/grepoutput @@ -0,0 +1,1255 @@ +---------------------------- Test 1 ------------------------------ +PATTERN at the start of a line. +In the middle of a line, PATTERN appears. +Check up on PATTERN near the end. +RC=0 +---------------------------- Test 2 ------------------------------ +PATTERN at the start of a line. +RC=0 +---------------------------- Test 3 ------------------------------ +7:PATTERN at the start of a line. +8:In the middle of a line, PATTERN appears. +10:This pattern is in lower case. +623:Check up on PATTERN near the end. +RC=0 +---------------------------- Test 4 ------------------------------ +4 +RC=0 +---------------------------- Test 5 ------------------------------ +./testdata/grepinput:7:PATTERN at the start of a line. +./testdata/grepinput:8:In the middle of a line, PATTERN appears. +./testdata/grepinput:10:This pattern is in lower case. +./testdata/grepinput:623:Check up on PATTERN near the end. +./testdata/grepinputx:3:Here is the pattern again. +./testdata/grepinputx:5:Pattern +./testdata/grepinputx:42:This line contains pattern not on a line by itself. +RC=0 +---------------------------- Test 6 ------------------------------ +7:PATTERN at the start of a line. +8:In the middle of a line, PATTERN appears. +10:This pattern is in lower case. +623:Check up on PATTERN near the end. +3:Here is the pattern again. +5:Pattern +42:This line contains pattern not on a line by itself. 
+RC=0 +---------------------------- Test 7 ------------------------------ +./testdata/grepinput +./testdata/grepinputx +RC=0 +---------------------------- Test 8 ------------------------------ +./testdata/grepinput +RC=0 +---------------------------- Test 9 ------------------------------ +RC=0 +---------------------------- Test 10 ----------------------------- +RC=1 +---------------------------- Test 11 ----------------------------- +1:This is a second file of input for the pcregrep tests. +2: +4: +5:Pattern +6:That time it was on a line by itself. +7: +8:To pat or not to pat, that is the question. +9: +10:complete pair +11:of lines +12: +13:That was a complete pair +14:of lines all by themselves. +15: +16:complete pair +17:of lines +18: +19:And there they were again, to check line numbers. +20: +21:one +22:two +23:three +24:four +25:five +26:six +27:seven +28:eight +29:nine +30:ten +31:eleven +32:twelve +33:thirteen +34:fourteen +35:fifteen +36:sixteen +37:seventeen +38:eighteen +39:nineteen +40:twenty +41: +43:This is the last line of this file. +RC=0 +---------------------------- Test 12 ----------------------------- +Pattern +RC=0 +---------------------------- Test 13 ----------------------------- +Here is the pattern again. +That time it was on a line by itself. +seventeen +This line contains pattern not on a line by itself. +RC=0 +---------------------------- Test 14 ----------------------------- +./testdata/grepinputx:To pat or not to pat, that is the question. 
+RC=0 +---------------------------- Test 15 ----------------------------- +pcre2grep: Error in command-line regex at offset 4: quantifier does not follow a repeatable item +RC=2 +---------------------------- Test 16 ----------------------------- +pcre2grep: Failed to open ./testdata/nonexistfile: No such file or directory +RC=2 +---------------------------- Test 17 ----------------------------- +features should be added at the end, because some of the tests involve the +output of line numbers, and we don't want these to change. +RC=0 +---------------------------- Test 18 ----------------------------- +4:features should be added at the end, because some of the tests involve the +output of line numbers, and we don't want these to change. +583:brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. +------------------------------------------------------------------------------- +RC=0 +---------------------------- Test 19 ----------------------------- +Pattern +RC=0 +---------------------------- Test 20 ----------------------------- +10:complete pair +of lines +16:complete pair +of lines +RC=0 +---------------------------- Test 21 ----------------------------- +24:four +25-five +26-six +27-seven +-- +34:fourteen +35-fifteen +36-sixteen +37-seventeen +RC=0 +---------------------------- Test 22 ----------------------------- +21-one +22-two +23-three +24:four +-- +31-eleven +32-twelve +33-thirteen +34:fourteen +RC=0 +---------------------------- Test 23 ----------------------------- +one +two +three +four +five +six +seven +-- +eleven +twelve +thirteen +fourteen +fifteen +sixteen +seventeen +RC=0 +---------------------------- Test 24 ----------------------------- +four +five +six +seven +eight +nine +ten +eleven +twelve +thirteen +fourteen +fifteen +sixteen +seventeen +eighteen +nineteen +twenty + +This line contains pattern not on a line by itself. +This is the last line of this file. 
+RC=0 +---------------------------- Test 25 ----------------------------- +15- +16-complete pair +17-of lines +18- +19-And there they were again, to check line numbers. +20- +21-one +22-two +23-three +24:four +25-five +26-six +27-seven +28-eight +29-nine +30-ten +31-eleven +32-twelve +33-thirteen +34:fourteen +RC=0 +---------------------------- Test 26 ----------------------------- + +complete pair +of lines + +And there they were again, to check line numbers. + +one +two +three +four +five +six +seven +eight +nine +ten +eleven +twelve +thirteen +fourteen +fifteen +sixteen +seventeen +eighteen +nineteen +twenty + +This line contains pattern not on a line by itself. +This is the last line of this file. +RC=0 +---------------------------- Test 27 ----------------------------- +four +five +six +seven +eight +nine +ten +eleven +twelve +thirteen +fourteen +fifteen +sixteen +seventeen +eighteen +nineteen +twenty + +This line contains pattern not on a line by itself. +This is the last line of this file. +RC=0 +---------------------------- Test 28 ----------------------------- +14-of lines all by themselves. +15- +16-complete pair +17-of lines +18- +19-And there they were again, to check line numbers. +20- +21-one +22-two +23-three +24:four +25-five +26-six +27-seven +28-eight +29-nine +30-ten +31-eleven +32-twelve +33-thirteen +34:fourteen +RC=0 +---------------------------- Test 29 ----------------------------- +of lines all by themselves. + +complete pair +of lines + +And there they were again, to check line numbers. + +one +two +three +four +five +six +seven +eight +nine +ten +eleven +twelve +thirteen +fourteen +fifteen +sixteen +seventeen +eighteen +nineteen +twenty + +This line contains pattern not on a line by itself. +This is the last line of this file. 
+RC=0 +---------------------------- Test 30 ----------------------------- +./testdata/grepinput-4-features should be added at the end, because some of the tests involve the +./testdata/grepinput-5-output of line numbers, and we don't want these to change. +./testdata/grepinput-6- +./testdata/grepinput:7:PATTERN at the start of a line. +./testdata/grepinput:8:In the middle of a line, PATTERN appears. +./testdata/grepinput-9- +./testdata/grepinput:10:This pattern is in lower case. +-- +./testdata/grepinput-620-PUT NEW DATA ABOVE THIS LINE. +./testdata/grepinput-621-============================= +./testdata/grepinput-622- +./testdata/grepinput:623:Check up on PATTERN near the end. +-- +./testdata/grepinputx-1-This is a second file of input for the pcregrep tests. +./testdata/grepinputx-2- +./testdata/grepinputx:3:Here is the pattern again. +./testdata/grepinputx-4- +./testdata/grepinputx:5:Pattern +-- +./testdata/grepinputx-39-nineteen +./testdata/grepinputx-40-twenty +./testdata/grepinputx-41- +./testdata/grepinputx:42:This line contains pattern not on a line by itself. +RC=0 +---------------------------- Test 31 ----------------------------- +./testdata/grepinput:7:PATTERN at the start of a line. +./testdata/grepinput:8:In the middle of a line, PATTERN appears. +./testdata/grepinput-9- +./testdata/grepinput:10:This pattern is in lower case. +./testdata/grepinput-11- +./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24KiB long. +./testdata/grepinput-13- +-- +./testdata/grepinput:623:Check up on PATTERN near the end. +./testdata/grepinput-624-This is the last line of this file. +-- +./testdata/grepinputx:3:Here is the pattern again. +./testdata/grepinputx-4- +./testdata/grepinputx:5:Pattern +./testdata/grepinputx-6-That time it was on a line by itself. +./testdata/grepinputx-7- +./testdata/grepinputx-8-To pat or not to pat, that is the question. +-- +./testdata/grepinputx:42:This line contains pattern not on a line by itself. 
+./testdata/grepinputx-43-This is the last line of this file. +RC=0 +---------------------------- Test 32 ----------------------------- +./testdata/grepinputx +RC=0 +---------------------------- Test 33 ----------------------------- +pcre2grep: Failed to open ./testdata/grepnonexist: No such file or directory +RC=2 +---------------------------- Test 34 ----------------------------- +RC=2 +---------------------------- Test 35 ----------------------------- +./testdata/grepinput8 +./testdata/grepinputx +RC=0 +---------------------------- Test 36 ----------------------------- +./testdata/grepinput3 +./testdata/grepinputx +RC=0 +---------------------------- Test 37 ----------------------------- +24KiB long so that it needs more than a single read() call to process it. New +aaaaa0 +aaaaa2 +010203040506 +RC=0 +======== STDERR ======== +pcre2grep: pcre2_match() gave error -47 while matching this text: + +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +pcre2grep: pcre2_match() gave error -47 while matching this text: + +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +pcre2grep: Error -46, -47, -53 or -63 means that a resource limit was exceeded. +pcre2grep: Check your regex for nested unlimited loops. +---------------------------- Test 38 ------------------------------ +This line contains a binary zero here >< for testing. +RC=0 +---------------------------- Test 39 ------------------------------ +This is a line before the binary zero. +This line contains a binary zero here >< for testing. +RC=0 +---------------------------- Test 40 ------------------------------ +This line contains a binary zero here >< for testing. +This is a line after the binary zero. 
+RC=0 +---------------------------- Test 41 ------------------------------ +before the binary zero +after the binary zero +RC=0 +---------------------------- Test 42 ------------------------------ +./testdata/grepinput:595:before the binary zero +./testdata/grepinput:597:after the binary zero +RC=0 +---------------------------- Test 43 ------------------------------ +595:before +595:zero +596:zero +597:after +597:zero +RC=0 +---------------------------- Test 44 ------------------------------ +595:before +595:zero +596:zero +597:after +597:zero +RC=0 +---------------------------- Test 45 ------------------------------ +10:pattern +595:binary +596:binary +597:binary +RC=0 +---------------------------- Test 46 ------------------------------ +pcre2grep: Error in 1st command-line regex at offset 8: unmatched closing parenthesis +pcre2grep: Error in 2nd command-line regex at offset 9: missing closing parenthesis +pcre2grep: Error in 3rd command-line regex at offset 9: missing terminating ] for character class +pcre2grep: Error in 4th command-line regex at offset 9: missing terminating ] for character class +RC=2 +---------------------------- Test 47 ------------------------------ +AB.VE +RC=0 +---------------------------- Test 48 ------------------------------ +ABOVE the elephant +AB.VE +AB.VE the turtle +RC=0 +---------------------------- Test 49 ------------------------------ +ABOVE the elephant +AB.VE +AB.VE the turtle +PUT NEW DATA ABOVE THIS LINE. +RC=0 +---------------------------- Test 50 ------------------------------ +RC=1 +---------------------------- Test 51 ------------------------------ +over the lazy dog. +This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +A buried feline in the syndicate +RC=0 +---------------------------- Test 52 ------------------------------ +fox jumps +This time it jumps and jumps and jumps. 
+RC=0 +---------------------------- Test 53 ------------------------------ +36976,6 +36994,4 +37028,4 +37070,5 +37087,4 +RC=0 +---------------------------- Test 54 ------------------------------ +595:15,6 +595:33,4 +596:28,4 +597:15,5 +597:32,4 +RC=0 +---------------------------- Test 55 ----------------------------- +Here is the pattern again. +That time it was on a line by itself. +This line contains pattern not on a line by itself. +RC=0 +---------------------------- Test 56 ----------------------------- +./testdata/grepinput:456 +./testdata/grepinput3:0 +./testdata/grepinput8:0 +./testdata/grepinputM:0 +./testdata/grepinputv:1 +./testdata/grepinputx:0 +RC=0 +---------------------------- Test 57 ----------------------------- +./testdata/grepinput:456 +./testdata/grepinputv:1 +RC=0 +---------------------------- Test 58 ----------------------------- +PATTERN at the start of a line. +In the middle of a line, PATTERN appears. +Check up on PATTERN near the end. +RC=0 +---------------------------- Test 59 ----------------------------- +PATTERN at the start of a line. +In the middle of a line, PATTERN appears. +Check up on PATTERN near the end. +RC=0 +---------------------------- Test 60 ----------------------------- +PATTERN at the start of a line. +In the middle of a line, PATTERN appears. +Check up on PATTERN near the end. +RC=0 +---------------------------- Test 61 ----------------------------- +PATTERN at the start of a line. +In the middle of a line, PATTERN appears. +Check up on PATTERN near the end. +RC=0 +---------------------------- Test 62 ----------------------------- +pcre2grep: pcre2_match() gave error -47 while matching text that starts: + +This is a file of miscellaneous text that is used as test data for checking +that the pcregrep command is working correctly. The file must be more than +24KiB long so that it needs more than a single re + +pcre2grep: Error -46, -47, -53 or -63 means that a resource limit was exceeded. 
+pcre2grep: Check your regex for nested unlimited loops. +RC=1 +---------------------------- Test 63 ----------------------------- +pcre2grep: pcre2_match() gave error -53 while matching text that starts: + +This is a file of miscellaneous text that is used as test data for checking +that the pcregrep command is working correctly. The file must be more than +24KiB long so that it needs more than a single re + +pcre2grep: Error -46, -47, -53 or -63 means that a resource limit was exceeded. +pcre2grep: Check your regex for nested unlimited loops. +RC=1 +---------------------------- Test 64 ------------------------------ +appears +RC=0 +---------------------------- Test 65 ------------------------------ +pear +RC=0 +---------------------------- Test 66 ------------------------------ +RC=0 +---------------------------- Test 67 ------------------------------ +RC=0 +---------------------------- Test 68 ------------------------------ +pear +RC=0 +---------------------------- Test 69 ----------------------------- +1:This is a second file of input for the pcregrep tests. +2: +4: +5:Pattern +6:That time it was on a line by itself. +7: +8:To pat or not to pat, that is the question. +9: +10:complete pair +11:of lines +12: +13:That was a complete pair +14:of lines all by themselves. +15: +16:complete pair +17:of lines +18: +19:And there they were again, to check line numbers. +20: +21:one +22:two +23:three +24:four +25:five +26:six +27:seven +28:eight +29:nine +30:ten +31:eleven +32:twelve +33:thirteen +34:fourteen +35:fifteen +36:sixteen +37:seventeen +38:eighteen +39:nineteen +40:twenty +41: +43:This is the last line of this file. 
+RC=0 +---------------------------- Test 70 ----------------------------- +triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +RC=0 +1:triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +6:triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +8:triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +13:triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +RC=0 +triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +RC=0 +1:triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +6:triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +8:triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +13:triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +RC=0 +---------------------------- Test 71 ----------------------------- +01 +RC=0 +---------------------------- Test 72 ----------------------------- +010203040506 +RC=0 +---------------------------- Test 73 ----------------------------- +01 +RC=0 +---------------------------- Test 74 ----------------------------- +01 +02 +RC=0 +---------------------------- Test 75 ----------------------------- +010203040506 +RC=0 +---------------------------- Test 76 ----------------------------- +01 +02 +RC=0 +---------------------------- Test 77 ----------------------------- +01 +03 +RC=0 +---------------------------- Test 78 ----------------------------- +010203040506 +RC=0 +---------------------------- Test 79 ----------------------------- +01 +03 +RC=0 +---------------------------- Test 80 ----------------------------- +01 +RC=0 +---------------------------- Test 81 ----------------------------- +010203040506 +RC=0 +---------------------------- Test 82 ----------------------------- +01 +RC=0 
+---------------------------- Test 83 ----------------------------- +pcre2grep: line 4 of file ./testdata/grepinput3 is too long for the internal buffer +pcre2grep: the maximum buffer size is 100 +pcre2grep: use the --max-buffer-size option to change it +RC=2 +---------------------------- Test 84 ----------------------------- +testdata/grepinputv:fox jumps +testdata/grepinputx:complete pair +testdata/grepinputx:That was a complete pair +testdata/grepinputx:complete pair +testdata/grepinput3:triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt +RC=0 +---------------------------- Test 85 ----------------------------- +./testdata/grepinput3:Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +RC=0 +---------------------------- Test 86 ----------------------------- +Binary file ./testdata/grepbinary matches +RC=0 +---------------------------- Test 87 ----------------------------- +RC=1 +---------------------------- Test 88 ----------------------------- +Binary file ./testdata/grepbinary matches +RC=0 +---------------------------- Test 89 ----------------------------- +RC=1 +---------------------------- Test 90 ----------------------------- +RC=1 +---------------------------- Test 91 ----------------------------- +The quick brown fx jumps over the lazy dog. +RC=0 +---------------------------- Test 92 ----------------------------- +The quick brown fx jumps over the lazy dog. +RC=0 +---------------------------- Test 93 ----------------------------- +The quick brown fx jumps over the lazy dog. 
+RC=0 +---------------------------- Test 94 ----------------------------- +./testdata/grepinput8 +./testdata/grepinputx +RC=0 +---------------------------- Test 95 ----------------------------- +testdata/grepinputx:complete pair +testdata/grepinputx:That was a complete pair +testdata/grepinputx:complete pair +RC=0 +---------------------------- Test 96 ----------------------------- +./testdata/grepinput3 +./testdata/grepinput8 +./testdata/grepinputx +RC=0 +---------------------------- Test 97 ----------------------------- +./testdata/grepinput3 +./testdata/grepinputx +RC=0 +---------------------------- Test 98 ----------------------------- +./testdata/grepinputx +RC=0 +---------------------------- Test 99 ----------------------------- +./testdata/grepinput3 +./testdata/grepinputx +RC=0 +---------------------------- Test 100 ------------------------------ +./testdata/grepinput:zerothe. +./testdata/grepinput:zeroa +./testdata/grepinput:zerothe. +RC=0 +---------------------------- Test 101 ------------------------------ +./testdata/grepinput:.|zero|the|. +./testdata/grepinput:zero|a +./testdata/grepinput:.|zero|the|. +RC=0 +---------------------------- Test 102 ----------------------------- +2: +5: +7: +9: +12: +14: +RC=0 +---------------------------- Test 103 ----------------------------- +RC=0 +---------------------------- Test 104 ----------------------------- +2: +5: +7: +9: +12: +14: +RC=0 +---------------------------- Test 105 ----------------------------- +triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t2_txt s1_tag s_txt p_tag p_txt o_tag +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + +triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt + +triple: t5_txt s1_tag s_txt p_tag p_txt o_tag +o_txt + +triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt + +triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt +RC=0 +---------------------------- Test 106 ----------------------------- +a +RC=0 +---------------------------- Test 107 ----------------------------- +1:0,1 +2:0,1 +2:1,1 +2:2,1 +2:3,1 +2:4,1 +RC=0 +---------------------------- Test 108 ------------------------------ +RC=0 +---------------------------- Test 109 ----------------------------- +RC=0 +---------------------------- Test 110 ----------------------------- +match 1: + a +/1/a +match 2: + b +/2/b +match 3: + c +/3/c +match 4: + d +/4/d +match 5: + e +/5/e +RC=0 +---------------------------- Test 111 ----------------------------- +607:0,12 +609:0,12 +611:0,12 +613:0,12 +615:0,12 +RC=0 +---------------------------- Test 112 ----------------------------- +37172,12 +37184,12 +37196,12 +37208,12 +37220,12 +RC=0 +---------------------------- Test 113 ----------------------------- +480 +RC=0 +---------------------------- Test 114 ----------------------------- +testdata/grepinput:469 +testdata/grepinput3:0 +testdata/grepinput8:0 +testdata/grepinputM:2 +testdata/grepinputv:3 +testdata/grepinputx:6 +TOTAL:480 +RC=0 +---------------------------- Test 115 ----------------------------- +testdata/grepinput:469 +testdata/grepinputM:2 +testdata/grepinputv:3 +testdata/grepinputx:6 +TOTAL:480 +RC=0 +---------------------------- Test 116 ----------------------------- +478 +RC=0 +---------------------------- Test 117 ----------------------------- +469 +0 +0 +2 +3 +6 +480 +RC=0 +---------------------------- Test 118 ----------------------------- +testdata/grepinput3 +testdata/grepinput8 +RC=0 +---------------------------- Test 119 
----------------------------- +123 +456 +789 +--- +abc +def +xyz +--- +RC=0 +---------------------------- Test 120 ------------------------------ +./testdata/grepinput:the binary zero.:zerothe. +./testdata/grepinput:a binary zero:zeroa +./testdata/grepinput:the binary zero.:zerothe. +RC=0 +the binary zero.: +RC=0 +pcre2grep: Error in output text at offset 2: decimal number expected +RC=2 +pcre2grep: Error in output text at offset 3: no character after $ +RC=2 +pcre2grep: Error in output text at offset 8: too many hex digits +RC=2 +pcre2grep: Error in output text at offset 5: missing closing brace +RC=2 +pcre2grep: Error in output text at offset 7: code point greater than 0xff is invalid +RC=2 +---------------------------- Test 121 ----------------------------- +This line contains \E and (regex) *meta* [characters]. +RC=0 +---------------------------- Test 122 ----------------------------- +over the lazy dog. +The word is cat in this line +RC=0 +---------------------------- Test 123 ----------------------------- +over the lazy dog. +The word is cat in this line +RC=0 +---------------------------- Test 124 ----------------------------- +3:start end in between start +end and following +7:start end in between start +end and following start +end other stuff +11:start end in between start + +end +16:start end in between start +end +RC=0 +3:start end in between start +end and following +5-Other stuff +6- +7:start end in between start +end and following start +end other stuff +10- +11:start end in between start + +end +14- +15-** These two lines must be last. 
+16:start end in between start +end +RC=0 +3:start end in between start +end and following +7:start end in between start +end and following start +end other stuff +11:start end in between start + +end +16:start end in between start +end +RC=0 +3:start end in between start +end and following +5-Other stuff +6- +7:start end in between start +end and following start +end other stuff +10- +11:start end in between start + +end +14- +15-** These two lines must be last. +16:start end in between start +end +RC=0 +---------------------------- Test 125 ----------------------------- +abcd +RC=0 +abcd +RC=0 +abcd +RC=0 +abcd +RC=0 +abcd +RC=0 +---------------------------- Test 126 ----------------------------- +ABCXYZ +RC=0 +pcre2grep: Error in regex in line 2 of testtemp1grep at offset 4: unmatched closing parenthesis +RC=2 +---------------------------- Test 127 ----------------------------- +pattern +RC=0 +---------------------------- Test 128 ----------------------------- +pcre2grep: Requested group 1 cannot be captured. +pcre2grep: Use --om-capture to increase the size of the capture vector. +RC=2 +---------------------------- Test 129 ----------------------------- +The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the +lazy dog. The quick brown fox jumps over the lazy dog. 
The quick brown fox +RC=0 +---------------------------- Test 130 ----------------------------- +fox +fox +fox +fox +RC=0 +---------------------------- Test 131 ----------------------------- +2 +RC=0 +---------------------------- Test 132 ----------------------------- +match 1: + a +match 2: + b +--- + a +RC=0 +---------------------------- Test 133 ----------------------------- +match 1: + a +match 2: + b +--- +match 2: + b +match 3: + c +RC=0 +---------------------------- Test 134 ----------------------------- +(standard input):2:=AB3CD5= +RC=0 +---------------------------- Test 135 ----------------------------- +./testdata/grepinputv@The word is cat in this line +RC=0 +./testdata/grepinputv@./testdata/grepinputv@RC=0 +./testdata/grepinputv@This line contains \E and (regex) *meta* [characters]. +./testdata/grepinputv@The word is cat in this line +./testdata/grepinputv@The caterpillar sat on the mat +RC=0 +testdata/grepinputM3:start end in between start +end and following +testdata/grepinputM7:start end in between start +end and following start +end other stuff +testdata/grepinputM11:start end in between start + +end +testdata/grepinputM16:start end in between start +end +RC=0 +---------------------------- Test 136 ----------------------------- +pcre2grep: Malformed number "1MK" after -m +Usage: pcre2grep [-AaBCcDdEeFfHhIilLMmNnOoPqrstuUVvwxZ] [long options] [pattern] [files] +Type "pcre2grep --help" for more information and the long options. +RC=2 +pcre2grep: Malformed number "1MK" after --max-count +Usage: pcre2grep [-AaBCcDdEeFfHhIilLMmNnOoPqrstuUVvwxZ] [long options] [pattern] [files] +Type "pcre2grep --help" for more information and the long options. 
+RC=2 +---------------------------- Test 137 ----------------------------- +Last line +has no newline +RC=0 +---------------------------- Test 138 ----------------------------- +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: pcre2_match() gave error -63 while matching this text: + +AbC + +pcre2grep: Too many errors - abandoned. +pcre2grep: Error -46, -47, -53 or -63 means that a resource limit was exceeded. 
+pcre2grep: Check your regex for nested unlimited loops. +RC=2 +---------------------------- Test 139 ----------------------------- +fox jumps +RC=0 +---------------------------- Test 140 ----------------------------- +The quick brown +fox jumps +RC=0 +---------------------------- Test 141 ----------------------------- +(standard input):This is a line from stdin. +RC=0 +---------------------------- Test 142 ----------------------------- +pcre2grep: Failed to open /does/not/exist: No such file or directory +RC=2 +---------------------------- Test 143 ----------------------------- +fox jumps +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +A buried feline in the syndicate +RC=0 +---------------------------- Test 144 ----------------------------- +pcre2grep: Failed to open /non/exist: No such file or directory +RC=2 +---------------------------- Test 145 ----------------------------- +The quick brown +fox jumps +over the lazy dog. +This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +A buried feline in the syndicate + RC=0 +---------------------------- Test 146 ----------------------------- +(standard input):A123B +RC=0 +A123B +fox jumps +RC=0 +Usage: pcre2grep [-AaBCcDdEeFfHhIilLMmNnOoPqrstuUVvwxZ] [long options] [pattern] [files] +Type "pcre2grep --help" for more information and the long options. +RC=2 +---------------------------- Test 147 ----------------------------- +pcre2grep: Failed to open -nonfile: No such file or directory +RC=2 +---------------------------- Test 148 ----------------------------- +pcre2grep: Unknown option --nonexist +Usage: pcre2grep [-AaBCcDdEeFfHhIilLMmNnOoPqrstuUVvwxZ] [long options] [pattern] [files] +Type "pcre2grep --help" for more information and the long options. 
+RC=2 +pcre2grep: Unknown option letter '-' in "-n-n-bad" +Usage: pcre2grep [-AaBCcDdEeFfHhIilLMmNnOoPqrstuUVvwxZ] [long options] [pattern] [files] +Type "pcre2grep --help" for more information and the long options. +RC=2 +pcre2grep: Data missing after --context +Usage: pcre2grep [-AaBCcDdEeFfHhIilLMmNnOoPqrstuUVvwxZ] [long options] [pattern] [files] +Type "pcre2grep --help" for more information and the long options. +RC=2 +pcre2grep: Cannot mix --only-matching, --output, --file-offsets and/or --line-offsets +Usage: pcre2grep [-AaBCcDdEeFfHhIilLMmNnOoPqrstuUVvwxZ] [long options] [pattern] [files] +Type "pcre2grep --help" for more information and the long options. +RC=2 +pcre2grep: Unknown colour setting "badvalue" +RC=2 +pcre2grep: Invalid newline specifier "badvalue" +RC=2 +pcre2grep: Invalid value "badvalue" for -d +RC=2 +pcre2grep: Invalid value "badvalue" for -D +RC=2 +pcre2grep: --buffer-size must be greater than zero +RC=2 +pcre2grep: Error in --exclude regex at offset 7: missing closing parenthesis +RC=2 +pcre2grep: Failed to open /non/exist: No such file or directory +RC=2 +pcre2grep: Failed to open /non/exist: No such file or directory +RC=2 +pcre2grep: Failed to open /non/exist: No such file or directory +RC=2 +---------------------------- Test 149 ----------------------------- +Binary file ./testdata/grepbinary matches +RC=0 +pcre2grep: unknown value "wrong" for binary-files +Usage: pcre2grep [-AaBCcDdEeFfHhIilLMmNnOoPqrstuUVvwxZ] [long options] [pattern] [files] +Type "pcre2grep --help" for more information and the long options. 
+RC=2 +---------------------------- Test 150 ----------------------------- +pcre2grep: Failed to set locale badlocale (obtained from LC_CTYPE) +RC=2 +---------------------------- Test 151 ----------------------------- +The quick brown +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +---------------------------- Test 152 ----------------------------- +24:four +25-five +26-six +27-seven +++ +34:fourteen +35-fifteen +36-sixteen +37-seventeen +RC=0 +---------------------------- Test 153 ----------------------------- +24:four +25-five +26-six +27-seven +34:fourteen +35-fifteen +36-sixteen +37-seventeen +RC=0 diff --git a/testdata/grepoutput8 b/testdata/grepoutput8 new file mode 100644 index 0000000..68bc765 --- /dev/null +++ b/testdata/grepoutput8 @@ -0,0 +1,47 @@ +---------------------------- Test U1 ------------------------------ +1:X one +2:X two 3:X three 4:X four 5:X five +6:X six +7:X seven…8:X eight
9:X nine
10:X ten +RC=0 +---------------------------- Test U2 ------------------------------ +12-Before 111 +13-Before 222
14-Before 333…15:Match +16-After 111 +17-After 222
18-After 333 +RC=0 +---------------------------- Test U3 ------------------------------ +21:0,2 +22:0,2 +22:2,2 +22:4,2 +22:6,2 +22:8,2 +RC=0 +---------------------------- Test U4 ------------------------------ +pcre2grep: pcre2_match() gave error -22 while matching this text: + +Aက€CD Z + +UTF-8 error: isolated byte with 0x80 bit set at offset 4 + +RC=1 +---------------------------- Test U5 ------------------------------ +CD Z +RC=0 +---------------------------- Test U6 ----------------------------- +=ǓǤ= +RC=0 +---------------------------- Test U7 ------------------------------ +ÁabcÁ KkK +RC=0 +---------------------------- Test U8 ------------------------------ +ÁabcÁ KkK +RC=0 +---------------------------- Test U9 ------------------------------ +A1 +A1 +RC=0 +---------------------------- Test U10 ------------------------------ +A1 +RC=0 diff --git a/testdata/grepoutputC b/testdata/grepoutputC new file mode 100644 index 0000000..56bd283 --- /dev/null +++ b/testdata/grepoutputC @@ -0,0 +1,62 @@ +Arg1: [T] [he ] [ ] Arg2: |T| () () (0) +The quick brown +Arg1: [T] [his] [s] Arg2: |T| () () (0) +This time it jumps and jumps and jumps. +Arg1: [T] [his] [s] Arg2: |T| () () (0) +This line contains \E and (regex) *meta* [characters]. +Arg1: [T] [he ] [ ] Arg2: |T| () () (0) +The word is cat in this line +Arg1: [T] [he ] [ ] Arg2: |T| () () (0) +The caterpillar sat on the mat +Arg1: [T] [he ] [ ] Arg2: |T| () () (0) +The snowcat is not an animal +Arg1: [qu] [qu] +The quick brown +Arg1: [ t] [ t] +This time it jumps and jumps and jumps. +Arg1: [ l] [ l] +This line contains \E and (regex) *meta* [characters]. +Arg1: [wo] [wo] +The word is cat in this line +Arg1: [ca] [ca] +The caterpillar sat on the mat +Arg1: [sn] [sn] +The snowcat is not an animal +0:T +The quick brown +0:T +This time it jumps and jumps and jumps. +0:T +This line contains \E and (regex) *meta* [characters]. 
+0:T +The word is cat in this line +0:T +The caterpillar sat on the mat +0:T +The snowcat is not an animal +0:T + +The quick brown +0:T + +This time it jumps and jumps and jumps. +0:T + +This line contains \E and (regex) *meta* [characters]. +0:T + +The word is cat in this line +0:T + +The caterpillar sat on the mat +0:T + +The snowcat is not an animal +T +T +T +T +T +T +0:T:AA +The quick brown diff --git a/testdata/grepoutputCN b/testdata/grepoutputCN new file mode 100644 index 0000000..aef1a3d --- /dev/null +++ b/testdata/grepoutputCN @@ -0,0 +1,38 @@ +The quick brown +This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +The quick brown +This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +0:T +The quick brown +0:T +This time it jumps and jumps and jumps. +0:T +This line contains \E and (regex) *meta* [characters]. +0:T +The word is cat in this line +0:T +The caterpillar sat on the mat +0:T +The snowcat is not an animal +The quick brown +This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. +The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal +T +T +T +T +T +T +0:T:AA +The quick brown diff --git a/testdata/grepoutputCNU b/testdata/grepoutputCNU new file mode 100644 index 0000000..2fbfba0 --- /dev/null +++ b/testdata/grepoutputCNU @@ -0,0 +1,18 @@ +0:¦ +The quick brown +0:¦ +This time it jumps and jumps and jumps. +0:¦ +This line contains \E and (regex) *meta* [characters]. +0:¦ +The word is cat in this line +0:¦ +The caterpillar sat on the mat +0:¦ +The snowcat is not an animal +The quick brown +This time it jumps and jumps and jumps. +This line contains \E and (regex) *meta* [characters]. 
+The word is cat in this line +The caterpillar sat on the mat +The snowcat is not an animal diff --git a/testdata/grepoutputCU b/testdata/grepoutputCU new file mode 100644 index 0000000..ed8f491 --- /dev/null +++ b/testdata/grepoutputCU @@ -0,0 +1,30 @@ +0:¦ +The quick brown +0:¦ +This time it jumps and jumps and jumps. +0:¦ +This line contains \E and (regex) *meta* [characters]. +0:¦ +The word is cat in this line +0:¦ +The caterpillar sat on the mat +0:¦ +The snowcat is not an animal +0:¦ + +The quick brown +0:¦ + +This time it jumps and jumps and jumps. +0:¦ + +This line contains \E and (regex) *meta* [characters]. +0:¦ + +The word is cat in this line +0:¦ + +The caterpillar sat on the mat +0:¦ + +The snowcat is not an animal diff --git a/testdata/grepoutputCbz2 b/testdata/grepoutputCbz2 new file mode 100644 index 0000000..6eebfab --- /dev/null +++ b/testdata/grepoutputCbz2 @@ -0,0 +1,6 @@ +one +two +RC=0 +one +two +RC=0 diff --git a/testdata/grepoutputCgz b/testdata/grepoutputCgz new file mode 100644 index 0000000..f9b65a3 --- /dev/null +++ b/testdata/grepoutputCgz @@ -0,0 +1,3 @@ +one +two +RC=0 diff --git a/testdata/grepoutputN b/testdata/grepoutputN new file mode 100644 index 0000000..b39654f --- /dev/null +++ b/testdata/grepoutputN @@ -0,0 +1,27 @@ +---------------------------- Test N1 ------------------------------ +1:abc 2:def 1-abc 2:def ---------------------------- Test N2 ------------------------------ +1:abc def +2:ghi +jkl +1-abc def +2:ghi +jkl +---------------------------- Test N3 ------------------------------ +2:def 3: +ghi +jkl ---------------------------- Test N4 ------------------------------ +2:ghi +jkl +---------------------------- Test N5 ------------------------------ +1:abc 2:def +3:ghi +4:jkl +1-abc 2:def +---------------------------- Test N6 ------------------------------ +1:abc 2:def +3:ghi +4:jkl +3-ghi +4:jkl +---------------------------- Test N7 ------------------------------ +2:abc@3:def@1-xyz@2:abc@3:def@ diff --git 
a/testdata/grepoutputUN b/testdata/grepoutputUN new file mode 100644 index 0000000..ae5eb7a --- /dev/null +++ b/testdata/grepoutputUN @@ -0,0 +1,3 @@ +---------------------------- Test UN1 ------------------------------ +1:abcሴdef + diff --git a/testdata/greppatN4 b/testdata/greppatN4 new file mode 100644 index 0000000..ea1bfc7 --- /dev/null +++ b/testdata/greppatN4 @@ -0,0 +1,2 @@ +xxx +jkl \ No newline at end of file diff --git a/testdata/testbtables b/testdata/testbtables new file mode 100644 index 0000000000000000000000000000000000000000..b7aeeaf0242e4fa1a4b34e497e42446efc28ac14 GIT binary patch literal 1088 zcmZQzWMXDvWn<^yMC+6cQE@6%&_`l#-T_m6KOcR8m$^Ra4i{)Y8_`)zddH zG%_|ZH8Z!cw6eCbwX=6fOiE5kO-s+n%*xKm&C4$+EGjN3Ei136tcs3_jf+pfEni(z zTUX!E*woz8+ScCD+11_C+t)u~;-tw_rcRqaW9F>cbLP&QzhL2_#Y>hhTfSoDs?}@O zu3NuhGGAU*S_DldF%F_yZ8Px*fB6LG7tj)nd^W`Ko~pt&x~ChA^PwC ze|7|qf#LuE`g&wOhz(c6qQC;=HUQQ9hp73_43uj}mIukf+HT$aO~1px61Z`S|- literal 0 HcmV?d00001 diff --git a/testdata/testinput1 b/testdata/testinput1 new file mode 100644 index 0000000..00e76da --- /dev/null +++ b/testdata/testinput1 @@ -0,0 +1,6657 @@ +# This set of tests is for features that are compatible with all versions of +# Perl >= 5.10, in non-UTF mode. It should run clean for the 8-bit, 16-bit, and +# 32-bit PCRE libraries, and also using the perltest.sh script. + +# WARNING: Use only / as the pattern delimiter. Although pcre2test supports +# a number of delimiters, all those other than / give problems with the +# perltest.sh script. + +#forbid_utf +#newline_default lf any anycrlf +#perltest + +/the quick brown fox/ + the quick brown fox + What do you know about the quick brown fox? +\= Expect no match + The quick brown FOX + What do you know about THE QUICK BROWN FOX? + +/The quick brown fox/i + the quick brown fox + The quick brown FOX + What do you know about the quick brown fox? + What do you know about THE QUICK BROWN FOX? 
+ +/abcd\t\n\r\f\a\e\071\x3b\$\\\?caxyz/ + abcd\t\n\r\f\a\e9;\$\\?caxyz + +/a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz/ + abxyzpqrrrabbxyyyypqAzz + abxyzpqrrrabbxyyyypqAzz + aabxyzpqrrrabbxyyyypqAzz + aaabxyzpqrrrabbxyyyypqAzz + aaaabxyzpqrrrabbxyyyypqAzz + abcxyzpqrrrabbxyyyypqAzz + aabcxyzpqrrrabbxyyyypqAzz + aaabcxyzpqrrrabbxyyyypAzz + aaabcxyzpqrrrabbxyyyypqAzz + aaabcxyzpqrrrabbxyyyypqqAzz + aaabcxyzpqrrrabbxyyyypqqqAzz + aaabcxyzpqrrrabbxyyyypqqqqAzz + aaabcxyzpqrrrabbxyyyypqqqqqAzz + aaabcxyzpqrrrabbxyyyypqqqqqqAzz + aaaabcxyzpqrrrabbxyyyypqAzz + abxyzzpqrrrabbxyyyypqAzz + aabxyzzzpqrrrabbxyyyypqAzz + aaabxyzzzzpqrrrabbxyyyypqAzz + aaaabxyzzzzpqrrrabbxyyyypqAzz + abcxyzzpqrrrabbxyyyypqAzz + aabcxyzzzpqrrrabbxyyyypqAzz + aaabcxyzzzzpqrrrabbxyyyypqAzz + aaaabcxyzzzzpqrrrabbxyyyypqAzz + aaaabcxyzzzzpqrrrabbbxyyyypqAzz + aaaabcxyzzzzpqrrrabbbxyyyyypqAzz + aaabcxyzpqrrrabbxyyyypABzz + aaabcxyzpqrrrabbxyyyypABBzz + >>>aaabxyzpqrrrabbxyyyypqAzz + >aaaabxyzpqrrrabbxyyyypqAzz + >>>>abcxyzpqrrrabbxyyyypqAzz +\= Expect no match + abxyzpqrrabbxyyyypqAzz + abxyzpqrrrrabbxyyyypqAzz + abxyzpqrrrabxyyyypqAzz + aaaabcxyzzzzpqrrrabbbxyyyyyypqAzz + aaaabcxyzzzzpqrrrabbbxyyypqAzz + aaabcxyzpqrrrabbxyyyypqqqqqqqAzz + +/^(abc){1,2}zz/ + abczz + abcabczz +\= Expect no match + zz + abcabcabczz + >>abczz + +/^(b+?|a){1,2}?c/ + bc + bbc + bbbc + bac + bbac + aac + abbbbbbbbbbbc + bbbbbbbbbbbac +\= Expect no match + aaac + abbbbbbbbbbbac + +/^(b+|a){1,2}c/ + bc + bbc + bbbc + bac + bbac + aac + abbbbbbbbbbbc + bbbbbbbbbbbac +\= Expect no match + aaac + abbbbbbbbbbbac + +/^(ba|b*){1,2}?bc/ + babc + bbabc + bababc +\= Expect no match + bababbc + babababc + +/^\ca\cA\c[;\c:/ + \x01\x01\e;z + +/^[ab\]cde]/ + athing + bthing + ]thing + cthing + dthing + ething +\= Expect no match + fthing + [thing + \\thing + +/^[]cde]/ + ]thing + cthing + dthing + ething +\= Expect no match + athing + fthing + +/^[^ab\]cde]/ + fthing + [thing + \\thing +\= Expect no match + athing + bthing + ]thing + 
cthing + dthing + ething + +/^[^]cde]/ + athing + fthing +\= Expect no match + ]thing + cthing + dthing + ething + +/^\/ +  + +/^ÿ/ + ÿ + +/^[0-9]+$/ + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 100 +\= Expect no match + abc + +/^.*nter/ + enter + inter + uponter + +/^xxx[0-9]+$/ + xxx0 + xxx1234 +\= Expect no match + xxx + +/^.+[0-9][0-9][0-9]$/ + x123 + x1234 + xx123 + 123456 +\= Expect no match + 123 + +/^.+?[0-9][0-9][0-9]$/ + x123 + x1234 + xx123 + 123456 +\= Expect no match + 123 + +/^([^!]+)!(.+)=apquxz\.ixr\.zzz\.ac\.uk$/ + abc!pqr=apquxz.ixr.zzz.ac.uk +\= Expect no match + !pqr=apquxz.ixr.zzz.ac.uk + abc!=apquxz.ixr.zzz.ac.uk + abc!pqr=apquxz:ixr.zzz.ac.uk + abc!pqr=apquxz.ixr.zzz.ac.ukk + +/:/ + Well, we need a colon: somewhere +\= Expect no match + Fail without a colon + +/([\da-f:]+)$/i + 0abc + abc + fed + E + :: + 5f03:12C0::932e + fed def + Any old stuff +\= Expect no match + 0zzz + gzzz + fed\x20 + Any old rubbish + +/^.*\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/ + .1.2.3 + A.12.123.0 +\= Expect no match + .1.2.3333 + 1.2.3 + 1234.2.3 + +/^(\d+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/ + 1 IN SOA non-sp1 non-sp2( + 1 IN SOA non-sp1 non-sp2 ( +\= Expect no match + 1IN SOA non-sp1 non-sp2( + +/^[a-zA-Z\d][a-zA-Z\d\-]*(\.[a-zA-Z\d][a-zA-z\d\-]*)*\.$/ + a. + Z. + 2. + ab-c.pq-r. + sxk.zzz.ac.uk. + x-.y-. +\= Expect no match + -abc.peq. 
+ +/^\*\.[a-z]([a-z\-\d]*[a-z\d]+)?(\.[a-z]([a-z\-\d]*[a-z\d]+)?)*$/ + *.a + *.b0-a + *.c3-b.c + *.c-a.b-c +\= Expect no match + *.0 + *.a- + *.a-b.c- + *.c-a.0-c + +/^(?=ab(de))(abd)(e)/ + abde + +/^(?!(ab)de|x)(abd)(f)/ + abdf + +/^(?=(ab(cd)))(ab)/ + abcd + +/^[\da-f](\.[\da-f])*$/i + a.b.c.d + A.B.C.D + a.b.c.1.2.3.C + +/^\".*\"\s*(;.*)?$/ + \"1234\" + \"abcd\" ; + \"\" ; rhubarb +\= Expect no match + \"1234\" : things + +/^$/ + \ +\= Expect no match + A non-empty line + +/ ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/x + ab c +\= Expect no match + abc + ab cde + +/(?x) ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/ + ab c +\= Expect no match + abc + ab cde + +/^ a\ b[c ]d $/x + a bcd + a b d +\= Expect no match + abcd + ab d + +/^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$/ + abcdefhijklm + +/^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$/ + abcdefhijklm + +/^[\w][\W][\s][\S][\d][\D][\b][\n][\c]][\022]/ + a+ Z0+\x08\n\x1d\x12 + +/^[.^$|()*+?{,}]+/ + .^\$(*+)|{?,?} + +/^a*\w/ + z + az + aaaz + a + aa + aaaa + a+ + aa+ + +/^a*?\w/ + z + az + aaaz + a + aa + aaaa + a+ + aa+ + +/^a+\w/ + az + aaaz + aa + aaaa + aa+ + +/^a+?\w/ + az + aaaz + aa + aaaa + aa+ + +/^\d{8}\w{2,}/ + 1234567890 + 12345678ab + 12345678__ +\= Expect no match + 1234567 + +/^[aeiou\d]{4,5}$/ + uoie + 1234 + 12345 + aaaaa +\= Expect no match + 123456 + +/^[aeiou\d]{4,5}?/ + uoie + 1234 + 12345 + aaaaa + 123456 + +/\A(abc|def)=(\1){2,3}\Z/ + abc=abcabc + def=defdefdef +\= Expect no match + abc=defdef + +/^(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\11*(\3\4)\1(?#)2$/ + abcdefghijkcda2 + abcdefghijkkkkcda2 + +/(cat(a(ract|tonic)|erpillar)) \1()2(3)/ + cataract cataract23 + catatonic catatonic23 + caterpillar caterpillar23 + + +/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/ + From abcd Mon Sep 01 12:33:02 1997 + +/^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/ + From abcd Mon Sep 01 12:33:02 1997 + From abcd Mon Sep 1 
12:33:02 1997 +\= Expect no match + From abcd Sep 01 12:33:02 1997 + +/^12.34/s + 12\n34 + 12\r34 + +/\w+(?=\t)/ + the quick brown\t fox + +/foo(?!bar)(.*)/ + foobar is foolish see? + +/(?:(?!foo)...|^.{0,2})bar(.*)/ + foobar crowbar etc + barrel + 2barrel + A barrel + +/^(\D*)(?=\d)(?!123)/ + abc456 +\= Expect no match + abc123 + +/^1234(?# test newlines + inside)/ + 1234 + +/^1234 #comment in extended re + /x + 1234 + +/#rhubarb + abcd/x + abcd + +/^abcd#rhubarb/x + abcd + +/^(a)\1{2,3}(.)/ + aaab + aaaab + aaaaab + aaaaaab + +/(?!^)abc/ + the abc +\= Expect no match + abc + +/(?=^)abc/ + abc +\= Expect no match + the abc + +/^[ab]{1,3}(ab*|b)/ + aabbbbb + +/^[ab]{1,3}?(ab*|b)/ + aabbbbb + +/^[ab]{1,3}?(ab*?|b)/ + aabbbbb + +/^[ab]{1,3}(ab*?|b)/ + aabbbbb + +/ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional leading comment +(?: (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address +| # or +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # one word, optionally followed by.... +(?: +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or... +\( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) | # comments, or... 
+ +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +# quoted strings +)* +< (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # leading < +(?: @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* + +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* , (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +)* # further okay, if led by comma +: # closing colon +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* )? # optional route +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address spec +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* > # trailing > +# name and address +) (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional trailing comment +/x + Alan Other + + user\@dom.ain + \"A. Other\" (a comment) + A. Other (a comment) + \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay + A missing angle @,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# additional words +)* +@ +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. 
+# optional trailing comments +)* +# address +| # or +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +# leading word +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # "normal" atoms and or spaces +(?: +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +| +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +) # "special" comment or quoted string +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # more "normal" +)* +< +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# < +(?: +@ +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)* +(?: , +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +@ +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. 
+# optional trailing comments +)* +)* # additional domains +: +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)? # optional route +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# additional words +)* +@ +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)* +# address spec +> # > +# name and address +) +/x + Alan Other + + user\@dom.ain + \"A. Other\" (a comment) + A. Other (a comment) + \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay + A missing angle ]{0,})>]{0,})>([\d]{0,}\.)(.*)((
([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/is + 43.
Word Processor
(N-1286)
Lega lstaff.comCA - Statewide + +/a[^a]b/ + acb + a\nb + +/a.b/ + acb +\= Expect no match + a\nb + +/a[^a]b/s + acb + a\nb + +/a.b/s + acb + a\nb + +/^(b+?|a){1,2}?c/ + bac + bbac + bbbac + bbbbac + bbbbbac + +/^(b+|a){1,2}?c/ + bac + bbac + bbbac + bbbbac + bbbbbac + +/(?!\A)x/m + a\bx\n + a\nx\n +\= Expect no match + x\nb\n + +/(A|B)*?CD/ + CD + +/(A|B)*CD/ + CD + +/(AB)*?\1/ + ABABAB + +/(AB)*\1/ + ABABAB + +/(?.*\/)foo/ + /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo +\= Expect no match + /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ + +/(?>(\.\d\d[1-9]?))\d+/ + 1.230003938 + 1.875000282 +\= Expect no match + 1.235 + +/^((?>\w+)|(?>\s+))*$/ + now is the time for all good men to come to the aid of the party +\= Expect no match + this is not a line with only words and spaces! + +/(\d+)(\w)/ + 12345a + 12345+ + +/((?>\d+))(\w)/ + 12345a +\= Expect no match + 12345+ + +/(?>a+)b/ + aaab + +/((?>a+)b)/ + aaab + +/(?>(a+))b/ + aaab + +/(?>b)+/ + aaabbbccc + +/(?>a+|b+|c+)*c/ + aaabbbbccccd + +/((?>[^()]+)|\([^()]*\))+/ + ((abc(ade)ufh()()x + +/\(((?>[^()]+)|\([^()]+\))+\)/ + (abc) + (abc(def)xyz) +\= Expect no match + ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/a(?-i)b/i + ab + Ab +\= Expect no match + aB + AB + +/(a (?x)b c)d e/ + a bcd e +\= Expect no match + a b cd e + abcd e + a bcde + +/(a b(?x)c d (?-x)e f)/ + a bcde f +\= Expect no match + abcdef + +/(a(?i)b)c/ + abc + aBc +\= Expect no match + abC + aBC + Abc + ABc + ABC + AbC + +/a(?i:b)c/ + abc + aBc +\= Expect no match + ABC + abC + aBC + +/a(?i:b)*c/ + aBc + aBBc +\= Expect no match + aBC + aBBC + +/a(?=b(?i)c)\w\wd/ + abcd + abCd +\= Expect no match + aBCd + abcD + +/(?s-i:more.*than).*million/i + more than million + more than MILLION + more \n than Million +\= Expect no match + MORE THAN MILLION + more \n than \n million + +/(?:(?s-i)more.*than).*million/i + more than million + more than MILLION + more \n than Million +\= Expect no match + MORE 
THAN MILLION + more \n than \n million + +/(?>a(?i)b+)+c/ + abc + aBbc + aBBc +\= Expect no match + Abc + abAb + abbC + +/(?=a(?i)b)\w\wc/ + abc + aBc +\= Expect no match + Ab + abC + aBC + +/(?<=a(?i)b)(\w\w)c/ + abxxc + aBxxc +\= Expect no match + Abxxc + ABxxc + abxxC + +/(?:(a)|b)(?(1)A|B)/ + aA + bB +\= Expect no match + aB + bA + +/^(a)?(?(1)a|b)+$/ + aa + b + bb +\= Expect no match + ab + +# Perl gets this next one wrong if the pattern ends with $; in that case it +# fails to match "12". + +/^(?(?=abc)\w{3}:|\d\d)/ + abc: + 12 + 123 +\= Expect no match + xyz + +/^(?(?!abc)\d\d|\w{3}:)$/ + abc: + 12 +\= Expect no match + 123 + xyz + +/(?(?<=foo)bar|cat)/ + foobar + cat + fcat + focat +\= Expect no match + foocat + +/(?(?a*)*/ + a + aa + aaaa + +/(abc|)+/ + abc + abcabc + abcabcabc + xyz + +/([a]*)*/ + a + aaaaa + +/([ab]*)*/ + a + b + ababab + aaaabcde + bbbb + +/([^a]*)*/ + b + bbbb + aaa + +/([^ab]*)*/ + cccc + abab + +/([a]*?)*/ + a + aaaa + +/([ab]*?)*/ + a + b + abab + baba + +/([^a]*?)*/ + b + bbbb + aaa + +/([^ab]*?)*/ + c + cccc + baba + +/(?>a*)*/ + a + aaabcde + +/((?>a*))*/ + aaaaa + aabbaa + +/((?>a*?))*/ + aaaaa + aabbaa + +/(?(?=[^a-z]+[a-z]) \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) /x + 12-sep-98 + 12-09-98 +\= Expect no match + sep-12-98 + +/(?<=(foo))bar\1/ + foobarfoo + foobarfootling +\= Expect no match + foobar + barfoo + +/(?i:saturday|sunday)/ + saturday + sunday + Saturday + Sunday + SATURDAY + SUNDAY + SunDay + +/(a(?i)bc|BB)x/ + abcx + aBCx + bbx + BBx +\= Expect no match + abcX + aBCX + bbX + BBX + +/^([ab](?i)[cd]|[ef])/ + ac + aC + bD + elephant + Europe + frog + France +\= Expect no match + Africa + +/^(ab|a(?i)[b-c](?m-i)d|x(?i)y|z)/ + ab + aBd + xy + xY + zebra + Zambesi +\= Expect no match + aCD + XY + +/(?<=foo\n)^bar/m + foo\nbar +\= Expect no match + bar + baz\nbar + +/(?<=(?]&/ + <&OUT + +/^(a\1?){4}$/ + aaaaaaaaaa +\= Expect no match + AB + aaaaaaaaa + aaaaaaaaaaa + +/^(a(?(1)\1)){4}$/ + aaaaaaaaaa +\= Expect no match + 
aaaaaaaaa + aaaaaaaaaaa + +/(?<=a)b/ + ab +\= Expect no match + cb + b + +/(?a+)ab/ + +/(?>a+)b/ + aaab + +/([[:]+)/ + a:[b]: + +/([[=]+)/ + a=[b]= + +/([[.]+)/ + a.[b]. + +/((?>a+)b)/ + aaab + +/(?>(a+))b/ + aaab + +/((?>[^()]+)|\([^()]*\))+/ + ((abc(ade)ufh()()x + +/a\Z/ +\= Expect no match + aaab + a\nb\n + +/b\Z/ + a\nb\n + +/b\z/ + +/b\Z/ + a\nb + +/b\z/ + a\nb + +/^(?>(?(1)\.|())[^\W_](?>[a-z0-9-]*[^\W_])?)+$/ + a + abc + a-b + 0-9 + a.b + 5.6.7 + the.quick.brown.fox + a100.b200.300c + 12-ab.1245 +\= Expect no match + \ + .a + -a + a- + a. + a_b + a.- + a.. + ab..bc + the.quick.brown.fox- + the.quick.brown.fox. + the.quick.brown.fox_ + the.quick.brown.fox+ + +/(?>.*)(?<=(abcd|wxyz))/ + alphabetabcd + endingwxyz +\= Expect no match + a rather long string that doesn't end with one of them + +/word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword/ + word cat dog elephant mussel cow horse canary baboon snake shark otherword +\= Expect no match + word cat dog elephant mussel cow horse canary baboon snake shark + +/word (?>[a-zA-Z0-9]+ ){0,30}otherword/ +\= Expect no match + word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope + +/(?<=\d{3}(?!999))foo/ + 999foo + 123999foo +\= Expect no match + 123abcfoo + +/(?<=(?!...999)\d{3})foo/ + 999foo + 123999foo +\= Expect no match + 123abcfoo + +/(?<=\d{3}(?!999)...)foo/ + 123abcfoo + 123456foo +\= Expect no match + 123999foo + +/(?<=\d{3}...)(?\s*)=(?>\s*) # find Z)+|A)*/ + ZABCDEFG + +/((?>)+|A)*/ + ZABCDEFG + +/a*/g + abbab + +/[[:space:]]+/ + > \x09\x0a\x0c\x0d\x0b< + +/[[:blank:]]+/ + > \x09\x0a\x0c\x0d\x0b< + +/[\s]+/ + > \x09\x0a\x0c\x0d\x0b< + +/\s+/ + > \x09\x0a\x0c\x0d\x0b< + +/a b/x + ab + +/(?!\A)x/m + a\nxb\n + +/(?!^)x/m +\= Expect no match + a\nxb\n + +/abc\Qabc\Eabc/ + abcabcabc + +/abc\Q(*+|\Eabc/ + abc(*+|abc + +/ abc\Q abc\Eabc/x + abc abcabc +\= Expect no match + abcabcabc + +/abc#comment + 
\Q#not comment + literal\E/x + abc#not comment\n literal + +/abc#comment + \Q#not comment + literal/x + abc#not comment\n literal + +/abc#comment + \Q#not comment + literal\E #more comment + /x + abc#not comment\n literal + +/abc#comment + \Q#not comment + literal\E #more comment/x + abc#not comment\n literal + +/\Qabc\$xyz\E/ + abc\\\$xyz + +/\Qabc\E\$\Qxyz\E/ + abc\$xyz + +/\Gabc/ + abc +\= Expect no match + xyzabc + +/\Gabc./g + abc1abc2xyzabc3 + +/abc./g + abc1abc2xyzabc3 + +/a(?x: b c )d/ + XabcdY +\= Expect no match + Xa b c d Y + +/((?x)x y z | a b c)/ + XabcY + AxyzB + +/(?i)AB(?-i)C/ + XabCY +\= Expect no match + XabcY + +/((?i)AB(?-i)C|D)E/ + abCE + DE +\= Expect no match + abcE + abCe + dE + De + +/(.*)\d+\1/ + abc123abc + abc123bc + +/(.*)\d+\1/s + abc123abc + abc123bc + +/((.*))\d+\1/ + abc123abc + abc123bc + +# This tests for an IPv6 address in the form where it can have up to +# eight components, one and only one of which is empty. This must be +# an internal component. + +/^(?!:) # colon disallowed at start + (?: # start of item + (?: [0-9a-f]{1,4} | # 1-4 hex digits or + (?(1)0 | () ) ) # if null previously matched, fail; else null + : # followed by colon + ){1,7} # end item; 1-7 of them required + [0-9a-f]{1,4} $ # final hex number at end of string + (?(1)|.) 
# check that there was an empty component + /ix + a123::a123 + a123:b342::abcd + a123:b342::324e:abcd + a123:ddde:b342::324e:abcd + a123:ddde:b342::324e:dcba:abcd + a123:ddde:9999:b342::324e:dcba:abcd +\= Expect no match + 1:2:3:4:5:6:7:8 + a123:bce:ddde:9999:b342::324e:dcba:abcd + a123::9999:b342::324e:dcba:abcd + abcde:2:3:4:5:6:7:8 + ::1 + abcd:fee0:123:: + :1 + 1: + +/[z\Qa-d]\E]/ + z + a + - + d + ] +\= Expect no match + b + +/(a+)*b/ +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/(?i)reg(?:ul(?:[aä]|ae)r|ex)/ + REGular + regulaer + Regex + regulär + +/Åæåä[à-ÿÀ-ß]+/ + Åæåäà + Åæåäÿ + ÅæåäÀ + Åæåäß + +/(?<=Z)X./ + \x84XAZXB + +/ab cd (?x) de fg/ + ab cd defg + +/ab cd(?x) de fg/ + ab cddefg +\= Expect no match + abcddefg + +/(?a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/(?:a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/\Z/g + abc\n + +/^(?s)(?>.*)(?(a))b|(a)c/ + ac + +/(?=(a))ab|(a)c/ + ac + +/((?>(a))b|(a)c)/ + ac + +/((?>(a))b|(a)c)++/ + ac + +/(?:(?>(a))b|(a)c)++/ + ac + +/(?=(?>(a))b|(a)c)(..)/ + ac + +/(?>(?>(a))b|(a)c)/ + ac + +/(?:(?>([ab])))+a=/aftertext + =ba= + +/(?>([ab]))+a=/aftertext + =ba= + +/((?>(a+)b)+(aabab))/ + aaaabaaabaabab + +/(?>a+|ab)+?c/ +\= Expect no match + aabc + +/(?>a+|ab)+c/ +\= Expect no match + aabc + +/(?:a+|ab)+c/ + aabc + +/(?(?=(a))a)/ + a + +/(?(?=(a))a)(b)/ + ab + +/^(?:a|ab)++c/ +\= Expect no match + aaaabc + +/^(?>a|ab)++c/ +\= Expect no match + aaaabc + +/^(?:a|ab)+c/ + aaaabc + +/(?=abc){3}abc/aftertext + abcabcabc +\= Expect no match + xyz + +/(?=abc)+abc/aftertext + abcabcabc +\= Expect no match + xyz + +/(?=abc)++abc/aftertext + abcabcabc +\= Expect no match + xyz + +/(?=abc){0}xyz/ + xyz + +/(?=abc){1}xyz/ +\= Expect no match + xyz + 
+/(?=(a))?./ + ab + bc + +/(?=(a))??./ + ab + bc + +/^(?=(?1))?[az]([abc])d/ + abd + zcdxx + +/^(?!a){0}\w+/ + aaaaa + +/(?<=(abc))?xyz/ + abcxyz + pqrxyz + +/^[\g]+/ + ggg<<>> +\= Expect no match + \\ga + +/^[\ga]+/ + gggagagaxyz + +/^[:a[:digit:]]+/ + aaaa444:::Z + +/^[:a[:digit:]:b]+/ + aaaa444:::bbbZ + +/[:a]xxx[b:]/ + :xxx: + +/(?<=a{2})b/i + xaabc +\= Expect no match + xabc + +/(?XNNNYZ + > X NYQZ +\= Expect no match + >XYZ + > X NY Z + +/\v*X\v?Y\v+Z\V*\x0a\V+\x0b\V{2,3}\x0c/ + >XY\x0aZ\x0aA\x0bNN\x0c + >\x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c + +/(foo)\Kbar/ + foobar + +/(foo)(\Kbar|baz)/ + foobar + foobaz + +/(foo\Kbar)baz/ + foobarbaz + +/abc\K|def\K/g,aftertext + Xabcdefghi + +/ab\Kc|de\Kf/g,aftertext + Xabcdefghi + +/(?=C)/g,aftertext + ABCDECBA + +/^abc\K/aftertext + abcdef +\= Expect no match + defabcxyz + +/^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-2}Z/ + ababababbbabZXXXX + +/(?tom|bon)-\g{A}/ + tom-tom + bon-bon + +/(^(a|b\g{-1}))/ +\= Expect no match + bacxxx + +/(?|(abc)|(xyz))\1/ + abcabc + xyzxyz +\= Expect no match + abcxyz + xyzabc + +/(?|(abc)|(xyz))(?1)/ + abcabc + xyzabc +\= Expect no match + xyzxyz + +/^X(?5)(a)(?|(b)|(q))(c)(d)(Y)/ + XYabcdY + +/^X(?7)(a)(?|(b|(r)(s))|(q))(c)(d)(Y)/ + XYabcdY + +/^X(?7)(a)(?|(b|(?|(r)|(t))(s))|(q))(c)(d)(Y)/ + XYabcdY + +/(?'abc'\w+):\k{2}/ + a:aaxyz + ab:ababxyz +\= Expect no match + a:axyz + ab:abxyz + +/(?'abc'\w+):\g{abc}{2}/ + a:aaxyz + ab:ababxyz +\= Expect no match + a:axyz + ab:abxyz + +/^(?a)? (?()b|c) (?('ab')d|e)/x + abd + ce + +/^(a.)\g-1Z/ + aXaXZ + +/^(a.)\g{-1}Z/ + aXaXZ + +/^(?(DEFINE) (? a) (? 
b) ) (?&A) (?&B) /x + abcd + +/(?(?:(?:a(?&all))|(b))(c?))/ + aabc + +/(a(b)|(c))(?1)/ + abc + cab + +/(?1)(a(b)|(c))/ + abc + cab + +/(?(?&NAME_PAT))\s+(?(?&ADDRESS_PAT)) + (?(DEFINE) + (?[a-z]+) + (?\d+) + )/x + metcalfe 33 + +/(?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}/ + 1.2.3.4 + 131.111.10.206 + 10.0.0.0 +\= Expect no match + 10.6 + 455.3.4.5 + +/\b(?&byte)(\.(?&byte)){3}(?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))/ + 1.2.3.4 + 131.111.10.206 + 10.0.0.0 +\= Expect no match + 10.6 + 455.3.4.5 + +/^(\w++|\s++)*$/ + now is the time for all good men to come to the aid of the party +\= Expect no match + this is not a line with only words and spaces! + +/(\d++)(\w)/ + 12345a +\= Expect no match + 12345+ + +/a++b/ + aaab + +/(a++b)/ + aaab + +/(a++)b/ + aaab + +/([^()]++|\([^()]*\))+/ + ((abc(ade)ufh()()x + +/\(([^()]++|\([^()]+\))+\)/ + (abc) + (abc(def)xyz) +\= Expect no match + ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/^([^()]|\((?1)*\))*$/ + abc + a(b)c + a(b(c))d +\= Expect no match) + a(b(c)d + +/^>abc>([^()]|\((?1)*\))*abc>123abc>1(2)3abc>(1(2)3)]*+) | (?2)) * >))/x + <> + + hij> + hij> + def> + +\= Expect no match + a)(?<=b(?&X))/ + baz + +/^(?|(abc)|(def))\1/ + abcabc + defdef +\= Expect no match + abcdef + defabc + +/^(?|(abc)|(def))(?1)/ + abcabc + defabc +\= Expect no match + defdef + abcdef + +/(?:a(? (?')|(?")) |b(? (?')|(?")) ) (?('quote')[a-z]+|[0-9]+)/x,dupnames + a\"aaaaa + b\"aaaaa +\= Expect no match + b\"11111 + +/(?:(?1)|B)(A(*F)|C)/ + ABCD + CCD +\= Expect no match + CAD + +/^(?:(?1)|B)(A(*F)|C)/ + CCD + BCD +\= Expect no match + ABCD + CAD + BAD + +/(?:(?1)|B)(A(*ACCEPT)XX|C)D/ + AAD + ACD + BAD + BCD + BAX +\= Expect no match + ACX + ABC + +/(?(DEFINE)(A))B(?1)C/ + BAC + +/(?(DEFINE)((A)\2))B(?1)C/ + BAAC + +/(? 
\( ( [^()]++ | (?&pn) )* \) )/x + (ab(cd)ef) + +/^(?=a(*SKIP)b|ac)/ +\= Expect no match + ac + +/^(?=a(*PRUNE)b)/ + ab +\= Expect no match + ac + +/^(?=a(*ACCEPT)b)/ + ac + +/(?>a\Kb)/ + ab + +/((?>a\Kb))/ + ab + +/(a\Kb)/ + ab + +/^a\Kcz|ac/ + ac + +/(?>a\Kbz|ab)/ + ab + +/^(?&t)(?(DEFINE)(?a\Kb))$/ + ab + +/^([^()]|\((?1)*\))*$/ + a(b)c + a(b(c)d)e + +/(?P(?P0)(?P>L1)|(?P>L2))/ + 0 + 00 + 0000 + +/(?P(?P0)|(?P>L2)(?P>L1))/ + 0 + 00 + 0000 + +# This one does fail, as expected, in Perl. It needs the complex item at the +# end of the pattern. A single letter instead of (B|D) makes it not fail, which +# I think is a Perl bug. + +/A(*COMMIT)(B|D)/ +\= Expect no match + ACABX + +# Check the use of names for failure + +/^(A(*PRUNE:A)B|C(*PRUNE:B)D)/mark +\= Expect no match + AC + CB + +/(*MARK:A)(*SKIP:B)(C|X)/mark + C +\= Expect no match + D + +/^(A(*THEN:A)B|C(*THEN:B)D)/mark +\= Expect no match + CB + +/^(?:A(*THEN:A)B|C(*THEN:B)D)/mark +\= Expect no match + CB + +/^(?>A(*THEN:A)B|C(*THEN:B)D)/mark +\= Expect no match + CB + +# This should succeed, as the skip causes bump to offset 1 (the mark). Note +# that we have to have something complicated such as (B|Z) at the end because, +# for Perl, a simple character somehow causes an unwanted optimization to mess +# with the handling of backtracking verbs. + +/A(*MARK:A)A+(*SKIP:A)(B|Z) | AC/x,mark + AAAC + +# Test skipping over a non-matching mark. + +/A(*MARK:A)A+(*MARK:B)(*SKIP:A)(B|Z) | AC/x,mark + AAAC + +# Check shorthand for MARK. + +/A(*:A)A+(*SKIP:A)(B|Z) | AC/x,mark + AAAC + +/(*:A)A+(*SKIP:A)(B|Z)/mark +\= Expect no match + AAAC + +# This should succeed, as a non-existent skip name disables the skip. + +/A(*MARK:A)A+(*SKIP:B)(B|Z) | AC/x,mark + AAAC + +/A(*MARK:A)A+(*SKIP:B)(B|Z) | AC(*:B)/x,mark + AAAC + +# COMMIT at the start of a pattern should act like an anchor. Again, however, +# we need the complication for Perl. 
+ +/(*COMMIT)(A|P)(B|P)(C|P)/ + ABCDEFG +\= Expect no match + DEFGABC + +# COMMIT inside an atomic group can't stop backtracking over the group. + +/(\w+)(?>b(*COMMIT))\w{2}/ + abbb + +/(\w+)b(*COMMIT)\w{2}/ +\= Expect no match + abbb + +# Check opening parens in comment when seeking forward reference. + +/(?&t)(?#()(?(DEFINE)(?a))/ + bac + +# COMMIT should override THEN. + +/(?>(*COMMIT)(?>yes|no)(*THEN)(*F))?/ +\= Expect no match + yes + +/(?>(*COMMIT)(yes|no)(*THEN)(*F))?/ +\= Expect no match + yes + +/b?(*SKIP)c/ + bc + abc + +/(*SKIP)bc/ +\= Expect no match + a + +/(*SKIP)b/ +\= Expect no match + a + +/(?P(?P=abn)xxx|)+/ + xxx + +/(?i:([^b]))(?1)/ + aa + aA +\= Expect no match + ab + aB + Ba + ba + +/^(?&t)*+(?(DEFINE)(?a))\w$/ + aaaaaaX +\= Expect no match + aaaaaa + +/^(?&t)*(?(DEFINE)(?a))\w$/ + aaaaaaX + aaaaaa + +/^(a)*+(\w)/ + aaaaX + YZ +\= Expect no match + aaaa + +/^(?:a)*+(\w)/ + aaaaX + YZ +\= Expect no match + aaaa + +/^(a)++(\w)/ + aaaaX +\= Expect no match + aaaa + YZ + +/^(?:a)++(\w)/ + aaaaX +\= Expect no match + aaaa + YZ + +/^(a)?+(\w)/ + aaaaX + YZ + +/^(?:a)?+(\w)/ + aaaaX + YZ + +/^(a){2,}+(\w)/ + aaaaX +\= Expect no match + aaa + YZ + +/^(?:a){2,}+(\w)/ + aaaaX +\= Expect no match + aaa + YZ + +/(a|)*(?1)b/ + b + ab + aab + +/(a)++(?1)b/ +\= Expect no match + ab + aab + +/(a)*+(?1)b/ +\= Expect no match + ab + aab + +/(?1)(?:(b)){0}/ + b + +/(foo ( \( ((?:(?> [^()]+ )|(?2))*) \) ) )/x + foo(bar(baz)+baz(bop)) + +/(A (A|B(*ACCEPT)|C) D)(E)/x + AB + +/\A.*?(a|bc)/ + ba + +/\A.*?(?:a|bc)++/ + ba + +/\A.*?(a|bc)++/ + ba + +/\A.*?(?:a|bc|d)/ + ba + +/(?:(b))++/ + beetle + +/(?(?=(a(*ACCEPT)z))a)/ + a + +/^(a)(?1)+ab/ + aaaab + +/^(a)(?1)++ab/ +\= Expect no match + aaaab + +/^(?=a(*:M))aZ/mark + aZbc + +/^(?!(*:M)b)aZ/mark + aZbc + +/(?(DEFINE)(a))?b(?1)/ + backgammon + +/^\N+/ + abc\ndef + +/^\N{1,}/ + abc\ndef + +/(?(R)a+|(?R)b)/ + aaaabcde + +/(?(R)a+|((?R))b)/ + aaaabcde + +/((?(R)a+|(?1)b))/ + aaaabcde + +/((?(R1)a+|(?1)b))/ + aaaabcde + 
+/((?(R)a|(?1)))*/ + aaa + +/((?(R)a|(?1)))+/ + aaa + +/a(*:any +name)/mark + abc + +/(?>(?&t)c|(?&t))(?(DEFINE)(?a|b(*PRUNE)c))/ + a + ba + bba + +# Checking revised (*THEN) handling. + +# Capture + +/^.*? (a(*THEN)b) c/x +\= Expect no match + aabc + +/^.*? (a(*THEN)b|(*F)) c/x + aabc + +/^.*? ( (a(*THEN)b) | (*F) ) c/x + aabc + +/^.*? ( (a(*THEN)b) ) c/x +\= Expect no match + aabc + +# Non-capture + +/^.*? (?:a(*THEN)b) c/x +\= Expect no match + aabc + +/^.*? (?:a(*THEN)b|(*F)) c/x + aabc + +/^.*? (?: (?:a(*THEN)b) | (*F) ) c/x + aabc + +/^.*? (?: (?:a(*THEN)b) ) c/x +\= Expect no match + aabc + +# Atomic + +/^.*? (?>a(*THEN)b) c/x +\= Expect no match + aabc + +/^.*? (?>a(*THEN)b|(*F)) c/x + aabc + +/^.*? (?> (?>a(*THEN)b) | (*F) ) c/x + aabc + +/^.*? (?> (?>a(*THEN)b) ) c/x +\= Expect no match + aabc + +# Possessive capture + +/^.*? (a(*THEN)b)++ c/x +\= Expect no match + aabc + +/^.*? (a(*THEN)b|(*F))++ c/x + aabc + +/^.*? ( (a(*THEN)b)++ | (*F) )++ c/x + aabc + +/^.*? ( (a(*THEN)b)++ )++ c/x +\= Expect no match + aabc + +# Possessive non-capture + +/^.*? (?:a(*THEN)b)++ c/x +\= Expect no match + aabc + +/^.*? (?:a(*THEN)b|(*F))++ c/x + aabc + +/^.*? (?: (?:a(*THEN)b)++ | (*F) )++ c/x + aabc + +/^.*? 
(?: (?:a(*THEN)b)++ )++ c/x +\= Expect no match + aabc + +# Condition assertion + +/^(?(?=a(*THEN)b)ab|ac)/ + ac + +# Condition + +/^.*?(?(?=a)a|b(*THEN)c)/ +\= Expect no match + ba + +/^.*?(?:(?(?=a)a|b(*THEN)c)|d)/ + ba + +/^.*?(?(?=a)a(*THEN)b|c)/ +\= Expect no match + ac + +# Assertion + +/^.*(?=a(*THEN)b)/ + aabc + +# -------------------------- + +/(?>a(*:m))/imsx,mark + a + +/(?>(a)(*:m))/imsx,mark + a + +/(?<=a(*ACCEPT)b)c/ + xacd + +/(?<=(a(*ACCEPT)b))c/ + xacd + +/(?<=(a(*COMMIT)b))c/ + xabcd +\= Expect no match + xacd + +/(?a?)*)*c/ + aac + +/(?>.*?a)(?<=ba)/ + aba + +/(?:.*?a)(?<=ba)/ + aba + +/(?>.*?a)b/s + aab + +/(?>.*?a)b/ + aab + +/(?>^a)b/s +\= Expect no match + aab + +/(?>.*?)(?<=(abcd)|(wxyz))/ + alphabetabcd + endingwxyz + +/(?>.*)(?<=(abcd)|(wxyz))/ + alphabetabcd + endingwxyz + +/(?>.*)foo/ +\= Expect no match + abcdfooxyz + +/(?>.*?)foo/ + abcdfooxyz + +/(?:(a(*PRUNE)b)){0}(?:(?1)|ac)/ + ac + +/(?:(a(*SKIP)b)){0}(?:(?1)|ac)/ + ac + +/(?<=(*SKIP)ac)a/ +\= Expect no match + aa + +/A(*MARK:A)A+(*SKIP:B)(B|Z) | AC/x,mark + AAAC + +/a(*SKIP:m)x|ac(*:n)(*SKIP:n)d|ac/mark + acacd + +/A(*SKIP:m)x|A(*SKIP:n)x|AB/mark + AB + +/((*SKIP:r)d){0}a(*SKIP:m)x|ac(*:n)|ac/mark + acacd + +# Tests that try to figure out how Perl works. My hypothesis is that the first +# verb that is backtracked onto is the one that acts. This seems to be the case +# almost all the time, but there is one exception that is perhaps a bug. + +# This matches "aaaac"; each PRUNE advances one character until the subject no +# longer starts with 5 'a's. + +/aaaaa(*PRUNE)b|a+c/ + aaaaaac + +# Putting SKIP in front of PRUNE makes no difference, as it is never +# backtracked onto, whether or not it has a label. + +/aaaaa(*SKIP)(*PRUNE)b|a+c/ + aaaaaac + +/aaaaa(*SKIP:N)(*PRUNE)b|a+c/ + aaaaaac + +/aaaa(*:N)a(*SKIP:N)(*PRUNE)b|a+c/ + aaaaaac + +# Putting THEN in front makes no difference. 
+ +/aaaaa(*THEN)(*PRUNE)b|a+c/ + aaaaaac + +# However, putting COMMIT in front of the prune changes it to "no match". I +# think this is inconsistent and possibly a bug. For the moment, running this +# test is moved out of the Perl-compatible file. + +/aaaaa(*COMMIT)(*PRUNE)b|a+c/ + +# OK, lets play the same game again using SKIP instead of PRUNE. + +# This matches "ac" because SKIP forces the next match to start on the +# sixth "a". + +/aaaaa(*SKIP)b|a+c/ + aaaaaac + +# Putting PRUNE in front makes no difference. + +/aaaaa(*PRUNE)(*SKIP)b|a+c/ + aaaaaac + +# Putting THEN in front makes no difference. + +/aaaaa(*THEN)(*SKIP)b|a+c/ + aaaaaac + +# In this case, neither does COMMIT. This still matches "ac". + +/aaaaa(*COMMIT)(*SKIP)b|a+c/ + aaaaaac + +# This gives "no match", as expected. + +/aaaaa(*COMMIT)b|a+c/ +\= Expect no match + aaaaaac + +# ---- Tests using THEN ---- + +# This matches "aaaaaac", as expected. + +/aaaaa(*THEN)b|a+c/ + aaaaaac + +# Putting SKIP in front makes no difference. + +/aaaaa(*SKIP)(*THEN)b|a+c/ + aaaaaac + +# Putting PRUNE in front makes no difference. + +/aaaaa(*PRUNE)(*THEN)b|a+c/ + aaaaaac + +# Putting COMMIT in front makes no difference. 
+ +/aaaaa(*COMMIT)(*THEN)b|a+c/ + aaaaaac + +# End of "priority" tests + +/aaaaa(*:m)(*PRUNE:m)(*SKIP:m)m|a+/ + aaaaaa + +/aaaaa(*:m)(*MARK:m)(*PRUNE)(*SKIP:m)m|a+/ + aaaaaa + +/aaaaa(*:n)(*PRUNE:m)(*SKIP:m)m|a+/ + aaaaaa + +/aaaaa(*:n)(*MARK:m)(*PRUNE)(*SKIP:m)m|a+/ + aaaaaa + +/a(*MARK:A)aa(*PRUNE:A)a(*SKIP:A)b|a+c/ + aaaac + +/a(*MARK:A)aa(*MARK:A)a(*SKIP:A)b|a+c/ + aaaac + +/aaa(*PRUNE:A)a(*SKIP:A)b|a+c/ + aaaac + +/aaa(*MARK:A)a(*SKIP:A)b|a+c/ + aaaac + +/a(*:m)a(*COMMIT)(*SKIP:m)b|a+c/mark + aaaaaac + +/.?(a|b(*THEN)c)/ + ba + +/(a(*COMMIT)b)c|abd/ + abc +\= Expect no match + abd + +/(?=a(*COMMIT)b)abc|abd/ + abc + abd + +/(?>a(*COMMIT)b)c|abd/ + abc + abd + +/a(?=b(*COMMIT)c)[^d]|abd/ + abc +\= Expect no match + abd + +/a(?=bc).|abd/ + abd + abc + +/a(?>b(*COMMIT)c)d|abd/ +\= Expect no match + abceabd + +/a(?>bc)d|abd/ + abceabd + +/(?>a(*COMMIT)b)c|abd/ + abd + +/(?>a(*COMMIT)c)d|abd/ +\= Expect no match + abd + +/((?=a(*COMMIT)b)ab|ac){0}(?:(?1)|a(c))/ + ac + +# These tests were formerly in test 2, but changes in PCRE and Perl have +# made them compatible. + +/^(a)?(?(1)a|b)+$/ +\= Expect no match + a + +/A(*PRUNE:A)A+(*SKIP:A)(B|Z) | AC/x,mark +\= Expect no match + AAAC + +/^((abc|abcx)(*THEN)y|abcd)/ + abcd +\= Expect no match + abcxy + +/^((yes|no)(*THEN)(*F))?/ +\= Expect no match + yes + +/(A (.*) C? (*THEN) | A D) (*FAIL)/x +\= Expect no match + AbcdCBefgBhiBqz + +/(A (.*) C? (*THEN) | A D) z/x +\= Expect no match + AbcdCBefgBhiBqz + +/(A (.*) C? (*THEN) | A D) \s* (*FAIL)/x +\= Expect no match + AbcdCBefgBhiBqz + +/(A (.*) C? 
(*THEN) | A D) \s* z/x +\= Expect no match + AbcdCBefgBhiBqz + +/(A (.*) (?:C|) (*THEN) | A D) (*FAIL)/x +\= Expect no match + AbcdCBefgBhiBqz + +/(A (.*) (?:C|) (*THEN) | A D) z/x +\= Expect no match + AbcdCBefgBhiBqz + +/(A (.*) C{0,6} (*THEN) | A D) (*FAIL)/x +\= Expect no match + AbcdCBefgBhiBqz + +/(A (.*) C{0,6} (*THEN) | A D) z/x +\= Expect no match + AbcdCBefgBhiBqz + +/(A (.*) (CE){0,6} (*THEN) | A D) (*FAIL)/x +\= Expect no match + AbcdCEBefgBhiBqz + +/(A (.*) (CE){0,6} (*THEN) | A D) z/x +\= Expect no match + AbcdCEBefgBhiBqz + +/(A (.*) (CE*){0,6} (*THEN) | A D) (*FAIL)/x +\= Expect no match + AbcdCBefgBhiBqz + +/(A (.*) (CE*){0,6} (*THEN) | A D) z/x +\= Expect no match + AbcdCBefgBhiBqz + +/(?=a(*COMMIT)b|ac)ac|ac/ +\= Expect no match + ac + +/(?=a(*COMMIT)b|(ac)) ac | (a)c/x +\= Expect no match + ac + +# ---- + +/(?(?!b(*THEN)a)bn|bnn)/ + bnn + +/(?!b(*SKIP)a)bn|bnn/ + bnn + +/(?(?!b(*SKIP)a)bn|bnn)/ + bnn + +/(?!b(*PRUNE)a)bn|bnn/ + bnn + +/(?(?!b(*PRUNE)a)bn|bnn)/ + bnn + +/(?!b(*COMMIT)a)bn|bnn/ + bnn + +/(?(?!b(*COMMIT)a)bn|bnn)/ + bnn + +/(?=b(*SKIP)a)bn|bnn/ +\= Expect no match + bnn + +/(?=b(*THEN)a)bn|bnn/ + bnn + +/^(?!a(*SKIP)b)/ + ac + +/^(?!a(*SKIP)b)../ + acd + +/(?!a(*SKIP)b)../ + acd + +/^(?(?!a(*SKIP)b))/ + ac + +/^(?!a(*PRUNE)b)../ + acd + +/(?!a(*PRUNE)b)../ + acd + +/(?!a(*COMMIT)b)ac|cd/ + ac + +/\A.*?(?:a|bc)/ + ba + +/^(A(*THEN)B|C(*THEN)D)/ + CD + +/(*:m(m)(?&y)(?(DEFINE)(?b))/mark + abc + +/(*PRUNE:m(m)(?&y)(?(DEFINE)(?b))/mark + abc + +/(*SKIP:m(m)(?&y)(?(DEFINE)(?b))/mark + abc + +/(*THEN:m(m)(?&y)(?(DEFINE)(?b))/mark + abc + +/^\d*\w{4}/ + 1234 +\= Expect no match + 123 + +/^[^b]*\w{4}/ + aaaa +\= Expect no match + aaa + +/^[^b]*\w{4}/i + aaaa +\= Expect no match + aaa + +/^a*\w{4}/ + aaaa +\= Expect no match + aaa + +/^a*\w{4}/i + aaaa +\= Expect no match + aaa + +/(?:(?foo)|(?bar))\k/dupnames + foofoo + barbar + +/(?A)(?:(?foo)|(?bar))\k/dupnames + AfooA + AbarA +\= Expect no match + Afoofoo + Abarbar + 
+/^(\d+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/ + 1 IN SOA non-sp1 non-sp2( + +/^ (?:(?A)|(?'B'B)(?A)) (?('A')x) (?()y)$/x,dupnames + Ax + BAxy + +/^A\xZ/ + A\0Z + +/^A\o{123}B/ + A\123B + +/ ^ a + + b $ /x + aaaab + +/ ^ a + #comment + + b $ /x + aaaab + +/ ^ a + #comment + #comment + + b $ /x + aaaab + +/ ^ (?> a + ) b $ /x + aaaab + +/ ^ ( a + ) + + \w $ /x + aaaab + +/(?:a\Kb)*+/aftertext + ababc + +/(?>a\Kb)*/aftertext + ababc + +/(?:a\Kb)*/aftertext + ababc + +/(a\Kb)*+/aftertext + ababc + +/(a\Kb)*/aftertext + ababc + +/(?:x|(?:(xx|yy)+|x|x|x|x|x)|a|a|a)bc/ +\= Expect no match + acb + +/\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++/ + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + +/\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++/ + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + +/\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++/ + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + +/\A([^\"1]++|[\"2]([^\"3]*+|[\"4][\"5])*+[\"6])++/ + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + +/^\w+(?>\s*)(?<=\w)/ + test test + +/(?Pa)(?Pb)/g,dupnames + abbaba + +/(?Pa)(?Pb)(?P=same)/g,dupnames + abbaba + +/(?P=same)?(?Pa)(?Pb)/g,dupnames + abbaba + +/(?:(?P=same)?(?:(?P=same)(?Pa)(?P=same)|(?P=same)?(?Pb)(?P=same)){2}(?P=same)(?Pc)(?P=same)){2}(?Pz)?/g,dupnames +\= Expect no match + bbbaaaccccaaabbbcc + +/(?Pa)?(?Pb)?(?()c|d)*l/ + acl + bdl + adl + bcl + +/\sabc/ + \x{0b}abc + +/[\Qa]\E]+/ + aa]] + +/[\Q]a\E]+/ + aa]] + +/A((((((((a))))))))\8B/ + AaaB + +/A(((((((((a)))))))))\9B/ + AaaB + +/A[\8\9]B/ + A8B + A9B + +/(|ab)*?d/ + abd + xyd + +/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/ + 1234abcd + +/(\2|a)(\1)/ + aaa + +/(\2)(\1)/ + +/Z*(|d*){216}/ + +/(?1)(?#?'){8}(a)/ + baaaaaaaaac + +/((((((((((((x))))))))))))\12/ + xx + +/A[\8]B[\9]C/ + A8B9C + +/(?1)()((((((\1++))\x85)+)|))/ + \x85\x85 + +/(?|(\k'Pm')|(?'Pm'))/ + abcd + +/(?|(aaa)|(b))\g{1}/ + aaaaaa + bb + +/(?|(aaa)|(b))(?1)/ + aaaaaa + baaa +\= Expect no match + bb + +/(?|(aaa)|(b))/ + xaaa + xbc + 
+/(?|(?'a'aaa)|(?'a'b))\k'a'/ + aaaaaa + bb + +/(?|(?'a'aaa)|(?'a'b))(?'a'cccc)\k'a'/dupnames + aaaccccaaa + bccccb + +# /x does not apply to MARK labels + +/x (*MARK:ab cd # comment +ef) x/x,mark + axxz + +/(?<=a(B){0}c)X/ + acX + +/(?b)(?(DEFINE)(a+))(?&DEFINE)/ + bbbb +\= Expect no match + baaab + +/(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s])/ + \ Fred:099 + +/(?=.*X)X$/ + \ X + +/(?s)(?=.*?)b/ + aabc + +/(Z)(a)\2{1,2}?(?-i)\1X/i + ZaAAZX + +/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/ + +/[s[:digit:]\E-H]+/ + s09-H + +/[s[:digit:]\Q\E-H]+/ + s09-H + +/a+(?:|b)a/ + aaaa + +/X?(R||){3335}/ + +/(?1)(A(*COMMIT)|B)D/ + ABD + XABD + BAD + ABXABD +\= Expect no match + ABX + +/(?(DEFINE)(? 1? (?=(?2)?) 1 2 (?('cond')|3))) + \A + () + (?&m) + \Z/x + 123 + +/^(?: +(?: A| (1? (?=(?2)?) (1) 2 (?('cond')|3)) ) +(Z) +)+$/x + AZ123Z +\= Expect no match + AZ12Z + +/^ (?(DEFINE) ( (?!(a)\2b)..) ) ()(?1) /x + acb +\= Expect no match + aab + +/(?>ab|abab){1,5}?M/ + abababababababababababM + +/(?>ab|abab){2}?M/ + abababM + +/((?(?=(a))a)+k)/ + bbak + +/((?(?=(a))a|)+k)/ + bbak + +/(?(?!(b))a|b)+k/ + ababbalbbadabak + +/(?!(b))c|b/ + Ab + Ac + +/(?=(b))b|c/ + Ab + Ac + +/^(.|(.)(?1)\2)$/ + a + aba + abcba + ababa + abcdcba + +/^((.)(?1)\2|.?)$/ + a + aba + abba + abcba + ababa + abccba + abcdcba + abcddcba + +/^(.)(\1|a(?2))/ + bab + +/^(.|(.)(?1)?\2)$/ + abcba + +/^(?(?=(a))abc|def)/ + abc + +/^(?(?!(a))def|abc)/ + abc + +/^(?(?=(a)(*ACCEPT))abc|def)/ + abc + +/^(?(?!(a)(*ACCEPT))def|abc)/ + abc + +/^(?1)\d{3}(a)/ + a123a + +# This pattern uses a lot of named subpatterns in order to match email +# addresses in various formats. It's a heavy test for named subpatterns. In the +# group, slash is coded as \x{2f} so that this pattern can also be +# processed by perltest.sh, which does not cater for an escaped delimiter +# within the pattern. $ within the pattern must also be escaped. 
All $ and @ +# characters in subject strings are escaped so that Perl doesn't interpret them +# as variable insertions and " characters must also be escaped for Perl. + +# This set of subpatterns is more or less a direct transliteration of the BNF +# definitions in RFC2822, without any of the obsolete features. The addition of +# a possessive + to the definition of reduced the match limit in PCRE2 +# from over 5 million to just under 400, and eliminated a very noticeable delay +# when this file was passed to perltest.sh. + +/(?ix)(?(DEFINE) +(? (?&local_part) \@ (?&domain) ) +(? (?&CFWS)?+ < (?&addr_spec) > (?&CFWS)?+ ) +(? [a-z\d!#\$%&'*+-\x{2f}=?^_`{|}~] ) +(? (?&CFWS)?+ (?&atext)+ (?&CFWS)?+ ) +(? (?&ctext) | (?"ed_pair) | (?&comment) ) +(? [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ ()\\] ) +(? \( (?: (?&FWS)?+ (?&ccontent) )*+ (?&FWS)?+ \) ) +(? (?: (?&FWS)?+ (?&comment) )* (?# NOT possessive) + (?: (?&FWS)?+ (?&comment) | (?&FWS) ) ) +(? (?&dtext) | (?"ed_pair) ) +(? (?&phrase) ) +(? (?&dot_atom) | (?&domain_literal) ) +(? (?&CFWS)?+ \[ (?: (?&FWS)?+ (?&dcontent) )* (?&FWS)?+ \] + (?&CFWS)?+ ) +(? (?&CFWS)?+ (?&dot_atom_text) (?&CFWS)?+ ) +(? (?&atext)++ (?: \. (?&atext)++)*+ ) +(? [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ \[\]\\] ) +(? (?: [\t\ ]*+ \n)?+ [\t\ ]++ ) +(? (?&dot_atom) | (?"ed_string) ) +(? (?&name_addr) | (?&addr_spec) ) +(? (?&display_name)? (?&angle_addr) ) +(? (?&word)++ ) +(? (?&qtext) | (?"ed_pair) ) +(? " (?&text) ) +(? (?&CFWS)?+ " (?: (?&FWS)?+ (?&qcontent))* (?&FWS)?+ " + (?&CFWS)?+ ) +(? [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ "\\] ) +(? [^\r\n] ) +(? (?&atom) | (?"ed_string) ) +) # End DEFINE +^(?&mailbox)$/ + Alan Other + + user\@dom.ain + user\@[] + user\@[domain literal] + user\@[domain literal with \"[square brackets\"] inside] + \"A. Other\" (a comment) + A. Other (a comment) + \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay +\= Expect no match + A missing angle (?&simple_assertion) | (?&lookaround) ) + +(? \( \? > (?®ex) \) ) + +(? 
\\ \d+ | + \\g (?: [+-]?\d+ | \{ (?: [+-]?\d+ | (?&groupname) ) \} ) | + \\k <(?&groupname)> | + \\k '(?&groupname)' | + \\k \{ (?&groupname) \} | + \( \? P= (?&groupname) \) ) + +(? (?:(?&assertion) | + (?&callout) | + (?&comment) | + (?&option_setting) | + (?&qualified_item) | + (?"ed_string) | + (?"ed_string_empty) | + (?&special_escape) | + (?&verb) + )* ) + +(? \(\?C (?: \d+ | + (?: (?["'`^%\#\$]) + (?: \k'D'\k'D' | (?!\k'D') . )* \k'D' | + \{ (?: \}\} | [^}]*+ )* \} ) + )? \) ) + +(? \( (?: \? P? < (?&groupname) > | \? ' (?&groupname) ' )? + (?®ex) \) ) + +(? \[ \^?+ (?: \] (?&class_item)* | (?&class_item)+ ) \] ) + +(? (?! \\N\{\w+\} ) \\ [dDsSwWhHvVRN] ) + +(? (?: \[ : (?: + alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print| + punct|space|upper|word|xdigit + ) : \] | + (?"ed_string) | + (?"ed_string_empty) | + (?&escaped_character) | + (?&character_type) | + [^]] ) ) + +(? \(\?\# [^)]* \) | (?"ed_string_empty) | \\E ) + +(? (?: \( [+-]? \d+ \) | + \( < (?&groupname) > \) | + \( ' (?&groupname) ' \) | + \( R \d* \) | + \( R & (?&groupname) \) | + \( (?&groupname) \) | + \( DEFINE \) | + \( VERSION >?=\d+(?:\.\d\d?)? \) | + (?&callout)?+ (?&comment)* (?&lookaround) ) ) + +(? \(\? (?&condition) (?&branch) (?: \| (?&branch) )? \) ) + +(? (? [-\x{2f}!"'`=_:;,%&@~]) (?®ex) + \k'delimiter' .* ) + +(? \\ (?: 0[0-7]{1,2} | [0-7]{1,3} | o\{ [0-7]+ \} | + x \{ (*COMMIT) [[:xdigit:]]* \} | x [[:xdigit:]]{0,2} | + [aefnrt] | c[[:print:]] | + [^[:alnum:]] ) ) + +(? (?&capturing_group) | (?&non_capturing_group) | + (?&resetting_group) | (?&atomic_group) | + (?&conditional_group) ) + +(? [a-zA-Z_]\w* ) + +(? (?! (?&range_qualifier) ) [^[()|*+?.\$\\] ) + +(? \(\? (?: = | ! | <= | \(\? [iJmnsUx-]* : (?®ex) \) ) + +(? \(\? [iJmnsUx-]* \) ) + +(? (?:\. | + (?&lookaround) | + (?&back_reference) | + (?&character_class) | + (?&character_type) | + (?&escaped_character) | + (?&group) | + (?&subroutine_call) | + (?&literal_character) | + (?"ed_string) + ) (?&comment)? 
(?&qualifier)? ) + +(? (?: [?*+] | (?&range_qualifier) ) [+?]? ) + +(? (?: \\Q (?: (?!\\E | \k'delimiter') . )++ (?: \\E | ) ) ) + +(? \\Q\\E ) + +(? \{ (?: \d+ (?: , \d* )? | , \d+ ) \} ) + +(? (?&start_item)* (?&branch) (?: \| (?&branch) )* ) + +(? \( \? \| (?®ex) \) ) + +(? \^ | \$ | \\A | \\b | \\B | \\G | \\z | \\Z ) + +(? \\K ) + +(? \( \* (?: + ANY | + ANYCRLF | + BSR_ANYCRLF | + BSR_UNICODE | + CR | + CRLF | + LF | + LIMIT_MATCH=\d+ | + LIMIT_DEPTH=\d+ | + LIMIT_HEAP=\d+ | + NOTEMPTY | + NOTEMPTY_ATSTART | + NO_AUTO_POSSESS | + NO_DOTSTAR_ANCHOR | + NO_JIT | + NO_START_OPT | + NUL | + UTF | + UCP ) \) ) + +(? (?: \(\?R\) | \(\?[+-]?\d+\) | + \(\? (?: & | P> ) (?&groupname) \) | + \\g < (?&groupname) > | + \\g ' (?&groupname) ' | + \\g < [+-]? \d+ > | + \\g ' [+-]? \d+ ) ) + +(? \(\* (?: ACCEPT | FAIL | F | COMMIT | + (?:MARK)?:(?&verbname) | + (?:PRUNE|SKIP|THEN) (?: : (?&verbname)? )? ) \) ) + +(? [^)]+ ) + +) # End DEFINE +# Kick it all off... +^(?&delimited_regex)$/subject_literal,jitstack=256 + /^(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\11*(\3\4)\1(?#)2$/ + /(cat(a(ract|tonic)|erpillar)) \1()2(3)/ + /^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/ + /^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/ + /]{0,})>]{0,})>([\d]{0,}\.)(.*)((
([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/is + /^(?(DEFINE) (?
a) (? b) ) (?&A) (?&B) / + /(?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}/ + /\b(?&byte)(\.(?&byte)){3}(?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))/ + /^(\w++|\s++)*$/ + /a+b?(*THEN)c+(*FAIL)/ + /(A (A|B(*ACCEPT)|C) D)(E)/x + /^\W*+(?:((.)\W*+(?1)\W*+\2|)|((.)\W*+(?3)\W*+\4|\W*+.\W*+))\W*+$/i + /A(*PRUNE)B(*SKIP)C(*THEN)D(*COMMIT)E(*F)F(*FAIL)G(?!)H(*ACCEPT)I/B + /(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B,callout_info + /(?sx)(?(DEFINE)(? (?&simple_assertion) | (?&lookaround) )(? \( \? > (?®ex) \) )(? \\ \d+ | \\g (?: [+-]?\d+ | \{ (?: [+-]?\d+ | (?&groupname) ) \} ) | \\k <(?&groupname)> | \\k '(?&groupname)' | \\k \{ (?&groupname) \} | \( \? P= (?&groupname) \) )(? (?:(?&assertion) | (?&callout) | (?&comment) | (?&option_setting) | (?&qualified_item) | (?"ed_string) | (?"ed_string_empty) | (?&special_escape) | (?&verb) )* )(? \(\?C (?: \d+ | (?: (?["'`^%\#\$]) (?: \k'D'\k'D' | (?!\k'D') . )* \k'D' | \{ (?: \}\} | [^}]*+ )* \} ) )? \) )(? \( (?: \? P? < (?&groupname) > | \? ' (?&groupname) ' )? (?®ex) \) )(? \[ \^?+ (?: \] (?&class_item)* | (?&class_item)+ ) \] )(? (?! \\N\{\w+\} ) \\ [dDsSwWhHvVRN] )(? (?: \[ : (?: alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print| punct|space|upper|word|xdigit ) : \] | (?"ed_string) | (?"ed_string_empty) | (?&escaped_character) | (?&character_type) | [^]] ) )(? \(\?\# [^)]* \) | (?"ed_string_empty) | \\E )(? (?: \( [+-]? \d+ \) | \( < (?&groupname) > \) | \( ' (?&groupname) ' \) | \( R \d* \) | \( R & (?&groupname) \) | \( (?&groupname) \) | \( DEFINE \) | \( VERSION >?=\d+(?:\.\d\d?)? \) | (?&callout)?+ (?&comment)* (?&lookaround) ) )(? \(\? (?&condition) (?&branch) (?: \| (?&branch) )? \) )(? (? [-\x{2f}!"'`=_:;,%&@~]) (?®ex) \k'delimiter' .* )(? \\ (?: 0[0-7]{1,2} | [0-7]{1,3} | o\{ [0-7]+ \} | x \{ (*COMMIT) [[:xdigit:]]* \} | x [[:xdigit:]]{0,2} | [aefnrt] | c[[:print:]] | [^[:alnum:]] ) )(? 
(?&capturing_group) | (?&non_capturing_group) | (?&resetting_group) | (?&atomic_group) | (?&conditional_group) )(? [a-zA-Z_]\w* )(? (?! (?&range_qualifier) ) [^[()|*+?.\$\\] )(? \(\? (?: = | ! | <= | \(\? [iJmnsUx-]* : (?®ex) \) )(? \(\? [iJmnsUx-]* \) )(? (?:\. | (?&lookaround) | (?&back_reference) | (?&character_class) | (?&character_type) | (?&escaped_character) | (?&group) | (?&subroutine_call) | (?&literal_character) | (?"ed_string) ) (?&comment)? (?&qualifier)? )(? (?: [?*+] | (?&range_qualifier) ) [+?]? )(? (?: \\Q (?: (?!\\E | \k'delimiter') . )++ (?: \\E | ) ) ) (? \\Q\\E ) (? \{ (?: \d+ (?: , \d* )? | , \d+ ) \} )(? (?&start_item)* (?&branch) (?: \| (?&branch) )* )(? \( \? \| (?®ex) \) )(? \^ | \$ | \\A | \\b | \\B | \\G | \\z | \\Z )(? \\K )(? \( \* (?: ANY | ANYCRLF | BSR_ANYCRLF | BSR_UNICODE | CR | CRLF | LF | LIMIT_MATCH=\d+ | LIMIT_DEPTH=\d+ | LIMIT_HEAP=\d+ | NOTEMPTY | NOTEMPTY_ATSTART | NO_AUTO_POSSESS | NO_DOTSTAR_ANCHOR | NO_JIT | NO_START_OPT | NUL | UTF | UCP ) \) )(? (?: \(\?R\) | \(\?[+-]?\d+\) | \(\? (?: & | P> ) (?&groupname) \) | \\g < (?&groupname) > | \\g ' (?&groupname) ' | \\g < [+-]? \d+ > | \\g ' [+-]? \d+ ) )(? \(\* (?: ACCEPT | FAIL | F | COMMIT | (?:MARK)?:(?&verbname) | (?:PRUNE|SKIP|THEN) (?: : (?&verbname)? )? ) \) )(? 
[^)]+ ))^(?&delimited_regex)$/ +\= Expect no match + /((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + /(?:(?(2y)a|b)(X))+/ + /a(*MARK)b/ + /a(*CR)b/ + /(?P(?P=abn)(?/xx + < > + +/<(?:[a b])>/xx + < > + +/<(?xxx:[a b])>/ + < > + +/<(?-x:[a b])>/xx + < > + +/[[:digit:]-]+/ + 12-24 + +/((?<=((*ACCEPT)) )\1?\b) / +\= Expect no match + ((?<=((*ACCEPT)) )\\1?\\b)\x20 + +/((?<=((*ACCEPT))X)\1?Y)\1/ + XYYZ + +/((?<=((*ACCEPT))X)\1?Y(*ACCEPT))\1/ + XYYZ + +/(?(DEFINE)(?a?)X)^(?&optional_a)a$/ + aa + a + +/^(a?)b(?1)a/ + abaa + aba + baa + ba + +/^(a?)+b(?1)a/ + abaa + aba + baa + ba + +/^(a?)++b(?1)a/ + abaa + aba + baa + ba + +/^(a?)+b/ + b + ab + aaab + +/(?=a+)a(a+)++b/ + aab + +/(?<=\G.)/g,aftertext + abc + +/(?<=(?=.)?)/ + +/(?<=(?=.)?+)/ + +/(?<=(?=.)*)/ + +/(?<=(?=.){4,5})/ + +/(?<=(?=.){4,5}x)/ + +/a(?=.(*:X))(*SKIP:X)(*F)|(.)/ + abc + +/a(?>(*:X))(*SKIP:X)(*F)|(.)/ + abc + +/a(?:(*:X))(*SKIP:X)(*F)|(.)/ + abc + +#pattern no_start_optimize + +/(?>a(*:1))(?>b(*:1))(*SKIP:1)x|.*/ + abc + +/(?>a(*:1))(?>b)(*SKIP:1)x|.*/ + abc + +#subject mark + +/a(*ACCEPT:X)b/ + abc + +/(?=a(*ACCEPT:QQ)bc)axyz/ + axyz + +/(?(DEFINE)(a(*ACCEPT:X)))(?1)b/ + abc + +/a(*F:X)b/ + abc + +/(?(DEFINE)(a(*F:X)))(?1)b/ + abc + +/a(*COMMIT:X)b/ + abc + +/(?(DEFINE)(a(*COMMIT:X)))(?1)b/ + abc + +/a+(*:Z)b(*COMMIT:X)(*SKIP:Z)c|.*/ + aaaabd + +/a+(*:Z)b(*COMMIT:X)(*SKIP:X)c|.*/ + aaaabd + +/a(*COMMIT:X)b/ + axabc + +#pattern -no_start_optimize +#subject -mark + +/(.COMMIT)(*COMMIT::::::::::interal error:::)/ + +/(*COMMIT:ÿÿ)/ + +/(*COMMIT:]w)/ + +/(?i)A(?^)B(?^x:C D)(?^i)e f/ + aBCDE F +\= Expect no match + aBCDEF + AbCDe f + +/(*pla:foo).{6}/ + abcfoobarxyz +\= Expect no match + abcfooba + +/(*positive_lookahead:foo).{6}/ + abcfoobarxyz + +/(?(*pla:foo).{6}|a..)/ + foobarbaz + abcfoobar + +/(?(*positive_lookahead:foo).{6}|a..)/ + foobarbaz + abcfoobar + +/(*plb:foo)bar/ + abcfoobar +\= Expect no match + abcbarfoo + +/(*positive_lookbehind:foo)bar/ + abcfoobar +\= Expect no match + abcbarfoo + 
+/(?(*plb:foo)bar|baz)/ + abcfoobar + bazfoobar + abcbazfoobar + foobazfoobar + +/(?(*positive_lookbehind:foo)bar|baz)/ + abcfoobar + bazfoobar + abcbazfoobar + foobazfoobar + +/(*nlb:foo)bar/ + abcbarfoo +\= Expect no match + abcfoobar + +/(*negative_lookbehind:foo)bar/ + abcbarfoo +\= Expect no match + abcfoobar + +/(?(*nlb:foo)bar|baz)/ + abcfoobaz + abcbarbaz +\= Expect no match + abcfoobar + +/(?(*negative_lookbehind:foo)bar|baz)/ + abcfoobaz + abcbarbaz +\= Expect no match + abcfoobar + +/(*atomic:a+)\w/ + aaab +\= Expect no match + aaaa + +/ (? \w+ )* \. /xi + pokus. + +/(?(DEFINE) (? \w+ ) ) (?&word)* \./xi + pokus. + +/(?(DEFINE) (? \w+ ) ) ( (?&word)* ) \./xi + pokus. + +/(?&word)* (?(DEFINE) (? \w+ ) ) \./xi + pokus. + +/(?&word)* \. (? \w+ )/xi + pokus.hokus + +/a(?(?=(*:2)b).)/mark + abc + acb + +/a(?(?!(*:2)b).)/mark + acb + abc + +/(?:a|ab){1}+c/ +\= Expect no match + abc + +/(a|ab){1}+c/ + abc + +/(a+){1}+a/ +\= Expect no match + aaaa + +/(?(DEFINE)(a|ab))(?1){1}+c/ + abc + +/(?:a|(?=b)|.)*\z/ + abc + +/(?:a|(?=b)|.)*/ + abc + +/(?<=a(*SKIP)x)|c/ + abcd + +/(?<=a(*SKIP)x)|d/ + abcd + +/(?<=(?=.(?<=x)))/aftertext + abx + +/(?<=(?=(?<=a)))b/ + ab + +/^(?a)(?()b)((?<=b).*)$/ + abc + +/^(a\1?){4}$/ + aaaa + aaaaaa + +/^((\1+)|\d)+133X$/ + 111133X + +/^(?>.*?([A-Z])(?!.*\1)){26}/i + The quick brown fox jumps over the lazy dog. + Jackdaws love my big sphinx of quartz. + Pack my box with five dozen liquor jugs. +\= Expect no match + The quick brown fox jumps over the lazy cat. + Hackdaws love my big sphinx of quartz. + Pack my fox with five dozen liquor jugs. 
+ +/(?<=X(?(DEFINE)(A)))X(*F)/ +\= Expect no match + AXYZ + +/(?<=X(?(DEFINE)(A)))./ + AXYZ + +/(?<=X(?(DEFINE)(.*))Y)./ + AXYZ + +/(?<=X(?(DEFINE)(Y))(?1))./ + AXYZ + +/(?(DEFINE)(?bar))(?\x{8c}748364< + +/a{65536/ + >a{65536< + +/a\K.(?0)*/ + abac + +/(a\K.(?1)*)/ + abac + +# -------------------------------------------------------------------------- +# Perl-compatible tests of variable-length lookbehinds. + +/(?<=ab?c).../g + abcdefgacxyz + +/(?<=PQR|ab?c).../g + abcdefgacxyzPQR123 + +/(?<=ab?c|PQR).../g + abcdefgacxyzPQR123 + +/(?<=PQ|ab?c).../g + abcdefgacxyzPQR123 + +/(?<=ab?c|PQ).../g + abcdefgacxyzPQR123 + +/(?<=a(b?c|d?e?e)f)X./g + acfX1zzzaefX2zzzadeefX3zzzX4zzz + +/(?)?(?1)??\)|\(\?(?:R|[+-]?\d+)\))(?:(?:[?+*]|\{\d+(?:,\d*)?\})[?+]?)?|\|)*)$/ +# This is not very readable, and also does not handle all features. I have done +# some work on it. + +/^ +(? +# A regular expression is zero or more of these items. + (?: + # An item is one of these: + (?: + [^?+*{}()\[\]\\|]++| # Non-meta characters or unquoted . + \\.| # Quoted . + + \[ # Class, which is [ + (?: # Followed by + \^?\\.| # Optional ^ and any escaped character + \^[^\\]| # OR ^ and not escaped character + [^\\^] # OR neither ^ nor \ + ) # Followed by + (?:[^\]\\]+|\\.)*+ # Zero or more (not ] or \) OR escaped dot + \]| # Class ends with ] + + \( # Parenthesized group + (?: # Start with optional + \?[:=!]| # ? followed by : = ! + \?<[=!]| # OR ?< followed by = or ! + \?> # OR ?> + )? + (?&re)?? # Then a nested + \)| # End parenthesized group + + \(\? # Other parenthesized items + (?: # (? followed by + R| # R + [+-]?\d++ # Or optional +- and digits + ) + \)| # End parens + + \(\* # Verbs + (?: + COMMIT| + FAIL| + MARK:[^)]*| + (?:PRUNE|SKIP|THEN)(?::[^\)]*+)? + ) + \) + ) # End list of items + + # Followed by an optional quantifier + + (?: + (?: + [?+*] # ?+* + | # OR + \{\d+ # { digits + (?:,\d*)? # optionally followed by ,digits + \} # then closing } + | # OR + \{,\d+} # {,digits} + ) + [?+]? 
# optional ungreedy or possessive + )? + + | # OR an "item" is a branch ending + + \| + + )* # Zero or more top-level items. +) # End regex group. +$/x + [abcdef] + [abc\\]def] + a.b|abcd + ab()d + ab{1,3}d + ab{,3}d + ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc) + ab(*MARK:xyz) + (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\\s]) + abcd\\t\\n\\r\\f\\a\\e\\071\\x3b\\^\\\\\\?caxyz + a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz + \\G(?:(?=(\\1.|)(.))){1,13}?(?!.*\\2.*\\2)\\1\\K\\2 +\= Expect no match + ab)d + ab(d + {4,5} + a[]b + (a)(?(1)a|b|c) + +/^..A(*SKIP)B|C/ + 12ADC + +/(?\x{ff}< + +/[^\xff]/IB,utf + +/\x{100}abc(xyz(?1))/IB,utf + +/\777/I,utf + \x{1ff} + \777 + +/\x{100}+\x{200}/IB,utf + +/\x{100}+X/IB,utf + +/^[\QĀ\E-\QŐ\E/B,utf + +# This tests the stricter UTF-8 check according to RFC 3629. + +/X/utf +\= Expect UTF-8 errors + \x{d800} + \x{da00} + \x{dfff} + \x{110000} + \x{2000000} + \x{7fffffff} +\= Expect no match + \x{d800}\=no_utf_check + \x{da00}\=no_utf_check + \x{dfff}\=no_utf_check + \x{110000}\=no_utf_check + \x{2000000}\=no_utf_check + \x{7fffffff}\=no_utf_check + +/(*UTF8)\x{1234}/ + abcd\x{1234}pqr + +/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I + +/\h/I,utf + ABC\x{09} + ABC\x{20} + ABC\x{a0} + ABC\x{1680} + ABC\x{180e} + ABC\x{2000} + ABC\x{202f} + ABC\x{205f} + ABC\x{3000} + +/\v/I,utf + ABC\x{0a} + ABC\x{0b} + ABC\x{0c} + ABC\x{0d} + ABC\x{85} + ABC\x{2028} + +/\h*A/I,utf + CDBABC + +/\v+A/I,utf + +/\s?xxx\s/I,utf + +/\sxxx\s/I,utf,tables=2 + AB\x{85}xxx\x{a0}XYZ + AB\x{a0}xxx\x{85}XYZ + +/\S \S/I,utf,tables=2 + \x{a2} \x{84} + A Z + +/a+/utf + a\x{123}aa\=offset=1 + a\x{123}aa\=offset=3 + a\x{123}aa\=offset=4 +\= Expect bad offset value + a\x{123}aa\=offset=6 +\= Expect bad UTF-8 offset + a\x{123}aa\=offset=2 +\= Expect no match + a\x{123}aa\=offset=5 + +/\x{1234}+/Ii,utf + +/\x{1234}+?/Ii,utf + +/\x{1234}++/Ii,utf + +/\x{1234}{2}/Ii,utf + +/[^\x{c4}]/IB,utf + +/X+\x{200}/IB,utf + +/\R/I,utf + +/\777/IB,utf + +/\w+\x{C4}/B,utf + 
a\x{C4}\x{C4} + +/\w+\x{C4}/B,utf,tables=2 + a\x{C4}\x{C4} + +/\W+\x{C4}/B,utf + !\x{C4} + +/\W+\x{C4}/B,utf,tables=2 + !\x{C4} + +/\W+\x{A1}/B,utf + !\x{A1} + +/\W+\x{A1}/B,utf,tables=2 + !\x{A1} + +/X\s+\x{A0}/B,utf + X\x20\x{A0}\x{A0} + +/X\s+\x{A0}/B,utf,tables=2 + X\x20\x{A0}\x{A0} + +/\S+\x{A0}/B,utf + X\x{A0}\x{A0} + +/\S+\x{A0}/B,utf,tables=2 + X\x{A0}\x{A0} + +/\x{a0}+\s!/B,utf + \x{a0}\x20! + +/\x{a0}+\s!/B,utf,tables=2 + \x{a0}\x20! + +/A/utf + \x{ff000041} + \x{7f000041} + +/(*UTF8)abc/never_utf + +/abc/utf,never_utf + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf + +/AB\x{1fb0}/IB,utf + +/AB\x{1fb0}/IBi,utf + +/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf + \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} + \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} + +/[â±¥]/Bi,utf + +/[^â±¥]/Bi,utf + +/\h/I + +/\v/I + +/\R/I + +/[[:blank:]]/B,ucp + +/\x{212a}+/Ii,utf + KKkk\x{212a} + +/s+/Ii,utf + SSss\x{17f} + +/\x{100}*A/IB,utf + A + +/\x{100}*\d(?R)/IB,utf + +/[Z\x{100}]/IB,utf + Z\x{100} + \x{100} + \x{100}Z + +/[z-\x{100}]/IB,utf + +/[z\Qa-d]Ā\E]/IB,utf + \x{100} + Ā + +/[ab\x{100}]abc(xyz(?1))/IB,utf + +/\x{100}*\s/IB,utf + +/\x{100}*\d/IB,utf + +/\x{100}*\w/IB,utf + +/\x{100}*\D/IB,utf + +/\x{100}*\S/IB,utf + +/\x{100}*\W/IB,utf + +/[\x{105}-\x{109}]/IBi,utf + \x{104} + \x{105} + \x{109} +\= Expect no match + \x{100} + \x{10a} + +/[z-\x{100}]/IBi,utf + Z + z + \x{39c} + \x{178} + | + \x{80} + \x{ff} + \x{100} + \x{101} +\= Expect no match + \x{102} + Y + y + +/[z-\x{100}]/IBi,utf + +/\x{3a3}B/IBi,utf + +/abc/utf,replace=à + abc + +/(?<=(a)(?-1))x/I,utf + a\x80zx\=offset=3 + +/[\W\p{Any}]/B + abc + 123 + +/[\W\pL]/B + abc +\= Expect no match + 123 + 
+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/utf + +/[\s[:^ascii:]]/B,ucp + +# A special extra option allows excaped surrogate code points in 8-bit mode, +# but subjects containing them must not be UTF-checked. + +/\x{d800}/I,utf,allow_surrogate_escapes + \x{d800}\=no_utf_check + +/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes + \x{dfff}\x{df01}\=no_utf_check + +# This has different starting code units in 8-bit mode. + +/^[^ab]/IB,utf + c + \x{ff} + \x{100} +\= Expect no match + aaa + +# Offsets are different in 8-bit mode. + +/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout + 123abcáyzabcdef789abcሴqr + +# Check name length with non-ASCII characters + +/(?'ABáC678901234567890123456789012012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf + +/(?'ABáC6789012345678901234567890123012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf + +/(?'ABZC6789012345678901234567890123012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf + +/(?(n/utf + +/(?(á/utf + +# Invalid UTF-8 tests + +/.../g,match_invalid_utf + abcd\x80wxzy\x80pqrs + abcd\x{80}wxzy\x80pqrs + +/abc/match_invalid_utf + ab\x80ab\=ph +\= Expect no match + ab\x80cdef\=ph + +/.a/match_invalid_utf + ab\=ph + ab\=ps + b\xf0\x91\x88b\=ph + b\xf0\x91\x88b\=ps + b\xf0\x91\x88\xb4a +\= Expect no match + b\x80\=ph + b\x80\=ps + b\xf0\x91\x88\=ph + b\xf0\x91\x88\=ps + +/.a$/match_invalid_utf + ab\=ph + ab\=ps +\= Expect no match + b\xf0\x91\x98\=ph + b\xf0\x91\x98\=ps + +/ab$/match_invalid_utf + ab\x80cdeab +\= Expect no match + ab\x80cde + +/.../g,match_invalid_utf + abcd\x{80}wxzy\x80pqrs + +/(?<=x)../g,match_invalid_utf + 
abcd\x{80}wxzy\x80pqrs + abcd\x{80}wxzy\x80xpqrs + +/X$/match_invalid_utf +\= Expect no match + X\xc4 + +/(?<=..)X/match_invalid_utf,aftertext + AB\x80AQXYZ + AB\x80AQXYZ\=offset=5 + AB\x80\x80AXYZXC\=offset=5 +\= Expect no match + AB\x80XYZ + AB\x80XYZ\=offset=3 + AB\xfeXYZ + AB\xffXYZ\=offset=3 + AB\x80AXYZ + AB\x80AXYZ\=offset=4 + AB\x80\x80AXYZ\=offset=5 + +/.../match_invalid_utf + AB\xc4CCC +\= Expect no match + A\x{d800}B + A\x{110000}B + A\xc4B + +/\bX/match_invalid_utf + A\x80X + +/\BX/match_invalid_utf +\= Expect no match + A\x80X + +/(?<=...)X/match_invalid_utf + AAA\x80BBBXYZ +\= Expect no match + AAA\x80BXYZ + AAA\x80BBXYZ + +# ------------------------------------- + +/(*UTF)(?=\x{123})/I + +/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf + +/[󿾟,]/BI,utf + +/[\x{fff4}-\x{ffff8}]/I,utf + +/[\x{fff4}-\x{afff8}\x{10ffff}]/I,utf + +/[\xff\x{ffff}]/I,utf + +/[\xff\x{ff}]/I,utf + abc\x{ff}def + +/[\xff\x{ff}]/I + abc\x{ff}def + +/[Ss]/I + +/[Ss]/I,utf + +/(?:\x{ff}|\x{3000})/I,utf + +/x/utf + abxyz + \x80\=startchar + abc\x80\=startchar + abc\x80\=startchar,offset=3 + +/\x{c1}+\x{e1}/iIB,ucp + \x{c1}\x{c1}\x{c1} + \x{e1}\x{e1}\x{e1} + +/a|\x{c1}/iI,ucp + \x{e1}xxx + +/a|\x{c1}/iI,utf + \x{e1}xxx + +/\x{c1}|\x{e1}/iI,ucp + +/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended + X\x{e1}Y + +/X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended + X\x{c1}Y + +# Without UTF or UCP characters > 127 have only one case in the default locale. 
+ +/X(\x{e1})Y/replace=>\U$1<,substitute_extended + X\x{e1}Y + +/A/utf,match_invalid_utf,caseless + \xe5A + +/\bch\b/utf,match_invalid_utf + qchq\=ph + qchq\=ps + +/line1\nbreak/firstline,utf,match_invalid_utf + line1\nbreak + line0\nline1\nbreak + +/A\z/utf,match_invalid_utf + A\x80\x42\n + +# End of testinput10 diff --git a/testdata/testinput11 b/testdata/testinput11 new file mode 100644 index 0000000..2bc8a25 --- /dev/null +++ b/testdata/testinput11 @@ -0,0 +1,374 @@ +# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF) +# features that are not compatible with the 8-bit library, or which give +# different output in 16-bit or 32-bit mode. The output for the two widths is +# different, so they have separate output files. + +#forbid_utf +#newline_default LF ANY ANYCRLF + +/[^\x{c4}]/IB + +/\x{100}/I + +/ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional leading comment +(?: (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address +| # or +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # one word, optionally followed by.... +(?: +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or... +\( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) | # comments, or... 
+ +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +# quoted strings +)* +< (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # leading < +(?: @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* + +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* , (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +)* # further okay, if led by comma +: # closing colon +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* )? # optional route +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address spec +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* > # trailing > +# name and address +) (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional trailing comment +/Ix + +/[\h]/B + >\x09< + +/[\h]+/B + >\x09\x20\xa0< + +/[\v]/B + +/[^\h]/B + +/\h+/I + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\xa0\x{2000} + +/[\h\x{dc00}]+/IB + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\xa0\x{2000} + +/\H+/I + \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + \x{2000}\x{200a}\x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} + +/[\H\x{d800}]+/ + \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + \x{2000}\x{200a}\x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} + +/\v+/I + \x{2027}\x{2030}\x{2028}\x{2029} + \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d + +/[\v\x{dc00}]+/IB + \x{2027}\x{2030}\x{2028}\x{2029} + \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d + +/\V+/I + \x{2028}\x{2029}\x{2027}\x{2030} + \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 + +/[\V\x{d800}]+/ + \x{2028}\x{2029}\x{2027}\x{2030} + \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 + +/\R+/I,bsr=unicode + \x{2027}\x{2030}\x{2028}\x{2029} + \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d + +/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I + \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00} + +/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B + +/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi + +/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B + 
+/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark + XX + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark + XX + +/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref + +/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref + +/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref + +/^\x{ffff}+/i + \x{ffff} + +/^\x{ffff}?/i + \x{ffff} + +/^\x{ffff}*/i + \x{ffff} + +/^\x{ffff}{3}/i + \x{ffff}\x{ffff}\x{ffff} + +/^\x{ffff}{0,3}/i + \x{ffff} + +/[^\x00-a]{12,}[^b-\xff]*/B + +/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B + +/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B + +/^[\x{1234}\x{4321}]{2,4}?/ + \x{1234}\x{1234}\x{1234} + +# Check maximum non-UTF character size for the 16-bit library. + +/\x{ffff}/ + A\x{ffff}B + +/\x{10000}/ + +/\o{20000}/ + +# Check maximum character size for the 32-bit library. These will all give +# errors in the 16-bit library. 
+ +/\x{110000}/ + +/\x{7fffffff}/ + +/\x{80000000}/ + +/\x{ffffffff}/ + +/\x{100000000}/ + +/\o{17777777777}/ + +/\o{20000000000}/ + +/\o{37777777777}/ + +/\o{40000000000}/ + +/\x{7fffffff}\x{7fffffff}/I + +/\x{80000000}\x{80000000}/I + +/\x{ffffffff}\x{ffffffff}/I + +# Non-UTF characters + +/.{2,3}/ + \x{400000}\x{400001}\x{400002}\x{400003} + +/\x{400000}\x{800000}/IBi + +# Check character ranges + +/[\H]/IB + +/[\V]/IB + +/(*THEN:\[A]{65501})/expand + +# We can use pcre2test's utf8_input modifier to create wide pattern characters, +# even though this test is run when UTF is not supported. + +/abý¿¿¿¿¿z/utf8_input + abý¿¿¿¿¿z + ab\x{7fffffff}z + +/abÿý¿¿¿¿¿z/utf8_input + abÿý¿¿¿¿¿z + ab\x{ffffffff}z + +/abÿAz/utf8_input + abÿAz + ab\x{80000041}z + +/(?i:A{1,}\6666666666)/ + A\x{1b6}6666666 + +# End of testinput11 diff --git a/testdata/testinput12 b/testdata/testinput12 new file mode 100644 index 0000000..85550c3 --- /dev/null +++ b/testdata/testinput12 @@ -0,0 +1,606 @@ +# This set of tests is for UTF-16 and UTF-32 support, including Unicode +# properties. It is relevant only to the 16-bit and 32-bit libraries. The +# output is different for each library, so there are separate output files. 
+ +/ÃÃÃxxx/IB,utf,no_utf_check + +/abc/utf + Ã] + +# Check maximum character size + +/\x{ffff}/IB,utf + +/\x{10000}/IB,utf + +/\x{100}/IB,utf + +/\x{1000}/IB,utf + +/\x{10000}/IB,utf + +/\x{100000}/IB,utf + +/\x{10ffff}/IB,utf + +/[\x{ff}]/IB,utf + +/[\x{100}]/IB,utf + +/\x80/IB,utf + +/\xff/IB,utf + +/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf + \x{D55c}\x{ad6d}\x{C5B4} + +/\x{65e5}\x{672c}\x{8a9e}/IB,utf + \x{65e5}\x{672c}\x{8a9e} + +/\x{80}/IB,utf + +/\x{084}/IB,utf + +/\x{104}/IB,utf + +/\x{861}/IB,utf + +/\x{212ab}/IB,utf + +/[^ab\xC0-\xF0]/IB,utf + \x{f1} + \x{bf} + \x{100} + \x{1000} +\= Expect no match + \x{c0} + \x{f0} + +/Ā{3,4}/IB,utf + \x{100}\x{100}\x{100}\x{100\x{100} + +/(\x{100}+|x)/IB,utf + +/(\x{100}*a|x)/IB,utf + +/(\x{100}{0,2}a|x)/IB,utf + +/(\x{100}{1,2}a|x)/IB,utf + +/\x{100}/IB,utf + +/a\x{100}\x{101}*/IB,utf + +/a\x{100}\x{101}+/IB,utf + +/[^\x{c4}]/IB + +/[\x{100}]/IB,utf + \x{100} + Z\x{100} + \x{100}Z + +/[\xff]/IB,utf + >\x{ff}< + +/[^\xff]/IB,utf + +/\x{100}abc(xyz(?1))/IB,utf + +/\777/I,utf + \x{1ff} + \777 + +/\x{100}+\x{200}/IB,utf + +/\x{100}+X/IB,utf + +/^[\QĀ\E-\QŐ\E/B,utf + +/X/utf + XX\x{d800}\=no_utf_check + XX\x{da00}\=no_utf_check + XX\x{dc00}\=no_utf_check + XX\x{de00}\=no_utf_check + XX\x{dfff}\=no_utf_check +\= Expect UTF error + XX\x{d800} + XX\x{da00} + XX\x{dc00} + XX\x{de00} + XX\x{dfff} + XX\x{110000} + XX\x{d800}\x{1234} +\= Expect no match + XX\x{d800}\=offset=3 + +/(?<=.)X/utf + XX\x{d800}\=offset=3 + +/(*UTF16)\x{11234}/ + abcd\x{11234}pqr + +/(*UTF)\x{11234}/I + abcd\x{11234}pqr + +/(*UTF-32)\x{11234}/ + abcd\x{11234}pqr + +/(*UTF-32)\x{112}/ + abcd\x{11234}pqr + +/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I + +/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I + +/\h/I,utf + ABC\x{09} + ABC\x{20} + ABC\x{a0} + ABC\x{1680} + ABC\x{180e} + ABC\x{2000} + ABC\x{202f} + ABC\x{205f} + ABC\x{3000} + +/\v/I,utf + ABC\x{0a} + ABC\x{0b} + ABC\x{0c} + ABC\x{0d} + ABC\x{85} + ABC\x{2028} + +/\h*A/I,utf + CDBABC + \x{2000}ABC + +/\R*A/I,bsr=unicode,utf + 
CDBABC + \x{2028}A + +/\v+A/I,utf + +/\s?xxx\s/I,utf + +/\sxxx\s/I,utf,tables=2 + AB\x{85}xxx\x{a0}XYZ + AB\x{a0}xxx\x{85}XYZ + +/\S \S/I,utf,tables=2 + \x{a2} \x{84} + A Z + +/a+/utf + a\x{123}aa\=offset=1 + a\x{123}aa\=offset=2 + a\x{123}aa\=offset=3 +\= Expect no match + a\x{123}aa\=offset=4 +\= Expect bad offset error + a\x{123}aa\=offset=5 + a\x{123}aa\=offset=6 + +/\x{1234}+/Ii,utf + +/\x{1234}+?/Ii,utf + +/\x{1234}++/Ii,utf + +/\x{1234}{2}/Ii,utf + +/[^\x{c4}]/IB,utf + +/X+\x{200}/IB,utf + +/\R/I,utf + +# Check bad offset + +/a/utf +\= Expect bad UTF-16 offset, or no match in 32-bit + \x{10000}\=offset=1 + \x{10000}ab\=offset=1 +\= Expect 16-bit match, 32-bit no match + \x{10000}ab\=offset=2 +\= Expect no match + \x{10000}ab\=offset=3 +\= Expect no match in 16-bit, bad offset in 32-bit + \x{10000}ab\=offset=4 +\= Expect bad offset + \x{10000}ab\=offset=5 + +/í¼€/utf + +/\w+\x{C4}/B,utf + a\x{C4}\x{C4} + +/\w+\x{C4}/B,utf,tables=2 + a\x{C4}\x{C4} + +/\W+\x{C4}/B,utf + !\x{C4} + +/\W+\x{C4}/B,utf,tables=2 + !\x{C4} + +/\W+\x{A1}/B,utf + !\x{A1} + +/\W+\x{A1}/B,utf,tables=2 + !\x{A1} + +/X\s+\x{A0}/B,utf + X\x20\x{A0}\x{A0} + +/X\s+\x{A0}/B,utf,tables=2 + X\x20\x{A0}\x{A0} + +/\S+\x{A0}/B,utf + X\x{A0}\x{A0} + +/\S+\x{A0}/B,utf,tables=2 + X\x{A0}\x{A0} + +/\x{a0}+\s!/B,utf + \x{a0}\x20! + +/\x{a0}+\s!/B,utf,tables=2 + \x{a0}\x20! 
+ +/(*UTF)abc/never_utf + +/abc/utf,never_utf + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf + +/AB\x{1fb0}/IB,utf + +/AB\x{1fb0}/IBi,utf + +/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf + \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} + \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} + +/[â±¥]/Bi,utf + +/[^â±¥]/Bi,utf + +/[[:blank:]]/B,ucp + +/\x{212a}+/Ii,utf + KKkk\x{212a} + +/s+/Ii,utf + SSss\x{17f} + +# Non-UTF characters should give errors in both 16-bit and 32-bit modes. + +/\x{110000}/utf + +/\o{4200000}/utf + +/\x{100}*A/IB,utf + A + +/\x{100}*\d(?R)/IB,utf + +/[Z\x{100}]/IB,utf + Z\x{100} + \x{100} + \x{100}Z + +/[z-\x{100}]/IB,utf + +/[z\Qa-d]Ā\E]/IB,utf + \x{100} + Ā + +/[ab\x{100}]abc(xyz(?1))/IB,utf + +/\x{100}*\s/IB,utf + +/\x{100}*\d/IB,utf + +/\x{100}*\w/IB,utf + +/\x{100}*\D/IB,utf + +/\x{100}*\S/IB,utf + +/\x{100}*\W/IB,utf + +/[\x{105}-\x{109}]/IBi,utf + \x{104} + \x{105} + \x{109} +\= Expect no match + \x{100} + \x{10a} + +/[z-\x{100}]/IBi,utf + Z + z + \x{39c} + \x{178} + | + \x{80} + \x{ff} + \x{100} + \x{101} +\= Expect no match + \x{102} + Y + y + +/[z-\x{100}]/IBi,utf + +/\x{3a3}B/IBi,utf + +/./utf + \x{110000} + +/(*UTF)abý¿¿¿¿¿z/B + +/abý¿¿¿¿¿z/utf + +/[\W\p{Any}]/B + abc + 123 + +/[\W\pL]/B + abc + \x{100} + \x{308} +\= Expect no match + 123 + +/[\s[:^ascii:]]/B,ucp + +/\pP/ucp + \x{7fffffff} + +# A special extra option allows excaped surrogate code points in 32-bit mode, +# but subjects containing them must not be UTF-checked. These patterns give +# errors in 16-bit mode. 
+ +/\x{d800}/I,utf,allow_surrogate_escapes + \x{d800}\=no_utf_check + +/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes + \x{dfff}\x{df01}\=no_utf_check + +# This has different starting code units in 8-bit mode. + +/^[^ab]/IB,utf + c + \x{ff} + \x{100} +\= Expect no match + aaa + +# Offsets are different in 8-bit mode. + +/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout + 123abcáyzabcdef789abcሴqr + +# A few script run tests in non-UTF mode (but they need Unicode support) + +/^(*script_run:.{4})/ + \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han + \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han + \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul + +/^(*sr:.*)/utf,allow_surrogate_escapes + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana + \x{d800}\x{dfff} Surrogates (Unknown) \=no_utf_check + +/(?(n/utf + +/(?(á/utf + +# Invalid UTF-16/32 tests. + +/.../g,match_invalid_utf + abcd\x{df00}wxzy\x{df00}pqrs + abcd\x{80}wxzy\x{df00}pqrs + +/abc/match_invalid_utf + ab\x{df00}ab\=ph +\= Expect no match + ab\x{df00}cdef\=ph + +/.a/match_invalid_utf + ab\=ph + ab\=ps +\= Expect no match + b\x{df00}\=ph + b\x{df00}\=ps + +/.a$/match_invalid_utf + ab\=ph + ab\=ps +\= Expect no match + b\x{df00}\=ph + b\x{df00}\=ps + +/ab$/match_invalid_utf + ab\x{df00}cdeab +\= Expect no match + ab\x{df00}cde + +/.../g,match_invalid_utf + abcd\x{80}wxzy\x{df00}pqrs + +/(?<=x)../g,match_invalid_utf + abcd\x{80}wxzy\x{df00}pqrs + abcd\x{80}wxzy\x{df00}xpqrs + +/X$/match_invalid_utf +\= Expect no match + X\x{df00} + +/(?<=..)X/match_invalid_utf,aftertext + AB\x{df00}AQXYZ + AB\x{df00}AQXYZ\=offset=5 + AB\x{df00}\x{df00}AXYZXC\=offset=5 +\= Expect no match + AB\x{df00}XYZ + AB\x{df00}XYZ\=offset=3 + AB\x{df00}AXYZ + AB\x{df00}AXYZ\=offset=4 + AB\x{df00}\x{df00}AXYZ\=offset=5 + +/.../match_invalid_utf +\= Expect no match + A\x{d800}B + A\x{110000}B + +/aa/utf,ucp,match_invalid_utf,global + aa\x{d800}aa + +/aa/utf,ucp,match_invalid_utf,global + 
\x{d800}aa + +/A\z/utf,match_invalid_utf + A\x{df00}\n + +# ---------------------------------------------------- + +/(*UTF)(?=\x{123})/I + +/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf + +/[\xff\x{ffff}]/I,utf + +/[\xff\x{ff}]/I,utf + +/[\xff\x{ff}]/I + +/[Ss]/I + +/[Ss]/I,utf + +/(?:\x{ff}|\x{3000})/I,utf + +# ---------------------------------------------------- +# UCP and casing tests + +/\x{120}/i,I + +/\x{c1}/i,I,ucp + +/[\x{120}\x{121}]/iB,ucp + +/[ab\x{120}]+/iB,ucp + aABb\x{121}\x{120} + +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} + +/\x{120}\x{c1}/i,ucp,no_start_optimize + \x{121}\x{e1} + +/\x{120}\x{c1}/i,ucp + \x{121}\x{e1} + +/[^\x{120}]/i,no_start_optimize + \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +\= Expect no match + \x{121} + +/[^\x{120}]/i + \x{121} + +/[^\x{120}]/i,ucp +\= Expect no match + \x{121} + +/\x{120}{2}/i,ucp + \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp +\= Expect no match + \x{121}\x{121} + +/\x{c1}+\x{e1}/iB,ucp + \x{c1}\x{c1}\x{c1} + +/\x{c1}+\x{e1}/iIB,ucp + \x{c1}\x{c1}\x{c1} + \x{e1}\x{e1}\x{e1} + +/a|\x{c1}/iI,ucp + \x{e1}xxx + +/\x{c1}|\x{e1}/iI,ucp + +/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended + X\x{e1}Y + +/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended + X\x{121}Y + +/s/i,ucp + \x{17f} + +/s/i,utf + \x{17f} + +/[^s]/i,ucp +\= Expect no match + \x{17f} + +/[^s]/i,utf +\= Expect no match + \x{17f} + +# ---------------------------------------------------- + +# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This +# fails in 16-bit mode, but is OK for 32-bit. + +/\x{802a0000}*/ + \x{802a0000}\x{802a0000} + +# UTF matching without UTF, check invalid UTF characters +/\X++/ + a\x{110000}\x{ffffffff} + +# This used to loop in 32-bit mode; it will fail in 16-bit mode. +/[\x{ffffffff}]/caseless,ucp + \x{ffffffff}xyz + +# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They +# will give errors in 16-bit mode. 
+ +/k*\x{ffffffff}/caseless,ucp + \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z + +# --------------------------------------------------------- + +# End of testinput12 diff --git a/testdata/testinput13 b/testdata/testinput13 new file mode 100644 index 0000000..93ac25f --- /dev/null +++ b/testdata/testinput13 @@ -0,0 +1,22 @@ +# These DFA tests are for the handling of characters greater than 255 in +# 16-bit or 32-bit, non-UTF mode. + +#forbid_utf +#subject dfa + +/^\x{ffff}+/i + \x{ffff} + +/^\x{ffff}?/i + \x{ffff} + +/^\x{ffff}*/i + \x{ffff} + +/^\x{ffff}{3}/i + \x{ffff}\x{ffff}\x{ffff} + +/^\x{ffff}{0,3}/i + \x{ffff} + +# End of testinput13 diff --git a/testdata/testinput14 b/testdata/testinput14 new file mode 100644 index 0000000..8880b5c --- /dev/null +++ b/testdata/testinput14 @@ -0,0 +1,108 @@ +# These test special UTF and UCP features of DFA matching. The output is +# different for the different widths. + +#subject dfa + +# ---------------------------------------------------- +# These are a selection of the more comprehensive tests that are run for +# non-DFA matching. 
+ +/X/utf + XX\x{d800} + XX\x{d800}\=offset=3 + XX\x{d800}\=no_utf_check + XX\x{da00} + XX\x{da00}\=no_utf_check + XX\x{dc00} + XX\x{dc00}\=no_utf_check + XX\x{de00} + XX\x{de00}\=no_utf_check + XX\x{dfff} + XX\x{dfff}\=no_utf_check + XX\x{110000} + XX\x{d800}\x{1234} + +/badutf/utf + X\xdf + XX\xef + XXX\xef\x80 + X\xf7 + XX\xf7\x80 + XXX\xf7\x80\x80 + +/shortutf/utf + XX\xdf\=ph + XX\xef\=ph + XX\xef\x80\=ph + \xf7\=ph + \xf7\x80\=ph + +# ---------------------------------------------------- +# UCP and casing tests - except for the first two, these will all fail in 8-bit +# mode because they are testing UCP without UTF and use characters > 255. + +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} + +/\x{c1}+\x{e1}/iB,ucp + \x{c1}\x{c1}\x{c1} + \x{e1}\x{e1}\x{e1} + +/\x{120}\x{c1}/i,ucp,no_start_optimize + \x{121}\x{e1} + +/\x{120}\x{c1}/i,ucp + \x{121}\x{e1} + +/[^\x{120}]/i,no_start_optimize + \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +\= Expect no match + \x{121} + +/[^\x{120}]/i + \x{121} + +/[^\x{120}]/i,ucp +\= Expect no match + \x{121} + +/\x{120}{2}/i,ucp + \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp +\= Expect no match + \x{121}\x{121} + +# ---------------------------------------------------- + +# ---------------------------------------------------- +# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit +# mode; for the other widths they will fail. 
+ +/k*\x{ffffffff}/caseless,ucp + \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z + +# ---------------------------------------------------- + +# End of testinput14 diff --git a/testdata/testinput15 b/testdata/testinput15 new file mode 100644 index 0000000..081b9e2 --- /dev/null +++ b/testdata/testinput15 @@ -0,0 +1,253 @@ +# These are: +# +# (1) Tests of the match-limiting features. The results are different for +# interpretive or JIT matching, so this test should not be run with JIT. The +# same tests are run using JIT in test 17. + +# (2) Other tests that must not be run with JIT. + +# These tests are first so that they don't inherit a large enough heap frame +# vector from a previous test. 
+ +/(*LIMIT_HEAP=21)\[(a)]{60}/expand + \[a]{60} + +"(*LIMIT_HEAP=21)()((?))()()()()()()()()()()()()()()()()()()()()()()()(())()()()()()()()()()()()()()()()()()()()()()(())()()()()()()()()()()()()()" + xx + +# ----------------------------------------------------------------------- + +/(a+)*zz/I + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits_noheap + aaaaaaaaaaaaaz\=find_limits_noheap + +!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I + /* this is a C style comment */\=find_limits_noheap + +/^(?>a)++/ + aa\=find_limits_noheap + aaaaaaaaa\=find_limits_noheap + +/(a)(?1)++/ + aa\=find_limits_noheap + aaaaaaaaa\=find_limits_noheap + +/a(?:.)*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits_noheap + +/a(?:.(*THEN))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits_noheap + +/a(?:.(*THEN:ABC))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits_noheap + +/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ + aabbccddee\=find_limits_noheap + +/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ + aabbccddee\=find_limits_noheap + +/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ + aabbccddee\=find_limits_noheap + +/(*LIMIT_MATCH=12bc)abc/ + +/(*LIMIT_MATCH=4294967290)abc/ + +/(*LIMIT_DEPTH=4294967280)abc/I + +/(a+)*zz/ +\= Expect no match + aaaaaaaaaaaaaz +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=3000 + +/(a+)*zz/ +\= Expect limit exceeded + aaaaaaaaaaaaaz\=depth_limit=10 + +/(*LIMIT_MATCH=3000)(a+)*zz/I +\= Expect limit exceeded + aaaaaaaaaaaaaz +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=60000 + +/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I +\= Expect limit exceeded + aaaaaaaaaaaaaz + +/(*LIMIT_MATCH=60000)(a+)*zz/I +\= Expect no match + aaaaaaaaaaaaaz +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=3000 + +/(*LIMIT_DEPTH=10)(a+)*zz/I +\= Expect limit exceeded + aaaaaaaaaaaaaz +\= Expect limit exceeded + aaaaaaaaaaaaaz\=depth_limit=1000 + +/(*LIMIT_DEPTH=10)(*LIMIT_DEPTH=1000)(a+)*zz/I +\= Expect no match + aaaaaaaaaaaaaz + 
+/(*LIMIT_DEPTH=1000)(a+)*zz/I +\= Expect no match + aaaaaaaaaaaaaz +\= Expect limit exceeded + aaaaaaaaaaaaaz\=depth_limit=10 + +# These three have infinitely nested recursions. + +/((?2))((?1))/ + abc + +/((?(R2)a+|(?1)b))()/ + aaaabcde + +/(?(R)a*(?1)|((?R))b)/ + aaaabcde + +# The allusedtext modifier does not work with JIT, which does not maintain +# the leftchar/rightchar data. + +/abc(?=xyz)/allusedtext + abcxyzpqr + abcxyzpqr\=aftertext + +/(?<=pqr)abc(?=xyz)/allusedtext + xyzpqrabcxyzpqr + xyzpqrabcxyzpqr\=aftertext + +/a\b/ + a.\=allusedtext + a\=allusedtext + +/abc\Kxyz/ + abcxyz\=allusedtext + +/abc(?=xyz(*ACCEPT))/ + abcxyz\=allusedtext + +/abc(?=abcde)(?=ab)/allusedtext + abcabcdefg + +#subject allusedtext + +/(?<=abc)123/ + xyzabc123pqr + xyzabc12\=ps + xyzabc12\=ph + +/\babc\b/ + +++abc+++ + +++ab\=ps + +++ab\=ph + +/(?<=abc)def/ + abc\=ph + +/(?<=123)(*MARK:xx)abc/mark + xxxx123a\=ph + xxxx123a\=ps + +/(?<=(?<=a)b)c.*/I + abc\=ph +\= Expect no match + xbc\=ph + +/(?<=ab)c.*/I + abc\=ph +\= Expect no match + xbc\=ph + +/abc(?<=bc)def/ + xxxabcd\=ph + +/(?<=ab)cdef/ + xxabcd\=ph + +/(?<=(?<=(?<=a)b)c)./I + 123abcXYZ + +/(?<=ab(cd(?<=...)))./I + abcdX + +/(?<=ab((?<=...)cd))./I + ZabcdX + +/(?<=((?<=(?<=ab).))(?1)(?1))./I + abxZ + +#subject +# ------------------------------------------------------------------- + +# These tests provoke recursion loops, which give a different error message +# when JIT is used. 
+ +/(?R)/I + abcd + +/(a|(?R))/I + abcd + defg + +/(ab|(bc|(de|(?R))))/I + abcd + fghi + +/(ab|(bc|(de|(?1))))/I + abcd + fghi + +/x(ab|(bc|(de|(?1)x)x)x)/I + xab123 + xfghi + +/(?!\w)(?R)/ + abcd + =abc + +/(?=\w)(?R)/ + =abc + abcd + +/(?a)++/ + aa\=find_limits + aaaaaaaaa\=find_limits + +/(a)(?1)++/ + aa\=find_limits + aaaaaaaaa\=find_limits + +/a(?:.)*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits + +/a(?:.(*THEN))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits + +/a(?:.(*THEN:ABC))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits + +/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ + aabbccddee\=find_limits + +/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ + aabbccddee\=find_limits + +/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ + aabbccddee\=find_limits + +/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/jitfast + aabbccddee\=find_limits + aabbccddee\=jitstack=1 + +/(a+)*zz/ +\= Expect no match + aaaaaaaaaaaaaz +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=3000 + +/(*LIMIT_MATCH=3000)(a+)*zz/I +\= Expect limit exceeded + aaaaaaaaaaaaaz +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=60000 + +/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I +\= Expect limit exceeded + aaaaaaaaaaaaaz + +/(*LIMIT_MATCH=60000)(a+)*zz/I +\= Expect no match + aaaaaaaaaaaaaz +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=3000 + +# These three have infinitely nested recursions. + +/((?2))((?1))/ +\= Expect JIT stack limit reached + abc + +/((?(R2)a+|(?1)b))()/ +\= Expect JIT stack limit reached + aaaabcde + +/(?(R)a*(?1)|((?R))b)/ +\= Expect JIT stack limit reached + aaaabcde + +# Invalid options disable JIT when called via pcre2_match(), causing the +# match to happen via the interpreter, but for fast JIT invalid options are +# ignored, so an unanchored match happens. 
+ +/abcd/ + abcd\=anchored +\= Expect no match + fail abcd\=anchored + +/abcd/jitfast + abcd\=anchored + succeed abcd\=anchored + +# Push/pop does not lose the JIT information, though jitverify applies only to +# compilation, but serializing (save/load) discards JIT data completely. + +/^abc\Kdef/info,push +#pop jitverify + abcdef + +/^abc\Kdef/info,push +#save testsaved1 +#load testsaved1 +#pop jitverify + abcdef + +#load testsaved1 +#pop jit,jitverify + abcdef + +/abcd/pushcopy,jitverify + abcd + +#pop jitverify + abcd + +# Test pattern compilation + +/(?:a|b|c|d|e)(?R)/jit=1 + +/(?:a|b|c|d|e)(?R)(?R)/jit=1 + +/(a(?:a|b|c|d|e)b){8,16}/jit=1 + +/(?:|a|){100}x/jit=1 + +# These tests provoke recursion loops, which give a different error message +# when JIT is used. + +/(?R)/I + abcd + +/(a|(?R))/I + abcd + defg + +/(ab|(bc|(de|(?R))))/I + abcd + fghi + +/(ab|(bc|(de|(?1))))/I + abcd + fghi + +/x(ab|(bc|(de|(?1)x)x)x)/I + xab123 + xfghi + +/(?!\w)(?R)/ + abcd + =abc + +/(?=\w)(?R)/ + =abc + abcd + +/(?b)c/posix_nosub + abc + +/(a)\1/posix_nosub + zaay + +/a?|b?/ + abc +\= Expect no match + ddd\=notempty + +/\w+A/ + CDAAAAB + +/\w+A/ungreedy + CDAAAAB + +/\Biss\B/I,aftertext + Mississippi + +/abc/\ + +"(?(?C)" + +"(?(?C))" + +/abcd/substitute_extended + +/\[A]{1000000}**/expand,regerror_buffsize=31 + +/\[A]{1000000}**/expand,regerror_buffsize=32 + +//posix_nosub + \=offset=70000 + +/^d(e)$/posix + acdef\=posix_startend=2:4 + acde\=posix_startend=2 +\= Expect no match + acdef + acdef\=posix_startend=2 + +/^a\x{00}b$/posix + a\x{00}b\=posix_startend=0:3 + +/"A" 00 "B"/hex + A\x{00}B\=posix_startend=0:3 + +/ABC/use_length + ABC + +/a\b(c/literal,posix + a\\b(c + +/a\b(c/literal,posix,dotall + +/((a)(b)?(c))/posix + 123ace + 123ace\=posix_startend=2:6 + +//posix +\= Expect errors + \=null_subject + abc\=null_subject + +/(*LIMIT_HEAP=0)xx/posix +\= Expect error + xxxx + +# End of testdata/testinput18 diff --git a/testdata/testinput19 b/testdata/testinput19 new file mode 
100644 index 0000000..7b2ab91 --- /dev/null +++ b/testdata/testinput19 @@ -0,0 +1,25 @@ +# This set of tests is run only with the 8-bit library. It tests the POSIX +# interface with UTF/UCP support, which is supported only with the 8-bit +# library. This test should not be run with JIT (which is not available for the +# POSIX interface). + +#pattern posix + +/a\x{1234}b/utf + a\x{1234}b + +/\w/ +\= Expect no match + +++\x{c2} + +/\w/ucp + +++\x{c2} + +/"^AB" 00 "\x{1234}$"/hex,utf + AB\x{00}\x{1234}\=posix_startend=0:6 + +/\w/utf +\= Expect UTF error + A\xabB + +# End of testdata/testinput19 diff --git a/testdata/testinput2 b/testdata/testinput2 new file mode 100644 index 0000000..b90489a --- /dev/null +++ b/testdata/testinput2 @@ -0,0 +1,6114 @@ +# This set of tests is not Perl-compatible. It checks on special features +# of PCRE2's API, error diagnostics, and the compiled code of some patterns. +# It also checks the non-Perl syntax that PCRE2 supports (Python, .NET, +# Oniguruma). There are also some tests where PCRE2 and Perl differ, +# either because PCRE2 can't be compatible, or there is a possible Perl +# bug. + +# NOTE: This is a non-UTF set of tests. When UTF support is needed, use +# test 5. 
+ +#forbid_utf +#newline_default lf any anycrlf + +# Test binary zeroes in the pattern + +# /a\0B/ where 0 is a binary zero +/61 5c 00 62/B,hex + a\x{0}b + +# /a0b/ where 0 is a binary zero +/61 00 62/B,hex + a\x{0}b + +# /(?#B0C)DE/ where 0 is a binary zero +/28 3f 23 42 00 43 29 44 45/B,hex + DE + +/(a)b|/I + +/abc/I + abc + defabc + abc\=anchored +\= Expect no match + defabc\=anchored + ABC + +/^abc/I + abc + abc\=anchored +\= Expect no match + defabc + defabc\=anchored + +/a+bc/I + +/a*bc/I + +/a{3}bc/I + +/(abc|a+z)/I + +/^abc$/I + abc +\= Expect no match + def\nabc + +/ab\idef/ + +/(?X)ab\idef/ + +/x{5,4}/ + +/z{65536}/ + +/[abcd/ + +/[\B]/B + +/[\R]/B + +/[\X]/B + +/[z-a]/ + +/^*/ + +/(abc/ + +/(?# abc/ + +/(?z)abc/ + +/.*b/I + +/.*?b/I + +/cat|dog|elephant/I + this sentence eventually mentions a cat + this sentences rambles on and on for a while and then reaches elephant + +/cat|dog|elephant/I + this sentence eventually mentions a cat + this sentences rambles on and on for a while and then reaches elephant + +/cat|dog|elephant/Ii + this sentence eventually mentions a CAT cat + this sentences rambles on and on for a while to elephant ElePhant + +/a|[bcd]/I + +/(a|[^\dZ])/I + +/(a|b)*[\s]/I + +/(ab\2)/ + +/{4,5}abc/ + +/(a)(b)(c)\2/I + abcb + abcb\=ovector=0 + abcb\=ovector=1 + abcb\=ovector=2 + abcb\=ovector=3 + abcb\=ovector=4 + +/(a)bc|(a)(b)\2/I + abc + abc\=ovector=0 + abc\=ovector=1 + abc\=ovector=2 + aba + aba\=ovector=0 + aba\=ovector=1 + aba\=ovector=2 + aba\=ovector=3 + aba\=ovector=4 + +/abc$/I,dollar_endonly + abc +\= Expect no match + abc\n + abc\ndef + +/(a)(b)(c)(d)(e)\6/ + +/the quick brown fox/I + the quick brown fox + this is a line with the quick brown fox + +/the quick brown fox/I,anchored + the quick brown fox +\= Expect no match + this is a line with the quick brown fox + +/ab(?z)cd/ + +/^abc|def/I + abcdef + abcdef\=notbol + +/.*((abc)$|(def))/I + defabc + defabc\=noteol + +/)/ + +/a[]b/ + +/[^aeiou ]{3,}/I + co-processors, and for + 
+/<.*>/I + abcghinop + +/<.*?>/I + abcghinop + +/<.*>/I,ungreedy + abcghinop + +/(?U)<.*>/I + abcghinop + +/<.*?>/I,ungreedy + abcghinop + +/={3,}/I,ungreedy + abc========def + +/(?U)={3,}?/I + abc========def + +/(?^abc)/Im + abc + def\nabc +\= Expect no match + defabc + +/(?<=ab(c+)d)ef/ + +/(?<=ab(?<=c+)d)ef/ + +/The next three are in testinput2 because they have variable length branches/ + +/(?<=bullock|donkey)-cart/I + the bullock-cart + a donkey-cart race +\= Expect no match + cart + horse-and-cart + +/(?<=ab(?i)x|y|z)/I + +/(?>.*)(?<=(abcd)|(xyz))/I + alphabetabcd + endingxyz + +/(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ/I + abxyZZ + abXyZZ + ZZZ + zZZ + bZZ + BZZ +\= Expect no match + ZZ + abXYZZ + zzz + bzz + +/(?[^()]+) # Either a sequence of non-brackets (no backtracking) + | # Or + (?R) # Recurse - i.e. nested bracketed string + )* # Zero or more contents + \) # Closing ) + /Ix + (abcd) + (abcd)xyz + xyz(abcd) + (ab(xy)cd)pqr + (ab(xycd)pqr + () abc () + 12(abcde(fsh)xyz(foo(bar))lmno)89 +\= Expect no match + abcd + abcd) + (abcd + +/\( ( (?>[^()]+) | (?R) )* \) /Igx + (ab(xy)cd)pqr + 1(abcd)(x(y)z)pqr + +/\( (?: (?>[^()]+) | (?R) ) \) /Ix + (abcd) + (ab(xy)cd) + (a(b(c)d)e) + ((ab)) +\= Expect no match + () + +/\( (?: (?>[^()]+) | (?R) )? \) /Ix + () + 12(abcde(fsh)xyz(foo(bar))lmno)89 + +/\( ( (?>[^()]+) | (?R) )* \) /Ix + (ab(xy)cd) + +/\( ( ( (?>[^()]+) | (?R) )* ) \) /Ix + (ab(xy)cd) + +/\( (123)? ( ( (?>[^()]+) | (?R) )* ) \) /Ix + (ab(xy)cd) + (123ab(xy)cd) + +/\( ( (123)? ( (?>[^()]+) | (?R) )* ) \) /Ix + (ab(xy)cd) + (123ab(xy)cd) + +/\( (((((((((( ( (?>[^()]+) | (?R) )* )))))))))) \) /Ix + (ab(xy)cd) + +/\( ( ( (?>[^()<>]+) | ((?>[^()]+)) | (?R) )* ) \) /Ix + (abcd(xyz

qrs)123) + +/\( ( ( (?>[^()]+) | ((?R)) )* ) \) /Ix + (ab(cd)ef) + (ab(cd(ef)gh)ij) + +/^[[:alnum:]]/IB + +/^[[:^alnum:]]/IB + +/^[[:alpha:]]/IB + +/^[[:^alpha:]]/IB + +/[_[:alpha:]]/I + +/^[[:ascii:]]/IB + +/^[[:^ascii:]]/IB + +/^[[:blank:]]/IB + +/^[[:^blank:]]/IB + +/[\n\x0b\x0c\x0d[:blank:]]/I + +/^[[:cntrl:]]/IB + +/^[[:digit:]]/IB + +/^[[:graph:]]/IB + +/^[[:lower:]]/IB + +/^[[:print:]]/IB + +/^[[:punct:]]/IB + +/^[[:space:]]/IB + +/^[[:upper:]]/IB + +/^[[:xdigit:]]/IB + +/^[[:word:]]/IB + +/^[[:^cntrl:]]/IB + +/^[12[:^digit:]]/IB + +/^[[:^blank:]]/IB + +/[01[:alpha:]%]/IB + +/[[.ch.]]/I + +/[[=ch=]]/I + +/[[:rhubarb:]]/I + +/[[:upper:]]/Ii + A + a + +/[[:lower:]]/Ii + A + a + +/((?-i)[[:lower:]])[[:lower:]]/Ii + ab + aB +\= Expect no match + Ab + AB + +/[\200-\110]/I + +/^(?(0)f|b)oo/I + +# This one's here because of the large output vector needed + +/(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$
))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s
|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\w+)\s+(\270)/I + 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC\=ovector=300 + +# This one's here because Perl does this differently and PCRE2 can't at present + +/(main(O)?)+/I + mainmain + mainOmain + +# These are all cases where Perl does it differently (nested captures) + +/^(a(b)?)+$/I + aba + +/^(aa(bb)?)+$/I + aabbaa + +/^(aa|aa(bb))+$/I + aabbaa + +/^(aa(bb)??)+$/I + aabbaa + +/^(?:aa(bb)?)+$/I + aabbaa + +/^(aa(b(b))?)+$/I + aabbaa + +/^(?:aa(b(b))?)+$/I + aabbaa + +/^(?:aa(b(?:b))?)+$/I + aabbaa + +/^(?:aa(bb(?:b))?)+$/I + aabbbaa + +/^(?:aa(b(?:bb))?)+$/I + aabbbaa + +/^(?:aa(?:b(b))?)+$/I + aabbaa + +/^(?:aa(?:b(bb))?)+$/I + aabbbaa + +/^(aa(b(bb))?)+$/I + aabbbaa + 
+/^(aa(bb(bb))?)+$/I + aabbbbaa + +# ---------------- + +/#/IBx + +/a#/IBx + +/[\s]/IB + +/[\S]/IB + +/a(?i)b/IB + ab + aB +\= Expect no match + AB + +/(a(?i)b)/IB + ab + aB +\= Expect no match + AB + +/ (?i)abc/IBx + +/#this is a comment + (?i)abc/IBx + +/123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890/IB + +/\Q123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890/IB + +/\Q\E/IB + \ + +/\Q\Ex/IB + +/ \Q\E/IB + +/a\Q\E/IB + abc + bca + bac + +/a\Q\Eb/IB + abc + +/\Q\Eabc/IB + +/x*+\w/IB +\= Expect no match + xxxxx + +/x?+/IB + +/x++/IB + +/x{1,3}+/B,no_auto_possess + +/x{1,3}+/Bi,no_auto_possess + +/[^x]{1,3}+/B,no_auto_possess + +/[^x]{1,3}+/Bi,no_auto_possess + +/(x)*+/IB + +/^(\w++|\s++)*$/I + now is the time for all good men to come to the aid of the party +\= Expect no match + this is not a line with only words and spaces! 
+ +/(\d++)(\w)/I + 12345a +\= Expect no match + 12345+ + +/a++b/I + aaab + +/(a++b)/I + aaab + +/(a++)b/I + aaab + +/([^()]++|\([^()]*\))+/I + ((abc(ade)ufh()()x + +/\(([^()]++|\([^()]+\))+\)/I + (abc) + (abc(def)xyz) +\= Expect no match + ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/(abc){1,3}+/IB + +/a+?+/I + +/a{2,3}?+b/I + +/(?U)a+?+/I + +/a{2,3}?+b/I,ungreedy + +/x(?U)a++b/IB + xaaaab + +/(?U)xa++b/IB + xaaaab + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/IB + +/^x(?U)a+b/IB + +/^x(?U)(a+)b/IB + +/[.x.]/I + +/[=x=]/I + +/[:x:]/I + +/\F/I + +/\l/I + +/\L/I + +/\N{name}/I + +/\u/I + +/\U/I + +/\N{4}/ + abcdefg + +/\N{,}/ + +/\N{25,ab}/ + +/a{1,3}b/ungreedy + ab + +/[/I + +/[a-/I + +/[[:space:]/I + +/[\s]/IB + +/[[:space:]]/IB + +/[[:space:]abcde]/IB + +/< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >/Ix + <> + + hij> + hij> + def> + +\= Expect no match + iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b/IB + +/\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b/IB + +/(.*)\d+\1/I + +/(.*)\d+/I + +/(.*)\d+\1/Is + +/(.*)\d+/Is + +/(.*(xyz))\d+\2/I + +/((.*))\d+\1/I + abc123bc + +/a[b]/I + +/(?=a).*/I + 
+/(?=abc).xyz/Ii + +/(?=abc)(?i).xyz/I + +/(?=a)(?=b)/I + +/(?=.)a/I + +/((?=abcda)a)/I + +/((?=abcda)ab)/I + +/()a/I + +/(?:(?=.)|(?abc>([^()]|\((?1)*\))*abc>123abc>1(2)3abc>(1(2)3)]*+) | (?2)) * >))/Ix + <> + + hij> + hij> + def> + +\= Expect no match + b|c)d(?Pe)/IB + abde + acde + +/(?:a(?Pc(?Pd)))(?Pa)/IB + +/(?Pa)...(?P=a)bbb(?P>a)d/IB + +/^\W*(?:(?P(?P.)\W*(?P>one)\W*(?P=two)|)|(?P(?P.)\W*(?P>three)\W*(?P=four)|\W*.\W*))\W*$/Ii + 1221 + Satan, oscillate my metallic sonatas! + A man, a plan, a canal: Panama! + Able was I ere I saw Elba. +\= Expect no match + The quick brown fox + +/((?(R)a|b))\1(?1)?/I + bb + bbaa + +/(.*)a/Is + +/(.*)a\1/Is + +/(.*)a(b)\2/Is + +/((.*)a|(.*)b)z/Is + +/((.*)a|(.*)b)z\1/Is + +/((.*)a|(.*)b)z\2/Is + +/((.*)a|(.*)b)z\3/Is + +/((.*)a|^(.*)b)z\3/Is + +/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a/Is + +/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\31/Is + +/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\32/Is + +/(a)(bc)/IB,no_auto_capture + abc + +/(?Pa)(bc)/IB,no_auto_capture + abc + +/(a)(?Pbc)/IB,no_auto_capture + +/(aaa(?C1)bbb|ab)/I + aaabbb + aaabbb\=callout_data=0 + aaabbb\=callout_data=1 +\= Expect no match + aaabbb\=callout_data=-1 + +/ab(?Pcd)ef(?Pgh)/I + abcdefgh + abcdefgh\=copy=1,get=two + abcdefgh\=copy=one,copy=two + abcdefgh\=copy=three + +/(?P)(?P)/IB + +/(?P)(?P)/IB + +/(?Pzz)(?Paa)/I + zzaa\=copy=Z + zzaa\=copy=A + +/(?Peks)(?Peccs)/I + +/(?Pabc(?Pdef)(?Pxyz))/I + +"\[((?P\d+)(,(?P>elem))*)\]"I + [10,20,30,5,5,4,4,2,43,23,4234] +\= Expect no match + [] + +"\[((?P\d+)(,(?P>elem))*)?\]"I + [10,20,30,5,5,4,4,2,43,23,4234] + [] + +/(a(b(?2)c))?/IB + +/(a(b(?2)c))*/IB + 
+/(a(b(?2)c)){0,2}/IB + +/[ab]{1}+/B + +/()(?1){1}/B + +/()(?1)/B + +/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii + Baby Bjorn Active Carrier - With free SHIPPING!! + +/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii + Baby Bjorn Active Carrier - With free SHIPPING!! + +/a*.*b/IB + +/(a|b)*.?c/IB + +/abc(?C255)de(?C)f/IB + +/abcde/IB,auto_callout + abcde +\= Expect no match + abcdfe + +/a*b/IB,auto_callout + ab + aaaab + aaaacb + +/a*b/IB,auto_callout + ab + aaaab + aaaacb + +/a+b/IB,auto_callout + ab + aaaab +\= Expect no match + aaaacb + +/(abc|def)x/IB,auto_callout + abcx + defx +\= Expect no match + abcdefzx + +/(abc|def)x/IB,auto_callout + abcx + defx +\= Expect no match + abcdefzx + +/(ab|cd){3,4}/I,auto_callout + ababab + abcdabcd + abcdcdcdcdcd + +/([ab]{,}c|xy)/IB,auto_callout +\= Expect no match + Note: that {,} does NOT introduce a quantifier + +/([ab]{,}c|xy)/IB,auto_callout +\= Expect no match + Note: that {,} does NOT introduce a quantifier + +/([ab]{1,4}c|xy){4,5}?123/IB,auto_callout + aacaacaacaacaac123 + +/\b.*/I + ab cd\=offset=1 + +/\b.*/Is + ab cd\=startoffset=1 + +/(?!.bcd).*/I + Xbcd12345 + +/abcde/I + ab\=ps + abc\=ps + abcd\=ps + abcde\=ps + the quick brown abc\=ps +\= Expect no match\=ps + the quick brown abxyz fox\=ps + +"^(0?[1-9]|[12][0-9]|3[01])/(0?[1-9]|1[012])/(20)?\d\d$"I + 13/05/04\=ps + 13/5/2004\=ps + 02/05/09\=ps + 1\=ps + 1/2\=ps + 1/2/0\=ps + 1/2/04\=ps + 0\=ps + 02/\=ps + 02/0\=ps + 02/1\=ps +\= Expect no match\=ps + \=ps + 123\=ps + 33/4/04\=ps + 3/13/04\=ps + 0/1/2003\=ps + 0/\=ps + 02/0/\=ps + 02/13\=ps + +/0{0,2}ABC/I + +/\d{3,}ABC/I + +/\d*ABC/I + +/[abc]+DE/I + +/[abc]?123/I + 123\=ps + a\=ps + b\=ps + c\=ps + c12\=ps + c123\=ps + +/^(?:\d){3,5}X/I + 1\=ps + 123\=ps + 123X + 1234\=ps + 1234X + 12345\=ps + 12345X +\= Expect no match + 1X + 123456\=ps + +"<(\w+)/?>(.)*"Igms + \n\n\nPartner der LCO\nde\nPartner der LINEAS Consulting\nGmbH\nLINEAS Consulting GmbH Hamburg\nPartnerfirmen\n30 
days\nindex,follow\n\nja\n3\nPartner\n\n\nLCO\nLINEAS Consulting\n15.10.2003\n\n\n\n\nDie Partnerfirmen der LINEAS Consulting\nGmbH\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\=jitstack=1024 + +/line\nbreak/I + this is a line\nbreak + line one\nthis is a line\nbreak in the second line + +/line\nbreak/I,firstline + this is a line\nbreak +\= Expect no match + line one\nthis is a line\nbreak in the second line + +/line\nbreak/Im,firstline + this is a line\nbreak +\= Expect no match + line one\nthis is a line\nbreak in the second line + +/(?i)(?-i)AbCd/I + AbCd +\= Expect no match + abcd + +/a{11111111111111111111}/I + +/(){64294967295}/I + +/(){2,4294967295}/I + +"(?i:a)(?i:b)(?i:c)(?i:d)(?i:e)(?i:f)(?i:g)(?i:h)(?i:i)(?i:j)(k)(?i:l)A\1B"I + abcdefghijklAkB + +"(?Pa)(?Pb)(?Pc)(?Pd)(?Pe)(?Pf)(?Pg)(?Ph)(?Pi)(?Pj)(?Pk)(?Pl)A\11B"I + abcdefghijklAkB + +"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)A\11B"I + abcdefghijklAkB + +"(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)"I + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +"(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)"I + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/[^()]*(?:\((?R)\)[^()]*)*/I + 
(this(and)that + (this(and)that) + (this(and)that)stuff + +/[^()]*(?:\((?>(?R))\)[^()]*)*/I + (this(and)that + (this(and)that) + +/[^()]*(?:\((?R)\))*[^()]*/I + (this(and)that + (this(and)that) + +/(?:\((?R)\))*[^()]*/I + (this(and)that + (this(and)that) + ((this)) + +/(?:\((?R)\))|[^()]*/I + (this(and)that + (this(and)that) + (this) + ((this)) + +/\x{0000ff}/I + +/^((?Pa1)|(?Pa2)b)/I + +/^((?Pa1)|(?Pa2)b)/I,dupnames + a1b\=copy=A + a2b\=copy=A + a1b\=copy=Z,copy=A + +/(?|(?)(?)(?)|(?)(?)(?))/I,dupnames + +/^(?Pa)(?Pb)/I,dupnames + ab\=copy=A + +/^(?Pa)(?Pb)|cd/I,dupnames + ab\=copy=A + cd\=copy=A + +/^(?Pa)(?Pb)|cd(?Pef)(?Pgh)/I,dupnames + cdefgh\=copy=A + +/^((?Pa1)|(?Pa2)b)/I,dupnames + a1b\=get=A + a2b\=get=A + a1b\=get=Z,get=A + +/^(?Pa)(?Pb)/I,dupnames + ab\=get=A + +/^(?Pa)(?Pb)|cd/I,dupnames + ab\=get=A + cd\=get=A + +/^(?Pa)(?Pb)|cd(?Pef)(?Pgh)/I,dupnames + cdefgh\=get=A + +/(?J)^((?Pa1)|(?Pa2)b)/I + a1b\=copy=A + a2b\=copy=A + +/^(?Pa) (?J:(?Pb)(?Pc)) (?Pd)/I + +# In this next test, J is not set at the outer level; consequently it isn't set +# in the pattern's options; consequently pcre2_substring_get_byname() produces +# a random value. 
+ +/^(?Pa) (?J:(?Pb)(?Pc)) (?Pd)/I + a bc d\=copy=A,copy=B,copy=C + +/^(?Pa)?(?(A)a|b)/I + aabc + bc +\= Expect no match + abc + +/(?:(?(ZZ)a|b)(?PX))+/I + bXaX + +/(?:(?(2y)a|b)(X))+/I + +/(?:(?(ZA)a|b)(?PX))+/I + +/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?PX))+/I + bbXaaX + +/(?:(?(ZZ)a|\(b\))\\(?PX))+/I + (b)\\Xa\\X + +/(?PX|Y))+/I + bXXaYYaY + bXYaXXaX + +/()()()()()()()()()(?:(?(A)(?P=A)a|b)(?PX|Y))+/I + bXXaYYaY + +/\s*,\s*/I + \x0b,\x0b + \x0c,\x0d + +/^abc/Im,newline=lf + xyz\nabc + xyz\r\nabc +\= Expect no match + xyz\rabc + xyzabc\r + xyzabc\rpqr + xyzabc\r\n + xyzabc\r\npqr + +/^abc/Im,newline=crlf + xyz\r\nabclf> +\= Expect no match + xyz\nabclf + xyz\rabclf + +/^abc/Im,newline=cr + xyz\rabc +\= Expect no match + xyz\nabc + xyz\r\nabc + +/^abc/Im,newline=bad + +/.*/I,newline=lf + abc\ndef + abc\rdef + abc\r\ndef + +/.*/I,newline=cr + abc\ndef + abc\rdef + abc\r\ndef + +/.*/I,newline=crlf + abc\ndef + abc\rdef + abc\r\ndef + +/\w+(.)(.)?def/Is + abc\ndef + abc\rdef + abc\r\ndef + +/(?P25[0-5]|2[0-4]\d|[01]?\d?\d)(?:\.(?P>B)){3}/I + +/()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + (.(.))/Ix + XY\=ovector=133 + +/(a*b|(?i:c*(?-i)d))/I + +/()[ab]xyz/I + +/(|)[ab]xyz/I + +/(|c)[ab]xyz/I + +/(|c?)[ab]xyz/I + +/(d?|c?)[ab]xyz/I + +/(d?|c)[ab]xyz/I + +/^a*b\d/IB + +/^a*+b\d/IB + +/^a*?b\d/IB + +/^a+A\d/IB + aaaA5 +\= Expect no match + aaaa5 + +/^a*A\d/IBi + aaaA5 + aaaa5 + a5 + +/(a*|b*)[cd]/I + +/(a+|b*)[cd]/I + +/(a*|b+)[cd]/I + +/(a+|b+)[cd]/I + +/(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((( + (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((( + ((( + a + )))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) + 
)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) + ))) +/Ix + large nest + +/a*\d/B + +/a*\D/B + +/0*\d/B + +/0*\D/B + +/a*\s/B + +/a*\S/B + +/ *\s/B + +/ *\S/B + +/a*\w/B + +/a*\W/B + +/=*\w/B + +/=*\W/B + +/\d*a/B + +/\d*2/B + +/\d*\d/B + +/\d*\D/B + +/\d*\s/B + +/\d*\S/B + +/\d*\w/B + +/\d*\W/B + +/\D*a/B + +/\D*2/B + +/\D*\d/B + +/\D*\D/B + +/\D*\s/B + +/\D*\S/B + +/\D*\w/B + +/\D*\W/B + +/\s*a/B + +/\s*2/B + +/\s*\d/B + +/\s*\D/B + +/\s*\s/B + +/\s*\S/B + +/\s*\w/B + +/\s*\W/B + +/\S*a/B + +/\S*2/B + +/\S*\d/B + +/\S*\D/B + +/\S*\s/B + +/\S*\S/B + +/\S*\w/B + +/\S*\W/B + +/\w*a/B + +/\w*2/B + +/\w*\d/B + +/\w*\D/B + +/\w*\s/B + +/\w*\S/B + +/\w*\w/B + +/\w*\W/B + +/\W*a/B + +/\W*2/B + +/\W*\d/B + +/\W*\D/B + +/\W*\s/B + +/\W*\S/B + +/\W*\w/B + +/\W*\W/B + +/[^a]+a/B + +/[^a]+a/Bi + +/[^a]+A/Bi + +/[^a]+b/B + +/[^a]+\d/B + +/a*[^a]/B + +/(?Px)(?Py)/I + xy\=copy=abc,copy=xyz + +/(?x)(?'xyz'y)/I + xy\=copy=abc,copy=xyz + +/(?x)(?'xyz>y)/I + +/(?P'abc'x)(?Py)/I + +/^(?:(?(ZZ)a|b)(?X))+/ + bXaX + bXbX +\= Expect no match + aXaX + aXbX + +/^(?P>abc)(?xxx)/ + +/^(?P>abc)(?x|y)/ + xx + xy + yy + yx + +/^(?P>abc)(?Px|y)/ + xx + xy + yy + yx + +/^((?(abc)a|b)(?x|y))+/ + bxay + bxby +\= Expect no match + axby + +/^(((?P=abc)|X)(?x|y))+/ + XxXxxx + XxXyyx + XxXyxx +\= Expect no match + x + +/^(?1)(abc)/ + abcabc + +/^(?:(?:\1|X)(a|b))+/ + Xaaa + Xaba + +/^[\E\Qa\E-\Qz\E]+/B + +/^[a\Q]bc\E]/B + +/^[a-\Q\E]/B + +/^(?P>abc)[()](?)/B + +/^((?(abc)y)[()](?Px))+/B + (xy)x + +/^(?P>abc)\Q()\E(?)/B + +/^(?P>abc)[a\Q(]\E(](?)/B + +/^(?P>abc) # this is (a comment) + (?)/Bx + +/^\W*(?:(?(?.)\W*(?&one)\W*\k|)|(?(?.)\W*(?&three)\W*\k'four'|\W*.\W*))\W*$/Ii + 1221 + Satan, oscillate my metallic sonatas! + A man, a plan, a canal: Panama! + Able was I ere I saw Elba. 
+\= Expect no match + The quick brown fox + +/(?=(\w+))\1:/I + abcd: + +/(?=(?'abc'\w+))\k:/I + abcd: + +/(?'abc'a|b)(?d|e)\k{2}/dupnames + adaa +\= Expect no match + addd + adbb + +/(?'abc'a|b)(?d|e)(?&abc){2}/dupnames + bdaa + bdab +\= Expect no match + bddd + +/(?( (?'B' abc (?(R) (?(R&A)1) (?(R&B)2) X | (?1) (?2) (?R) ))) /x + abcabc1Xabc2XabcXabcabc + +/(? (?'B' abc (?(R) (?(R&C)1) (?(R&B)2) X | (?1) (?2) (?R) ))) /x + +/^(?(DEFINE) abc | xyz ) /x + +/(?(DEFINE) abc) xyz/Ix + +/(a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4\=ovector=0 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\=ovector=0 + +/^a.b/newline=lf + a\rb +\= Expect no match + a\nb + +/^a.b/newline=cr + a\nb +\= Expect no match + a\rb + +/^a.b/newline=anycrlf + a\x85b +\= Expect no match + a\rb + +/^a.b/newline=any +\= Expect no match + a\nb + a\rb + a\x85b + +/^abc./gmx,newline=any + abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 JUNK + +/abc.$/gmx,newline=any + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7 abc9 + +/^a\Rb/bsr=unicode + a\nb + a\rb + a\r\nb + a\x0bb + a\x0cb + a\x85b +\= Expect no match + a\n\rb + +/^a\R*b/bsr=unicode + ab + a\nb + a\rb + a\r\nb + a\x0bb + a\x0cb + a\x85b + a\n\rb + a\n\r\x85\x0cb + +/^a\R+b/bsr=unicode + a\nb + a\rb + a\r\nb + a\x0bb + a\x0cb + a\x85b + a\n\rb + a\n\r\x85\x0cb +\= Expect no match + ab + +/^a\R{1,3}b/bsr=unicode + a\nb + a\n\rb + a\n\r\x85b + a\r\n\r\nb + a\r\n\r\n\r\nb + a\n\r\n\rb + a\n\n\r\nb +\= Expect no match + a\n\n\n\rb + a\r + +/(?&abc)X(?P)/I + abcPXP123 + +/(?1)X(?P)/I + abcPXP123 + +/(?:a(?&abc)b)*(?x)/ + 123axbaxbaxbx456 + 123axbaxbaxb456 + +/(?:a(?&abc)b){1,5}(?x)/ + 123axbaxbaxbx456 + +/(?:a(?&abc)b){2,5}(?x)/ + 123axbaxbaxbx456 + +/(?:a(?&abc)b){2,}(?x)/ + 123axbaxbaxbx456 + +/(abc)(?i:(?1))/ + defabcabcxyz +\= Expect no match + DEFabcABCXYZ + +/(abc)(?:(?i)(?1))/ + defabcabcxyz +\= Expect no match + DEFabcABCXYZ + +/^(a)\g-2/ + 
+/^(a)\g/ + +/^(a)\g{0}/ + +/^(a)\g{3/ + +/^(a)\g{aa}/ + +/^a.b/newline=lf + a\rb +\= Expect no match + a\nb + +/.+foo/ + afoo +\= Expect no match + \r\nfoo + \nfoo + +/.+foo/newline=crlf + afoo + \nfoo +\= Expect no match + \r\nfoo + +/.+foo/newline=any + afoo +\= Expect no match + \nfoo + \r\nfoo + +/.+foo/s + afoo + \r\nfoo + \nfoo + +/^$/gm,newline=any + abc\r\rxyz + abc\n\rxyz +\= Expect no match + abc\r\nxyz + +/(?m)^$/g,newline=any,aftertext + abc\r\n\r\n + +/(?m)^$|^\r\n/g,newline=any,aftertext + abc\r\n\r\n + +/(?m)$/g,newline=any,aftertext + abc\r\n\r\n + +/abc.$/gmx,newline=anycrlf + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc9 + +/^X/m + XABC +\= Expect no match + XABC\=notbol + +/(ab|c)(?-1)/B + abc + +/xy(?+1)(abc)/B + xyabcabc +\= Expect no match + xyabc + +/x(?-0)y/ + +/x(?-1)y/ + +/x(?+0)y/ + +/x(?+1)y/ + +/^(abc)?(?(-1)X|Y)/B + abcX + Y +\= Expect no match + abcY + +/^((?(+1)X|Y)(abc))+/B + YabcXabc + YabcXabcXabc +\= Expect no match + XabcXabc + +/(?(-1)a)/B + +/((?(-1)a))/B + +/((?(-2)a))/B + +/^(?(+1)X|Y)(.)/B + Y! 
+ +/(?tom|bon)-\k{A}/ + tom-tom + bon-bon +\= Expect no match + tom-bon + +/\g{A/ + +/(?|(abc)|(xyz))/B + >abc< + >xyz< + +/(x)(?|(abc)|(xyz))(x)/B + xabcx + xxyzx + +/(x)(?|(abc)(pqr)|(xyz))(x)/B + xabcpqrx + xxyzx + +/\H++X/B +\= Expect no match + XXXX + +/\H+\hY/B + XXXX Y + +/\H+ Y/B + +/\h+A/B + +/\v*B/B + +/\V+\x0a/B + +/A+\h/B + +/ *\H/B + +/A*\v/B + +/\x0b*\V/B + +/\d+\h/B + +/\d*\v/B + +/S+\h\S+\v/B + +/\w{3,}\h\w+\v/B + +/\h+\d\h+\w\h+\S\h+\H/B + +/\v+\d\v+\w\v+\S\v+\V/B + +/\H+\h\H+\d/B + +/\V+\v\V+\w/B + +/\( (?: [^()]* | (?R) )* \)/x +(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0
(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(00)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)
0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)\=jitstack=1024 + +/[\E]AAA/ + +/[\Q\E]AAA/ + +/[^\E]AAA/ + +/[^\Q\E]AAA/ + +/[\E^]AAA/ + +/[\Q\E^]AAA/ + +/A(*PRUNE)B(*SKIP)C(*THEN)D(*COMMIT)E(*F)F(*FAIL)G(?!)H(*ACCEPT)I/B + +/^a+(*FAIL)/auto_callout +\= Expect no match + aaaaaa + +/a+b?c+(*FAIL)/auto_callout +\= Expect no match + aaabccc + +/a+b?(*PRUNE)c+(*FAIL)/auto_callout +\= Expect no match + aaabccc + +/a+b?(*COMMIT)c+(*FAIL)/auto_callout +\= Expect no match + aaabccc + +/a+b?(*SKIP)c+(*FAIL)/auto_callout +\= Expect no match + aaabcccaaabccc + +/a+b?(*THEN)c+(*FAIL)/auto_callout +\= Expect no match + aaabccc + +/a(*MARK)b/ + +/\g6666666666/ + +/[\g6666666666]/B + +/(?1)\c[/ + +/.+A/newline=crlf +\= Expect no match + \r\nA + +/\nA/newline=crlf + \r\nA + +/[\r\n]A/newline=crlf + \r\nA + +/(\r|\n)A/newline=crlf + \r\nA + +/a(*CR)b/ + +/(*CR)a.b/ + a\nb +\= Expect no match + a\rb + +/(*CR)a.b/newline=lf + a\nb +\= Expect no match + a\rb + +/(*LF)a.b/newline=CRLF + a\rb +\= Expect no match + a\nb + +/(*CRLF)a.b/ + a\rb + a\nb +\= Expect no match + a\r\nb + +/(*ANYCRLF)a.b/newline=CR +\= Expect no match + a\rb + a\nb + a\r\nb + +/(*ANY)a.b/newline=cr +\= Expect no match + a\rb + a\nb + a\r\nb + a\x85b + +/(*ANY).*/g + abc\r\ndef + +/(*ANYCRLF).*/g + abc\r\ndef + +/(*CRLF).*/g + abc\r\ndef + +/(*NUL)^.*/ + a\nb\x00ccc + +/(*NUL)^.*/s + a\nb\x00ccc + +/^x/m,newline=NUL + ab\x00xy + +/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex + x\nyz + +/(*NUL)^X\NY/ + 
X\nY + X\rY +\= Expect no match + X\x00Y + +/a\Rb/I,bsr=anycrlf + a\rb + a\nb + a\r\nb +\= Expect no match + a\x85b + a\x0bb + +/a\Rb/I,bsr=unicode + a\rb + a\nb + a\r\nb + a\x85b + a\x0bb + +/a\R?b/I,bsr=anycrlf + a\rb + a\nb + a\r\nb +\= Expect no match + a\x85b + a\x0bb + +/a\R?b/I,bsr=unicode + a\rb + a\nb + a\r\nb + a\x85b + a\x0bb + +/a\R{2,4}b/I,bsr=anycrlf + a\r\n\nb + a\n\r\rb + a\r\n\r\n\r\n\r\nb +\= Expect no match + a\x85\x85b + a\x0b\x0bb + +/a\R{2,4}b/I,bsr=unicode + a\r\rb + a\n\n\nb + a\r\n\n\r\rb + a\x85\x85b + a\x0b\x0bb +\= Expect no match + a\r\r\r\r\rb + +/(*BSR_ANYCRLF)a\Rb/I + a\nb + a\rb + +/(*BSR_UNICODE)a\Rb/I + a\x85b + +/(*BSR_ANYCRLF)(*CRLF)a\Rb/I + a\nb + a\rb + +/(*CRLF)(*BSR_UNICODE)a\Rb/I + a\x85b + +/(*CRLF)(*BSR_ANYCRLF)(*CR)ab/I + +/(?)(?&)/ + +/(?)(?&a)/ + +/(?)(?&aaaaaaaaaaaaaaaaaaaaaaa)/ + +/(?+-a)/ + +/(?-+a)/ + +/(?(-1))/ + +/(?(+10))/ + +/(?(10))/ + +/(?(+2))()()/ + +/(?(2))()()/ + +/\k''/ + +/\k<>/ + +/\k{}/ + +/\k/ + +/\kabc/ + +/(?P=)/ + +/(?P>)/ + +/[[:foo:]]/ + +/[[:1234:]]/ + +/[[:f\oo:]]/ + +/[[: :]]/ + +/[[:...:]]/ + +/[[:l\ower:]]/ + +/[[:abc\:]]/ + +/[abc[:x\]pqr:]]/ + +/[[:a\dz:]]/ + +/(^(a|b\g<-1'c))/ + +/^(?+1)(?x|y){0}z/ + xzxx + yzyy +\= Expect no match + xxz + +/(\3)(\1)(a)/ +\= Expect no match + cat + +/(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames + cat + +/TA]/ + The ACTA] comes + +/TA]/allow_empty_class,match_unset_backref,dupnames + The ACTA] comes + +/(?2)[]a()b](abc)/ + abcbabc + +/(?2)[^]a()b](abc)/ + abcbabc + +/(?1)[]a()b](abc)/ + abcbabc +\= Expect no match + abcXabc + +/(?1)[^]a()b](abc)/ + abcXabc +\= Expect no match + abcbabc + +/(?2)[]a()b](abc)(xyz)/ + xyzbabcxyz + +/(?&N)[]a(?)](?abc)/ + abc)](abc)/ + abcY)/ + XYabcdY + +/Xa{2,4}b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/Xa{2,4}?b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/Xa{2,4}+b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\d{2,4}b/ + X\=ps + X3\=ps + X33\=ps + X333\=ps + X3333\=ps + 
+/X\d{2,4}?b/ + X\=ps + X3\=ps + X33\=ps + X333\=ps + X3333\=ps + +/X\d{2,4}+b/ + X\=ps + X3\=ps + X33\=ps + X333\=ps + X3333\=ps + +/X\D{2,4}b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\D{2,4}?b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\D{2,4}+b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[abc]{2,4}b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[abc]{2,4}?b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[abc]{2,4}+b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[^a]{2,4}b/ + X\=ps + Xz\=ps + Xzz\=ps + Xzzz\=ps + Xzzzz\=ps + +/X[^a]{2,4}?b/ + X\=ps + Xz\=ps + Xzz\=ps + Xzzz\=ps + Xzzzz\=ps + +/X[^a]{2,4}+b/ + X\=ps + Xz\=ps + Xzz\=ps + Xzzz\=ps + Xzzzz\=ps + +/(Y)X\1{2,4}b/ + YX\=ps + YXY\=ps + YXYY\=ps + YXYYY\=ps + YXYYYY\=ps + +/(Y)X\1{2,4}?b/ + YX\=ps + YXY\=ps + YXYY\=ps + YXYYY\=ps + YXYYYY\=ps + +/(Y)X\1{2,4}+b/ + YX\=ps + YXY\=ps + YXYY\=ps + YXYYY\=ps + YXYYYY\=ps + +/\++\KZ|\d+X|9+Y/startchar + ++++123999\=ps + ++++123999Y\=ps + ++++Z1234\=ps + +/Z(*F)/ +\= Expect no match + Z\=ps + ZA\=ps + +/Z(?!)/ +\= Expect no match + Z\=ps + ZA\=ps + +/dog(sbody)?/ + dogs\=ps + dogs\=ph + +/dog(sbody)??/ + dogs\=ps + dogs\=ph + +/dog|dogsbody/ + dogs\=ps + dogs\=ph + +/dogsbody|dog/ + dogs\=ps + dogs\=ph + +/\bthe cat\b/ + the cat\=ps + the cat\=ph + +/abc/ + abc\=ps + abc\=ph + +/abc\K123/startchar + xyzabc123pqr + xyzabc12\=ps + xyzabc12\=ph + +/(?<=abc)123/ + xyzabc123pqr + xyzabc12\=ps + xyzabc12\=ph + +/\babc\b/ + +++abc+++ + +++ab\=ps + +++ab\=ph + +/(?&word)(?&element)(?(DEFINE)(?<[^m][^>]>[^<])(?\w*+))/B + +/(?&word)(?&element)(?(DEFINE)(?<[^\d][^>]>[^<])(?\w*+))/B + +/(ab)(x(y)z(cd(*ACCEPT)))pq/B + +/abc\K/aftertext,startchar + abcdef + abcdef\=notempty_atstart + xyzabcdef\=notempty_atstart +\= Expect no match + abcdef\=notempty + xyzabcdef\=notempty + +/^(?:(?=abc)|abc\K)/aftertext,startchar + abcdef + abcdef\=notempty_atstart +\= Expect no match + abcdef\=notempty + +/a?b?/aftertext + 
xyz + xyzabc + xyzabc\=notempty + xyzabc\=notempty_atstart + xyz\=notempty_atstart +\= Expect no match + xyz\=notempty + +/^a?b?/aftertext + xyz + xyzabc +\= Expect no match + xyzabc\=notempty + xyzabc\=notempty_atstart + xyz\=notempty_atstart + xyz\=notempty + +/^(?a|b\gc)/ + aaaa + bacxxx + bbaccxxx + bbbacccxx + +/^(?a|b\g'name'c)/ + aaaa + bacxxx + bbaccxxx + bbbacccxx + +/^(a|b\g<1>c)/ + aaaa + bacxxx + bbaccxxx + bbbacccxx + +/^(a|b\g'1'c)/ + aaaa + bacxxx + bbaccxxx + bbbacccxx + +/^(a|b\g'-1'c)/ + aaaa + bacxxx + bbaccxxx + bbbacccxx + +/(^(a|b\g<-1>c))/ + aaaa + bacxxx + bbaccxxx + bbbacccxx + +/(?-i:\g)(?i:(?a))/ + XaaX + XAAX + +/(?i:\g)(?-i:(?a))/ + XaaX +\= Expect no match + XAAX + +/(?-i:\g<+1>)(?i:(a))/ + XaaX + XAAX + +/(?=(?(?#simplesyntax)\$(?[a-zA-Z_\x{7f}-\x{ff}][a-zA-Z0-9_\x{7f}-\x{ff}]*)(?:\[(?[a-zA-Z0-9_\x{7f}-\x{ff}]+|\$\g)\]|->\g(\(.*?\))?)?|(?#simple syntax withbraces)\$\{(?:\g(?\[(?:\g|'(?:\\.|[^'\\])*'|"(?:\g|\\.|[^"\\])*")\])?|\g|\$\{\g\})\}|(?#complexsyntax)\{(?\$(?\g(\g*|\(.*?\))?)(?:->\g)*|\$\g|\$\{\g\})\}))\{/ + +/(?a|b|c)\g*/ + abc + accccbbb + +/^X(?7)(a)(?|(b)|(q)(r)(s))(c)(d)(Y)/ + XYabcdY + +/(?<=b(?1)|zzz)(a)/ + xbaax + xzzzax + +/(a)(?<=b\1)/ + +/(a)(?<=b+(?1))/ + +/(a+)(?<=b(?1))/ + +/(a(?<=b(?1)))/ + +/(?<=b(?1))xyz/ + +/(?<=b(?1))xyz(b+)pqrstuvew/ + +/(a|bc)\1/I + +/(a|bc)\1{2,3}/I + +/(a|bc)(?1)/I + +/(a|b\1)(a|b\1)/I + +/(a|b\1){2}/I + +/(a|bbbb\1)(a|bbbb\1)/I + +/(a|bbbb\1){2}/I + +/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/I + +/]{0,})>]{0,})>([\d]{0,}\.)(.*)((
([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/Iis + +"(?>.*/)foo"I + +/(?(?=[^a-z]+[a-z]) \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) /Ix + +/(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))/Ii + +/(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))/I + +/A)|(?
B))/I + AB\=copy=a + BA\=copy=a + +/(?|(?A)|(?B))/ + +/(?:a(? (?')|(?")) | + b(? (?')|(?")) ) + (?('quote')[a-z]+|[0-9]+)/Ix,dupnames + a"aaaaa + b"aaaaa +\= Expect no match + b"11111 + a"11111 + +/^(?|(a)(b)(c)(?d)|(?e)) (?('D')X|Y)/IBx,dupnames + abcdX + eX +\= Expect no match + abcdY + ey + +/(?a) (b)(c) (?d (?(R&A)$ | (?4)) )/IBx,dupnames + abcdd +\= Expect no match + abcdde + +/abcd*/ + xxxxabcd\=ps + xxxxabcd\=ph + +/abcd*/i + xxxxabcd\=ps + xxxxabcd\=ph + XXXXABCD\=ps + XXXXABCD\=ph + +/abc\d*/ + xxxxabc1\=ps + xxxxabc1\=ph + +/(a)bc\1*/ + xxxxabca\=ps + xxxxabca\=ph + +/abc[de]*/ + xxxxabcde\=ps + xxxxabcde\=ph + +/(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames + cat + +/(\3)(\1)(a)/I,allow_empty_class,match_unset_backref,dupnames + cat + +/(\3)(\1)(a)/I +\= Expect no match + cat + +/i(?(DEFINE)(?a))/I + i + +/()i(?(1)a)/I + ia + +/(?i)a(?-i)b|c/B + XabX + XAbX + CcC +\= Expect no match + XABX + +/(?i)a(?s)b|c/B + +/(?i)a(?s-i)b|c/B + +/^(ab(c\1)d|x){2}$/B + xabcxd + +/^(?&t)*+(?(DEFINE)(?.))$/B + +/^(?&t)*(?(DEFINE)(?.))$/B + +# This one is here because Perl gives the match as "b" rather than "ab". I +# believe this to be a Perl bug. + +/(?>a\Kb)z|(ab)/ + ab\=startchar + +/(?P(?P0|)|(?P>L2)(?P>L1))/ + abcd + 0abc + +/abc(*MARK:)pqr/ + +/abc(*:)pqr/ + +/(*COMMIT:X)/B + +# This should, and does, fail. In Perl, it does not, which I think is a +# bug because replacing the B in the pattern by (B|D) does make it fail. +# Turning off Perl's optimization by inserting (??{""}) also makes it fail. + +/A(*COMMIT)B/aftertext,mark +\= Expect no match + ACABX + +# These should be different, but in Perl they are not, which I think +# is a bug in Perl. + +/A(*THEN)B|A(*THEN)C/mark + AC + +/A(*PRUNE)B|A(*PRUNE)C/mark +\= Expect no match + AC + +# Mark names can be duplicated. Perl doesn't give a mark for this one, +# though PCRE2 does. + +/^A(*:A)B|^X(*:A)Y/mark +\= Expect no match + XAQQ + +# COMMIT at the start of a pattern should be the same as an anchor. 
Perl +# optimizations defeat this. So does the PCRE2 optimization unless we disable +# it. + +/(*COMMIT)ABC/ + ABCDEFG + +/(*COMMIT)ABC/no_start_optimize +\= Expect no match + DEFGABC + +/^(ab (c+(*THEN)cd) | xyz)/x +\= Expect no match + abcccd + +/^(ab (c+(*PRUNE)cd) | xyz)/x +\= Expect no match + abcccd + +/^(ab (c+(*FAIL)cd) | xyz)/x +\= Expect no match + abcccd + +# Perl gets some of these wrong + +/(?>.(*ACCEPT))*?5/ + abcde + +/(.(*ACCEPT))*?5/ + abcde + +/(.(*ACCEPT))5/ + abcde + +/(.(*ACCEPT))*5/ + abcde + +/A\NB./B + ACBD +\= Expect no match + A\nB + ACB\n + +/A\NB./Bs + ACBD + ACB\n +\= Expect no match + A\nB + +/A\NB/newline=crlf + A\nB + A\rB +\= Expect no match + A\r\nB + +/\R+b/B + +/\R+\n/B + +/\R+\d/B + +/\d*\R/B + +/\s*\R/B + \x20\x0a + \x20\x0d + \x20\x0d\x0a + +/\S*\R/B + a\x0a + +/X\h*\R/B + X\x20\x0a + +/X\H*\R/B + X\x0d\x0a + +/X\H+\R/B + X\x0d\x0a + +/X\H++\R/B +\= Expect no match + X\x0d\x0a + +/(?<=abc)def/ + abc\=ph + +/abc$/ + abc + abc\=ps + abc\=ph + +/abc$/m + abc + abc\n + abc\=ph + abc\n\=ph + abc\=ps + abc\n\=ps + +/abc\z/ + abc + abc\=ps + abc\=ph + +/abc\Z/ + abc + abc\=ps + abc\=ph + +/abc\b/ + abc + abc\=ps + abc\=ph + +/abc\B/ + abc\=ps + abc\=ph +\= Expect no match + abc + +/.+/ +\= Bad offsets + abc\=offset=4 + abc\=offset=-4 +\= Valid data + abc\=offset=0 + abc\=offset=1 + abc\=offset=2 +\= Expect no match + abc\=offset=3 + +/^\cÄ£/ + +/(?P(?P=abn)xxx)/B + +/(a\1z)/B + +/(?P(?P=abn)(?(?P=axn)xxx)/B + +/(?P(?P=axn)xxx)(?yy)/B + +# These tests are here because Perl gets the first one wrong. 
+ +/(\R*)(.)/s + \r\n + \r\r\n\n\r + \r\r\n\n\r\n + +/(\R)*(.)/s + \r\n + \r\r\n\n\r + \r\r\n\n\r\n + +/((?>\r\n|\n|\x0b|\f|\r|\x85)*)(.)/s + \r\n + \r\r\n\n\r + \r\r\n\n\r\n + +# ------------- + +/^abc$/B + +/^abc$/Bm + +/^(a)*+(\w)/ + aaaaX +\= Expect no match + aaaa + +/^(?:a)*+(\w)/ + aaaaX +\= Expect no match + aaaa + +/(a)++1234/IB + +/([abc])++1234/I + +/(?<=(abc)+)X/ + +/(^ab)/I + +/(^ab)++/I + +/(^ab|^)+/I + +/(^ab|^)++/I + +/(?:^ab)/I + +/(?:^ab)++/I + +/(?:^ab|^)+/I + +/(?:^ab|^)++/I + +/(.*ab)/I + +/(.*ab)++/I + +/(.*ab|.*)+/I + +/(.*ab|.*)++/I + +/(?:.*ab)/I + +/(?:.*ab)++/I + +/(?:.*ab|.*)+/I + +/(?:.*ab|.*)++/I + +/(?=a)[bcd]/I + +/((?=a))[bcd]/I + +/((?=a))+[bcd]/I + +/((?=a))++[bcd]/I + +/(?=a+)[bcd]/Ii + +/(?=a+?)[bcd]/Ii + +/(?=a++)[bcd]/Ii + +/(?=a{3})[bcd]/Ii + +/(abc)\1+/ + +# Perl doesn't get these right IMO (the 3rd is PCRE2-specific) + +/(?1)(?:(b(*ACCEPT))){0}/ + b + +/(?1)(?:(b(*ACCEPT))){0}c/ + bc +\= Expect no match + b + +/(?1)(?:((*ACCEPT))){0}c/ + c + c\=notempty + +/^.*?(?(?=a)a|b(*THEN)c)/ +\= Expect no match + ba + +/^.*?(?(?=a)a|bc)/ + ba + +/^.*?(?(?=a)a(*THEN)b|c)/ +\= Expect no match + ac + +/^.*?(?(?=a)a(*THEN)b)c/ +\= Expect no match + ac + +/^.*?(a(*THEN)b)c/ +\= Expect no match + aabc + +/^.*? (?1) c (?(DEFINE)(a(*THEN)b))/x + aabc + +/^.*?(a(*THEN)b|z)c/ + aabc + +/^.*?(z|a(*THEN)b)c/ + aabc + +# These are here because they are not Perl-compatible; the studying means the +# mark is not seen. 
+ +/(*MARK:A)(*SKIP:B)(C|X)/mark + C +\= Expect no match + D + +/(*:A)A+(*SKIP:A)(B|Z)/mark +\= Expect no match + AAAC + +# ---------------------------- + +"(?=a*(*ACCEPT)b)c" + c + c\=notempty + +/(?1)c(?(DEFINE)((*ACCEPT)b))/ + c + c\=notempty + +/(?>(*ACCEPT)b)c/ + c +\= Expect no match + c\=notempty + +/(?:(?>(a)))+a%/allaftertext + %aa% + +/(a)b|ac/allaftertext + ac\=ovector=1 + +/(a)(b)x|abc/allaftertext + abc\=ovector=2 + +/(a)bc|(a)(b)\2/ + abc\=ovector=1 + abc\=ovector=2 + aba\=ovector=1 + aba\=ovector=2 + aba\=ovector=3 + aba\=ovector=4 + +/(?(DEFINE)(a(?2)|b)(b(?1)|a))(?:(?1)|(?2))/I + +/(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/I + +/(a(?2)|b)(b(?1)|a)(?1)(?2)/I + +/(abc)(?1)/I + +/(?:(foo)|(bar)|(baz))X/allcaptures + bazfooX + foobazbarX + barfooX + bazX + foobarbazX + bazfooX\=ovector=0 + bazfooX\=ovector=1 + bazfooX\=ovector=2 + bazfooX\=ovector=3 + +/(?=abc){3}abc/B + +/(?=abc)+abc/B + +/(?=abc)++abc/B + +/(?=abc){0}xyz/B + +/(?=(a))?./B + +/(?=(a))??./B + +/^(?=(a)){0}b(?1)/B + +/(?(DEFINE)(a))?b(?1)/B + +/^(?=(?1))?[az]([abc])d/B + +/^(?!a){0}\w+/B + +/(?<=(abc))?xyz/B + +/[:a[:abc]b:]/B + +/^(a(*:A)(d|e(*:B))z|aeq)/auto_callout + adz + aez + aeqwerty + +/.(*F)/ +\= Expect no match + abc\=ph + +/\btype\b\W*?\btext\b\W*?\bjavascript\b/I + +/\btype\b\W*?\btext\b\W*?\bjavascript\b|\burl\b\W*?\bshell:|a+)(?>(z+))\w/B + aaaazzzzb +\= Expect no match + aazz + +/(.)(\1|a(?2))/ + bab + +/\1|(.)(?R)\1/ + cbbbc + +/(.)((?(1)c|a)|a(?2))/ +\= Expect no match + baa + +/(?P(?P=abn)xxx)/B + +/(a\1z)/B + +/^a\x41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz +\= Expect no match + ax41z + +/^a[m\x41]z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz + +/^a\x1z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + ax1z + +/^a\u0041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz +\= Expect no match + au0041z + +/^a[m\u0041]z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz + 
+/^a\u041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + au041z +\= Expect no match + aAz + +/^a\U0041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aU0041z +\= Expect no match + aAz + +/^\u{7a}/alt_bsux + u{7a} +\= Expect no match + zoo + +/^\u{7a}/extra_alt_bsux + zoo + +/\u{}/extra_alt_bsux + u{} + +/\u{Q12}/extra_alt_bsux + --u{Q12}-- + +/\u{ 12}/extra_alt_bsux + --u{ 12}-- + +/\u{{3}}/extra_alt_bsux + --u{{{}-- + +/(?(?=c)c|d)++Y/B + +/(?(?=c)c|d)*+Y/B + +/a[\NB]c/ + aNc + +/a[B-\Nc]/ + +/a[B\Nc]/ + +/(a)(?2){0,1999}?(b)/ + +/(a)(?(DEFINE)(b))(?2){0,1999}?(?2)/ + +# This test, with something more complicated than individual letters, causes +# different behaviour in Perl. Perhaps it disables some optimization; no tag is +# passed back for the failures, whereas in PCRE2 there is a tag. + +/(A|P)(*:A)(B|P) | (X|P)(X|P)(*:B)(Y|P)/x,mark + AABC + XXYZ +\= Expect no match + XAQQ + XAQQXZZ + AXQQQ + AXXQQQ + +# Perl doesn't give marks for these, though it does if the alternatives are +# replaced by single letters. + +/(b|q)(*:m)f|a(*:n)w/mark + aw +\= Expect no match + abc + +/(q|b)(*:m)f|a(*:n)w/mark + aw +\= Expect no match + abc + +# After a partial match, the behaviour is as for a failure. + +/^a(*:X)bcde/mark + abc\=ps + +# These are here because Perl doesn't return a mark, except for the first. 
+ +/(?=(*:x))(q|)/aftertext,mark + abc + +/(?=(*:x))((*:y)q|)/aftertext,mark + abc + +/(?=(*:x))(?:(*:y)q|)/aftertext,mark + abc + +/(?=(*:x))(?>(*:y)q|)/aftertext,mark + abc + +/(?=a(*:x))(?!a(*:y)c)/aftertext,mark + ab + +/(?=a(*:x))(?=a(*:y)c|)/aftertext,mark + ab + +/(..)\1/ + ab\=ps + aba\=ps + abab\=ps + +/(..)\1/i + ab\=ps + abA\=ps + aBAb\=ps + +/(..)\1{2,}/ + ab\=ps + aba\=ps + abab\=ps + ababa\=ps + ababab\=ps + ababab\=ph + abababa\=ps + abababa\=ph + +/(..)\1{2,}/i + ab\=ps + aBa\=ps + aBAb\=ps + AbaBA\=ps + abABAb\=ps + aBAbaB\=ph + abABabA\=ps + abaBABa\=ph + +/(..)\1{2,}?x/i + ab\=ps + abA\=ps + aBAb\=ps + abaBA\=ps + abAbaB\=ps + abaBabA\=ps + abAbABaBx\=ps + +/^(..)\1/ + aba\=ps + +/^(..)\1{2,3}x/ + aba\=ps + ababa\=ps + ababa\=ph + abababx + ababababx + +/^(..)\1{2,3}?x/ + aba\=ps + ababa\=ps + ababa\=ph + abababx + ababababx + +/^(..)(\1{2,3})ab/ + abababab + +/^\R/ + \r\=ps + \r\=ph + +/^\R{2,3}x/ + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + \r\rx + \r\r\rx + +/^\R{2,3}?x/ + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + \r\rx + \r\r\rx + +/^\R?x/ + \r\=ps + \r\=ph + x + \rx + +/^\R+x/ + \r\=ps + \r\=ph + \r\n\=ps + \r\n\=ph + \rx + +/^a$/newline=crlf + a\r\=ps + a\r\=ph + +/^a$/m,newline=crlf + a\r\=ps + a\r\=ph + +/^(a$|a\r)/newline=crlf + a\r\=ps + a\r\=ph + +/^(a$|a\r)/m,newline=crlf + a\r\=ps + a\r\=ph + +/./newline=crlf + \r\=ps + \r\=ph + +/.{2,3}/newline=crlf + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + +/.{2,3}?/newline=crlf + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + +"AB(C(D))(E(F))?(?(?=\2)(?=\4))" + ABCDGHI\=ovector=01 + +# These are all run as real matches in test 1; here we are just checking the +# settings of the anchored and startline bits. 
+ +/(?>.*?a)(?<=ba)/I + +/(?:.*?a)(?<=ba)/I + +/.*?a(*PRUNE)b/I + +/.*?a(*PRUNE)b/Is + +/^a(*PRUNE)b/Is + +/.*?a(*SKIP)b/I + +/(?>.*?a)b/Is + +/(?>.*?a)b/I + +/(?>^a)b/Is + +/(?>.*?)(?<=(abcd)|(wxyz))/I + +/(?>.*)(?<=(abcd)|(wxyz))/I + +"(?>.*)foo"I + +"(?>.*?)foo"I + +/(?>^abc)/Im + +/(?>.*abc)/Im + +/(?:.*abc)/Im + +/(?:(a)+(?C1)bb|aa(?C2)b)/ + aab\=callout_capture + +/(?:(a)++(?C1)bb|aa(?C2)b)/ + aab\=callout_capture + +/(?:(?>(a))(?C1)bb|aa(?C2)b)/ + aab\=callout_capture + +/(?:(?1)(?C1)x|ab(?C2))((a)){0}/ + aab\=callout_capture + +/(?1)(?C1)((a)(?C2)){0}/ + aab\=callout_capture + +/(?:(a)+(?C1)bb|aa(?C2)b)++/ + aab\=callout_capture + aab\=callout_capture,ovector=1 + +/(ab)x|ab/ + ab\=ovector=0 + ab\=ovector=1 + +/(?<=123)(*MARK:xx)abc/mark + xxxx123a\=ph + xxxx123a\=ps + +/123\Kabc/startchar + xxxx123a\=ph + xxxx123a\=ps + +/^(?(?=a)aa|bb)/auto_callout + bb + +/(?C1)^(?C2)(?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10))(?C11)/ + bb + +# Perl seems to have a bug with this one. + +/aaaaa(*COMMIT)(*PRUNE)b|a+c/ + aaaaaac + +# Here are some that Perl treats differently because of the way it handles +# backtracking verbs. + +/(?!a(*COMMIT)b)ac|ad/ + ac + ad + +/^(?!a(*THEN)b|ac)../ + ad +\= Expect no match + ac + +/^(?=a(*THEN)b|ac)/ + ac + +/\A.*?(?:a|b(*THEN)c)/ + ba + +/\A.*?(?:a|b(*THEN)c)++/ + ba + +/\A.*?(?:a|b(*THEN)c|d)/ + ba + +/(?:(a(*MARK:X)a+(*SKIP:X)b)){0}(?:(?1)|aac)/ + aac + +/\A.*?(a|b(*THEN)c)/ + ba + +/^(A(*THEN)B|A(*THEN)D)/ + AD + +/(?!b(*THEN)a)bn|bnn/ + bnn + +/(?(?=b(*SKIP)a)bn|bnn)/ + bnn + +/(?=b(*THEN)a|)bn|bnn/ + bnn + +# This test causes a segfault with Perl 5.18.0 + +/^(?=(a)){0}b(?1)/ + backgammon + +/(?|(?f)|(?b))/I,dupnames + +/(?abc)(?z)\k()/IB,dupnames + +/a*[bcd]/B + +/[bcd]*a/B + +# A complete set of tests for auto-possessification of character types, but +# omitting \C because it might be disabled (it has its own tests). + +/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. 
\D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx + +/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx + +/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx + +/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx + +/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx + +/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx + +/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx + +/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx + +/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx + +/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx + +/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx + +/ a+\D a+\d a+\S a+\s a+\W a+\w a+. a+\R a+\H a+\h a+\V a+\v a+\Z a+\z a+$/Bx + +/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx + +/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bx + +/ .+\D .+\d .+\S .+\s .+\W .+\w .+. 
.+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bsx + +/ \D+$ \d+$ \S+$ \s+$ \W+$ \w+$ \R+$ \H+$ \h+$ \V+$ \v+$ a+$ \n+$ .+$ .+$/Bmx + +/(?=a+)a(a+)++a/B + +/a+(bb|cc)a+(?:bb|cc)a+(?>bb|cc)a+(?:bb|cc)+a+(aa)a+(?:bb|aa)/B + +/a+(bb|cc)?#a+(?:bb|cc)??#a+(?:bb|cc)?+#a+(?:bb|cc)*#a+(bb|cc)?a#a+(?:aa)?/B + +/a+(?:bb)?a#a+(?:|||)#a+(?:|b)a#a+(?:|||)?a/B + +/[ab]*/B + aaaa + +/[ab]*?/B + aaaa + +/[ab]?/B + aaaa + +/[ab]??/B + aaaa + +/[ab]+/B + aaaa + +/[ab]+?/B + aaaa + +/[ab]{2,3}/B + aaaa + +/[ab]{2,3}?/B + aaaa + +/[ab]{2,}/B + aaaa + +/[ab]{2,}?/B + aaaa + +/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B + +/[a-d]{5,12}[e-z0-9]*#[^a-z]+[b-y]*a[2-7]?[^0-9a-z]+/B + +/[a-z]*\s#[ \t]?\S#[a-c]*\S#[C-G]+?\d#[4-8]*\D#[4-9,]*\D#[!$]{0,5}\w#[M-Xf-l]+\W#[a-c,]?\W/B + +/a+(aa|bb)*c#a*(bb|cc)*a#a?(bb|cc)*d#[a-f]*(g|hh)*f/B + +/[a-f]*(g|hh|i)*i#[a-x]{4,}(y{0,6})*y#[a-k]+(ll|mm)+n/B + +/[a-f]*(?>gg|hh)+#[a-f]*(?>gg|hh)?#[a-f]*(?>gg|hh)*a#[a-f]*(?>gg|hh)*h/B + +/[a-c]*d/IB + +/[a-c]+d/IB + +/[a-c]?d/IB + +/[a-c]{4,6}d/IB + +/[a-c]{0,6}d/IB + +# End of special auto-possessive tests + +/^A\o{1239}B/ + A\123B + +/^A\oB/ + +/^A\x{zz}B/ + +/^A\x{12Z/ + +/^A\x{/ + +/[ab]++/B,no_auto_possess + +/[^ab]*+/B,no_auto_possess + +/a{4}+/B,no_auto_possess + +/a{4}+/Bi,no_auto_possess + +/[a-[:digit:]]+/ + +/[A-[:digit:]]+/ + +/[a-[.xxx.]]+/ + +/[a-[=xxx=]]+/ + +/[a-[!xxx!]]+/ + +/[A-[!xxx!]]+/ + A]]] + +/[a-\d]+/ + +/(?<0abc>xx)/ + +/(?&1abc)xx(?<1abc>y)/ + +/(?xx)/ + +/(?'0abc'xx)/ + +/(?P<0abc>xx)/ + +/\k<5ghj>/ + +/\k'5ghj'/ + +/\k{2fgh}/ + +/(?P=8yuki)/ + +/\g{4df}/ + +/(?&1abc)xx(?<1abc>y)/ + +/(?P>1abc)xx(?<1abc>y)/ + +/\g'3gh'/ + +/\g<5fg>/ + +/(?(<4gh>)abc)/ + +/(?('4gh')abc)/ + +/(?(4gh)abc)/ + +/(?(R&6yh)abc)/ + +/(((a\2)|(a*)\g<-1>))*a?/B + +# Test the ugly "start or end of word" compatibility syntax. 
+ +/[[:<:]]red[[:>:]]/B + little red riding hood + a /red/ thing + red is a colour + put it all on red +\= Expect no match + no reduction + Alfred Winifred + +/[a[:<:]] should give error/ + +/(?=ab\K)/aftertext,allow_lookaround_bsk + abcd\=startchar + +/abcd/newline=lf,firstline +\= Expect no match + xx\nxabcd + +# Test stack guard external calls. + +/(((a)))/stackguard=1 + +/(((a)))/stackguard=2 + +/(((a)))/stackguard=3 + +/(((((a)))))/ + +# End stack guard tests + +/^\w+(?>\s*)(?<=\w)/B + +/\othing/ + +/\o{}/ + +/\o{whatever}/ + +/\xthing/ + +/\x{}/ + +/\x{whatever}/ + +/A\8B/ + +/A\9B/ + +# This one is here because Perl fails to match "12" for this pattern when the $ +# is present. + +/^(?(?=abc)\w{3}:|\d\d)$/ + abc: + 12 +\= Expect no match + 123 + xyz + +# Perl gets this one wrong, giving "a" as the after text for ca and failing to +# match for cd. + +/(?(?=ab)ab)/aftertext + abxxx + ca + cd + +# This should test both paths for processing OP_RECURSE. + +/(?(R)a+|(?R)b)/ + aaaabcde + aaaabcde\=ovector=100 + +/a*?b*?/ + ab + +/(*NOTEMPTY)a*?b*?/ + ab + ba + cb + +/(*NOTEMPTY_ATSTART)a*?b*?/aftertext + ab + cdab + +/(?(VERSION>=10.0)yes|no)/I + yesno + +/(?(VERSION>=10.04)yes|no)/ + yesno + +/(?(VERSION=8)yes){3}/BI,aftertext + yesno + +/(?(VERSION=8)yes|no){3}/I + yesnononoyes +\= Expect no match + yesno + +/(?:(?abc)|xyz)(?(VERSION)yes|no)/I + abcyes + xyzno +\= Expect no match + abcno + xyzyes + +/(?(VERSION<10)yes|no)/ + +/(?(VERSION>10)yes|no)/ + +/(?(VERSION>=10.0.0)yes|no)/ + +/(?(VERSION=10.101)yes|no)/ + +/abcd/I + +/abcd/I,no_start_optimize + +/(|ab)*?d/I + abd + xyd + +/(|ab)*?d/I,no_start_optimize + abd + xyd + +/\k*(?aa)(?bb)/match_unset_backref,dupnames + aabb + +/(((((a)))))/parens_nest_limit=2 + +/abc/replace=XYZ + 123123 + 123abc123 + 123abc123abc123 + 123123\=zero_terminate + 123abc123\=zero_terminate + 123abc123abc123\=zero_terminate + +/abc/g,replace=XYZ + 123abc123 + 123abc123abc123 + +/abc/replace=X$$Z + 123abc123 + +/abc/g,replace=X$$Z + 
123abc123abc123 + +/a(b)c(d)e/replace=X$1Y${2}Z + "abcde" + +/a(b)c(d)e/replace=X$1Y${2}Z,global + "abcde-abcde" + +/a(?b)c(?d)e/replace=X$ONE+${TWO}Z + "abcde" + +/a(?b)c(?d)e/g,replace=X$ONE+${TWO}Z + "abcde-abcde-" + +/abc/replace=a$++ + 123abc + +/abc/replace=a$bad + 123abc + +/abc/replace=a${A234567890123456789_123456789012}z + 123abc + +/abc/replace=a${A23456789012345678901234567890123}z + 123abc + +/abc/replace=a${bcd + 123abc + +/abc/replace=a${b+d}z + 123abc + +/abc/replace=[10]XYZ + 123abc123 + +/abc/replace=[9]XYZ + 123abc123 + +/abc/replace=xyz + 1abc2\=partial_hard + +/abc/replace=xyz + 123abc456 + 123abc456\=replace=pqr + 123abc456abc789 + 123abc456abc789\=g + +/(?<=abc)(|def)/g,replace=<$0> + 123abcxyzabcdef789abcpqr + +/./replace=$0 + a + +/(.)(.)/replace=$2+$1 + abc + +/(?.)(?.)/replace=$B+$A + abc + +/(.)(.)/g,replace=$2$1 + abcdefgh + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK} + apple lemon blackberry + apple strudel + fruitless + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK} sauce, + apple lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK> + apple lemon blackberry + apple strudel + fruitless + +/(*:pear)apple/g,replace=${*MARKING} + apple lemon blackberry + +/(*:pear)apple/g,replace=${*MARK-time + apple lemon blackberry + +/(*:pear)apple/g,replace=${*mark} + apple lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET> + apple lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK} + apple lemon blackberry + apple lemon blackberry\=substitute_overflow_length + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK} + apple lemon blackberry + +/abc/ + 123abc123\=replace=[9]XYZ + 123abc123\=substitute_overflow_length,replace=[9]XYZ + 123abc123\=substitute_overflow_length,replace=[6]XYZ + 
123abc123\=substitute_overflow_length,replace=[1]XYZ + 123abc123\=substitute_overflow_length,replace=[0]XYZ + +/a(b)c/ + 123abc123\=replace=[9]x$1z + 123abc123\=substitute_overflow_length,replace=[9]x$1z + 123abc123\=substitute_overflow_length,replace=[6]x$1z + 123abc123\=substitute_overflow_length,replace=[1]x$1z + 123abc123\=substitute_overflow_length,replace=[0]x$1z + +"((?=(?(?=(?(?=(?(?=()))))))))" + a + +"(?(?=)==)(((((((((?=)))))))))" +\= Expect no match + a + +/(a)(b)|(c)/ + XcX\=ovector=2,get=1,get=2,get=3,get=4,getall + +/x(?=ab\K)/allow_lookaround_bsk + xab\=get=0 + xab\=copy=0 + xab\=getall + +/(?a)|(?b)/dupnames + a\=ovector=1,copy=A,get=A,get=2 + a\=ovector=2,copy=A,get=A,get=2 + b\=ovector=2,copy=A,get=A,get=2 + +/a(b)c(d)/ + abc\=ph,copy=0,copy=1,getall + +/^abc/info + +/^abc/info,no_dotstar_anchor + +/.*\d/info,auto_callout +\= Expect no match + aaa + +/.*\d/info,no_dotstar_anchor,auto_callout +\= Expect no match + aaa + +/.*\d/dotall,info + +/.*\d/dotall,no_dotstar_anchor,info + +/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info + +'^(?:(a)|b)(?(1)A|B)' + aA123\=ovector=1 + aA123\=ovector=2 + +'^(?:(?a)|b)(?()A|B)' + aA123\=ovector=1 + aA123\=ovector=2 + +'^(?)(?:(?a)|b)(?()A|B)'dupnames + aA123\=ovector=1 + aA123\=ovector=2 + aA123\=ovector=3 + +'^(?:(?X)|)(?:(?a)|b)\k{AA}'dupnames + aa123\=ovector=1 + aa123\=ovector=2 + aa123\=ovector=3 + +/(?(?J)(?1(111111)11|)1|1|)(?()1)/ + +/(?(?J)(?))(?-J)\k/ + +# Quantifiers are not allowed on condition assertions, but are otherwise +# OK in conditions. + +/(?(?=0)?)+/ + +/(?(?=0)(?=00)?00765)/ + 00765 + +/(?(?=0)(?=00)?00765|(?!3).56)/ + 00765 + 456 +\= Expect no match + 356 + +'^(a)*+(\w)' + g + g\=ovector=1 + +'^(?:a)*+(\w)' + g + g\=ovector=1 + +# These two pattern showeds up compile-time bugs + +"((?2){0,1999}())?" 
+ +/((?+1)(\1))/B + +# Callouts with string arguments + +/a(?C"/ + +/a(?C"a/ + +/a(?C"a"/ + +/a(?C"a"bcde(?C"b")xyz/ + +/a(?C"a)b""c")/B + +/ab(?C" any text with spaces ")cde/B + abcde + 12abcde + +/^a(b)c(?C1)def/ + abcdef + +/^a(b)c(?C"AB")def/ + abcdef + +/^a(b)c(?C1)def/ + abcdef\=callout_capture + +/^a(b)c(?C{AB})def/B + abcdef\=callout_capture + +/(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B,callout_info + +/(?:a(?C`code`)){3}/B + +/^(?(?C25)(?=abc)abcd|xyz)/B,callout_info + abcdefg + xyz123 + +/^(?(?C$abc$)(?=abc)abcd|xyz)/B + abcdefg + xyz123 + +/^ab(?C'first')cd(?C"second")ef/ + abcdefg + +/(?:a(?C`code`)){3}X/ + aaaXY + +# Binary zero in callout string +# a ( ? C ' x z ' ) b +/ 61 28 3f 43 27 78 00 7a 27 29 62/hex,callout_info + abcdefgh + +/(?(?!)^)/ + +/(?(?!)a|b)/ + bbb +\= Expect no match + aaa + +# JIT gives a different error message for the infinite recursion + +"(*NO_JIT)((?2)+)((?1)){" + abcd{ + +# Perl fails to diagnose the absence of an assertion + +"(?(?.*!.*)?)" + +"X((?2)()*+){2}+"B + +"X((?2)()*+){2}"B + +/(?<=\bABQ(3(?-7)))/ + +/(?<=\bABQ(3(?+7)))/ + +";(?<=()((?3))((?2)))" + +# Perl loops on this (PCRE2 used to!) 
+ +/(?<=\Ka)/g,aftertext,allow_lookaround_bsk + aaaaa + +/(?<=\Ka)/altglobal,aftertext,allow_lookaround_bsk + aaaaa + +/((?2){73}(?2))((?1))/info + +/abc/ +\= Expect no match + \[9x!xxx(]{9999} + +/(abc)*/ + \[abc]{5} + +/^/gm + \n\n\n + +/^/gm,alt_circumflex + \n\n\n + +/((((((((x))))))))\81/ + xx1 + +/((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))\80/ + xx + +/\80/ + +/A\8B\9C/ + A8B9C + +/(?x:((?'a')) # comment (with parentheses) and | vertical +(?-x:#not a comment (?'b')) # this is a comment () +(?'c')) # not a comment (?'d')/info + +/(?|(?'a')(2)(?'b')|(?'a')(?'a')(3))/I,dupnames + A23B + B32A + +# These are some patterns that used to cause buffer overflows or other errors +# while compiling. + +/.((?2)(?R)|\1|$)()/B + +/.((?3)(?R)()(?2)|\1|$)()/B + +/(\9*+(?2);\3++()2|)++{/ + +/\V\x85\9*+((?2)\3++()2)*:2/ + +/(((?(R)){0,2}) (?'x'((?'R')((?'R')))))/dupnames + +/(((?(X)){0,2}) (?'x'((?'X')((?'X')))))/dupnames + +/(((?(R)){0,2}) (?'x'((?'X')((?'R')))))/ + +"(?J)(?'d'(?'d'\g{d}))" + +"(?=!((?2)(?))({8(?<=(?1){29}8bbbb\x16\xd\xc6^($(\xa9H4){4}h}?1)B))\x15')" + +/A(?'')Z/ + +"(?J:(?|(?'R')(\k'R')|((?'R'))))" + +/(?<=|(\,\$(?73591620449005828816)\xa8.{7}){6}\x09)/ + +/^(?:(?(1)x|)+)+$()/B + +/[[:>:]](?<)/ + +/((?x)(*:0))#(?'/ + +/(?C$[$)(?<]/ + +/(?C$)$)(?<]/ + +/(?(R))*+/B + abcd + +/((?x)(?#))#(?'/ + +/((?x)(?#))#(?'abc')/I + +/[[:\\](?<[::]/ + +/[[:\\](?'abc')[a:]/I + +"[[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[:::::::::::::::::[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[[[:::E[[[:[:[[:[:::[[:::E[[[:[:[[:'[:::::E[[[:[::::::[[[:[[[[[[[::E[[[:[::::::[[[:[[[[[[[[:[[::[::::[[:::::::[[:[[[[[[[:[[::[:[[:[~" + +/()(?(R)0)*+/B + +/(?R-:(?>abcd<< + +/abcd/g,replace=\$1$2\,substitute_literal + XabcdYabcdZ + 
+/a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended + abcDE + +/abcd/replace=xy\kz,substitute_extended + abcd + +/a(?:(b)|(c))/substitute_extended,replace=X${1:+1:-1}X${2:+2:-2} + ab + ac + ab\=replace=${1:+$1\:$1:$2} + ac\=replace=${1:+$1\:$1:$2} + >>ac<<\=replace=${1:+$1\:$1:$2},substitute_literal + +/a(?:(b)|(c))/substitute_extended,replace=X${1:-1:-1}X${2:-2:-2} + ab + ac + +/(a)/substitute_extended,replace=>${1:+\Q$1:{}$$\E+\U$1}< + a + +/X(b)Y/substitute_extended + XbY\=replace=x${1:+$1\U$1}y + XbY\=replace=\Ux${1:+$1$1}y + +/a/substitute_extended,replace=${*MARK:+a:b} + a + +/(abcd)/replace=${1:+xy\kz},substitute_extended + abcd + +/(abcd)/ + abcd\=replace=${1:+xy\kz},substitute_extended + +/abcd/substitute_extended,replace=>$1< + abcd + +/abcd/substitute_extended,replace=>xxx${xyz}<<< + abcd + +/(?J)(?:(?a)|(?b))/replace=<$A> + [a] + [b] +\= Expect error + (a)\=ovector=1 + +/(a)|(b)/replace=<$1> +\= Expect error + b + +/(aa)(BB)/substitute_extended,replace=\U$1\L$2\E$1..\U$1\l$2$1 + aaBB + +/abcd/replace=wxyz,substitute_matched + abcd + pqrs + +/abcd/g + >abcd1234abcd5678<\=replace=wxyz,substitute_matched + +/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I + +/((p(?'K/ + +/((p(?'K/no_auto_capture + +/abc/replace=A$3123456789Z + abc + +/(?$1<,substitute_unset_empty + cat + xbcom + +/a|(b)c/ + cat\=replace=>$1< + cat\=replace=>$1<,substitute_unset_empty + xbcom\=replace=>$1<,substitute_unset_empty + +/a|(b)c/substitute_extended + cat\=replace=>${2:-xx}< + cat\=replace=>${2:-xx}<,substitute_unknown_unset + cat\=replace=>${X:-xx}<,substitute_unknown_unset + +/a|(?'X'b)c/replace=>$X<,substitute_unset_empty + cat + xbcom + +/a|(?'X'b)c/replace=>$Y<,substitute_unset_empty + cat + cat\=substitute_unknown_unset + cat\=substitute_unknown_unset,-substitute_unset_empty + +/a|(b)c/replace=>$2<,substitute_unset_empty + cat + cat\=substitute_unknown_unset + cat\=substitute_unknown_unset,-substitute_unset_empty + +/()()()/use_offset_limit + 
\=ovector=11000000000 + \=callout_fail=11000000000 + \=callout_fail=1:11000000000 + \=callout_data=11000000000 + \=callout_data=-11000000000 + \=offset_limit=1100000000000000000000 + \=copy=11000000000 + +/(*MARK:A\x00b)/mark + abc + +/(*MARK:A\x00b)/mark,alt_verbnames + abc + +/"(*MARK:A" 00 "b)"/mark,hex + abc + +/"(*MARK:A" 00 "b)"/mark,hex,alt_verbnames + abc + +/efg/hex + +/eff/hex + +/effg/hex + +/(?J)(?'a'))(?'a')/ + +/(?<=((?C)0))/ + 9010 +\= Expect no match + abc + +/aaa/ +\[abc]{10000000000000000000000000000} +\[a]{3} + +/\[AB]{6000000000000000000000}/expand + +# Hex uses pattern length, not zero-terminated. This tests for overrunning +# the given length of a pattern. + +/'(*U'/hex + +/'(*'/hex + +/'('/hex + +//hex + +# These tests are here because Perl never allows a back reference in a +# lookbehind. PCRE2 supports some limited cases. + +/([ab])...(?<=\1)z/ + a11az + b11bz +\= Expect no match + b11az + +/(?|([ab]))...(?<=\1)z/ + +/([ab])(\1)...(?<=\2)z/ + aa11az + +/(a\2)(b\1)(?<=\2)/ + +/(?[ab])...(?<=\k'A')z/ + a11az + b11bz +\= Expect no match + b11az + +/(?[ab])...(?<=\k'A')(?)z/dupnames + +# Perl does not support \g+n + +/((\g+1X)?([ab]))+/ + aaXbbXa + +/ab(?C1)c/auto_callout + abc + +/'ab(?C1)c'/hex,auto_callout + abc + +# Perl accepts these, but gives a warning. We can't warn, so give an error. 
+ +/[a-[:digit:]]+/ + a-a9-a + +/[A-[:digit:]]+/ + A-A9-A + +/[a-\d]+/ + a-a9-a + +/(?abc)(?(R)xyz)/B + +/(?abc)(?(R)xyz)/B + +/(?=.*[A-Z])/I + +/()(?<=(?0))/ + +/(?*?\g'0/use_length + +/.>*?\g'0/ + +/{„Í„Í̈́Í{'{22{2{{2{'{22{{22{2{'{22{2{{2{{222{{2{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{222{2Ą̈́Í̈́Í{'{22{2{{2{'{22{{11{2{'{22{2{{2{{'{22{2{{2{'{22{{22{1{'{22{2{{2{{222{{2{'{22{2{22{2{'{/auto_callout + +// +\=get=i00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012 + +"(?(?C))" + +/(?(?(?(?(?(?))))))/ + +/(?<=(?1))((?s))/anchored + +/(*:ab)*/ + +%(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout + +/./newline=crlf + \=ph + +/(\x0e00\000000\xc)/replace=\P,substitute_extended + \x0e00\000000\xc + +//replace=0 + \=offset=7 + +/(?<=\G.)/g,replace=+ + abc + +".+\QX\E+"B,no_auto_possess + +".+\QX\E+"B,auto_callout,no_auto_possess + +# This one is here because Perl gives an 'unmatched )' error which goes away +# if one of the \) sequences is removed - which is weird. PCRE finds it too +# complicated to find a minimum matching length. 
+ +"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I + +# This checks that new code for handling groups that may match an empty string +# works on a very large number of alternatives. This pattern used to provoke a +# complaint that it was too complicated. + +/(?:\[A|B|C|D|E|F|G|H|I|J|]{200}Z)/expand + +# This one used to compile rubbish instead of a compile error, and then +# behave unpredictably at match time. 
+ +/.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X/ + .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X + +/[:[:alnum:]-[[a:lnum:]+/ + +/((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + +/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + +/abcd/auto_callout + abcd\=callout_error=255:2 + +/()(\g+65534)/ + +/()(\g+65533)/ + +/Á\x00\x00\x00š(\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\x00k\d+\x00‎\x00\x00\x00\x00\x00\2*\x00\x00\1*.){36}int^\x00\x00ÿÿ\x00š(\1{50779}?)J\w2/I + +/(a)(b)\2\1\1\1\1/I + +/(?a)(?b)\g{b}\g{a}\g{a}\g{a}\g{a}(?xx)(?zz)/I,dupnames + +// + \=ovector=7777777777 + +# This is here because Perl matches, even though a COMMIT is encountered +# outside of the recursion. + +/(?1)(A(*COMMIT)|B)D/ + BAXBAD + +"(?1){2}(a)"B + +"(?1){2,4}(a)"B + +# This test differs from Perl for the first subject. Perl ends up with +# $1 set to 'B'; PCRE2 has it unset (which I think is right). + +/^(?: +(?:A| (?:B|B(*ACCEPT)) (?<=(.)) D) +(Z) +)+$/x + AZB + AZBDZ + +# The first of these, when run by Perl, gives the mark 'aa', which is wrong. + +'(?>a(*:aa))b|ac' mark + ac + +'(?:a(*:aa))b|ac' mark + ac + +/(R?){65}/ + (R?){65} + +/\[(a)]{60}/expand + aaaa + +/(?=999)yes)^bc/I + +# This should not be anchored. 
+ +/(?(VERSION>=999)yes|no)^bc/I + +/(*LIMIT_HEAP=0)xxx/I + +/\d{0,3}(*:abc)(?C1)xxx/callout_info + +# ---------------------------------------------------------------------- + +# These are a whole pile of tests that touch lines of code that are not +# used by any other tests (at least when these were created). + +/^a+?x/i,no_start_optimize,no_auto_possess +\= Expect no match + aaa + +/^[^a]{3,}?x/i,no_start_optimize,no_auto_possess +\= Expect no match + bbb + cc + +/^X\S/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\W/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\H/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\h/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\V/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\v/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\h/no_start_optimize,no_auto_possess +\= Expect no match + XY + +/^X\V/no_start_optimize,no_auto_possess +\= Expect no match + X\n + +/^X\v/no_start_optimize,no_auto_possess +\= Expect no match + XX + +/^X.+?/s,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\R+?/no_start_optimize,no_auto_possess +\= Expect no match + XX + +/^X\H+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\h+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\V+?/no_start_optimize,no_auto_possess +\= Expect no match + X + X\n + +/^X\D+?/no_start_optimize,no_auto_possess +\= Expect no match + X + X9 + +/^X\S+?/no_start_optimize,no_auto_possess +\= Expect no match + X + X\n + +/^X\W+?/no_start_optimize,no_auto_possess +\= Expect no match + X + XX + +/^X.+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n + +/(*CRLF)^X.+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\r\=ps + +/^X\R+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\nX + X\n\r\n + X\n\rY + X\n\nY + X\n\x{0c}Y + +/(*BSR_ANYCRLF)^X\R+?Z/no_start_optimize,no_auto_possess +\= Expect no match 
+ X\nX + X\n\r\n + X\n\rY + X\n\nY + X\n\x{0c}Y + +/^X\H+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\t + XYY + +/^X\h+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\t\t + X\tY + +/^X\V+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n + XYY + +/^X\v+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\n\n + X\nY + +/^X\D+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY9 + XYY + +/^X\d+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X99 + X9Y + +/^X\S+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n + XYY + +/^X\s+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\n\n + X\nY + +/^X\W+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X.A + X++ + +/^X\w+?Z/no_start_optimize,no_auto_possess +\= Expect no match + Xa. + Xaa + +/^X.{1,3}Z/s,no_start_optimize,no_auto_possess +\= Expect no match + Xa.bd + +/^X\h+Z/no_start_optimize,no_auto_possess +\= Expect no match + X\t\t + X\tY + +/^X\V+Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n + XYY + +/^(X(*THEN)Y|AB){0}(?1)/ + ABX +\= Expect no match + XAB + +/^(?!A(?C1)B)C/ + ABC\=callout_error=1,no_jit + +/^(?!A(?C1)B)C/no_start_optimize + ABC\=callout_error=1 + +/^(?(?!A(?C1)B)C)/ + ABC\=callout_error=1 + +# ---------------------------------------------------------------------- + +/[a b c]/BxxI + +/[a b c]/BxxxI + +/[a b c]/B,extended_more + +/[ a b c ]/B,extended_more + +/[a b](?xx: [ 12 ] (?-xx:[ 34 ]) )y z/B + +# Unsetting /x also unsets /xx + +/[a b](?xx: [ 12 ] (?-x:[ 34 ]) )y z/B + +/(a)(?-n:(b))(c)/nB + +# ---------------------------------------------------------------------- +# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option. 
+ +/\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal + +/\N{\c/IB,bad_escape_is_literal + +/[\j\x{z}\o\gAb\g]/B,bad_escape_is_literal + +/[Q-\N]/B,bad_escape_is_literal + +/[\s-_]/bad_escape_is_literal + +/[_-\s]/bad_escape_is_literal + +/[\B\R\X]/B + +/[\B\R\X]/B,bad_escape_is_literal + +/[A-\BP-\RV-\X]/B + +/[A-\BP-\RV-\X]/B,bad_escape_is_literal + +# ---------------------------------------------------------------------- + +/a\b(c/literal + a\\b(c + +/a\b(c/literal,caseless + a\\b(c + a\\B(c + +/a\b(c/literal,firstline + XYYa\\b(c +\= Expect no match + X\na\\b(c + +/a\b?c/literal,use_offset_limit + XXXXa\\b?c\=offset_limit=4 +\= Expect no match + XXXXa\\b?c\=offset_limit=3 + +/a\b(c/literal,anchored,endanchored + a\\b(c +\= Expect no match + Xa\\b(c + a\\b(cX + Xa\\b(cX + +//literal,extended + +/a\b(c/literal,auto_callout,no_start_optimize + XXXXa\\b(c + +/a\b(c/literal,auto_callout + XXXXa\\b(c + +/(*CR)abc/literal + (*CR)abc + +/cat|dog/I,match_word + the cat sat +\= Expect no match + caterpillar + snowcat + syndicate + +/(cat)|dog/I,match_line,literal + (cat)|dog +\= Expect no match + the cat sat + caterpillar + snowcat + syndicate + +/a whole line/match_line,multiline + Rhubarb \na whole line\n custard +\= Expect no match + Not a whole line + +# Perl gets this wrong, failing to capture 'b' in group 1. + +/^(b+|a){1,2}?bc/ + bbc + +# And again here, for the "babc" subject string. + +/^(b*|ba){1,2}?bc/ + babc + bbabc + bababc +\= Expect no match + bababbc + babababc + +/[[:digit:]-a]/ + +/[[:digit:]-[:print:]]/ + +/[\d-a]/ + +/[\H-z]/ + +/[\d-[:print:]]/ + +# Perl gets the second of these wrong, giving no match. 
+ +"(?<=(a))\1?b"I + ab + aaab + +"(?=(a))\1?b"I + ab + aaab + +# JIT does not support callout_extra + +/(*NO_JIT)(a+)b/auto_callout,no_start_optimize,no_auto_possess +\= Expect no match + aac\=callout_extra + +/(*NO_JIT)a+(?C'XXX')b/no_start_optimize,no_auto_possess +\= Expect no match + aac\=callout_extra + +/\n/firstline + xyz\nabc + +/\nabc/firstline + xyz\nabc + +/\x{0a}abc/firstline,newline=crlf +\= Expect no match + xyz\r\nabc + +/[abc]/firstline +\= Expect no match + \na + +# These tests are matched in test 1 as they are Perl compatible. Here we are +# looking at what does and does not get auto-possessified. + +/(?(DEFINE)(?a?))^(?&optional_a)a$/B + +/(?(DEFINE)(?a?)X)^(?&optional_a)a$/B + +/^(a?)b(?1)a/B + +/^(a?)+b(?1)a/B + +/^(a?)++b(?1)a/B + +/^(a?)+b/B + +/(?=a+)a(a+)++b/B + +/(?<=(?=.){4,5}x)/B + +# Perl behaves differently with these when optimization is turned off + +/a(*PRUNE:X)bc|qq/mark,no_start_optimize +\= Expect no match + axy + +/a(*THEN:X)bc|qq/mark,no_start_optimize +\= Expect no match + axy + +/(?^x-i)AB/ + +/(?^-i)AB/ + +/(?x-i-i)/ + +/(?(?=^))b/I + abc + +/(?(?=^)|)b/I + abc + +/(?(?=^)|^)b/I + bbc +\= Expect no match + abc + +/(?(1)^|^())/I + +/(?(1)^())b/I + +/(?(1)^())+b/I,aftertext + abc + +/(?(1)^()|^)+b/I,aftertext + bbc +\= Expect no match + abc + +/(?(1)^()|^)*b/I,aftertext + bbc + abc + xbc + +/(?(1)^())+b/I,aftertext + abc + +/(?(1)^a()|^a)+b/I,aftertext + abc +\= Expect no match + bbc + +/(?(1)^|^(a))+b/I,aftertext + abc +\= Expect no match + bbc + +/(?(1)^a()|^a)*b/I,aftertext + abc + bbc + xbc + +/a(b)c|xyz/g,allvector,replace=<$0> + abcdefabcpqr\=ovector=4 + abxyz\=ovector=4 + abcdefxyz\=ovector=4 + +/a(b)c|xyz/allvector + abcdef\=ovector=4 + abxyz\=ovector=4 + +/a(b)c|xyz/g,replace=<$0>,substitute_callout + abcdefabcpqr + abxyzpqrabcxyz + 12abc34xyz99abc55\=substitute_stop=2 + 12abc34xyz99abc55\=substitute_skip=1 + 12abc34xyz99abc55\=substitute_skip=2 + +/a(b)c|xyz/g,replace=<$0> + abcdefabcpqr + abxyzpqrabcxyz + 
12abc34xyz\=substitute_stop=2 + 12abc34xyz\=substitute_skip=1 + +/a(b)c|xyz/replace=<$0> + abcdefabcpqr + 12abc34xyz\=substitute_skip=1 + 12abc34xyz\=substitute_stop=1 + +/abc\rdef/ + abc\ndef + +/abc\rdef\x{0d}xyz/escaped_cr_is_lf + abc\ndef\rxyz +\= Expect no match + abc\ndef\nxyz + +/(?(*ACCEPT)xxx)/ + +/(?(*atomic:xx)xxx)/ + +/(?(*script_run:xxx)zzz)/ + +/foobar/ + the foobar thing\=copy_matched_subject + the foobar thing\=copy_matched_subject,zero_terminate + +/foobar/g + the foobar thing foobar again\=copy_matched_subject + +/(*:XX)^abc/I + +/(*COMMIT:XX)^abc/I + +/(*ACCEPT:XX)^abc/I + +/abc/replace=xyz + abc\=null_context + +/abc/replace=xyz,substitute_callout + abc +\= Expect error message + abc\=null_context + +/\[()]{65535}()/expand + +/\[()]{65535}(?)/expand + +/a(?:(*ACCEPT))??bc/ + abc + axy + +/a(*ACCEPT)??bc/ + abc + axy + +/a(*ACCEPT:XX)??bc/mark + abc + axy + +/(*:\)?/ + +/(*:\Q \E){5}/alt_verbnames + +/(?=abc)/I + +/(?|(X)|(XY))\1abc/I + +/(?|(a)|(bcde))(c)\2/I + +/(?|(a)|(bcde))(c)\1/I + +/(?|(?'A'a)|(?'A'bcde))(?'B'c)\k'B'(?'A')/I,dupnames + +/(?|(?'A'a)|(?'A'bcde))(?'B'c)\k'A'(?'A')/I,dupnames + +/((a|)+)+Z/I + +/((?=a))[abcd]/I + +/A(?:(*ACCEPT))?B/info + +/(A(*ACCEPT)??B)C/ + ABC + AXY + +/(?<=(?<=a)b)c.*/I + abc\=ph +\= Expect no match + xbc\=ph + +/(?<=ab)c.*/I + abc\=ph +\= Expect no match + xbc\=ph + +/(?<=a(?<=a|a)c)/I + +/(?<=a(?<=a|ba)c)/I + +/(?<=(?<=a)b)(?.*?\b\1\b){3}/ + word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 + +/\A(*napla:.*\b(\w++))(?>.*?\b\1\b){3}/ + word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 + +/\A(?*.*\b(\w++))(?>.*?\b\1\b){3}/ + word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 + +/(*plb:(.)..|(.)...)(\1|\2)/ + abcdb\=offset=4 + abcda\=offset=4 + +/(*naplb:(.)..|(.)...)(\1|\2)/ + abcdb\=offset=4 + abcda\=offset=4 + +/(?<*(.)..|(.)...)(\1|\2)/ + abcdb\=offset=4 + abcda\=offset=4 + +/(*non_atomic_positive_lookahead:ab)/B + +/(*non_atomic_positive_lookbehind:ab)/B + +/(*pla:ab+)/B + 
+/(*napla:ab+)/B + +/(*napla:)+/ + +/(*naplb:)+/ + +/(*napla:^x|^y)/I + +/(*napla:abc|abd)/I + +/(*napla:a|(.)(*ACCEPT)zz)\1../ + abcd + +/(*napla:a(*ACCEPT)zz|(.))\1../ + abcd + +/(*napla:a|(*COMMIT)(.))\1\1/ + aabc +\= Expect no match + abbc + +/(*napla:a|(.))\1\1/ + aabc + abbc + +/(*naplb:ab?c|PQ).../g + abcdefgacxyzPQR123 + +# ---- + +# Expect error (recursion => not fixed length) +/(\2)((?=(?<=\1)))/ + +/c*+(?<=[bc])/ + abc\=ph + ab\=ph + abc\=ps + ab\=ps + +/c++(?<=[bc])/ + abc\=ph + ab\=ph + +/(?<=(?=.(?<=x)))/ + abx + ab\=ph + bxyz + xyz + +/\z/ + abc\=ph + abc\=ps + +/\Z/ + abc\=ph + abc\=ps + abc\n\=ph + abc\n\=ps + +/(?![ab]).*/ + ab\=ph + +/c*+/ + ab\=ph,offset=2 + +/\A\s*(a|(?:[^`]{28500}){4})/I + a + +/\A\s*((?:[^`]{28500}){4})/I + +/\A\s*((?:[^`]{28500}){4}|a)/I + a + +/(?a)(?()b)((?<=b).*)/B + +/(?(1)b)((?<=b).*)/B + +/(?(R1)b)((?<=b).*)/B + +/(?(DEFINE)b)((?<=b).*)/B + +/(?(VERSION=10.3)b)((?<=b).*)/B + +/[aA]b[cC]/IB + +/[cc]abcd/I + +/[Cc]abcd/I + +/[c]abcd/I + +/(?:c|C)abcd/I + +/(a)?a/I + manm + +/^(?|(\*)(*napla:\S*_(\2?+.+))|(\w)(?=\S*_(\2?+\1)))+_\2$/ + *abc_12345abc + +/^(?|(\*)(*napla:\S*_(\3?+.+))|(\w)(?=\S*_((\2?+\1))))+_\2$/ + *abc_12345abc + +/^((\1+)(?C)|\d)+133X$/ + 111133X\=callout_capture + +/abc/replace=xyz,substitute_replacement_only + 123abc456 + +/a(?b)c(?d)e/g,replace=X$ONE+${TWO}Z,substitute_replacement_only + "abcde-abcde-" + +/a(b)c|xyz/g,replace=<$0>,substitute_callout,substitute_replacement_only + abcdefabcpqr + abxyzpqrabcxyz + 12abc34xyz99abc55\=substitute_stop=2 + 12abc34xyz99abc55\=substitute_skip=1 + 12abc34xyz99abc55\=substitute_skip=2 + +/a(..)d/replace=>$1<,substitute_matched + xyzabcdxyzabcdxyz + xyzabcdxyzabcdxyz\=ovector=2 +\= Expect error + xyzabcdxyzabcdxyz\=ovector=1 + +/a(..)d/g,replace=>$1<,substitute_matched + xyzabcdxyzabcdxyz + xyzabcdxyzabcdxyz\=ovector=2 +\= Expect error + xyzabcdxyzabcdxyz\=ovector=1 + xyzabcdxyzabcdxyz\=ovector=1,substitute_unset_empty + 
+/55|a(..)d/g,replace=>$1<,substitute_matched + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty +\= Expect error + xyz55abcdxyzabcdxyz\=ovector=2 + +/55|a(..)d/replace=>$1<,substitute_matched + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + +/55|a(..)d/replace=>$1< + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + +/55|a(..)d/g,replace=>$1< + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + +/abc/replace=,caseless + XabcY + XABCY + +/abc/replace=[4],caseless + XabcY + XABCY + +/abc/replace=*,caseless + XabcY + XABCY + XabcY\=replace= + +# Expect non-fixed-length error + +"(?<=X(?(DEFINE)(.*))(?1))." + +/\sxxx\s/tables=1 +\= Expect no match + AB\x{85}xxx\x{a0}XYZ + +/\sxxx\s/tables=2 + AB\x{85}xxx\x{a0}XYZ + +/^\w+/tables=2 + École + +/^\w+/tables=3 + École + +#loadtables ./testbtables + +/^\w+/tables=3 + École + +/"(*MARK:>" 00 "<).."/hex,mark,no_start_optimize + AB + A\=ph +\= Expect no match + A + +/"(*MARK:>" 00 "<).(?C1)."/hex,mark,no_start_optimize + AB + +/(?(VERSION=0.0/ + +# Perl has made \K in lookarounds an error. PCRE2 now rejects as well, unless +# explicitly authorized. + +/(?=a\Kb)ab/ + +/(?=a\Kb)ab/allow_lookaround_bsk + ab + +/(?!a\Kb)ac/ + +/(?!a\Kb)ac/allow_lookaround_bsk + ac + +/^abc(?<=b\Kc)d/ + +/^abc(?<=b\Kc)d/allow_lookaround_bsk + abcd + +/^abc(?X<\=null_replacement + +/X+/replace=[20] + >XX<\=null_replacement + +# --------- + +/[Aa]{2}/BI + aabcd + +/A{2}/iBI + aabcd + +/[Aa]{2,3}/BI + aabcd + +-- + \[X]{-10} + +# Check imposition of maximum by match_data_create(). + +/abcd/ + abcd\=ovector=65536 + +# Use recurse to test \K and Mark in atomic scope. +/(?>this line\s*((?R)|)\K)/ + this line this line this line + +/(?>this line\s*((?R)|)(*MARK:A))/ + this line this line this line + +# Check use of NULL pattern with zero length. 
+ +//null_pattern,use_length + abc + +//null_pattern + +/bad null pattern/null_pattern,use_length + +/bad null pattern/null_pattern + +# -------- Variable length lookbehinds -------- +/12345(?<=\d{1,256})X/ + +/(?<=(\d{1,256}))X/max_varlookbehind=256 + 12345XYZ + +/12345(?<=a?bc)X/max_varlookbehind=0 + +/12345(?<=abc)X/max_varlookbehind=0 + +/(?a)|(?Pb))(?P=same))+/g,dupnames + bbbaaabaabb + +# -------- + +/ +/anchored, firstline + \x0a + +/ +/anchored,firstline,no_start_optimize + \x0a + +/ +/firstline + \x0a + abc\x0adef + +# This test is currently broken in the interpreter +# /|a(?0)/endanchored +# aaaa + +/A +/extended + +/(*ACCEPT)+/B,auto_callout + +/a\z/ + a + a\=noteol + +# This matches a character that only exists once in the subject, sort of like a +# hypothetical "(.)(?(?&NAME_PAT))\s+(?(?&ADDRESS_PAT)) + (?(DEFINE) + (?[a-z]+) + (?\d+) + )/x +/^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i + +#save testsaved1 + +# Do it again for some more patterns. + +/(*MARK:A)(*SKIP:B)(C|X)/mark +/(?:(?foo)|(?bar))\k/dupnames + +#save testsaved2 +#pattern -push + +# Reload the patterns, then pop them one by one and check them. + +#load testsaved1 +#load testsaved2 + +#pop info + foofoo + barbar + +#pop mark + C +\= Expect no match + D + +#pop + AmanaplanacanalPanama + +#pop info + metcalfe 33 + +# Check for an error when different tables are used. 
+ +/abc/push,tables=1 +/xyz/push,tables=2 +#save testsaved1 + +#pop + xyz + +#pop + abc + +#pop should give an error + pqr + +/abcd/pushcopy + abcd + +#pop + abcd + +#pop should give an error + +/abcd/push +#popcopy + abcd + +#pop + abcd + +/abcd/push +#save testsaved1 +#pop should give an error + +#load testsaved1 +#popcopy + abcd + +#pop + abcd + +#pop should give an error + +/abcd/pushtablescopy + abcd + +#popcopy + abcd + +#pop + abcd + +# Must only specify one of these + +//push,pushcopy + +//push,pushtablescopy + +//pushcopy,pushtablescopy + +# End of testinput20 diff --git a/testdata/testinput21 b/testdata/testinput21 new file mode 100644 index 0000000..1d1fbed --- /dev/null +++ b/testdata/testinput21 @@ -0,0 +1,16 @@ +# These are tests of \C that do not involve UTF. They are not run when \C is +# disabled by compiling with --enable-never-backslash-C. + +/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. \C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx + +/\D+\C \d+\C \S+\C \s+\C \W+\C \w+\C .+\C \R+\C \H+\C \h+\C \V+\C \v+\C a+\C \n+\C \C+\C/Bx + +/ab\Cde/never_backslash_c + +/ab\Cde/info + abXde + +/(?<=ab\Cde)X/ + abZdeX + +# End of testinput21 diff --git a/testdata/testinput22 b/testdata/testinput22 new file mode 100644 index 0000000..5e01fdc --- /dev/null +++ b/testdata/testinput22 @@ -0,0 +1,107 @@ +# Tests of \C when Unicode support is available. Note that \C is not supported +# for DFA matching in UTF mode, so this test is not run with -dfa. The output +# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match +# in some widths and not in others. + +/ab\Cde/utf,info + abXde + +# This should produce an error diagnostic (\C in UTF lookbehind) in 8-bit and +# 16-bit modes, but not in 32-bit mode. 
+ +/(?<=ab\Cde)X/utf + ab!deXYZ + +# Autopossessification tests + +/\C+\X \X+\C/Bx + +/\C+\X \X+\C/Bx,utf + +/\C\X*TӅ; +{0,6}\v+ F +/utf +\= Expect no match + Ӆ\x0a + +/\C(\W?Å¿)'?{{/utf +\= Expect no match + \\C(\\W?Å¿)'?{{ + +/X(\C{3})/utf + X\x{1234} + X\x{11234}Y + X\x{11234}YZ + +/X(\C{4})/utf + X\x{1234}YZ + X\x{11234}YZ + X\x{11234}YZW + +/X\C*/utf + XYZabcdce + +/X\C*?/utf + XYZabcde + +/X\C{3,5}/utf + Xabcdefg + X\x{1234} + X\x{1234}YZ + X\x{1234}\x{512} + X\x{1234}\x{512}YZ + X\x{11234}Y + X\x{11234}YZ + X\x{11234}\x{512} + X\x{11234}\x{512}YZ + X\x{11234}\x{512}\x{11234}Z + +/X\C{3,5}?/utf + Xabcdefg + X\x{1234} + X\x{1234}YZ + X\x{1234}\x{512} + X\x{11234}Y + X\x{11234}YZ + X\x{11234}\x{512}YZ + X\x{11234} + +/a\Cb/utf + aXb + a\nb + a\x{100}b + +/a\C\Cb/utf + a\x{100}b + a\x{12257}b + a\x{12257}\x{11234}b + +/ab\Cde/utf + abXde + +# This one is here not because it's different to Perl, but because the way +# the captured single code unit is displayed. (In Perl it becomes a character, +# and you can't tell the difference.) + +/X(\C)(.*)/utf + X\x{1234} + X\nabc + +# This one is here because Perl gives out a grumbly error message (quite +# correctly, but that messes up comparisons). + +/a\Cb/utf +\= Expect no match in 8-bit mode + a\x{100}b + +/^ab\C/utf,no_start_optimize +\= Expect no match - tests \C at end of subject + ab + +/\C[^\v]+\x80/utf + [Aá¿»BŀC] + +/\C[^\d]+\x80/utf + [Aá¿»BŀC] + +# End of testinput22 diff --git a/testdata/testinput23 b/testdata/testinput23 new file mode 100644 index 0000000..d0a9bc4 --- /dev/null +++ b/testdata/testinput23 @@ -0,0 +1,7 @@ +# This test is run when PCRE2 has been built with --enable-never-backslash-C, +# which disables the use of \C. All we can do is check that it gives the +# correct error message. 
+ +/a\Cb/ + +# End of testinput23 diff --git a/testdata/testinput24 b/testdata/testinput24 new file mode 100644 index 0000000..380e23c --- /dev/null +++ b/testdata/testinput24 @@ -0,0 +1,396 @@ +# This file tests the auxiliary pattern conversion features of the PCRE2 +# library, in non-UTF mode. + +#forbid_utf +#newline_default lf any anycrlf + +# -------- Tests of glob conversion -------- + +# Set the glob separator explicitly so that different OS defaults are not a +# problem. Then test various errors. + +#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/ + +/abc/posix + +# Separator must be / \ or . + +/a*b/convert_glob_separator=% + +# Can't have separator in a class + +"[ab/cd]" + +"[,-/]" + +/[ab/ + +# Length check + +/abc/convert_length=11 + +/abc/convert_length=12 + +# Now some actual tests + +/a?b[]xy]*c/ + azb]1234c + +# Tests from the gitwildmatch list, with some additions + +/foo/ + foo +/= Expect no match + bar + +// + \ + +/???/ + foo +\= Expect no match + foobar + +/*/ + foo + \ + +/f*/ + foo + f + +/*f/ + oof +\= Expect no match + foo + +/*foo*/ + foo + food + aprilfool + +/*ob*a*r*/ + foobar + +/*ab/ + aaaaaaabababab + +/foo\*/ + foo* + +/foo\*bar/ +\= Expect no match + foobar + +/f\\oo/ + f\\oo + +/*[al]?/ + ball + +/[ten]/ +\= Expect no match + ten + +/t[a-g]n/ + ten + +/a[]]b/ + a]b + +/a[]a-]b/ + +/a[]-]b/ + a-b + a]b +\= Expect no match + aab + +/a[]a-z]b/ + aab + +/]/ + ] + +/t[!a-g]n/ + ton +\= Expect no match + ten + +'[[:alpha:]][[:digit:]][[:upper:]]' + a1B + +'[[:digit:][:upper:][:space:]]' + A + 1 + \ \= +\= Expect no match + a + . 
+ +'[a-c[:digit:]x-z]' + 5 + b + y +\= Expect no match + q + +# End of gitwildmatch tests + +/*.j?g/ + pic01.jpg + .jpg + pic02.jxg +\= Expect no match + pic03.j/g + +/A[+-0]B/ + A+B + A.B + A0B +\= Expect no match + A/B + +/*x?z/ + abc.xyz +\= Expect no match + .xyz + +/?x?z/ + axyz +\= Expect no match + .xyz + +"[,-0]x?z" + ,xyz +\= Expect no match + /xyz + .xyz + +".x*" + .xabc + +/a[--0]z/ + a-z + a.z + a0z +\= Expect no match + a/z + a1z + +/<[a-c-d]>/ + + + + + <-> + +/a[[:digit:].]z/ + a1z + a.z +\= Expect no match + a:z + +/a[[:digit].]z/ + a[.]z + a:.]z + ad.]z + +/<[[:a[:digit:]b]>/ + <[> + <:> + + <9> + +\= Expect no match + + +/a*b/convert_glob_separator=\ + +/a*b/convert_glob_separator=. + +/a*b/convert_glob_separator=/ + +# Non control character checking + +/A\B\\C\D/ + +/\\{}\?\*+\[\]()|.^$/ + +/*a*\/*b*/ + +/?a?\/?b?/ + +/[a\\b\c][]][-][\]\-]/ + +/[^a\\b\c][!]][!-][^\]\-]/ + +/[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:word:][:xdigit:]]/ + +"[/-/]" + +/[-----]/ + +/[------]/ + +/[!------]/ + +/[[:alpha:]-a]/ + +/[[:alpha:]][[:punct:]][[:ascii:]]/ + +/[a-[:alpha:]]/ + +/[[:alpha:/ + +/[[:alpha:]/ + +/[[:alphaa:]]/ + +/[[:xdigi:]]/ + +/[[:xdigit::]]/ + +/****/ + +/**\/abc/ + abc + x/abc + xabc + +/abc\/**/ + +/abc\/**\/abc/ + +/**\/*a*b*g*n*t/ + abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txt + +/**\/*a*\/**/ + xx/xx/xx/xax/xx/xb + +/**\/*a*/ + xx/xx/xx/xax + xx/xx/xx/xax/xx + +/**\/*a*\/**\/*b*/ + xx/xx/xx/xax/xx/xb + xx/xx/xx/xax/xx/x + +"**a"convert=glob + a + c/b/a + c/b/aaa + +"a**/b"convert=glob + a/b + ab + +"a/**b"convert=glob + a/b + ab + +#pattern convert=glob:glob_no_starstar + +/***/ + +/**a**/ + +#pattern convert=unset +#pattern convert=glob:glob_no_wild_separator + +/*/ + +/*a*/ + +/**a**/ + +/a*b/ + +/*a*b*/ + +/??a??/ + +#pattern convert=unset +#pattern convert=glob,convert_glob_escape=0 + +/a\b\cd/ + +/**\/a/ + +/a`*b/convert_glob_escape=` + +/a`*b/convert_glob_escape=0 + 
+/a`*b/convert_glob_escape=x + +# -------- Tests of extended POSIX conversion -------- + +#pattern convert=unset:posix_extended + +/<[[:a[:digit:]b]>/ + <[> + <:> + + <9> + +\= Expect no match + + +/a+\1b\\c|d[ab\c]/ + +/<[]bc]>/ + <]> + + + +/<[^]bc]>/ + <.> +\= Expect no match + <]> + + +/(a)\1b/ + a1b +\= Expect no match + aab + +/(ab)c)d]/ + Xabc)d]Y + +/a***b/ + +# -------- Tests of basic POSIX conversion -------- + +#pattern convert=unset:posix_basic + +/a*b+c\+[def](ab)\(cd\)/ + +/\(a\)\1b/ + aab +\= Expect no match + a1b + +/how.to how\.to/ + how\nto how.to +\= Expect no match + how\x{0}to how.to + +/^how to \^how to/ + +/^*abc/ + +/*abc/ + X*abcY + +/**abc/ + XabcY + X*abcY + X**abcY + +/*ab\(*cd\)/ + +/^b\(c^d\)\(^e^f\)/ + +/a***b/ + +# End of testinput24 diff --git a/testdata/testinput25 b/testdata/testinput25 new file mode 100644 index 0000000..8bb5bb2 --- /dev/null +++ b/testdata/testinput25 @@ -0,0 +1,22 @@ +# This file tests the auxiliary pattern conversion features of the PCRE2 +# library, in UTF mode. + +#newline_default lf any anycrlf + +# -------- Tests of glob conversion -------- + +# Set the glob separator explicitly so that different OS defaults are not a +# problem. Then test various errors. + +#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/ + +# The fact that this one works in 13 bytes in the 8-bit library shows that the +# output is in UTF-8, though pcre2test shows the character as an escape. + +/'>' c4 a3 '<'/hex,utf,convert_length=13 + +# This expansion creates a string that is too long for the input buffer. + +/\[()]{65535}()/expand + +# End of testinput25 diff --git a/testdata/testinput26 b/testdata/testinput26 new file mode 100644 index 0000000..94b3abc --- /dev/null +++ b/testdata/testinput26 @@ -0,0 +1,2750 @@ +# These tests are generated by maint/GenerateTest26.py, do not edit. + +# Unicode Script Extension tests. 
+ +# Base script check +/^\p{sc=Latin}/utf + A + +/^\p{Script=Latn}/utf + \x{1df2a} + +# Script extension check +/^\p{Latin}/utf + \x{363} + +/^\p{scx=Latn}/utf + \x{a92e} + +# Script extension only character +/^\p{Latin}/utf + \x{363} + +/^\p{sc=Latin}/utf + \x{363} + +# Character not in script +/^\p{Latin}/utf + \x{1df2b} + +# Base script check +/^\p{sc=Greek}/utf + \x{370} + +/^\p{Script=Grek}/utf + \x{1d245} + +# Script extension check +/^\p{Greek}/utf + \x{342} + +/^\p{Script_Extensions=Grek}/utf + \x{1dc1} + +# Script extension only character +/^\p{Greek}/utf + \x{342} + +/^\p{sc=Greek}/utf + \x{342} + +# Character not in script +/^\p{Greek}/utf + \x{1d246} + +# Base script check +/^\p{sc=Cyrillic}/utf + \x{400} + +/^\p{Script=Cyrl}/utf + \x{1e08f} + +# Script extension check +/^\p{Cyrillic}/utf + \x{483} + +/^\p{scx=Cyrl}/utf + \x{a66f} + +# Script extension only character +/^\p{Cyrillic}/utf + \x{2e43} + +/^\p{sc=Cyrillic}/utf + \x{2e43} + +# Character not in script +/^\p{Cyrillic}/utf + \x{1e090} + +# Base script check +/^\p{sc=Arabic}/utf + \x{600} + +/^\p{Script=Arab}/utf + \x{1eef1} + +# Script extension check +/^\p{Arabic}/utf + \x{60c} + +/^\p{Script_Extensions=Arab}/utf + \x{102fb} + +# Script extension only character +/^\p{Arabic}/utf + \x{102e0} + +/^\p{sc=Arabic}/utf + \x{102e0} + +# Character not in script +/^\p{Arabic}/utf + \x{1eef2} + +# Base script check +/^\p{sc=Syriac}/utf + \x{700} + +/^\p{Script=Syrc}/utf + \x{86a} + +# Script extension check +/^\p{Syriac}/utf + \x{60c} + +/^\p{scx=Syrc}/utf + \x{1dfa} + +# Script extension only character +/^\p{Syriac}/utf + \x{1dfa} + +/^\p{sc=Syriac}/utf + \x{1dfa} + +# Character not in script +/^\p{Syriac}/utf + \x{1dfb} + +# Base script check +/^\p{sc=Thaana}/utf + \x{780} + +/^\p{Script=Thaa}/utf + \x{7b1} + +# Script extension check +/^\p{Thaana}/utf + \x{60c} + +/^\p{Script_Extensions=Thaa}/utf + \x{fdfd} + +# Script extension only character +/^\p{Thaana}/utf + \x{fdf2} + +/^\p{sc=Thaana}/utf + 
\x{fdf2} + +# Character not in script +/^\p{Thaana}/utf + \x{fdfe} + +# Base script check +/^\p{sc=Devanagari}/utf + \x{900} + +/^\p{Script=Deva}/utf + \x{11b09} + +# Script extension check +/^\p{Devanagari}/utf + \x{951} + +/^\p{scx=Deva}/utf + \x{a8f3} + +# Script extension only character +/^\p{Devanagari}/utf + \x{1cd1} + +/^\p{sc=Devanagari}/utf + \x{1cd1} + +# Character not in script +/^\p{Devanagari}/utf + \x{11b0a} + +# Base script check +/^\p{sc=Bengali}/utf + \x{980} + +/^\p{Script=Beng}/utf + \x{9fe} + +# Script extension check +/^\p{Bengali}/utf + \x{951} + +/^\p{Script_Extensions=Beng}/utf + \x{a8f1} + +# Script extension only character +/^\p{Bengali}/utf + \x{1cf7} + +/^\p{sc=Bengali}/utf + \x{1cf7} + +# Character not in script +/^\p{Bengali}/utf + \x{a8f2} + +# Base script check +/^\p{sc=Gurmukhi}/utf + \x{a01} + +/^\p{Script=Guru}/utf + \x{a76} + +# Script extension check +/^\p{Gurmukhi}/utf + \x{951} + +/^\p{scx=Guru}/utf + \x{a839} + +# Script extension only character +/^\p{Gurmukhi}/utf + \x{a836} + +/^\p{sc=Gurmukhi}/utf + \x{a836} + +# Character not in script +/^\p{Gurmukhi}/utf + \x{a83a} + +# Base script check +/^\p{sc=Gujarati}/utf + \x{a81} + +/^\p{Script=Gujr}/utf + \x{aff} + +# Script extension check +/^\p{Gujarati}/utf + \x{951} + +/^\p{Script_Extensions=Gujr}/utf + \x{a839} + +# Script extension only character +/^\p{Gujarati}/utf + \x{a836} + +/^\p{sc=Gujarati}/utf + \x{a836} + +# Character not in script +/^\p{Gujarati}/utf + \x{a83a} + +# Base script check +/^\p{sc=Oriya}/utf + \x{b01} + +/^\p{Script=Orya}/utf + \x{b77} + +# Script extension check +/^\p{Oriya}/utf + \x{951} + +/^\p{scx=Orya}/utf + \x{1cf2} + +# Script extension only character +/^\p{Oriya}/utf + \x{1cda} + +/^\p{sc=Oriya}/utf + \x{1cda} + +# Character not in script +/^\p{Oriya}/utf + \x{1cf3} + +# Base script check +/^\p{sc=Tamil}/utf + \x{b82} + +/^\p{Script=Taml}/utf + \x{11fff} + +# Script extension check +/^\p{Tamil}/utf + \x{951} + +/^\p{Script_Extensions=Taml}/utf 
+ \x{11fd3} + +# Script extension only character +/^\p{Tamil}/utf + \x{a8f3} + +/^\p{sc=Tamil}/utf + \x{a8f3} + +# Character not in script +/^\p{Tamil}/utf + \x{12000} + +# Base script check +/^\p{sc=Telugu}/utf + \x{c00} + +/^\p{Script=Telu}/utf + \x{c7f} + +# Script extension check +/^\p{Telugu}/utf + \x{951} + +/^\p{scx=Telu}/utf + \x{1cf2} + +# Script extension only character +/^\p{Telugu}/utf + \x{1cda} + +/^\p{sc=Telugu}/utf + \x{1cda} + +# Character not in script +/^\p{Telugu}/utf + \x{1cf3} + +# Base script check +/^\p{sc=Kannada}/utf + \x{c80} + +/^\p{Script=Knda}/utf + \x{cf3} + +# Script extension check +/^\p{Kannada}/utf + \x{951} + +/^\p{Script_Extensions=Knda}/utf + \x{a835} + +# Script extension only character +/^\p{Kannada}/utf + \x{1cf4} + +/^\p{sc=Kannada}/utf + \x{1cf4} + +# Character not in script +/^\p{Kannada}/utf + \x{a836} + +# Base script check +/^\p{sc=Malayalam}/utf + \x{d00} + +/^\p{Script=Mlym}/utf + \x{d7f} + +# Script extension check +/^\p{Malayalam}/utf + \x{951} + +/^\p{scx=Mlym}/utf + \x{a832} + +# Script extension only character +/^\p{Malayalam}/utf + \x{1cda} + +/^\p{sc=Malayalam}/utf + \x{1cda} + +# Character not in script +/^\p{Malayalam}/utf + \x{a833} + +# Base script check +/^\p{sc=Sinhala}/utf + \x{d81} + +/^\p{Script=Sinh}/utf + \x{111f4} + +# Script extension check +/^\p{Sinhala}/utf + \x{964} + +/^\p{Script_Extensions=Sinh}/utf + \x{965} + +# Script extension only character +/^\p{Sinhala}/utf + \x{964} + +/^\p{sc=Sinhala}/utf + \x{964} + +# Character not in script +/^\p{Sinhala}/utf + \x{111f5} + +# Base script check +/^\p{sc=Myanmar}/utf + \x{1000} + +/^\p{Script=Mymr}/utf + \x{aa7f} + +# Script extension check +/^\p{Myanmar}/utf + \x{1040} + +/^\p{scx=Mymr}/utf + \x{a92e} + +# Script extension only character +/^\p{Myanmar}/utf + \x{a92e} + +/^\p{sc=Myanmar}/utf + \x{a92e} + +# Character not in script +/^\p{Myanmar}/utf + \x{aa80} + +# Base script check +/^\p{sc=Georgian}/utf + \x{10a0} + +/^\p{Script=Geor}/utf + 
\x{2d2d} + +# Script extension check +/^\p{Georgian}/utf + \x{10fb} + +/^\p{Script_Extensions=Geor}/utf + \x{10fb} + +# Script extension only character +/^\p{Georgian}/utf + \x{10fb} + +/^\p{sc=Georgian}/utf + \x{10fb} + +# Character not in script +/^\p{Georgian}/utf + \x{2d2e} + +# Base script check +/^\p{sc=Hangul}/utf + \x{1100} + +/^\p{Script=Hang}/utf + \x{ffdc} + +# Script extension check +/^\p{Hangul}/utf + \x{3001} + +/^\p{scx=Hang}/utf + \x{ff65} + +# Script extension only character +/^\p{Hangul}/utf + \x{3003} + +/^\p{sc=Hangul}/utf + \x{3003} + +# Character not in script +/^\p{Hangul}/utf + \x{ffdd} + +# Base script check +/^\p{sc=Mongolian}/utf + \x{1800} + +/^\p{Script=Mong}/utf + \x{1166c} + +# Script extension check +/^\p{Mongolian}/utf + \x{1802} + +/^\p{Script_Extensions=Mong}/utf + \x{202f} + +# Script extension only character +/^\p{Mongolian}/utf + \x{202f} + +/^\p{sc=Mongolian}/utf + \x{202f} + +# Character not in script +/^\p{Mongolian}/utf + \x{1166d} + +# Base script check +/^\p{sc=Hiragana}/utf + \x{3041} + +/^\p{Script=Hira}/utf + \x{1f200} + +# Script extension check +/^\p{Hiragana}/utf + \x{3001} + +/^\p{scx=Hira}/utf + \x{ff9f} + +# Script extension only character +/^\p{Hiragana}/utf + \x{3031} + +/^\p{sc=Hiragana}/utf + \x{3031} + +# Character not in script +/^\p{Hiragana}/utf + \x{1f201} + +# Base script check +/^\p{sc=Katakana}/utf + \x{30a1} + +/^\p{Script=Kana}/utf + \x{1b167} + +# Script extension check +/^\p{Katakana}/utf + \x{3001} + +/^\p{Script_Extensions=Kana}/utf + \x{ff9f} + +# Script extension only character +/^\p{Katakana}/utf + \x{3031} + +/^\p{sc=Katakana}/utf + \x{3031} + +# Character not in script +/^\p{Katakana}/utf + \x{1b168} + +# Base script check +/^\p{sc=Bopomofo}/utf + \x{2ea} + +/^\p{Script=Bopo}/utf + \x{31bf} + +# Script extension check +/^\p{Bopomofo}/utf + \x{3001} + +/^\p{scx=Bopo}/utf + \x{ff65} + +# Script extension only character +/^\p{Bopomofo}/utf + \x{302a} + +/^\p{sc=Bopomofo}/utf + \x{302a} + +# 
Character not in script +/^\p{Bopomofo}/utf + \x{ff66} + +# Base script check +/^\p{sc=Han}/utf + \x{2e80} + +/^\p{Script=Hani}/utf + \x{323af} + +# Script extension check +/^\p{Han}/utf + \x{3001} + +/^\p{Script_Extensions=Hani}/utf + \x{1f251} + +# Script extension only character +/^\p{Han}/utf + \x{3006} + +/^\p{sc=Han}/utf + \x{3006} + +# Character not in script +/^\p{Han}/utf + \x{323b0} + +# Base script check +/^\p{sc=Yi}/utf + \x{a000} + +/^\p{Script=Yiii}/utf + \x{a4c6} + +# Script extension check +/^\p{Yi}/utf + \x{3001} + +/^\p{scx=Yiii}/utf + \x{ff65} + +# Script extension only character +/^\p{Yi}/utf + \x{3001} + +/^\p{sc=Yi}/utf + \x{3001} + +# Character not in script +/^\p{Yi}/utf + \x{ff66} + +# Base script check +/^\p{sc=Tagalog}/utf + \x{1700} + +/^\p{Script=Tglg}/utf + \x{171f} + +# Script extension check +/^\p{Tagalog}/utf + \x{1735} + +/^\p{Script_Extensions=Tglg}/utf + \x{1736} + +# Script extension only character +/^\p{Tagalog}/utf + \x{1735} + +/^\p{sc=Tagalog}/utf + \x{1735} + +# Character not in script +/^\p{Tagalog}/utf + \x{1737} + +# Base script check +/^\p{sc=Hanunoo}/utf + \x{1720} + +/^\p{Script=Hano}/utf + \x{1734} + +# Script extension check +/^\p{Hanunoo}/utf + \x{1735} + +/^\p{scx=Hano}/utf + \x{1736} + +# Script extension only character +/^\p{Hanunoo}/utf + \x{1735} + +/^\p{sc=Hanunoo}/utf + \x{1735} + +# Character not in script +/^\p{Hanunoo}/utf + \x{1737} + +# Base script check +/^\p{sc=Buhid}/utf + \x{1740} + +/^\p{Script=Buhd}/utf + \x{1753} + +# Script extension check +/^\p{Buhid}/utf + \x{1735} + +/^\p{Script_Extensions=Buhd}/utf + \x{1736} + +# Script extension only character +/^\p{Buhid}/utf + \x{1735} + +/^\p{sc=Buhid}/utf + \x{1735} + +# Character not in script +/^\p{Buhid}/utf + \x{1754} + +# Base script check +/^\p{sc=Tagbanwa}/utf + \x{1760} + +/^\p{Script=Tagb}/utf + \x{1773} + +# Script extension check +/^\p{Tagbanwa}/utf + \x{1735} + +/^\p{scx=Tagb}/utf + \x{1736} + +# Script extension only character 
+/^\p{Tagbanwa}/utf + \x{1735} + +/^\p{sc=Tagbanwa}/utf + \x{1735} + +# Character not in script +/^\p{Tagbanwa}/utf + \x{1774} + +# Base script check +/^\p{sc=Limbu}/utf + \x{1900} + +/^\p{Script=Limb}/utf + \x{194f} + +# Script extension check +/^\p{Limbu}/utf + \x{965} + +/^\p{Script_Extensions=Limb}/utf + \x{965} + +# Script extension only character +/^\p{Limbu}/utf + \x{965} + +/^\p{sc=Limbu}/utf + \x{965} + +# Character not in script +/^\p{Limbu}/utf + \x{1950} + +# Base script check +/^\p{sc=Tai_Le}/utf + \x{1950} + +/^\p{Script=Tale}/utf + \x{1974} + +# Script extension check +/^\p{Tai_Le}/utf + \x{1040} + +/^\p{scx=Tale}/utf + \x{1049} + +# Script extension only character +/^\p{Tai_Le}/utf + \x{1040} + +/^\p{sc=Tai_Le}/utf + \x{1040} + +# Character not in script +/^\p{Tai_Le}/utf + \x{1975} + +# Base script check +/^\p{sc=Linear_B}/utf + \x{10000} + +/^\p{Script=Linb}/utf + \x{100fa} + +# Script extension check +/^\p{Linear_B}/utf + \x{10100} + +/^\p{Script_Extensions=Linb}/utf + \x{1013f} + +# Script extension only character +/^\p{Linear_B}/utf + \x{10102} + +/^\p{sc=Linear_B}/utf + \x{10102} + +# Character not in script +/^\p{Linear_B}/utf + \x{10140} + +# Base script check +/^\p{sc=Cypriot}/utf + \x{10800} + +/^\p{Script=Cprt}/utf + \x{1083f} + +# Script extension check +/^\p{Cypriot}/utf + \x{10100} + +/^\p{scx=Cprt}/utf + \x{1013f} + +# Script extension only character +/^\p{Cypriot}/utf + \x{10102} + +/^\p{sc=Cypriot}/utf + \x{10102} + +# Character not in script +/^\p{Cypriot}/utf + \x{10840} + +# Base script check +/^\p{sc=Buginese}/utf + \x{1a00} + +/^\p{Script=Bugi}/utf + \x{1a1f} + +# Script extension check +/^\p{Buginese}/utf + \x{a9cf} + +/^\p{Script_Extensions=Bugi}/utf + \x{a9cf} + +# Script extension only character +/^\p{Buginese}/utf + \x{a9cf} + +/^\p{sc=Buginese}/utf + \x{a9cf} + +# Character not in script +/^\p{Buginese}/utf + \x{a9d0} + +# Base script check +/^\p{sc=Coptic}/utf + \x{3e2} + +/^\p{Script=Copt}/utf + \x{2cff} + +# Script 
extension check +/^\p{Coptic}/utf + \x{102e0} + +/^\p{scx=Copt}/utf + \x{102fb} + +# Script extension only character +/^\p{Coptic}/utf + \x{102e0} + +/^\p{sc=Coptic}/utf + \x{102e0} + +# Character not in script +/^\p{Coptic}/utf + \x{102fc} + +# Base script check +/^\p{sc=Glagolitic}/utf + \x{2c00} + +/^\p{Script=Glag}/utf + \x{1e02a} + +# Script extension check +/^\p{Glagolitic}/utf + \x{484} + +/^\p{Script_Extensions=Glag}/utf + \x{a66f} + +# Script extension only character +/^\p{Glagolitic}/utf + \x{484} + +/^\p{sc=Glagolitic}/utf + \x{484} + +# Character not in script +/^\p{Glagolitic}/utf + \x{1e02b} + +# Base script check +/^\p{sc=Syloti_Nagri}/utf + \x{a800} + +/^\p{Script=Sylo}/utf + \x{a82c} + +# Script extension check +/^\p{Syloti_Nagri}/utf + \x{964} + +/^\p{scx=Sylo}/utf + \x{9ef} + +# Script extension only character +/^\p{Syloti_Nagri}/utf + \x{9e6} + +/^\p{sc=Syloti_Nagri}/utf + \x{9e6} + +# Character not in script +/^\p{Syloti_Nagri}/utf + \x{a82d} + +# Base script check +/^\p{sc=Phags_Pa}/utf + \x{a840} + +/^\p{Script=Phag}/utf + \x{a877} + +# Script extension check +/^\p{Phags_Pa}/utf + \x{1802} + +/^\p{Script_Extensions=Phag}/utf + \x{1805} + +# Script extension only character +/^\p{Phags_Pa}/utf + \x{1802} + +/^\p{sc=Phags_Pa}/utf + \x{1802} + +# Character not in script +/^\p{Phags_Pa}/utf + \x{a878} + +# Base script check +/^\p{sc=Nko}/utf + \x{7c0} + +/^\p{Script=Nkoo}/utf + \x{7ff} + +# Script extension check +/^\p{Nko}/utf + \x{60c} + +/^\p{scx=Nkoo}/utf + \x{fd3f} + +# Script extension only character +/^\p{Nko}/utf + \x{fd3e} + +/^\p{sc=Nko}/utf + \x{fd3e} + +# Character not in script +/^\p{Nko}/utf + \x{fd40} + +# Base script check +/^\p{sc=Kayah_Li}/utf + \x{a900} + +/^\p{Script=Kali}/utf + \x{a92f} + +# Script extension check +/^\p{Kayah_Li}/utf + \x{a92e} + +/^\p{Script_Extensions=Kali}/utf + \x{a92e} + +# Script extension only character +/^\p{Kayah_Li}/utf + \x{a92e} + +/^\p{sc=Kayah_Li}/utf + \x{a92e} + +# Character not in script 
+/^\p{Kayah_Li}/utf + \x{a930} + +# Base script check +/^\p{sc=Javanese}/utf + \x{a980} + +/^\p{Script=Java}/utf + \x{a9df} + +# Script extension check +/^\p{Javanese}/utf + \x{a9cf} + +/^\p{scx=Java}/utf + \x{a9cf} + +# Script extension only character +/^\p{Javanese}/utf + \x{a9cf} + +/^\p{sc=Javanese}/utf + \x{a9cf} + +# Character not in script +/^\p{Javanese}/utf + \x{a9e0} + +# Base script check +/^\p{sc=Kaithi}/utf + \x{11080} + +/^\p{Script=Kthi}/utf + \x{110cd} + +# Script extension check +/^\p{Kaithi}/utf + \x{966} + +/^\p{Script_Extensions=Kthi}/utf + \x{a839} + +# Script extension only character +/^\p{Kaithi}/utf + \x{966} + +/^\p{sc=Kaithi}/utf + \x{966} + +# Character not in script +/^\p{Kaithi}/utf + \x{110ce} + +# Base script check +/^\p{sc=Mandaic}/utf + \x{840} + +/^\p{Script=Mand}/utf + \x{85e} + +# Script extension check +/^\p{Mandaic}/utf + \x{640} + +/^\p{scx=Mand}/utf + \x{640} + +# Script extension only character +/^\p{Mandaic}/utf + \x{640} + +/^\p{sc=Mandaic}/utf + \x{640} + +# Character not in script +/^\p{Mandaic}/utf + \x{85f} + +# Base script check +/^\p{sc=Chakma}/utf + \x{11100} + +/^\p{Script=Cakm}/utf + \x{11147} + +# Script extension check +/^\p{Chakma}/utf + \x{9e6} + +/^\p{Script_Extensions=Cakm}/utf + \x{1049} + +# Script extension only character +/^\p{Chakma}/utf + \x{9e6} + +/^\p{sc=Chakma}/utf + \x{9e6} + +# Character not in script +/^\p{Chakma}/utf + \x{11148} + +# Base script check +/^\p{sc=Sharada}/utf + \x{11180} + +/^\p{Script=Shrd}/utf + \x{111df} + +# Script extension check +/^\p{Sharada}/utf + \x{951} + +/^\p{scx=Shrd}/utf + \x{1ce0} + +# Script extension only character +/^\p{Sharada}/utf + \x{1cd7} + +/^\p{sc=Sharada}/utf + \x{1cd7} + +# Character not in script +/^\p{Sharada}/utf + \x{111e0} + +# Base script check +/^\p{sc=Takri}/utf + \x{11680} + +/^\p{Script=Takr}/utf + \x{116c9} + +# Script extension check +/^\p{Takri}/utf + \x{964} + +/^\p{Script_Extensions=Takr}/utf + \x{a839} + +# Script extension only character 
+/^\p{Takri}/utf + \x{a836} + +/^\p{sc=Takri}/utf + \x{a836} + +# Character not in script +/^\p{Takri}/utf + \x{116ca} + +# Base script check +/^\p{sc=Duployan}/utf + \x{1bc00} + +/^\p{Script=Dupl}/utf + \x{1bc9f} + +# Script extension check +/^\p{Duployan}/utf + \x{1bca0} + +/^\p{scx=Dupl}/utf + \x{1bca3} + +# Script extension only character +/^\p{Duployan}/utf + \x{1bca0} + +/^\p{sc=Duployan}/utf + \x{1bca0} + +# Character not in script +/^\p{Duployan}/utf + \x{1bca4} + +# Base script check +/^\p{sc=Grantha}/utf + \x{11300} + +/^\p{Script=Gran}/utf + \x{11374} + +# Script extension check +/^\p{Grantha}/utf + \x{951} + +/^\p{Script_Extensions=Gran}/utf + \x{11fd3} + +# Script extension only character +/^\p{Grantha}/utf + \x{1cd3} + +/^\p{sc=Grantha}/utf + \x{1cd3} + +# Character not in script +/^\p{Grantha}/utf + \x{11fd4} + +# Base script check +/^\p{sc=Khojki}/utf + \x{11200} + +/^\p{Script=Khoj}/utf + \x{11241} + +# Script extension check +/^\p{Khojki}/utf + \x{ae6} + +/^\p{scx=Khoj}/utf + \x{a839} + +# Script extension only character +/^\p{Khojki}/utf + \x{ae6} + +/^\p{sc=Khojki}/utf + \x{ae6} + +# Character not in script +/^\p{Khojki}/utf + \x{11242} + +# Base script check +/^\p{sc=Linear_A}/utf + \x{10600} + +/^\p{Script=Lina}/utf + \x{10767} + +# Script extension check +/^\p{Linear_A}/utf + \x{10107} + +/^\p{Script_Extensions=Lina}/utf + \x{10133} + +# Script extension only character +/^\p{Linear_A}/utf + \x{10107} + +/^\p{sc=Linear_A}/utf + \x{10107} + +# Character not in script +/^\p{Linear_A}/utf + \x{10768} + +# Base script check +/^\p{sc=Mahajani}/utf + \x{11150} + +/^\p{Script=Mahj}/utf + \x{11176} + +# Script extension check +/^\p{Mahajani}/utf + \x{964} + +/^\p{scx=Mahj}/utf + \x{a839} + +# Script extension only character +/^\p{Mahajani}/utf + \x{966} + +/^\p{sc=Mahajani}/utf + \x{966} + +# Character not in script +/^\p{Mahajani}/utf + \x{11177} + +# Base script check +/^\p{sc=Manichaean}/utf + \x{10ac0} + +/^\p{Script=Mani}/utf + \x{10af6} + +# 
Script extension check +/^\p{Manichaean}/utf + \x{640} + +/^\p{Script_Extensions=Mani}/utf + \x{10af2} + +# Script extension only character +/^\p{Manichaean}/utf + \x{640} + +/^\p{sc=Manichaean}/utf + \x{640} + +# Character not in script +/^\p{Manichaean}/utf + \x{10af7} + +# Base script check +/^\p{sc=Modi}/utf + \x{11600} + +/^\p{Script=Modi}/utf + \x{11659} + +# Script extension check +/^\p{Modi}/utf + \x{a830} + +/^\p{scx=Modi}/utf + \x{a839} + +# Script extension only character +/^\p{Modi}/utf + \x{a836} + +/^\p{sc=Modi}/utf + \x{a836} + +# Character not in script +/^\p{Modi}/utf + \x{1165a} + +# Base script check +/^\p{sc=Old_Permic}/utf + \x{10350} + +/^\p{Script=Perm}/utf + \x{1037a} + +# Script extension check +/^\p{Old_Permic}/utf + \x{483} + +/^\p{Script_Extensions=Perm}/utf + \x{483} + +# Script extension only character +/^\p{Old_Permic}/utf + \x{483} + +/^\p{sc=Old_Permic}/utf + \x{483} + +# Character not in script +/^\p{Old_Permic}/utf + \x{1037b} + +# Base script check +/^\p{sc=Psalter_Pahlavi}/utf + \x{10b80} + +/^\p{Script=Phlp}/utf + \x{10baf} + +# Script extension check +/^\p{Psalter_Pahlavi}/utf + \x{640} + +/^\p{scx=Phlp}/utf + \x{640} + +# Script extension only character +/^\p{Psalter_Pahlavi}/utf + \x{640} + +/^\p{sc=Psalter_Pahlavi}/utf + \x{640} + +# Character not in script +/^\p{Psalter_Pahlavi}/utf + \x{10bb0} + +# Base script check +/^\p{sc=Khudawadi}/utf + \x{112b0} + +/^\p{Script=Sind}/utf + \x{112f9} + +# Script extension check +/^\p{Khudawadi}/utf + \x{964} + +/^\p{Script_Extensions=Sind}/utf + \x{a839} + +# Script extension only character +/^\p{Khudawadi}/utf + \x{a836} + +/^\p{sc=Khudawadi}/utf + \x{a836} + +# Character not in script +/^\p{Khudawadi}/utf + \x{112fa} + +# Base script check +/^\p{sc=Tirhuta}/utf + \x{11480} + +/^\p{Script=Tirh}/utf + \x{114d9} + +# Script extension check +/^\p{Tirhuta}/utf + \x{951} + +/^\p{scx=Tirh}/utf + \x{a839} + +# Script extension only character +/^\p{Tirhuta}/utf + \x{1cf2} + 
+/^\p{sc=Tirhuta}/utf + \x{1cf2} + +# Character not in script +/^\p{Tirhuta}/utf + \x{114da} + +# Base script check +/^\p{sc=Multani}/utf + \x{11280} + +/^\p{Script=Mult}/utf + \x{112a9} + +# Script extension check +/^\p{Multani}/utf + \x{a66} + +/^\p{Script_Extensions=Mult}/utf + \x{a6f} + +# Script extension only character +/^\p{Multani}/utf + \x{a66} + +/^\p{sc=Multani}/utf + \x{a66} + +# Character not in script +/^\p{Multani}/utf + \x{112aa} + +# Base script check +/^\p{sc=Adlam}/utf + \x{1e900} + +/^\p{Script=Adlm}/utf + \x{1e95f} + +# Script extension check +/^\p{Adlam}/utf + \x{61f} + +/^\p{scx=Adlm}/utf + \x{640} + +# Script extension only character +/^\p{Adlam}/utf + \x{61f} + +/^\p{sc=Adlam}/utf + \x{61f} + +# Character not in script +/^\p{Adlam}/utf + \x{1e960} + +# Base script check +/^\p{sc=Masaram_Gondi}/utf + \x{11d00} + +/^\p{Script=Gonm}/utf + \x{11d59} + +# Script extension check +/^\p{Masaram_Gondi}/utf + \x{964} + +/^\p{Script_Extensions=Gonm}/utf + \x{965} + +# Script extension only character +/^\p{Masaram_Gondi}/utf + \x{964} + +/^\p{sc=Masaram_Gondi}/utf + \x{964} + +# Character not in script +/^\p{Masaram_Gondi}/utf + \x{11d5a} + +# Base script check +/^\p{sc=Dogra}/utf + \x{11800} + +/^\p{Script=Dogr}/utf + \x{1183b} + +# Script extension check +/^\p{Dogra}/utf + \x{964} + +/^\p{scx=Dogr}/utf + \x{a839} + +# Script extension only character +/^\p{Dogra}/utf + \x{966} + +/^\p{sc=Dogra}/utf + \x{966} + +# Character not in script +/^\p{Dogra}/utf + \x{1183c} + +# Base script check +/^\p{sc=Gunjala_Gondi}/utf + \x{11d60} + +/^\p{Script=Gong}/utf + \x{11da9} + +# Script extension check +/^\p{Gunjala_Gondi}/utf + \x{964} + +/^\p{Script_Extensions=Gong}/utf + \x{965} + +# Script extension only character +/^\p{Gunjala_Gondi}/utf + \x{964} + +/^\p{sc=Gunjala_Gondi}/utf + \x{964} + +# Character not in script +/^\p{Gunjala_Gondi}/utf + \x{11daa} + +# Base script check +/^\p{sc=Hanifi_Rohingya}/utf + \x{10d00} + +/^\p{Script=Rohg}/utf + \x{10d39} + +# 
Script extension check +/^\p{Hanifi_Rohingya}/utf + \x{60c} + +/^\p{scx=Rohg}/utf + \x{6d4} + +# Script extension only character +/^\p{Hanifi_Rohingya}/utf + \x{6d4} + +/^\p{sc=Hanifi_Rohingya}/utf + \x{6d4} + +# Character not in script +/^\p{Hanifi_Rohingya}/utf + \x{10d3a} + +# Base script check +/^\p{sc=Sogdian}/utf + \x{10f30} + +/^\p{Script=Sogd}/utf + \x{10f59} + +# Script extension check +/^\p{Sogdian}/utf + \x{640} + +/^\p{Script_Extensions=Sogd}/utf + \x{640} + +# Script extension only character +/^\p{Sogdian}/utf + \x{640} + +/^\p{sc=Sogdian}/utf + \x{640} + +# Character not in script +/^\p{Sogdian}/utf + \x{10f5a} + +# Base script check +/^\p{sc=Nandinagari}/utf + \x{119a0} + +/^\p{Script=Nand}/utf + \x{119e4} + +# Script extension check +/^\p{Nandinagari}/utf + \x{964} + +/^\p{scx=Nand}/utf + \x{a835} + +# Script extension only character +/^\p{Nandinagari}/utf + \x{1cfa} + +/^\p{sc=Nandinagari}/utf + \x{1cfa} + +# Character not in script +/^\p{Nandinagari}/utf + \x{119e5} + +# Base script check +/^\p{sc=Yezidi}/utf + \x{10e80} + +/^\p{Script=Yezi}/utf + \x{10eb1} + +# Script extension check +/^\p{Yezidi}/utf + \x{60c} + +/^\p{Script_Extensions=Yezi}/utf + \x{669} + +# Script extension only character +/^\p{Yezidi}/utf + \x{660} + +/^\p{sc=Yezidi}/utf + \x{660} + +# Character not in script +/^\p{Yezidi}/utf + \x{10eb2} + +# Base script check +/^\p{sc=Cypro_Minoan}/utf + \x{12f90} + +/^\p{Script=Cpmn}/utf + \x{12ff2} + +# Script extension check +/^\p{Cypro_Minoan}/utf + \x{10100} + +/^\p{scx=Cpmn}/utf + \x{10101} + +# Script extension only character +/^\p{Cypro_Minoan}/utf + \x{10100} + +/^\p{sc=Cypro_Minoan}/utf + \x{10100} + +# Character not in script +/^\p{Cypro_Minoan}/utf + \x{12ff3} + +# Base script check +/^\p{sc=Old_Uyghur}/utf + \x{10f70} + +/^\p{Script=Ougr}/utf + \x{10f89} + +# Script extension check +/^\p{Old_Uyghur}/utf + \x{640} + +/^\p{Script_Extensions=Ougr}/utf + \x{10af2} + +# Script extension only character +/^\p{Old_Uyghur}/utf + 
\x{10af2} + +/^\p{sc=Old_Uyghur}/utf + \x{10af2} + +# Character not in script +/^\p{Old_Uyghur}/utf + \x{10f8a} + +# Base script check +/^\p{sc=Common}/utf + \x{00} + +/^\p{Script=Zyyy}/utf + \x{e007f} + +# Character not in script +/^\p{Common}/utf + \x{e0080} + +# Base script check +/^\p{sc=Armenian}/utf + \x{531} + +/^\p{Script=Armn}/utf + \x{fb17} + +# Character not in script +/^\p{Armenian}/utf + \x{fb18} + +# Base script check +/^\p{sc=Hebrew}/utf + \x{591} + +/^\p{Script=Hebr}/utf + \x{fb4f} + +# Character not in script +/^\p{Hebrew}/utf + \x{fb50} + +# Base script check +/^\p{sc=Thai}/utf + \x{e01} + +/^\p{Script=Thai}/utf + \x{e5b} + +# Character not in script +/^\p{Thai}/utf + \x{e5c} + +# Base script check +/^\p{sc=Lao}/utf + \x{e81} + +/^\p{Script=Laoo}/utf + \x{edf} + +# Character not in script +/^\p{Lao}/utf + \x{ee0} + +# Base script check +/^\p{sc=Tibetan}/utf + \x{f00} + +/^\p{Script=Tibt}/utf + \x{fda} + +# Character not in script +/^\p{Tibetan}/utf + \x{fdb} + +# Base script check +/^\p{sc=Ethiopic}/utf + \x{1200} + +/^\p{Script=Ethi}/utf + \x{1e7fe} + +# Character not in script +/^\p{Ethiopic}/utf + \x{1e7ff} + +# Base script check +/^\p{sc=Cherokee}/utf + \x{13a0} + +/^\p{Script=Cher}/utf + \x{abbf} + +# Character not in script +/^\p{Cherokee}/utf + \x{abc0} + +# Base script check +/^\p{sc=Canadian_Aboriginal}/utf + \x{1400} + +/^\p{Script=Cans}/utf + \x{11abf} + +# Character not in script +/^\p{Canadian_Aboriginal}/utf + \x{11ac0} + +# Base script check +/^\p{sc=Ogham}/utf + \x{1680} + +/^\p{Script=Ogam}/utf + \x{169c} + +# Character not in script +/^\p{Ogham}/utf + \x{169d} + +# Base script check +/^\p{sc=Runic}/utf + \x{16a0} + +/^\p{Script=Runr}/utf + \x{16f8} + +# Character not in script +/^\p{Runic}/utf + \x{16f9} + +# Base script check +/^\p{sc=Khmer}/utf + \x{1780} + +/^\p{Script=Khmr}/utf + \x{19ff} + +# Character not in script +/^\p{Khmer}/utf + \x{1a00} + +# Base script check +/^\p{sc=Old_Italic}/utf + \x{10300} + 
+/^\p{Script=Ital}/utf + \x{1032f} + +# Character not in script +/^\p{Old_Italic}/utf + \x{10330} + +# Base script check +/^\p{sc=Gothic}/utf + \x{10330} + +/^\p{Script=Goth}/utf + \x{1034a} + +# Character not in script +/^\p{Gothic}/utf + \x{1034b} + +# Base script check +/^\p{sc=Deseret}/utf + \x{10400} + +/^\p{Script=Dsrt}/utf + \x{1044f} + +# Character not in script +/^\p{Deseret}/utf + \x{10450} + +# Base script check +/^\p{sc=Inherited}/utf + \x{300} + +/^\p{Script=Zinh}/utf + \x{e01ef} + +# Character not in script +/^\p{Inherited}/utf + \x{e01f0} + +# Base script check +/^\p{sc=Ugaritic}/utf + \x{10380} + +/^\p{Script=Ugar}/utf + \x{1039f} + +# Character not in script +/^\p{Ugaritic}/utf + \x{103a0} + +# Base script check +/^\p{sc=Shavian}/utf + \x{10450} + +/^\p{Script=Shaw}/utf + \x{1047f} + +# Character not in script +/^\p{Shavian}/utf + \x{10480} + +# Base script check +/^\p{sc=Osmanya}/utf + \x{10480} + +/^\p{Script=Osma}/utf + \x{104a9} + +# Character not in script +/^\p{Osmanya}/utf + \x{104aa} + +# Base script check +/^\p{sc=Braille}/utf + \x{2800} + +/^\p{Script=Brai}/utf + \x{28ff} + +# Character not in script +/^\p{Braille}/utf + \x{2900} + +# Base script check +/^\p{sc=New_Tai_Lue}/utf + \x{1980} + +/^\p{Script=Talu}/utf + \x{19df} + +# Character not in script +/^\p{New_Tai_Lue}/utf + \x{19e0} + +# Base script check +/^\p{sc=Tifinagh}/utf + \x{2d30} + +/^\p{Script=Tfng}/utf + \x{2d7f} + +# Character not in script +/^\p{Tifinagh}/utf + \x{2d80} + +# Base script check +/^\p{sc=Old_Persian}/utf + \x{103a0} + +/^\p{Script=Xpeo}/utf + \x{103d5} + +# Character not in script +/^\p{Old_Persian}/utf + \x{103d6} + +# Base script check +/^\p{sc=Kharoshthi}/utf + \x{10a00} + +/^\p{Script=Khar}/utf + \x{10a58} + +# Character not in script +/^\p{Kharoshthi}/utf + \x{10a59} + +# Base script check +/^\p{sc=Balinese}/utf + \x{1b00} + +/^\p{Script=Bali}/utf + \x{1b7e} + +# Character not in script +/^\p{Balinese}/utf + \x{1b7f} + +# Base script check 
+/^\p{sc=Cuneiform}/utf + \x{12000} + +/^\p{Script=Xsux}/utf + \x{12543} + +# Character not in script +/^\p{Cuneiform}/utf + \x{12544} + +# Base script check +/^\p{sc=Phoenician}/utf + \x{10900} + +/^\p{Script=Phnx}/utf + \x{1091f} + +# Character not in script +/^\p{Phoenician}/utf + \x{10920} + +# Base script check +/^\p{sc=Sundanese}/utf + \x{1b80} + +/^\p{Script=Sund}/utf + \x{1cc7} + +# Character not in script +/^\p{Sundanese}/utf + \x{1cc8} + +# Base script check +/^\p{sc=Lepcha}/utf + \x{1c00} + +/^\p{Script=Lepc}/utf + \x{1c4f} + +# Character not in script +/^\p{Lepcha}/utf + \x{1c50} + +# Base script check +/^\p{sc=Ol_Chiki}/utf + \x{1c50} + +/^\p{Script=Olck}/utf + \x{1c7f} + +# Character not in script +/^\p{Ol_Chiki}/utf + \x{1c80} + +# Base script check +/^\p{sc=Vai}/utf + \x{a500} + +/^\p{Script=Vaii}/utf + \x{a62b} + +# Character not in script +/^\p{Vai}/utf + \x{a62c} + +# Base script check +/^\p{sc=Saurashtra}/utf + \x{a880} + +/^\p{Script=Saur}/utf + \x{a8d9} + +# Character not in script +/^\p{Saurashtra}/utf + \x{a8da} + +# Base script check +/^\p{sc=Rejang}/utf + \x{a930} + +/^\p{Script=Rjng}/utf + \x{a95f} + +# Character not in script +/^\p{Rejang}/utf + \x{a960} + +# Base script check +/^\p{sc=Lycian}/utf + \x{10280} + +/^\p{Script=Lyci}/utf + \x{1029c} + +# Character not in script +/^\p{Lycian}/utf + \x{1029d} + +# Base script check +/^\p{sc=Carian}/utf + \x{102a0} + +/^\p{Script=Cari}/utf + \x{102d0} + +# Character not in script +/^\p{Carian}/utf + \x{102d1} + +# Base script check +/^\p{sc=Lydian}/utf + \x{10920} + +/^\p{Script=Lydi}/utf + \x{1093f} + +# Character not in script +/^\p{Lydian}/utf + \x{10940} + +# Base script check +/^\p{sc=Cham}/utf + \x{aa00} + +/^\p{Script=Cham}/utf + \x{aa5f} + +# Character not in script +/^\p{Cham}/utf + \x{aa60} + +# Base script check +/^\p{sc=Tai_Tham}/utf + \x{1a20} + +/^\p{Script=Lana}/utf + \x{1aad} + +# Character not in script +/^\p{Tai_Tham}/utf + \x{1aae} + +# Base script check 
+/^\p{sc=Tai_Viet}/utf + \x{aa80} + +/^\p{Script=Tavt}/utf + \x{aadf} + +# Character not in script +/^\p{Tai_Viet}/utf + \x{aae0} + +# Base script check +/^\p{sc=Avestan}/utf + \x{10b00} + +/^\p{Script=Avst}/utf + \x{10b3f} + +# Character not in script +/^\p{Avestan}/utf + \x{10b40} + +# Base script check +/^\p{sc=Egyptian_Hieroglyphs}/utf + \x{13000} + +/^\p{Script=Egyp}/utf + \x{13455} + +# Character not in script +/^\p{Egyptian_Hieroglyphs}/utf + \x{13456} + +# Base script check +/^\p{sc=Samaritan}/utf + \x{800} + +/^\p{Script=Samr}/utf + \x{83e} + +# Character not in script +/^\p{Samaritan}/utf + \x{83f} + +# Base script check +/^\p{sc=Lisu}/utf + \x{a4d0} + +/^\p{Script=Lisu}/utf + \x{11fb0} + +# Character not in script +/^\p{Lisu}/utf + \x{11fb1} + +# Base script check +/^\p{sc=Bamum}/utf + \x{a6a0} + +/^\p{Script=Bamu}/utf + \x{16a38} + +# Character not in script +/^\p{Bamum}/utf + \x{16a39} + +# Base script check +/^\p{sc=Meetei_Mayek}/utf + \x{aae0} + +/^\p{Script=Mtei}/utf + \x{abf9} + +# Character not in script +/^\p{Meetei_Mayek}/utf + \x{abfa} + +# Base script check +/^\p{sc=Imperial_Aramaic}/utf + \x{10840} + +/^\p{Script=Armi}/utf + \x{1085f} + +# Character not in script +/^\p{Imperial_Aramaic}/utf + \x{10860} + +# Base script check +/^\p{sc=Old_South_Arabian}/utf + \x{10a60} + +/^\p{Script=Sarb}/utf + \x{10a7f} + +# Character not in script +/^\p{Old_South_Arabian}/utf + \x{10a80} + +# Base script check +/^\p{sc=Inscriptional_Parthian}/utf + \x{10b40} + +/^\p{Script=Prti}/utf + \x{10b5f} + +# Character not in script +/^\p{Inscriptional_Parthian}/utf + \x{10b60} + +# Base script check +/^\p{sc=Inscriptional_Pahlavi}/utf + \x{10b60} + +/^\p{Script=Phli}/utf + \x{10b7f} + +# Character not in script +/^\p{Inscriptional_Pahlavi}/utf + \x{10b80} + +# Base script check +/^\p{sc=Old_Turkic}/utf + \x{10c00} + +/^\p{Script=Orkh}/utf + \x{10c48} + +# Character not in script +/^\p{Old_Turkic}/utf + \x{10c49} + +# Base script check +/^\p{sc=Batak}/utf + \x{1bc0} 
+ +/^\p{Script=Batk}/utf + \x{1bff} + +# Character not in script +/^\p{Batak}/utf + \x{1c00} + +# Base script check +/^\p{sc=Brahmi}/utf + \x{11000} + +/^\p{Script=Brah}/utf + \x{1107f} + +# Character not in script +/^\p{Brahmi}/utf + \x{11080} + +# Base script check +/^\p{sc=Meroitic_Cursive}/utf + \x{109a0} + +/^\p{Script=Merc}/utf + \x{109ff} + +# Character not in script +/^\p{Meroitic_Cursive}/utf + \x{10a00} + +# Base script check +/^\p{sc=Meroitic_Hieroglyphs}/utf + \x{10980} + +/^\p{Script=Mero}/utf + \x{1099f} + +# Character not in script +/^\p{Meroitic_Hieroglyphs}/utf + \x{109a0} + +# Base script check +/^\p{sc=Miao}/utf + \x{16f00} + +/^\p{Script=Plrd}/utf + \x{16f9f} + +# Character not in script +/^\p{Miao}/utf + \x{16fa0} + +# Base script check +/^\p{sc=Sora_Sompeng}/utf + \x{110d0} + +/^\p{Script=Sora}/utf + \x{110f9} + +# Character not in script +/^\p{Sora_Sompeng}/utf + \x{110fa} + +# Base script check +/^\p{sc=Caucasian_Albanian}/utf + \x{10530} + +/^\p{Script=Aghb}/utf + \x{1056f} + +# Character not in script +/^\p{Caucasian_Albanian}/utf + \x{10570} + +# Base script check +/^\p{sc=Bassa_Vah}/utf + \x{16ad0} + +/^\p{Script=Bass}/utf + \x{16af5} + +# Character not in script +/^\p{Bassa_Vah}/utf + \x{16af6} + +# Base script check +/^\p{sc=Elbasan}/utf + \x{10500} + +/^\p{Script=Elba}/utf + \x{10527} + +# Character not in script +/^\p{Elbasan}/utf + \x{10528} + +# Base script check +/^\p{sc=Pahawh_Hmong}/utf + \x{16b00} + +/^\p{Script=Hmng}/utf + \x{16b8f} + +# Character not in script +/^\p{Pahawh_Hmong}/utf + \x{16b90} + +# Base script check +/^\p{sc=Mende_Kikakui}/utf + \x{1e800} + +/^\p{Script=Mend}/utf + \x{1e8d6} + +# Character not in script +/^\p{Mende_Kikakui}/utf + \x{1e8d7} + +# Base script check +/^\p{sc=Mro}/utf + \x{16a40} + +/^\p{Script=Mroo}/utf + \x{16a6f} + +# Character not in script +/^\p{Mro}/utf + \x{16a70} + +# Base script check +/^\p{sc=Old_North_Arabian}/utf + \x{10a80} + +/^\p{Script=Narb}/utf + \x{10a9f} + +# Character not in 
script +/^\p{Old_North_Arabian}/utf + \x{10aa0} + +# Base script check +/^\p{sc=Nabataean}/utf + \x{10880} + +/^\p{Script=Nbat}/utf + \x{108af} + +# Character not in script +/^\p{Nabataean}/utf + \x{108b0} + +# Base script check +/^\p{sc=Palmyrene}/utf + \x{10860} + +/^\p{Script=Palm}/utf + \x{1087f} + +# Character not in script +/^\p{Palmyrene}/utf + \x{10880} + +# Base script check +/^\p{sc=Pau_Cin_Hau}/utf + \x{11ac0} + +/^\p{Script=Pauc}/utf + \x{11af8} + +# Character not in script +/^\p{Pau_Cin_Hau}/utf + \x{11af9} + +# Base script check +/^\p{sc=Siddham}/utf + \x{11580} + +/^\p{Script=Sidd}/utf + \x{115dd} + +# Character not in script +/^\p{Siddham}/utf + \x{115de} + +# Base script check +/^\p{sc=Warang_Citi}/utf + \x{118a0} + +/^\p{Script=Wara}/utf + \x{118ff} + +# Character not in script +/^\p{Warang_Citi}/utf + \x{11900} + +# Base script check +/^\p{sc=Ahom}/utf + \x{11700} + +/^\p{Script=Ahom}/utf + \x{11746} + +# Character not in script +/^\p{Ahom}/utf + \x{11747} + +# Base script check +/^\p{sc=Anatolian_Hieroglyphs}/utf + \x{14400} + +/^\p{Script=Hluw}/utf + \x{14646} + +# Character not in script +/^\p{Anatolian_Hieroglyphs}/utf + \x{14647} + +# Base script check +/^\p{sc=Hatran}/utf + \x{108e0} + +/^\p{Script=Hatr}/utf + \x{108ff} + +# Character not in script +/^\p{Hatran}/utf + \x{10900} + +# Base script check +/^\p{sc=Old_Hungarian}/utf + \x{10c80} + +/^\p{Script=Hung}/utf + \x{10cff} + +# Character not in script +/^\p{Old_Hungarian}/utf + \x{10d00} + +# Base script check +/^\p{sc=SignWriting}/utf + \x{1d800} + +/^\p{Script=Sgnw}/utf + \x{1daaf} + +# Character not in script +/^\p{SignWriting}/utf + \x{1dab0} + +# Base script check +/^\p{sc=Bhaiksuki}/utf + \x{11c00} + +/^\p{Script=Bhks}/utf + \x{11c6c} + +# Character not in script +/^\p{Bhaiksuki}/utf + \x{11c6d} + +# Base script check +/^\p{sc=Marchen}/utf + \x{11c70} + +/^\p{Script=Marc}/utf + \x{11cb6} + +# Character not in script +/^\p{Marchen}/utf + \x{11cb7} + +# Base script check 
+/^\p{sc=Newa}/utf + \x{11400} + +/^\p{Script=Newa}/utf + \x{11461} + +# Character not in script +/^\p{Newa}/utf + \x{11462} + +# Base script check +/^\p{sc=Osage}/utf + \x{104b0} + +/^\p{Script=Osge}/utf + \x{104fb} + +# Character not in script +/^\p{Osage}/utf + \x{104fc} + +# Base script check +/^\p{sc=Tangut}/utf + \x{16fe0} + +/^\p{Script=Tang}/utf + \x{18d08} + +# Character not in script +/^\p{Tangut}/utf + \x{18d09} + +# Base script check +/^\p{sc=Nushu}/utf + \x{16fe1} + +/^\p{Script=Nshu}/utf + \x{1b2fb} + +# Character not in script +/^\p{Nushu}/utf + \x{1b2fc} + +# Base script check +/^\p{sc=Soyombo}/utf + \x{11a50} + +/^\p{Script=Soyo}/utf + \x{11aa2} + +# Character not in script +/^\p{Soyombo}/utf + \x{11aa3} + +# Base script check +/^\p{sc=Zanabazar_Square}/utf + \x{11a00} + +/^\p{Script=Zanb}/utf + \x{11a47} + +# Character not in script +/^\p{Zanabazar_Square}/utf + \x{11a48} + +# Base script check +/^\p{sc=Makasar}/utf + \x{11ee0} + +/^\p{Script=Maka}/utf + \x{11ef8} + +# Character not in script +/^\p{Makasar}/utf + \x{11ef9} + +# Base script check +/^\p{sc=Medefaidrin}/utf + \x{16e40} + +/^\p{Script=Medf}/utf + \x{16e9a} + +# Character not in script +/^\p{Medefaidrin}/utf + \x{16e9b} + +# Base script check +/^\p{sc=Old_Sogdian}/utf + \x{10f00} + +/^\p{Script=Sogo}/utf + \x{10f27} + +# Character not in script +/^\p{Old_Sogdian}/utf + \x{10f28} + +# Base script check +/^\p{sc=Elymaic}/utf + \x{10fe0} + +/^\p{Script=Elym}/utf + \x{10ff6} + +# Character not in script +/^\p{Elymaic}/utf + \x{10ff7} + +# Base script check +/^\p{sc=Nyiakeng_Puachue_Hmong}/utf + \x{1e100} + +/^\p{Script=Hmnp}/utf + \x{1e14f} + +# Character not in script +/^\p{Nyiakeng_Puachue_Hmong}/utf + \x{1e150} + +# Base script check +/^\p{sc=Wancho}/utf + \x{1e2c0} + +/^\p{Script=Wcho}/utf + \x{1e2ff} + +# Character not in script +/^\p{Wancho}/utf + \x{1e300} + +# Base script check +/^\p{sc=Chorasmian}/utf + \x{10fb0} + +/^\p{Script=Chrs}/utf + \x{10fcb} + +# Character not in script 
+/^\p{Chorasmian}/utf + \x{10fcc} + +# Base script check +/^\p{sc=Dives_Akuru}/utf + \x{11900} + +/^\p{Script=Diak}/utf + \x{11959} + +# Character not in script +/^\p{Dives_Akuru}/utf + \x{1195a} + +# Base script check +/^\p{sc=Khitan_Small_Script}/utf + \x{16fe4} + +/^\p{Script=Kits}/utf + \x{18cd5} + +# Character not in script +/^\p{Khitan_Small_Script}/utf + \x{18cd6} + +# Base script check +/^\p{sc=Tangsa}/utf + \x{16a70} + +/^\p{Script=Tnsa}/utf + \x{16ac9} + +# Character not in script +/^\p{Tangsa}/utf + \x{16aca} + +# Base script check +/^\p{sc=Toto}/utf + \x{1e290} + +/^\p{Script=Toto}/utf + \x{1e2ae} + +# Character not in script +/^\p{Toto}/utf + \x{1e2af} + +# Base script check +/^\p{sc=Vithkuqi}/utf + \x{10570} + +/^\p{Script=Vith}/utf + \x{105bc} + +# Character not in script +/^\p{Vithkuqi}/utf + \x{105bd} + +# Base script check +/^\p{sc=Kawi}/utf + \x{11f00} + +/^\p{Script=Kawi}/utf + \x{11f59} + +# Character not in script +/^\p{Kawi}/utf + \x{11f5a} + +# Base script check +/^\p{sc=Nag_Mundari}/utf + \x{1e4d0} + +/^\p{Script=Nagm}/utf + \x{1e4f9} + +# Character not in script +/^\p{Nag_Mundari}/utf + \x{1e4fa} + +# End of testinput26 diff --git a/testdata/testinput3 b/testdata/testinput3 new file mode 100644 index 0000000..20f8d4c --- /dev/null +++ b/testdata/testinput3 @@ -0,0 +1,109 @@ +# This set of tests checks local-specific features, using the "fr_FR" locale. +# It is not Perl-compatible. When run via RunTest, the locale is edited to +# be whichever of "fr_FR", "french", or "fr" is found to exist. There is +# different version of this file called wintestinput3 for use on Windows, +# where the locale is called "french" and the tests are run using +# RunTest.bat. 
+ +#forbid_utf + +/^[\w]+/ +\= Expect no match + École + +/^[\w]+/locale=fr_FR + École + +/^[\w]+/ +\= Expect no match + École + +/^[\W]+/ + École + +/^[\W]+/locale=fr_FR +\= Expect no match + École + +/[\b]/ + \b +\= Expect no match + a + +/[\b]/locale=fr_FR + \b +\= Expect no match + a + +/^\w+/ +\= Expect no match + École + +/^\w+/locale=fr_FR + École + +/(.+)\b(.+)/ + École + +/(.+)\b(.+)/locale=fr_FR +\= Expect no match + École + +/École/i + École +\= Expect no match + école + +/École/i,locale=fr_FR + École + école + +/\w/I + +/\w/I,locale=fr_FR + +# All remaining tests are in the fr_FR locale, so set the default. + +#pattern locale=fr_FR + +/^[\xc8-\xc9]/i + École + école + +/^[\xc8-\xc9]/ + École +\= Expect no match + école + +/\xb5/i + µ +\= Expect no match + \x9c + +/\W+/ + >>>\xaa<<< + >>>\xba<<< + +/[\W]+/ + >>>\xaa<<< + >>>\xba<<< + +/[^[:alpha:]]+/ + >>>\xaa<<< + >>>\xba<<< + +/\w+/ + >>>\xaa<<< + >>>\xba<<< + +/[\w]+/ + >>>\xaa<<< + >>>\xba<<< + +/[[:alpha:]]+/ + >>>\xaa<<< + >>>\xba<<< + +/[[:alpha:]][[:lower:]][[:upper:]]/IB + +# End of testinput3 diff --git a/testdata/testinput4 b/testdata/testinput4 new file mode 100644 index 0000000..2205caf --- /dev/null +++ b/testdata/testinput4 @@ -0,0 +1,2874 @@ +# This set of tests is for UTF support, including Unicode properties. The +# Unicode tests are all compatible with all versions of Perl >= 5.10, but +# some of the property tests may differ because of different versions of +# Unicode in use by PCRE2 and Perl. + +# WARNING: Use only / as the pattern delimiter. Although pcre2test supports +# a number of delimiters, all those other than / give problems with the +# perltest.sh script. 
+ +#newline_default lf anycrlf any +#perltest + +/a.b/utf + acb + a\x7fb + a\x{100}b +\= Expect no match + a\nb + +/a(.{3})b/utf + a\x{4000}xyb + a\x{4000}\x7fyb + a\x{4000}\x{100}yb +\= Expect no match + a\x{4000}b + ac\ncb + +/a(.*?)(.)/ + a\xc0\x88b + +/a(.*?)(.)/utf + a\x{100}b + +/a(.*)(.)/ + a\xc0\x88b + +/a(.*)(.)/utf + a\x{100}b + +/a(.)(.)/ + a\xc0\x92bcd + +/a(.)(.)/utf + a\x{240}bcd + +/a(.?)(.)/ + a\xc0\x92bcd + +/a(.?)(.)/utf + a\x{240}bcd + +/a(.??)(.)/ + a\xc0\x92bcd + +/a(.??)(.)/utf + a\x{240}bcd + +/a(.{3})b/utf + a\x{1234}xyb + a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b +\= Expect no match + a\x{1234}b + ac\ncb + +/a(.{3,})b/utf + a\x{1234}xyb + a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b + axxxxbcdefghijb + a\x{1234}\x{4321}\x{3412}\x{3421}b +\= Expect no match + a\x{1234}b + +/a(.{3,}?)b/utf + a\x{1234}xyb + a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b + axxxxbcdefghijb + a\x{1234}\x{4321}\x{3412}\x{3421}b +\= Expect no match + a\x{1234}b + +/a(.{3,5})b/utf + a\x{1234}xyb + a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b + axxxxbcdefghijb + a\x{1234}\x{4321}\x{3412}\x{3421}b + axbxxbcdefghijb + axxxxxbcdefghijb +\= Expect no match + a\x{1234}b + axxxxxxbcdefghijb + +/a(.{3,5}?)b/utf + a\x{1234}xyb + a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b + axxxxbcdefghijb + a\x{1234}\x{4321}\x{3412}\x{3421}b + axbxxbcdefghijb + axxxxxbcdefghijb +\= Expect no match + a\x{1234}b + axxxxxxbcdefghijb + +/^[a\x{c0}]/utf +\= Expect no match + \x{100} + +/(?<=aXb)cd/utf + aXbcd + +/(?<=a\x{100}b)cd/utf + a\x{100}bcd + +/(?<=a\x{100000}b)cd/utf + a\x{100000}bcd + +/(?:\x{100}){3}b/utf + \x{100}\x{100}\x{100}b +\= Expect no match + \x{100}\x{100}b + +/\x{ab}/utf + \x{ab} + \xc2\xab +\= Expect no match + \x00{ab} + +/(?<=(.))X/utf + WXYZ + \x{256}XYZ +\= Expect no match + XYZ + +/[^a]+/g,utf + bcd + \x{100}aY\x{256}Z + +/^[^a]{2}/utf + \x{100}bc + +/^[^a]{2,}/utf + \x{100}bcAa + +/^[^a]{2,}?/utf + \x{100}bca + +/[^a]+/gi,utf + bcd + 
\x{100}aY\x{256}Z + +/^[^a]{2}/i,utf + \x{100}bc + +/^[^a]{2,}/i,utf + \x{100}bcAa + +/^[^a]{2,}?/i,utf + \x{100}bca + +/\x{100}{0,0}/utf + abcd + +/\x{100}?/utf + abcd + \x{100}\x{100} + +/\x{100}{0,3}/utf + \x{100}\x{100} + \x{100}\x{100}\x{100}\x{100} + +/\x{100}*/utf + abce + \x{100}\x{100}\x{100}\x{100} + +/\x{100}{1,1}/utf + abcd\x{100}\x{100}\x{100}\x{100} + +/\x{100}{1,3}/utf + abcd\x{100}\x{100}\x{100}\x{100} + +/\x{100}+/utf + abcd\x{100}\x{100}\x{100}\x{100} + +/\x{100}{3}/utf + abcd\x{100}\x{100}\x{100}XX + +/\x{100}{3,5}/utf + abcd\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}XX + +/\x{100}{3,}/utf + abcd\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}XX + +/(?<=a\x{100}{2}b)X/utf,aftertext + Xyyya\x{100}\x{100}bXzzz + +/\D*/utf + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/\D*/utf + \x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{1
00}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + +/\D/utf + 1X2 + 1\x{100}2 + +/>\S/utf + > >X Y + > >\x{100} Y + +/\d/utf + \x{100}3 + +/\s/utf + \x{100} X + +/\D+/utf + 12abcd34 +\= Expect no match + 1234 + +/\D{2,3}/utf + 12abcd34 + 12ab34 +\= Expect no match + 1234 + 12a34 + +/\D{2,3}?/utf + 12abcd34 + 12ab34 +\= Expect no match + 1234 + 12a34 + +/\d+/utf + 12abcd34 + +/\d{2,3}/utf + 12abcd34 + 1234abcd +\= Expect no match + 1.4 + +/\d{2,3}?/utf + 12abcd34 + 1234abcd +\= Expect no match + 1.4 + +/\S+/utf + 12abcd34 +\= Expect no match + \ \ + +/\S{2,3}/utf + 12abcd34 + 1234abcd +\= Expect no match + \ \ + +/\S{2,3}?/utf + 12abcd34 + 1234abcd +\= Expect no match + \ \ + +/>\s+ <34 + +/>\s{2,3} \s{2,3}? 
\xff< + +/[\xff]/utf + >\x{ff}< + +/[^\xFF]/ + XYZ + +/[^\xff]/utf + XYZ + \x{123} + +/^[ac]*b/utf +\= Expect no match + xb + +/^[ac\x{100}]*b/utf +\= Expect no match + xb + +/^[^x]*b/i,utf +\= Expect no match + xb + +/^[^x]*b/utf +\= Expect no match + xb + +/^\d*b/utf +\= Expect no match + xb + +/(|a)/g,utf + catac + a\x{256}a + +/^\x{85}$/i,utf + \x{85} + +/^ሴ/utf + ሴ + +/^\ሴ/utf + ሴ + +/(?s)(.{1,5})/utf + abcdefg + ab + +/a*\x{100}*\w/utf + a + +/\S\S/g,utf + A\x{a3}BC + +/\S{2}/g,utf + A\x{a3}BC + +/\W\W/g,utf + +\x{a3}== + +/\W{2}/g,utf + +\x{a3}== + +/\S/g,utf + \x{442}\x{435}\x{441}\x{442} + +/[\S]/g,utf + \x{442}\x{435}\x{441}\x{442} + +/\D/g,utf + \x{442}\x{435}\x{441}\x{442} + +/[\D]/g,utf + \x{442}\x{435}\x{441}\x{442} + +/\W/g,utf + \x{2442}\x{2435}\x{2441}\x{2442} + +/[\W]/g,utf + \x{2442}\x{2435}\x{2441}\x{2442} + +/[\S\s]*/utf + abc\n\r\x{442}\x{435}\x{441}\x{442}xyz + +/[\x{41f}\S]/g,utf + \x{442}\x{435}\x{441}\x{442} + +/.[^\S]./g,utf + abc def\x{442}\x{443}xyz\npqr + +/.[^\S\n]./g,utf + abc def\x{442}\x{443}xyz\npqr + +/[[:^alnum:]]/g,utf + +\x{2442} + +/[[:^alpha:]]/g,utf + +\x{2442} + +/[[:^ascii:]]/g,utf + A\x{442} + +/[[:^blank:]]/g,utf + A\x{442} + +/[[:^cntrl:]]/g,utf + A\x{442} + +/[[:^digit:]]/g,utf + A\x{442} + +/[[:^graph:]]/g,utf + \x19\x{e01ff} + +/[[:^lower:]]/g,utf + A\x{422} + +/[[:^print:]]/g,utf + \x{19}\x{e01ff} + +/[[:^punct:]]/g,utf + A\x{442} + +/[[:^space:]]/g,utf + A\x{442} + +/[[:^upper:]]/g,utf + a\x{442} + +/[[:^word:]]/g,utf + +\x{2442} + +/[[:^xdigit:]]/g,utf + M\x{442} + 
+/[^ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶĹĻĽĿŁŃŅŇŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶŸŹŻŽƁƂƄƆƇƉƊƋƎƏƐƑƓƔƖƗƘƜƝƟƠƢƤƦƧƩƬƮƯƱƲƳƵƷƸƼDŽLJNJǍǏǑǓǕǗǙǛǞǠǢǤǦǨǪǬǮDZǴǶǷǸǺǼǾȀȂȄȆȈȊȌȎȐȒȔȖȘȚȜȞȠȢȤȦȨȪȬȮȰȲȺȻȽȾɁΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫϒϓϔϘϚϜϞϠϢϤϦϨϪϬϮϴϷϹϺϽϾϿЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯѠѢѤѦѨѪѬѮѰѲѴѶѸѺѼѾҀҊҌҎҐҒҔҖҘҚҜҞҠҢҤҦҨҪҬҮҰҲҴҶҸҺҼҾӀӁӃӅӇӉӋӍӐӒӔӖӘӚӜӞӠӢӤӦӨӪӬӮӰӲӴӶӸԀԂԄԆԈԊԌԎԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖႠႡႢႣႤႥႦႧႨႩႪႫႬႭႮႯႰႱႲႳႴႵႶႷႸႹႺႻႼႽႾႿჀჁჂჃჄჅḀḂḄḆḈḊḌḎḐḒḔḖḘḚḜḞḠḢḤḦḨḪḬḮḰḲḴḶḸḺḼḾṀṂṄṆṈṊṌṎṐṒṔṖṘṚṜṞṠṢṤṦṨṪṬṮṰṲṴṶṸṺṼṾẀẂẄẆẈẊẌẎẐẒẔẠẢẤẦẨẪẬẮẰẲẴẶẸẺẼẾỀỂỄỆỈỊỌỎỐỒỔỖỘỚỜỞỠỢỤỦỨỪỬỮỰỲỴỶỸἈἉἊἋἌἍἎἏἘἙἚἛἜἝἨἩἪἫἬἭἮἯἸἹἺἻἼἽἾἿὈὉὊὋὌὍὙὛὝὟὨὩὪὫὬὭὮὯᾸᾹᾺΆῈΈῊΉῘῙῚΊῨῩῪΎῬῸΌῺΏabcdefghijklmnopqrstuvwxyzªµºßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķĸĺļľŀłńņňʼnŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷźżžſƀƃƅƈƌƍƒƕƙƚƛƞơƣƥƨƪƫƭưƴƶƹƺƽƾƿdžljnjǎǐǒǔǖǘǚǜǝǟǡǣǥǧǩǫǭǯǰdzǵǹǻǽǿȁȃȅȇȉȋȍȏȑȓȕȗșțȝȟȡȣȥȧȩȫȭȯȱȳȴȵȶȷȸȹȼȿɀɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟɠɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿʀʁʂʃʄʅʆʇʈʉʊʋʌʍʎʏʐʑʒʓʔʕʖʗʘʙʚʛʜʝʞʟʠʡʢʣʤʥʦʧʨʩʪʫʬʭʮʯΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώϐϑϕϖϗϙϛϝϟϡϣϥϧϩϫϭϯϰϱϲϳϵϸϻϼабвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӂӄӆӈӊӌӎӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹԁԃԅԇԉԋԍԏաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆևᴀᴁᴂᴃᴄᴅᴆᴇᴈᴉᴊᴋᴌᴍᴎᴏᴐᴑᴒᴓᴔᴕᴖᴗᴘᴙᴚᴛᴜᴝᴞᴟᴠᴡᴢᴣᴤᴥᴦᴧᴨᴩᴪᴫᵢᵣᵤᵥᵦᵧᵨᵩᵪᵫᵬᵭᵮᵯᵰᵱᵲᵳᵴᵵᵶᵷᵹᵺᵻᵼᵽᵾᵿᶀᶁᶂᶃᶄᶅᶆᶇᶈᶉᶊᶋᶌᶍᶎᶏᶐᶑᶒᶓᶔᶕᶖᶗᶘᶙᶚḁḃḅḇḉḋḍḏḑḓḕḗḙḛḝḟḡḣḥḧḩḫḭḯḱḳḵḷḹḻḽḿṁṃṅṇṉṋṍṏṑṓṕṗṙṛṝṟṡṣṥṧṩṫṭṯṱṳṵṷṹṻṽṿẁẃẅẇẉẋẍẏẑẓẕẖẗẘẙẚẛạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹἀἁἂἃἄἅἆἇἐἑἒἓἔἕἠἡἢἣἤἥἦἧἰἱἲἳἴἵἶἷὀὁὂὃὄὅὐὑὒὓὔὕὖὗὠὡὢὣὤὥὦὧὰάὲέὴήὶίὸόὺύὼώᾀᾁᾂᾃᾄᾅᾆᾇᾐᾑᾒᾓᾔᾕᾖᾗᾠᾡᾢᾣᾤᾥᾦᾧᾰᾱᾲᾳᾴᾶᾷιῂῃῄῆῇῐῑῒΐῖῗῠῡῢΰῤῥῦῧῲῳῴῶῷⲁⲃⲅⲇⲉⲋⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱⲳⲵⲷⲹⲻⲽⲿⳁⳃⳅⳇⳉⳋⳍⳏⳑⳓⳕⳗⳙⳛⳝⳟⳡⳣⳤⴀⴁⴂⴃⴄⴅⴆⴇⴈⴉⴊⴋⴌⴍⴎⴏⴐⴑⴒⴓⴔⴕⴖⴗⴘⴙⴚⴛⴜⴝⴞⴟⴠⴡⴢⴣⴤⴥfffiflffifflſtstﬓﬔﬕﬖﬗ\d_^]/utf + +/^[^d]*?$/ + abc + +/^[^d]*?$/utf + abc + +/^[^d]*?$/i + abc + +/^[^d]*?$/i,utf + abc + +/(?i)[\xc3\xa9\xc3\xbd]|[\xc3\xa9\xc3\xbdA]/utf + +/^[a\x{c0}]b/utf + \x{c0}b + +/^([a\x{c0}]*?)aa/utf + a\x{c0}aaaa/ + +/^([a\x{c0}]*?)aa/utf + a\x{c0}aaaa/ + a\x{c0}a\x{c0}aaa/ + +/^([a\x{c0}]*)aa/utf + a\x{c0}aaaa/ + 
a\x{c0}a\x{c0}aaa/ + +/^([a\x{c0}]*)a\x{c0}/utf + a\x{c0}aaaa/ + a\x{c0}a\x{c0}aaa/ + +/A*/g,utf + AAB\x{123}BAA + +/(abc)\1/i,utf +\= Expect no match + abc + +/(abc)\1/utf +\= Expect no match + abc + +/a(*:a\x{1234}b)/utf,mark + abc + +/a(*:a£b)/utf,mark + abc + +# Noncharacters + +/./utf + \x{fffe} + \x{ffff} + \x{1fffe} + \x{1ffff} + \x{2fffe} + \x{2ffff} + \x{3fffe} + \x{3ffff} + \x{4fffe} + \x{4ffff} + \x{5fffe} + \x{5ffff} + \x{6fffe} + \x{6ffff} + \x{7fffe} + \x{7ffff} + \x{8fffe} + \x{8ffff} + \x{9fffe} + \x{9ffff} + \x{afffe} + \x{affff} + \x{bfffe} + \x{bffff} + \x{cfffe} + \x{cffff} + \x{dfffe} + \x{dffff} + \x{efffe} + \x{effff} + \x{ffffe} + \x{fffff} + \x{10fffe} + \x{10ffff} + \x{fdd0} + \x{fdd1} + \x{fdd2} + \x{fdd3} + \x{fdd4} + \x{fdd5} + \x{fdd6} + \x{fdd7} + \x{fdd8} + \x{fdd9} + \x{fdda} + \x{fddb} + \x{fddc} + \x{fddd} + \x{fdde} + \x{fddf} + \x{fde0} + \x{fde1} + \x{fde2} + \x{fde3} + \x{fde4} + \x{fde5} + \x{fde6} + \x{fde7} + \x{fde8} + \x{fde9} + \x{fdea} + \x{fdeb} + \x{fdec} + \x{fded} + \x{fdee} + \x{fdef} + +/^\d*\w{4}/utf + 1234 +\= Expect no match + 123 + +/^[^b]*\w{4}/utf + aaaa +\= Expect no match + aaa + +/^[^b]*\w{4}/i,utf + aaaa +\= Expect no match + aaa + +/^\x{100}*.{4}/utf + \x{100}\x{100}\x{100}\x{100} +\= Expect no match + \x{100}\x{100}\x{100} + +/^\x{100}*.{4}/i,utf + \x{100}\x{100}\x{100}\x{100} +\= Expect no match + \x{100}\x{100}\x{100} + +/^a+[a\x{200}]/utf + aa + +/^.\B.\B./utf + \x{10123}\x{10124}\x{10125} + +/^#[^\x{ffff}]#[^\x{ffff}]#[^\x{ffff}]#/utf + #\x{10000}#\x{100}#\x{10ffff}# + +# Unicode property support tests + +/^\pC\pL\pM\pN\pP\pS\pZ\s+/utf,ucp + >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} + +/^>\pZ+/utf,ucp + >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} + +/^>[[:space:]]*/utf,ucp + >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} + +/^>[[:blank:]]*/utf,ucp + >\x{20}\x{a0}\x{1680}\x{2000}\x{202f}\x{9}\x{b}\x{2028} + +/^[[:alpha:]]*/utf,ucp + 
Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d} + +/^[[:alnum:]]*/utf,ucp + Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee} + +/^[[:cntrl:]]*/utf,ucp + \x{0}\x{09}\x{1f}\x{7f}\x{9f} + +/^[[:graph:]]*/utf,ucp + A\x{a1}\x{a0} + +/^[[:print:]]*/utf,ucp + A z\x{a0}\x{a1} + +/^[[:punct:]]*/utf,ucp + .+\x{a1}\x{a0} + +/\p{Zs}*?\R/ +\= Expect no match + a\xFCb + +/\p{Zs}*\R/ +\= Expect no match + a\xFCb + +/â±¥/i,utf + â±¥ + Ⱥx + Ⱥ + +/[â±¥]/i,utf + â±¥ + Ⱥx + Ⱥ + +/Ⱥ/i,utf + Ⱥ + â±¥ + +# These are tests for extended grapheme clusters + +/^\X/utf,aftertext + G\x{34e}\x{34e}X + \x{34e}\x{34e}X + \x04X + \x{1100}X + \x{1100}\x{34e}X + \x{1b04}\x{1b04}X + *These match up to the roman letters + \x{1111}\x{1111}L,L + \x{1111}\x{1111}\x{1169}L,L,V + \x{1111}\x{ae4c}L, LV + \x{1111}\x{ad89}L, LVT + \x{1111}\x{ae4c}\x{1169}L, LV, V + \x{1111}\x{ae4c}\x{1169}\x{1169}L, LV, V, V + \x{1111}\x{ae4c}\x{1169}\x{11fe}L, LV, V, T + \x{1111}\x{ad89}\x{11fe}L, LVT, T + \x{1111}\x{ad89}\x{11fe}\x{11fe}L, LVT, T, T + \x{ad89}\x{11fe}\x{11fe}LVT, T, T + *These match just the first codepoint (invalid sequence) + \x{1111}\x{11fe}L, T + \x{ae4c}\x{1111}LV, L + \x{ae4c}\x{ae4c}LV, LV + \x{ae4c}\x{ad89}LV, LVT + \x{1169}\x{1111}V, L + \x{1169}\x{ae4c}V, LV + \x{1169}\x{ad89}V, LVT + \x{ad89}\x{1111}LVT, L + \x{ad89}\x{1169}LVT, V + \x{ad89}\x{ae4c}LVT, LV + \x{ad89}\x{ad89}LVT, LVT + \x{11fe}\x{1111}T, L + \x{11fe}\x{1169}T, V + \x{11fe}\x{ae4c}T, LV + \x{11fe}\x{ad89}T, LVT + *Test extend and spacing mark + \x{1111}\x{ae4c}\x{0711}L, LV, extend + \x{1111}\x{ae4c}\x{1b04}L, LV, spacing mark + \x{1111}\x{ae4c}\x{1b04}\x{0711}\x{1b04}L, LV, spacing mark, extend, spacing mark + *Test CR, LF, and control + \x0d\x{0711}CR, extend + \x0d\x{1b04}CR, spacingmark + \x0a\x{0711}LF, extend + \x0a\x{1b04}LF, spacingmark + \x0b\x{0711}Control, extend + \x09\x{1b04}Control, spacingmark + *Test Extended Pictographic after bug fix + \x{261d}\x{261d}B Extended_Pictographic 
Extended_Pictographic + \x{261D}\x{1F3FB}\x{261d}B Extended_Pictographic Extend E-P + \x{261D}\x{1F3FB}\x{200d}\x{261d}B Extended_Pictographic Extend ZWJ E-P + \x{1f3f3}\x{fe0f}\x{200d}\x{1f308}\x{1f3f4}\x{200d}\x{2620}\x{fe0f}\x{1f3f3}\x{fe0f}\x{200d}\x{1f308}\x{1f3f4}\x{200d}\x{2620}\x{fe0f} + A\x{200d}\x{1f308}B + A\x{200d}B A ZWJ + \x{261D}\x{1F3FB}B Extended_Pictographic Extend + \x{1F1E6}\x{1F1E7}B RegionalIndicator RegionalIndicator + *There are no Prepend characters, so we can't test Prepend, CR + +/^(?>\X{2})X/utf,aftertext + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + +/^\X{2,4}X/utf,aftertext + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + +/^\X{2,4}?X/utf,aftertext + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + +/\X*Z/utf,no_start_optimize +\= Expect no match + A\x{300} + +/\X*(.)/utf,no_start_optimize + A\x{1111}\x{ae4c}\x{1169} + +# -------------------------------------------- + +/\x{1e9e}+/i,utf + \x{1e9e}\x{00df} + +/[z\x{1e9e}]+/i,utf + \x{1e9e}\x{00df} + +/\x{00df}+/i,utf + \x{1e9e}\x{00df} + +/[z\x{00df}]+/i,utf + \x{1e9e}\x{00df} + +/\x{1f88}+/i,utf + \x{1f88}\x{1f80} + +/[z\x{1f88}]+/i,utf + \x{1f88}\x{1f80} + +# Check a reference with more than one other case + +/^(\x{00b5})\1{2}$/i,utf + \x{00b5}\x{039c}\x{03bc} + +# Characters with more than one other case; test in classes + +/[z\x{00b5}]+/i,utf + \x{00b5}\x{039c}\x{03bc} + +/[z\x{039c}]+/i,utf + \x{00b5}\x{039c}\x{03bc} + +/[z\x{03bc}]+/i,utf + \x{00b5}\x{039c}\x{03bc} + +/[z\x{00c5}]+/i,utf + \x{00c5}\x{00e5}\x{212b} + +/[z\x{00e5}]+/i,utf + \x{00c5}\x{00e5}\x{212b} + +/[z\x{212b}]+/i,utf + \x{00c5}\x{00e5}\x{212b} + +/[z\x{01c4}]+/i,utf + \x{01c4}\x{01c5}\x{01c6} + +/[z\x{01c5}]+/i,utf + \x{01c4}\x{01c5}\x{01c6} + +/[z\x{01c6}]+/i,utf + \x{01c4}\x{01c5}\x{01c6} + 
+/[z\x{01c7}]+/i,utf + \x{01c7}\x{01c8}\x{01c9} + +/[z\x{01c8}]+/i,utf + \x{01c7}\x{01c8}\x{01c9} + +/[z\x{01c9}]+/i,utf + \x{01c7}\x{01c8}\x{01c9} + +/[z\x{01ca}]+/i,utf + \x{01ca}\x{01cb}\x{01cc} + +/[z\x{01cb}]+/i,utf + \x{01ca}\x{01cb}\x{01cc} + +/[z\x{01cc}]+/i,utf + \x{01ca}\x{01cb}\x{01cc} + +/[z\x{01f1}]+/i,utf + \x{01f1}\x{01f2}\x{01f3} + +/[z\x{01f2}]+/i,utf + \x{01f1}\x{01f2}\x{01f3} + +/[z\x{01f3}]+/i,utf + \x{01f1}\x{01f2}\x{01f3} + +/[z\x{0345}]+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/[z\x{0399}]+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/[z\x{03b9}]+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/[z\x{1fbe}]+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/[z\x{0392}]+/i,utf + \x{0392}\x{03b2}\x{03d0} + +/[z\x{03b2}]+/i,utf + \x{0392}\x{03b2}\x{03d0} + +/[z\x{03d0}]+/i,utf + \x{0392}\x{03b2}\x{03d0} + +/[z\x{0395}]+/i,utf + \x{0395}\x{03b5}\x{03f5} + +/[z\x{03b5}]+/i,utf + \x{0395}\x{03b5}\x{03f5} + +/[z\x{03f5}]+/i,utf + \x{0395}\x{03b5}\x{03f5} + +/[z\x{0398}]+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/[z\x{03b8}]+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/[z\x{03d1}]+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/[z\x{03f4}]+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/[z\x{039a}]+/i,utf + \x{039a}\x{03ba}\x{03f0} + +/[z\x{03ba}]+/i,utf + \x{039a}\x{03ba}\x{03f0} + +/[z\x{03f0}]+/i,utf + \x{039a}\x{03ba}\x{03f0} + +/[z\x{03a0}]+/i,utf + \x{03a0}\x{03c0}\x{03d6} + +/[z\x{03c0}]+/i,utf + \x{03a0}\x{03c0}\x{03d6} + +/[z\x{03d6}]+/i,utf + \x{03a0}\x{03c0}\x{03d6} + +/[z\x{03a1}]+/i,utf + \x{03a1}\x{03c1}\x{03f1} + +/[z\x{03c1}]+/i,utf + \x{03a1}\x{03c1}\x{03f1} + +/[z\x{03f1}]+/i,utf + \x{03a1}\x{03c1}\x{03f1} + +/[z\x{03a3}]+/i,utf + \x{03A3}\x{03C2}\x{03C3} + +/[z\x{03c2}]+/i,utf + \x{03A3}\x{03C2}\x{03C3} + +/[z\x{03c3}]+/i,utf + \x{03A3}\x{03C2}\x{03C3} + +/[z\x{03a6}]+/i,utf + \x{03a6}\x{03c6}\x{03d5} + +/[z\x{03c6}]+/i,utf + \x{03a6}\x{03c6}\x{03d5} + +/[z\x{03d5}]+/i,utf + \x{03a6}\x{03c6}\x{03d5} + +/[z\x{03c9}]+/i,utf + 
\x{03c9}\x{03a9}\x{2126} + +/[z\x{03a9}]+/i,utf + \x{03c9}\x{03a9}\x{2126} + +/[z\x{2126}]+/i,utf + \x{03c9}\x{03a9}\x{2126} + +/[z\x{1e60}]+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + +/[z\x{1e61}]+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + +/[z\x{1e9b}]+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + +# Perl 5.12.4 gets these wrong, but 5.15.3 is OK + +/[z\x{004b}]+/i,utf + \x{004b}\x{006b}\x{212a} + +/[z\x{006b}]+/i,utf + \x{004b}\x{006b}\x{212a} + +/[z\x{212a}]+/i,utf + \x{004b}\x{006b}\x{212a} + +/[z\x{0053}]+/i,utf + \x{0053}\x{0073}\x{017f} + +/[z\x{0073}]+/i,utf + \x{0053}\x{0073}\x{017f} + +/[z\x{017f}]+/i,utf + \x{0053}\x{0073}\x{017f} + +# -------------------------------------- + +/(ΣΆΜΟΣ) \1/i,utf + ΣΆΜΟΣ ΣΆΜΟΣ + ΣΆΜΟΣ σάμος + σάμος σάμος + σάμος σάμοσ + σάμος ΣΆΜΟΣ + +/(σάμος) \1/i,utf + ΣΆΜΟΣ ΣΆΜΟΣ + ΣΆΜΟΣ σάμος + σάμος σάμος + σάμος σάμοσ + σάμος ΣΆΜΟΣ + +/(ΣΆΜΟΣ) \1*/i,utf + ΣΆΜΟΣ\x20 + ΣΆΜΟΣ ΣΆΜΟΣσάμοςσάμος + +# Perl matches these + +/\x{00b5}+/i,utf + \x{00b5}\x{039c}\x{03bc} + +/\x{039c}+/i,utf + \x{00b5}\x{039c}\x{03bc} + +/\x{03bc}+/i,utf + \x{00b5}\x{039c}\x{03bc} + + +/\x{00c5}+/i,utf + \x{00c5}\x{00e5}\x{212b} + +/\x{00e5}+/i,utf + \x{00c5}\x{00e5}\x{212b} + +/\x{212b}+/i,utf + \x{00c5}\x{00e5}\x{212b} + + +/\x{01c4}+/i,utf + \x{01c4}\x{01c5}\x{01c6} + +/\x{01c5}+/i,utf + \x{01c4}\x{01c5}\x{01c6} + +/\x{01c6}+/i,utf + \x{01c4}\x{01c5}\x{01c6} + + +/\x{01c7}+/i,utf + \x{01c7}\x{01c8}\x{01c9} + +/\x{01c8}+/i,utf + \x{01c7}\x{01c8}\x{01c9} + +/\x{01c9}+/i,utf + \x{01c7}\x{01c8}\x{01c9} + + +/\x{01ca}+/i,utf + \x{01ca}\x{01cb}\x{01cc} + +/\x{01cb}+/i,utf + \x{01ca}\x{01cb}\x{01cc} + +/\x{01cc}+/i,utf + \x{01ca}\x{01cb}\x{01cc} + + +/\x{01f1}+/i,utf + \x{01f1}\x{01f2}\x{01f3} + +/\x{01f2}+/i,utf + \x{01f1}\x{01f2}\x{01f3} + +/\x{01f3}+/i,utf + \x{01f1}\x{01f2}\x{01f3} + + +/\x{0345}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/\x{0399}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/\x{03b9}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/\x{1fbe}+/i,utf + 
\x{0345}\x{0399}\x{03b9}\x{1fbe} + + +/\x{0392}+/i,utf + \x{0392}\x{03b2}\x{03d0} + +/\x{03b2}+/i,utf + \x{0392}\x{03b2}\x{03d0} + +/\x{03d0}+/i,utf + \x{0392}\x{03b2}\x{03d0} + + +/\x{0395}+/i,utf + \x{0395}\x{03b5}\x{03f5} + +/\x{03b5}+/i,utf + \x{0395}\x{03b5}\x{03f5} + +/\x{03f5}+/i,utf + \x{0395}\x{03b5}\x{03f5} + + +/\x{0398}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/\x{03b8}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/\x{03d1}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/\x{03f4}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + + +/\x{039a}+/i,utf + \x{039a}\x{03ba}\x{03f0} + +/\x{03ba}+/i,utf + \x{039a}\x{03ba}\x{03f0} + +/\x{03f0}+/i,utf + \x{039a}\x{03ba}\x{03f0} + + +/\x{03a0}+/i,utf + \x{03a0}\x{03c0}\x{03d6} + +/\x{03c0}+/i,utf + \x{03a0}\x{03c0}\x{03d6} + +/\x{03d6}+/i,utf + \x{03a0}\x{03c0}\x{03d6} + + +/\x{03a1}+/i,utf + \x{03a1}\x{03c1}\x{03f1} + +/\x{03c1}+/i,utf + \x{03a1}\x{03c1}\x{03f1} + +/\x{03f1}+/i,utf + \x{03a1}\x{03c1}\x{03f1} + + +/\x{03a3}+/i,utf + \x{03A3}\x{03C2}\x{03C3} + +/\x{03c2}+/i,utf + \x{03A3}\x{03C2}\x{03C3} + +/\x{03c3}+/i,utf + \x{03A3}\x{03C2}\x{03C3} + + +/\x{03a6}+/i,utf + \x{03a6}\x{03c6}\x{03d5} + +/\x{03c6}+/i,utf + \x{03a6}\x{03c6}\x{03d5} + +/\x{03d5}+/i,utf + \x{03a6}\x{03c6}\x{03d5} + + +/\x{03c9}+/i,utf + \x{03c9}\x{03a9}\x{2126} + +/\x{03a9}+/i,utf + \x{03c9}\x{03a9}\x{2126} + +/\x{2126}+/i,utf + \x{03c9}\x{03a9}\x{2126} + + +/\x{1e60}+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + +/\x{1e61}+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + +/\x{1e9b}+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + + +/\x{1e9e}+/i,utf + \x{1e9e}\x{00df} + +/\x{00df}+/i,utf + \x{1e9e}\x{00df} + + +/\x{1f88}+/i,utf + \x{1f88}\x{1f80} + +/\x{1f80}+/i,utf + \x{1f88}\x{1f80} + +# Perl 5.12.4 gets these wrong, but 5.15.3 is OK + +/\x{004b}+/i,utf + \x{004b}\x{006b}\x{212a} + +/\x{006b}+/i,utf + \x{004b}\x{006b}\x{212a} + +/\x{212a}+/i,utf + \x{004b}\x{006b}\x{212a} + + +/\x{0053}+/i,utf + \x{0053}\x{0073}\x{017f} + +/\x{0073}+/i,utf + \x{0053}\x{0073}\x{017f} + 
+/\x{017f}+/i,utf + \x{0053}\x{0073}\x{017f} + +/^\p{Any}*\d{4}/utf + 1234 +\= Expect no match + 123 + +/^\X*\w{4}/utf + 1234 +\= Expect no match + 123 + +/^A\s+Z/utf,ucp + A\x{2005}Z + A\x{85}\x{2005}Z + +/^A[\s]+Z/utf,ucp + A\x{2005}Z + A\x{85}\x{2005}Z + +/^[[:graph:]]+$/utf,ucp + Letter:ABC + Mark:\x{300}\x{1d172}\x{1d17b} + Number:9\x{660} + Punctuation:\x{66a},; + Symbol:\x{6de}<>\x{fffc} + Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} + \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} + \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} + \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} + \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} + \x{feff} + \x{fff9}\x{fffa}\x{fffb} + \x{110bd} + \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} + \x{e0001} + \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} +\= Expect no match + \x{09} + \x{0a} + \x{1D} + \x{20} + \x{85} + \x{a0} + \x{1680} + \x{2028} + \x{2029} + \x{202f} + \x{2065} + \x{3000} + \x{e0002} + \x{e001f} + \x{e0080} + +/^[[:print:]]+$/utf,ucp + Space: \x{a0} + \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} + \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} + \x{202f}\x{205f} + \x{3000} + Letter:ABC + Mark:\x{300}\x{1d172}\x{1d17b} + Number:9\x{660} + Punctuation:\x{66a},; + Symbol:\x{6de}<>\x{fffc} + Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} + \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} + \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} + \x{202f} + \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} + \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} + \x{feff} + \x{fff9}\x{fffa}\x{fffb} + \x{110bd} + \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} + \x{e0001} + \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} +\= Expect no match + \x{09} + \x{1D} + \x{85} + \x{2028} + \x{2029} + \x{2065} + \x{e0002} + \x{e001f} + \x{e0080} + +/^[[:punct:]]+$/utf,ucp + \$+<=>^`|~ + !\"#%&'()*,-./:;?@[\\]_{} + \x{a1}\x{a7} + \x{37e} +\= 
Expect no match + abcde + +/^[[:^graph:]]+$/utf,ucp + \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{1680} + \x{2028}\x{2029}\x{202f}\x{2065} + \x{3000}\x{e0002}\x{e001f}\x{e0080} +\= Expect no match + Letter:ABC + Mark:\x{300}\x{1d172}\x{1d17b} + Number:9\x{660} + Punctuation:\x{66a},; + Symbol:\x{6de}<>\x{fffc} + Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} + \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} + \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} + \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} + \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} + \x{feff} + \x{fff9}\x{fffa}\x{fffb} + \x{110bd} + \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} + \x{e0001} + \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} + +/^[[:^print:]]+$/utf,ucp + \x{09}\x{1D}\x{85}\x{2028}\x{2029}\x{2065} + \x{e0002}\x{e001f}\x{e0080} +\= Expect no match + Space: \x{a0} + \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} + \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} + \x{202f}\x{205f} + \x{3000} + Letter:ABC + Mark:\x{300}\x{1d172}\x{1d17b} + Number:9\x{660} + Punctuation:\x{66a},; + Symbol:\x{6de}<>\x{fffc} + Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} + \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} + \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} + \x{202f} + \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} + \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} + \x{feff} + \x{fff9}\x{fffa}\x{fffb} + \x{110bd} + \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} + \x{e0001} + \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} + +/^[[:^punct:]]+$/utf,ucp + abcde +\= Expect no match + \$+<=>^`|~ + !\"#%&'()*,-./:;?@[\\]_{} + \x{a1}\x{a7} + \x{37e} + +/[RST]+/i,utf,ucp + Ss\x{17f} + +/[R-T]+/i,utf,ucp + Ss\x{17f} + +/[q-u]+/i,utf,ucp + Ss\x{17f} + +/^s?c/im,utf + scat + +# The next four tests are for repeated caseless back references when the +# code unit length of the matched text is different to that of the 
original +# group in the UTF-8 case. + +/^(\x{23a})\1*(.)/i,utf + \x{23a}\x{23a}\x{23a}\x{23a} + \x{23a}\x{2c65}\x{2c65}\x{2c65} + \x{23a}\x{23a}\x{2c65}\x{23a} + +/^(\x{23a})\1*(..)/i,utf + \x{23a}\x{2c65}\x{2c65}\x{2c65} + \x{23a}\x{23a}\x{2c65}\x{23a} + +/^(\x{23a})\1*(...)/i,utf + \x{23a}\x{2c65}\x{2c65}\x{2c65} + \x{23a}\x{23a}\x{2c65}\x{23a} + +/^(\x{23a})\1*(....)/i,utf +\= Expect no match + \x{23a}\x{2c65}\x{2c65}\x{2c65} + \x{23a}\x{23a}\x{2c65}\x{23a} + +/[A-`]/i,utf + abcdefghijklmno + +/[\S\V\H]/utf + +/[^\p{Any}]*+x/utf + x + +/[[:punct:]]/utf,ucp + \x{b4} + +/[[:^ascii:]]/utf,ucp + \x{100} + \x{200} + \x{300} + \x{37e} +\= Expect no match + aa + 99 + +/[[:^ascii:]\w]/utf,ucp + aa + 99 + gg + \x{100} + \x{200} + \x{300} + \x{37e} + +/[\w[:^ascii:]]/utf,ucp + aa + 99 + gg + \x{100} + \x{200} + \x{300} + \x{37e} + +/[^[:ascii:]\W]/utf,ucp + \x{100} + \x{200} +\= Expect no match + aa + 99 + gg + \x{37e} + +/[^[:^ascii:]\d]/utf,ucp + a + ~ + \a + \x{7f} +\= Expect no match + 0 + \x{389} + \x{20ac} + +/(?=.*b)\pL/ + 11bb + +/(?(?=.*b)(?=.*b)\pL|.*c)/ + 11bb + +/^\x{123}+?$/utf,no_auto_possess + \x{123}\x{123}\x{123} + +/^\x{123}+?$/i,utf,no_auto_possess + \x{123}\x{122}\x{123} +\= Expect no match + \x{123}\x{124}\x{123} + +/\N{U+1234}/utf + \x{1234} + +/[\N{U+1234}]/utf + \x{1234} + +# Test the full list of Unicode "Pattern White Space" characters that are to +# be ignored by /x. The pattern lines below may show up oddly in text editors +# or when listed to the screen. Note that characters such as U+2002, which are +# matched as space by \h and \v are *not* "Pattern White Space". + +/A…‎‏

B/x,utf + AB + +/A B/x,utf + A\x{2002}B +\= Expect no match + AB + +# ------- + +/[^\x{100}-\x{ffff}]*[\x80-\xff]/utf + \x{99}\x{99}\x{99} + +/[^\x{100}-\x{ffff}ABC]*[\x80-\xff]/utf + \x{99}\x{99}\x{99} + +/[^\x{100}-\x{ffff}]*[\x80-\xff]/i,utf + \x{99}\x{99}\x{99} + +# Script run tests + +/^(*script_run:.{4})/utf + abcd Latin x4 + \x{2e80}\x{2fa1d}\x{3041}\x{30a1} Han Han Hiragana Katakana + \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han + \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han + \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul + \x{2e80}\x{3105}\x{2e80}\x{3105} Han Bopomofo Han Bopomofo + \x{02ea}\x{2e80}\x{2e80}\x{3105} Bopomofo-Sk Han Han Bopomofo + \x{3105}\x{2e80}\x{2e80}\x{3105} Bopomofo Han Han Bopomofo + \x{0300}cd! Inherited Latin Latin Common + \x{0391}12\x{03a9} Greek Common-digits Greek + \x{0400}12\x{fe2f} Cyrillic Common-digits Cyrillic + \x{0531}12\x{fb17} Armenian Common-digits Armenian + \x{0591}12\x{fb4f} Hebrew Common-digits Hebrew + \x{0600}12\x{1eef1} Arabic Common-digits Arabic + \x{0600}\x{0660}\x{0669}\x{1eef1} Arabic Arabic-digits Arabic + \x{0700}12\x{086a} Syriac Common-digits Syriac + \x{1200}12\x{ab2e} Ethiopic Common-digits Ethiopic + \x{1680}12\x{169c} Ogham Common-digits Ogham + \x{3041}12\x{3041} Hiragana Common-digits Hiragana + \x{0980}\x{09e6}\x{09e7}\x{0993} Bengali Bengali-digits Bengali + !cde Common Latin Latin Latin + A..B Latin Common Common Latin + 0abc Ascii-digit Latin Latin Latin + 1\x{0700}\x{0700}\x{0700} Ascii-digit Syriac x 3 + \x{1A80}\x{1A80}\x{1a40}\x{1a41} Tai Tham Hora digits, letters +\= Expect no match + a\x{370}bcd Latin Greek Latin Latin + \x{1100}\x{02ea}\x{02ea}\x{02ea} Hangul Bopomofo x3 + \x{02ea}\x{02ea}\x{02ea}\x{1100} Bopomofo x3 Hangul + \x{1100}\x{2e80}\x{3041}\x{1101} Hangul Han Hiragana Hangul + \x{0391}\x{09e6}\x{09e7}\x{03a9} Greek Bengali digits Greek + \x{0600}7\x{0669}\x{1eef1} Arabic ascii-digit Arabic-digit Arabic + \x{0600}\x{0669}7\x{1eef1} Arabic 
Arabic-digit ascii-digit Arabic + A5\x{ff19}B Latin Common-ascii/notascii-digits Latin + \x{0300}cd\x{0391} Inherited Latin Latin Greek + !cd\x{0391} Common Latin Latin Greek + \x{1A80}\x{1A90}\x{1a40}\x{1a41} Tai Tham Hora digit, Tham digit, letters + A\x{1d7ce}\x{1d7ff}B Common fancy-common-2-sets-digits Common + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana + +/^(*sr:.{4}|..)/utf + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana + +/^(*atomic_script_run:.{4}|..)/utf +\= Expect no match + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana + +/^(*asr:.*)/utf +\= Expect no match + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana + +/^(?>(*sr:.*))/utf + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana + +/^(*sr:.*)/utf + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana + \x{10fffd}\x{10fffd}\x{10fffd} Private use (Unknown) + +/^(*sr:\x{2e80}*)/utf + \x{2e80}\x{2e80}\x{3105} Han Han Bopomofo + +/^(*sr:\x{2e80}*)\x{2e80}/utf + \x{2e80}\x{2e80}\x{3105} Han Han Bopomofo + +/^(*sr:.*)Test/utf + Test script run on an empty string + +/^(*sr:(.{2})){2}/utf + \x{0600}7\x{0669}\x{1eef1} Arabic ascii-digit Arabic-digit Arabic + \x{1A80}\x{1A80}\x{1a40}\x{1a41} Tai Tham Hora digits, letters + \x{1A80}\x{1a40}\x{1A90}\x{1a41} Tai Tham Hora digit, letter, Tham digit, letter +\= Expect no match + \x{1100}\x{2e80}\x{3041}\x{1101} Hangul Han Hiragana Hangul + +/^(*sr:\S*)/utf + \x{1cf4}\x{20f0}\x{900}\x{11305} [Dev,Gran,Kan] [Dev,Gran,Lat] Dev Gran + \x{1cf4}\x{20f0}\x{11305}\x{900} [Dev,Gran,Kan] [Dev,Gran,Lat] Gran Dev + \x{1cf4}\x{20f0}\x{900}ABC [Dev,Gran,Kan] [Dev,Gran,Lat] Dev Lat + \x{1cf4}\x{20f0}ABC [Dev,Gran,Kan] [Dev,Gran,Lat] Lat + \x{20f0}ABC [Dev,Gran,Lat] Lat + XYZ\x{20f0}ABC Lat [Dev,Gran,Lat] Lat + \x{a36}\x{a33}\x{900} [Dev,...] [Dev,...] 
Dev + \x{3001}\x{2e80}\x{3041}\x{30a1} [Bopo, Han, etc] Han Hira Kata + \x{3001}\x{30a1}\x{2e80}\x{3041} [Bopo, Han, etc] Kata Han Hira + \x{3001}\x{3105}\x{2e80}\x{1101} [Bopo, Han, etc] Bopomofo Han Hangul + \x{3105}\x{3001}\x{2e80}\x{1101} Bopomofo [Bopo, Han, etc] Han Hangul + \x{3031}\x{3041}\x{30a1}\x{2e80} [Hira Kata] Hira Kata Han + \x{060c}\x{06d4}\x{0600}\x{10d00}\x{0700} [Arab Rohg Syrc Thaa] [Arab Rohg] Arab Rohg Syrc + \x{060c}\x{06d4}\x{0700}\x{0600}\x{10d00} [Arab Rohg Syrc Thaa] [Arab Rohg] Syrc Arab Rohg + \x{2e80}\x{3041}\x{3001}\x{3031}\x{2e80} Han Hira [Bopo, Han, etc] [Hira Kata] Han + +/(?\x{202c}<-- + +/\p{bidicontrol}+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidic}+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidi_control}++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidi_c}]/utf + -->\x{202c}<-- + +/[\p{bidicontrol}]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidicontrol}]+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidicontrol}]++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidicontrol}<>]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\P{bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{^bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidi class = al}/utf + -->\x{061D}<-- + +/\p{bc = al}+/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{bidi_class : AL}+?/utf + 
-->\x{061D}\x{061e}\x{061f}<-- + +/\p{Bidi_Class : AL}++/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{b_c = aN}+/utf + -->\x{061D}\x{0602}\x{0604}\x{061f}<-- + +/\p{bidi class = B}+/utf + -->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<-- + +/\p{bidi class:BN}+/utf + -->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<-- + +/\p{bidiclass:cs}+/utf + -->,.\x{060c}\x{ff1a}<-- + +/\p{bidiclass:En}+/utf + -->09\x{b2}\x{2074}\x{1fbf9}<-- + +/\p{bidiclass:es}+/utf + ==>+-\x{207a}\x{ff0d}<== + +/\p{bidiclass:et}+/utf + -->#\{24}%\x{a2}\x{A838}\x{1e2ff}<-- + +/\p{bidiclass:FSI}+/utf + -->\x{2068}<-- + +/\p{bidi class:L}+/utf + -->ABC<-- + +/\P{bidi class:L}+/utf + -->ABC<-- + +/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf + -->\x{202a}\x{2066}\x{202d}<-- + +/\p{bidi class:NSM}+/utf + -->\x{9bc}\x{a71}\x{e31}<-- + +/\p{bidi class:ON}+/utf + -->\x{21}'()*;@\x{384}\x{2039}<=- + +/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf + -->\x{202c}\x{2069}<-- + +/\p{bidi class:R}+/utf + -->\x{590}\x{5c6}\x{200f}\x{10805}<-- + +/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf + -->\x{202b}\x{2067}\x{202e}<-- + +/\p{bidi class:S}+\p{bidiclass:WS}+/utf + -->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<-- + +# ----------------------------------------------------------------------------- + +/[\p{taml}\p{sc:ugar}]+/utf + \x{0b82}\x{10380} + +/^[\p{sc:Arabic}]/utf +\= Expect no match + \x{650} + \x{651} + \x{652} + \x{653} + \x{654} + \x{655} + +# ----------------------------------------------------------------------------- +# Tests for newly-added Boolean Properties + +/\p{ahex}\p{asciihexdigit}/utf + >4F< + +/\p{alpha}\p{alphabetic}/g,utf + >AB<>\x{148}\x{1234} + +/\p{ascii}\p{ascii}/g,utf + >AB<>\x{148}\x{1234} + +/\p{Bidi_C}\p{bidicontrol}/g,utf + >\x{202d}\x{2069}< + +/\p{Bidi_M}\p{bidimirrored}/g,utf + >\x{202d}\x{2069}<>\x{298b}\x{bb}< + +/\p{cased}\p{cased}/g,utf + >AN<>\x{149}\x{120}< + +/\p{caseignorable}\p{ci}/g,utf + >AN<>\x{60}\x{859}< + 
+/\p{changeswhencasefolded}\p{cwcf}/g,utf + >AN<>\x{149}\x{120}< + +/\p{changeswhencasemapped}\p{cwcm}/g,utf + >AN<>\x{149}\x{120}< + +/\p{changeswhenlowercased}\p{cwl}/g,utf + >AN<>\x{149}\x{120}<>yz< + +/\p{changeswhenuppercased}\p{cwu}/g,utf + >AN<>\x{149}\x{120}<>yz< + +/\p{changeswhentitlecased}\p{cwt}/g,utf + >AN<>\x{149}\x{120}<>yz< + +/\p{dash}\p{dash}/g,utf + >\x{2d}\x{1400}<>yz< + +/\p{defaultignorablecodepoint}\p{di}/g,utf + >AN<>\x{ad}\x{e0fff}<>yz< + +/\p{deprecated}\p{dep}/g,utf + >AN<>\x{149}\x{e0001}<>yz< + +/\p{diacritic}\p{dia}/g,utf + >AN<>\x{f84}\x{5e}<>yz< + +/\p{emojicomponent}\p{ecomp}/g,utf + >AN<>\x{200d}\x{e007f}<>yz< + +/\p{emojimodifier}\p{emod}/g,utf + >AN<>\x{1f3fb}\x{1f3ff}<>yz< + +/\p{emojipresentation}\p{epres}/g,utf + >AN<>\x{2653}\x{1f6d2}<>yz< + +/\p{extender}\p{ext}/g,utf + >AN<>\x{1e944}\x{b7}<>yz< + +/\p{extendedpictographic}\p{extpict}/g,utf + >AN<>\x{26cf}\x{ae}<>yz< + +/\p{graphemebase}\p{grbase}/g,utf + >AN<>\x{10f}\x{60}<>yz< + +/\p{graphemeextend}\p{grext}/g,utf + >AN<>\x{300}\x{b44}<>yz< + +/\p{hexdigit}\p{hex}/g,utf + >AF23<>\x{ff46}\x{ff10}<>yz< + +/\p{idcontinue}\p{idc}/g,utf + >AF23<>\x{146}\x{7a}<>yz< + +/\p{ideographic}\p{ideo}/g,utf + >AF23<>\x{30000}\x{3006}<>yz< + +/\p{idstart}\p{ids}/g,utf + >AF23<>\x{146}\x{7a}<>yz< + +/\p{idsbinaryoperator}\p{idsb}/g,utf + >AF23<>\x{2ff0}\x{2ffb}<>yz<\x{2ff2}\x{2ff1} + +/\p{idstrinaryoperator}\p{idst}/g,utf + >AF23<>\x{2ff2}\x{2ff3}<>yz< + +/\p{Join Control}\p{joinc}/g,utf + >AF23<>\x{200c}\x{200d}<>yz< + +/\p{logical_order_exception}\p{loe}/g,utf + >AF23<>\x{e40}\x{aabc}<>yz< + +/\p{Lowercase}\p{lower}/g,utf + >AF23<>\x{146}\x{7a}<>yz< + +/\p{math}\p{math}/g,utf + >AF23<>\x{2215}\x{2b}<>yz< + +/\p{Non Character Code Point}\p{nchar}/g,utf + >AF23<>\x{10ffff}\x{fdd0}<>yz< + +/\p{patternsyntax}\p{patsyn}/g,utf + >AF23<>\x{21cd}\x{21}<>yz< + +/\p{patternwhitespace}\p{patws}/g,utf + >AF23<>\x{2029}\x{85}<>yz< + +/\p{prependedconcatenationmark}\p{pcm}/g,utf + 
>AF23<>\x{600}\x{110cd}<>yz< + +/\p{quotationmark}\p{qmark}/g,utf + >AF23<>\x{ff63}\x{22}<>yz< + +/\p{radical}\p{radical}/g,utf + >AF23<>\x{2fd5}\x{2e80}<>yz< + +/\p{regionalindicator}\p{ri}/g,utf + >AF23<>\x{1f1e6}\x{1f1ff}<>yz< + +/=\p{whitespace}\p{space}\p{wspace}=/g,utf + >AF23<=\x{d}\x{1680}\x{3000}=>yz< + +/\p{sentenceterminal}\p{sterm}/g,utf + >AF23<>\x{1da88}\x{2e}<>yz< + +/\p{terminalpunctuation}\p{term}/g,utf + >AF23<>\x{1da88}\x{2e}<>yz< + +/\p{unified ideograph}\p{uideo}/g,utf + >AF23<>\x{30000}\x{3400}<>yz< + +/\p{UPPERcase}\p{upper}/g,utf + >AF23<>\x{146}\x{7a}<>yz< + +/\p{variationselector}\p{vs}/g,utf + >AF23<>\x{180b}\x{e01ef}<>yz< + +/\p{xidcontinue}\p{xidc}/g,utf + >AF23<>\x{146}\x{30}<>yz< + +# ----------------------------------------------------------------------------- +# Variable-length lookbehinds. + +/(?<=áb?c).../g,utf + ábcdèfgácxyz + +/(?<=PQR|áb?c).../g,utf + ábcdèfgácxyzPQR123 + +/(?<=áb?c|PQR).../g,utf + ábcdèfgácxyzPQR123 + +/(?<=PQ|áb?c).../g,utf + ábcdèfgácxyzPQR123 + +/(?<=áb?c|PQ).../g,utf + ábcdèfgácxyzPQR123 + +/(?<=á(b?c|d?è?è)f)X./g,utf + ácfX1zzzáèfX2zzzádèèfX3zzzX4zzz + +/(?[[:blank:]]*/utf,ucp + >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} + +/^A\s+Z/utf,ucp + A\x{85}\x{180e}\x{2005}Z + +/^A[\s]+Z/utf,ucp + A\x{2005}Z + A\x{85}\x{2005}Z + +/^[[:graph:]]+$/utf,ucp +\= Expect no match + \x{180e} + +/^[[:print:]]+$/utf,ucp + \x{180e} + +/^[[:^graph:]]+$/utf,ucp + \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e} + +/^[[:^print:]]+$/utf,ucp +\= Expect no match + \x{180e} + +# End of U+180E tests. 
+ +# --------------------------------------------------------------------- + +/\x{110000}/IB,utf + +/\o{4200000}/IB,utf + +/\x{ffffffff}/utf + +/\o{37777777777}/utf + +/\x{100000000}/utf + +/\o{77777777777}/utf + +/\x{d800}/utf + +/\o{154000}/utf + +/\x{dfff}/utf + +/\o{157777}/utf + +/\x{d7ff}/utf + +/\o{153777}/utf + +/\x{e000}/utf + +/\o{170000}/utf + +/^\x{100}a\x{1234}/utf + \x{100}a\x{1234}bcd + +/\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf + \x{0041}\x{2262}\x{0391}\x{002e} + +/.{3,5}X/IB,utf + \x{212ab}\x{212ab}\x{212ab}\x{861}X + +/.{3,5}?/IB,utf + \x{212ab}\x{212ab}\x{212ab}\x{861} + +/^[ab]/IB,utf + bar +\= Expect no match + c + \x{ff} + \x{100} + +/\x{100}*(\d+|"(?1)")/utf + 1234 + "1234" + \x{100}1234 + "\x{100}1234" + \x{100}\x{100}12ab + \x{100}\x{100}"12" +\= Expect no match + \x{100}\x{100}abcd + +/\x{100}*/IB,utf + +/a\x{100}*/IB,utf + +/ab\x{100}*/IB,utf + +/[\x{200}-\x{100}]/utf + +/[Ā-Ą]/utf + \x{100} + \x{104} +\= Expect no match + \x{105} + \x{ff} + +/[\xFF]/IB + >\xff< + +/[^\xFF]/IB + +/[Ä-Ü]/utf + Ö # Matches without Study + \x{d6} + +/[Ä-Ü]/utf + Ö <-- Same with Study + \x{d6} + +/[\x{c4}-\x{dc}]/utf + Ö # Matches without Study + \x{d6} + +/[\x{c4}-\x{dc}]/utf + Ö <-- Same with Study + \x{d6} + +/[^\x{100}]abc(xyz(?1))/IB,utf + +/(\x{100}(b(?2)c))?/IB,utf + +/(\x{100}(b(?2)c)){0,2}/IB,utf + +/(\x{100}(b(?1)c))?/IB,utf + +/(\x{100}(b(?1)c)){0,2}/IB,utf + +/\W/utf + A.B + A\x{100}B + +/\w/utf + \x{100}X + +# Use no_start_optimize because the first code unit is different in 8-bit from +# the wider modes. 
+ +/^\ሴ/IB,utf,no_start_optimize + +/()()()()()()()()()() + ()()()()()()()()()() + ()()()()()()()()()() + ()()()()()()()()()() + A (x) (?41) B/x,utf + AxxB + +/^[\x{100}\E-\Q\E\x{150}]/B,utf + +/^[\QĀ\E-\QŐ\E]/B,utf + +/^abc./gmx,newline=any,utf + abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK + +/abc.$/gmx,newline=any,utf + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9 + +/^a\Rb/bsr=unicode,utf + a\nb + a\rb + a\r\nb + a\x0bb + a\x0cb + a\x{85}b + a\x{2028}b + a\x{2029}b +\= Expect no match + a\n\rb + +/^a\R*b/bsr=unicode,utf + ab + a\nb + a\rb + a\r\nb + a\x0bb + a\x0c\x{2028}\x{2029}b + a\x{85}b + a\n\rb + a\n\r\x{85}\x0cb + +/^a\R+b/bsr=unicode,utf + a\nb + a\rb + a\r\nb + a\x0bb + a\x0c\x{2028}\x{2029}b + a\x{85}b + a\n\rb + a\n\r\x{85}\x0cb +\= Expect no match + ab + +/^a\R{1,3}b/bsr=unicode,utf + a\nb + a\n\rb + a\n\r\x{85}b + a\r\n\r\nb + a\r\n\r\n\r\nb + a\n\r\n\rb + a\n\n\r\nb +\= Expect no match + a\n\n\n\rb + a\r + +/\H\h\V\v/utf + X X\x0a + X\x09X\x0b +\= Expect no match + \x{a0} X\x0a + +/\H*\h+\V?\v{3,4}/utf + \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a + \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a + \x09\x20\x{a0}\x0a\x0b\x0c +\= Expect no match + \x09\x20\x{a0}\x0a\x0b + +/\H\h\V\v/utf + \x{3001}\x{3000}\x{2030}\x{2028} + X\x{180e}X\x{85} +\= Expect no match + \x{2009} X\x0a + +/\H*\h+\V?\v{3,4}/utf + \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a + \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a + \x09\x20\x{202f}\x0a\x0b\x0c +\= Expect no match + \x09\x{200a}\x{a0}\x{2028}\x0b + +/[\h]/B,utf + >\x{1680} + +/[\h]{3,}/B,utf + >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}< + +/[\v]/B,utf + +/[\H]/B,utf + +/[\V]/B,utf + +/.*$/newline=any,utf + \x{1ec5} + +/a\Rb/I,bsr=anycrlf,utf + a\rb + a\nb + a\r\nb +\= Expect no match + a\x{85}b + a\x0bb + +/a\Rb/I,bsr=unicode,utf + a\rb + a\nb + a\r\nb + a\x{85}b + a\x0bb + +/a\R?b/I,bsr=anycrlf,utf + 
a\rb + a\nb + a\r\nb +\= Expect no match + a\x{85}b + a\x0bb + +/a\R?b/I,bsr=unicode,utf + a\rb + a\nb + a\r\nb + a\x{85}b + a\x0bb + +/.*a.*=.b.*/utf,newline=any + QQQ\x{2029}ABCaXYZ=!bPQR +\= Expect no match + a\x{2029}b + \x61\xe2\x80\xa9\x62 + +/[[:a\x{100}b:]]/utf + +/a[^]b/utf,allow_empty_class,match_unset_backref + a\x{1234}b + a\nb +\= Expect no match + ab + +/a[^]+b/utf,allow_empty_class,match_unset_backref + aXb + a\nX\nX\x{1234}b +\= Expect no match + ab + +/(\x{de})\1/ + \x{de}\x{de} + +/X/newline=any,utf,firstline + A\x{1ec5}ABCXYZ + +/Xa{2,4}b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/Xa{2,4}?b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/Xa{2,4}+b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\x{123}{2,4}b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X\x{123}{2,4}?b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X\x{123}{2,4}+b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X\x{123}{2,4}b/utf +\= Expect no match + Xx\=ps + X\x{123}x\=ps + X\x{123}\x{123}x\=ps + X\x{123}\x{123}\x{123}x\=ps + X\x{123}\x{123}\x{123}\x{123}x\=ps + +/X\x{123}{2,4}?b/utf +\= Expect no match + Xx\=ps + X\x{123}x\=ps + X\x{123}\x{123}x\=ps + X\x{123}\x{123}\x{123}x\=ps + X\x{123}\x{123}\x{123}\x{123}x\=ps + +/X\x{123}{2,4}+b/utf +\= Expect no match + Xx\=ps + X\x{123}x\=ps + X\x{123}\x{123}x\=ps + X\x{123}\x{123}\x{123}x\=ps + X\x{123}\x{123}\x{123}\x{123}x\=ps + +/X\d{2,4}b/utf + X\=ps + X3\=ps + X33\=ps + X333\=ps + X3333\=ps + +/X\d{2,4}?b/utf + X\=ps + X3\=ps + X33\=ps + X333\=ps + X3333\=ps + +/X\d{2,4}+b/utf + X\=ps + X3\=ps + X33\=ps + X333\=ps + X3333\=ps + +/X\D{2,4}b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\D{2,4}?b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\D{2,4}+b/utf + 
X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\D{2,4}b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X\D{2,4}?b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X\D{2,4}+b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X[abc]{2,4}b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[abc]{2,4}?b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[abc]{2,4}+b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[abc\x{123}]{2,4}b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X[abc\x{123}]{2,4}?b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X[abc\x{123}]{2,4}+b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X[^a]{2,4}b/utf + X\=ps + Xz\=ps + Xzz\=ps + Xzzz\=ps + Xzzzz\=ps + +/X[^a]{2,4}?b/utf + X\=ps + Xz\=ps + Xzz\=ps + Xzzz\=ps + Xzzzz\=ps + +/X[^a]{2,4}+b/utf + X\=ps + Xz\=ps + Xzz\=ps + Xzzz\=ps + Xzzzz\=ps + +/X[^a]{2,4}b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X[^a]{2,4}?b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X[^a]{2,4}+b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/(Y)X\1{2,4}b/utf + YX\=ps + YXY\=ps + YXYY\=ps + YXYYY\=ps + YXYYYY\=ps + +/(Y)X\1{2,4}?b/utf + YX\=ps + YXY\=ps + YXYY\=ps + YXYYY\=ps + YXYYYY\=ps + +/(Y)X\1{2,4}+b/utf + YX\=ps + YXY\=ps + YXYY\=ps + YXYYY\=ps + YXYYYY\=ps + +/(\x{123})X\1{2,4}b/utf + \x{123}X\=ps + \x{123}X\x{123}\=ps + \x{123}X\x{123}\x{123}\=ps + 
\x{123}X\x{123}\x{123}\x{123}\=ps + \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps + +/(\x{123})X\1{2,4}?b/utf + \x{123}X\=ps + \x{123}X\x{123}\=ps + \x{123}X\x{123}\x{123}\=ps + \x{123}X\x{123}\x{123}\x{123}\=ps + \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps + +/(\x{123})X\1{2,4}+b/utf + \x{123}X\=ps + \x{123}X\x{123}\=ps + \x{123}X\x{123}\x{123}\=ps + \x{123}X\x{123}\x{123}\x{123}\=ps + \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps + +/\bthe cat\b/utf + the cat\=ps + the cat\=ph + +/abcd*/utf + xxxxabcd\=ps + xxxxabcd\=ph + +/abcd*/i,utf + xxxxabcd\=ps + xxxxabcd\=ph + XXXXABCD\=ps + XXXXABCD\=ph + +/abc\d*/utf + xxxxabc1\=ps + xxxxabc1\=ph + +/(a)bc\1*/utf + xxxxabca\=ps + xxxxabca\=ph + +/abc[de]*/utf + xxxxabcde\=ps + xxxxabcde\=ph + +/X\W{3}X/utf + X\=ps + +/\sxxx\s/utf,tables=2 + AB\x{85}xxx\x{a0}XYZ + AB\x{a0}xxx\x{85}XYZ + +/\S \S/utf,tables=2 + \x{a2} \x{84} + +'A#хц'Bx,newline=any,utf + +'A#хц + PQ'Bx,newline=any,utf + +/a+#хaa + z#XX?/Bx,newline=any,utf + +/a+#хaa + z#х?/Bx,newline=any,utf + +/\g{A}xxx#bXX(?'A'123) (?'A'456)/Bx,newline=any,utf + +/\g{A}xxx#bх(?'A'123) (?'A'456)/Bx,newline=any,utf + +/^\cÄ£/utf + +/(\R*)(.)/s,utf + \r\n + \r\r\n\n\r + \r\r\n\n\r\n + +/(\R)*(.)/s,utf + \r\n + \r\r\n\n\r + \r\r\n\n\r\n + +/[^\x{1234}]+/Ii,utf + +/[^\x{1234}]+?/Ii,utf + +/[^\x{1234}]++/Ii,utf + +/[^\x{1234}]{2}/Ii,utf + +/f.*/ + for\=ph + +/f.*/s + for\=ph + +/f.*/utf + for\=ph + +/f.*/s,utf + for\=ph + +/\x{d7ff}\x{e000}/utf + +/\x{d800}/utf + +/\x{dfff}/utf + +/\h+/utf + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} + +/[\h\x{e000}]+/B,utf + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} + +/\H+/utf + \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + \x{2000}\x{200a}\x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} + +/[\H\x{d7ff}]+/B,utf + \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + 
\x{2000}\x{200a}\x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} + +/\v+/utf + \x{2027}\x{2030}\x{2028}\x{2029} + \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d + +/[\v\x{e000}]+/B,utf + \x{2027}\x{2030}\x{2028}\x{2029} + \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d + +/\V+/utf + \x{2028}\x{2029}\x{2027}\x{2030} + \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} + +/[\V\x{d7ff}]+/B,utf + \x{2028}\x{2029}\x{2027}\x{2030} + \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} + +/\R+/bsr=unicode,utf + \x{2027}\x{2030}\x{2028}\x{2029} + \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d + +/(..)\1/utf + ab\=ps + aba\=ps + abab\=ps + +/(..)\1/i,utf + ab\=ps + abA\=ps + aBAb\=ps + +/(..)\1{2,}/utf + ab\=ps + aba\=ps + abab\=ps + ababa\=ps + ababab\=ps + ababab\=ph + abababa\=ps + abababa\=ph + +/(..)\1{2,}/i,utf + ab\=ps + aBa\=ps + aBAb\=ps + AbaBA\=ps + abABAb\=ps + aBAbaB\=ph + abABabA\=ps + abaBABa\=ph + +/(..)\1{2,}?x/i,utf + ab\=ps + abA\=ps + aBAb\=ps + abaBA\=ps + abAbaB\=ps + abaBabA\=ps + abAbABaBx\=ps + +/./utf,newline=crlf + \r\=ps + \r\=ph + +/.{2,3}/utf,newline=crlf + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + +/.{2,3}?/utf,newline=crlf + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + +/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf + +/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf + +/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf + +/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf + +/(?<=\x{1234}\x{1234})\bxy/I,utf + +/(?\p{Xsp}/utf + >\x{1680}\x{2028}\x{0b} + >\x{a0} +\= Expect no match + \x{0b} + +/^>\p{Xsp}+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}+?/utf + >\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}*/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}{2,9}/utf + > 
\x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}{2,9}?/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>[\p{Xsp}]/utf + >\x{2028}\x{0b} + +/^>[\p{Xsp}]+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}/utf + >\x{1680}\x{2028}\x{0b} + >\x{a0} +\= Expect no match + \x{0b} + +/^>\p{Xps}+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}+?/utf + >\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}*/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}?/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>[\p{Xps}]/utf + >\x{2028}\x{0b} + +/^>[\p{Xps}]+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^\p{Xwd}/utf + ABCD + 1234 + \x{6ca} + \x{a6c} + \x{10a7} + _ABC +\= Expect no match + [] + +/^\p{Xwd}+/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}+?/utf + \x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}*/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}{2,9}/utf + A_B12\x{6ca}\x{a6c}\x{10a7} + +/^\p{Xwd}{2,9}?/utf + \x{6ca}\x{a6c}\x{10a7}_ + +/^[\p{Xwd}]/utf + ABCD1234_ + 1234abcd_ + \x{6ca} + \x{a6c} + \x{10a7} + _ABC +\= Expect no match + [] + +/^[\p{Xwd}]+/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +# A check not in UTF-8 mode + +/^[\p{Xwd}]+/ + ABCD1234_ + +# Some negative checks + +/^[\P{Xwd}]+/utf + !.+\x{019}\x{482}AB + +/^[\p{^Xwd}]+/utf + !.+\x{019}\x{589}AB + +/[\D]/B,utf,ucp + 1\x{3c8}2 + +/[\d]/B,utf,ucp + >\x{6f4}< + +/[\S]/B,utf,ucp + \x{1680}\x{6f4}\x{1680} + +/[\s]/B,utf,ucp + >\x{1680}< + +/[\W]/B,utf,ucp + A\x{1735}B + +/[\w]/B,utf,ucp + >\x{1723}< + +/\D/B,utf,ucp + 1\x{3c8}2 + +/\d/B,utf,ucp + >\x{6f4}< + +/\S/B,utf,ucp + \x{1680}\x{6f4}\x{1680} + +/\s/B,utf,ucp + >\x{1680}> + +/\W/B,utf,ucp + A\x{1735}B + +/\w/B,utf,ucp + >\x{1723}< + +/[[:alpha:]]/B,ucp + +/[[:lower:]]/B,ucp + +/[[:upper:]]/B,ucp + +/[[:alnum:]]/B,ucp + +/[[:ascii:]]/B,ucp + 
+/[[:cntrl:]]/B,ucp + +/[[:digit:]]/B,ucp + +/[[:digit:]]/B,ucp,ascii_digit + +/[[:graph:]]/B,ucp + +/[[:print:]]/B,ucp + +/[[:punct:]]/B,ucp + +/[[:space:]]/B,ucp + +/[[:word:]]/B,ucp + +/[[:xdigit:]]/B,ucp + +/[[:xdigit:]]/B,ucp,ascii_digit + +# Unicode properties for \b and \B + +/\b...\B/utf,ucp + abc_ + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +# Without PCRE_UCP, non-ASCII always fail, even if < 256 + +/\b...\B/utf + abc_ +\= Expect no match + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties + +/\b...\B/ucp + abc_ + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +# Some of these are silly, but they check various combinations + +/[[:^alpha:][:^cntrl:]]+/B,utf,ucp + 123 + abc + +/[[:^cntrl:][:^alpha:]]+/B,utf,ucp + 123 + abc + +/[[:alpha:]]+/B,utf,ucp + abc + +/[[:^alpha:]\S]+/B,utf,ucp + 123 + abc + +/[^\d]+/B,utf,ucp + abc123 + abc\x{123} + \x{660}abc + +/\p{Lu}+9\p{Lu}+B\p{Lu}+b/B + +/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B + +/\P{Lu}+9\P{Lu}+B\P{Lu}+b/B + +/\p{Han}+X\p{Greek}+\x{370}/B,utf + +/\p{Xan}+!\p{Xan}+A/B + +/\p{Xsp}+!\p{Xsp}\t/B + +/\p{Xps}+!\p{Xps}\t/B + +/\p{Xwd}+!\p{Xwd}_/B + +/A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp + +# These behaved oddly in Perl, so they are kept in this test + +/(\x{23a}\x{23a}\x{23a})?\1/i,utf +\= Expect no match + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} + +/(ȺȺȺ)?\1/i,utf +\= Expect no match + ȺȺȺⱥⱥ + +/(\x{23a}\x{23a}\x{23a})?\1/i,utf + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + +/(ȺȺȺ)?\1/i,utf + ȺȺȺⱥⱥⱥ + +/(\x{23a}\x{23a}\x{23a})\1/i,utf +\= Expect no match + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} + +/(ȺȺȺ)\1/i,utf +\= Expect no match + ȺȺȺⱥⱥ + +/(\x{23a}\x{23a}\x{23a})\1/i,utf + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + +/(ȺȺȺ)\1/i,utf + ȺȺȺⱥⱥⱥ + +/(\x{2c65}\x{2c65})\1/i,utf + \x{2c65}\x{2c65}\x{23a}\x{23a} + +/(ⱥⱥ)\1/i,utf + ⱥⱥȺȺ + 
+/(\x{23a}\x{23a}\x{23a})\1Y/i,utf + X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ + +/(\x{2c65}\x{2c65})\1Y/i,utf + X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ + +# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE + +/^[\p{Batak}]/utf + \x{1bc0} + \x{1bff} +\= Expect no match + \x{1bf4} + +/^[\p{Brahmi}]/utf + \x{11000} + \x{1106f} +\= Expect no match + \x{1104e} + +/^[\p{Mandaic}]/utf + \x{840} + \x{85e} +\= Expect no match + \x{85c} + \x{85d} + +/(\X*)(.)/s,utf + A\x{300} + +/^S(\X*)e(\X*)$/utf + Stéréo + +/^\X/utf + ́réo + +/^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aX41z +\= Expect no match + aAz + +/\X/ + a\=ps + a\=ph + +/\Xa/ + aa\=ps + aa\=ph + +/\X{2}/ + aa\=ps + aa\=ph + +/\X+a/ + a\=ps + aa\=ps + aa\=ph + +/\X+?a/ + a\=ps + ab\=ps + aa\=ps + aa\=ph + aba\=ps + +# These Unicode 6.1.0 scripts are not known to Perl. + +/\p{Chakma}\d/utf,ucp + \x{11100}\x{1113c} + +/\p{Takri}\d/utf,ucp + \x{11680}\x{116c0} + +/^\X/utf + A\=ps + A\=ph + A\x{300}\x{301}\=ps + A\x{300}\x{301}\=ph + A\x{301}\=ps + A\x{301}\=ph + +/^\X{2,3}/utf + A\=ps + A\=ph + AA\=ps + AA\=ph + A\x{300}\x{301}\=ps + A\x{300}\x{301}\=ph + A\x{300}\x{301}A\x{300}\x{301}\=ps + A\x{300}\x{301}A\x{300}\x{301}\=ph + +/^\X{2}/utf + AA\=ps + AA\=ph + A\x{300}\x{301}A\x{300}\x{301}\=ps + A\x{300}\x{301}A\x{300}\x{301}\=ph + +/^\X+/utf + AA\=ps + AA\=ph + +/^\X+?Z/utf + AA\=ps + AA\=ph + +/A\x{3a3}B/IBi,utf + +/[\x{3a3}]/Bi,utf + +/[^\x{3a3}]/Bi,utf + +/[\x{3a3}]+/Bi,utf + +/[^\x{3a3}]+/Bi,utf + +/a*\x{3a3}/Bi,utf + +/\x{3a3}+a/Bi,utf + +/\x{3a3}*\x{3c2}/Bi,utf + +/\x{3a3}{3}/i,utf,aftertext + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}{2,4}/i,utf,aftertext + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}{2,4}?/i,utf,aftertext + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}+./i,utf,aftertext + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}++./i,utf,aftertext +\= Expect no match + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + 
+/\x{3a3}*\x{3c2}/Bi,utf + +/[^\x{3a3}]*\x{3c2}/Bi,utf + +/[^a]*\x{3c2}/Bi,utf + +/ist/Bi,utf +\= Expect no match + ikt + +/is+t/i,utf + iSs\x{17f}t +\= Expect no match + ikt + +/is+?t/i,utf +\= Expect no match + ikt + +/is?t/i,utf +\= Expect no match + ikt + +/is{2}t/i,utf +\= Expect no match + iskt + +# This property is a PCRE special + +/^\p{Xuc}/utf + $abc + @abc + `abc + \x{1234}abc +\= Expect no match + abc + +/^\p{Xuc}+/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}+?/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}+?\*/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}++/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}{3,5}/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}{3,5}?/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^[\p{Xuc}]/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^[\p{Xuc}]+/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\P{Xuc}/utf + abc +\= Expect no match + $abc + @abc + `abc + \x{1234}abc + +/^[\P{Xuc}]/utf + abc +\= Expect no match + $abc + @abc + `abc + \x{1234}abc + +# Some auto-possessification tests + +/\pN+\z/B + +/\PN+\z/B + +/\pN+/B + +/\PN+/B + +/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp + +/\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp + +/\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp + +/\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} 
\p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp + +/\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp + +/\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp + +/\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp + +/\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp + +/\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp + +/\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp + +/\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp + +/\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp + +/\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp + +/\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp + +/\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp + +/\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp + +/\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp + +/\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp + +/\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp + +/\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp + +# End auto-possessification tests + +/\w+/B,utf,ucp,auto_callout + abcd + +/[\p{N}]?+/B,no_auto_possess + 
+/[\p{L}ab]{2,3}+/B,no_auto_possess + +/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx + +/.+\X/Bsx + +/\X+$/Bmx + +/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx + +/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp + +/[RST]+/Bi,utf,ucp + +/[R-T]+/Bi,utf,ucp + +/[Q-U]+/Bi,utf,ucp + +/^s?c/Iim,utf + scat + +/\X?abc/utf,no_start_optimize + \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 + +/\x{100}\x{200}\K\x{300}/utf,startchar + \x{100}\x{200}\x{300} + +# Test UTF characters in a substitution + +/ábc/utf,replace=XሴZ + 123ábc123 + +/(?<=abc)(|def)/g,utf,replace=<$0> + 123abcáyzabcdef789abcሴqr + +/[A-`]/iB,utf + abcdefghijklmno + +/(?<=\K\x{17f})/g,utf,aftertext,allow_lookaround_bsk + \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} + +/(?<=\K\x{17f})/altglobal,utf,aftertext,allow_lookaround_bsk + \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} + +"\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?())(?))(?\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5" + +/$(&.+[\p{Me}].\s\xdcC*?(?())(?)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/ + +"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'" + +/[\pS#moq]/ + = + 
+/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark + cxxxz + +/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended + abcd + +/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended + a\x{e0}\x{101}\x{c0}\x{102} + +/((?\d)|(?\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}> + ab12cde + +/(*UCP)(*UTF)[[:>:]]X/B + +/abc/utf,replace=xyz + abc\=zero_terminate + +/a[[:punct:]b]/ucp,bincode + +/a[[:punct:]b]/utf,ucp,bincode + +/a[b[:punct:]]/utf,ucp,bincode + +/[[:^ascii:]]/utf,ucp,bincode + +/[[:^ascii:]\w]/utf,ucp,bincode + +/[\w[:^ascii:]]/utf,ucp,bincode + +/[^[:ascii:]\W]/utf,ucp,bincode + \x{de} + \x{200} +\= Expect no match + \x{589} + \x{37e} + +/[[:^ascii:]a]/utf,ucp,bincode + +/L(?#(|++AN<>\x{261d}\x{1faf6}<>yz< + +/\p{graphemelink}\p{grlink}/g,utf + >AN<>\x{11d97}\x{94d}<>yz< + +/\p{soft dotted}\p{sd}/g,utf + >AF23<>\x{1df1a}\x{69}<>yz< + +# ------------------------------------------------ + +/\p{\2b[:xäigi:t:_/ + +# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without +# the restriction. 
+ +/AskZ/i,utf,caseless_restrict + AskZ + aSKz +\= Expect no match + A\x{17f}kZ + As\x{212a}Z + +/AskZ/i,utf + AskZ + aSKz + A\x{17f}kZ + As\x{212a}Z + +/A\x{17f}\x{212a}Z/ir,utf + \= Expect no match + AskZ + +/A\x{17f}\x{212a}Z/i,utf + AskZ + +/[AskZ]+/i,utf,caseless_restrict + AskZ + aSKz + A\x{17f}kZ + As\x{212a}Z + +/[AskZ]+/i,utf + AskZ + aSKz + A\x{17f}kZ + As\x{212a}Z + +/[\x{17f}\x{212a}]+/ir,utf +\= Expect no match + AskZ + +/[\x{17f}\x{212a}]+/i,utf + AskZ + +/[^s]+/ir,utf + A\x{17f}Z + +/[^s]+/i,utf + A\x{17f}Z + +/[^k]+/ir,utf + A\x{212a}Z + +/[^k]+/i,utf + A\x{212a}Z + +/[^sk]+/ir,utf + A\x{17f}\x{212a}Z + +/[^sk]+/i,utf + A\x{17f}\x{212a}Z + +/[^\x{17f}]+/ir,utf + AsSZ + +/[^\x{17f}]+/i,utf + AsSZ + +/[Ss]+/irB,utf + Sss\x{17f}ss + +/[Ss]+/iB,utf + Sss\x{17f}ss + +/[S\x{17f}]/irB,utf + +/[S\x{17f}]/iB,utf + +/[\x{17f}s]/irB,utf + +/[\x{17f}s]/iB,utf + +/[\x{4b}\x{6b}]/irB,utf + +/[\x{4b}\x{6b}]/iB,utf + +/s(?r)s(?-r)s(?r:s)s/i,utf + \x{17f}S\x{17f}S\x{17f} +\= Expect no match + \x{17f}\x{17f}\x{17f}S\x{17f} + \x{17f}S\x{17f}\x{17f}\x{17f} + +/k(?^i)k/ir,utf + K\x{212a} +\= Expect no match + \x{212a}\x{212a} + +# End caseless restrict tests + +# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. 
+ +# DIGITS + +/\d+/i,utf + 123\x{660}456 + +/\d+/i,utf,ucp + 123\x{660}456 + +/\d+/i,utf,ucp,ascii_bsd + 123\x{660}456 + +/[\d]+/i,utf + 123\x{660}456 + +/[\d]+/i,utf,ucp + 123\x{660}456 + +/[\d]+/i,utf,ucp,ascii_bsd + 123\x{660}456 + +/\d(?aD)\d(?-aD)\d/utf,ucp + \x{660}9\x{660} +\= Expect no match + \x{660}\x{660}\x{660} + +/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd + 999 + 9\x{660}9 + +/\d(?a)\d(?-a)\d/utf,ucp + \x{660}9\x{660} +\= Expect no match + \x{660}\x{660}\x{660} + +/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd + 999 + 9\x{660}9 + +# SPACES + +/>\s+ < +\= Expect no match + >\x{a0} < + +/>\s+ < + >\x{a0} < + +/>\s+ < +\= Expect no match + >\x{a0} < + +/>[\s]+ < +\= Expect no match + >\x{a0} < + +/>[\s]+ < + >\x{a0} < + +/>[\s]+ < +\= Expect no match + >\x{a0} < + +/>\s(?aS)\s(?-aS)\s\x{a0} \x{a0}< +\= Expect no match + >\x{a0}\x{a0}\x{a0}< + +/>\s(?a)\s(?-a)\s\x{a0} \x{a0}< +\= Expect no match + >\x{a0}\x{a0}\x{a0}< + +# WORDS + +/\w+/i,utf + 123\x{660}abc + +/\w+/i,utf,ucp + 123\x{660}abc + +/\w+/i,utf,ucp,ascii_bsw + 123\x{660}abc + +/[\w]+/i,utf + 123\x{660}abc + +/[\w]+/i,utf,ucp + 123\x{660}abc + +/[\w]+/i,utf,ucp,ascii_bsw + 123\x{660}abc + +/\w(?aW)\w(?-aW)\w/utf,ucp + \x{660}A\x{c0} +\= Expect no match + \x{660}\x{c0}\x{c0} + +/\w(?a)\w(?-a)\w/utf,ucp + \x{660}A\x{c0} +\= Expect no match + \x{660}\x{c0}\x{c0} + +# WORD BOUNDARY + +/\bABC\b/utf + \x{c0}ABC\x{d0} + +/\bABC\b/utf,ucp +\= Expect no match + \x{c0}ABC\x{d0} + +/\bABC\b/utf,ucp,ascii_bsw + \x{c0}ABC\x{d0} + +/\bABC\b/utf,ucp,ascii_all + \x{c0}ABC\x{d0} + +# POSIX + +/^[[:digit:]]+$/utf,ucp + 123456 + 123\x{660}456 + +/^[[:digit:]]+$/utf,ucp,ascii_digit + 123456 +\= Expect no match + 123\x{660}456 + +/[[:digit:]]+/g,utf,ucp,ascii_digit + 123\x{660}456 + +/(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit + 11 + \x{ff11}1 +\= Expect no match + 1\x{ff11} + +/(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit + 11 + \x{ff11}1 +\= Expect no match + 1\x{ff11} + 
+/(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit + 11 +\= Expect no match + \x{ff11}1 + 1\x{ff11} + +/[[:digit:]]+/utf,ucp,ascii_posix + 123\x{660}456 + +/(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix + 11 + \x{ff11}1 +\= Expect no match + 1\x{ff11} + +/(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix + 11 + \x{ff11}1 +\= Expect no match + 1\x{ff11} + +/(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp + 11 + \x{ff11}1 +\= Expect no match + 1\x{ff11} + +/^[[:xdigit:]]+$/utf,ucp + f0 + 1A + d\x{ff10} + \x{ff26}8 +\= Expect no match + 8g\=no_jit + +/^[[:xdigit:]]+$/utf,ucp,ascii_digit + f0 + 1A +\= Expect no match + d\x{ff10} + \x{ff26}8 + 8g + +/>[[:space:]]+\x{a0} \x{a0}< + >\x{a0}\x{a0}\x{a0}< + +/>[[:space:]]+\x{a0} \x{a0}< + +/(?aP)[[:alnum:]]+/i,ucp,utf + abcáxyz + abc\x{660}xyz + +/(?aP)[[:alnum:]\d]+/i,ucp,utf + abc\x{660}xyz + +/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/ + \x{660}A\x{660} +\= Expect no match + \x{660}\x{660}\x{660} + +# VARIOUS + +/[\d\s\w]+/a,ucp,utf + 9 A\x{660}À + 9 AÀ\x{660} + +# End PCRE2_EXTRA_ASCII_xxx tests + +/(?abc>([^()]|\((?1)*\))*abc>123abc>1(2)3abc>(1(2)3)a*)\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa9876 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >/x + <> + + hij> + hij> + def> + +\= Expect no match + >>aaabxyzpqrrrabbxyyyypqAzz + >aaaabxyzpqrrrabbxyyyypqAzz + >>>>abcxyzpqrrrabbxyyyypqAzz +\= Expect no match + abxyzpqrrabbxyyyypqAzz + abxyzpqrrrrabbxyyyypqAzz + abxyzpqrrrabxyyyypqAzz + aaaabcxyzzzzpqrrrabbbxyyyyyypqAzz + aaaabcxyzzzzpqrrrabbbxyyypqAzz + aaabcxyzpqrrrabbxyyyypqqqqqqqAzz + +/^(abc){1,2}zz/ + abczz + abcabczz +\= Expect no match + zz + abcabcabczz + >>abczz + +/^(b+?|a){1,2}?c/ + bc + bbc + bbbc + bac + bbac + aac + abbbbbbbbbbbc + bbbbbbbbbbbac +\= Expect no match + aaac + abbbbbbbbbbbac + +/^(b+|a){1,2}c/ + bc + bbc + bbbc + bac + bbac + aac + abbbbbbbbbbbc + bbbbbbbbbbbac +\= Expect no match + aaac 
+ abbbbbbbbbbbac + +/^(b+|a){1,2}?bc/ + bbc + +/^(b*|ba){1,2}?bc/ + babc + bbabc + bababc +\= Expect no match + bababbc + babababc + +/^(ba|b*){1,2}?bc/ + babc + bbabc + bababc +\= Expect no match + bababbc + babababc + +/^\ca\cA\c[\c{\c:/ + \x01\x01\e;z + +/^[ab\]cde]/ + athing + bthing + ]thing + cthing + dthing + ething +\= Expect no match + fthing + [thing + \\thing + +/^[]cde]/ + ]thing + cthing + dthing + ething +\= Expect no match + athing + fthing + +/^[^ab\]cde]/ + fthing + [thing + \\thing +\= Expect no match + athing + bthing + ]thing + cthing + dthing + ething + +/^[^]cde]/ + athing + fthing +\= Expect no match + ]thing + cthing + dthing + ething + +/^\/ +  + +/^ÿ/ + ÿ + +/^[0-9]+$/ + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 100 +\= Expect no match + abc + +/^.*nter/ + enter + inter + uponter + +/^xxx[0-9]+$/ + xxx0 + xxx1234 +\= Expect no match + xxx + +/^.+[0-9][0-9][0-9]$/ + x123 + xx123 + 123456 + x1234 +\= Expect no match + 123 + +/^.+?[0-9][0-9][0-9]$/ + x123 + xx123 + 123456 + x1234 +\= Expect no match + 123 + +/^([^!]+)!(.+)=apquxz\.ixr\.zzz\.ac\.uk$/ + abc!pqr=apquxz.ixr.zzz.ac.uk +\= Expect no match + !pqr=apquxz.ixr.zzz.ac.uk + abc!=apquxz.ixr.zzz.ac.uk + abc!pqr=apquxz:ixr.zzz.ac.uk + abc!pqr=apquxz.ixr.zzz.ac.ukk + +/:/ + Well, we need a colon: somewhere +\= Expect no match + No match without a colon + +/([\da-f:]+)$/i + 0abc + abc + fed + E + :: + 5f03:12C0::932e + fed def + Any old stuff +\= Expect no match + 0zzz + gzzz + fed\x20 + Any old rubbish + +/^.*\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/ + .1.2.3 + A.12.123.0 +\= Expect no match + .1.2.3333 + 1.2.3 + 1234.2.3 + +/^(\d+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/ + 1 IN SOA non-sp1 non-sp2( + 1 IN SOA non-sp1 non-sp2 ( +\= Expect no match + 1IN SOA non-sp1 non-sp2( + +/^[a-zA-Z\d][a-zA-Z\d\-]*(\.[a-zA-Z\d][a-zA-z\d\-]*)*\.$/ + a. + Z. + 2. + ab-c.pq-r. + sxk.zzz.ac.uk. + x-.y-. +\= Expect no match + -abc.peq. 
+ +/^\*\.[a-z]([a-z\-\d]*[a-z\d]+)?(\.[a-z]([a-z\-\d]*[a-z\d]+)?)*$/ + *.a + *.b0-a + *.c3-b.c + *.c-a.b-c +\= Expect no match + *.0 + *.a- + *.a-b.c- + *.c-a.0-c + +/^(?=ab(de))(abd)(e)/ + abde + +/^(?!(ab)de|x)(abd)(f)/ + abdf + +/^(?=(ab(cd)))(ab)/ + abcd + +/^[\da-f](\.[\da-f])*$/i + a.b.c.d + A.B.C.D + a.b.c.1.2.3.C + +/^\".*\"\s*(;.*)?$/ + \"1234\" + \"abcd\" ; + \"\" ; rhubarb +\= Expect no match + \"1234\" : things + +/^$/ + \ + +/ ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/x + ab c +\= Expect no match + abc + ab cde + +/(?x) ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/ + ab c +\= Expect no match + abc + ab cde + +/^ a\ b[c ]d $/x + a bcd + a b d +\= Expect no match + abcd + ab d + +/^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$/ + abcdefhijklm + +/^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$/ + abcdefhijklm + +/^[\w][\W][\s][\S][\d][\D][\b][\n][\c]][\022]/ + a+ Z0+\x08\n\x1d\x12 + +/^[.^$|()*+?{,}]+/ + .^\$(*+)|{?,?} + +/^a*\w/ + z + az + aaaz + a + aa + aaaa + a+ + aa+ + +/^a*?\w/ + z + az + aaaz + a + aa + aaaa + a+ + aa+ + +/^a+\w/ + az + aaaz + aa + aaaa + aa+ + +/^a+?\w/ + az + aaaz + aa + aaaa + aa+ + +/^\d{8}\w{2,}/ + 1234567890 + 12345678ab + 12345678__ +\= Expect no match + 1234567 + +/^[aeiou\d]{4,5}$/ + uoie + 1234 + 12345 + aaaaa +\= Expect no match + 123456 + +/^[aeiou\d]{4,5}?/ + uoie + 1234 + 12345 + aaaaa + 123456 + +/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/ + From abcd Mon Sep 01 12:33:02 1997 + +/^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/ + From abcd Mon Sep 01 12:33:02 1997 + From abcd Mon Sep 1 12:33:02 1997 +\= Expect no match + From abcd Sep 01 12:33:02 1997 + +/^12.34/s + 12\n34 + 12\r34 + +/\w+(?=\t)/ + the quick brown\t fox + +/foo(?!bar)(.*)/ + foobar is foolish see? 
+ +/(?:(?!foo)...|^.{0,2})bar(.*)/ + foobar crowbar etc + barrel + 2barrel + A barrel + +/^(\D*)(?=\d)(?!123)/ + abc456 +\= Expect no match + abc123 + +/^1234(?# test newlines + inside)/ + 1234 + +/^1234 #comment in extended re + /x + 1234 + +/#rhubarb + abcd/x + abcd + +/^abcd#rhubarb/x + abcd + +/(?!^)abc/ + the abc +\= Expect no match + abc + +/(?=^)abc/ + abc +\= Expect no match + the abc + +/^[ab]{1,3}(ab*|b)/no_auto_possess + aabbbbb + +/^[ab]{1,3}?(ab*|b)/no_auto_possess + aabbbbb + +/^[ab]{1,3}?(ab*?|b)/no_auto_possess + aabbbbb + +/^[ab]{1,3}(ab*?|b)/no_auto_possess + aabbbbb + +/ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional leading comment +(?: (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address +| # or +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # one word, optionally followed by.... +(?: +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or... +\( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) | # comments, or... 
+ +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +# quoted strings +)* +< (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # leading < +(?: @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* + +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* , (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +)* # further okay, if led by comma +: # closing colon +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* )? # optional route +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address spec +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* > # trailing > +# name and address +) (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional trailing comment +/x + Alan Other + + user\@dom.ain + \"A. Other\" (a comment) + A. Other (a comment) + \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay + A missing angle @,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# additional words +)* +@ +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. 
+# optional trailing comments +)* +# address +| # or +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +# leading word +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # "normal" atoms and or spaces +(?: +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +| +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +) # "special" comment or quoted string +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # more "normal" +)* +< +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# < +(?: +@ +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)* +(?: , +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +@ +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. 
+# optional trailing comments +)* +)* # additional domains +: +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)? # optional route +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# additional words +)* +@ +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)* +# address spec +> # > +# name and address +) +/x + Alan Other + + user\@dom.ain + \"A. Other\" (a comment) + A. Other (a comment) + \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay + A missing angle ]{0,})>]{0,})>([\d]{0,}\.)(.*)((
([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/is + 43.
Word Processor
(N-1286)
Lega lstaff.comCA - Statewide + +/a[^a]b/ + acb + a\nb + +/a.b/ + acb +\= Expect no match + a\nb + +/a[^a]b/s + acb + a\nb + +/a.b/s + acb + a\nb + +/^(b+?|a){1,2}?c/ + bac + bbac + bbbac + bbbbac + bbbbbac + +/^(b+|a){1,2}?c/ + bac + bbac + bbbac + bbbbac + bbbbbac + +/(?!\A)x/m + a\bx\n +\= Expect no match + x\nb\n + +/\x0{ab}/ + \0{ab} + +/(A|B)*?CD/ + CD + +/(A|B)*CD/ + CD + +/(?.*/)foo" +\= Expect no match + /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ + +"(?>.*/)foo" + /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo + +/(?>(\.\d\d[1-9]?))\d+/ + 1.230003938 + 1.875000282 +\= Expect no match + 1.235 + +/^((?>\w+)|(?>\s+))*$/ + now is the time for all good men to come to the aid of the party +\= Expect no match + this is not a line with only words and spaces! + +/(\d+)(\w)/ + 12345a + 12345+ + +/((?>\d+))(\w)/ + 12345a +\= Expect no match + 12345+ + +/(?>a+)b/ + aaab + +/((?>a+)b)/ + aaab + +/(?>(a+))b/ + aaab + +/(?>b)+/ + aaabbbccc + +/(?>a+|b+|c+)*c/ + aaabbbbccccd + +/(a+|b+|c+)*c/ + aaabbbbccccd + +/((?>[^()]+)|\([^()]*\))+/ + ((abc(ade)ufh()()x + +/\(((?>[^()]+)|\([^()]+\))+\)/ + (abc) + (abc(def)xyz) +\= Expect no match + ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/a(?-i)b/i + ab + Ab +\= Expect no match + aB + AB + +/(a (?x)b c)d e/ + a bcd e +\= Expect no match + a b cd e + abcd e + a bcde + +/(a b(?x)c d (?-x)e f)/ + a bcde f +\= Expect no match + abcdef + +/(a(?i)b)c/ + abc + aBc +\= Expect no match + abC + aBC + Abc + ABc + ABC + AbC + +/a(?i:b)c/ + abc + aBc +\= Expect no match + ABC + abC + aBC + +/a(?i:b)*c/ + aBc + aBBc +\= Expect no match + aBC + aBBC + +/a(?=b(?i)c)\w\wd/ + abcd + abCd +\= Expect no match + aBCd + abcD + +/(?s-i:more.*than).*million/i + more than million + more than MILLION + more \n than Million +\= Expect no match + MORE THAN MILLION + more \n than \n million + +/(?:(?s-i)more.*than).*million/i + more than million + more than MILLION + more \n than Million +\= Expect no 
match + MORE THAN MILLION + more \n than \n million + +/(?>a(?i)b+)+c/ + abc + aBbc + aBBc +\= Expect no match + Abc + abAb + abbC + +/(?=a(?i)b)\w\wc/ + abc + aBc +\= Expect no match + Ab + abC + aBC + +/(?<=a(?i)b)(\w\w)c/ + abxxc + aBxxc +\= Expect no match + Abxxc + ABxxc + abxxC + +/^(?(?=abc)\w{3}:|\d\d)$/ + abc: + 12 +\= Expect no match + 123 + xyz + +/^(?(?!abc)\d\d|\w{3}:)$/ + abc: + 12 +\= Expect no match + 123 + xyz + +/(?(?<=foo)bar|cat)/ + foobar + cat + fcat + focat +\= Expect no match + foocat + +/(?(?a*)*/ + a + aa + aaaa + +/(abc|)+/ + abc + abcabc + abcabcabc + xyz + +/([a]*)*/ + a + aaaaa + +/([ab]*)*/ + a + b + ababab + aaaabcde + bbbb + +/([^a]*)*/ + b + bbbb + aaa + +/([^ab]*)*/ + cccc + abab + +/([a]*?)*/ + a + aaaa + +/([ab]*?)*/ + a + b + abab + baba + +/([^a]*?)*/ + b + bbbb + aaa + +/([^ab]*?)*/ + c + cccc + baba + +/(?>a*)*/ + a + aaabcde + +/((?>a*))*/ + aaaaa + aabbaa + +/((?>a*?))*/ + aaaaa + aabbaa + +/(?(?=[^a-z]+[a-z]) \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) /x + 12-sep-98 + 12-09-98 +\= Expect no match + sep-12-98 + +/(?i:saturday|sunday)/ + saturday + sunday + Saturday + Sunday + SATURDAY + SUNDAY + SunDay + +/(a(?i)bc|BB)x/ + abcx + aBCx + bbx + BBx +\= Expect no match + abcX + aBCX + bbX + BBX + +/^([ab](?i)[cd]|[ef])/ + ac + aC + bD + elephant + Europe + frog + France +\= Expect no match + Africa + +/^(ab|a(?i)[b-c](?m-i)d|x(?i)y|z)/ + ab + aBd + xy + xY + zebra + Zambesi +\= Expect no match + aCD + XY + +/(?<=foo\n)^bar/m + foo\nbar +\= Expect no match + bar + baz\nbar + +/(?<=(?]&/ + <&OUT + +/(?:(f)(o)(o)|(b)(a)(r))*/ + foobar + +/(?<=a)b/ + ab +\= Expect no match + cb + b + +/(?a+)ab/ + +/(?>a+)b/ + aaab + +/([[:]+)/ + a:[b]: + +/([[=]+)/ + a=[b]= + +/([[.]+)/ + a.[b]. 
+ +/((?>a+)b)/ + aaab + +/(?>(a+))b/ + aaab + +/((?>[^()]+)|\([^()]*\))+/ + ((abc(ade)ufh()()x + +/a\Z/ +\= Expect no match + aaab + a\nb\n + +/b\Z/ + a\nb\n + +/b\z/ + +/b\Z/ + a\nb + +/b\z/ + a\nb + +/(?>.*)(?<=(abcd|wxyz))/ + alphabetabcd + endingwxyz +\= Expect no match + a rather long string that doesn't end with one of them + +/word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword/ + word cat dog elephant mussel cow horse canary baboon snake shark otherword +\= Expect no match + word cat dog elephant mussel cow horse canary baboon snake shark + +/word (?>[a-zA-Z0-9]+ ){0,30}otherword/ +\= Expect no match + word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope + +/(?<=\d{3}(?!999))foo/ + 999foo + 123999foo +\= Expect no match + 123abcfoo + +/(?<=(?!...999)\d{3})foo/ + 999foo + 123999foo +\= Expect no match + 123abcfoo + +/(?<=\d{3}(?!999)...)foo/ + 123abcfoo + 123456foo +\= Expect no match + 123999foo + +/(?<=\d{3}...)(?Z)+|A)*/ + ZABCDEFG + +/((?>)+|A)*/ + ZABCDEFG + +/a*/g + abbab + +/[[:space:]]+/ + > \x09\x0a\x0c\x0d\x0b< + +/[[:blank:]]+/ + > \x09\x0a\x0c\x0d\x0b< + +/[\s]+/ + > \x09\x0a\x0c\x0d\x0b< + +/\s+/ + > \x09\x0a\x0c\x0d\x0b< + +/a b/x + ab + +/(?!\A)x/m + a\nxb\n + +/(?!^)x/m +\= Expect no match + a\nxb\n + +/abc\Qabc\Eabc/ + abcabcabc + +/abc\Q(*+|\Eabc/ + abc(*+|abc + +/ abc\Q abc\Eabc/x + abc abcabc +\= Expect no match + abcabcabc + +/abc#comment + \Q#not comment + literal\E/x + abc#not comment\n literal + +/abc#comment + \Q#not comment + literal/x + abc#not comment\n literal + +/abc#comment + \Q#not comment + literal\E #more comment + /x + abc#not comment\n literal + +/abc#comment + \Q#not comment + literal\E #more comment/x + abc#not comment\n literal + +/\Qabc\$xyz\E/ + abc\\\$xyz + +/\Qabc\E\$\Qxyz\E/ + abc\$xyz + +/\Gabc/ + abc +\= Expect no match + xyzabc + +/\Gabc./g + abc1abc2xyzabc3 + +/abc./g + abc1abc2xyzabc3 + +/a(?x: b c 
)d/ + XabcdY +\= Expect no match + Xa b c d Y + +/((?x)x y z | a b c)/ + XabcY + AxyzB + +/(?i)AB(?-i)C/ + XabCY +\= Expect no match + XabcY + +/((?i)AB(?-i)C|D)E/ + abCE + DE +\= Expect no match + abcE + abCe + dE + De + +/[z\Qa-d]\E]/ + z + a + - + d + ] +\= Expect no match + b + +/(a+)*b/ +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/(?i)reg(?:ul(?:[aä]|ae)r|ex)/ + REGular + regulaer + Regex + regulär + +/Åæåä[à-ÿÀ-ß]+/ + Åæåäà + Åæåäÿ + ÅæåäÀ + Åæåäß + +/(?<=Z)X./ + \x84XAZXB + +/^(?(2)a|(1)(2))+$/ + 123a + +/(?<=a|bbbb)c/ + ac + bbbbc + +/line\nbreak/ + this is a line\nbreak + line one\nthis is a line\nbreak in the second line + +/line\nbreak/firstline + this is a line\nbreak +\= Expect no match + line one\nthis is a line\nbreak in the second line + +/line\nbreak/m,firstline + this is a line\nbreak +\= Expect no match + line one\nthis is a line\nbreak in the second line + +/1234/ + 123\=ps +\= Expect no match + a4\=ps,dfa_restart + +/1234/ + 123\=ps + 4\=ps,dfa_restart + +/^/gm + a\nb\nc\n + \ + +/(?<=C\n)^/gm + A\nC\nC\n + +/(?s)A?B/ + AB + aB + +/(?s)A*B/ + AB + aB + +/(?m)A?B/ + AB + aB + +/(?m)A*B/ + AB + aB + +/Content-Type\x3A[^\r\n]{6,}/ + Content-Type:xxxxxyyy + +/Content-Type\x3A[^\r\n]{6,}z/ + Content-Type:xxxxxyyyz + +/Content-Type\x3A[^a]{6,}/ + Content-Type:xxxyyy + +/Content-Type\x3A[^a]{6,}z/ + Content-Type:xxxyyyz + +/^abc/Im,newline=lf + xyz\nabc + xyz\r\nabc +\= Expect no match + xyz\rabc + xyzabc\r + xyzabc\rpqr + xyzabc\r\n + xyzabc\r\npqr + +/^abc/Im,newline=crlf + xyz\r\nabclf> +\= Expect no match + xyz\nabclf + xyz\rabclf + +/^abc/Im,newline=cr + xyz\rabc +\= Expect no match + xyz\nabc + xyz\r\nabc + +/.*/I,newline=lf + abc\ndef + abc\rdef + abc\r\ndef + +/.*/I,newline=cr + abc\ndef + abc\rdef + abc\r\ndef + +/.*/I,newline=crlf + abc\ndef + abc\rdef + abc\r\ndef + +/\w+(.)(.)?def/Is + abc\ndef + abc\rdef + abc\r\ndef + +/\w+(.)(.)?def/s + abc\ndef + abc\rdef + abc\r\ndef + 
+/^\w+=.*(\\\n.*)*/ + abc=xyz\\\npqr + +/^(a()*)*/ + aaaa + +/^(?:a(?:(?:))*)*/ + aaaa + +/^(a()+)+/ + aaaa + +/^(?:a(?:(?:))+)+/ + aaaa + +/(a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/(?>a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/(?:a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/^a.b/newline=lf + a\rb +\= Expect no match + a\nb + +/^a.b/newline=cr + a\nb +\= Expect no match + a\rb + +/^a.b/newline=anycrlf + a\x85b +\= Expect no match + a\rb + +/^a.b/newline=any +\= Expect no match + a\nb + a\rb + a\x85b + +/^abc./gmx,newline=any + abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 JUNK + +/abc.$/gmx,newline=any + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc9 + +/^a\Rb/bsr=unicode + a\nb + a\rb + a\r\nb + a\x0bb + a\x0cb + a\x85b +\= Expect no match + a\n\rb + +/^a\R*b/bsr=unicode + ab + a\nb + a\rb + a\r\nb + a\x0bb + a\x0cb + a\x85b + a\n\rb + a\n\r\x85\x0cb + +/^a\R+b/bsr=unicode + a\nb + a\rb + a\r\nb + a\x0bb + a\x0cb + a\x85b + a\n\rb + a\n\r\x85\x0cb +\= Expect no match + ab + +/^a\R{1,3}b/bsr=unicode + a\nb + a\n\rb + a\n\r\x85b + a\r\n\r\nb + a\r\n\r\n\r\nb + a\n\r\n\rb + a\n\n\r\nb +\= Expect no match + a\n\n\n\rb + a\r + +/.+foo/ + afoo +\= Expect no match + \r\nfoo + \nfoo + +/.+foo/newline=crlf + afoo + \nfoo +\= Expect no match + \r\nfoo + +/.+foo/newline=any + afoo +\= Expect no match + \nfoo + \r\nfoo + +/.+foo/s + afoo + \r\nfoo + \nfoo + +/^$/gm,newline=any + abc\r\rxyz + abc\n\rxyz +\= Expect no match + abc\r\nxyz + +/^X/m + XABC +\= Expect no match + XABC\=notbol + +/(?m)^$/g,newline=any,aftertext + abc\r\n\r\n + +/(?m)^$|^\r\n/g,newline=any,aftertext + abc\r\n\r\n + 
+/(?m)$/g,newline=any,aftertext + abc\r\n\r\n + +/(?|(abc)|(xyz))/ + >abc< + >xyz< + +/(x)(?|(abc)|(xyz))(x)/ + xabcx + xxyzx + +/(x)(?|(abc)(pqr)|(xyz))(x)/ + xabcpqrx + xxyzx + +/(?|(abc)|(xyz))(?1)/ + abcabc + xyzabc +\= Expect no match + xyzxyz + +/\H\h\V\v/ + X X\x0a + X\x09X\x0b +\= Expect no match + \xa0 X\x0a + +/\H*\h+\V?\v{3,4}/ + \x09\x20\xa0X\x0a\x0b\x0c\x0d\x0a + \x09\x20\xa0\x0a\x0b\x0c\x0d\x0a + \x09\x20\xa0\x0a\x0b\x0c +\= Expect no match + \x09\x20\xa0\x0a\x0b + +/\H{3,4}/ + XY ABCDE + XY PQR ST + +/.\h{3,4}./ + XY AB PQRS + +/\h*X\h?\H+Y\H?Z/ + >XNNNYZ + > X NYQZ +\= Expect no match + >XYZ + > X NY Z + +/\v*X\v?Y\v+Z\V*\x0a\V+\x0b\V{2,3}\x0c/ + >XY\x0aZ\x0aA\x0bNN\x0c + >\x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c + +/.+A/newline=crlf +\= Expect no match + \r\nA + +/\nA/newline=crlf + \r\nA + +/[\r\n]A/newline=crlf + \r\nA + +/(\r|\n)A/newline=crlf + \r\nA + +/a\Rb/I,bsr=anycrlf + a\rb + a\nb + a\r\nb +\= Expect no match + a\x85b + a\x0bb + +/a\Rb/I,bsr=unicode + a\rb + a\nb + a\r\nb + a\x85b + a\x0bb + +/a\R?b/I,bsr=anycrlf + a\rb + a\nb + a\r\nb +\= Expect no match + a\x85b + a\x0bb + +/a\R?b/I,bsr=unicode + a\rb + a\nb + a\r\nb + a\x85b + a\x0bb + +/a\R{2,4}b/I,bsr=anycrlf + a\r\n\nb + a\n\r\rb + a\r\n\r\n\r\n\r\nb +\= Expect no match + a\x0b\x0bb + a\x85\x85b + +/a\R{2,4}b/I,bsr=unicode + a\r\rb + a\n\n\nb + a\r\n\n\r\rb + a\x85\x85b + a\x0b\x0bb +\= Expect no match + a\r\r\r\r\rb + +/a(?!)|\wbc/ + abc + +/a[]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames +\= Expect no match + ab + +/a[]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames +\= Expect no match + ab + +/a[]*+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames +\= Expect no match + ab + +/a[^]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aXb + a\nb +\= Expect no match + ab + +/a[^]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aXb + a\nX\nXb +\= Expect no match + ab + +/X$/dollar_endonly + X +\= Expect no match + X\n + +/X$/ + X + 
X\n + +/xyz/auto_callout + xyz + abcxyz +\= Expect no match + abc + abcxypqr + +/xyz/auto_callout,no_start_optimize + abcxyz +\= Expect no match + abc + abcxypqr + +/(*NO_START_OPT)xyz/auto_callout + abcxyz + +/(?C)ab/ + ab + ab\=callout_none + +/ab/auto_callout + ab + ab\=callout_none + +/^"((?(?=[a])[^"])|b)*"$/auto_callout + "ab" + "ab"\=callout_none + +/\d+X|9+Y/ + ++++123999\=ps + ++++123999Y\=ps + +/Z(*F)/ +\= Expect no match + Z\=ps + ZA\=ps + +/Z(?!)/ +\= Expect no match + Z\=ps + ZA\=ps + +/dog(sbody)?/ + dogs\=ps + dogs\=ph + +/dog(sbody)??/ + dogs\=ps + dogs\=ph + +/dog|dogsbody/ + dogs\=ps + dogs\=ph + +/dogsbody|dog/ + dogs\=ps + dogs\=ph + +/Z(*F)Q|ZXY/ + Z\=ps +\= Expect no match + ZA\=ps + X\=ps + +/\bthe cat\b/ + the cat\=ps + the cat\=ph + +/dog(sbody)?/ + dogs\=ps + body\=dfa_restart + +/dog(sbody)?/ + dogs\=ph + body\=dfa_restart + +/abc/ + abc\=ps + abc\=ph + +/abc\K123/ + xyzabc123pqr + +/(?<=abc)123/allusedtext + xyzabc123pqr + xyzabc12\=ps + xyzabc12\=ph + +/\babc\b/allusedtext + +++abc+++ + +++ab\=ps + +++ab\=ph + +/(?=C)/g,aftertext + ABCDECBA + +/(abc|def|xyz)/I + terhjk;abcdaadsfe + the quick xyz brown fox +\= Expect no match + thejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd + +/(abc|def|xyz)/I,no_start_optimize + terhjk;abcdaadsfe + the quick xyz brown fox +\= Expect no match + thejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd + +/abcd*/aftertext + xxxxabcd\=ps + xxxxabcd\=ph + dddxxx\=dfa_restart + xxxxabcd\=ph + xxx\=dfa_restart + +/abcd*/i + xxxxabcd\=ps + xxxxabcd\=ph + XXXXABCD\=ps + XXXXABCD\=ph + +/abc\d*/ + xxxxabc1\=ps + xxxxabc1\=ph + +/abc[de]*/ + xxxxabcde\=ps + xxxxabcde\=ph + +/(?:(?1)|B)(A(*F)|C)/ + ABCD + CCD +\= Expect no match + CAD + +/^(?:(?1)|B)(A(*F)|C)/ + CCD + BCD +\= Expect no match + ABCD + CAD + BAD + +/^(?!a(*SKIP)b)/ + ac + +/^(?=a(*SKIP)b|ac)/ + ac + +/^(?=a(*THEN)b|ac)/ + ac + +/^(?=a(*PRUNE)b)/ + ab + +/^(?(?!a(*SKIP)b))/ + ac + +/(?<=abc)def/allusedtext + abc\=ph + +/abc$/ + abc + abc\=ps + abc\=ph + +/abc$/m + 
abc + abc\n + abc\=ph + abc\n\=ph + abc\=ps + abc\n\=ps + +/abc\z/ + abc + abc\=ps + abc\=ph + +/abc\Z/ + abc + abc\=ps + abc\=ph + +/abc\b/ + abc + abc\=ps + abc\=ph + +/abc\B/ + abc\=ps + abc\=ph +\= Expect no match + abc + +/.+/ + abc\=offset=0 + abc\=offset=1 + abc\=offset=2 +\= Bad offsets + abc\=offset=4 + abc\=offset=-4 +\= Expect no match + abc\=offset=3 + +/^(?:a)++\w/ + aaaab +\= Expect no match + aaaa + bbb + +/^(?:aa|(?:a)++\w)/ + aaaab + aaaa +\= Expect no match + bbb + +/^(?:a)*+\w/ + aaaab + bbb +\= Expect no match + aaaa + +/^(a)++\w/ + aaaab +\= Expect no match + aaaa + bbb + +/^(a|)++\w/ + aaaab +\= Expect no match + aaaa + bbb + +/(?=abc){3}abc/aftertext + abcabcabc +\= Expect no match + xyz + +/(?=abc)+abc/aftertext + abcabcabc +\= Expect no match + xyz + +/(?=abc)++abc/aftertext + abcabcabc +\= Expect no match + xyz + +/(?=abc){0}xyz/ + xyz + +/(?=abc){1}xyz/ +\= Expect no match + xyz + +/(?=(a))?./ + ab + bc + +/(?=(a))??./ + ab + bc + +/^(?=(a)){0}b(?1)/ + backgammon + +/^(?=(?1))?[az]([abc])d/ + abd + zcdxx + +/^(?!a){0}\w+/ + aaaaa + +/(?<=(abc))?xyz/ + abcxyz + pqrxyz + +/((?2))((?1))/ + abc + +/(?(R)a+|(?R)b)/ + aaaabcde + +/(?(R)a+|((?R))b)/ + aaaabcde + +/((?(R)a+|(?1)b))/ + aaaabcde + +/((?(R2)a+|(?1)b))()/ + aaaabcde + +/(?(R)a*(?1)|((?R))b)/ + aaaabcde + +/(a+)/no_auto_possess + aaaa\=ovector=3 + aaaa\=ovector=4 + +/^\R/ + \r\=ps + \r\=ph + +/^\R{2,3}x/ + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + \r\rx + \r\r\rx + +/^\R{2,3}?x/ + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + \r\rx + \r\r\rx + +/^\R?x/ + \r\=ps + \r\=ph + x + \rx + +/^\R+x/ + \r\=ps + \r\=ph + \r\n\=ps + \r\n\=ph + \rx + +/^a$/newline=crlf + a\r\=ps + a\r\=ph + +/^a$/m,newline=crlf + a\r\=ps + a\r\=ph + +/^(a$|a\r)/newline=crlf + a\r\=ps + a\r\=ph + +/^(a$|a\r)/m,newline=crlf + a\r\=ps + a\r\=ph + +/./newline=crlf + \r\=ps + \r\=ph + +/.{2,3}/newline=crlf + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + 
+/.{2,3}?/newline=crlf + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + +# Test simple validity check for restarts + +/abcdef/ + abc\=dfa_restart + +/)(.)|(?R))++)*F>/ + text text xxxxx text F> text2 more text. + +/^(?>.{4})abc|^\w\w.xabcd/ + xxxxabcd + xx\xa0xabcd + +/^(.{4}){2}+abc|^\w\w.x\w\w\w\wabcd/ + xxxxxxxxabcd + xx\xa0xxxxxabcd + +/abcd/ + abcd\=ovector=0 + +# These tests show up auto-possessification + +/[ab]*/ + aaaa + +/[ab]*?/ + aaaa + +/[ab]?/ + aaaa + +/[ab]??/ + aaaa + +/[ab]+/ + aaaa + +/[ab]+?/ + aaaa + +/[ab]{2,3}/ + aaaa + +/[ab]{2,3}?/ + aaaa + +/[ab]{2,}/ + aaaa + +/[ab]{2,}?/ + aaaa + +'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + +'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + +/abc(?=xyz)/allusedtext + abcxyzpqr + abcxyzpqr\=aftertext + +/(?<=pqr)abc(?=xyz)/allusedtext + xyzpqrabcxyzpqr + xyzpqrabcxyzpqr\=aftertext + +/a\b/ + a.\=allusedtext + a\=allusedtext + +/abc(?=abcde)(?=ab)/allusedtext + abcabcdefg + +/a*?b*?/ + ab + +/(*NOTEMPTY)a*?b*?/ + ab + ba + cb + +/(*NOTEMPTY_ATSTART)a*?b*?/aftertext + ab + cdab + +/(a)(b)|(c)/ + XcX\=ovector=2,get=1,get=2,get=3,get=4,getall + +/(?aa)/ + aa\=get=A + aa\=copy=A + +/a+/no_auto_possess + a\=ovector=2,get=1,get=2,getall + aaa\=ovector=2,get=1,get=2,getall + +/a(b)c(d)/ + abc\=ph,copy=0,copy=1,getall + +/ab(?C" any text with spaces ")cde/B + abcde + 12abcde + +/^a(b)c(?C1)def/ + abcdef + +/^a(b)c(?C"AB")def/ + abcdef + +/^a(b)c(?C1)def/ + abcdef\=callout_capture + +/^a(b)c(?C{AB})def/B + abcdef\=callout_capture + +/^(?(?C25)(?=abc)abcd|xyz)/B + abcdefg + xyz123 + +/^(?(?C$abc$)(?=abc)abcd|xyz)/B + abcdefg + xyz123 + +/^ab(?C'first')cd(?C"second")ef/ + abcdefg + +/(?:a(?C`code`)){3}X/ + aaaXY + +# Binary zero in callout string +/"a(?C'x" 00 "z')b"/hex + abcdefgh + +/(?(?!)a|b)/ + bbb +\= Expect no match + aaa + +/^/gm + \n\n\n + +/^/gm,alt_circumflex + \n\n\n + +/abc/use_offset_limit + 
1234abcde\=offset_limit=100 + 1234abcde\=offset_limit=9 + 1234abcde\=offset_limit=4 + 1234abcde\=offset_limit=4,offset=4 +\= Expect no match + 1234abcde\=offset_limit=4,offset=5 + 1234abcde\=offset_limit=3 + +/(?<=abc)/use_offset_limit + 1234abc\=offset_limit=7 +\= Expect no match + 1234abc\=offset_limit=6 + +/abcd/null_context + abcd\=null_context + +/()()a+/no_auto_possess + aaa\=allcaptures + a\=allcaptures + +/(*LIMIT_DEPTH=100)^((.)(?1)|.)$/ +\= Expect depth limit exceeded + a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00] + +/(*LIMIT_HEAP=0)^((.)(?1)|.)$/ +\= Expect heap limit exceeded + a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00] + +/(*LIMIT_HEAP=50000)^((.)(?1)|.)$/ +\= Expect success + a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00] + +/(02-)?[0-9]{3}-[0-9]{3}/ + 02-123-123 + +/^(a(?2))(b)(?1)/ + abbab\=find_limits + +/abc/endanchored + xyzabc +\= Expect no match + xyzabcdef +\= Expect error + xyzabc\=ph + +/abc/ + xyzabc\=endanchored +\= Expect no match + xyzabcdef\=endanchored +\= Expect error + xyzabc\=ps,endanchored + +/abc|bcd/endanchored + xyzabcd +\= Expect no match + xyzabcdef + +/(*NUL)^.*/ + a\nb\x00ccc + +/(*NUL)^.*/s + a\nb\x00ccc + +/^x/m,newline=nul + ab\x00xy + +/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex + x\nyz + +/(*NUL)^X\NY/ + X\nY + X\rY +\= Expect no match + X\x00Y + +/(?<=abc|)/ + abcde\=aftertext + +/(?<=|abc)/ + abcde\=aftertext + +/(?<=abc|)/endanchored + abcde\=aftertext + +/(?<=|abc)/endanchored + abcde\=aftertext + 
+/(*LIMIT_MATCH=100).*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00 \x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););/no_dotstar_anchor +\= Expect limit exceeded +.*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00 \x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?);); + +/\n/firstline + xyz\nabc + +/\nabc/firstline + xyz\nabc + +/\x{0a}abc/firstline,newline=crlf +\= Expect no match + xyz\r\nabc + +/[abc]/firstline +\= Expect no match + \na + +/foobar/ + the foobar thing\=copy_matched_subject + the foobar thing\=copy_matched_subject,zero_terminate + +/foobar/g + the foobar thing foobar again\=copy_matched_subject + +/(?(VERSION>=0)^B0W)/ + B0W-W0W +\= Expect no match + 0 + +/(?(VERSION>=1000)^B0W|W0W)/ + B0W-W0W +\= Expect no match + 0 + +/(?<=pqr)abc(?=xyz)/ + 123pqrabcxy\=ps,allusedtext + 123pqrabcxyz\=ps,allusedtext + +/(?>a+b)/ + aaaa\=ps + aaaab\=ps + +/(abc)(?1)/ + abca\=ps + abcabc\=ps + +/(?(?=abc).*|Z)/ + ab\=ps + abcxyz\=ps + +/(abc)++x/ + abcab\=ps + abc\=ps + ab\=ps + abcx + +/\z/ + abc\=ph + abc\=ps + +/\Z/ + abc\=ph + abc\=ps + abc\n\=ph + abc\n\=ps + +/c*+(?<=[bc])/ + abc\=ph + ab\=ph + abc\=ps + ab\=ps + +/c++(?<=[bc])/ + abc\=ph + ab\=ph + +/(?<=(?=.(?<=x)))/ + abx + ab\=ph + bxyz + xyz + +/(?![ab]).*/ + 
ab\=ph + +/c*+/ + ab\=ph,offset=2 + +/ +/anchored, firstline + \x0a + +/ +/anchored,firstline,no_start_optimize + \x0a + +/ +/firstline + \x0a + abc\x0adef + +/|a(?0)/endanchored + aaaa + +# End of testinput6 diff --git a/testdata/testinput7 b/testdata/testinput7 new file mode 100644 index 0000000..896019f --- /dev/null +++ b/testdata/testinput7 @@ -0,0 +1,2534 @@ +# This set of tests checks UTF and Unicode property support with the DFA +# matching functionality of pcre2_dfa_match(). A default subject modifier is +# used to force DFA matching for all tests. + +#subject dfa +#newline_default LF any anyCRLF + +/\x{100}ab/utf + \x{100}ab + +/a\x{100}*b/utf + ab + a\x{100}b + a\x{100}\x{100}b + +/a\x{100}+b/utf + a\x{100}b + a\x{100}\x{100}b +\= Expect no match + ab + +/\bX/utf + Xoanon + +Xoanon + \x{300}Xoanon +\= Expect no match + YXoanon + +/\BX/utf + YXoanon +\= Expect no match + Xoanon + +Xoanon + \x{300}Xoanon + +/X\b/utf + X+oanon + ZX\x{300}oanon + FAX +\= Expect no match + Xoanon + +/X\B/utf + Xoanon +\= Expect no match + X+oanon + ZX\x{300}oanon + FAX + +/[^a]/utf + abcd + a\x{100} + +/^[abc\x{123}\x{400}-\x{402}]{2,3}\d/utf + ab99 + \x{123}\x{123}45 + \x{400}\x{401}\x{402}6 +\= Expect no match + d99 + \x{123}\x{122}4 + \x{400}\x{403}6 + \x{400}\x{401}\x{402}\x{402}6 + +/a.b/utf + acb + a\x7fb + a\x{100}b +\= Expect no match + a\nb + +/a(.{3})b/utf + a\x{4000}xyb + a\x{4000}\x7fyb + a\x{4000}\x{100}yb +\= Expect no match + a\x{4000}b + ac\ncb + +/a(.*?)(.)/ + a\xc0\x88b + +/a(.*?)(.)/utf + a\x{100}b + +/a(.*)(.)/ + a\xc0\x88b + +/a(.*)(.)/utf + a\x{100}b + +/a(.)(.)/ + a\xc0\x92bcd + +/a(.)(.)/utf + a\x{240}bcd + +/a(.?)(.)/ + a\xc0\x92bcd + +/a(.?)(.)/utf + a\x{240}bcd + +/a(.??)(.)/ + a\xc0\x92bcd + +/a(.??)(.)/utf + a\x{240}bcd + +/a(.{3})b/utf + a\x{1234}xyb + a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b +\= Expect no match + a\x{1234}b + ac\ncb + +/a(.{3,})b/utf + a\x{1234}xyb + a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b + axxxxbcdefghijb + 
a\x{1234}\x{4321}\x{3412}\x{3421}b +\= Expect no match + a\x{1234}b + +/a(.{3,}?)b/utf + a\x{1234}xyb + a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b + axxxxbcdefghijb + a\x{1234}\x{4321}\x{3412}\x{3421}b +\= Expect no match + a\x{1234}b + +/a(.{3,5})b/utf + a\x{1234}xyb + a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b + axxxxbcdefghijb + a\x{1234}\x{4321}\x{3412}\x{3421}b + axbxxbcdefghijb + axxxxxbcdefghijb +\= Expect no match + a\x{1234}b + axxxxxxbcdefghijb + +/a(.{3,5}?)b/utf + a\x{1234}xyb + a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b + axxxxbcdefghijb + a\x{1234}\x{4321}\x{3412}\x{3421}b + axbxxbcdefghijb + axxxxxbcdefghijb +\= Expect no match + a\x{1234}b + axxxxxxbcdefghijb + +/^[a\x{c0}]/utf +\= Expect no match + \x{100} + +/(?<=aXb)cd/utf + aXbcd + +/(?<=a\x{100}b)cd/utf + a\x{100}bcd + +/(?<=a\x{100000}b)cd/utf + a\x{100000}bcd + +/(?:\x{100}){3}b/utf + \x{100}\x{100}\x{100}b +\= Expect no match + \x{100}\x{100}b + +/\x{ab}/utf + \x{ab} + \xc2\xab +\= Expect no match + \x00{ab} + +/(?<=(.))X/utf + WXYZ + \x{256}XYZ +\= Expect no match + XYZ + +/[^a]+/g,utf + bcd + \x{100}aY\x{256}Z + +/^[^a]{2}/utf + \x{100}bc + +/^[^a]{2,}/utf + \x{100}bcAa + +/^[^a]{2,}?/utf + \x{100}bca + +/[^a]+/gi,utf + bcd + \x{100}aY\x{256}Z + +/^[^a]{2}/i,utf + \x{100}bc + +/^[^a]{2,}/i,utf + \x{100}bcAa + +/^[^a]{2,}?/i,utf + \x{100}bca + +/\x{100}{0,0}/utf + abcd + +/\x{100}?/utf + abcd + \x{100}\x{100} + +/\x{100}{0,3}/utf + \x{100}\x{100} + \x{100}\x{100}\x{100}\x{100} + +/\x{100}*/utf + abce + \x{100}\x{100}\x{100}\x{100} + +/\x{100}{1,1}/utf + abcd\x{100}\x{100}\x{100}\x{100} + +/\x{100}{1,3}/utf + abcd\x{100}\x{100}\x{100}\x{100} + +/\x{100}+/utf + abcd\x{100}\x{100}\x{100}\x{100} + +/\x{100}{3}/utf + abcd\x{100}\x{100}\x{100}XX + +/\x{100}{3,5}/utf + abcd\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}XX + +/\x{100}{3,}/utf,no_auto_possess + abcd\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}XX + +/(?<=a\x{100}{2}b)X/utf + Xyyya\x{100}\x{100}bXzzz + 
+/\D*/utf,no_auto_possess + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/\D*/utf,no_auto_possess + \x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + +/\D/utf + 1X2 + 1\x{100}2 + +/>\S/utf + > >X Y + > >\x{100} Y + +/\d/utf + \x{100}3 + +/\s/utf + \x{100} X + +/\D+/utf + 12abcd34 +\= Expect no match + 1234 + 
+/\D{2,3}/utf + 12abcd34 + 12ab34 +\= Expect no match + 1234 + 12a34 + +/\D{2,3}?/utf + 12abcd34 + 12ab34 +\= Expect no match + 1234 + 12a34 + +/\d+/utf + 12abcd34 + +/\d{2,3}/utf + 12abcd34 + 1234abcd +\= Expect no match + 1.4 + +/\d{2,3}?/utf + 12abcd34 + 1234abcd +\= Expect no match + 1.4 + +/\S+/utf + 12abcd34 +\= Expect no match + \ \ + +/\S{2,3}/utf + 12abcd34 + 1234abcd +\= Expect no match + \ \ + +/\S{2,3}?/utf + 12abcd34 + 1234abcd +\= Expect no match + \ \ + +/>\s+ <34 + +/>\s{2,3} \s{2,3}? \xff< + +/[\xff]/utf + >\x{ff}< + +/[^\xFF]/ + XYZ + +/[^\xff]/utf + XYZ + \x{123} + +/^[ac]*b/utf +\= Expect no match + xb + +/^[ac\x{100}]*b/utf +\= Expect no match + xb + +/^[^x]*b/i,utf +\= Expect no match + xb + +/^[^x]*b/utf +\= Expect no match + xb + +/^\d*b/utf +\= Expect no match + xb + +/(|a)/g,utf + catac + a\x{256}a + +/^\x{85}$/i,utf + \x{85} + +/^abc./gmx,newline=any,utf + abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK + +/abc.$/gmx,newline=any,utf + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9 + +/^a\Rb/bsr=unicode,utf + a\nb + a\rb + a\r\nb + a\x0bb + a\x0cb + a\x{85}b + a\x{2028}b + a\x{2029}b +\= Expect no match + a\n\rb + +/^a\R*b/bsr=unicode,utf + ab + a\nb + a\rb + a\r\nb + a\x0bb + a\x0c\x{2028}\x{2029}b + a\x{85}b + a\n\rb + a\n\r\x{85}\x0cb + +/^a\R+b/bsr=unicode,utf + a\nb + a\rb + a\r\nb + a\x0bb + a\x0c\x{2028}\x{2029}b + a\x{85}b + a\n\rb + a\n\r\x{85}\x0cb +\= Expect no match + ab + +/^a\R{1,3}b/bsr=unicode,utf + a\nb + a\n\rb + a\n\r\x{85}b + a\r\n\r\nb + a\r\n\r\n\r\nb + a\n\r\n\rb + a\n\n\r\nb +\= Expect no match + a\n\n\n\rb + a\r + +/\h+\V?\v{3,4}/utf,no_auto_possess + \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a + +/\V?\v{3,4}/utf,no_auto_possess + \x20\x{a0}X\x0a\x0b\x0c\x0d\x0a + +/\h+\V?\v{3,4}/utf,no_auto_possess + >\x09\x20\x{a0}X\x0a\x0a\x0a< + +/\V?\v{3,4}/utf,no_auto_possess + >\x09\x20\x{a0}X\x0a\x0a\x0a< + +/\H\h\V\v/utf + X X\x0a + 
X\x09X\x0b +\= Expect no match + \x{a0} X\x0a + +/\H*\h+\V?\v{3,4}/utf,no_auto_possess + \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a + \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a + \x09\x20\x{a0}\x0a\x0b\x0c +\= Expect no match + \x09\x20\x{a0}\x0a\x0b + +/\H\h\V\v/utf + \x{3001}\x{3000}\x{2030}\x{2028} + X\x{180e}X\x{85} +\= Expect no match + \x{2009} X\x0a + +/\H*\h+\V?\v{3,4}/utf,no_auto_possess + \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a + \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a + \x09\x20\x{202f}\x0a\x0b\x0c +\= Expect no match + \x09\x{200a}\x{a0}\x{2028}\x0b + +/a\Rb/I,bsr=anycrlf,utf + a\rb + a\nb + a\r\nb +\= Expect no match + a\x{85}b + a\x0bb + +/a\Rb/I,bsr=unicode,utf + a\rb + a\nb + a\r\nb + a\x{85}b + a\x0bb + +/a\R?b/I,bsr=anycrlf,utf + a\rb + a\nb + a\r\nb +\= Expect no match + a\x{85}b + a\x0bb + +/a\R?b/I,bsr=unicode,utf + a\rb + a\nb + a\r\nb + a\x{85}b + a\x0bb + +/X/newline=any,utf,firstline + A\x{1ec5}ABCXYZ + +/abcd*/utf + xxxxabcd\=ps + xxxxabcd\=ph + +/abcd*/i,utf + xxxxabcd\=ps + xxxxabcd\=ph + XXXXABCD\=ps + XXXXABCD\=ph + +/abc\d*/utf + xxxxabc1\=ps + xxxxabc1\=ph + +/abc[de]*/utf + xxxxabcde\=ps + xxxxabcde\=ph + +/\bthe cat\b/utf + the cat\=ps + the cat\=ph + +/./newline=crlf,utf + \r\=ps + \r\=ph + +/.{2,3}/newline=crlf,utf + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + +/.{2,3}?/newline=crlf,utf + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + +/[^\x{100}]/utf + \x{100}\x{101}X + +/[^\x{100}]+/utf + \x{100}\x{101}X + +/\pL\P{Nd}/utf + AB +\= Expect no match + A0 + 00 + +/\X./utf + AB + A\x{300}BC + A\x{300}\x{301}\x{302}BC +\= Expect no match + \x{300} + +/\X\X/utf + ABC + A\x{300}B\x{300}\x{301}C + A\x{300}\x{301}\x{302}BC +\= Expect no match + \x{300} + +/^\pL+/utf + abcd + a + +/^\PL+/utf + 1234 + = +\= Expect no match + abcd + +/^\X+/utf + abcdA\x{300}\x{301}\x{302} + A\x{300}\x{301}\x{302} + A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302} + a + \x{300}\x{301}\x{302} + +/\X?abc/utf + abc + 
A\x{300}abc + A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abcxyz + \x{300}abc + +/^\X?abc/utf + abc + A\x{300}abc + \x{300}abc +\= Expect no match + A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abcxyz + +/\X*abc/utf + abc + A\x{300}abc + A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abcxyz + \x{300}abc + +/^\X*abc/utf + abc + A\x{300}abc + A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abcxyz + \x{300}abc + +/^\pL?=./utf + A=b + =c +\= Expect no match + 1=2 + AAAA=b + +/^\pL*=./utf + AAAA=b + =c +\= Expect no match + 1=2 + +/^\X{2,3}X/utf + A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}X + A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}X +\= Expect no match + X + A\x{300}\x{301}\x{302}X + A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}X + +/^\pC\pL\pM\pN\pP\pS\pZ\p{Xsp}/utf + >\x{1680}\x{2028}\x{0b} +\= Expect no match + \x{0b} + +/^>\p{Xsp}+/utf,no_auto_possess + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}*/utf,no_auto_possess + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}{2,9}/utf,no_auto_possess + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>[\p{Xsp}]/utf,no_auto_possess + >\x{2028}\x{0b} + +/^>[\p{Xsp}]+/utf,no_auto_possess + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}/utf + >\x{1680}\x{2028}\x{0b} + >\x{a0} +\= Expect no match + \x{0b} + +/^>\p{Xps}+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}+?/utf + >\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}*/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}?/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>[\p{Xps}]/utf + >\x{2028}\x{0b} + +/^>[\p{Xps}]+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^\p{Xwd}/utf + ABCD + 1234 + \x{6ca} + \x{a6c} + \x{10a7} + _ABC +\= Expect no match + [] + +/^\p{Xwd}+/utf + 
ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}*/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}{2,9}/utf + A_12\x{6ca}\x{a6c}\x{10a7} + +/^[\p{Xwd}]/utf + ABCD1234_ + 1234abcd_ + \x{6ca} + \x{a6c} + \x{10a7} + _ABC +\= Expect no match + [] + +/^[\p{Xwd}]+/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +# Unicode properties for \b and \B + +/\b...\B/utf,ucp + abc_ + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +# Without PCRE_UCP, non-ASCII always fail, even if < 256 + +/\b...\B/utf + abc_ +\= Expect no match + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties + +/\b...\B/ucp + abc_ + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +# Caseless single negated characters > 127 need UCP support + +/[^\x{100}]/i,utf + \x{100}\x{101}X + +/[^\x{100}]+/i,utf + \x{100}\x{101}XX + +/^\X/utf + A\=ps + A\=ph + A\x{300}\x{301}\=ps + A\x{300}\x{301}\=ph + A\x{301}\=ps + A\x{301}\=ph + +/^\X{2,3}/utf + A\=ps + A\=ph + AA\=ps + AA\=ph + A\x{300}\x{301}\=ps + A\x{300}\x{301}\=ph + A\x{300}\x{301}A\x{300}\x{301}\=ps + A\x{300}\x{301}A\x{300}\x{301}\=ph + +/^\X{2}/utf + AA\=ps + AA\=ph + A\x{300}\x{301}A\x{300}\x{301}\=ps + A\x{300}\x{301}A\x{300}\x{301}\=ph + +/^\X+/utf + AA\=ps + AA\=ph + +/^\X+?Z/utf + AA\=ps + AA\=ph + +# These are tests for extended grapheme clusters + +/^\X/utf,aftertext + G\x{34e}\x{34e}X + \x{34e}\x{34e}X + \x04X + \x{1100}X + \x{1100}\x{34e}X + \x{1b04}\x{1b04}X +\= These match up to the roman letters + \x{1111}\x{1111}L,L + \x{1111}\x{1111}\x{1169}L,L,V + \x{1111}\x{ae4c}L, LV + \x{1111}\x{ad89}L, LVT + \x{1111}\x{ae4c}\x{1169}L, LV, V + \x{1111}\x{ae4c}\x{1169}\x{1169}L, LV, V, V + \x{1111}\x{ae4c}\x{1169}\x{11fe}L, LV, V, T + \x{1111}\x{ad89}\x{11fe}L, LVT, T + \x{1111}\x{ad89}\x{11fe}\x{11fe}L, LVT, T, T + \x{ad89}\x{11fe}\x{11fe}LVT, T, T +\= These match just the first codepoint (invalid sequence) + 
\x{1111}\x{11fe}L, T + \x{ae4c}\x{1111}LV, L + \x{ae4c}\x{ae4c}LV, LV + \x{ae4c}\x{ad89}LV, LVT + \x{1169}\x{1111}V, L + \x{1169}\x{ae4c}V, LV + \x{1169}\x{ad89}V, LVT + \x{ad89}\x{1111}LVT, L + \x{ad89}\x{1169}LVT, V + \x{ad89}\x{ae4c}LVT, LV + \x{ad89}\x{ad89}LVT, LVT + \x{11fe}\x{1111}T, L + \x{11fe}\x{1169}T, V + \x{11fe}\x{ae4c}T, LV + \x{11fe}\x{ad89}T, LVT +\= Test extend and spacing mark + \x{1111}\x{ae4c}\x{0711}L, LV, extend + \x{1111}\x{ae4c}\x{1b04}L, LV, spacing mark + \x{1111}\x{ae4c}\x{1b04}\x{0711}\x{1b04}L, LV, spacing mark, extend, spacing mark +\= Test CR, LF, and control + \x0d\x{0711}CR, extend + \x0d\x{1b04}CR, spacingmark + \x0a\x{0711}LF, extend + \x0a\x{1b04}LF, spacingmark + \x0b\x{0711}Control, extend + \x09\x{1b04}Control, spacingmark +\= There are no Prepend characters, so we can't test Prepend, CR + +/^(?>\X{2})X/utf,aftertext + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + +/^\X{2,4}X/utf,aftertext + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + +/^\X{2,4}?X/utf,aftertext + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + +/\x{1e9e}+/i,utf + \x{1e9e}\x{00df} + +/[z\x{1e9e}]+/i,utf + \x{1e9e}\x{00df} + +/\x{00df}+/i,utf + \x{1e9e}\x{00df} + +/[z\x{00df}]+/i,utf + \x{1e9e}\x{00df} + +/\x{1f88}+/i,utf + \x{1f88}\x{1f80} + +/[z\x{1f88}]+/i,utf + \x{1f88}\x{1f80} + +# Perl matches these + +/\x{00b5}+/i,utf + \x{00b5}\x{039c}\x{03bc} + +/\x{039c}+/i,utf + \x{00b5}\x{039c}\x{03bc} + +/\x{03bc}+/i,utf + \x{00b5}\x{039c}\x{03bc} + + +/\x{00c5}+/i,utf + \x{00c5}\x{00e5}\x{212b} + +/\x{00e5}+/i,utf + \x{00c5}\x{00e5}\x{212b} + +/\x{212b}+/i,utf + \x{00c5}\x{00e5}\x{212b} + +/\x{01c4}+/i,utf + \x{01c4}\x{01c5}\x{01c6} + +/\x{01c5}+/i,utf + \x{01c4}\x{01c5}\x{01c6} + +/\x{01c6}+/i,utf + \x{01c4}\x{01c5}\x{01c6} + +/\x{01c7}+/i,utf + 
\x{01c7}\x{01c8}\x{01c9} + +/\x{01c8}+/i,utf + \x{01c7}\x{01c8}\x{01c9} + +/\x{01c9}+/i,utf + \x{01c7}\x{01c8}\x{01c9} + + +/\x{01ca}+/i,utf + \x{01ca}\x{01cb}\x{01cc} + +/\x{01cb}+/i,utf + \x{01ca}\x{01cb}\x{01cc} + +/\x{01cc}+/i,utf + \x{01ca}\x{01cb}\x{01cc} + +/\x{01f1}+/i,utf + \x{01f1}\x{01f2}\x{01f3} + +/\x{01f2}+/i,utf + \x{01f1}\x{01f2}\x{01f3} + +/\x{01f3}+/i,utf + \x{01f1}\x{01f2}\x{01f3} + +/\x{0345}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/\x{0399}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/\x{03b9}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/\x{1fbe}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/\x{0392}+/i,utf + \x{0392}\x{03b2}\x{03d0} + +/\x{03b2}+/i,utf + \x{0392}\x{03b2}\x{03d0} + +/\x{03d0}+/i,utf + \x{0392}\x{03b2}\x{03d0} + + +/\x{0395}+/i,utf + \x{0395}\x{03b5}\x{03f5} + +/\x{03b5}+/i,utf + \x{0395}\x{03b5}\x{03f5} + +/\x{03f5}+/i,utf + \x{0395}\x{03b5}\x{03f5} + +/\x{0398}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/\x{03b8}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/\x{03d1}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/\x{03f4}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/\x{039a}+/i,utf + \x{039a}\x{03ba}\x{03f0} + +/\x{03ba}+/i,utf + \x{039a}\x{03ba}\x{03f0} + +/\x{03f0}+/i,utf + \x{039a}\x{03ba}\x{03f0} + +/\x{03a0}+/i,utf + \x{03a0}\x{03c0}\x{03d6} + +/\x{03c0}+/i,utf + \x{03a0}\x{03c0}\x{03d6} + +/\x{03d6}+/i,utf + \x{03a0}\x{03c0}\x{03d6} + +/\x{03a1}+/i,utf + \x{03a1}\x{03c1}\x{03f1} + +/\x{03c1}+/i,utf + \x{03a1}\x{03c1}\x{03f1} + +/\x{03f1}+/i,utf + \x{03a1}\x{03c1}\x{03f1} + +/\x{03a3}+/i,utf + \x{03A3}\x{03C2}\x{03C3} + +/\x{03c2}+/i,utf + \x{03A3}\x{03C2}\x{03C3} + +/\x{03c3}+/i,utf + \x{03A3}\x{03C2}\x{03C3} + +/\x{03a6}+/i,utf + \x{03a6}\x{03c6}\x{03d5} + +/\x{03c6}+/i,utf + \x{03a6}\x{03c6}\x{03d5} + +/\x{03d5}+/i,utf + \x{03a6}\x{03c6}\x{03d5} + +/\x{03c9}+/i,utf + \x{03c9}\x{03a9}\x{2126} + +/\x{03a9}+/i,utf + \x{03c9}\x{03a9}\x{2126} + +/\x{2126}+/i,utf + \x{03c9}\x{03a9}\x{2126} + +/\x{1e60}+/i,utf + 
\x{1e60}\x{1e61}\x{1e9b} + +/\x{1e61}+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + +/\x{1e9b}+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + +/\x{1e9e}+/i,utf + \x{1e9e}\x{00df} + +/\x{00df}+/i,utf + \x{1e9e}\x{00df} + +/\x{1f88}+/i,utf + \x{1f88}\x{1f80} + +/\x{1f80}+/i,utf + \x{1f88}\x{1f80} + +/\x{004b}+/i,utf + \x{004b}\x{006b}\x{212a} + +/\x{006b}+/i,utf + \x{004b}\x{006b}\x{212a} + +/\x{212a}+/i,utf + \x{004b}\x{006b}\x{212a} + +/\x{0053}+/i,utf + \x{0053}\x{0073}\x{017f} + +/\x{0073}+/i,utf + \x{0053}\x{0073}\x{017f} + +/\x{017f}+/i,utf + \x{0053}\x{0073}\x{017f} + +/ist/i,utf +\= Expect no match + ikt + +/is+t/i,utf + iSs\x{17f}t +\= Expect no match + ikt + +/is+?t/i,utf +\= Expect no match + ikt + +/is?t/i,utf +\= Expect no match + ikt + +/is{2}t/i,utf +\= Expect no match + iskt + +/^\p{Xuc}/utf + $abc + @abc + `abc + \x{1234}abc +\= Expect no match + abc + +/^\p{Xuc}+/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}+?/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}+?\*/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}++/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}{3,5}/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}{3,5}?/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^[\p{Xuc}]/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^[\p{Xuc}]+/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\P{Xuc}/utf + abc +\= Expect no match + $abc + @abc + `abc + \x{1234}abc + +/^[\P{Xuc}]/utf + abc +\= Expect no match + $abc + @abc + `abc + \x{1234}abc + +/^A\s+Z/utf,ucp + A\x{2005}Z + A\x{85}\x{180e}\x{2005}Z + +/^A[\s]+Z/utf,ucp + A\x{2005}Z + A\x{85}\x{180e}\x{2005}Z + +/(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext + \x{100}\x{200}\x{300} + +# ----------------------------------------------------------------------------- +# Tests for bidi control and bidi class properties + +/\p{ bidi_control 
}/utf + -->\x{202c}<-- + +/\p{bidicontrol}+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidicontrol}+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidicontrol}++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidi_control}]/utf + -->\x{202c}<-- + +/[\p{bidicontrol}]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidicontrol}]+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidicontrol}]++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidicontrol}<>]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\P{bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{^bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidi class = al}/utf + -->\x{061D}<-- + +/\p{bidi class = al}+/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{bidi_class : AL}+?/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{Bidi_Class : AL}++/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{bidi class = aN}+/utf + -->\x{061D}\x{0602}\x{0604}\x{061f}<-- + +/\p{bidi class = B}+/utf + -->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<-- + +/\p{bidi class:BN}+/utf + -->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<-- + +/\p{bidiclass:cs}+/utf + -->,.\x{060c}\x{ff1a}<-- + +/\p{bidiclass:En}+/utf + -->09\x{b2}\x{2074}\x{1fbf9}<-- + +/\p{bidiclass:es}+/utf + ==>+-\x{207a}\x{ff0d}<== + +/\p{bidiclass:et}+/utf + -->#\{24}%\x{a2}\x{A838}\x{1e2ff}<-- + +/\p{bidiclass:FSI}+/utf + 
-->\x{2068}<-- + +/\p{bidi class:L}+/utf + -->ABC<-- + +/\P{bidi class:L}+/utf + -->ABC<-- + +/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf + -->\x{202a}\x{2066}\x{202d}<-- + +/\p{bidi class:NSM}+/utf + -->\x{9bc}\x{a71}\x{e31}<-- + +/\p{bidi class:ON}+/utf + -->\x{21}'()*;@\x{384}\x{2039}<=- + +/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf + -->\x{202c}\x{2069}<-- + +/\p{bidi class:R}+/utf + -->\x{590}\x{5c6}\x{200f}\x{10805}<-- + +/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf + -->\x{202b}\x{2067}\x{202e}<-- + +/\p{bidi class:S}+\p{bidiclass:WS}+/utf + -->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<-- + +# ----------------------------------------------------------------------------- + +/\p{katakana}/utf + \x{30a1} + \x{3001} + +/\p{scx:katakana}/utf + \x{30a1} + \x{3001} + +/\p{script extensions:katakana}/utf + \x{30a1} + \x{3001} + +/\p{sc:katakana}/utf + \x{30a1} +\= Expect no match + \x{3001} + +/\p{script:katakana}/utf + \x{30a1} +\= Expect no match + \x{3001} + +/\p{sc:katakana}{3,}/utf + \x{30a1}\x{30fa}\x{32d0}\x{1b122}\x{ff66}\x{3001}ABC + +/\p{sc:katakana}{3,}?/utf + \x{30a1}\x{30fa}\x{32d0}\x{1b122}\x{ff66}\x{3001}ABC + +# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without +# the restriction. 
+ +/AskZ/i,utf,caseless_restrict + AskZ + aSKz +\= Expect no match + A\x{17f}kZ + As\x{212a}Z + +/AskZ/i,utf + AskZ + aSKz + A\x{17f}kZ + As\x{212a}Z + +/A\x{17f}\x{212a}Z/ir,utf + \= Expect no match + AskZ + +/A\x{17f}\x{212a}Z/i,utf + AskZ + +/[AskZ]+/i,utf,caseless_restrict + AskZ + aSKz + A\x{17f}kZ + As\x{212a}Z + +/[AskZ]+/i,utf + AskZ + aSKz + A\x{17f}kZ + As\x{212a}Z + +/[\x{17f}\x{212a}]+/ir,utf +\= Expect no match + AskZ + +/[\x{17f}\x{212a}]+/i,utf + AskZ + +/[^s]+/ir,utf + A\x{17f}Z + +/[^s]+/i,utf + A\x{17f}Z + +/[^k]+/ir,utf + A\x{212a}Z + +/[^k]+/i,utf + A\x{212a}Z + +/[^sk]+/ir,utf + A\x{17f}\x{212a}Z + +/[^sk]+/i,utf + A\x{17f}\x{212a}Z + +/[^\x{17f}]+/ir,utf + AsSZ + +/[^\x{17f}]+/i,utf + AsSZ + +/[Ss]+/irB,utf + Sss\x{17f}ss + +/[Ss]+/iB,utf + Sss\x{17f}ss + +/[S\x{17f}]/irB,utf + +/[S\x{17f}]/iB,utf + +/[\x{17f}s]/irB,utf + +/[\x{17f}s]/iB,utf + +/[\x{4b}\x{6b}]/irB,utf + +/[\x{4b}\x{6b}]/iB,utf + +/s(?r)s(?-r)s(?r:s)s/i,utf + \x{17f}S\x{17f}S\x{17f} +\= Expect no match + \x{17f}\x{17f}\x{17f}S\x{17f} + \x{17f}S\x{17f}\x{17f}\x{17f} + +/k(?^i)k/ir,utf + K\x{212a} +\= Expect no match + \x{212a}\x{212a} + +# End caseless restrict tests + +# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. 
+ +# DIGITS + +/\d+/i,utf + 123\x{660}456 + +/\d+/i,utf,ucp + 123\x{660}456 + +/\d+/i,utf,ucp,ascii_bsd + 123\x{660}456 + +/[\d]+/i,utf + 123\x{660}456 + +/[\d]+/i,utf,ucp + 123\x{660}456 + +/[\d]+/i,utf,ucp,ascii_bsd + 123\x{660}456 + +/\d(?aD)\d(?-aD)\d/utf,ucp + \x{660}9\x{660} +\= Expect no match + \x{660}\x{660}\x{660} + +/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd + 999 + 9\x{660}9 + +/\d(?a)\d(?-a)\d/utf,ucp + \x{660}9\x{660} +\= Expect no match + \x{660}\x{660}\x{660} + +/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd + 999 + 9\x{660}9 + +# SPACES + +/>\s+ < +\= Expect no match + >\x{a0} < + +/>\s+ < + >\x{a0} < + +/>\s+ < +\= Expect no match + >\x{a0} < + +/>[\s]+ < +\= Expect no match + >\x{a0} < + +/>[\s]+ < + >\x{a0} < + +/>[\s]+ < +\= Expect no match + >\x{a0} < + +/>\s(?aS)\s(?-aS)\s\x{a0} \x{a0}< +\= Expect no match + >\x{a0}\x{a0}\x{a0}< + +/>\s(?a)\s(?-a)\s\x{a0} \x{a0}< +\= Expect no match + >\x{a0}\x{a0}\x{a0}< + +# WORDS + +/\w+/i,utf + 123\x{660}abc + +/\w+/i,utf,ucp + 123\x{660}abc + +/\w+/i,utf,ucp,ascii_bsw + 123\x{660}abc + +/[\w]+/i,utf + 123\x{660}abc + +/[\w]+/i,utf,ucp + 123\x{660}abc + +/[\w]+/i,utf,ucp,ascii_bsw + 123\x{660}abc + +/\w(?aW)\w(?-aW)\w/utf,ucp + \x{660}A\x{c0} +\= Expect no match + \x{660}\x{c0}\x{c0} + +/\w(?a)\w(?-a)\w/utf,ucp + \x{660}A\x{c0} +\= Expect no match + \x{660}\x{c0}\x{c0} + +# POSIX + +/^[[:digit:]]+$/utf,ucp + 123456 + 123\x{660}456 + +/^[[:digit:]]+$/utf,ucp,ascii_digit + 123456 +\= Expect no match + 123\x{660}456 + +/[[:digit:]]+/g,utf,ucp,ascii_digit + 123\x{660}456 + +/(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit + 11 + \x{ff11}1 +\= Expect no match + 1\x{ff11} + +/(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit + 11 + \x{ff11}1 +\= Expect no match + 1\x{ff11} + +/(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit + 11 +\= Expect no match + \x{ff11}1 + 1\x{ff11} + +/[[:digit:]]+/utf,ucp,ascii_posix + 123\x{660}456 + +/(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix + 11 + \x{ff11}1 +\= Expect no 
match + 1\x{ff11} + +/(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix + 11 + \x{ff11}1 +\= Expect no match + 1\x{ff11} + +/(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp + 11 + \x{ff11}1 +\= Expect no match + 1\x{ff11} + +/>[[:space:]]+\x{a0} \x{a0}< + >\x{a0}\x{a0}\x{a0}< + +/>[[:space:]]+\x{a0} \x{a0}< + +/(?aP)[[:alnum:]]+/i,ucp,utf + abcáxyz + abc\x{660}xyz + +/(?aP)[[:alnum:]\d]+/i,ucp,utf + abc\x{660}xyz + +/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/ + \x{660}A\x{660} +\= Expect no match + \x{660}\x{660}\x{660} + +# VARIOUS + +/[\d\s\w]+/a,ucp,utf + 9 A\x{660}À + 9 AÀ\x{660} + +# End PCRE2_EXTRA_ASCII_xxx tests + +/\w+/utf,ucp + --cafe\x{300}_au\x{203f}lait! + +/[\w]+/utf,ucp + --cafe\x{300}_au\x{203f}lait! + +/\b.+?\b/utf,ucp + --cafe\x{300}_au\x{203f}lait! + +/caf\B.+?\B/utf,ucp + --cafe\x{300}_au\x{203f}lait! + +# End of testinput7 diff --git a/testdata/testinput8 b/testdata/testinput8 new file mode 100644 index 0000000..550631d --- /dev/null +++ b/testdata/testinput8 @@ -0,0 +1,189 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. 
+ +#pattern fullbincode,memory + +/((?i)b)/ + +/(?s)(.*X|^B)/ + +/(?s:.*X|^B)/ + +/^[[:alnum:]]/ + +/#/Ix + +/a#/Ix + +/x?+/ + +/x++/ + +/x{1,3}+/ + +/(x)*+/ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" + +/(a(?1)b)/ + +/(a(?1)+b)/ + +/a(?Pb|c)d(?Pe)/ + +/(?:a(?Pc(?Pd)))(?Pa)/ + +/(?Pa)...(?P=a)bbb(?P>a)d/ + +/abc(?C255)de(?C)f/ + +/abcde/auto_callout + +/\x{100}/utf + +/\x{1000}/utf + +/\x{10000}/utf + +/\x{100000}/utf + +/\x{10ffff}/utf + +/\x{110000}/utf + +/[\x{ff}]/utf + +/[\x{100}]/utf + +/\x80/utf + +/\xff/utf + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf + +/\x{65e5}\x{672c}\x{8a9e}/I,utf + +/[\x{100}]/utf + +/[Z\x{100}]/utf + +/^[\x{100}\E-\Q\E\x{150}]/utf + +/^[\QĀ\E-\QŐ\E]/utf + +/^[\QĀ\E-\QŐ\E/utf + +/[\p{L}]/ + +/[\p{^L}]/ + +/[\P{L}]/ + +/[\P{^L}]/ + +/[abc\p{L}\x{0660}]/utf + +/[\p{Nd}]/utf + +/[\p{Nd}+-]+/utf + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf + 
+/[\x{105}-\x{109}]/i,utf + +/( ( (?(1)0|) )* )/x + +/( (?(1)0|)* )/x + +/[a]/ + +/[a]/utf + +/[\xaa]/ + +/[\xaa]/utf + +/[^a]/ + +/[^a]/utf + +/[^\xaa]/ + +/[^\xaa]/utf + +#pattern -memory + +/[^\d]/utf,ucp + +/[[:^alpha:][:^cntrl:]]+/utf,ucp + +/[[:^cntrl:][:^alpha:]]+/utf,ucp + +/[[:alpha:]]+/utf,ucp + +/[[:^alpha:]\S]+/utf,ucp + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ + +/(((a\2)|(a*)\g<-1>))*a?/ + +/((?+1)(\1))/ + +"(?1)(?#?'){2}(a)" + +/.((?2)(?R)|\1|$)()/ + +/.((?3)(?R)()(?2)|\1|$)()/ + +/(?1)()((((((\1++))\x85)+)|))/ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. + +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(
?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| +))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. 
+ +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 + +/(?(1)(?1)){8,}+()/debug + abcd + +/(?(1)|a(?1)b){2,}+()/debug + abcde + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +#pattern -fullbincode + +/\[()]{65535}/expand + +# End of testinput8 diff --git a/testdata/testinput9 b/testdata/testinput9 new file mode 100644 index 0000000..4eb228a --- /dev/null +++ b/testdata/testinput9 @@ -0,0 +1,266 @@ +# This set of tests is run only with the 8-bit library. They must not require +# UTF-8 or Unicode property support. 
*/ + +#forbid_utf +#newline_default lf any anycrlf + +/ab/ +\= Expect error message (too big char) and no match + A\x{123}B + A\o{443}B + +/\x{100}/I + +/\o{400}/I + +/ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional leading comment +(?: (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address +| # or +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # one word, optionally followed by.... +(?: +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or... +\( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) | # comments, or... + +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +# quoted strings +)* +< (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # leading < +(?: @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* + +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* , (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +)* # further okay, if led by comma +: # closing colon +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* )? # optional route +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address spec +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* > # trailing > +# name and address +) (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional trailing comment +/Ix + +/\h/I + +/\H/I + +/\v/I + +/\V/I + +/\R/I + +/[\h]/B + >\x09< + +/[\h]+/B + >\x09\x20\xa0< + +/[\v]/B + +/[\H]/B + +/[^\h]/B + +/[\V]/B + +/[\x0a\V]/B + +/\777/I + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark + XX + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark,alt_verbnames + XX + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark + XX + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark,alt_verbnames + XX + +/\u0100/alt_bsux,allow_empty_class,match_unset_backref,dupnames + +/[\u0100-\u0200]/alt_bsux,allow_empty_class,match_unset_backref,dupnames 
+ +/[^\x00-a]{12,}[^b-\xff]*/B + +/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B + +/(*MARK:a\x{100}b)z/alt_verbnames + +/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/ + +/(?i:A{1,}\6666666666)/ + A\x{1b6}6666666 + +# End of testinput9 diff --git a/testdata/testinputEBC b/testdata/testinputEBC new file mode 100644 index 0000000..36df20b --- /dev/null +++ b/testdata/testinputEBC @@ -0,0 +1,137 @@ +# This is a specialized test for checking, when PCRE2 is compiled with the +# EBCDIC option but in an ASCII environment, that newline, white space, and \c +# functionality is working. It catches cases where explicit values such as 0x0a +# have been used instead of names like CHAR_LF. Needless to say, it is not a +# genuine EBCDIC test! In patterns, alphabetic characters that follow a +# backslash must be in EBCDIC code. In data, NL, NEL, LF, ESC, and DEL must be +# in EBCDIC, but can of course be specified as escapes. + +# Test default newline and variations + +/^A/m + ABC + 12\x15ABC + +/^A/m,newline=any + 12\x15ABC + 12\x0dABC + 12\x0d\x15ABC + 12\x25ABC + +/^A/m,newline=anycrlf + 12\x15ABC + 12\x0dABC + 12\x0d\x15ABC + ** Fail + 12\x25ABC + +# Test \h + +/^A\ˆ/ + A B + A\x41B + +# Test \H + +/^A\È/ + AB + A\x42B + ** Fail + A B + A\x41B + +# Test \R + +/^A\Ù/ + A\x15B + A\x0dB + A\x25B + A\x0bB + A\x0cB + ** Fail + A B + +# Test \v + +/^A\¥/ + A\x15B + A\x0dB + A\x25B + A\x0bB + A\x0cB + ** Fail + A B + +# Test \V + +/^A\å/ + A B + ** Fail + A\x15B + A\x0dB + A\x25B + A\x0bB + A\x0cB + +# For repeated items, use an atomic group so that the output is the same +# for DFA matching (otherwise it may show multiple matches). 
+ +# Test \h+ + +/^A(?>\ˆ+)/ + A B + +# Test \H+ + +/^A(?>\È+)/ + AB + ** Fail + A B + +# Test \R+ + +/^A(?>\Ù+)/ + A\x15B + A\x0dB + A\x25B + A\x0bB + A\x0cB + ** Fail + A B + +# Test \v+ + +/^A(?>\¥+)/ + A\x15B + A\x0dB + A\x25B + A\x0bB + A\x0cB + ** Fail + A B + +# Test \V+ + +/^A(?>\å+)/ + A B + ** Fail + A\x15B + A\x0dB + A\x25B + A\x0bB + A\x0cB + +# Test \c functionality + +/\ƒ@\ƒA\ƒb\ƒC\ƒd\ƒE\ƒf\ƒG\ƒh\ƒI\ƒJ\ƒK\ƒl\ƒm\ƒN\ƒO\ƒp\ƒq\ƒr\ƒS\ƒT\ƒu\ƒV\ƒW\ƒX\ƒy\ƒZ/ + \x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f + +/\ƒ[\ƒ\\ƒ]\ƒ^\ƒ_/ + \x18\x19\x1a\x1b\x1c\x1d\x1e\x1f + +/\ƒ?/ + A\xffB + +/\ƒ&/ + +# End diff --git a/testdata/testinputheap b/testdata/testinputheap new file mode 100644 index 0000000..2a390f0 --- /dev/null +++ b/testdata/testinputheap @@ -0,0 +1,35 @@ +#pattern framesize, memory + +/abcd/ + abcd\=memory + abcd\=find_limits + +/(((((((((((((((((((((((((((((( (^abc|xyz){1,20}$ ))))))))))))))))))))))))))))))/x + abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcX\=memory + abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcX\=find_limits + +/ab(cd)/ + abcd\=memory + abcd\=memory,ovector=0 + +/\[(a)]{1000}/expand,framesize + \[a]{1000}\=ovector=1 + +# The heapframes_size option gets pcre2test to show the size of the heapframes +# vector that after pcre2_match() has run. Running a match with ovector=0 +# causes the match data block to be freed, thus releasing that vector. 
+ +/\[(a)]{1000}/expand,framesize + \[a]{1000}\=ovector=1,heapframes_size + +/a/heapframes_size,framesize + a\=ovector=0 + +/a|(b){200}/g,expand,heapframes_size + abacus z\[b]{200}z + a\=ovector=0 + +/(a)/replace=>$1< + cat\=heapframes_size + +# End diff --git a/testdata/testoutput1 b/testdata/testoutput1 new file mode 100644 index 0000000..753937f --- /dev/null +++ b/testdata/testoutput1 @@ -0,0 +1,10506 @@ +# This set of tests is for features that are compatible with all versions of +# Perl >= 5.10, in non-UTF mode. It should run clean for the 8-bit, 16-bit, and +# 32-bit PCRE libraries, and also using the perltest.sh script. + +# WARNING: Use only / as the pattern delimiter. Although pcre2test supports +# a number of delimiters, all those other than / give problems with the +# perltest.sh script. + +#forbid_utf +#newline_default lf any anycrlf +#perltest + +/the quick brown fox/ + the quick brown fox + 0: the quick brown fox + What do you know about the quick brown fox? + 0: the quick brown fox +\= Expect no match + The quick brown FOX +No match + What do you know about THE QUICK BROWN FOX? +No match + +/The quick brown fox/i + the quick brown fox + 0: the quick brown fox + The quick brown FOX + 0: The quick brown FOX + What do you know about the quick brown fox? + 0: the quick brown fox + What do you know about THE QUICK BROWN FOX? 
+ 0: THE QUICK BROWN FOX + +/abcd\t\n\r\f\a\e\071\x3b\$\\\?caxyz/ + abcd\t\n\r\f\a\e9;\$\\?caxyz + 0: abcd\x09\x0a\x0d\x0c\x07\x1b9;$\?caxyz + +/a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz/ + abxyzpqrrrabbxyyyypqAzz + 0: abxyzpqrrrabbxyyyypqAzz + abxyzpqrrrabbxyyyypqAzz + 0: abxyzpqrrrabbxyyyypqAzz + aabxyzpqrrrabbxyyyypqAzz + 0: aabxyzpqrrrabbxyyyypqAzz + aaabxyzpqrrrabbxyyyypqAzz + 0: aaabxyzpqrrrabbxyyyypqAzz + aaaabxyzpqrrrabbxyyyypqAzz + 0: aaaabxyzpqrrrabbxyyyypqAzz + abcxyzpqrrrabbxyyyypqAzz + 0: abcxyzpqrrrabbxyyyypqAzz + aabcxyzpqrrrabbxyyyypqAzz + 0: aabcxyzpqrrrabbxyyyypqAzz + aaabcxyzpqrrrabbxyyyypAzz + 0: aaabcxyzpqrrrabbxyyyypAzz + aaabcxyzpqrrrabbxyyyypqAzz + 0: aaabcxyzpqrrrabbxyyyypqAzz + aaabcxyzpqrrrabbxyyyypqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqAzz + aaabcxyzpqrrrabbxyyyypqqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqqAzz + aaabcxyzpqrrrabbxyyyypqqqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqqqAzz + aaabcxyzpqrrrabbxyyyypqqqqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqqqqAzz + aaabcxyzpqrrrabbxyyyypqqqqqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqqqqqAzz + aaaabcxyzpqrrrabbxyyyypqAzz + 0: aaaabcxyzpqrrrabbxyyyypqAzz + abxyzzpqrrrabbxyyyypqAzz + 0: abxyzzpqrrrabbxyyyypqAzz + aabxyzzzpqrrrabbxyyyypqAzz + 0: aabxyzzzpqrrrabbxyyyypqAzz + aaabxyzzzzpqrrrabbxyyyypqAzz + 0: aaabxyzzzzpqrrrabbxyyyypqAzz + aaaabxyzzzzpqrrrabbxyyyypqAzz + 0: aaaabxyzzzzpqrrrabbxyyyypqAzz + abcxyzzpqrrrabbxyyyypqAzz + 0: abcxyzzpqrrrabbxyyyypqAzz + aabcxyzzzpqrrrabbxyyyypqAzz + 0: aabcxyzzzpqrrrabbxyyyypqAzz + aaabcxyzzzzpqrrrabbxyyyypqAzz + 0: aaabcxyzzzzpqrrrabbxyyyypqAzz + aaaabcxyzzzzpqrrrabbxyyyypqAzz + 0: aaaabcxyzzzzpqrrrabbxyyyypqAzz + aaaabcxyzzzzpqrrrabbbxyyyypqAzz + 0: aaaabcxyzzzzpqrrrabbbxyyyypqAzz + aaaabcxyzzzzpqrrrabbbxyyyyypqAzz + 0: aaaabcxyzzzzpqrrrabbbxyyyyypqAzz + aaabcxyzpqrrrabbxyyyypABzz + 0: aaabcxyzpqrrrabbxyyyypABzz + aaabcxyzpqrrrabbxyyyypABBzz + 0: aaabcxyzpqrrrabbxyyyypABBzz + >>>aaabxyzpqrrrabbxyyyypqAzz + 0: aaabxyzpqrrrabbxyyyypqAzz + >aaaabxyzpqrrrabbxyyyypqAzz + 0: 
aaaabxyzpqrrrabbxyyyypqAzz + >>>>abcxyzpqrrrabbxyyyypqAzz + 0: abcxyzpqrrrabbxyyyypqAzz +\= Expect no match + abxyzpqrrabbxyyyypqAzz +No match + abxyzpqrrrrabbxyyyypqAzz +No match + abxyzpqrrrabxyyyypqAzz +No match + aaaabcxyzzzzpqrrrabbbxyyyyyypqAzz +No match + aaaabcxyzzzzpqrrrabbbxyyypqAzz +No match + aaabcxyzpqrrrabbxyyyypqqqqqqqAzz +No match + +/^(abc){1,2}zz/ + abczz + 0: abczz + 1: abc + abcabczz + 0: abcabczz + 1: abc +\= Expect no match + zz +No match + abcabcabczz +No match + >>abczz +No match + +/^(b+?|a){1,2}?c/ + bc + 0: bc + 1: b + bbc + 0: bbc + 1: b + bbbc + 0: bbbc + 1: bb + bac + 0: bac + 1: a + bbac + 0: bbac + 1: a + aac + 0: aac + 1: a + abbbbbbbbbbbc + 0: abbbbbbbbbbbc + 1: bbbbbbbbbbb + bbbbbbbbbbbac + 0: bbbbbbbbbbbac + 1: a +\= Expect no match + aaac +No match + abbbbbbbbbbbac +No match + +/^(b+|a){1,2}c/ + bc + 0: bc + 1: b + bbc + 0: bbc + 1: bb + bbbc + 0: bbbc + 1: bbb + bac + 0: bac + 1: a + bbac + 0: bbac + 1: a + aac + 0: aac + 1: a + abbbbbbbbbbbc + 0: abbbbbbbbbbbc + 1: bbbbbbbbbbb + bbbbbbbbbbbac + 0: bbbbbbbbbbbac + 1: a +\= Expect no match + aaac +No match + abbbbbbbbbbbac +No match + +/^(ba|b*){1,2}?bc/ + babc + 0: babc + 1: ba + bbabc + 0: bbabc + 1: ba + bababc + 0: bababc + 1: ba +\= Expect no match + bababbc +No match + babababc +No match + +/^\ca\cA\c[;\c:/ + \x01\x01\e;z + 0: \x01\x01\x1b;z + +/^[ab\]cde]/ + athing + 0: a + bthing + 0: b + ]thing + 0: ] + cthing + 0: c + dthing + 0: d + ething + 0: e +\= Expect no match + fthing +No match + [thing +No match + \\thing +No match + +/^[]cde]/ + ]thing + 0: ] + cthing + 0: c + dthing + 0: d + ething + 0: e +\= Expect no match + athing +No match + fthing +No match + +/^[^ab\]cde]/ + fthing + 0: f + [thing + 0: [ + \\thing + 0: \ +\= Expect no match + athing +No match + bthing +No match + ]thing +No match + cthing +No match + dthing +No match + ething +No match + +/^[^]cde]/ + athing + 0: a + fthing + 0: f +\= Expect no match + ]thing +No match + cthing +No match + dthing +No 
match + ething +No match + +/^\/ +  + 0: \x81 + +/^ÿ/ + ÿ + 0: \xff + +/^[0-9]+$/ + 0 + 0: 0 + 1 + 0: 1 + 2 + 0: 2 + 3 + 0: 3 + 4 + 0: 4 + 5 + 0: 5 + 6 + 0: 6 + 7 + 0: 7 + 8 + 0: 8 + 9 + 0: 9 + 10 + 0: 10 + 100 + 0: 100 +\= Expect no match + abc +No match + +/^.*nter/ + enter + 0: enter + inter + 0: inter + uponter + 0: uponter + +/^xxx[0-9]+$/ + xxx0 + 0: xxx0 + xxx1234 + 0: xxx1234 +\= Expect no match + xxx +No match + +/^.+[0-9][0-9][0-9]$/ + x123 + 0: x123 + x1234 + 0: x1234 + xx123 + 0: xx123 + 123456 + 0: 123456 +\= Expect no match + 123 +No match + +/^.+?[0-9][0-9][0-9]$/ + x123 + 0: x123 + x1234 + 0: x1234 + xx123 + 0: xx123 + 123456 + 0: 123456 +\= Expect no match + 123 +No match + +/^([^!]+)!(.+)=apquxz\.ixr\.zzz\.ac\.uk$/ + abc!pqr=apquxz.ixr.zzz.ac.uk + 0: abc!pqr=apquxz.ixr.zzz.ac.uk + 1: abc + 2: pqr +\= Expect no match + !pqr=apquxz.ixr.zzz.ac.uk +No match + abc!=apquxz.ixr.zzz.ac.uk +No match + abc!pqr=apquxz:ixr.zzz.ac.uk +No match + abc!pqr=apquxz.ixr.zzz.ac.ukk +No match + +/:/ + Well, we need a colon: somewhere + 0: : +\= Expect no match + Fail without a colon +No match + +/([\da-f:]+)$/i + 0abc + 0: 0abc + 1: 0abc + abc + 0: abc + 1: abc + fed + 0: fed + 1: fed + E + 0: E + 1: E + :: + 0: :: + 1: :: + 5f03:12C0::932e + 0: 5f03:12C0::932e + 1: 5f03:12C0::932e + fed def + 0: def + 1: def + Any old stuff + 0: ff + 1: ff +\= Expect no match + 0zzz +No match + gzzz +No match + fed\x20 +No match + Any old rubbish +No match + +/^.*\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/ + .1.2.3 + 0: .1.2.3 + 1: 1 + 2: 2 + 3: 3 + A.12.123.0 + 0: A.12.123.0 + 1: 12 + 2: 123 + 3: 0 +\= Expect no match + .1.2.3333 +No match + 1.2.3 +No match + 1234.2.3 +No match + +/^(\d+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/ + 1 IN SOA non-sp1 non-sp2( + 0: 1 IN SOA non-sp1 non-sp2( + 1: 1 + 2: non-sp1 + 3: non-sp2 + 1 IN SOA non-sp1 non-sp2 ( + 0: 1 IN SOA non-sp1 non-sp2 ( + 1: 1 + 2: non-sp1 + 3: non-sp2 +\= Expect no match + 1IN SOA non-sp1 non-sp2( +No match + 
+/^[a-zA-Z\d][a-zA-Z\d\-]*(\.[a-zA-Z\d][a-zA-z\d\-]*)*\.$/ + a. + 0: a. + Z. + 0: Z. + 2. + 0: 2. + ab-c.pq-r. + 0: ab-c.pq-r. + 1: .pq-r + sxk.zzz.ac.uk. + 0: sxk.zzz.ac.uk. + 1: .uk + x-.y-. + 0: x-.y-. + 1: .y- +\= Expect no match + -abc.peq. +No match + +/^\*\.[a-z]([a-z\-\d]*[a-z\d]+)?(\.[a-z]([a-z\-\d]*[a-z\d]+)?)*$/ + *.a + 0: *.a + *.b0-a + 0: *.b0-a + 1: 0-a + *.c3-b.c + 0: *.c3-b.c + 1: 3-b + 2: .c + *.c-a.b-c + 0: *.c-a.b-c + 1: -a + 2: .b-c + 3: -c +\= Expect no match + *.0 +No match + *.a- +No match + *.a-b.c- +No match + *.c-a.0-c +No match + +/^(?=ab(de))(abd)(e)/ + abde + 0: abde + 1: de + 2: abd + 3: e + +/^(?!(ab)de|x)(abd)(f)/ + abdf + 0: abdf + 1: + 2: abd + 3: f + +/^(?=(ab(cd)))(ab)/ + abcd + 0: ab + 1: abcd + 2: cd + 3: ab + +/^[\da-f](\.[\da-f])*$/i + a.b.c.d + 0: a.b.c.d + 1: .d + A.B.C.D + 0: A.B.C.D + 1: .D + a.b.c.1.2.3.C + 0: a.b.c.1.2.3.C + 1: .C + +/^\".*\"\s*(;.*)?$/ + \"1234\" + 0: "1234" + \"abcd\" ; + 0: "abcd" ; + 1: ; + \"\" ; rhubarb + 0: "" ; rhubarb + 1: ; rhubarb +\= Expect no match + \"1234\" : things +No match + +/^$/ + \ + 0: +\= Expect no match + A non-empty line +No match + +/ ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/x + ab c + 0: ab c +\= Expect no match + abc +No match + ab cde +No match + +/(?x) ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/ + ab c + 0: ab c +\= Expect no match + abc +No match + ab cde +No match + +/^ a\ b[c ]d $/x + a bcd + 0: a bcd + a b d + 0: a b d +\= Expect no match + abcd +No match + ab d +No match + +/^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$/ + abcdefhijklm + 0: abcdefhijklm + 1: abc + 2: bc + 3: c + 4: def + 5: ef + 6: f + 7: hij + 8: ij + 9: j +10: klm +11: lm +12: m + +/^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$/ + abcdefhijklm + 0: abcdefhijklm + 1: bc + 2: c + 3: ef + 4: f + 5: ij + 6: j + 7: lm + 8: m + +/^[\w][\W][\s][\S][\d][\D][\b][\n][\c]][\022]/ + a+ Z0+\x08\n\x1d\x12 + 0: a+ Z0+\x08\x0a\x1d\x12 + +/^[.^$|()*+?{,}]+/ + .^\$(*+)|{?,?} + 0: 
.^$(*+)|{?,?} + +/^a*\w/ + z + 0: z + az + 0: az + aaaz + 0: aaaz + a + 0: a + aa + 0: aa + aaaa + 0: aaaa + a+ + 0: a + aa+ + 0: aa + +/^a*?\w/ + z + 0: z + az + 0: a + aaaz + 0: a + a + 0: a + aa + 0: a + aaaa + 0: a + a+ + 0: a + aa+ + 0: a + +/^a+\w/ + az + 0: az + aaaz + 0: aaaz + aa + 0: aa + aaaa + 0: aaaa + aa+ + 0: aa + +/^a+?\w/ + az + 0: az + aaaz + 0: aa + aa + 0: aa + aaaa + 0: aa + aa+ + 0: aa + +/^\d{8}\w{2,}/ + 1234567890 + 0: 1234567890 + 12345678ab + 0: 12345678ab + 12345678__ + 0: 12345678__ +\= Expect no match + 1234567 +No match + +/^[aeiou\d]{4,5}$/ + uoie + 0: uoie + 1234 + 0: 1234 + 12345 + 0: 12345 + aaaaa + 0: aaaaa +\= Expect no match + 123456 +No match + +/^[aeiou\d]{4,5}?/ + uoie + 0: uoie + 1234 + 0: 1234 + 12345 + 0: 1234 + aaaaa + 0: aaaa + 123456 + 0: 1234 + +/\A(abc|def)=(\1){2,3}\Z/ + abc=abcabc + 0: abc=abcabc + 1: abc + 2: abc + def=defdefdef + 0: def=defdefdef + 1: def + 2: def +\= Expect no match + abc=defdef +No match + +/^(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\11*(\3\4)\1(?#)2$/ + abcdefghijkcda2 + 0: abcdefghijkcda2 + 1: a + 2: b + 3: c + 4: d + 5: e + 6: f + 7: g + 8: h + 9: i +10: j +11: k +12: cd + abcdefghijkkkkcda2 + 0: abcdefghijkkkkcda2 + 1: a + 2: b + 3: c + 4: d + 5: e + 6: f + 7: g + 8: h + 9: i +10: j +11: k +12: cd + +/(cat(a(ract|tonic)|erpillar)) \1()2(3)/ + cataract cataract23 + 0: cataract cataract23 + 1: cataract + 2: aract + 3: ract + 4: + 5: 3 + catatonic catatonic23 + 0: catatonic catatonic23 + 1: catatonic + 2: atonic + 3: tonic + 4: + 5: 3 + caterpillar caterpillar23 + 0: caterpillar caterpillar23 + 1: caterpillar + 2: erpillar + 3: + 4: + 5: 3 + + +/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/ + From abcd Mon Sep 01 12:33:02 1997 + 0: From abcd Mon Sep 01 12:33 + 1: abcd + +/^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/ + From abcd Mon Sep 01 12:33:02 1997 + 0: From abcd Mon Sep 01 12:33 + 1: Sep + From abcd Mon Sep 1 12:33:02 1997 + 0: 
From abcd Mon Sep 1 12:33 + 1: Sep +\= Expect no match + From abcd Sep 01 12:33:02 1997 +No match + +/^12.34/s + 12\n34 + 0: 12\x0a34 + 12\r34 + 0: 12\x0d34 + +/\w+(?=\t)/ + the quick brown\t fox + 0: brown + +/foo(?!bar)(.*)/ + foobar is foolish see? + 0: foolish see? + 1: lish see? + +/(?:(?!foo)...|^.{0,2})bar(.*)/ + foobar crowbar etc + 0: rowbar etc + 1: etc + barrel + 0: barrel + 1: rel + 2barrel + 0: 2barrel + 1: rel + A barrel + 0: A barrel + 1: rel + +/^(\D*)(?=\d)(?!123)/ + abc456 + 0: abc + 1: abc +\= Expect no match + abc123 +No match + +/^1234(?# test newlines + inside)/ + 1234 + 0: 1234 + +/^1234 #comment in extended re + /x + 1234 + 0: 1234 + +/#rhubarb + abcd/x + abcd + 0: abcd + +/^abcd#rhubarb/x + abcd + 0: abcd + +/^(a)\1{2,3}(.)/ + aaab + 0: aaab + 1: a + 2: b + aaaab + 0: aaaab + 1: a + 2: b + aaaaab + 0: aaaaa + 1: a + 2: a + aaaaaab + 0: aaaaa + 1: a + 2: a + +/(?!^)abc/ + the abc + 0: abc +\= Expect no match + abc +No match + +/(?=^)abc/ + abc + 0: abc +\= Expect no match + the abc +No match + +/^[ab]{1,3}(ab*|b)/ + aabbbbb + 0: aabb + 1: b + +/^[ab]{1,3}?(ab*|b)/ + aabbbbb + 0: aabbbbb + 1: abbbbb + +/^[ab]{1,3}?(ab*?|b)/ + aabbbbb + 0: aa + 1: a + +/^[ab]{1,3}(ab*?|b)/ + aabbbbb + 0: aabb + 1: b + +/ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional leading comment +(?: (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. 
(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address +| # or +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # one word, optionally followed by.... +(?: +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or... +\( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) | # comments, or... + +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +# quoted strings +)* +< (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # leading < +(?: @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* + +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* , (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +)* # further okay, if led by comma +: # closing colon +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* )? # optional route +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address spec +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* > # trailing > +# name and address +) (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional trailing comment +/x + Alan Other + 0: Alan Other + + 0: user@dom.ain + user\@dom.ain + 0: user@dom.ain + \"A. Other\" (a comment) + 0: "A. Other" (a comment) + A. Other (a comment) + 0: Other (a comment) + \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay + 0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay + A missing angle @,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. 
+(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# additional words +)* +@ +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)* +# address +| # or +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +# leading word +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # "normal" atoms and or spaces +(?: +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +| +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +) # "special" comment or quoted string +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # more "normal" +)* +< +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# < +(?: +@ +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)* +(?: , +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +@ +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)* +)* # additional domains +: +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)? # optional route +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# additional words +)* +@ +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)* +# address spec +> # > +# name and address +) +/x + Alan Other + 0: Alan Other + + 0: user@dom.ain + user\@dom.ain + 0: user@dom.ain + \"A. Other\" (a comment) + 0: "A. Other" + A. 
Other (a comment) + 0: Other + \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay + 0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay + A missing angle ?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff + +/P[^*]TAIRE[^*]{1,6}?LL/ + xxxxxxxxxxxPSTAIREISLLxxxxxxxxx + 0: PSTAIREISLL + +/P[^*]TAIRE[^*]{1,}?LL/ + xxxxxxxxxxxPSTAIREISLLxxxxxxxxx + 0: PSTAIREISLL + +/(\.\d\d[1-9]?)\d+/ + 1.230003938 + 0: .230003938 + 1: .23 + 1.875000282 + 0: .875000282 + 1: .875 + 1.235 + 0: .235 + 1: .23 + +/(\.\d\d((?=0)|\d(?=\d)))/ + 1.230003938 + 0: .23 + 1: .23 + 2: + 1.875000282 + 0: .875 + 1: .875 + 2: 5 +\= Expect no match + 1.235 +No match + +/a(?)b/ + ab + 0: ab + +/\b(foo)\s+(\w+)/i + Food is on the foo table + 0: foo table + 1: foo + 2: table + +/foo(.*)bar/ + The food is under the bar in the barn. + 0: food is under the bar in the bar + 1: d is under the bar in the + +/foo(.*?)bar/ + The food is under the bar in the barn. 
+ 0: food is under the bar + 1: d is under the + +/(.*)(\d*)/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: 53147 + 2: + +/(.*)(\d+)/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: 5314 + 2: 7 + +/(.*?)(\d*)/ + I have 2 numbers: 53147 + 0: + 1: + 2: + +/(.*?)(\d+)/ + I have 2 numbers: 53147 + 0: I have 2 + 1: I have + 2: 2 + +/(.*)(\d+)$/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: 5314 + 2: 7 + +/(.*?)(\d+)$/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: + 2: 53147 + +/(.*)\b(\d+)$/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: + 2: 53147 + +/(.*\D)(\d+)$/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: + 2: 53147 + +/^\D*(?!123)/ + ABC123 + 0: AB + +/^(\D*)(?=\d)(?!123)/ + ABC445 + 0: ABC + 1: ABC +\= Expect no match + ABC123 +No match + +/^[W-]46]/ + W46]789 + 0: W46] + -46]789 + 0: -46] +\= Expect no match + Wall +No match + Zebra +No match + 42 +No match + [abcd] +No match + ]abcd[ +No match + +/^[W-\]46]/ + W46]789 + 0: W + Wall + 0: W + Zebra + 0: Z + Xylophone + 0: X + 42 + 0: 4 + [abcd] + 0: [ + ]abcd[ + 0: ] + \\backslash + 0: \ +\= Expect no match + -46]789 +No match + well +No match + +/\d\d\/\d\d\/\d\d\d\d/ + 01/01/2000 + 0: 01/01/2000 + +/word (?:[a-zA-Z0-9]+ ){0,10}otherword/ + word cat dog elephant mussel cow horse canary baboon snake shark otherword + 0: word cat dog elephant mussel cow horse canary baboon snake shark otherword +\= Expect no match + word cat dog elephant mussel cow horse canary baboon snake shark +No match + +/word (?:[a-zA-Z0-9]+ ){0,300}otherword/ +\= Expect no match + word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope +No match + +/^(a){0,0}/ + bcd + 0: + abc + 0: + aab + 0: + +/^(a){0,1}/ + bcd + 0: + abc + 0: a + 1: a + aab + 0: 
a + 1: a + +/^(a){0,2}/ + bcd + 0: + abc + 0: a + 1: a + aab + 0: aa + 1: a + +/^(a){0,3}/ + bcd + 0: + abc + 0: a + 1: a + aab + 0: aa + 1: a + aaa + 0: aaa + 1: a + +/^(a){0,}/ + bcd + 0: + abc + 0: a + 1: a + aab + 0: aa + 1: a + aaa + 0: aaa + 1: a + aaaaaaaa + 0: aaaaaaaa + 1: a + +/^(a){1,1}/ + abc + 0: a + 1: a + aab + 0: a + 1: a +\= Expect no match + bcd +No match + +/^(a){1,2}/ + abc + 0: a + 1: a + aab + 0: aa + 1: a +\= Expect no match + bcd +No match + +/^(a){1,3}/ + abc + 0: a + 1: a + aab + 0: aa + 1: a + aaa + 0: aaa + 1: a +\= Expect no match + bcd +No match + +/^(a){1,}/ + abc + 0: a + 1: a + aab + 0: aa + 1: a + aaa + 0: aaa + 1: a + aaaaaaaa + 0: aaaaaaaa + 1: a +\= Expect no match + bcd +No match + +/.*\.gif/ + borfle\nbib.gif\nno + 0: bib.gif + +/.{0,}\.gif/ + borfle\nbib.gif\nno + 0: bib.gif + +/.*\.gif/m + borfle\nbib.gif\nno + 0: bib.gif + +/.*\.gif/s + borfle\nbib.gif\nno + 0: borfle\x0abib.gif + +/.*\.gif/ms + borfle\nbib.gif\nno + 0: borfle\x0abib.gif + +/.*$/ + borfle\nbib.gif\nno + 0: no + +/.*$/m + borfle\nbib.gif\nno + 0: borfle + +/.*$/s + borfle\nbib.gif\nno + 0: borfle\x0abib.gif\x0ano + +/.*$/ms + borfle\nbib.gif\nno + 0: borfle\x0abib.gif\x0ano + +/.*$/ + borfle\nbib.gif\nno\n + 0: no + +/.*$/m + borfle\nbib.gif\nno\n + 0: borfle + +/.*$/s + borfle\nbib.gif\nno\n + 0: borfle\x0abib.gif\x0ano\x0a + +/.*$/ms + borfle\nbib.gif\nno\n + 0: borfle\x0abib.gif\x0ano\x0a + +/(.*X|^B)/ + abcde\n1234Xyz + 0: 1234X + 1: 1234X + BarFoo + 0: B + 1: B +\= Expect no match + abcde\nBar +No match + +/(.*X|^B)/m + abcde\n1234Xyz + 0: 1234X + 1: 1234X + BarFoo + 0: B + 1: B + abcde\nBar + 0: B + 1: B + +/(.*X|^B)/s + abcde\n1234Xyz + 0: abcde\x0a1234X + 1: abcde\x0a1234X + BarFoo + 0: B + 1: B +\= Expect no match + abcde\nBar +No match + +/(.*X|^B)/ms + abcde\n1234Xyz + 0: abcde\x0a1234X + 1: abcde\x0a1234X + BarFoo + 0: B + 1: B + abcde\nBar + 0: B + 1: B + +/(?s)(.*X|^B)/ + abcde\n1234Xyz + 0: abcde\x0a1234X + 1: abcde\x0a1234X + BarFoo + 0: B + 
1: B +\= Expect no match + abcde\nBar +No match + +/(?s:.*X|^B)/ + abcde\n1234Xyz + 0: abcde\x0a1234X + BarFoo + 0: B +\= Expect no match + abcde\nBar +No match + +/^.*B/ +\= Expect no match + abc\nB +No match + +/(?s)^.*B/ + abc\nB + 0: abc\x0aB + +/(?m)^.*B/ + abc\nB + 0: B + +/(?ms)^.*B/ + abc\nB + 0: abc\x0aB + +/(?ms)^B/ + abc\nB + 0: B + +/(?s)B$/ + B\n + 0: B + +/^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]/ + 123456654321 + 0: 123456654321 + +/^\d\d\d\d\d\d\d\d\d\d\d\d/ + 123456654321 + 0: 123456654321 + +/^[\d][\d][\d][\d][\d][\d][\d][\d][\d][\d][\d][\d]/ + 123456654321 + 0: 123456654321 + +/^[abc]{12}/ + abcabcabcabc + 0: abcabcabcabc + +/^[a-c]{12}/ + abcabcabcabc + 0: abcabcabcabc + +/^(a|b|c){12}/ + abcabcabcabc + 0: abcabcabcabc + 1: c + +/^[abcdefghijklmnopqrstuvwxy0123456789]/ + n + 0: n +\= Expect no match + z +No match + +/abcde{0,0}/ + abcd + 0: abcd +\= Expect no match + abce +No match + +/ab[cd]{0,0}e/ + abe + 0: abe +\= Expect no match + abcde +No match + +/ab(c){0,0}d/ + abd + 0: abd +\= Expect no match + abcd +No match + +/a(b*)/ + a + 0: a + 1: + ab + 0: ab + 1: b + abbbb + 0: abbbb + 1: bbbb +\= Expect no match + bbbbb +No match + +/ab\d{0}e/ + abe + 0: abe +\= Expect no match + ab1e +No match + +/"([^\\"]+|\\.)*"/ + the \"quick\" brown fox + 0: "quick" + 1: quick + \"the \\\"quick\\\" brown fox\" + 0: "the \"quick\" brown fox" + 1: brown fox + +/.*?/g,aftertext + abc + 0: + 0+ abc + 0: a + 0+ bc + 0: + 0+ bc + 0: b + 0+ c + 0: + 0+ c + 0: c + 0+ + 0: + 0+ + +/\b/g,aftertext + abc + 0: + 0+ abc + 0: + 0+ + +/\b/g,aftertext + abc + 0: + 0+ abc + 0: + 0+ + +//g + abc + 0: + 0: + 0: + 0: + +/]{0,})>]{0,})>([\d]{0,}\.)(.*)((
([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/is + 43.
Word Processor
(N-1286)
Lega lstaff.comCA - Statewide + 0: 43.Word Processor
(N-1286)
Lega lstaff.comCA - Statewide + 1: BGCOLOR='#DBE9E9' + 2: align=left valign=top + 3: 43. + 4: Word Processor
(N-1286) + 5: + 6: + 7: + 8: align=left valign=top + 9: Lega lstaff.com +10: align=left valign=top +11: CA - Statewide + +/a[^a]b/ + acb + 0: acb + a\nb + 0: a\x0ab + +/a.b/ + acb + 0: acb +\= Expect no match + a\nb +No match + +/a[^a]b/s + acb + 0: acb + a\nb + 0: a\x0ab + +/a.b/s + acb + 0: acb + a\nb + 0: a\x0ab + +/^(b+?|a){1,2}?c/ + bac + 0: bac + 1: a + bbac + 0: bbac + 1: a + bbbac + 0: bbbac + 1: a + bbbbac + 0: bbbbac + 1: a + bbbbbac + 0: bbbbbac + 1: a + +/^(b+|a){1,2}?c/ + bac + 0: bac + 1: a + bbac + 0: bbac + 1: a + bbbac + 0: bbbac + 1: a + bbbbac + 0: bbbbac + 1: a + bbbbbac + 0: bbbbbac + 1: a + +/(?!\A)x/m + a\bx\n + 0: x + a\nx\n + 0: x +\= Expect no match + x\nb\n +No match + +/(A|B)*?CD/ + CD + 0: CD + +/(A|B)*CD/ + CD + 0: CD + +/(AB)*?\1/ + ABABAB + 0: ABAB + 1: AB + +/(AB)*\1/ + ABABAB + 0: ABABAB + 1: AB + +/(?.*\/)foo/ + /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo + 0: /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo +\= Expect no match + /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ +No match + +/(?>(\.\d\d[1-9]?))\d+/ + 1.230003938 + 0: .230003938 + 1: .23 + 1.875000282 + 0: .875000282 + 1: .875 +\= Expect no match + 1.235 +No match + +/^((?>\w+)|(?>\s+))*$/ + now is the time for all good men to come to the aid of the party + 0: now is the time for all good men to come to the aid of the party + 1: party +\= Expect no match + this is not a line with only words and spaces! 
+No match + +/(\d+)(\w)/ + 12345a + 0: 12345a + 1: 12345 + 2: a + 12345+ + 0: 12345 + 1: 1234 + 2: 5 + +/((?>\d+))(\w)/ + 12345a + 0: 12345a + 1: 12345 + 2: a +\= Expect no match + 12345+ +No match + +/(?>a+)b/ + aaab + 0: aaab + +/((?>a+)b)/ + aaab + 0: aaab + 1: aaab + +/(?>(a+))b/ + aaab + 0: aaab + 1: aaa + +/(?>b)+/ + aaabbbccc + 0: bbb + +/(?>a+|b+|c+)*c/ + aaabbbbccccd + 0: aaabbbbc + +/((?>[^()]+)|\([^()]*\))+/ + ((abc(ade)ufh()()x + 0: abc(ade)ufh()()x + 1: x + +/\(((?>[^()]+)|\([^()]+\))+\)/ + (abc) + 0: (abc) + 1: abc + (abc(def)xyz) + 0: (abc(def)xyz) + 1: xyz +\= Expect no match + ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/a(?-i)b/i + ab + 0: ab + Ab + 0: Ab +\= Expect no match + aB +No match + AB +No match + +/(a (?x)b c)d e/ + a bcd e + 0: a bcd e + 1: a bc +\= Expect no match + a b cd e +No match + abcd e +No match + a bcde +No match + +/(a b(?x)c d (?-x)e f)/ + a bcde f + 0: a bcde f + 1: a bcde f +\= Expect no match + abcdef +No match + +/(a(?i)b)c/ + abc + 0: abc + 1: ab + aBc + 0: aBc + 1: aB +\= Expect no match + abC +No match + aBC +No match + Abc +No match + ABc +No match + ABC +No match + AbC +No match + +/a(?i:b)c/ + abc + 0: abc + aBc + 0: aBc +\= Expect no match + ABC +No match + abC +No match + aBC +No match + +/a(?i:b)*c/ + aBc + 0: aBc + aBBc + 0: aBBc +\= Expect no match + aBC +No match + aBBC +No match + +/a(?=b(?i)c)\w\wd/ + abcd + 0: abcd + abCd + 0: abCd +\= Expect no match + aBCd +No match + abcD +No match + +/(?s-i:more.*than).*million/i + more than million + 0: more than million + more than MILLION + 0: more than MILLION + more \n than Million + 0: more \x0a than Million +\= Expect no match + MORE THAN MILLION +No match + more \n than \n million +No match + +/(?:(?s-i)more.*than).*million/i + more than million + 0: more than million + more than MILLION + 0: more than MILLION + more \n than Million + 0: more \x0a than Million +\= Expect no match + MORE THAN MILLION +No match + more \n than \n million +No 
match + +/(?>a(?i)b+)+c/ + abc + 0: abc + aBbc + 0: aBbc + aBBc + 0: aBBc +\= Expect no match + Abc +No match + abAb +No match + abbC +No match + +/(?=a(?i)b)\w\wc/ + abc + 0: abc + aBc + 0: aBc +\= Expect no match + Ab +No match + abC +No match + aBC +No match + +/(?<=a(?i)b)(\w\w)c/ + abxxc + 0: xxc + 1: xx + aBxxc + 0: xxc + 1: xx +\= Expect no match + Abxxc +No match + ABxxc +No match + abxxC +No match + +/(?:(a)|b)(?(1)A|B)/ + aA + 0: aA + 1: a + bB + 0: bB +\= Expect no match + aB +No match + bA +No match + +/^(a)?(?(1)a|b)+$/ + aa + 0: aa + 1: a + b + 0: b + bb + 0: bb +\= Expect no match + ab +No match + +# Perl gets this next one wrong if the pattern ends with $; in that case it +# fails to match "12". + +/^(?(?=abc)\w{3}:|\d\d)/ + abc: + 0: abc: + 12 + 0: 12 + 123 + 0: 12 +\= Expect no match + xyz +No match + +/^(?(?!abc)\d\d|\w{3}:)$/ + abc: + 0: abc: + 12 + 0: 12 +\= Expect no match + 123 +No match + xyz +No match + +/(?(?<=foo)bar|cat)/ + foobar + 0: bar + cat + 0: cat + fcat + 0: cat + focat + 0: cat +\= Expect no match + foocat +No match + +/(?(?a*)*/ + a + 0: a + aa + 0: aa + aaaa + 0: aaaa + +/(abc|)+/ + abc + 0: abc + 1: + abcabc + 0: abcabc + 1: + abcabcabc + 0: abcabcabc + 1: + xyz + 0: + 1: + +/([a]*)*/ + a + 0: a + 1: + aaaaa + 0: aaaaa + 1: + +/([ab]*)*/ + a + 0: a + 1: + b + 0: b + 1: + ababab + 0: ababab + 1: + aaaabcde + 0: aaaab + 1: + bbbb + 0: bbbb + 1: + +/([^a]*)*/ + b + 0: b + 1: + bbbb + 0: bbbb + 1: + aaa + 0: + 1: + +/([^ab]*)*/ + cccc + 0: cccc + 1: + abab + 0: + 1: + +/([a]*?)*/ + a + 0: + 1: + aaaa + 0: + 1: + +/([ab]*?)*/ + a + 0: + 1: + b + 0: + 1: + abab + 0: + 1: + baba + 0: + 1: + +/([^a]*?)*/ + b + 0: + 1: + bbbb + 0: + 1: + aaa + 0: + 1: + +/([^ab]*?)*/ + c + 0: + 1: + cccc + 0: + 1: + baba + 0: + 1: + +/(?>a*)*/ + a + 0: a + aaabcde + 0: aaa + +/((?>a*))*/ + aaaaa + 0: aaaaa + 1: + aabbaa + 0: aa + 1: + +/((?>a*?))*/ + aaaaa + 0: + 1: + aabbaa + 0: + 1: + +/(?(?=[^a-z]+[a-z]) \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) 
/x + 12-sep-98 + 0: 12-sep-98 + 12-09-98 + 0: 12-09-98 +\= Expect no match + sep-12-98 +No match + +/(?<=(foo))bar\1/ + foobarfoo + 0: barfoo + 1: foo + foobarfootling + 0: barfoo + 1: foo +\= Expect no match + foobar +No match + barfoo +No match + +/(?i:saturday|sunday)/ + saturday + 0: saturday + sunday + 0: sunday + Saturday + 0: Saturday + Sunday + 0: Sunday + SATURDAY + 0: SATURDAY + SUNDAY + 0: SUNDAY + SunDay + 0: SunDay + +/(a(?i)bc|BB)x/ + abcx + 0: abcx + 1: abc + aBCx + 0: aBCx + 1: aBC + bbx + 0: bbx + 1: bb + BBx + 0: BBx + 1: BB +\= Expect no match + abcX +No match + aBCX +No match + bbX +No match + BBX +No match + +/^([ab](?i)[cd]|[ef])/ + ac + 0: ac + 1: ac + aC + 0: aC + 1: aC + bD + 0: bD + 1: bD + elephant + 0: e + 1: e + Europe + 0: E + 1: E + frog + 0: f + 1: f + France + 0: F + 1: F +\= Expect no match + Africa +No match + +/^(ab|a(?i)[b-c](?m-i)d|x(?i)y|z)/ + ab + 0: ab + 1: ab + aBd + 0: aBd + 1: aBd + xy + 0: xy + 1: xy + xY + 0: xY + 1: xY + zebra + 0: z + 1: z + Zambesi + 0: Z + 1: Z +\= Expect no match + aCD +No match + XY +No match + +/(?<=foo\n)^bar/m + foo\nbar + 0: bar +\= Expect no match + bar +No match + baz\nbar +No match + +/(?<=(?]&/ + <&OUT + 0: <& + +/^(a\1?){4}$/ + aaaaaaaaaa + 0: aaaaaaaaaa + 1: aaaa +\= Expect no match + AB +No match + aaaaaaaaa +No match + aaaaaaaaaaa +No match + +/^(a(?(1)\1)){4}$/ + aaaaaaaaaa + 0: aaaaaaaaaa + 1: aaaa +\= Expect no match + aaaaaaaaa +No match + aaaaaaaaaaa +No match + +/(?<=a)b/ + ab + 0: b +\= Expect no match + cb +No match + b +No match + +/(? 
+ 2: abcd + xy:z:::abcd + 0: xy:z:::abcd + 1: xy:z::: + 2: abcd + +/^[^bcd]*(c+)/ + aexycd + 0: aexyc + 1: c + +/(a*)b+/ + caab + 0: aab + 1: aa + +/([\w:]+::)?(\w+)$/ + abcd + 0: abcd + 1: + 2: abcd + xy:z:::abcd + 0: xy:z:::abcd + 1: xy:z::: + 2: abcd +\= Expect no match + abcd: +No match + abcd: +No match + +/^[^bcd]*(c+)/ + aexycd + 0: aexyc + 1: c + +/(>a+)ab/ + +/(?>a+)b/ + aaab + 0: aaab + +/([[:]+)/ + a:[b]: + 0: :[ + 1: :[ + +/([[=]+)/ + a=[b]= + 0: =[ + 1: =[ + +/([[.]+)/ + a.[b]. + 0: .[ + 1: .[ + +/((?>a+)b)/ + aaab + 0: aaab + 1: aaab + +/(?>(a+))b/ + aaab + 0: aaab + 1: aaa + +/((?>[^()]+)|\([^()]*\))+/ + ((abc(ade)ufh()()x + 0: abc(ade)ufh()()x + 1: x + +/a\Z/ +\= Expect no match + aaab +No match + a\nb\n +No match + +/b\Z/ + a\nb\n + 0: b + +/b\z/ + +/b\Z/ + a\nb + 0: b + +/b\z/ + a\nb + 0: b + +/^(?>(?(1)\.|())[^\W_](?>[a-z0-9-]*[^\W_])?)+$/ + a + 0: a + 1: + abc + 0: abc + 1: + a-b + 0: a-b + 1: + 0-9 + 0: 0-9 + 1: + a.b + 0: a.b + 1: + 5.6.7 + 0: 5.6.7 + 1: + the.quick.brown.fox + 0: the.quick.brown.fox + 1: + a100.b200.300c + 0: a100.b200.300c + 1: + 12-ab.1245 + 0: 12-ab.1245 + 1: +\= Expect no match + \ +No match + .a +No match + -a +No match + a- +No match + a. +No match + a_b +No match + a.- +No match + a.. +No match + ab..bc +No match + the.quick.brown.fox- +No match + the.quick.brown.fox. 
+No match + the.quick.brown.fox_ +No match + the.quick.brown.fox+ +No match + +/(?>.*)(?<=(abcd|wxyz))/ + alphabetabcd + 0: alphabetabcd + 1: abcd + endingwxyz + 0: endingwxyz + 1: wxyz +\= Expect no match + a rather long string that doesn't end with one of them +No match + +/word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword/ + word cat dog elephant mussel cow horse canary baboon snake shark otherword + 0: word cat dog elephant mussel cow horse canary baboon snake shark otherword +\= Expect no match + word cat dog elephant mussel cow horse canary baboon snake shark +No match + +/word (?>[a-zA-Z0-9]+ ){0,30}otherword/ +\= Expect no match + word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope +No match + +/(?<=\d{3}(?!999))foo/ + 999foo + 0: foo + 123999foo + 0: foo +\= Expect no match + 123abcfoo +No match + +/(?<=(?!...999)\d{3})foo/ + 999foo + 0: foo + 123999foo + 0: foo +\= Expect no match + 123abcfoo +No match + +/(?<=\d{3}(?!999)...)foo/ + 123abcfoo + 0: foo + 123456foo + 0: foo +\= Expect no match + 123999foo +No match + +/(?<=\d{3}...)(? + 2: + 3: abcd +
+ 2: + 3: abcd + \s*)=(?>\s*) # find + 2: + 3: abcd + Z)+|A)*/ + ZABCDEFG + 0: ZA + 1: A + +/((?>)+|A)*/ + ZABCDEFG + 0: + 1: + +/a*/g + abbab + 0: a + 0: + 0: + 0: a + 0: + 0: + +/[[:space:]]+/ + > \x09\x0a\x0c\x0d\x0b< + 0: \x09\x0a\x0c\x0d\x0b + +/[[:blank:]]+/ + > \x09\x0a\x0c\x0d\x0b< + 0: \x09 + +/[\s]+/ + > \x09\x0a\x0c\x0d\x0b< + 0: \x09\x0a\x0c\x0d\x0b + +/\s+/ + > \x09\x0a\x0c\x0d\x0b< + 0: \x09\x0a\x0c\x0d\x0b + +/a b/x + ab + 0: ab + +/(?!\A)x/m + a\nxb\n + 0: x + +/(?!^)x/m +\= Expect no match + a\nxb\n +No match + +/abc\Qabc\Eabc/ + abcabcabc + 0: abcabcabc + +/abc\Q(*+|\Eabc/ + abc(*+|abc + 0: abc(*+|abc + +/ abc\Q abc\Eabc/x + abc abcabc + 0: abc abcabc +\= Expect no match + abcabcabc +No match + +/abc#comment + \Q#not comment + literal\E/x + abc#not comment\n literal + 0: abc#not comment\x0a literal + +/abc#comment + \Q#not comment + literal/x + abc#not comment\n literal + 0: abc#not comment\x0a literal + +/abc#comment + \Q#not comment + literal\E #more comment + /x + abc#not comment\n literal + 0: abc#not comment\x0a literal + +/abc#comment + \Q#not comment + literal\E #more comment/x + abc#not comment\n literal + 0: abc#not comment\x0a literal + +/\Qabc\$xyz\E/ + abc\\\$xyz + 0: abc\$xyz + +/\Qabc\E\$\Qxyz\E/ + abc\$xyz + 0: abc$xyz + +/\Gabc/ + abc + 0: abc +\= Expect no match + xyzabc +No match + +/\Gabc./g + abc1abc2xyzabc3 + 0: abc1 + 0: abc2 + +/abc./g + abc1abc2xyzabc3 + 0: abc1 + 0: abc2 + 0: abc3 + +/a(?x: b c )d/ + XabcdY + 0: abcd +\= Expect no match + Xa b c d Y +No match + +/((?x)x y z | a b c)/ + XabcY + 0: abc + 1: abc + AxyzB + 0: xyz + 1: xyz + +/(?i)AB(?-i)C/ + XabCY + 0: abC +\= Expect no match + XabcY +No match + +/((?i)AB(?-i)C|D)E/ + abCE + 0: abCE + 1: abC + DE + 0: DE + 1: D +\= Expect no match + abcE +No match + abCe +No match + dE +No match + De +No match + +/(.*)\d+\1/ + abc123abc + 0: abc123abc + 1: abc + abc123bc + 0: bc123bc + 1: bc + +/(.*)\d+\1/s + abc123abc + 0: abc123abc + 1: abc + abc123bc + 0: bc123bc + 1: bc + 
+/((.*))\d+\1/ + abc123abc + 0: abc123abc + 1: abc + 2: abc + abc123bc + 0: bc123bc + 1: bc + 2: bc + +# This tests for an IPv6 address in the form where it can have up to +# eight components, one and only one of which is empty. This must be +# an internal component. + +/^(?!:) # colon disallowed at start + (?: # start of item + (?: [0-9a-f]{1,4} | # 1-4 hex digits or + (?(1)0 | () ) ) # if null previously matched, fail; else null + : # followed by colon + ){1,7} # end item; 1-7 of them required + [0-9a-f]{1,4} $ # final hex number at end of string + (?(1)|.) # check that there was an empty component + /ix + a123::a123 + 0: a123::a123 + 1: + a123:b342::abcd + 0: a123:b342::abcd + 1: + a123:b342::324e:abcd + 0: a123:b342::324e:abcd + 1: + a123:ddde:b342::324e:abcd + 0: a123:ddde:b342::324e:abcd + 1: + a123:ddde:b342::324e:dcba:abcd + 0: a123:ddde:b342::324e:dcba:abcd + 1: + a123:ddde:9999:b342::324e:dcba:abcd + 0: a123:ddde:9999:b342::324e:dcba:abcd + 1: +\= Expect no match + 1:2:3:4:5:6:7:8 +No match + a123:bce:ddde:9999:b342::324e:dcba:abcd +No match + a123::9999:b342::324e:dcba:abcd +No match + abcde:2:3:4:5:6:7:8 +No match + ::1 +No match + abcd:fee0:123:: +No match + :1 +No match + 1: +No match + +/[z\Qa-d]\E]/ + z + 0: z + a + 0: a + - + 0: - + d + 0: d + ] + 0: ] +\= Expect no match + b +No match + +/(a+)*b/ +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/(?i)reg(?:ul(?:[aä]|ae)r|ex)/ + REGular + 0: REGular + regulaer + 0: regulaer + Regex + 0: Regex + regulär + 0: regul\xe4r + +/Åæåä[à-ÿÀ-ß]+/ + Åæåäà + 0: \xc5\xe6\xe5\xe4\xe0 + Åæåäÿ + 0: \xc5\xe6\xe5\xe4\xff + ÅæåäÀ + 0: \xc5\xe6\xe5\xe4\xc0 + Åæåäß + 0: \xc5\xe6\xe5\xe4\xdf + +/(?<=Z)X./ + \x84XAZXB + 0: XB + +/ab cd (?x) de fg/ + ab cd defg + 0: ab cd defg + +/ab cd(?x) de fg/ + ab cddefg + 0: ab cddefg +\= Expect no match + abcddefg +No match + +/(? 
+ 2: + D + 0: D + 1: + 2: + +/(a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 + 1: +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/(?>a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/(?:a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/\Z/g + abc\n + 0: + 0: + +/^(?s)(?>.*)(? + 2: a + +/(?>(a))b|(a)c/ + ac + 0: ac + 1: + 2: a + +/(?=(a))ab|(a)c/ + ac + 0: ac + 1: + 2: a + +/((?>(a))b|(a)c)/ + ac + 0: ac + 1: ac + 2: + 3: a + +/((?>(a))b|(a)c)++/ + ac + 0: ac + 1: ac + 2: + 3: a + +/(?:(?>(a))b|(a)c)++/ + ac + 0: ac + 1: + 2: a + +/(?=(?>(a))b|(a)c)(..)/ + ac + 0: ac + 1: + 2: a + 3: ac + +/(?>(?>(a))b|(a)c)/ + ac + 0: ac + 1: + 2: a + +/(?:(?>([ab])))+a=/aftertext + =ba= + 0: ba= + 0+ + 1: b + +/(?>([ab]))+a=/aftertext + =ba= + 0: ba= + 0+ + 1: b + +/((?>(a+)b)+(aabab))/ + aaaabaaabaabab + 0: aaaabaaabaabab + 1: aaaabaaabaabab + 2: aaa + 3: aabab + +/(?>a+|ab)+?c/ +\= Expect no match + aabc +No match + +/(?>a+|ab)+c/ +\= Expect no match + aabc +No match + +/(?:a+|ab)+c/ + aabc + 0: aabc + +/(?(?=(a))a)/ + a + 0: a + 1: a + +/(?(?=(a))a)(b)/ + ab + 0: ab + 1: a + 2: b + +/^(?:a|ab)++c/ +\= Expect no match + aaaabc +No match + +/^(?>a|ab)++c/ +\= Expect no match + aaaabc +No match + +/^(?:a|ab)+c/ + aaaabc + 0: aaaabc + +/(?=abc){3}abc/aftertext + abcabcabc + 0: abc + 0+ abcabc +\= Expect no match + xyz +No match + +/(?=abc)+abc/aftertext + abcabcabc + 0: abc + 0+ abcabc +\= Expect no match + xyz +No match + +/(?=abc)++abc/aftertext + abcabcabc + 0: abc + 0+ 
abcabc +\= Expect no match + xyz +No match + +/(?=abc){0}xyz/ + xyz + 0: xyz + +/(?=abc){1}xyz/ +\= Expect no match + xyz +No match + +/(?=(a))?./ + ab + 0: a + 1: a + bc + 0: b + +/(?=(a))??./ + ab + 0: a + bc + 0: b + +/^(?=(?1))?[az]([abc])d/ + abd + 0: abd + 1: b + zcdxx + 0: zcd + 1: c + +/^(?!a){0}\w+/ + aaaaa + 0: aaaaa + +/(?<=(abc))?xyz/ + abcxyz + 0: xyz + 1: abc + pqrxyz + 0: xyz + +/^[\g]+/ + ggg<<>> + 0: ggg<<>> +\= Expect no match + \\ga +No match + +/^[\ga]+/ + gggagagaxyz + 0: gggagaga + +/^[:a[:digit:]]+/ + aaaa444:::Z + 0: aaaa444::: + +/^[:a[:digit:]:b]+/ + aaaa444:::bbbZ + 0: aaaa444:::bbb + +/[:a]xxx[b:]/ + :xxx: + 0: :xxx: + +/(?<=a{2})b/i + xaabc + 0: b +\= Expect no match + xabc +No match + +/(?XNNNYZ + 0: XNNNYZ + > X NYQZ + 0: X NYQZ +\= Expect no match + >XYZ +No match + > X NY Z +No match + +/\v*X\v?Y\v+Z\V*\x0a\V+\x0b\V{2,3}\x0c/ + >XY\x0aZ\x0aA\x0bNN\x0c + 0: XY\x0aZ\x0aA\x0bNN\x0c + >\x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c + 0: \x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c + +/(foo)\Kbar/ + foobar + 0: bar + 1: foo + +/(foo)(\Kbar|baz)/ + foobar + 0: bar + 1: foo + 2: bar + foobaz + 0: foobaz + 1: foo + 2: baz + +/(foo\Kbar)baz/ + foobarbaz + 0: barbaz + 1: foobar + +/abc\K|def\K/g,aftertext + Xabcdefghi + 0: + 0+ defghi + 0: + 0+ ghi + +/ab\Kc|de\Kf/g,aftertext + Xabcdefghi + 0: c + 0+ defghi + 0: f + 0+ ghi + +/(?=C)/g,aftertext + ABCDECBA + 0: + 0+ CDECBA + 0: + 0+ CBA + +/^abc\K/aftertext + abcdef + 0: + 0+ def +\= Expect no match + defabcxyz +No match + +/^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-2}Z/ + ababababbbabZXXXX + 0: ababababbbabZ + 1: ab + 2: b + +/(?tom|bon)-\g{A}/ + tom-tom + 0: tom-tom + 1: tom + bon-bon + 0: bon-bon + 1: bon + +/(^(a|b\g{-1}))/ +\= Expect no match + bacxxx +No match + +/(?|(abc)|(xyz))\1/ + abcabc + 0: abcabc + 1: abc + xyzxyz + 0: xyzxyz + 1: xyz +\= Expect no match + abcxyz +No match + xyzabc +No match + +/(?|(abc)|(xyz))(?1)/ + abcabc + 0: abcabc + 1: abc + xyzabc + 0: xyzabc + 1: xyz +\= Expect no match 
+ xyzxyz +No match + +/^X(?5)(a)(?|(b)|(q))(c)(d)(Y)/ + XYabcdY + 0: XYabcdY + 1: a + 2: b + 3: c + 4: d + 5: Y + +/^X(?7)(a)(?|(b|(r)(s))|(q))(c)(d)(Y)/ + XYabcdY + 0: XYabcdY + 1: a + 2: b + 3: + 4: + 5: c + 6: d + 7: Y + +/^X(?7)(a)(?|(b|(?|(r)|(t))(s))|(q))(c)(d)(Y)/ + XYabcdY + 0: XYabcdY + 1: a + 2: b + 3: + 4: + 5: c + 6: d + 7: Y + +/(?'abc'\w+):\k{2}/ + a:aaxyz + 0: a:aa + 1: a + ab:ababxyz + 0: ab:abab + 1: ab +\= Expect no match + a:axyz +No match + ab:abxyz +No match + +/(?'abc'\w+):\g{abc}{2}/ + a:aaxyz + 0: a:aa + 1: a + ab:ababxyz + 0: ab:abab + 1: ab +\= Expect no match + a:axyz +No match + ab:abxyz +No match + +/^(?a)? (?()b|c) (?('ab')d|e)/x + abd + 0: abd + 1: a + ce + 0: ce + +/^(a.)\g-1Z/ + aXaXZ + 0: aXaXZ + 1: aX + +/^(a.)\g{-1}Z/ + aXaXZ + 0: aXaXZ + 1: aX + +/^(?(DEFINE) (? a) (? b) ) (?&A) (?&B) /x + abcd + 0: ab + +/(?(?:(?:a(?&all))|(b))(c?))/ + aabc + 0: aabc + 1: aabc + 2: + 3: + +/(a(b)|(c))(?1)/ + abc + 0: abc + 1: ab + 2: b + cab + 0: cab + 1: c + 2: + 3: c + +/(?1)(a(b)|(c))/ + abc + 0: abc + 1: c + 2: + 3: c + cab + 0: cab + 1: ab + 2: b + +/(?(?&NAME_PAT))\s+(?(?&ADDRESS_PAT)) + (?(DEFINE) + (?[a-z]+) + (?\d+) + )/x + metcalfe 33 + 0: metcalfe 33 + 1: metcalfe + 2: 33 + +/(?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}/ + 1.2.3.4 + 0: 1.2.3.4 + 1: + 2: .4 + 131.111.10.206 + 0: 131.111.10.206 + 1: + 2: .206 + 10.0.0.0 + 0: 10.0.0.0 + 1: + 2: .0 +\= Expect no match + 10.6 +No match + 455.3.4.5 +No match + +/\b(?&byte)(\.(?&byte)){3}(?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))/ + 1.2.3.4 + 0: 1.2.3.4 + 1: .4 + 131.111.10.206 + 0: 131.111.10.206 + 1: .206 + 10.0.0.0 + 0: 10.0.0.0 + 1: .0 +\= Expect no match + 10.6 +No match + 455.3.4.5 +No match + +/^(\w++|\s++)*$/ + now is the time for all good men to come to the aid of the party + 0: now is the time for all good men to come to the aid of the party + 1: party +\= Expect no match + this is not a line with only words and spaces! 
+No match + +/(\d++)(\w)/ + 12345a + 0: 12345a + 1: 12345 + 2: a +\= Expect no match + 12345+ +No match + +/a++b/ + aaab + 0: aaab + +/(a++b)/ + aaab + 0: aaab + 1: aaab + +/(a++)b/ + aaab + 0: aaab + 1: aaa + +/([^()]++|\([^()]*\))+/ + ((abc(ade)ufh()()x + 0: abc(ade)ufh()()x + 1: x + +/\(([^()]++|\([^()]+\))+\)/ + (abc) + 0: (abc) + 1: abc + (abc(def)xyz) + 0: (abc(def)xyz) + 1: xyz +\= Expect no match + ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/^([^()]|\((?1)*\))*$/ + abc + 0: abc + 1: c + a(b)c + 0: a(b)c + 1: c + a(b(c))d + 0: a(b(c))d + 1: d +\= Expect no match) + a(b(c)d +No match + +/^>abc>([^()]|\((?1)*\))*abc>123abc>123abc>1(2)3abc>1(2)3abc>(1(2)3)abc>(1(2)3) + 2: + 3: Satanoscillatemymetallicsonatas + 4: S + AmanaplanacanalPanama + 0: AmanaplanacanalPanama + 1: + 2: + 3: AmanaplanacanalPanama + 4: A + AblewasIereIsawElba + 0: AblewasIereIsawElba + 1: + 2: + 3: AblewasIereIsawElba + 4: A +\= Expect no match + Thequickbrownfox +No match + +/^(\d+|\((?1)([+*-])(?1)\)|-(?1))$/ + 12 + 0: 12 + 1: 12 + (((2+2)*-3)-7) + 0: (((2+2)*-3)-7) + 1: (((2+2)*-3)-7) + 2: - + -12 + 0: -12 + 1: -12 +\= Expect no match + ((2+2)*-3)-7) +No match + +/^(x(y|(?1){2})z)/ + xyz + 0: xyz + 1: xyz + 2: y + xxyzxyzz + 0: xxyzxyzz + 1: xxyzxyzz + 2: xyzxyz +\= Expect no match + xxyzz +No match + xxyzxyzxyzz +No match + +/((< (?: (?(R) \d++ | [^<>]*+) | (?2)) * >))/x + <> + 0: <> + 1: <> + 2: <> + + 0: + 1: + 2: + hij> + 0: hij> + 1: hij> + 2: hij> + hij> + 0: + 1: + 2: + def> + 0: def> + 1: def> + 2: def> + + 0: <> + 1: <> + 2: <> +\= Expect no match + + 2: + 3: Satan, oscillate my metallic sonatas + 4: S + A man, a plan, a canal: Panama! + 0: A man, a plan, a canal: Panama! + 1: + 2: + 3: A man, a plan, a canal: Panama + 4: A + Able was I ere I saw Elba. + 0: Able was I ere I saw Elba. 
+ 1: + 2: + 3: Able was I ere I saw Elba + 4: A +\= Expect no match + The quick brown fox +No match + +/^((.)(?1)\2|.)$/ + a + 0: a + 1: a + aba + 0: aba + 1: aba + 2: a + aabaa + 0: aabaa + 1: aabaa + 2: a + abcdcba + 0: abcdcba + 1: abcdcba + 2: a + pqaabaaqp + 0: pqaabaaqp + 1: pqaabaaqp + 2: p + ablewasiereisawelba + 0: ablewasiereisawelba + 1: ablewasiereisawelba + 2: a +\= Expect no match + rhubarb +No match + the quick brown fox +No match + +/(a)(?<=b(?1))/ + baz + 0: a + 1: a +\= Expect no match + caz +No match + +/(?<=b(?1))(a)/ + zbaaz + 0: a + 1: a +\= Expect no match + aaa +No match + +/(?a)(?<=b(?&X))/ + baz + 0: a + 1: a + +/^(?|(abc)|(def))\1/ + abcabc + 0: abcabc + 1: abc + defdef + 0: defdef + 1: def +\= Expect no match + abcdef +No match + defabc +No match + +/^(?|(abc)|(def))(?1)/ + abcabc + 0: abcabc + 1: abc + defabc + 0: defabc + 1: def +\= Expect no match + defdef +No match + abcdef +No match + +/(?:a(? (?')|(?")) |b(? (?')|(?")) ) (?('quote')[a-z]+|[0-9]+)/x,dupnames + a\"aaaaa + 0: a"aaaaa + 1: " + 2: + 3: " + b\"aaaaa + 0: b"aaaaa + 1: + 2: + 3: + 4: " + 5: + 6: " +\= Expect no match + b\"11111 +No match + +/(?:(?1)|B)(A(*F)|C)/ + ABCD + 0: BC + 1: C + CCD + 0: CC + 1: C +\= Expect no match + CAD +No match + +/^(?:(?1)|B)(A(*F)|C)/ + CCD + 0: CC + 1: C + BCD + 0: BC + 1: C +\= Expect no match + ABCD +No match + CAD +No match + BAD +No match + +/(?:(?1)|B)(A(*ACCEPT)XX|C)D/ + AAD + 0: AA + 1: A + ACD + 0: ACD + 1: C + BAD + 0: BA + 1: A + BCD + 0: BCD + 1: C + BAX + 0: BA + 1: A +\= Expect no match + ACX +No match + ABC +No match + +/(?(DEFINE)(A))B(?1)C/ + BAC + 0: BAC + +/(?(DEFINE)((A)\2))B(?1)C/ + BAAC + 0: BAAC + +/(? 
\( ( [^()]++ | (?&pn) )* \) )/x + (ab(cd)ef) + 0: (ab(cd)ef) + 1: (ab(cd)ef) + 2: ef + +/^(?=a(*SKIP)b|ac)/ +\= Expect no match + ac +No match + +/^(?=a(*PRUNE)b)/ + ab + 0: +\= Expect no match + ac +No match + +/^(?=a(*ACCEPT)b)/ + ac + 0: + +/(?>a\Kb)/ + ab + 0: b + +/((?>a\Kb))/ + ab + 0: b + 1: ab + +/(a\Kb)/ + ab + 0: b + 1: ab + +/^a\Kcz|ac/ + ac + 0: ac + +/(?>a\Kbz|ab)/ + ab + 0: ab + +/^(?&t)(?(DEFINE)(?a\Kb))$/ + ab + 0: b + +/^([^()]|\((?1)*\))*$/ + a(b)c + 0: a(b)c + 1: c + a(b(c)d)e + 0: a(b(c)d)e + 1: e + +/(?P(?P0)(?P>L1)|(?P>L2))/ + 0 + 0: 0 + 1: 0 + 00 + 0: 00 + 1: 00 + 2: 0 + 0000 + 0: 0000 + 1: 0000 + 2: 0 + +/(?P(?P0)|(?P>L2)(?P>L1))/ + 0 + 0: 0 + 1: 0 + 2: 0 + 00 + 0: 0 + 1: 0 + 2: 0 + 0000 + 0: 0 + 1: 0 + 2: 0 + +# This one does fail, as expected, in Perl. It needs the complex item at the +# end of the pattern. A single letter instead of (B|D) makes it not fail, which +# I think is a Perl bug. + +/A(*COMMIT)(B|D)/ +\= Expect no match + ACABX +No match + +# Check the use of names for failure + +/^(A(*PRUNE:A)B|C(*PRUNE:B)D)/mark +\= Expect no match + AC +No match, mark = A + CB +No match, mark = B + +/(*MARK:A)(*SKIP:B)(C|X)/mark + C + 0: C + 1: C +MK: A +\= Expect no match + D +No match, mark = A + +/^(A(*THEN:A)B|C(*THEN:B)D)/mark +\= Expect no match + CB +No match, mark = B + +/^(?:A(*THEN:A)B|C(*THEN:B)D)/mark +\= Expect no match + CB +No match, mark = B + +/^(?>A(*THEN:A)B|C(*THEN:B)D)/mark +\= Expect no match + CB +No match, mark = B + +# This should succeed, as the skip causes bump to offset 1 (the mark). Note +# that we have to have something complicated such as (B|Z) at the end because, +# for Perl, a simple character somehow causes an unwanted optimization to mess +# with the handling of backtracking verbs. + +/A(*MARK:A)A+(*SKIP:A)(B|Z) | AC/x,mark + AAAC + 0: AC + +# Test skipping over a non-matching mark. + +/A(*MARK:A)A+(*MARK:B)(*SKIP:A)(B|Z) | AC/x,mark + AAAC + 0: AC + +# Check shorthand for MARK. 
+ +/A(*:A)A+(*SKIP:A)(B|Z) | AC/x,mark + AAAC + 0: AC + +/(*:A)A+(*SKIP:A)(B|Z)/mark +\= Expect no match + AAAC +No match, mark = A + +# This should succeed, as a non-existent skip name disables the skip. + +/A(*MARK:A)A+(*SKIP:B)(B|Z) | AC/x,mark + AAAC + 0: AC + +/A(*MARK:A)A+(*SKIP:B)(B|Z) | AC(*:B)/x,mark + AAAC + 0: AC +MK: B + +# COMMIT at the start of a pattern should act like an anchor. Again, however, +# we need the complication for Perl. + +/(*COMMIT)(A|P)(B|P)(C|P)/ + ABCDEFG + 0: ABC + 1: A + 2: B + 3: C +\= Expect no match + DEFGABC +No match + +# COMMIT inside an atomic group can't stop backtracking over the group. + +/(\w+)(?>b(*COMMIT))\w{2}/ + abbb + 0: abbb + 1: a + +/(\w+)b(*COMMIT)\w{2}/ +\= Expect no match + abbb +No match + +# Check opening parens in comment when seeking forward reference. + +/(?&t)(?#()(?(DEFINE)(?a))/ + bac + 0: a + +# COMMIT should override THEN. + +/(?>(*COMMIT)(?>yes|no)(*THEN)(*F))?/ +\= Expect no match + yes +No match + +/(?>(*COMMIT)(yes|no)(*THEN)(*F))?/ +\= Expect no match + yes +No match + +/b?(*SKIP)c/ + bc + 0: bc + abc + 0: bc + +/(*SKIP)bc/ +\= Expect no match + a +No match + +/(*SKIP)b/ +\= Expect no match + a +No match + +/(?P(?P=abn)xxx|)+/ + xxx + 0: + 1: + +/(?i:([^b]))(?1)/ + aa + 0: aa + 1: a + aA + 0: aA + 1: a +\= Expect no match + ab +No match + aB +No match + Ba +No match + ba +No match + +/^(?&t)*+(?(DEFINE)(?a))\w$/ + aaaaaaX + 0: aaaaaaX +\= Expect no match + aaaaaa +No match + +/^(?&t)*(?(DEFINE)(?a))\w$/ + aaaaaaX + 0: aaaaaaX + aaaaaa + 0: aaaaaa + +/^(a)*+(\w)/ + aaaaX + 0: aaaaX + 1: a + 2: X + YZ + 0: Y + 1: + 2: Y +\= Expect no match + aaaa +No match + +/^(?:a)*+(\w)/ + aaaaX + 0: aaaaX + 1: X + YZ + 0: Y + 1: Y +\= Expect no match + aaaa +No match + +/^(a)++(\w)/ + aaaaX + 0: aaaaX + 1: a + 2: X +\= Expect no match + aaaa +No match + YZ +No match + +/^(?:a)++(\w)/ + aaaaX + 0: aaaaX + 1: X +\= Expect no match + aaaa +No match + YZ +No match + +/^(a)?+(\w)/ + aaaaX + 0: aa + 1: a + 2: a + YZ 
+ 0: Y + 1: + 2: Y + +/^(?:a)?+(\w)/ + aaaaX + 0: aa + 1: a + YZ + 0: Y + 1: Y + +/^(a){2,}+(\w)/ + aaaaX + 0: aaaaX + 1: a + 2: X +\= Expect no match + aaa +No match + YZ +No match + +/^(?:a){2,}+(\w)/ + aaaaX + 0: aaaaX + 1: X +\= Expect no match + aaa +No match + YZ +No match + +/(a|)*(?1)b/ + b + 0: b + 1: + ab + 0: ab + 1: + aab + 0: aab + 1: + +/(a)++(?1)b/ +\= Expect no match + ab +No match + aab +No match + +/(a)*+(?1)b/ +\= Expect no match + ab +No match + aab +No match + +/(?1)(?:(b)){0}/ + b + 0: b + +/(foo ( \( ((?:(?> [^()]+ )|(?2))*) \) ) )/x + foo(bar(baz)+baz(bop)) + 0: foo(bar(baz)+baz(bop)) + 1: foo(bar(baz)+baz(bop)) + 2: (bar(baz)+baz(bop)) + 3: bar(baz)+baz(bop) + +/(A (A|B(*ACCEPT)|C) D)(E)/x + AB + 0: AB + 1: AB + 2: B + +/\A.*?(a|bc)/ + ba + 0: ba + 1: a + +/\A.*?(?:a|bc)++/ + ba + 0: ba + +/\A.*?(a|bc)++/ + ba + 0: ba + 1: a + +/\A.*?(?:a|bc|d)/ + ba + 0: ba + +/(?:(b))++/ + beetle + 0: b + 1: b + +/(?(?=(a(*ACCEPT)z))a)/ + a + 0: a + 1: a + +/^(a)(?1)+ab/ + aaaab + 0: aaaab + 1: a + +/^(a)(?1)++ab/ +\= Expect no match + aaaab +No match + +/^(?=a(*:M))aZ/mark + aZbc + 0: aZ +MK: M + +/^(?!(*:M)b)aZ/mark + aZbc + 0: aZ + +/(?(DEFINE)(a))?b(?1)/ + backgammon + 0: ba + +/^\N+/ + abc\ndef + 0: abc + +/^\N{1,}/ + abc\ndef + 0: abc + +/(?(R)a+|(?R)b)/ + aaaabcde + 0: aaaab + +/(?(R)a+|((?R))b)/ + aaaabcde + 0: aaaab + 1: aaaa + +/((?(R)a+|(?1)b))/ + aaaabcde + 0: aaaab + 1: aaaab + +/((?(R1)a+|(?1)b))/ + aaaabcde + 0: aaaab + 1: aaaab + +/((?(R)a|(?1)))*/ + aaa + 0: aaa + 1: a + +/((?(R)a|(?1)))+/ + aaa + 0: aaa + 1: a + +/a(*:any +name)/mark + abc + 0: a +MK: any \x0aname + +/(?>(?&t)c|(?&t))(?(DEFINE)(?a|b(*PRUNE)c))/ + a + 0: a + ba + 0: a + bba + 0: a + +# Checking revised (*THEN) handling. + +# Capture + +/^.*? (a(*THEN)b) c/x +\= Expect no match + aabc +No match + +/^.*? (a(*THEN)b|(*F)) c/x + aabc + 0: aabc + 1: ab + +/^.*? ( (a(*THEN)b) | (*F) ) c/x + aabc + 0: aabc + 1: ab + 2: ab + +/^.*? 
( (a(*THEN)b) ) c/x +\= Expect no match + aabc +No match + +# Non-capture + +/^.*? (?:a(*THEN)b) c/x +\= Expect no match + aabc +No match + +/^.*? (?:a(*THEN)b|(*F)) c/x + aabc + 0: aabc + +/^.*? (?: (?:a(*THEN)b) | (*F) ) c/x + aabc + 0: aabc + +/^.*? (?: (?:a(*THEN)b) ) c/x +\= Expect no match + aabc +No match + +# Atomic + +/^.*? (?>a(*THEN)b) c/x +\= Expect no match + aabc +No match + +/^.*? (?>a(*THEN)b|(*F)) c/x + aabc + 0: aabc + +/^.*? (?> (?>a(*THEN)b) | (*F) ) c/x + aabc + 0: aabc + +/^.*? (?> (?>a(*THEN)b) ) c/x +\= Expect no match + aabc +No match + +# Possessive capture + +/^.*? (a(*THEN)b)++ c/x +\= Expect no match + aabc +No match + +/^.*? (a(*THEN)b|(*F))++ c/x + aabc + 0: aabc + 1: ab + +/^.*? ( (a(*THEN)b)++ | (*F) )++ c/x + aabc + 0: aabc + 1: ab + 2: ab + +/^.*? ( (a(*THEN)b)++ )++ c/x +\= Expect no match + aabc +No match + +# Possessive non-capture + +/^.*? (?:a(*THEN)b)++ c/x +\= Expect no match + aabc +No match + +/^.*? (?:a(*THEN)b|(*F))++ c/x + aabc + 0: aabc + +/^.*? (?: (?:a(*THEN)b)++ | (*F) )++ c/x + aabc + 0: aabc + +/^.*? (?: (?:a(*THEN)b)++ )++ c/x +\= Expect no match + aabc +No match + +# Condition assertion + +/^(?(?=a(*THEN)b)ab|ac)/ + ac + 0: ac + +# Condition + +/^.*?(?(?=a)a|b(*THEN)c)/ +\= Expect no match + ba +No match + +/^.*?(?:(?(?=a)a|b(*THEN)c)|d)/ + ba + 0: ba + +/^.*?(?(?=a)a(*THEN)b|c)/ +\= Expect no match + ac +No match + +# Assertion + +/^.*(?=a(*THEN)b)/ + aabc + 0: a + +# -------------------------- + +/(?>a(*:m))/imsx,mark + a + 0: a +MK: m + +/(?>(a)(*:m))/imsx,mark + a + 0: a + 1: a +MK: m + +/(?<=a(*ACCEPT)b)c/ + xacd + 0: c + +/(?<=(a(*ACCEPT)b))c/ + xacd + 0: c + 1: a + +/(?<=(a(*COMMIT)b))c/ + xabcd + 0: c + 1: ab +\= Expect no match + xacd +No match + +/(? 
+ 2: + +/(another)?(\1+)test/ +\= Expect no match + hello world test +No match + +/(a(*COMMIT)b){0}a(?1)|aac/ + aac + 0: aac + +/((?:a?)*)*c/ + aac + 0: aac + 1: + +/((?>a?)*)*c/ + aac + 0: aac + 1: + +/(?>.*?a)(?<=ba)/ + aba + 0: ba + +/(?:.*?a)(?<=ba)/ + aba + 0: aba + +/(?>.*?a)b/s + aab + 0: ab + +/(?>.*?a)b/ + aab + 0: ab + +/(?>^a)b/s +\= Expect no match + aab +No match + +/(?>.*?)(?<=(abcd)|(wxyz))/ + alphabetabcd + 0: + 1: abcd + endingwxyz + 0: + 1: + 2: wxyz + +/(?>.*)(?<=(abcd)|(wxyz))/ + alphabetabcd + 0: alphabetabcd + 1: abcd + endingwxyz + 0: endingwxyz + 1: + 2: wxyz + +/(?>.*)foo/ +\= Expect no match + abcdfooxyz +No match + +/(?>.*?)foo/ + abcdfooxyz + 0: foo + +/(?:(a(*PRUNE)b)){0}(?:(?1)|ac)/ + ac + 0: ac + +/(?:(a(*SKIP)b)){0}(?:(?1)|ac)/ + ac + 0: ac + +/(?<=(*SKIP)ac)a/ +\= Expect no match + aa +No match + +/A(*MARK:A)A+(*SKIP:B)(B|Z) | AC/x,mark + AAAC + 0: AC + +/a(*SKIP:m)x|ac(*:n)(*SKIP:n)d|ac/mark + acacd + 0: acd +MK: n + +/A(*SKIP:m)x|A(*SKIP:n)x|AB/mark + AB + 0: AB + +/((*SKIP:r)d){0}a(*SKIP:m)x|ac(*:n)|ac/mark + acacd + 0: ac +MK: n + +# Tests that try to figure out how Perl works. My hypothesis is that the first +# verb that is backtracked onto is the one that acts. This seems to be the case +# almost all the time, but there is one exception that is perhaps a bug. + +# This matches "aaaac"; each PRUNE advances one character until the subject no +# longer starts with 5 'a's. + +/aaaaa(*PRUNE)b|a+c/ + aaaaaac + 0: aaaac + +# Putting SKIP in front of PRUNE makes no difference, as it is never +# backtracked onto, whether or not it has a label. + +/aaaaa(*SKIP)(*PRUNE)b|a+c/ + aaaaaac + 0: aaaac + +/aaaaa(*SKIP:N)(*PRUNE)b|a+c/ + aaaaaac + 0: aaaac + +/aaaa(*:N)a(*SKIP:N)(*PRUNE)b|a+c/ + aaaaaac + 0: aaaac + +# Putting THEN in front makes no difference. + +/aaaaa(*THEN)(*PRUNE)b|a+c/ + aaaaaac + 0: aaaac + +# However, putting COMMIT in front of the prune changes it to "no match". I +# think this is inconsistent and possibly a bug. 
For the moment, running this +# test is moved out of the Perl-compatible file. + +/aaaaa(*COMMIT)(*PRUNE)b|a+c/ + +# OK, lets play the same game again using SKIP instead of PRUNE. + +# This matches "ac" because SKIP forces the next match to start on the +# sixth "a". + +/aaaaa(*SKIP)b|a+c/ + aaaaaac + 0: ac + +# Putting PRUNE in front makes no difference. + +/aaaaa(*PRUNE)(*SKIP)b|a+c/ + aaaaaac + 0: ac + +# Putting THEN in front makes no difference. + +/aaaaa(*THEN)(*SKIP)b|a+c/ + aaaaaac + 0: ac + +# In this case, neither does COMMIT. This still matches "ac". + +/aaaaa(*COMMIT)(*SKIP)b|a+c/ + aaaaaac + 0: ac + +# This gives "no match", as expected. + +/aaaaa(*COMMIT)b|a+c/ +\= Expect no match + aaaaaac +No match + +# ---- Tests using THEN ---- + +# This matches "aaaaaac", as expected. + +/aaaaa(*THEN)b|a+c/ + aaaaaac + 0: aaaaaac + +# Putting SKIP in front makes no difference. + +/aaaaa(*SKIP)(*THEN)b|a+c/ + aaaaaac + 0: aaaaaac + +# Putting PRUNE in front makes no difference. + +/aaaaa(*PRUNE)(*THEN)b|a+c/ + aaaaaac + 0: aaaaaac + +# Putting COMMIT in front makes no difference. 
+ +/aaaaa(*COMMIT)(*THEN)b|a+c/ + aaaaaac + 0: aaaaaac + +# End of "priority" tests + +/aaaaa(*:m)(*PRUNE:m)(*SKIP:m)m|a+/ + aaaaaa + 0: a + +/aaaaa(*:m)(*MARK:m)(*PRUNE)(*SKIP:m)m|a+/ + aaaaaa + 0: a + +/aaaaa(*:n)(*PRUNE:m)(*SKIP:m)m|a+/ + aaaaaa + 0: aaaa + +/aaaaa(*:n)(*MARK:m)(*PRUNE)(*SKIP:m)m|a+/ + aaaaaa + 0: a + +/a(*MARK:A)aa(*PRUNE:A)a(*SKIP:A)b|a+c/ + aaaac + 0: aac + +/a(*MARK:A)aa(*MARK:A)a(*SKIP:A)b|a+c/ + aaaac + 0: ac + +/aaa(*PRUNE:A)a(*SKIP:A)b|a+c/ + aaaac + 0: aac + +/aaa(*MARK:A)a(*SKIP:A)b|a+c/ + aaaac + 0: ac + +/a(*:m)a(*COMMIT)(*SKIP:m)b|a+c/mark + aaaaaac + 0: ac + +/.?(a|b(*THEN)c)/ + ba + 0: ba + 1: a + +/(a(*COMMIT)b)c|abd/ + abc + 0: abc + 1: ab +\= Expect no match + abd +No match + +/(?=a(*COMMIT)b)abc|abd/ + abc + 0: abc + abd + 0: abd + +/(?>a(*COMMIT)b)c|abd/ + abc + 0: abc + abd + 0: abd + +/a(?=b(*COMMIT)c)[^d]|abd/ + abc + 0: ab +\= Expect no match + abd +No match + +/a(?=bc).|abd/ + abd + 0: abd + abc + 0: ab + +/a(?>b(*COMMIT)c)d|abd/ +\= Expect no match + abceabd +No match + +/a(?>bc)d|abd/ + abceabd + 0: abd + +/(?>a(*COMMIT)b)c|abd/ + abd + 0: abd + +/(?>a(*COMMIT)c)d|abd/ +\= Expect no match + abd +No match + +/((?=a(*COMMIT)b)ab|ac){0}(?:(?1)|a(c))/ + ac + 0: ac + 1: + 2: c + +# These tests were formerly in test 2, but changes in PCRE and Perl have +# made them compatible. + +/^(a)?(?(1)a|b)+$/ +\= Expect no match + a +No match + +/A(*PRUNE:A)A+(*SKIP:A)(B|Z) | AC/x,mark +\= Expect no match + AAAC +No match, mark = A + +/^((abc|abcx)(*THEN)y|abcd)/ + abcd + 0: abcd + 1: abcd +\= Expect no match + abcxy +No match + +/^((yes|no)(*THEN)(*F))?/ +\= Expect no match + yes +No match + +/(A (.*) C? (*THEN) | A D) (*FAIL)/x +\= Expect no match + AbcdCBefgBhiBqz +No match + +/(A (.*) C? (*THEN) | A D) z/x +\= Expect no match + AbcdCBefgBhiBqz +No match + +/(A (.*) C? (*THEN) | A D) \s* (*FAIL)/x +\= Expect no match + AbcdCBefgBhiBqz +No match + +/(A (.*) C? 
(*THEN) | A D) \s* z/x +\= Expect no match + AbcdCBefgBhiBqz +No match + +/(A (.*) (?:C|) (*THEN) | A D) (*FAIL)/x +\= Expect no match + AbcdCBefgBhiBqz +No match + +/(A (.*) (?:C|) (*THEN) | A D) z/x +\= Expect no match + AbcdCBefgBhiBqz +No match + +/(A (.*) C{0,6} (*THEN) | A D) (*FAIL)/x +\= Expect no match + AbcdCBefgBhiBqz +No match + +/(A (.*) C{0,6} (*THEN) | A D) z/x +\= Expect no match + AbcdCBefgBhiBqz +No match + +/(A (.*) (CE){0,6} (*THEN) | A D) (*FAIL)/x +\= Expect no match + AbcdCEBefgBhiBqz +No match + +/(A (.*) (CE){0,6} (*THEN) | A D) z/x +\= Expect no match + AbcdCEBefgBhiBqz +No match + +/(A (.*) (CE*){0,6} (*THEN) | A D) (*FAIL)/x +\= Expect no match + AbcdCBefgBhiBqz +No match + +/(A (.*) (CE*){0,6} (*THEN) | A D) z/x +\= Expect no match + AbcdCBefgBhiBqz +No match + +/(?=a(*COMMIT)b|ac)ac|ac/ +\= Expect no match + ac +No match + +/(?=a(*COMMIT)b|(ac)) ac | (a)c/x +\= Expect no match + ac +No match + +# ---- + +/(?(?!b(*THEN)a)bn|bnn)/ + bnn + 0: bn + +/(?!b(*SKIP)a)bn|bnn/ + bnn + 0: bn + +/(?(?!b(*SKIP)a)bn|bnn)/ + bnn + 0: bn + +/(?!b(*PRUNE)a)bn|bnn/ + bnn + 0: bn + +/(?(?!b(*PRUNE)a)bn|bnn)/ + bnn + 0: bn + +/(?!b(*COMMIT)a)bn|bnn/ + bnn + 0: bn + +/(?(?!b(*COMMIT)a)bn|bnn)/ + bnn + 0: bn + +/(?=b(*SKIP)a)bn|bnn/ +\= Expect no match + bnn +No match + +/(?=b(*THEN)a)bn|bnn/ + bnn + 0: bnn + +/^(?!a(*SKIP)b)/ + ac + 0: + +/^(?!a(*SKIP)b)../ + acd + 0: ac + +/(?!a(*SKIP)b)../ + acd + 0: ac + +/^(?(?!a(*SKIP)b))/ + ac + 0: + +/^(?!a(*PRUNE)b)../ + acd + 0: ac + +/(?!a(*PRUNE)b)../ + acd + 0: ac + +/(?!a(*COMMIT)b)ac|cd/ + ac + 0: ac + +/\A.*?(?:a|bc)/ + ba + 0: ba + +/^(A(*THEN)B|C(*THEN)D)/ + CD + 0: CD + 1: CD + +/(*:m(m)(?&y)(?(DEFINE)(?b))/mark + abc + 0: b +MK: m(m + +/(*PRUNE:m(m)(?&y)(?(DEFINE)(?b))/mark + abc + 0: b +MK: m(m + +/(*SKIP:m(m)(?&y)(?(DEFINE)(?b))/mark + abc + 0: b + +/(*THEN:m(m)(?&y)(?(DEFINE)(?b))/mark + abc + 0: b +MK: m(m + +/^\d*\w{4}/ + 1234 + 0: 1234 +\= Expect no match + 123 +No match + +/^[^b]*\w{4}/ + aaaa + 
0: aaaa +\= Expect no match + aaa +No match + +/^[^b]*\w{4}/i + aaaa + 0: aaaa +\= Expect no match + aaa +No match + +/^a*\w{4}/ + aaaa + 0: aaaa +\= Expect no match + aaa +No match + +/^a*\w{4}/i + aaaa + 0: aaaa +\= Expect no match + aaa +No match + +/(?:(?foo)|(?bar))\k/dupnames + foofoo + 0: foofoo + 1: foo + barbar + 0: barbar + 1: + 2: bar + +/(?A)(?:(?foo)|(?bar))\k/dupnames + AfooA + 0: AfooA + 1: A + 2: foo + AbarA + 0: AbarA + 1: A + 2: + 3: bar +\= Expect no match + Afoofoo +No match + Abarbar +No match + +/^(\d+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/ + 1 IN SOA non-sp1 non-sp2( + 0: 1 IN SOA non-sp1 non-sp2( + 1: 1 + 2: non-sp1 + 3: non-sp2 + +/^ (?:(?A)|(?'B'B)(?A)) (?('A')x) (?()y)$/x,dupnames + Ax + 0: Ax + 1: A + BAxy + 0: BAxy + 1: + 2: B + 3: A + +/^A\xZ/ + A\0Z + 0: A\x00Z + +/^A\o{123}B/ + A\123B + 0: ASB + +/ ^ a + + b $ /x + aaaab + 0: aaaab + +/ ^ a + #comment + + b $ /x + aaaab + 0: aaaab + +/ ^ a + #comment + #comment + + b $ /x + aaaab + 0: aaaab + +/ ^ (?> a + ) b $ /x + aaaab + 0: aaaab + +/ ^ ( a + ) + + \w $ /x + aaaab + 0: aaaab + 1: aaaa + +/(?:a\Kb)*+/aftertext + ababc + 0: b + 0+ c + +/(?>a\Kb)*/aftertext + ababc + 0: b + 0+ c + +/(?:a\Kb)*/aftertext + ababc + 0: b + 0+ c + +/(a\Kb)*+/aftertext + ababc + 0: b + 0+ c + 1: ab + +/(a\Kb)*/aftertext + ababc + 0: b + 0+ c + 1: ab + +/(?:x|(?:(xx|yy)+|x|x|x|x|x)|a|a|a)bc/ +\= Expect no match + acb +No match + +/\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++/ + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + 0: NON QUOTED "QUOT""ED" AFTER + +/\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++/ + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + 0: NON QUOTED "QUOT""ED" AFTER + +/\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++/ + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + 0: NON QUOTED "QUOT""ED" AFTER + +/\A([^\"1]++|[\"2]([^\"3]*+|[\"4][\"5])*+[\"6])++/ + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + 0: NON QUOTED "QUOT""ED" AFTER + 1: AFTER + 2: + +/^\w+(?>\s*)(?<=\w)/ + test test + 0: tes + 
+/(?Pa)(?Pb)/g,dupnames + abbaba + 0: ab + 1: a + 2: b + 0: ab + 1: a + 2: b + +/(?Pa)(?Pb)(?P=same)/g,dupnames + abbaba + 0: aba + 1: a + 2: b + +/(?P=same)?(?Pa)(?Pb)/g,dupnames + abbaba + 0: ab + 1: a + 2: b + 0: ab + 1: a + 2: b + +/(?:(?P=same)?(?:(?P=same)(?Pa)(?P=same)|(?P=same)?(?Pb)(?P=same)){2}(?P=same)(?Pc)(?P=same)){2}(?Pz)?/g,dupnames +\= Expect no match + bbbaaaccccaaabbbcc +No match + +/(?Pa)?(?Pb)?(?()c|d)*l/ + acl + 0: acl + 1: a + bdl + 0: bdl + 1: + 2: b + adl + 0: dl + bcl + 0: l + +/\sabc/ + \x{0b}abc + 0: \x0babc + +/[\Qa]\E]+/ + aa]] + 0: aa]] + +/[\Q]a\E]+/ + aa]] + 0: aa]] + +/A((((((((a))))))))\8B/ + AaaB + 0: AaaB + 1: a + 2: a + 3: a + 4: a + 5: a + 6: a + 7: a + 8: a + +/A(((((((((a)))))))))\9B/ + AaaB + 0: AaaB + 1: a + 2: a + 3: a + 4: a + 5: a + 6: a + 7: a + 8: a + 9: a + +/A[\8\9]B/ + A8B + 0: A8B + A9B + 0: A9B + +/(|ab)*?d/ + abd + 0: abd + 1: ab + xyd + 0: d + +/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/ + 1234abcd + 0: + 1: + 2: + 3: + 4: + 5: + +/(\2|a)(\1)/ + aaa + 0: aa + 1: a + 2: a + +/(\2)(\1)/ + +/Z*(|d*){216}/ + +/(?1)(?#?'){8}(a)/ + baaaaaaaaac + 0: aaaaaaaaa + 1: a + +/((((((((((((x))))))))))))\12/ + xx + 0: xx + 1: x + 2: x + 3: x + 4: x + 5: x + 6: x + 7: x + 8: x + 9: x +10: x +11: x +12: x + +/A[\8]B[\9]C/ + A8B9C + 0: A8B9C + +/(?1)()((((((\1++))\x85)+)|))/ + \x85\x85 + 0: \x85\x85 + 1: + 2: \x85\x85 + 3: \x85\x85 + 4: \x85\x85 + 5: \x85 + 6: + 7: + +/(?|(\k'Pm')|(?'Pm'))/ + abcd + 0: + 1: + +/(?|(aaa)|(b))\g{1}/ + aaaaaa + 0: aaaaaa + 1: aaa + bb + 0: bb + 1: b + +/(?|(aaa)|(b))(?1)/ + aaaaaa + 0: aaaaaa + 1: aaa + baaa + 0: baaa + 1: b +\= Expect no match + bb +No match + +/(?|(aaa)|(b))/ + xaaa + 0: aaa + 1: aaa + xbc + 0: b + 1: b + +/(?|(?'a'aaa)|(?'a'b))\k'a'/ + aaaaaa + 0: aaaaaa + 1: aaa + bb + 0: bb + 1: b + +/(?|(?'a'aaa)|(?'a'b))(?'a'cccc)\k'a'/dupnames + aaaccccaaa + 0: aaaccccaaa + 1: aaa + 2: cccc + bccccb + 0: bccccb + 1: b + 2: cccc + +# /x does not apply to MARK 
labels + +/x (*MARK:ab cd # comment +ef) x/x,mark + axxz + 0: xx +MK: ab cd # comment\x0aef + +/(?<=a(B){0}c)X/ + acX + 0: X + +/(?b)(?(DEFINE)(a+))(?&DEFINE)/ + bbbb + 0: bb + 1: b +\= Expect no match + baaab +No match + +/(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s])/ + \ Fred:099 + 0: + +/(?=.*X)X$/ + \ X + 0: X + +/(?s)(?=.*?)b/ + aabc + 0: b + +/(Z)(a)\2{1,2}?(?-i)\1X/i + ZaAAZX + 0: ZaAAZX + 1: Z + 2: a + +/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/ + +/[s[:digit:]\E-H]+/ + s09-H + 0: s09-H + +/[s[:digit:]\Q\E-H]+/ + s09-H + 0: s09-H + +/a+(?:|b)a/ + aaaa + 0: aaaa + +/X?(R||){3335}/ + +/(?1)(A(*COMMIT)|B)D/ + ABD + 0: ABD + 1: B + XABD + 0: ABD + 1: B + BAD + 0: BAD + 1: A + ABXABD + 0: ABD + 1: B +\= Expect no match + ABX +No match + +/(?(DEFINE)(? 1? (?=(?2)?) 1 2 (?('cond')|3))) + \A + () + (?&m) + \Z/x + 123 + 0: 123 + 1: + 2: + 3: + +/^(?: +(?: A| (1? (?=(?2)?) (1) 2 (?('cond')|3)) ) +(Z) +)+$/x + AZ123Z + 0: AZ123Z + 1: 123 + 2: + 3: 1 + 4: Z +\= Expect no match + AZ12Z +No match + +/^ (?(DEFINE) ( (?!(a)\2b)..) 
) ()(?1) /x + acb + 0: ac + 1: + 2: + 3: +\= Expect no match + aab +No match + +/(?>ab|abab){1,5}?M/ + abababababababababababM + 0: abababababM + +/(?>ab|abab){2}?M/ + abababM + 0: ababM + +/((?(?=(a))a)+k)/ + bbak + 0: ak + 1: ak + 2: a + +/((?(?=(a))a|)+k)/ + bbak + 0: ak + 1: ak + 2: a + +/(?(?!(b))a|b)+k/ + ababbalbbadabak + 0: abak + 1: b + +/(?!(b))c|b/ + Ab + 0: b + Ac + 0: c + +/(?=(b))b|c/ + Ab + 0: b + 1: b + Ac + 0: c + +/^(.|(.)(?1)\2)$/ + a + 0: a + 1: a + aba + 0: aba + 1: aba + 2: a + abcba + 0: abcba + 1: abcba + 2: a + ababa + 0: ababa + 1: ababa + 2: a + abcdcba + 0: abcdcba + 1: abcdcba + 2: a + +/^((.)(?1)\2|.?)$/ + a + 0: a + 1: a + aba + 0: aba + 1: aba + 2: a + abba + 0: abba + 1: abba + 2: a + abcba + 0: abcba + 1: abcba + 2: a + ababa + 0: ababa + 1: ababa + 2: a + abccba + 0: abccba + 1: abccba + 2: a + abcdcba + 0: abcdcba + 1: abcdcba + 2: a + abcddcba + 0: abcddcba + 1: abcddcba + 2: a + +/^(.)(\1|a(?2))/ + bab + 0: bab + 1: b + 2: ab + +/^(.|(.)(?1)?\2)$/ + abcba + 0: abcba + 1: abcba + 2: a + +/^(?(?=(a))abc|def)/ + abc + 0: abc + 1: a + +/^(?(?!(a))def|abc)/ + abc + 0: abc + 1: a + +/^(?(?=(a)(*ACCEPT))abc|def)/ + abc + 0: abc + 1: a + +/^(?(?!(a)(*ACCEPT))def|abc)/ + abc + 0: abc + 1: a + +/^(?1)\d{3}(a)/ + a123a + 0: a123a + 1: a + +# This pattern uses a lot of named subpatterns in order to match email +# addresses in various formats. It's a heavy test for named subpatterns. In the +# group, slash is coded as \x{2f} so that this pattern can also be +# processed by perltest.sh, which does not cater for an escaped delimiter +# within the pattern. $ within the pattern must also be escaped. All $ and @ +# characters in subject strings are escaped so that Perl doesn't interpret them +# as variable insertions and " characters must also be escaped for Perl. + +# This set of subpatterns is more or less a direct transliteration of the BNF +# definitions in RFC2822, without any of the obsolete features. 
The addition of +# a possessive + to the definition of reduced the match limit in PCRE2 +# from over 5 million to just under 400, and eliminated a very noticeable delay +# when this file was passed to perltest.sh. + +/(?ix)(?(DEFINE) +(? (?&local_part) \@ (?&domain) ) +(? (?&CFWS)?+ < (?&addr_spec) > (?&CFWS)?+ ) +(? [a-z\d!#\$%&'*+-\x{2f}=?^_`{|}~] ) +(? (?&CFWS)?+ (?&atext)+ (?&CFWS)?+ ) +(? (?&ctext) | (?"ed_pair) | (?&comment) ) +(? [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ ()\\] ) +(? \( (?: (?&FWS)?+ (?&ccontent) )*+ (?&FWS)?+ \) ) +(? (?: (?&FWS)?+ (?&comment) )* (?# NOT possessive) + (?: (?&FWS)?+ (?&comment) | (?&FWS) ) ) +(? (?&dtext) | (?"ed_pair) ) +(? (?&phrase) ) +(? (?&dot_atom) | (?&domain_literal) ) +(? (?&CFWS)?+ \[ (?: (?&FWS)?+ (?&dcontent) )* (?&FWS)?+ \] + (?&CFWS)?+ ) +(? (?&CFWS)?+ (?&dot_atom_text) (?&CFWS)?+ ) +(? (?&atext)++ (?: \. (?&atext)++)*+ ) +(? [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ \[\]\\] ) +(? (?: [\t\ ]*+ \n)?+ [\t\ ]++ ) +(? (?&dot_atom) | (?"ed_string) ) +(? (?&name_addr) | (?&addr_spec) ) +(? (?&display_name)? (?&angle_addr) ) +(? (?&word)++ ) +(? (?&qtext) | (?"ed_pair) ) +(? " (?&text) ) +(? (?&CFWS)?+ " (?: (?&FWS)?+ (?&qcontent))* (?&FWS)?+ " + (?&CFWS)?+ ) +(? [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ "\\] ) +(? [^\r\n] ) +(? (?&atom) | (?"ed_string) ) +) # End DEFINE +^(?&mailbox)$/ + Alan Other + 0: Alan Other + + 0: + user\@dom.ain + 0: user@dom.ain + user\@[] + 0: user@[] + user\@[domain literal] + 0: user@[domain literal] + user\@[domain literal with \"[square brackets\"] inside] + 0: user@[domain literal with "[square brackets"] inside] + \"A. Other\" (a comment) + 0: "A. Other" (a comment) + A. Other (a comment) + 0: A. Other (a comment) + \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay + 0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay +\= Expect no match + A missing angle (?&simple_assertion) | (?&lookaround) ) + +(? \( \? > (?®ex) \) ) + +(? 
\\ \d+ | + \\g (?: [+-]?\d+ | \{ (?: [+-]?\d+ | (?&groupname) ) \} ) | + \\k <(?&groupname)> | + \\k '(?&groupname)' | + \\k \{ (?&groupname) \} | + \( \? P= (?&groupname) \) ) + +(? (?:(?&assertion) | + (?&callout) | + (?&comment) | + (?&option_setting) | + (?&qualified_item) | + (?"ed_string) | + (?"ed_string_empty) | + (?&special_escape) | + (?&verb) + )* ) + +(? \(\?C (?: \d+ | + (?: (?["'`^%\#\$]) + (?: \k'D'\k'D' | (?!\k'D') . )* \k'D' | + \{ (?: \}\} | [^}]*+ )* \} ) + )? \) ) + +(? \( (?: \? P? < (?&groupname) > | \? ' (?&groupname) ' )? + (?®ex) \) ) + +(? \[ \^?+ (?: \] (?&class_item)* | (?&class_item)+ ) \] ) + +(? (?! \\N\{\w+\} ) \\ [dDsSwWhHvVRN] ) + +(? (?: \[ : (?: + alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print| + punct|space|upper|word|xdigit + ) : \] | + (?"ed_string) | + (?"ed_string_empty) | + (?&escaped_character) | + (?&character_type) | + [^]] ) ) + +(? \(\?\# [^)]* \) | (?"ed_string_empty) | \\E ) + +(? (?: \( [+-]? \d+ \) | + \( < (?&groupname) > \) | + \( ' (?&groupname) ' \) | + \( R \d* \) | + \( R & (?&groupname) \) | + \( (?&groupname) \) | + \( DEFINE \) | + \( VERSION >?=\d+(?:\.\d\d?)? \) | + (?&callout)?+ (?&comment)* (?&lookaround) ) ) + +(? \(\? (?&condition) (?&branch) (?: \| (?&branch) )? \) ) + +(? (? [-\x{2f}!"'`=_:;,%&@~]) (?®ex) + \k'delimiter' .* ) + +(? \\ (?: 0[0-7]{1,2} | [0-7]{1,3} | o\{ [0-7]+ \} | + x \{ (*COMMIT) [[:xdigit:]]* \} | x [[:xdigit:]]{0,2} | + [aefnrt] | c[[:print:]] | + [^[:alnum:]] ) ) + +(? (?&capturing_group) | (?&non_capturing_group) | + (?&resetting_group) | (?&atomic_group) | + (?&conditional_group) ) + +(? [a-zA-Z_]\w* ) + +(? (?! (?&range_qualifier) ) [^[()|*+?.\$\\] ) + +(? \(\? (?: = | ! | <= | \(\? [iJmnsUx-]* : (?®ex) \) ) + +(? \(\? [iJmnsUx-]* \) ) + +(? (?:\. | + (?&lookaround) | + (?&back_reference) | + (?&character_class) | + (?&character_type) | + (?&escaped_character) | + (?&group) | + (?&subroutine_call) | + (?&literal_character) | + (?"ed_string) + ) (?&comment)? 
(?&qualifier)? ) + +(? (?: [?*+] | (?&range_qualifier) ) [+?]? ) + +(? (?: \\Q (?: (?!\\E | \k'delimiter') . )++ (?: \\E | ) ) ) + +(? \\Q\\E ) + +(? \{ (?: \d+ (?: , \d* )? | , \d+ ) \} ) + +(? (?&start_item)* (?&branch) (?: \| (?&branch) )* ) + +(? \( \? \| (?®ex) \) ) + +(? \^ | \$ | \\A | \\b | \\B | \\G | \\z | \\Z ) + +(? \\K ) + +(? \( \* (?: + ANY | + ANYCRLF | + BSR_ANYCRLF | + BSR_UNICODE | + CR | + CRLF | + LF | + LIMIT_MATCH=\d+ | + LIMIT_DEPTH=\d+ | + LIMIT_HEAP=\d+ | + NOTEMPTY | + NOTEMPTY_ATSTART | + NO_AUTO_POSSESS | + NO_DOTSTAR_ANCHOR | + NO_JIT | + NO_START_OPT | + NUL | + UTF | + UCP ) \) ) + +(? (?: \(\?R\) | \(\?[+-]?\d+\) | + \(\? (?: & | P> ) (?&groupname) \) | + \\g < (?&groupname) > | + \\g ' (?&groupname) ' | + \\g < [+-]? \d+ > | + \\g ' [+-]? \d+ ) ) + +(? \(\* (?: ACCEPT | FAIL | F | COMMIT | + (?:MARK)?:(?&verbname) | + (?:PRUNE|SKIP|THEN) (?: : (?&verbname)? )? ) \) ) + +(? [^)]+ ) + +) # End DEFINE +# Kick it all off... +^(?&delimited_regex)$/subject_literal,jitstack=256 + /^(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\11*(\3\4)\1(?#)2$/ + 0: /^(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\11*(\3\4)\1(?#)2$/ + /(cat(a(ract|tonic)|erpillar)) \1()2(3)/ + 0: /(cat(a(ract|tonic)|erpillar)) \1()2(3)/ + /^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/ + 0: /^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/ + /^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/ + 0: /^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/ + /]{0,})>]{0,})>([\d]{0,}\.)(.*)((
([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/is + 0: /]{0,})>]{0,})>([\d]{0,}\.)(.*)((
([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/is + /^(?(DEFINE) (?
a) (? b) ) (?&A) (?&B) / + 0: /^(?(DEFINE) (? a) (? b) ) (?&A) (?&B) / + /(?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}/ + 0: /(?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}/ + /\b(?&byte)(\.(?&byte)){3}(?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))/ + 0: /\b(?&byte)(\.(?&byte)){3}(?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))/ + /^(\w++|\s++)*$/ + 0: /^(\w++|\s++)*$/ + /a+b?(*THEN)c+(*FAIL)/ + 0: /a+b?(*THEN)c+(*FAIL)/ + /(A (A|B(*ACCEPT)|C) D)(E)/x + 0: /(A (A|B(*ACCEPT)|C) D)(E)/x + /^\W*+(?:((.)\W*+(?1)\W*+\2|)|((.)\W*+(?3)\W*+\4|\W*+.\W*+))\W*+$/i + 0: /^\W*+(?:((.)\W*+(?1)\W*+\2|)|((.)\W*+(?3)\W*+\4|\W*+.\W*+))\W*+$/i + /A(*PRUNE)B(*SKIP)C(*THEN)D(*COMMIT)E(*F)F(*FAIL)G(?!)H(*ACCEPT)I/B + 0: /A(*PRUNE)B(*SKIP)C(*THEN)D(*COMMIT)E(*F)F(*FAIL)G(?!)H(*ACCEPT)I/B + /(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B,callout_info + 0: /(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B,callout_info + /(?sx)(?(DEFINE)(? (?&simple_assertion) | (?&lookaround) )(? \( \? > (?®ex) \) )(? \\ \d+ | \\g (?: [+-]?\d+ | \{ (?: [+-]?\d+ | (?&groupname) ) \} ) | \\k <(?&groupname)> | \\k '(?&groupname)' | \\k \{ (?&groupname) \} | \( \? P= (?&groupname) \) )(? (?:(?&assertion) | (?&callout) | (?&comment) | (?&option_setting) | (?&qualified_item) | (?"ed_string) | (?"ed_string_empty) | (?&special_escape) | (?&verb) )* )(? \(\?C (?: \d+ | (?: (?["'`^%\#\$]) (?: \k'D'\k'D' | (?!\k'D') . )* \k'D' | \{ (?: \}\} | [^}]*+ )* \} ) )? \) )(? \( (?: \? P? < (?&groupname) > | \? ' (?&groupname) ' )? (?®ex) \) )(? \[ \^?+ (?: \] (?&class_item)* | (?&class_item)+ ) \] )(? (?! \\N\{\w+\} ) \\ [dDsSwWhHvVRN] )(? (?: \[ : (?: alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print| punct|space|upper|word|xdigit ) : \] | (?"ed_string) | (?"ed_string_empty) | (?&escaped_character) | (?&character_type) | [^]] ) )(? \(\?\# [^)]* \) | (?"ed_string_empty) | \\E )(? (?: \( [+-]? 
\d+ \) | \( < (?&groupname) > \) | \( ' (?&groupname) ' \) | \( R \d* \) | \( R & (?&groupname) \) | \( (?&groupname) \) | \( DEFINE \) | \( VERSION >?=\d+(?:\.\d\d?)? \) | (?&callout)?+ (?&comment)* (?&lookaround) ) )(? \(\? (?&condition) (?&branch) (?: \| (?&branch) )? \) )(? (? [-\x{2f}!"'`=_:;,%&@~]) (?®ex) \k'delimiter' .* )(? \\ (?: 0[0-7]{1,2} | [0-7]{1,3} | o\{ [0-7]+ \} | x \{ (*COMMIT) [[:xdigit:]]* \} | x [[:xdigit:]]{0,2} | [aefnrt] | c[[:print:]] | [^[:alnum:]] ) )(? (?&capturing_group) | (?&non_capturing_group) | (?&resetting_group) | (?&atomic_group) | (?&conditional_group) )(? [a-zA-Z_]\w* )(? (?! (?&range_qualifier) ) [^[()|*+?.\$\\] )(? \(\? (?: = | ! | <= | \(\? [iJmnsUx-]* : (?®ex) \) )(? \(\? [iJmnsUx-]* \) )(? (?:\. | (?&lookaround) | (?&back_reference) | (?&character_class) | (?&character_type) | (?&escaped_character) | (?&group) | (?&subroutine_call) | (?&literal_character) | (?"ed_string) ) (?&comment)? (?&qualifier)? )(? (?: [?*+] | (?&range_qualifier) ) [+?]? )(? (?: \\Q (?: (?!\\E | \k'delimiter') . )++ (?: \\E | ) ) ) (? \\Q\\E ) (? \{ (?: \d+ (?: , \d* )? | , \d+ ) \} )(? (?&start_item)* (?&branch) (?: \| (?&branch) )* )(? \( \? \| (?®ex) \) )(? \^ | \$ | \\A | \\b | \\B | \\G | \\z | \\Z )(? \\K )(? \( \* (?: ANY | ANYCRLF | BSR_ANYCRLF | BSR_UNICODE | CR | CRLF | LF | LIMIT_MATCH=\d+ | LIMIT_DEPTH=\d+ | LIMIT_HEAP=\d+ | NOTEMPTY | NOTEMPTY_ATSTART | NO_AUTO_POSSESS | NO_DOTSTAR_ANCHOR | NO_JIT | NO_START_OPT | NUL | UTF | UCP ) \) )(? (?: \(\?R\) | \(\?[+-]?\d+\) | \(\? (?: & | P> ) (?&groupname) \) | \\g < (?&groupname) > | \\g ' (?&groupname) ' | \\g < [+-]? \d+ > | \\g ' [+-]? \d+ ) )(? \(\* (?: ACCEPT | FAIL | F | COMMIT | (?:MARK)?:(?&verbname) | (?:PRUNE|SKIP|THEN) (?: : (?&verbname)? )? ) \) )(? [^)]+ ))^(?&delimited_regex)$/ + 0: /(?sx)(?(DEFINE)(? (?&simple_assertion) | (?&lookaround) )(? \( \? > (?®ex) \) )(? 
\\ \d+ | \\g (?: [+-]?\d+ | \{ (?: [+-]?\d+ | (?&groupname) ) \} ) | \\k <(?&groupname)> | \\k '(?&groupname)' | \\k \{ (?&groupname) \} | \( \? P= (?&groupname) \) )(? (?:(?&assertion) | (?&callout) | (?&comment) | (?&option_setting) | (?&qualified_item) | (?"ed_string) | (?"ed_string_empty) | (?&special_escape) | (?&verb) )* )(? \(\?C (?: \d+ | (?: (?["'`^%\#\$]) (?: \k'D'\k'D' | (?!\k'D') . )* \k'D' | \{ (?: \}\} | [^}]*+ )* \} ) )? \) )(? \( (?: \? P? < (?&groupname) > | \? ' (?&groupname) ' )? (?®ex) \) )(? \[ \^?+ (?: \] (?&class_item)* | (?&class_item)+ ) \] )(? (?! \\N\{\w+\} ) \\ [dDsSwWhHvVRN] )(? (?: \[ : (?: alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print| punct|space|upper|word|xdigit ) : \] | (?"ed_string) | (?"ed_string_empty) | (?&escaped_character) | (?&character_type) | [^]] ) )(? \(\?\# [^)]* \) | (?"ed_string_empty) | \\E )(? (?: \( [+-]? \d+ \) | \( < (?&groupname) > \) | \( ' (?&groupname) ' \) | \( R \d* \) | \( R & (?&groupname) \) | \( (?&groupname) \) | \( DEFINE \) | \( VERSION >?=\d+(?:\.\d\d?)? \) | (?&callout)?+ (?&comment)* (?&lookaround) ) )(? \(\? (?&condition) (?&branch) (?: \| (?&branch) )? \) )(? (? [-\x{2f}!"'`=_:;,%&@~]) (?®ex) \k'delimiter' .* )(? \\ (?: 0[0-7]{1,2} | [0-7]{1,3} | o\{ [0-7]+ \} | x \{ (*COMMIT) [[:xdigit:]]* \} | x [[:xdigit:]]{0,2} | [aefnrt] | c[[:print:]] | [^[:alnum:]] ) )(? (?&capturing_group) | (?&non_capturing_group) | (?&resetting_group) | (?&atomic_group) | (?&conditional_group) )(? [a-zA-Z_]\w* )(? (?! (?&range_qualifier) ) [^[()|*+?.\$\\] )(? \(\? (?: = | ! | <= | \(\? [iJmnsUx-]* : (?®ex) \) )(? \(\? [iJmnsUx-]* \) )(? (?:\. | (?&lookaround) | (?&back_reference) | (?&character_class) | (?&character_type) | (?&escaped_character) | (?&group) | (?&subroutine_call) | (?&literal_character) | (?"ed_string) ) (?&comment)? (?&qualifier)? )(? (?: [?*+] | (?&range_qualifier) ) [+?]? )(? (?: \\Q (?: (?!\\E | \k'delimiter') . )++ (?: \\E | ) ) ) (? \\Q\\E ) (? \{ (?: \d+ (?: , \d* )? | , \d+ ) \} )(? 
(?&start_item)* (?&branch) (?: \| (?&branch) )* )(? \( \? \| (?®ex) \) )(? \^ | \$ | \\A | \\b | \\B | \\G | \\z | \\Z )(? \\K )(? \( \* (?: ANY | ANYCRLF | BSR_ANYCRLF | BSR_UNICODE | CR | CRLF | LF | LIMIT_MATCH=\d+ | LIMIT_DEPTH=\d+ | LIMIT_HEAP=\d+ | NOTEMPTY | NOTEMPTY_ATSTART | NO_AUTO_POSSESS | NO_DOTSTAR_ANCHOR | NO_JIT | NO_START_OPT | NUL | UTF | UCP ) \) )(? (?: \(\?R\) | \(\?[+-]?\d+\) | \(\? (?: & | P> ) (?&groupname) \) | \\g < (?&groupname) > | \\g ' (?&groupname) ' | \\g < [+-]? \d+ > | \\g ' [+-]? \d+ ) )(? \(\* (?: ACCEPT | FAIL | F | COMMIT | (?:MARK)?:(?&verbname) | (?:PRUNE|SKIP|THEN) (?: : (?&verbname)? )? ) \) )(? [^)]+ ))^(?&delimited_regex)$/ +\= Expect no match + /((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ +No match + /(?:(?(2y)a|b)(X))+/ +No match + /a(*MARK)b/ +No match + /a(*CR)b/ +No match + /(?P(?P=abn)(?/xx + < > + 0: < > + +/<(?:[a b])>/xx + < > +No match + +/<(?xxx:[a b])>/ + < > +No match + +/<(?-x:[a b])>/xx + < > + 0: < > + +/[[:digit:]-]+/ + 12-24 + 0: 12-24 + +/((?<=((*ACCEPT)) )\1?\b) / +\= Expect no match + ((?<=((*ACCEPT)) )\\1?\\b)\x20 +No match + +/((?<=((*ACCEPT))X)\1?Y)\1/ + XYYZ + 0: YY + 1: Y + 2: + +/((?<=((*ACCEPT))X)\1?Y(*ACCEPT))\1/ + XYYZ + 0: Y + 1: Y + 2: + +/(?(DEFINE)(?a?)X)^(?&optional_a)a$/ + aa + 0: aa + a + 0: a + +/^(a?)b(?1)a/ + abaa + 0: abaa + 1: a + aba + 0: aba + 1: a + baa + 0: baa + 1: + ba + 0: ba + 1: + +/^(a?)+b(?1)a/ + abaa + 0: abaa + 1: + aba + 0: aba + 1: + baa + 0: baa + 1: + ba + 0: ba + 1: + +/^(a?)++b(?1)a/ + abaa + 0: abaa + 1: + aba + 0: aba + 1: + baa + 0: baa + 1: + ba + 0: ba + 1: + +/^(a?)+b/ + b + 0: b + 1: + ab + 0: ab + 1: + aaab + 0: aaab + 1: + +/(?=a+)a(a+)++b/ + aab + 0: aab + 1: a + +/(?<=\G.)/g,aftertext + abc + 0: + 0+ bc + 0: + 0+ c + 0: + 0+ + +/(?<=(?=.)?)/ + +/(?<=(?=.)?+)/ + +/(?<=(?=.)*)/ + +/(?<=(?=.){4,5})/ + +/(?<=(?=.){4,5}x)/ + +/a(?=.(*:X))(*SKIP:X)(*F)|(.)/ + abc + 0: a + 1: a + +/a(?>(*:X))(*SKIP:X)(*F)|(.)/ + abc + 0: a + 1: a + 
+/a(?:(*:X))(*SKIP:X)(*F)|(.)/ + abc + 0: b + 1: b + +#pattern no_start_optimize + +/(?>a(*:1))(?>b(*:1))(*SKIP:1)x|.*/ + abc + 0: abc + +/(?>a(*:1))(?>b)(*SKIP:1)x|.*/ + abc + 0: abc + +#subject mark + +/a(*ACCEPT:X)b/ + abc + 0: a +MK: X + +/(?=a(*ACCEPT:QQ)bc)axyz/ + axyz + 0: axyz +MK: QQ + +/(?(DEFINE)(a(*ACCEPT:X)))(?1)b/ + abc + 0: ab +MK: X + +/a(*F:X)b/ + abc +No match, mark = X + +/(?(DEFINE)(a(*F:X)))(?1)b/ + abc +No match, mark = X + +/a(*COMMIT:X)b/ + abc + 0: ab +MK: X + +/(?(DEFINE)(a(*COMMIT:X)))(?1)b/ + abc + 0: ab +MK: X + +/a+(*:Z)b(*COMMIT:X)(*SKIP:Z)c|.*/ + aaaabd + 0: bd + +/a+(*:Z)b(*COMMIT:X)(*SKIP:X)c|.*/ + aaaabd +No match, mark = X + +/a(*COMMIT:X)b/ + axabc +No match, mark = X + +#pattern -no_start_optimize +#subject -mark + +/(.COMMIT)(*COMMIT::::::::::interal error:::)/ + +/(*COMMIT:ÿÿ)/ + +/(*COMMIT:]w)/ + +/(?i)A(?^)B(?^x:C D)(?^i)e f/ + aBCDE F + 0: aBCDE F +\= Expect no match + aBCDEF +No match + AbCDe f +No match + +/(*pla:foo).{6}/ + abcfoobarxyz + 0: foobar +\= Expect no match + abcfooba +No match + +/(*positive_lookahead:foo).{6}/ + abcfoobarxyz + 0: foobar + +/(?(*pla:foo).{6}|a..)/ + foobarbaz + 0: foobar + abcfoobar + 0: abc + +/(?(*positive_lookahead:foo).{6}|a..)/ + foobarbaz + 0: foobar + abcfoobar + 0: abc + +/(*plb:foo)bar/ + abcfoobar + 0: bar +\= Expect no match + abcbarfoo +No match + +/(*positive_lookbehind:foo)bar/ + abcfoobar + 0: bar +\= Expect no match + abcbarfoo +No match + +/(?(*plb:foo)bar|baz)/ + abcfoobar + 0: bar + bazfoobar + 0: baz + abcbazfoobar + 0: baz + foobazfoobar + 0: bar + +/(?(*positive_lookbehind:foo)bar|baz)/ + abcfoobar + 0: bar + bazfoobar + 0: baz + abcbazfoobar + 0: baz + foobazfoobar + 0: bar + +/(*nlb:foo)bar/ + abcbarfoo + 0: bar +\= Expect no match + abcfoobar +No match + +/(*negative_lookbehind:foo)bar/ + abcbarfoo + 0: bar +\= Expect no match + abcfoobar +No match + +/(?(*nlb:foo)bar|baz)/ + abcfoobaz + 0: baz + abcbarbaz + 0: bar +\= Expect no match + abcfoobar +No match + 
+/(?(*negative_lookbehind:foo)bar|baz)/ + abcfoobaz + 0: baz + abcbarbaz + 0: bar +\= Expect no match + abcfoobar +No match + +/(*atomic:a+)\w/ + aaab + 0: aaab +\= Expect no match + aaaa +No match + +/ (? \w+ )* \. /xi + pokus. + 0: pokus. + 1: pokus + +/(?(DEFINE) (? \w+ ) ) (?&word)* \./xi + pokus. + 0: pokus. + +/(?(DEFINE) (? \w+ ) ) ( (?&word)* ) \./xi + pokus. + 0: pokus. + 1: + 2: pokus + +/(?&word)* (?(DEFINE) (? \w+ ) ) \./xi + pokus. + 0: pokus. + +/(?&word)* \. (? \w+ )/xi + pokus.hokus + 0: pokus.hokus + 1: hokus + +/a(?(?=(*:2)b).)/mark + abc + 0: ab +MK: 2 + acb + 0: a + +/a(?(?!(*:2)b).)/mark + acb + 0: ac + abc + 0: a +MK: 2 + +/(?:a|ab){1}+c/ +\= Expect no match + abc +No match + +/(a|ab){1}+c/ + abc +No match + +/(a+){1}+a/ +\= Expect no match + aaaa +No match + +/(?(DEFINE)(a|ab))(?1){1}+c/ + abc +No match + +/(?:a|(?=b)|.)*\z/ + abc + 0: abc + +/(?:a|(?=b)|.)*/ + abc + 0: a + +/(?<=a(*SKIP)x)|c/ + abcd +No match + +/(?<=a(*SKIP)x)|d/ + abcd + 0: d + +/(?<=(?=.(?<=x)))/aftertext + abx + 0: + 0+ x + +/(?<=(?=(?<=a)))b/ + ab + 0: b + +/^(?a)(?()b)((?<=b).*)$/ + abc + 0: abc + 1: a + 2: c + +/^(a\1?){4}$/ + aaaa + 0: aaaa + 1: a + aaaaaa + 0: aaaaaa + 1: aa + +/^((\1+)|\d)+133X$/ + 111133X + 0: 111133X + 1: 11 + 2: 11 + +/^(?>.*?([A-Z])(?!.*\1)){26}/i + The quick brown fox jumps over the lazy dog. + 0: The quick brown fox jumps over the lazy dog + 1: g + Jackdaws love my big sphinx of quartz. + 0: Jackdaws love my big sphinx of quartz + 1: z + Pack my box with five dozen liquor jugs. + 0: Pack my box with five dozen liquor jugs + 1: s +\= Expect no match + The quick brown fox jumps over the lazy cat. +No match + Hackdaws love my big sphinx of quartz. +No match + Pack my fox with five dozen liquor jugs. 
+No match + +/(?<=X(?(DEFINE)(A)))X(*F)/ +\= Expect no match + AXYZ +No match + +/(?<=X(?(DEFINE)(A)))./ + AXYZ + 0: Y + +/(?<=X(?(DEFINE)(.*))Y)./ + AXYZ + 0: Z + +/(?<=X(?(DEFINE)(Y))(?1))./ + AXYZ + 0: Z + +/(?(DEFINE)(?bar))(?\x{8c}748364< + 0: \x8c748364 + +/a{65536/ + >a{65536< + 0: a{65536 + +/a\K.(?0)*/ + abac + 0: c + +/(a\K.(?1)*)/ + abac + 0: c + 1: abac + +# -------------------------------------------------------------------------- +# Perl-compatible tests of variable-length lookbehinds. + +/(?<=ab?c).../g + abcdefgacxyz + 0: def + 0: xyz + +/(?<=PQR|ab?c).../g + abcdefgacxyzPQR123 + 0: def + 0: xyz + 0: 123 + +/(?<=ab?c|PQR).../g + abcdefgacxyzPQR123 + 0: def + 0: xyz + 0: 123 + +/(?<=PQ|ab?c).../g + abcdefgacxyzPQR123 + 0: def + 0: xyz + 0: R12 + +/(?<=ab?c|PQ).../g + abcdefgacxyzPQR123 + 0: def + 0: xyz + 0: R12 + +/(?<=a(b?c|d?e?e)f)X./g + acfX1zzzaefX2zzzadeefX3zzzX4zzz + 0: X1 + 1: c + 0: X2 + 1: e + 0: X3 + 1: dee + +/(?)?(?1)??\)|\(\?(?:R|[+-]?\d+)\))(?:(?:[?+*]|\{\d+(?:,\d*)?\})[?+]?)?|\|)*)$/ +# This is not very readable, and also does not handle all features. I have done +# some work on it. + +/^ +(? +# A regular expression is zero or more of these items. + (?: + # An item is one of these: + (?: + [^?+*{}()\[\]\\|]++| # Non-meta characters or unquoted . + \\.| # Quoted . + + \[ # Class, which is [ + (?: # Followed by + \^?\\.| # Optional ^ and any escaped character + \^[^\\]| # OR ^ and not escaped character + [^\\^] # OR neither ^ nor \ + ) # Followed by + (?:[^\]\\]+|\\.)*+ # Zero or more (not ] or \) OR escaped dot + \]| # Class ends with ] + + \( # Parenthesized group + (?: # Start with optional + \?[:=!]| # ? followed by : = ! + \?<[=!]| # OR ?< followed by = or ! + \?> # OR ?> + )? + (?&re)?? # Then a nested + \)| # End parenthesized group + + \(\? # Other parenthesized items + (?: # (? 
followed by + R| # R + [+-]?\d++ # Or optional +- and digits + ) + \)| # End parens + + \(\* # Verbs + (?: + COMMIT| + FAIL| + MARK:[^)]*| + (?:PRUNE|SKIP|THEN)(?::[^\)]*+)? + ) + \) + ) # End list of items + + # Followed by an optional quantifier + + (?: + (?: + [?+*] # ?+* + | # OR + \{\d+ # { digits + (?:,\d*)? # optionally followed by ,digits + \} # then closing } + | # OR + \{,\d+} # {,digits} + ) + [?+]? # optional ungreedy or possessive + )? + + | # OR an "item" is a branch ending + + \| + + )* # Zero or more top-level items. +) # End regex group. +$/x + [abcdef] + 0: [abcdef] + 1: [abcdef] + [abc\\]def] + 0: [abc\]def] + 1: [abc\]def] + a.b|abcd + 0: a.b|abcd + 1: a.b|abcd + ab()d + 0: ab()d + 1: ab()d + ab{1,3}d + 0: ab{1,3}d + 1: ab{1,3}d + ab{,3}d + 0: ab{,3}d + 1: ab{,3}d + ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc) + 0: ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc) + 1: ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc) + ab(*MARK:xyz) + 0: ab(*MARK:xyz) + 1: ab(*MARK:xyz) + (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\\s]) + 0: (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s]) + 1: (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s]) + abcd\\t\\n\\r\\f\\a\\e\\071\\x3b\\^\\\\\\?caxyz + 0: abcd\t\n\r\f\a\e\071\x3b\^\\\?caxyz + 1: abcd\t\n\r\f\a\e\071\x3b\^\\\?caxyz + a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz + 0: a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz + 1: a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz + \\G(?:(?=(\\1.|)(.))){1,13}?(?!.*\\2.*\\2)\\1\\K\\2 + 0: \G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2 + 1: \G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2 +\= Expect no match + ab)d +No match + ab(d +No match + {4,5} +No match + a[]b +No match + (a)(?(1)a|b|c) +No match + +/^..A(*SKIP)B|C/ + 12ADC + 0: C + +/(? ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 + \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf + \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee + \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd + \xfe \xff +Subject length lower bound = 1 + \x{f1} + 0: \x{f1} + \x{bf} + 0: \x{bf} + \x{100} + 0: \x{100} + \x{1000} + 0: \x{1000} +\= Expect no match + \x{c0} +No match + \x{f0} +No match + +/Ā{3,4}/IB,utf +------------------------------------------------------------------ + Bra + \x{100}{3} + \x{100}?+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xc4 +Last code unit = \x80 +Subject length lower bound = 3 + \x{100}\x{100}\x{100}\x{100\x{100} + 0: \x{100}\x{100}\x{100} + +/(\x{100}+|x)/IB,utf +------------------------------------------------------------------ + Bra + CBra 1 + \x{100}++ + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: x \xc4 +Subject length lower bound = 1 + +/(\x{100}*a|x)/IB,utf +------------------------------------------------------------------ + Bra + CBra 1 + \x{100}*+ + a + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: a x \xc4 +Subject length lower bound = 1 + +/(\x{100}{0,2}a|x)/IB,utf +------------------------------------------------------------------ + Bra + CBra 1 + \x{100}{0,2}+ + a + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: a x \xc4 +Subject length lower bound = 1 + 
+/(\x{100}{1,2}a|x)/IB,utf +------------------------------------------------------------------ + Bra + CBra 1 + \x{100} + \x{100}{0,1}+ + a + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: x \xc4 +Subject length lower bound = 1 + +/\x{100}/IB,utf +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xc4 +Last code unit = \x80 +Subject length lower bound = 1 + +/a\x{100}\x{101}*/IB,utf +------------------------------------------------------------------ + Bra + a\x{100} + \x{101}*+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'a' +Last code unit = \x80 +Subject length lower bound = 2 + +/a\x{100}\x{101}+/IB,utf +------------------------------------------------------------------ + Bra + a\x{100} + \x{101}++ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'a' +Last code unit = \x81 +Subject length lower bound = 3 + +/[^\x{c4}]/IB +------------------------------------------------------------------ + Bra + [^\x{c4}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Subject length lower bound = 1 + +/[\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xc4 +Last code unit = \x80 +Subject length lower bound = 1 + \x{100} + 0: \x{100} + Z\x{100} + 0: \x{100} + \x{100}Z + 0: \x{100} + +/[\xff]/IB,utf +------------------------------------------------------------------ + Bra + \x{ff} + 
Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xc3 +Last code unit = \xbf +Subject length lower bound = 1 + >\x{ff}< + 0: \x{ff} + +/[^\xff]/IB,utf +------------------------------------------------------------------ + Bra + [^\x{ff}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Subject length lower bound = 1 + +/\x{100}abc(xyz(?1))/IB,utf +------------------------------------------------------------------ + Bra + \x{100}abc + CBra 1 + xyz + Recurse + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +First code unit = \xc4 +Last code unit = 'z' +Subject length lower bound = 7 + +/\777/I,utf +Capture group count = 0 +Options: utf +First code unit = \xc7 +Last code unit = \xbf +Subject length lower bound = 1 + \x{1ff} + 0: \x{1ff} + \777 + 0: \x{1ff} + +/\x{100}+\x{200}/IB,utf +------------------------------------------------------------------ + Bra + \x{100}++ + \x{200} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xc4 +Last code unit = \x80 +Subject length lower bound = 2 + +/\x{100}+X/IB,utf +------------------------------------------------------------------ + Bra + \x{100}++ + X + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xc4 +Last code unit = 'X' +Subject length lower bound = 2 + +/^[\QĀ\E-\QŐ\E/B,utf +Failed: error 106 at offset 15: missing terminating ] for character class + +# This tests the stricter UTF-8 check according to RFC 3629. 
+ +/X/utf +\= Expect UTF-8 errors + \x{d800} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 + \x{da00} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 + \x{dfff} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 + \x{110000} +Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0 + \x{2000000} +Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 + \x{7fffffff} +Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 +\= Expect no match + \x{d800}\=no_utf_check +No match + \x{da00}\=no_utf_check +No match + \x{dfff}\=no_utf_check +No match + \x{110000}\=no_utf_check +No match + \x{2000000}\=no_utf_check +No match + \x{7fffffff}\=no_utf_check +No match + +/(*UTF8)\x{1234}/ + abcd\x{1234}pqr + 0: \x{1234} + +/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I +Capture group count = 0 +Compile options: +Overall options: utf +\R matches any Unicode newline +Forced newline is CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + +/\h/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x09 \x20 \xc2 \xe1 \xe2 \xe3 +Subject length lower bound = 1 + ABC\x{09} + 0: \x{09} + ABC\x{20} + 0: + ABC\x{a0} + 0: \x{a0} + ABC\x{1680} + 0: \x{1680} + ABC\x{180e} + 0: \x{180e} + ABC\x{2000} + 0: \x{2000} + ABC\x{202f} + 0: \x{202f} + ABC\x{205f} + 0: \x{205f} + ABC\x{3000} + 0: \x{3000} + +/\v/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 +Subject length lower bound = 1 + ABC\x{0a} + 0: \x{0a} + ABC\x{0b} + 0: \x{0b} + ABC\x{0c} + 0: \x{0c} + ABC\x{0d} + 0: \x{0d} + ABC\x{85} + 0: \x{85} + ABC\x{2028} + 0: \x{2028} + +/\h*A/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 +Last code unit = 'A' +Subject length lower bound = 1 + CDBABC + 0: A 
+ +/\v+A/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 +Last code unit = 'A' +Subject length lower bound = 2 + +/\s?xxx\s/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x +Last code unit = 'x' +Subject length lower bound = 4 + +/\sxxx\s/I,utf,tables=2 +Capture group count = 0 +Options: utf +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc2 +Last code unit = 'x' +Subject length lower bound = 5 + AB\x{85}xxx\x{a0}XYZ + 0: \x{85}xxx\x{a0} + AB\x{a0}xxx\x{85}XYZ + 0: \x{a0}xxx\x{85} + +/\S \S/I,utf,tables=2 +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 + \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 + \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 + \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 + \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Last code unit = ' ' +Subject length lower bound = 3 + \x{a2} \x{84} + 0: \x{a2} \x{84} + A Z + 0: A Z + +/a+/utf + a\x{123}aa\=offset=1 + 0: aa + a\x{123}aa\=offset=3 + 0: aa + a\x{123}aa\=offset=4 + 0: a +\= Expect bad offset value + a\x{123}aa\=offset=6 +Failed: error -33: bad offset value +\= Expect bad UTF-8 offset + a\x{123}aa\=offset=2 +Error -36 (bad UTF-8 offset) +\= Expect no match + a\x{123}aa\=offset=5 +No match + +/\x{1234}+/Ii,utf +Capture group count = 0 +Options: caseless utf +Starting code units: \xe1 +Subject length lower bound = 1 + +/\x{1234}+?/Ii,utf +Capture group count = 0 +Options: caseless utf +Starting code units: \xe1 
+Subject length lower bound = 1 + +/\x{1234}++/Ii,utf +Capture group count = 0 +Options: caseless utf +Starting code units: \xe1 +Subject length lower bound = 1 + +/\x{1234}{2}/Ii,utf +Capture group count = 0 +Options: caseless utf +Starting code units: \xe1 +Subject length lower bound = 2 + +/[^\x{c4}]/IB,utf +------------------------------------------------------------------ + Bra + [^\x{c4}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Subject length lower bound = 1 + +/X+\x{200}/IB,utf +------------------------------------------------------------------ + Bra + X++ + \x{200} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'X' +Last code unit = \x80 +Subject length lower bound = 2 + +/\R/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 +Subject length lower bound = 1 + +/\777/IB,utf +------------------------------------------------------------------ + Bra + \x{1ff} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xc7 +Last code unit = \xbf +Subject length lower bound = 1 + +/\w+\x{C4}/B,utf +------------------------------------------------------------------ + Bra + \w++ + \x{c4} + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4} + +/\w+\x{C4}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \w+ + \x{c4} + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4}\x{c4} + +/\W+\x{C4}/B,utf +------------------------------------------------------------------ + Bra + \W+ + \x{c4} + Ket + End +------------------------------------------------------------------ + !\x{C4} + 0: !\x{c4} + +/\W+\x{C4}/B,utf,tables=2 
+------------------------------------------------------------------ + Bra + \W++ + \x{c4} + Ket + End +------------------------------------------------------------------ + !\x{C4} + 0: !\x{c4} + +/\W+\x{A1}/B,utf +------------------------------------------------------------------ + Bra + \W+ + \x{a1} + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/\W+\x{A1}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \W+ + \x{a1} + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/X\s+\x{A0}/B,utf +------------------------------------------------------------------ + Bra + X + \s++ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0} + +/X\s+\x{A0}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + X + \s+ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0}\x{a0} + +/\S+\x{A0}/B,utf +------------------------------------------------------------------ + Bra + \S+ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0}\x{a0} + +/\S+\x{A0}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \S++ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0} + +/\x{a0}+\s!/B,utf +------------------------------------------------------------------ + Bra + \x{a0}++ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! + 0: \x{a0} ! + +/\x{a0}+\s!/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \x{a0}+ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! 
+ 0: \x{a0} ! + +/A/utf + \x{ff000041} +** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8 + \x{7f000041} +Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 + +/(*UTF8)abc/never_utf +Failed: error 174 at offset 7: using UTF is disabled by the application + +/abc/utf,never_utf +Failed: error 174 at offset 0: using UTF is disabled by the application + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf +------------------------------------------------------------------ + Bra + /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +First code unit = 'A' (caseless) +Subject length lower bound = 5 + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf +------------------------------------------------------------------ + Bra + A\x{391}\x{10427}\x{ff3a}\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = \xb0 +Subject length lower bound = 5 + +/AB\x{1fb0}/IB,utf +------------------------------------------------------------------ + Bra + AB\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = \xb0 +Subject length lower bound = 3 + +/AB\x{1fb0}/IBi,utf +------------------------------------------------------------------ + Bra + /i AB\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +First code unit = 'A' (caseless) +Last code unit = 'B' (caseless) +Subject length lower bound = 3 + +/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf +Capture group count = 0 +Options: caseless utf +Starting code units: \xd0 \xd1 +Subject 
length lower bound = 17 + \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} + 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} + \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} + 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} + +/[â±¥]/Bi,utf +------------------------------------------------------------------ + Bra + /i \x{2c65} + Ket + End +------------------------------------------------------------------ + +/[^â±¥]/Bi,utf +------------------------------------------------------------------ + Bra + /i [^\x{2c65}] + Ket + End +------------------------------------------------------------------ + +/\h/I +Capture group count = 0 +Starting code units: \x09 \x20 \xa0 +Subject length lower bound = 1 + +/\v/I +Capture group count = 0 +Starting code units: \x0a \x0b \x0c \x0d \x85 +Subject length lower bound = 1 + +/\R/I +Capture group count = 0 +Starting code units: \x0a \x0b \x0c \x0d \x85 +Subject length lower bound = 1 + +/[[:blank:]]/B,ucp +------------------------------------------------------------------ + Bra + [\x09 \xa0] + Ket + End +------------------------------------------------------------------ + +/\x{212a}+/Ii,utf +Capture group count = 0 +Options: caseless utf +Starting code units: K k \xe2 +Subject length lower bound = 1 + KKkk\x{212a} + 0: KKkk\x{212a} + +/s+/Ii,utf +Capture group count = 0 +Options: caseless utf +Starting code units: S s \xc5 +Subject length lower bound = 1 + SSss\x{17f} + 0: SSss\x{17f} + +/\x{100}*A/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + A + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: A \xc4 +Last 
code unit = 'A' +Subject length lower bound = 1 + A + 0: A + +/\x{100}*\d(?R)/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \d + Recurse + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 +Subject length lower bound = 1 + +/[Z\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + [Z\x{100}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: Z \xc4 +Subject length lower bound = 1 + Z\x{100} + 0: Z + \x{100} + 0: \x{100} + \x{100}Z + 0: \x{100} + +/[z-\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + [z-\xff\x{100}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: z { | } ~ \x7f \xc2 \xc3 \xc4 +Subject length lower bound = 1 + +/[z\Qa-d]Ā\E]/IB,utf +------------------------------------------------------------------ + Bra + [\-\]adz\x{100}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: - ] a d z \xc4 +Subject length lower bound = 1 + \x{100} + 0: \x{100} + Ā + 0: \x{100} + +/[ab\x{100}]abc(xyz(?1))/IB,utf +------------------------------------------------------------------ + Bra + [ab\x{100}] + abc + CBra 1 + xyz + Recurse + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: a b \xc4 +Last code unit = 'z' +Subject length lower bound = 7 + +/\x{100}*\s/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \s + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf 
+Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc4 +Subject length lower bound = 1 + +/\x{100}*\d/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 +Subject length lower bound = 1 + +/\x{100}*\w/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \w + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + \xc4 +Subject length lower bound = 1 + +/\x{100}*\D/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \D + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c + d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff +Subject length lower bound = 1 + +/\x{100}*\S/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \S + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 + \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 + \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 + \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 + \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/\x{100}*\W/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \W + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ [ \ ] ^ ` { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 + \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 + \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 + \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 + \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/[\x{105}-\x{109}]/IBi,utf +------------------------------------------------------------------ + Bra + [\x{104}-\x{109}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +Starting code units: \xc4 +Subject length lower bound = 1 + \x{104} + 0: \x{104} + \x{105} + 0: \x{105} + \x{109} + 0: \x{109} +\= Expect no match + \x{100} +No match + \x{10a} +No match + +/[z-\x{100}]/IBi,utf +------------------------------------------------------------------ + Bra + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2 +Subject length lower bound = 1 + Z + 0: Z + z + 0: z + \x{39c} + 0: \x{39c} + \x{178} + 0: \x{178} + | + 0: | + \x{80} + 0: \x{80} + \x{ff} + 0: \x{ff} + \x{100} + 0: \x{100} + \x{101} + 0: \x{101} +\= Expect no match + \x{102} +No match + Y +No match + y +No match + +/[z-\x{100}]/IBi,utf +------------------------------------------------------------------ + Bra + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2 +Subject length lower bound = 1 + +/\x{3a3}B/IBi,utf +------------------------------------------------------------------ + Bra + clist 03a3 03c2 03c3 + /i B + Ket 
+ End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +Starting code units: \xce \xcf +Last code unit = 'B' (caseless) +Subject length lower bound = 2 + +/abc/utf,replace=à + abc +Failed: error -3: UTF-8 error: 1 byte missing at end + +/(?<=(a)(?-1))x/I,utf +Capture group count = 1 +Max lookbehind = 2 +Options: utf +First code unit = 'x' +Subject length lower bound = 1 + a\x80zx\=offset=3 +Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1 + +/[\W\p{Any}]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{Any}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + 123 + 0: 1 + +/[\W\pL]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{L}] + Ket + End +------------------------------------------------------------------ + abc + 0: a +\= Expect no match + 123 +No match + +/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/utf +Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) + +/[\s[:^ascii:]]/B,ucp +------------------------------------------------------------------ + Bra + [\x80-\xff\p{Xsp}] + Ket + End +------------------------------------------------------------------ + +# A special extra option allows excaped surrogate code points in 8-bit mode, +# but subjects containing them must not be UTF-checked. 
+ +/\x{d800}/I,utf,allow_surrogate_escapes +Capture group count = 0 +Options: utf +Extra options: allow_surrogate_escapes +First code unit = \xed +Last code unit = \x80 +Subject length lower bound = 1 + \x{d800}\=no_utf_check + 0: \x{d800} + +/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes + \x{dfff}\x{df01}\=no_utf_check + 0: \x{dfff}\x{df01} + +# This has different starting code units in 8-bit mode. + +/^[^ab]/IB,utf +------------------------------------------------------------------ + Bra + ^ + [\x00-`c-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: utf +Overall options: anchored utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 + \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf + \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee + \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd + \xfe \xff +Subject length lower bound = 1 + c + 0: c + \x{ff} + 0: \x{ff} + \x{100} + 0: \x{100} +\= Expect no match + aaa +No match + +# Offsets are different in 8-bit mode. 
+ +/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout + 123abcáyzabcdef789abcሴqr + 1(2) Old 6 6 "" New 6 8 "<>" + 2(2) Old 13 13 "" New 15 17 "<>" + 3(2) Old 13 16 "def" New 17 22 "" + 4(2) Old 22 22 "" New 28 30 "<>" + 4: 123abc<>\x{e1}yzabc<>789abc<>\x{1234}qr + +# Check name length with non-ASCII characters + +/(?'ABáC678901234567890123456789012012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf + +/(?'ABáC6789012345678901234567890123012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf +Failed: error 148 at offset 132: subpattern name is too long (maximum 128 code units) + +/(?'ABZC6789012345678901234567890123012345678901234567890123456789AB012345678901234567890123456789AB012345678901234567890123456789AB'...)/utf + +/(?(n/utf +Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) + +/(?(á/utf +Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?) 
+ +# Invalid UTF-8 tests + +/.../g,match_invalid_utf + abcd\x80wxzy\x80pqrs + 0: abc + 0: wxz + 0: pqr + abcd\x{80}wxzy\x80pqrs + 0: abc + 0: d\x{80}w + 0: xzy + 0: pqr + +/abc/match_invalid_utf + ab\x80ab\=ph +Partial match: ab +\= Expect no match + ab\x80cdef\=ph +No match + +/.a/match_invalid_utf + ab\=ph +Partial match: b + ab\=ps +Partial match: b + b\xf0\x91\x88b\=ph +Partial match: b + b\xf0\x91\x88b\=ps +Partial match: b + b\xf0\x91\x88\xb4a + 0: \x{11234}a +\= Expect no match + b\x80\=ph +No match + b\x80\=ps +No match + b\xf0\x91\x88\=ph +No match + b\xf0\x91\x88\=ps +No match + +/.a$/match_invalid_utf + ab\=ph +Partial match: b + ab\=ps +Partial match: b +\= Expect no match + b\xf0\x91\x98\=ph +No match + b\xf0\x91\x98\=ps +No match + +/ab$/match_invalid_utf + ab\x80cdeab + 0: ab +\= Expect no match + ab\x80cde +No match + +/.../g,match_invalid_utf + abcd\x{80}wxzy\x80pqrs + 0: abc + 0: d\x{80}w + 0: xzy + 0: pqr + +/(?<=x)../g,match_invalid_utf + abcd\x{80}wxzy\x80pqrs + 0: zy + abcd\x{80}wxzy\x80xpqrs + 0: zy + 0: pq + +/X$/match_invalid_utf +\= Expect no match + X\xc4 +No match + +/(?<=..)X/match_invalid_utf,aftertext + AB\x80AQXYZ + 0: X + 0+ YZ + AB\x80AQXYZ\=offset=5 + 0: X + 0+ YZ + AB\x80\x80AXYZXC\=offset=5 + 0: X + 0+ C +\= Expect no match + AB\x80XYZ +No match + AB\x80XYZ\=offset=3 +No match + AB\xfeXYZ +No match + AB\xffXYZ\=offset=3 +No match + AB\x80AXYZ +No match + AB\x80AXYZ\=offset=4 +No match + AB\x80\x80AXYZ\=offset=5 +No match + +/.../match_invalid_utf + AB\xc4CCC + 0: CCC +\= Expect no match + A\x{d800}B +No match + A\x{110000}B +No match + A\xc4B +No match + +/\bX/match_invalid_utf + A\x80X + 0: X + +/\BX/match_invalid_utf +\= Expect no match + A\x80X +No match + +/(?<=...)X/match_invalid_utf + AAA\x80BBBXYZ + 0: X +\= Expect no match + AAA\x80BXYZ +No match + AAA\x80BBXYZ +No match + +# ------------------------------------- + +/(*UTF)(?=\x{123})/I +Capture group count = 0 +May match empty string +Compile options: +Overall options: 
utf +First code unit = \xc4 +Last code unit = \xa3 +Subject length lower bound = 1 + +/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf +Capture group count = 0 +Options: utf +Starting code units: \xc3 +Last code unit = 'X' +Subject length lower bound = 3 + +/[󿾟,]/BI,utf +------------------------------------------------------------------ + Bra + [,\x{fff9f}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: , \xf3 +Subject length lower bound = 1 + +/[\x{fff4}-\x{ffff8}]/I,utf +Capture group count = 0 +Options: utf +Starting code units: \xef \xf0 \xf1 \xf2 \xf3 +Subject length lower bound = 1 + +/[\x{fff4}-\x{afff8}\x{10ffff}]/I,utf +Capture group count = 0 +Options: utf +Starting code units: \xef \xf0 \xf1 \xf2 \xf4 +Subject length lower bound = 1 + +/[\xff\x{ffff}]/I,utf +Capture group count = 0 +Options: utf +Starting code units: \xc3 \xef +Subject length lower bound = 1 + +/[\xff\x{ff}]/I,utf +Capture group count = 0 +Options: utf +Starting code units: \xc3 +Subject length lower bound = 1 + abc\x{ff}def + 0: \x{ff} + +/[\xff\x{ff}]/I +Capture group count = 0 +First code unit = \xff +Subject length lower bound = 1 + abc\x{ff}def + 0: \xff + +/[Ss]/I +Capture group count = 0 +First code unit = 'S' (caseless) +Subject length lower bound = 1 + +/[Ss]/I,utf +Capture group count = 0 +Options: utf +Starting code units: S s +Subject length lower bound = 1 + +/(?:\x{ff}|\x{3000})/I,utf +Capture group count = 0 +Options: utf +Starting code units: \xc3 \xe3 +Subject length lower bound = 1 + +/x/utf + abxyz + 0: x + \x80\=startchar +Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0 + abc\x80\=startchar +Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3 + abc\x80\=startchar,offset=3 +Error -36 (bad UTF-8 offset) + +/\x{c1}+\x{e1}/iIB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + 
End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Last code unit = \xe1 (caseless) +Subject length lower bound = 2 + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + \x{e1}\x{e1}\x{e1} + 0: \xe1\xe1\xe1 + +/a|\x{c1}/iI,ucp +Capture group count = 0 +Options: caseless ucp +Starting code units: A a \xc1 \xe1 +Subject length lower bound = 1 + \x{e1}xxx + 0: \xe1 + +/a|\x{c1}/iI,utf +Capture group count = 0 +Options: caseless utf +Starting code units: A a \xc3 +Subject length lower bound = 1 + \x{e1}xxx + 0: \x{e1} + +/\x{c1}|\x{e1}/iI,ucp +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Subject length lower bound = 1 + +/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended + X\x{e1}Y + 1: >\xc1< + +/X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended + X\x{c1}Y + 1: >\xe1< + +# Without UTF or UCP characters > 127 have only one case in the default locale. + +/X(\x{e1})Y/replace=>\U$1<,substitute_extended + X\x{e1}Y + 1: >\xe1< + +/A/utf,match_invalid_utf,caseless + \xe5A + 0: A + +/\bch\b/utf,match_invalid_utf + qchq\=ph +Partial match: + qchq\=ps +Partial match: + +/line1\nbreak/firstline,utf,match_invalid_utf + line1\nbreak + 0: line1\x{0a}break + line0\nline1\nbreak +No match + +/A\z/utf,match_invalid_utf + A\x80\x42\n +No match + +# End of testinput10 diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 new file mode 100644 index 0000000..8768785 --- /dev/null +++ b/testdata/testoutput11-16 @@ -0,0 +1,668 @@ +# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF) +# features that are not compatible with the 8-bit library, or which give +# different output in 16-bit or 32-bit mode. The output for the two widths is +# different, so they have separate output files. 
+ +#forbid_utf +#newline_default LF ANY ANYCRLF + +/[^\x{c4}]/IB +------------------------------------------------------------------ + Bra + [^\x{c4}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Subject length lower bound = 1 + +/\x{100}/I +Capture group count = 0 +First code unit = \x{100} +Subject length lower bound = 1 + +/ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional leading comment +(?: (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address +| # or +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # one word, optionally followed by.... +(?: +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or... +\( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) | # comments, or... + +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +# quoted strings +)* +< (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # leading < +(?: @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* + +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* , (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +)* # further okay, if led by comma +: # closing colon +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* )? # optional route +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address spec +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* > # trailing > +# name and address +) (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional trailing comment +/Ix +Capture group count = 0 +Contains explicit CR or LF match +Options: extended +Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 + 9 = ? 
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e + f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff +Subject length lower bound = 3 + +/[\h]/B +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] + Ket + End +------------------------------------------------------------------ + >\x09< + 0: \x09 + +/[\h]+/B +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]++ + Ket + End +------------------------------------------------------------------ + >\x09\x20\xa0< + 0: \x09 \xa0 + +/[\v]/B +------------------------------------------------------------------ + Bra + [\x0a-\x0d\x85\x{2028}-\x{2029}] + Ket + End +------------------------------------------------------------------ + +/[^\h]/B +------------------------------------------------------------------ + Bra + [^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] + Ket + End +------------------------------------------------------------------ + +/\h+/I +Capture group count = 0 +Starting code units: \x09 \x20 \xa0 \xff +Subject length lower bound = 1 + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + 0: \x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\xa0\x{2000} + 0: \x{200a}\xa0\x{2000} + +/[\h\x{dc00}]+/IB +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{dc00}]++ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x09 \x20 \xa0 \xff +Subject length lower bound = 1 + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + 0: \x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\xa0\x{2000} + 0: \x{200a}\xa0\x{2000} + +/\H+/I +Capture group count = 0 +Subject length lower bound = 1 + 
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + 0: \x{167f}\x{1681}\x{180d}\x{180f} + \x{2000}\x{200a}\x{1fff}\x{200b} + 0: \x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + 0: \x{202e}\x{2030}\x{205e}\x{2060} + \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} + 0: \x9f\xa1\x{2fff}\x{3001} + +/[\H\x{d800}]+/ + \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + 0: \x{167f}\x{1681}\x{180d}\x{180f} + \x{2000}\x{200a}\x{1fff}\x{200b} + 0: \x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + 0: \x{202e}\x{2030}\x{205e}\x{2060} + \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} + 0: \x9f\xa1\x{2fff}\x{3001} + +/\v+/I +Capture group count = 0 +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Subject length lower bound = 1 + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d + 0: \x85\x0a\x0b\x0c\x0d + +/[\v\x{dc00}]+/IB +------------------------------------------------------------------ + Bra + [\x0a-\x0d\x85\x{2028}-\x{2029}\x{dc00}]++ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Subject length lower bound = 1 + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d + 0: \x85\x0a\x0b\x0c\x0d + +/\V+/I +Capture group count = 0 +Subject length lower bound = 1 + \x{2028}\x{2029}\x{2027}\x{2030} + 0: \x{2027}\x{2030} + \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 + 0: \x09\x0e\x84\x86 + +/[\V\x{d800}]+/ + \x{2028}\x{2029}\x{2027}\x{2030} + 0: \x{2027}\x{2030} + \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 + 0: \x09\x0e\x84\x86 + +/\R+/I,bsr=unicode +Capture group count = 0 +\R matches any Unicode newline +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Subject length lower bound = 1 + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d + 0: \x85\x0a\x0b\x0c\x0d + +/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I +Capture group count 
= 0 +First code unit = \x{d800} +Last code unit = \x{dd00} +Subject length lower bound = 6 + \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00} + 0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00} + +/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B +------------------------------------------------------------------ + Bra + [^\x{80}] + [^\x{ff}] + [^\x{100}] + [^\x{1000}] + [^\x{ffff}] + Ket + End +------------------------------------------------------------------ + +/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi +------------------------------------------------------------------ + Bra + /i [^\x{80}] + /i [^\x{ff}] + /i [^\x{100}] + /i [^\x{1000}] + /i [^\x{ffff}] + Ket + End +------------------------------------------------------------------ + +/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B +------------------------------------------------------------------ + Bra + [^\x{100}]* + [^\x{1000}]+ + [^\x{ffff}]?? + [^\x{8000}]{4} + [^\x{8000}]* + [^\x{7fff}]{2} + [^\x{7fff}]{0,7}? + [^\x{100}]{5} + [^\x{100}]?+ + Ket + End +------------------------------------------------------------------ + +/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi +------------------------------------------------------------------ + Bra + /i [^\x{100}]* + /i [^\x{1000}]+ + /i [^\x{ffff}]?? + /i [^\x{8000}]{4} + /i [^\x{8000}]* + /i [^\x{7fff}]{2} + /i [^\x{7fff}]{0,7}? 
+ /i [^\x{100}]{5} + /i [^\x{100}]?+ + Ket + End +------------------------------------------------------------------ + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark + XX + 0: XX +MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark + XX + 0: XX +MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE + +/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ + +/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref +------------------------------------------------------------------ + Bra + [\x{100}-\x{200}] + Ket + End +------------------------------------------------------------------ + +/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref +------------------------------------------------------------------ + Bra + \x{d800} + Ket + End +------------------------------------------------------------------ + +/^\x{ffff}+/i + \x{ffff} + 0: \x{ffff} + +/^\x{ffff}?/i + \x{ffff} + 0: \x{ffff} + 
+/^\x{ffff}*/i + \x{ffff} + 0: \x{ffff} + +/^\x{ffff}{3}/i + \x{ffff}\x{ffff}\x{ffff} + 0: \x{ffff}\x{ffff}\x{ffff} + +/^\x{ffff}{0,3}/i + \x{ffff} + 0: \x{ffff} + +/[^\x00-a]{12,}[^b-\xff]*/B +------------------------------------------------------------------ + Bra + [b-\xff] (neg){12,} + [\x00-a] (neg)*+ + Ket + End +------------------------------------------------------------------ + +/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B +------------------------------------------------------------------ + Bra + [\x00-\x08\x0e-\x1f!-\xff] (neg)* + \s* + + [0-9A-Z_a-z]++ + \W+ + + [\x00-/:-\xff] (neg)*? + \d + 0 + [\x00-/:-@[-^`{-\xff] (neg){4,6}? + \w* + A + Ket + End +------------------------------------------------------------------ + +/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B +------------------------------------------------------------------ + Bra + a* + [b-\xff\x{100}-\x{200}]?+ + a# + a*+ + [b-\xff\x{100}-\x{200}]? + b# + [a-f]*+ + [g-\xff\x{100}-\x{200}]*+ + # + [g-\xff\x{100}-\x{200}]*+ + [a-c]*+ + # + [g-\xff\x{100}-\x{200}]* + [a-h]*+ + Ket + End +------------------------------------------------------------------ + +/^[\x{1234}\x{4321}]{2,4}?/ + \x{1234}\x{1234}\x{1234} + 0: \x{1234}\x{1234} + +# Check maximum non-UTF character size for the 16-bit library. + +/\x{ffff}/ + A\x{ffff}B + 0: \x{ffff} + +/\x{10000}/ +Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large + +/\o{20000}/ + +# Check maximum character size for the 32-bit library. These will all give +# errors in the 16-bit library. 
+ +/\x{110000}/ +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/\x{7fffffff}/ +Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large + +/\x{80000000}/ +Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large + +/\x{ffffffff}/ +Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large + +/\x{100000000}/ +Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + +/\o{17777777777}/ +Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large + +/\o{20000000000}/ +Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large + +/\o{37777777777}/ +Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large + +/\o{40000000000}/ +Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large + +/\x{7fffffff}\x{7fffffff}/I +Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large + +/\x{80000000}\x{80000000}/I +Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large + +/\x{ffffffff}\x{ffffffff}/I +Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large + +# Non-UTF characters + +/.{2,3}/ + \x{400000}\x{400001}\x{400002}\x{400003} +** Character \x{400000} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{400001} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{400002} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{400003} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. 
+ 0: \x00\x01\x02 + +/\x{400000}\x{800000}/IBi +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +# Check character ranges + +/[\H]/IB +------------------------------------------------------------------ + Bra + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b + \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a + \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 + : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ + _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 + \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f + \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e + \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae + \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd + \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc + \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb + \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea + \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 + \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/[\V]/IB +------------------------------------------------------------------ + Bra + [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e + \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d + \x1e \x1f \x20 
! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > + ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c + d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 + \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 + \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 + \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf + \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce + \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd + \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec + \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb + \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/(*THEN:\[A]{65501})/expand + +# We can use pcre2test's utf8_input modifier to create wide pattern characters, +# even though this test is run when UTF is not supported. + +/abý¿¿¿¿¿z/utf8_input +** Failed: character value greater than 0xffff cannot be converted to 16-bit in non-UTF mode + abý¿¿¿¿¿z + ab\x{7fffffff}z + +/abÿý¿¿¿¿¿z/utf8_input +** Failed: invalid UTF-8 string cannot be converted to 16-bit string + abÿý¿¿¿¿¿z + ab\x{ffffffff}z + +/abÿAz/utf8_input +** Failed: invalid UTF-8 string cannot be converted to 16-bit string + abÿAz + ab\x{80000041}z + +/(?i:A{1,}\6666666666)/ + A\x{1b6}6666666 + 0: A\x{1b6}6666666 + +# End of testinput11 diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 new file mode 100644 index 0000000..2c95f61 --- /dev/null +++ b/testdata/testoutput11-32 @@ -0,0 +1,674 @@ +# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF) +# features that are not compatible with the 8-bit library, or which give +# different output in 16-bit or 32-bit mode. The output for the two widths is +# different, so they have separate output files. 
+ +#forbid_utf +#newline_default LF ANY ANYCRLF + +/[^\x{c4}]/IB +------------------------------------------------------------------ + Bra + [^\x{c4}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Subject length lower bound = 1 + +/\x{100}/I +Capture group count = 0 +First code unit = \x{100} +Subject length lower bound = 1 + +/ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional leading comment +(?: (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address +| # or +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # one word, optionally followed by.... +(?: +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or... +\( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) | # comments, or... + +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +# quoted strings +)* +< (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # leading < +(?: @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* + +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* , (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +)* # further okay, if led by comma +: # closing colon +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* )? # optional route +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address spec +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* > # trailing > +# name and address +) (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional trailing comment +/Ix +Capture group count = 0 +Contains explicit CR or LF match +Options: extended +Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 + 9 = ? 
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e + f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff +Subject length lower bound = 3 + +/[\h]/B +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] + Ket + End +------------------------------------------------------------------ + >\x09< + 0: \x09 + +/[\h]+/B +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]++ + Ket + End +------------------------------------------------------------------ + >\x09\x20\xa0< + 0: \x09 \xa0 + +/[\v]/B +------------------------------------------------------------------ + Bra + [\x0a-\x0d\x85\x{2028}-\x{2029}] + Ket + End +------------------------------------------------------------------ + +/[^\h]/B +------------------------------------------------------------------ + Bra + [^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] + Ket + End +------------------------------------------------------------------ + +/\h+/I +Capture group count = 0 +Starting code units: \x09 \x20 \xa0 \xff +Subject length lower bound = 1 + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + 0: \x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\xa0\x{2000} + 0: \x{200a}\xa0\x{2000} + +/[\h\x{dc00}]+/IB +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{dc00}]++ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x09 \x20 \xa0 \xff +Subject length lower bound = 1 + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + 0: \x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\xa0\x{2000} + 0: \x{200a}\xa0\x{2000} + +/\H+/I +Capture group count = 0 +Subject length lower bound = 1 + 
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + 0: \x{167f}\x{1681}\x{180d}\x{180f} + \x{2000}\x{200a}\x{1fff}\x{200b} + 0: \x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + 0: \x{202e}\x{2030}\x{205e}\x{2060} + \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} + 0: \x9f\xa1\x{2fff}\x{3001} + +/[\H\x{d800}]+/ + \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + 0: \x{167f}\x{1681}\x{180d}\x{180f} + \x{2000}\x{200a}\x{1fff}\x{200b} + 0: \x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + 0: \x{202e}\x{2030}\x{205e}\x{2060} + \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} + 0: \x9f\xa1\x{2fff}\x{3001} + +/\v+/I +Capture group count = 0 +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Subject length lower bound = 1 + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d + 0: \x85\x0a\x0b\x0c\x0d + +/[\v\x{dc00}]+/IB +------------------------------------------------------------------ + Bra + [\x0a-\x0d\x85\x{2028}-\x{2029}\x{dc00}]++ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Subject length lower bound = 1 + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d + 0: \x85\x0a\x0b\x0c\x0d + +/\V+/I +Capture group count = 0 +Subject length lower bound = 1 + \x{2028}\x{2029}\x{2027}\x{2030} + 0: \x{2027}\x{2030} + \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 + 0: \x09\x0e\x84\x86 + +/[\V\x{d800}]+/ + \x{2028}\x{2029}\x{2027}\x{2030} + 0: \x{2027}\x{2030} + \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 + 0: \x09\x0e\x84\x86 + +/\R+/I,bsr=unicode +Capture group count = 0 +\R matches any Unicode newline +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Subject length lower bound = 1 + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d + 0: \x85\x0a\x0b\x0c\x0d + +/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I +Capture group count 
= 0 +First code unit = \x{d800} +Last code unit = \x{dd00} +Subject length lower bound = 6 + \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00} + 0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00} + +/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B +------------------------------------------------------------------ + Bra + [^\x{80}] + [^\x{ff}] + [^\x{100}] + [^\x{1000}] + [^\x{ffff}] + Ket + End +------------------------------------------------------------------ + +/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi +------------------------------------------------------------------ + Bra + /i [^\x{80}] + /i [^\x{ff}] + /i [^\x{100}] + /i [^\x{1000}] + /i [^\x{ffff}] + Ket + End +------------------------------------------------------------------ + +/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B +------------------------------------------------------------------ + Bra + [^\x{100}]* + [^\x{1000}]+ + [^\x{ffff}]?? + [^\x{8000}]{4} + [^\x{8000}]* + [^\x{7fff}]{2} + [^\x{7fff}]{0,7}? + [^\x{100}]{5} + [^\x{100}]?+ + Ket + End +------------------------------------------------------------------ + +/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi +------------------------------------------------------------------ + Bra + /i [^\x{100}]* + /i [^\x{1000}]+ + /i [^\x{ffff}]?? + /i [^\x{8000}]{4} + /i [^\x{8000}]* + /i [^\x{7fff}]{2} + /i [^\x{7fff}]{0,7}? 
+ /i [^\x{100}]{5} + /i [^\x{100}]?+ + Ket + End +------------------------------------------------------------------ + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark + XX + 0: XX +MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark + XX + 0: XX +MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE + +/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ + +/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref +------------------------------------------------------------------ + Bra + [\x{100}-\x{200}] + Ket + End +------------------------------------------------------------------ + +/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref +------------------------------------------------------------------ + Bra + \x{d800} + Ket + End +------------------------------------------------------------------ + +/^\x{ffff}+/i + \x{ffff} + 0: \x{ffff} + +/^\x{ffff}?/i + \x{ffff} + 0: \x{ffff} + 
+/^\x{ffff}*/i + \x{ffff} + 0: \x{ffff} + +/^\x{ffff}{3}/i + \x{ffff}\x{ffff}\x{ffff} + 0: \x{ffff}\x{ffff}\x{ffff} + +/^\x{ffff}{0,3}/i + \x{ffff} + 0: \x{ffff} + +/[^\x00-a]{12,}[^b-\xff]*/B +------------------------------------------------------------------ + Bra + [b-\xff] (neg){12,} + [\x00-a] (neg)*+ + Ket + End +------------------------------------------------------------------ + +/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B +------------------------------------------------------------------ + Bra + [\x00-\x08\x0e-\x1f!-\xff] (neg)* + \s* + + [0-9A-Z_a-z]++ + \W+ + + [\x00-/:-\xff] (neg)*? + \d + 0 + [\x00-/:-@[-^`{-\xff] (neg){4,6}? + \w* + A + Ket + End +------------------------------------------------------------------ + +/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B +------------------------------------------------------------------ + Bra + a* + [b-\xff\x{100}-\x{200}]?+ + a# + a*+ + [b-\xff\x{100}-\x{200}]? + b# + [a-f]*+ + [g-\xff\x{100}-\x{200}]*+ + # + [g-\xff\x{100}-\x{200}]*+ + [a-c]*+ + # + [g-\xff\x{100}-\x{200}]* + [a-h]*+ + Ket + End +------------------------------------------------------------------ + +/^[\x{1234}\x{4321}]{2,4}?/ + \x{1234}\x{1234}\x{1234} + 0: \x{1234}\x{1234} + +# Check maximum non-UTF character size for the 16-bit library. + +/\x{ffff}/ + A\x{ffff}B + 0: \x{ffff} + +/\x{10000}/ + +/\o{20000}/ + +# Check maximum character size for the 32-bit library. These will all give +# errors in the 16-bit library. 
+ +/\x{110000}/ + +/\x{7fffffff}/ + +/\x{80000000}/ + +/\x{ffffffff}/ + +/\x{100000000}/ +Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + +/\o{17777777777}/ + +/\o{20000000000}/ + +/\o{37777777777}/ + +/\o{40000000000}/ +Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large + +/\x{7fffffff}\x{7fffffff}/I +Capture group count = 0 +First code unit = \x{7fffffff} +Last code unit = \x{7fffffff} +Subject length lower bound = 2 + +/\x{80000000}\x{80000000}/I +Capture group count = 0 +First code unit = \x{80000000} +Last code unit = \x{80000000} +Subject length lower bound = 2 + +/\x{ffffffff}\x{ffffffff}/I +Capture group count = 0 +First code unit = \x{ffffffff} +Last code unit = \x{ffffffff} +Subject length lower bound = 2 + +# Non-UTF characters + +/.{2,3}/ + \x{400000}\x{400001}\x{400002}\x{400003} + 0: \x{400000}\x{400001}\x{400002} + +/\x{400000}\x{800000}/IBi +------------------------------------------------------------------ + Bra + /i \x{400000}\x{800000} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless +First code unit = \x{400000} +Last code unit = \x{800000} +Subject length lower bound = 2 + +# Check character ranges + +/[\H]/IB +------------------------------------------------------------------ + Bra + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b + \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a + \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 + : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ + _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 + \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f + \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e + \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae + \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd + \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc + \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb + \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea + \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 + \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/[\V]/IB +------------------------------------------------------------------ + Bra + [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffffffff}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e + \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d + \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > + ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c + d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 + \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 + \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 + \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf + \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce + \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd + \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec + \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb + \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/(*THEN:\[A]{65501})/expand + +# We can use pcre2test's utf8_input modifier to create wide pattern characters, +# even though this test is run when UTF is not supported. + +/abý¿¿¿¿¿z/utf8_input + abý¿¿¿¿¿z + 0: ab\x{7fffffff}z + ab\x{7fffffff}z + 0: ab\x{7fffffff}z + +/abÿý¿¿¿¿¿z/utf8_input + abÿý¿¿¿¿¿z + 0: ab\x{ffffffff}z + ab\x{ffffffff}z + 0: ab\x{ffffffff}z + +/abÿAz/utf8_input + abÿAz + 0: ab\x{80000041}z + ab\x{80000041}z + 0: ab\x{80000041}z + +/(?i:A{1,}\6666666666)/ + A\x{1b6}6666666 + 0: A\x{1b6}6666666 + +# End of testinput11 diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 new file mode 100644 index 0000000..616d693 --- /dev/null +++ b/testdata/testoutput12-16 @@ -0,0 +1,1868 @@ +# This set of tests is for UTF-16 and UTF-32 support, including Unicode +# properties. It is relevant only to the 16-bit and 32-bit libraries. The +# output is different for each library, so there are separate output files. 
+ +/ÃÃÃxxx/IB,utf,no_utf_check +** Failed: invalid UTF-8 string cannot be converted to 16-bit string + +/abc/utf + Ã] +** Failed: invalid UTF-8 string cannot be used as input in UTF mode + +# Check maximum character size + +/\x{ffff}/IB,utf +------------------------------------------------------------------ + Bra + \x{ffff} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{ffff} +Subject length lower bound = 1 + +/\x{10000}/IB,utf +------------------------------------------------------------------ + Bra + \x{10000} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{d800} +Last code unit = \x{dc00} +Subject length lower bound = 1 + +/\x{100}/IB,utf +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100} +Subject length lower bound = 1 + +/\x{1000}/IB,utf +------------------------------------------------------------------ + Bra + \x{1000} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{1000} +Subject length lower bound = 1 + +/\x{10000}/IB,utf +------------------------------------------------------------------ + Bra + \x{10000} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{d800} +Last code unit = \x{dc00} +Subject length lower bound = 1 + +/\x{100000}/IB,utf +------------------------------------------------------------------ + Bra + \x{100000} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{dbc0} +Last code unit = \x{dc00} +Subject length 
lower bound = 1 + +/\x{10ffff}/IB,utf +------------------------------------------------------------------ + Bra + \x{10ffff} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{dbff} +Last code unit = \x{dfff} +Subject length lower bound = 1 + +/[\x{ff}]/IB,utf +------------------------------------------------------------------ + Bra + \x{ff} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xff +Subject length lower bound = 1 + +/[\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100} +Subject length lower bound = 1 + +/\x80/IB,utf +------------------------------------------------------------------ + Bra + \x{80} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x80 +Subject length lower bound = 1 + +/\xff/IB,utf +------------------------------------------------------------------ + Bra + \x{ff} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xff +Subject length lower bound = 1 + +/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf +------------------------------------------------------------------ + Bra + \x{d55c}\x{ad6d}\x{c5b4} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{d55c} +Last code unit = \x{c5b4} +Subject length lower bound = 3 + \x{D55c}\x{ad6d}\x{C5B4} + 0: \x{d55c}\x{ad6d}\x{c5b4} + +/\x{65e5}\x{672c}\x{8a9e}/IB,utf +------------------------------------------------------------------ + Bra + \x{65e5}\x{672c}\x{8a9e} + Ket + End 
+------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{65e5} +Last code unit = \x{8a9e} +Subject length lower bound = 3 + \x{65e5}\x{672c}\x{8a9e} + 0: \x{65e5}\x{672c}\x{8a9e} + +/\x{80}/IB,utf +------------------------------------------------------------------ + Bra + \x{80} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x80 +Subject length lower bound = 1 + +/\x{084}/IB,utf +------------------------------------------------------------------ + Bra + \x{84} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x84 +Subject length lower bound = 1 + +/\x{104}/IB,utf +------------------------------------------------------------------ + Bra + \x{104} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{104} +Subject length lower bound = 1 + +/\x{861}/IB,utf +------------------------------------------------------------------ + Bra + \x{861} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{861} +Subject length lower bound = 1 + +/\x{212ab}/IB,utf +------------------------------------------------------------------ + Bra + \x{212ab} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{d844} +Last code unit = \x{deab} +Subject length lower bound = 1 + +/[^ab\xC0-\xF0]/IB,utf +------------------------------------------------------------------ + Bra + [\x00-`c-\xbf\xf1-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 
\x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e + \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d + \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac + \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb + \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb + \xfc \xfd \xfe \xff +Subject length lower bound = 1 + \x{f1} + 0: \x{f1} + \x{bf} + 0: \x{bf} + \x{100} + 0: \x{100} + \x{1000} + 0: \x{1000} +\= Expect no match + \x{c0} +No match + \x{f0} +No match + +/Ā{3,4}/IB,utf +------------------------------------------------------------------ + Bra + \x{100}{3} + \x{100}?+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100} +Last code unit = \x{100} +Subject length lower bound = 3 + \x{100}\x{100}\x{100}\x{100\x{100} + 0: \x{100}\x{100}\x{100} + +/(\x{100}+|x)/IB,utf +------------------------------------------------------------------ + Bra + CBra 1 + \x{100}++ + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: x \xff +Subject length lower bound = 1 + +/(\x{100}*a|x)/IB,utf +------------------------------------------------------------------ + Bra + CBra 1 + \x{100}*+ + a + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: a x \xff +Subject length lower bound = 1 + +/(\x{100}{0,2}a|x)/IB,utf 
+------------------------------------------------------------------ + Bra + CBra 1 + \x{100}{0,2}+ + a + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: a x \xff +Subject length lower bound = 1 + +/(\x{100}{1,2}a|x)/IB,utf +------------------------------------------------------------------ + Bra + CBra 1 + \x{100} + \x{100}{0,1}+ + a + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: x \xff +Subject length lower bound = 1 + +/\x{100}/IB,utf +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100} +Subject length lower bound = 1 + +/a\x{100}\x{101}*/IB,utf +------------------------------------------------------------------ + Bra + a\x{100} + \x{101}*+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'a' +Last code unit = \x{100} +Subject length lower bound = 2 + +/a\x{100}\x{101}+/IB,utf +------------------------------------------------------------------ + Bra + a\x{100} + \x{101}++ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'a' +Last code unit = \x{101} +Subject length lower bound = 3 + +/[^\x{c4}]/IB +------------------------------------------------------------------ + Bra + [^\x{c4}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Subject length lower bound = 1 + +/[\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ 
+Capture group count = 0 +Options: utf +First code unit = \x{100} +Subject length lower bound = 1 + \x{100} + 0: \x{100} + Z\x{100} + 0: \x{100} + \x{100}Z + 0: \x{100} + +/[\xff]/IB,utf +------------------------------------------------------------------ + Bra + \x{ff} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xff +Subject length lower bound = 1 + >\x{ff}< + 0: \x{ff} + +/[^\xff]/IB,utf +------------------------------------------------------------------ + Bra + [^\x{ff}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Subject length lower bound = 1 + +/\x{100}abc(xyz(?1))/IB,utf +------------------------------------------------------------------ + Bra + \x{100}abc + CBra 1 + xyz + Recurse + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +First code unit = \x{100} +Last code unit = 'z' +Subject length lower bound = 7 + +/\777/I,utf +Capture group count = 0 +Options: utf +First code unit = \x{1ff} +Subject length lower bound = 1 + \x{1ff} + 0: \x{1ff} + \777 + 0: \x{1ff} + +/\x{100}+\x{200}/IB,utf +------------------------------------------------------------------ + Bra + \x{100}++ + \x{200} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100} +Last code unit = \x{200} +Subject length lower bound = 2 + +/\x{100}+X/IB,utf +------------------------------------------------------------------ + Bra + \x{100}++ + X + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100} +Last code unit = 'X' +Subject length lower bound = 2 + +/^[\QĀ\E-\QŐ\E/B,utf +Failed: error 106 at offset 13: missing terminating ] for character class + +/X/utf + 
XX\x{d800}\=no_utf_check + 0: X + XX\x{da00}\=no_utf_check + 0: X + XX\x{dc00}\=no_utf_check + 0: X + XX\x{de00}\=no_utf_check + 0: X + XX\x{dfff}\=no_utf_check + 0: X +\= Expect UTF error + XX\x{d800} +Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 + XX\x{da00} +Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 + XX\x{dc00} +Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 + XX\x{de00} +Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 + XX\x{dfff} +Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 + XX\x{110000} +** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 + XX\x{d800}\x{1234} +Failed: error -25: UTF-16 error: invalid low surrogate at offset 2 +\= Expect no match + XX\x{d800}\=offset=3 +No match + +/(?<=.)X/utf + XX\x{d800}\=offset=3 +Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 + +/(*UTF16)\x{11234}/ + abcd\x{11234}pqr + 0: \x{11234} + +/(*UTF)\x{11234}/I +Capture group count = 0 +Compile options: +Overall options: utf +First code unit = \x{d804} +Last code unit = \x{de34} +Subject length lower bound = 1 + abcd\x{11234}pqr + 0: \x{11234} + +/(*UTF-32)\x{11234}/ +Failed: error 160 at offset 5: (*VERB) not recognized or malformed + abcd\x{11234}pqr + +/(*UTF-32)\x{112}/ +Failed: error 160 at offset 5: (*VERB) not recognized or malformed + abcd\x{11234}pqr + +/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I +Capture group count = 0 +Compile options: +Overall options: utf +\R matches any Unicode newline +Forced newline is CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + +/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I +Failed: error 160 at offset 14: (*VERB) not recognized or malformed + +/\h/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x09 \x20 \xa0 \xff +Subject length lower bound = 1 + ABC\x{09} + 0: \x{09} + ABC\x{20} + 0: + ABC\x{a0} + 0: \x{a0} 
+ ABC\x{1680} + 0: \x{1680} + ABC\x{180e} + 0: \x{180e} + ABC\x{2000} + 0: \x{2000} + ABC\x{202f} + 0: \x{202f} + ABC\x{205f} + 0: \x{205f} + ABC\x{3000} + 0: \x{3000} + +/\v/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Subject length lower bound = 1 + ABC\x{0a} + 0: \x{0a} + ABC\x{0b} + 0: \x{0b} + ABC\x{0c} + 0: \x{0c} + ABC\x{0d} + 0: \x{0d} + ABC\x{85} + 0: \x{85} + ABC\x{2028} + 0: \x{2028} + +/\h*A/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x09 \x20 A \xa0 \xff +Last code unit = 'A' +Subject length lower bound = 1 + CDBABC + 0: A + \x{2000}ABC + 0: \x{2000}A + +/\R*A/I,bsr=unicode,utf +Capture group count = 0 +Options: utf +\R matches any Unicode newline +Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff +Last code unit = 'A' +Subject length lower bound = 1 + CDBABC + 0: A + \x{2028}A + 0: \x{2028}A + +/\v+A/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Last code unit = 'A' +Subject length lower bound = 2 + +/\s?xxx\s/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x +Last code unit = 'x' +Subject length lower bound = 4 + +/\sxxx\s/I,utf,tables=2 +Capture group count = 0 +Options: utf +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0 +Last code unit = 'x' +Subject length lower bound = 5 + AB\x{85}xxx\x{a0}XYZ + 0: \x{85}xxx\x{a0} + AB\x{a0}xxx\x{85}XYZ + 0: \x{a0}xxx\x{85} + +/\S \S/I,utf,tables=2 +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 + \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff +Last code unit = ' ' +Subject length lower bound = 3 + \x{a2} \x{84} + 0: \x{a2} \x{84} + A Z + 0: A Z + +/a+/utf + a\x{123}aa\=offset=1 + 0: aa + a\x{123}aa\=offset=2 + 0: aa + a\x{123}aa\=offset=3 + 0: a +\= Expect no match + a\x{123}aa\=offset=4 +No match +\= Expect bad offset error + a\x{123}aa\=offset=5 +Failed: error -33: bad offset value + a\x{123}aa\=offset=6 +Failed: error -33: bad offset value + +/\x{1234}+/Ii,utf +Capture group count = 0 +Options: caseless utf +First code unit = \x{1234} +Subject length lower bound = 1 + +/\x{1234}+?/Ii,utf +Capture group count = 0 +Options: caseless utf +First code unit = \x{1234} +Subject length lower bound = 1 + +/\x{1234}++/Ii,utf +Capture group count = 0 +Options: caseless utf +First code unit = \x{1234} +Subject length lower bound = 1 + +/\x{1234}{2}/Ii,utf +Capture group count = 0 +Options: caseless utf +First code unit = \x{1234} +Last code unit = \x{1234} +Subject length lower bound = 2 + +/[^\x{c4}]/IB,utf +------------------------------------------------------------------ + Bra + [^\x{c4}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Subject length lower bound = 1 + 
+/X+\x{200}/IB,utf +------------------------------------------------------------------ + Bra + X++ + \x{200} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'X' +Last code unit = \x{200} +Subject length lower bound = 2 + +/\R/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Subject length lower bound = 1 + +# Check bad offset + +/a/utf +\= Expect bad UTF-16 offset, or no match in 32-bit + \x{10000}\=offset=1 +Error -36 (bad UTF-16 offset) + \x{10000}ab\=offset=1 +Error -36 (bad UTF-16 offset) +\= Expect 16-bit match, 32-bit no match + \x{10000}ab\=offset=2 + 0: a +\= Expect no match + \x{10000}ab\=offset=3 +No match +\= Expect no match in 16-bit, bad offset in 32-bit + \x{10000}ab\=offset=4 +No match +\= Expect bad offset + \x{10000}ab\=offset=5 +Failed: error -33: bad offset value + +/í¼€/utf +Failed: error -26 at offset 0: UTF-16 error: isolated low surrogate + +/\w+\x{C4}/B,utf +------------------------------------------------------------------ + Bra + \w++ + \x{c4} + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4} + +/\w+\x{C4}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \w+ + \x{c4} + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4}\x{c4} + +/\W+\x{C4}/B,utf +------------------------------------------------------------------ + Bra + \W+ + \x{c4} + Ket + End +------------------------------------------------------------------ + !\x{C4} + 0: !\x{c4} + +/\W+\x{C4}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \W++ + \x{c4} + Ket + End +------------------------------------------------------------------ + !\x{C4} + 0: !\x{c4} + +/\W+\x{A1}/B,utf +------------------------------------------------------------------ + Bra + \W+ 
+ \x{a1} + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/\W+\x{A1}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \W+ + \x{a1} + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/X\s+\x{A0}/B,utf +------------------------------------------------------------------ + Bra + X + \s++ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0} + +/X\s+\x{A0}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + X + \s+ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0}\x{a0} + +/\S+\x{A0}/B,utf +------------------------------------------------------------------ + Bra + \S+ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0}\x{a0} + +/\S+\x{A0}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \S++ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0} + +/\x{a0}+\s!/B,utf +------------------------------------------------------------------ + Bra + \x{a0}++ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! + 0: \x{a0} ! + +/\x{a0}+\s!/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \x{a0}+ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! + 0: \x{a0} ! 
+ +/(*UTF)abc/never_utf +Failed: error 174 at offset 6: using UTF is disabled by the application + +/abc/utf,never_utf +Failed: error 174 at offset 0: using UTF is disabled by the application + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf +------------------------------------------------------------------ + Bra + /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +First code unit = 'A' (caseless) +Last code unit = \x{1fb0} (caseless) +Subject length lower bound = 5 + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf +------------------------------------------------------------------ + Bra + A\x{391}\x{10427}\x{ff3a}\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = \x{1fb0} +Subject length lower bound = 5 + +/AB\x{1fb0}/IB,utf +------------------------------------------------------------------ + Bra + AB\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = \x{1fb0} +Subject length lower bound = 3 + +/AB\x{1fb0}/IBi,utf +------------------------------------------------------------------ + Bra + /i AB\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +First code unit = 'A' (caseless) +Last code unit = \x{1fb0} (caseless) +Subject length lower bound = 3 + +/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf +Capture group count = 0 +Options: caseless utf +First code unit = \x{401} (caseless) +Last code unit = \x{42f} (caseless) +Subject length lower bound = 17 + 
\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} + 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} + \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} + 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} + +/[â±¥]/Bi,utf +------------------------------------------------------------------ + Bra + /i \x{2c65} + Ket + End +------------------------------------------------------------------ + +/[^â±¥]/Bi,utf +------------------------------------------------------------------ + Bra + /i [^\x{2c65}] + Ket + End +------------------------------------------------------------------ + +/[[:blank:]]/B,ucp +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] + Ket + End +------------------------------------------------------------------ + +/\x{212a}+/Ii,utf +Capture group count = 0 +Options: caseless utf +Starting code units: K k \xff +Subject length lower bound = 1 + KKkk\x{212a} + 0: KKkk\x{212a} + +/s+/Ii,utf +Capture group count = 0 +Options: caseless utf +Starting code units: S s \xff +Subject length lower bound = 1 + SSss\x{17f} + 0: SSss\x{17f} + +# Non-UTF characters should give errors in both 16-bit and 32-bit modes. 
+ +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/\o{4200000}/utf +Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large + +/\x{100}*A/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + A + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: A \xff +Last code unit = 'A' +Subject length lower bound = 1 + A + 0: A + +/\x{100}*\d(?R)/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \d + Recurse + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff +Subject length lower bound = 1 + +/[Z\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + [Z\x{100}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: Z \xff +Subject length lower bound = 1 + Z\x{100} + 0: Z + \x{100} + 0: \x{100} + \x{100}Z + 0: \x{100} + +/[z-\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + [z-\xff\x{100}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 + \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 + \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 + \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 + \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 + \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 + \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 + \xe2 
\xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 + \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/[z\Qa-d]Ā\E]/IB,utf +------------------------------------------------------------------ + Bra + [\-\]adz\x{100}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: - ] a d z \xff +Subject length lower bound = 1 + \x{100} + 0: \x{100} + Ā + 0: \x{100} + +/[ab\x{100}]abc(xyz(?1))/IB,utf +------------------------------------------------------------------ + Bra + [ab\x{100}] + abc + CBra 1 + xyz + Recurse + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: a b \xff +Last code unit = 'z' +Subject length lower bound = 7 + +/\x{100}*\s/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \s + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff +Subject length lower bound = 1 + +/\x{100}*\d/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff +Subject length lower bound = 1 + +/\x{100}*\w/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \w + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + \xff +Subject length lower bound = 1 + +/\x{100}*\D/IB,utf 
+------------------------------------------------------------------ + Bra + \x{100}* + \D + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c + d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 + \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 + \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf + \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe + \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd + \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc + \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb + \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa + \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/\x{100}*\S/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \S + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 + \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 + \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 + \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 + \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf + \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde + \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed + \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc + \xfd \xfe \xff +Subject length lower bound = 1 + +/\x{100}*\W/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \W + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 + \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 + \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 + \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 + \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 + \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 + \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 + \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 + \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/[\x{105}-\x{109}]/IBi,utf +------------------------------------------------------------------ + Bra + [\x{104}-\x{109}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +Starting code units: \xff +Subject length lower bound = 1 + \x{104} + 0: \x{104} + \x{105} + 0: \x{105} + \x{109} + 0: \x{109} +\= Expect no match + \x{100} +No match + \x{10a} +No match + +/[z-\x{100}]/IBi,utf +------------------------------------------------------------------ + Bra + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 + \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 + \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 
\xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff +Subject length lower bound = 1 + Z + 0: Z + z + 0: z + \x{39c} + 0: \x{39c} + \x{178} + 0: \x{178} + | + 0: | + \x{80} + 0: \x{80} + \x{ff} + 0: \x{ff} + \x{100} + 0: \x{100} + \x{101} + 0: \x{101} +\= Expect no match + \x{102} +No match + Y +No match + y +No match + +/[z-\x{100}]/IBi,utf +------------------------------------------------------------------ + Bra + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 + \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 + \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff +Subject length lower bound = 1 + +/\x{3a3}B/IBi,utf +------------------------------------------------------------------ + Bra + clist 03a3 03c2 03c3 + /i B + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +Starting code units: \xff +Last code unit = 'B' (caseless) +Subject length lower bound = 2 + +/./utf + \x{110000} +** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 + +/(*UTF)abý¿¿¿¿¿z/B +------------------------------------------------------------------ + 
Bra + ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z + Ket + End +------------------------------------------------------------------ + +/abý¿¿¿¿¿z/utf +** Failed: character value greater than 0x10ffff cannot be converted to UTF + +/[\W\p{Any}]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffff}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + 123 + 0: 1 + +/[\W\pL]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffff}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + \x{100} + 0: \x{100} + \x{308} + 0: \x{308} +\= Expect no match + 123 +No match + +/[\s[:^ascii:]]/B,ucp +------------------------------------------------------------------ + Bra + [\x80-\xff\p{Xsp}\x{100}-\x{ffff}] + Ket + End +------------------------------------------------------------------ + +/\pP/ucp + \x{7fffffff} +** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +No match + +# A special extra option allows excaped surrogate code points in 32-bit mode, +# but subjects containing them must not be UTF-checked. These patterns give +# errors in 16-bit mode. + +/\x{d800}/I,utf,allow_surrogate_escapes +Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode + \x{d800}\=no_utf_check + +/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes +Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode + \x{dfff}\x{df01}\=no_utf_check + +# This has different starting code units in 8-bit mode. 
+ +/^[^ab]/IB,utf +------------------------------------------------------------------ + Bra + ^ + [\x00-`c-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: utf +Overall options: anchored utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e + \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d + \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac + \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb + \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca + \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 + \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 + \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 + \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + c + 0: c + \x{ff} + 0: \x{ff} + \x{100} + 0: \x{100} +\= Expect no match + aaa +No match + +# Offsets are different in 8-bit mode. 
+ +/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout + 123abcáyzabcdef789abcሴqr + 1(2) Old 6 6 "" New 6 8 "<>" + 2(2) Old 12 12 "" New 14 16 "<>" + 3(2) Old 12 15 "def" New 16 21 "" + 4(2) Old 21 21 "" New 27 29 "<>" + 4: 123abc<>\x{e1}yzabc<>789abc<>\x{1234}qr + +# A few script run tests in non-UTF mode (but they need Unicode support) + +/^(*script_run:.{4})/ + \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han + 0: \x{3041}\x{30a1}\x{3007}\x{3007} + \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han + 0: \x{30a1}\x{3041}\x{3007}\x{3007} + \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul + 0: \x{1100}\x{2e80}\x{2e80}\x{1101} + +/^(*sr:.*)/utf,allow_surrogate_escapes +Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana + \x{d800}\x{dfff} Surrogates (Unknown) \=no_utf_check + +/(?(n/utf +Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) + +/(?(á/utf +Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) + +# Invalid UTF-16/32 tests. 
+ +/.../g,match_invalid_utf + abcd\x{df00}wxzy\x{df00}pqrs + 0: abc + 0: wxz + 0: pqr + abcd\x{80}wxzy\x{df00}pqrs + 0: abc + 0: d\x{80}w + 0: xzy + 0: pqr + +/abc/match_invalid_utf + ab\x{df00}ab\=ph +Partial match: ab +\= Expect no match + ab\x{df00}cdef\=ph +No match + +/.a/match_invalid_utf + ab\=ph +Partial match: b + ab\=ps +Partial match: b +\= Expect no match + b\x{df00}\=ph +No match + b\x{df00}\=ps +No match + +/.a$/match_invalid_utf + ab\=ph +Partial match: b + ab\=ps +Partial match: b +\= Expect no match + b\x{df00}\=ph +No match + b\x{df00}\=ps +No match + +/ab$/match_invalid_utf + ab\x{df00}cdeab + 0: ab +\= Expect no match + ab\x{df00}cde +No match + +/.../g,match_invalid_utf + abcd\x{80}wxzy\x{df00}pqrs + 0: abc + 0: d\x{80}w + 0: xzy + 0: pqr + +/(?<=x)../g,match_invalid_utf + abcd\x{80}wxzy\x{df00}pqrs + 0: zy + abcd\x{80}wxzy\x{df00}xpqrs + 0: zy + 0: pq + +/X$/match_invalid_utf +\= Expect no match + X\x{df00} +No match + +/(?<=..)X/match_invalid_utf,aftertext + AB\x{df00}AQXYZ + 0: X + 0+ YZ + AB\x{df00}AQXYZ\=offset=5 + 0: X + 0+ YZ + AB\x{df00}\x{df00}AXYZXC\=offset=5 + 0: X + 0+ C +\= Expect no match + AB\x{df00}XYZ +No match + AB\x{df00}XYZ\=offset=3 +No match + AB\x{df00}AXYZ +No match + AB\x{df00}AXYZ\=offset=4 +No match + AB\x{df00}\x{df00}AXYZ\=offset=5 +No match + +/.../match_invalid_utf +\= Expect no match + A\x{d800}B +No match + A\x{110000}B +** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 + +/aa/utf,ucp,match_invalid_utf,global + aa\x{d800}aa + 0: aa + 0: aa + +/aa/utf,ucp,match_invalid_utf,global + \x{d800}aa + 0: aa + +/A\z/utf,match_invalid_utf + A\x{df00}\n +No match + +# ---------------------------------------------------- + +/(*UTF)(?=\x{123})/I +Capture group count = 0 +May match empty string +Compile options: +Overall options: utf +First code unit = \x{123} +Subject length lower bound = 1 + +/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf +Capture group count = 0 +Options: utf +First code 
unit = \xc1 (caseless) +Last code unit = \x{145} (caseless) +Subject length lower bound = 3 + +/[\xff\x{ffff}]/I,utf +Capture group count = 0 +Options: utf +Starting code units: \xff +Subject length lower bound = 1 + +/[\xff\x{ff}]/I,utf +Capture group count = 0 +Options: utf +Starting code units: \xff +Subject length lower bound = 1 + +/[\xff\x{ff}]/I +Capture group count = 0 +Starting code units: \xff +Subject length lower bound = 1 + +/[Ss]/I +Capture group count = 0 +First code unit = 'S' (caseless) +Subject length lower bound = 1 + +/[Ss]/I,utf +Capture group count = 0 +Options: utf +Starting code units: S s +Subject length lower bound = 1 + +/(?:\x{ff}|\x{3000})/I,utf +Capture group count = 0 +Options: utf +Starting code units: \xff +Subject length lower bound = 1 + +# ---------------------------------------------------- +# UCP and casing tests + +/\x{120}/i,I +Capture group count = 0 +Options: caseless +First code unit = \x{120} +Subject length lower bound = 1 + +/\x{c1}/i,I,ucp +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Subject length lower bound = 1 + +/[\x{120}\x{121}]/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{120} + Ket + End +------------------------------------------------------------------ + +/[ab\x{120}]+/iB,ucp +------------------------------------------------------------------ + Bra + [ABab\x{120}-\x{121}]++ + Ket + End +------------------------------------------------------------------ + aABb\x{121}\x{120} + 0: aABb\x{121}\x{120} + +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} +No match + +/\x{120}\x{c1}/i,ucp,no_start_optimize + \x{121}\x{e1} + 0: \x{121}\xe1 + +/\x{120}\x{c1}/i,ucp + \x{121}\x{e1} + 0: \x{121}\xe1 + +/[^\x{120}]/i,no_start_optimize + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +\= Expect no match + \x{121} +No match + +/[^\x{120}]/i + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp +\= Expect no match + \x{121} +No match 
+ +/\x{120}{2}/i,ucp + \x{121}\x{121} + 0: \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp +\= Expect no match + \x{121}\x{121} +No match + +/\x{c1}+\x{e1}/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + +/\x{c1}+\x{e1}/iIB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Last code unit = \xe1 (caseless) +Subject length lower bound = 2 + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + \x{e1}\x{e1}\x{e1} + 0: \xe1\xe1\xe1 + +/a|\x{c1}/iI,ucp +Capture group count = 0 +Options: caseless ucp +Starting code units: A a \xc1 \xe1 +Subject length lower bound = 1 + \x{e1}xxx + 0: \xe1 + +/\x{c1}|\x{e1}/iI,ucp +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Subject length lower bound = 1 + +/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended + X\x{e1}Y + 1: >\xc1< + +/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended + X\x{121}Y + 1: >\x{120}< + +/s/i,ucp + \x{17f} + 0: \x{17f} + +/s/i,utf + \x{17f} + 0: \x{17f} + +/[^s]/i,ucp +\= Expect no match + \x{17f} +No match + +/[^s]/i,utf +\= Expect no match + \x{17f} +No match + +# ---------------------------------------------------- + +# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This +# fails in 16-bit mode, but is OK for 32-bit. + +/\x{802a0000}*/ +Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large + \x{802a0000}\x{802a0000} + +# UTF matching without UTF, check invalid UTF characters +/\X++/ + a\x{110000}\x{ffffffff} +** Character \x{110000} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. 
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. + 0: a\x00\x{ffff} + +# This used to loop in 32-bit mode; it will fail in 16-bit mode. +/[\x{ffffffff}]/caseless,ucp +Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + \x{ffffffff}xyz + +# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They +# will give errors in 16-bit mode. + +/k*\x{ffffffff}/caseless,ucp +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z +** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. 
+No match + +# --------------------------------------------------------- + +# End of testinput12 diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 new file mode 100644 index 0000000..3c9586e --- /dev/null +++ b/testdata/testoutput12-32 @@ -0,0 +1,1858 @@ +# This set of tests is for UTF-16 and UTF-32 support, including Unicode +# properties. It is relevant only to the 16-bit and 32-bit libraries. The +# output is different for each library, so there are separate output files. + +/ÃÃÃxxx/IB,utf,no_utf_check +** Failed: invalid UTF-8 string cannot be converted to 32-bit string + +/abc/utf + Ã] +** Failed: invalid UTF-8 string cannot be used as input in UTF mode + +# Check maximum character size + +/\x{ffff}/IB,utf +------------------------------------------------------------------ + Bra + \x{ffff} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{ffff} +Subject length lower bound = 1 + +/\x{10000}/IB,utf +------------------------------------------------------------------ + Bra + \x{10000} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{10000} +Subject length lower bound = 1 + +/\x{100}/IB,utf +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100} +Subject length lower bound = 1 + +/\x{1000}/IB,utf +------------------------------------------------------------------ + Bra + \x{1000} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{1000} +Subject length lower bound = 1 + +/\x{10000}/IB,utf +------------------------------------------------------------------ + Bra + \x{10000} + Ket + End 
+------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{10000} +Subject length lower bound = 1 + +/\x{100000}/IB,utf +------------------------------------------------------------------ + Bra + \x{100000} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100000} +Subject length lower bound = 1 + +/\x{10ffff}/IB,utf +------------------------------------------------------------------ + Bra + \x{10ffff} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{10ffff} +Subject length lower bound = 1 + +/[\x{ff}]/IB,utf +------------------------------------------------------------------ + Bra + \x{ff} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xff +Subject length lower bound = 1 + +/[\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100} +Subject length lower bound = 1 + +/\x80/IB,utf +------------------------------------------------------------------ + Bra + \x{80} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x80 +Subject length lower bound = 1 + +/\xff/IB,utf +------------------------------------------------------------------ + Bra + \x{ff} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xff +Subject length lower bound = 1 + +/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf +------------------------------------------------------------------ + Bra + \x{d55c}\x{ad6d}\x{c5b4} + Ket 
+ End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{d55c} +Last code unit = \x{c5b4} +Subject length lower bound = 3 + \x{D55c}\x{ad6d}\x{C5B4} + 0: \x{d55c}\x{ad6d}\x{c5b4} + +/\x{65e5}\x{672c}\x{8a9e}/IB,utf +------------------------------------------------------------------ + Bra + \x{65e5}\x{672c}\x{8a9e} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{65e5} +Last code unit = \x{8a9e} +Subject length lower bound = 3 + \x{65e5}\x{672c}\x{8a9e} + 0: \x{65e5}\x{672c}\x{8a9e} + +/\x{80}/IB,utf +------------------------------------------------------------------ + Bra + \x{80} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x80 +Subject length lower bound = 1 + +/\x{084}/IB,utf +------------------------------------------------------------------ + Bra + \x{84} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x84 +Subject length lower bound = 1 + +/\x{104}/IB,utf +------------------------------------------------------------------ + Bra + \x{104} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{104} +Subject length lower bound = 1 + +/\x{861}/IB,utf +------------------------------------------------------------------ + Bra + \x{861} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{861} +Subject length lower bound = 1 + +/\x{212ab}/IB,utf +------------------------------------------------------------------ + Bra + \x{212ab} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf 
+First code unit = \x{212ab} +Subject length lower bound = 1 + +/[^ab\xC0-\xF0]/IB,utf +------------------------------------------------------------------ + Bra + [\x00-`c-\xbf\xf1-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e + \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d + \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac + \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb + \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb + \xfc \xfd \xfe \xff +Subject length lower bound = 1 + \x{f1} + 0: \x{f1} + \x{bf} + 0: \x{bf} + \x{100} + 0: \x{100} + \x{1000} + 0: \x{1000} +\= Expect no match + \x{c0} +No match + \x{f0} +No match + +/Ā{3,4}/IB,utf +------------------------------------------------------------------ + Bra + \x{100}{3} + \x{100}?+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100} +Last code unit = \x{100} +Subject length lower bound = 3 + \x{100}\x{100}\x{100}\x{100\x{100} + 0: \x{100}\x{100}\x{100} + +/(\x{100}+|x)/IB,utf +------------------------------------------------------------------ + Bra + CBra 1 + \x{100}++ + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: x \xff +Subject length lower bound = 1 + 
+/(\x{100}*a|x)/IB,utf +------------------------------------------------------------------ + Bra + CBra 1 + \x{100}*+ + a + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: a x \xff +Subject length lower bound = 1 + +/(\x{100}{0,2}a|x)/IB,utf +------------------------------------------------------------------ + Bra + CBra 1 + \x{100}{0,2}+ + a + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: a x \xff +Subject length lower bound = 1 + +/(\x{100}{1,2}a|x)/IB,utf +------------------------------------------------------------------ + Bra + CBra 1 + \x{100} + \x{100}{0,1}+ + a + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: x \xff +Subject length lower bound = 1 + +/\x{100}/IB,utf +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100} +Subject length lower bound = 1 + +/a\x{100}\x{101}*/IB,utf +------------------------------------------------------------------ + Bra + a\x{100} + \x{101}*+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'a' +Last code unit = \x{100} +Subject length lower bound = 2 + +/a\x{100}\x{101}+/IB,utf +------------------------------------------------------------------ + Bra + a\x{100} + \x{101}++ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'a' +Last code unit = \x{101} +Subject length lower bound = 3 + +/[^\x{c4}]/IB 
+------------------------------------------------------------------ + Bra + [^\x{c4}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Subject length lower bound = 1 + +/[\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100} +Subject length lower bound = 1 + \x{100} + 0: \x{100} + Z\x{100} + 0: \x{100} + \x{100}Z + 0: \x{100} + +/[\xff]/IB,utf +------------------------------------------------------------------ + Bra + \x{ff} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xff +Subject length lower bound = 1 + >\x{ff}< + 0: \x{ff} + +/[^\xff]/IB,utf +------------------------------------------------------------------ + Bra + [^\x{ff}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Subject length lower bound = 1 + +/\x{100}abc(xyz(?1))/IB,utf +------------------------------------------------------------------ + Bra + \x{100}abc + CBra 1 + xyz + Recurse + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +First code unit = \x{100} +Last code unit = 'z' +Subject length lower bound = 7 + +/\777/I,utf +Capture group count = 0 +Options: utf +First code unit = \x{1ff} +Subject length lower bound = 1 + \x{1ff} + 0: \x{1ff} + \777 + 0: \x{1ff} + +/\x{100}+\x{200}/IB,utf +------------------------------------------------------------------ + Bra + \x{100}++ + \x{200} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100} +Last code unit = \x{200} +Subject length lower bound = 2 + +/\x{100}+X/IB,utf 
+------------------------------------------------------------------ + Bra + \x{100}++ + X + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{100} +Last code unit = 'X' +Subject length lower bound = 2 + +/^[\QĀ\E-\QŐ\E/B,utf +Failed: error 106 at offset 13: missing terminating ] for character class + +/X/utf + XX\x{d800}\=no_utf_check + 0: X + XX\x{da00}\=no_utf_check + 0: X + XX\x{dc00}\=no_utf_check + 0: X + XX\x{de00}\=no_utf_check + 0: X + XX\x{dfff}\=no_utf_check + 0: X +\= Expect UTF error + XX\x{d800} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{da00} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{dc00} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{de00} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{dfff} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{110000} +Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 2 + XX\x{d800}\x{1234} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 +\= Expect no match + XX\x{d800}\=offset=3 +No match + +/(?<=.)X/utf + XX\x{d800}\=offset=3 +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + +/(*UTF16)\x{11234}/ +Failed: error 160 at offset 7: (*VERB) not recognized or malformed + abcd\x{11234}pqr + +/(*UTF)\x{11234}/I +Capture group count = 0 +Compile options: +Overall options: utf +First code unit = \x{11234} +Subject length lower bound = 1 + abcd\x{11234}pqr + 0: \x{11234} + +/(*UTF-32)\x{11234}/ +Failed: error 160 at offset 5: (*VERB) not recognized or malformed + abcd\x{11234}pqr + +/(*UTF-32)\x{112}/ +Failed: error 160 at offset 5: (*VERB) not recognized or malformed + 
abcd\x{11234}pqr + +/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I +Failed: error 160 at offset 14: (*VERB) not recognized or malformed + +/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I +Capture group count = 0 +Compile options: +Overall options: utf +\R matches any Unicode newline +Forced newline is CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + +/\h/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x09 \x20 \xa0 \xff +Subject length lower bound = 1 + ABC\x{09} + 0: \x{09} + ABC\x{20} + 0: + ABC\x{a0} + 0: \x{a0} + ABC\x{1680} + 0: \x{1680} + ABC\x{180e} + 0: \x{180e} + ABC\x{2000} + 0: \x{2000} + ABC\x{202f} + 0: \x{202f} + ABC\x{205f} + 0: \x{205f} + ABC\x{3000} + 0: \x{3000} + +/\v/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Subject length lower bound = 1 + ABC\x{0a} + 0: \x{0a} + ABC\x{0b} + 0: \x{0b} + ABC\x{0c} + 0: \x{0c} + ABC\x{0d} + 0: \x{0d} + ABC\x{85} + 0: \x{85} + ABC\x{2028} + 0: \x{2028} + +/\h*A/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x09 \x20 A \xa0 \xff +Last code unit = 'A' +Subject length lower bound = 1 + CDBABC + 0: A + \x{2000}ABC + 0: \x{2000}A + +/\R*A/I,bsr=unicode,utf +Capture group count = 0 +Options: utf +\R matches any Unicode newline +Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff +Last code unit = 'A' +Subject length lower bound = 1 + CDBABC + 0: A + \x{2028}A + 0: \x{2028}A + +/\v+A/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Last code unit = 'A' +Subject length lower bound = 2 + +/\s?xxx\s/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x +Last code unit = 'x' +Subject length lower bound = 4 + +/\sxxx\s/I,utf,tables=2 +Capture group count = 0 +Options: utf +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0 +Last code unit = 'x' +Subject length lower bound = 5 + AB\x{85}xxx\x{a0}XYZ + 0: 
\x{85}xxx\x{a0} + AB\x{a0}xxx\x{85}XYZ + 0: \x{a0}xxx\x{85} + +/\S \S/I,utf,tables=2 +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 + \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff +Last code unit = ' ' +Subject length lower bound = 3 + \x{a2} \x{84} + 0: \x{a2} \x{84} + A Z + 0: A Z + +/a+/utf + a\x{123}aa\=offset=1 + 0: aa + a\x{123}aa\=offset=2 + 0: aa + a\x{123}aa\=offset=3 + 0: a +\= Expect no match + a\x{123}aa\=offset=4 +No match +\= Expect bad offset error + a\x{123}aa\=offset=5 +Failed: error -33: bad offset value + a\x{123}aa\=offset=6 +Failed: error -33: bad offset value + +/\x{1234}+/Ii,utf +Capture group count = 0 +Options: caseless utf +First code unit = \x{1234} +Subject length lower bound = 1 + +/\x{1234}+?/Ii,utf +Capture group count = 0 +Options: caseless utf +First code unit = \x{1234} +Subject length lower bound = 1 + +/\x{1234}++/Ii,utf +Capture group count = 0 +Options: caseless utf +First code unit = \x{1234} +Subject length lower bound = 1 + +/\x{1234}{2}/Ii,utf +Capture group count = 0 +Options: caseless utf 
+First code unit = \x{1234} +Last code unit = \x{1234} +Subject length lower bound = 2 + +/[^\x{c4}]/IB,utf +------------------------------------------------------------------ + Bra + [^\x{c4}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Subject length lower bound = 1 + +/X+\x{200}/IB,utf +------------------------------------------------------------------ + Bra + X++ + \x{200} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'X' +Last code unit = \x{200} +Subject length lower bound = 2 + +/\R/I,utf +Capture group count = 0 +Options: utf +Starting code units: \x0a \x0b \x0c \x0d \x85 \xff +Subject length lower bound = 1 + +# Check bad offset + +/a/utf +\= Expect bad UTF-16 offset, or no match in 32-bit + \x{10000}\=offset=1 +No match + \x{10000}ab\=offset=1 + 0: a +\= Expect 16-bit match, 32-bit no match + \x{10000}ab\=offset=2 +No match +\= Expect no match + \x{10000}ab\=offset=3 +No match +\= Expect no match in 16-bit, bad offset in 32-bit + \x{10000}ab\=offset=4 +Failed: error -33: bad offset value +\= Expect bad offset + \x{10000}ab\=offset=5 +Failed: error -33: bad offset value + +/í¼€/utf +Failed: error -27 at offset 0: UTF-32 error: code points 0xd800-0xdfff are not defined + +/\w+\x{C4}/B,utf +------------------------------------------------------------------ + Bra + \w++ + \x{c4} + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4} + +/\w+\x{C4}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \w+ + \x{c4} + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4}\x{c4} + +/\W+\x{C4}/B,utf +------------------------------------------------------------------ + Bra + \W+ + \x{c4} + Ket + End 
+------------------------------------------------------------------ + !\x{C4} + 0: !\x{c4} + +/\W+\x{C4}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \W++ + \x{c4} + Ket + End +------------------------------------------------------------------ + !\x{C4} + 0: !\x{c4} + +/\W+\x{A1}/B,utf +------------------------------------------------------------------ + Bra + \W+ + \x{a1} + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/\W+\x{A1}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \W+ + \x{a1} + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/X\s+\x{A0}/B,utf +------------------------------------------------------------------ + Bra + X + \s++ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0} + +/X\s+\x{A0}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + X + \s+ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0}\x{a0} + +/\S+\x{A0}/B,utf +------------------------------------------------------------------ + Bra + \S+ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0}\x{a0} + +/\S+\x{A0}/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \S++ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0} + +/\x{a0}+\s!/B,utf +------------------------------------------------------------------ + Bra + \x{a0}++ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! + 0: \x{a0} ! 
+ +/\x{a0}+\s!/B,utf,tables=2 +------------------------------------------------------------------ + Bra + \x{a0}+ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! + 0: \x{a0} ! + +/(*UTF)abc/never_utf +Failed: error 174 at offset 6: using UTF is disabled by the application + +/abc/utf,never_utf +Failed: error 174 at offset 0: using UTF is disabled by the application + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf +------------------------------------------------------------------ + Bra + /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +First code unit = 'A' (caseless) +Last code unit = \x{1fb0} (caseless) +Subject length lower bound = 5 + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf +------------------------------------------------------------------ + Bra + A\x{391}\x{10427}\x{ff3a}\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = \x{1fb0} +Subject length lower bound = 5 + +/AB\x{1fb0}/IB,utf +------------------------------------------------------------------ + Bra + AB\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = \x{1fb0} +Subject length lower bound = 3 + +/AB\x{1fb0}/IBi,utf +------------------------------------------------------------------ + Bra + /i AB\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +First code unit = 'A' (caseless) +Last code unit = \x{1fb0} (caseless) +Subject length lower bound = 3 + +/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf +Capture group count = 0 +Options: caseless 
utf +First code unit = \x{401} (caseless) +Last code unit = \x{42f} (caseless) +Subject length lower bound = 17 + \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} + 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} + \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} + 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} + +/[â±¥]/Bi,utf +------------------------------------------------------------------ + Bra + /i \x{2c65} + Ket + End +------------------------------------------------------------------ + +/[^â±¥]/Bi,utf +------------------------------------------------------------------ + Bra + /i [^\x{2c65}] + Ket + End +------------------------------------------------------------------ + +/[[:blank:]]/B,ucp +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] + Ket + End +------------------------------------------------------------------ + +/\x{212a}+/Ii,utf +Capture group count = 0 +Options: caseless utf +Starting code units: K k \xff +Subject length lower bound = 1 + KKkk\x{212a} + 0: KKkk\x{212a} + +/s+/Ii,utf +Capture group count = 0 +Options: caseless utf +Starting code units: S s \xff +Subject length lower bound = 1 + SSss\x{17f} + 0: SSss\x{17f} + +# Non-UTF characters should give errors in both 16-bit and 32-bit modes. 
+ +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/\o{4200000}/utf +Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large + +/\x{100}*A/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + A + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: A \xff +Last code unit = 'A' +Subject length lower bound = 1 + A + 0: A + +/\x{100}*\d(?R)/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \d + Recurse + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff +Subject length lower bound = 1 + +/[Z\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + [Z\x{100}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: Z \xff +Subject length lower bound = 1 + Z\x{100} + 0: Z + \x{100} + 0: \x{100} + \x{100}Z + 0: \x{100} + +/[z-\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + [z-\xff\x{100}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 + \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 + \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 + \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 + \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 + \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 + \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 + \xe2 
\xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 + \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/[z\Qa-d]Ā\E]/IB,utf +------------------------------------------------------------------ + Bra + [\-\]adz\x{100}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: - ] a d z \xff +Subject length lower bound = 1 + \x{100} + 0: \x{100} + Ā + 0: \x{100} + +/[ab\x{100}]abc(xyz(?1))/IB,utf +------------------------------------------------------------------ + Bra + [ab\x{100}] + abc + CBra 1 + xyz + Recurse + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Starting code units: a b \xff +Last code unit = 'z' +Subject length lower bound = 7 + +/\x{100}*\s/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \s + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff +Subject length lower bound = 1 + +/\x{100}*\d/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff +Subject length lower bound = 1 + +/\x{100}*\w/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \w + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + \xff +Subject length lower bound = 1 + +/\x{100}*\D/IB,utf 
+------------------------------------------------------------------ + Bra + \x{100}* + \D + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c + d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 + \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 + \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf + \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe + \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd + \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc + \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb + \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa + \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/\x{100}*\S/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \S + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 + \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 + \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 + \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 + \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf + \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde + \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed + \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc + \xfd \xfe \xff +Subject length lower bound = 1 + +/\x{100}*\W/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \W + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 + \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 + \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 + \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 + \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 + \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 + \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 + \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 + \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/[\x{105}-\x{109}]/IBi,utf +------------------------------------------------------------------ + Bra + [\x{104}-\x{109}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +Starting code units: \xff +Subject length lower bound = 1 + \x{104} + 0: \x{104} + \x{105} + 0: \x{105} + \x{109} + 0: \x{109} +\= Expect no match + \x{100} +No match + \x{10a} +No match + +/[z-\x{100}]/IBi,utf +------------------------------------------------------------------ + Bra + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 + \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 + \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 
\xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff +Subject length lower bound = 1 + Z + 0: Z + z + 0: z + \x{39c} + 0: \x{39c} + \x{178} + 0: \x{178} + | + 0: | + \x{80} + 0: \x{80} + \x{ff} + 0: \x{ff} + \x{100} + 0: \x{100} + \x{101} + 0: \x{101} +\= Expect no match + \x{102} +No match + Y +No match + y +No match + +/[z-\x{100}]/IBi,utf +------------------------------------------------------------------ + Bra + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 + \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 + \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff +Subject length lower bound = 1 + +/\x{3a3}B/IBi,utf +------------------------------------------------------------------ + Bra + clist 03a3 03c2 03c3 + /i B + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +Starting code units: \xff +Last code unit = 'B' (caseless) +Subject length lower bound = 2 + +/./utf + \x{110000} +Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 0 + +/(*UTF)abý¿¿¿¿¿z/B +------------------------------------------------------------------ + 
Bra + ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z + Ket + End +------------------------------------------------------------------ + +/abý¿¿¿¿¿z/utf +** Failed: character value greater than 0x10ffff cannot be converted to UTF + +/[\W\p{Any}]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffffffff}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + 123 + 0: 1 + +/[\W\pL]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffffffff}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + \x{100} + 0: \x{100} + \x{308} + 0: \x{308} +\= Expect no match + 123 +No match + +/[\s[:^ascii:]]/B,ucp +------------------------------------------------------------------ + Bra + [\x80-\xff\p{Xsp}\x{100}-\x{ffffffff}] + Ket + End +------------------------------------------------------------------ + +/\pP/ucp + \x{7fffffff} +No match + +# A special extra option allows excaped surrogate code points in 32-bit mode, +# but subjects containing them must not be UTF-checked. These patterns give +# errors in 16-bit mode. + +/\x{d800}/I,utf,allow_surrogate_escapes +Capture group count = 0 +Options: utf +Extra options: allow_surrogate_escapes +First code unit = \x{d800} +Subject length lower bound = 1 + \x{d800}\=no_utf_check + 0: \x{d800} + +/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes + \x{dfff}\x{df01}\=no_utf_check + 0: \x{dfff}\x{df01} + +# This has different starting code units in 8-bit mode. 
+ +/^[^ab]/IB,utf +------------------------------------------------------------------ + Bra + ^ + [\x00-`c-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: utf +Overall options: anchored utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e + \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d + \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac + \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb + \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca + \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 + \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 + \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 + \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + c + 0: c + \x{ff} + 0: \x{ff} + \x{100} + 0: \x{100} +\= Expect no match + aaa +No match + +# Offsets are different in 8-bit mode. 
+ +/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout + 123abcáyzabcdef789abcሴqr + 1(2) Old 6 6 "" New 6 8 "<>" + 2(2) Old 12 12 "" New 14 16 "<>" + 3(2) Old 12 15 "def" New 16 21 "" + 4(2) Old 21 21 "" New 27 29 "<>" + 4: 123abc<>\x{e1}yzabc<>789abc<>\x{1234}qr + +# A few script run tests in non-UTF mode (but they need Unicode support) + +/^(*script_run:.{4})/ + \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han + 0: \x{3041}\x{30a1}\x{3007}\x{3007} + \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han + 0: \x{30a1}\x{3041}\x{3007}\x{3007} + \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul + 0: \x{1100}\x{2e80}\x{2e80}\x{1101} + +/^(*sr:.*)/utf,allow_surrogate_escapes + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana + 0: \x{2e80}\x{3105}\x{2e80} + \x{d800}\x{dfff} Surrogates (Unknown) \=no_utf_check + 0: \x{d800} + +/(?(n/utf +Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) + +/(?(á/utf +Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) + +# Invalid UTF-16/32 tests. 
+ +/.../g,match_invalid_utf + abcd\x{df00}wxzy\x{df00}pqrs + 0: abc + 0: wxz + 0: pqr + abcd\x{80}wxzy\x{df00}pqrs + 0: abc + 0: d\x{80}w + 0: xzy + 0: pqr + +/abc/match_invalid_utf + ab\x{df00}ab\=ph +Partial match: ab +\= Expect no match + ab\x{df00}cdef\=ph +No match + +/.a/match_invalid_utf + ab\=ph +Partial match: b + ab\=ps +Partial match: b +\= Expect no match + b\x{df00}\=ph +No match + b\x{df00}\=ps +No match + +/.a$/match_invalid_utf + ab\=ph +Partial match: b + ab\=ps +Partial match: b +\= Expect no match + b\x{df00}\=ph +No match + b\x{df00}\=ps +No match + +/ab$/match_invalid_utf + ab\x{df00}cdeab + 0: ab +\= Expect no match + ab\x{df00}cde +No match + +/.../g,match_invalid_utf + abcd\x{80}wxzy\x{df00}pqrs + 0: abc + 0: d\x{80}w + 0: xzy + 0: pqr + +/(?<=x)../g,match_invalid_utf + abcd\x{80}wxzy\x{df00}pqrs + 0: zy + abcd\x{80}wxzy\x{df00}xpqrs + 0: zy + 0: pq + +/X$/match_invalid_utf +\= Expect no match + X\x{df00} +No match + +/(?<=..)X/match_invalid_utf,aftertext + AB\x{df00}AQXYZ + 0: X + 0+ YZ + AB\x{df00}AQXYZ\=offset=5 + 0: X + 0+ YZ + AB\x{df00}\x{df00}AXYZXC\=offset=5 + 0: X + 0+ C +\= Expect no match + AB\x{df00}XYZ +No match + AB\x{df00}XYZ\=offset=3 +No match + AB\x{df00}AXYZ +No match + AB\x{df00}AXYZ\=offset=4 +No match + AB\x{df00}\x{df00}AXYZ\=offset=5 +No match + +/.../match_invalid_utf +\= Expect no match + A\x{d800}B +No match + A\x{110000}B +No match + +/aa/utf,ucp,match_invalid_utf,global + aa\x{d800}aa + 0: aa + 0: aa + +/aa/utf,ucp,match_invalid_utf,global + \x{d800}aa + 0: aa + +/A\z/utf,match_invalid_utf + A\x{df00}\n +No match + +# ---------------------------------------------------- + +/(*UTF)(?=\x{123})/I +Capture group count = 0 +May match empty string +Compile options: +Overall options: utf +First code unit = \x{123} +Subject length lower bound = 1 + +/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf +Capture group count = 0 +Options: utf +First code unit = \xc1 (caseless) +Last code unit = \x{145} (caseless) +Subject length lower 
bound = 3 + +/[\xff\x{ffff}]/I,utf +Capture group count = 0 +Options: utf +Starting code units: \xff +Subject length lower bound = 1 + +/[\xff\x{ff}]/I,utf +Capture group count = 0 +Options: utf +Starting code units: \xff +Subject length lower bound = 1 + +/[\xff\x{ff}]/I +Capture group count = 0 +Starting code units: \xff +Subject length lower bound = 1 + +/[Ss]/I +Capture group count = 0 +First code unit = 'S' (caseless) +Subject length lower bound = 1 + +/[Ss]/I,utf +Capture group count = 0 +Options: utf +Starting code units: S s +Subject length lower bound = 1 + +/(?:\x{ff}|\x{3000})/I,utf +Capture group count = 0 +Options: utf +Starting code units: \xff +Subject length lower bound = 1 + +# ---------------------------------------------------- +# UCP and casing tests + +/\x{120}/i,I +Capture group count = 0 +Options: caseless +First code unit = \x{120} +Subject length lower bound = 1 + +/\x{c1}/i,I,ucp +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Subject length lower bound = 1 + +/[\x{120}\x{121}]/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{120} + Ket + End +------------------------------------------------------------------ + +/[ab\x{120}]+/iB,ucp +------------------------------------------------------------------ + Bra + [ABab\x{120}-\x{121}]++ + Ket + End +------------------------------------------------------------------ + aABb\x{121}\x{120} + 0: aABb\x{121}\x{120} + +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} +No match + +/\x{120}\x{c1}/i,ucp,no_start_optimize + \x{121}\x{e1} + 0: \x{121}\xe1 + +/\x{120}\x{c1}/i,ucp + \x{121}\x{e1} + 0: \x{121}\xe1 + +/[^\x{120}]/i,no_start_optimize + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +\= Expect no match + \x{121} +No match + +/[^\x{120}]/i + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp +\= Expect no match + \x{121} +No match + +/\x{120}{2}/i,ucp + \x{121}\x{121} + 0: \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp 
+\= Expect no match + \x{121}\x{121} +No match + +/\x{c1}+\x{e1}/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + +/\x{c1}+\x{e1}/iIB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Last code unit = \xe1 (caseless) +Subject length lower bound = 2 + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + \x{e1}\x{e1}\x{e1} + 0: \xe1\xe1\xe1 + +/a|\x{c1}/iI,ucp +Capture group count = 0 +Options: caseless ucp +Starting code units: A a \xc1 \xe1 +Subject length lower bound = 1 + \x{e1}xxx + 0: \xe1 + +/\x{c1}|\x{e1}/iI,ucp +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Subject length lower bound = 1 + +/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended + X\x{e1}Y + 1: >\xc1< + +/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended + X\x{121}Y + 1: >\x{120}< + +/s/i,ucp + \x{17f} + 0: \x{17f} + +/s/i,utf + \x{17f} + 0: \x{17f} + +/[^s]/i,ucp +\= Expect no match + \x{17f} +No match + +/[^s]/i,utf +\= Expect no match + \x{17f} +No match + +# ---------------------------------------------------- + +# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This +# fails in 16-bit mode, but is OK for 32-bit. + +/\x{802a0000}*/ + \x{802a0000}\x{802a0000} + 0: \x{802a0000}\x{802a0000} + +# UTF matching without UTF, check invalid UTF characters +/\X++/ + a\x{110000}\x{ffffffff} + 0: a\x{110000}\x{ffffffff} + +# This used to loop in 32-bit mode; it will fail in 16-bit mode. +/[\x{ffffffff}]/caseless,ucp + \x{ffffffff}xyz + 0: \x{ffffffff} + +# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They +# will give errors in 16-bit mode. 
+ +/k*\x{ffffffff}/caseless,ucp + \x{ffffffff} + 0: \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} + 0: K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} +No match + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} +No match + +/k\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} + 0: K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} +No match + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z +No match + +# --------------------------------------------------------- + +# End of testinput12 diff --git a/testdata/testoutput13 b/testdata/testoutput13 new file mode 100644 index 0000000..f737ebe --- /dev/null +++ b/testdata/testoutput13 @@ -0,0 +1,27 @@ +# These DFA tests are for the handling of characters greater than 255 in +# 16-bit or 32-bit, non-UTF mode. + +#forbid_utf +#subject dfa + +/^\x{ffff}+/i + \x{ffff} + 0: \x{ffff} + +/^\x{ffff}?/i + \x{ffff} + 0: \x{ffff} + +/^\x{ffff}*/i + \x{ffff} + 0: \x{ffff} + +/^\x{ffff}{3}/i + \x{ffff}\x{ffff}\x{ffff} + 0: \x{ffff}\x{ffff}\x{ffff} + +/^\x{ffff}{0,3}/i + \x{ffff} + 0: \x{ffff} + +# End of testinput13 diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16 new file mode 100644 index 0000000..dd1a977 --- /dev/null +++ b/testdata/testoutput14-16 @@ -0,0 +1,163 @@ +# These test special UTF and UCP features of DFA matching. The output is +# different for the different widths. + +#subject dfa + +# ---------------------------------------------------- +# These are a selection of the more comprehensive tests that are run for +# non-DFA matching. 
+ +/X/utf + XX\x{d800} +Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 + XX\x{d800}\=offset=3 +No match + XX\x{d800}\=no_utf_check + 0: X + XX\x{da00} +Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 + XX\x{da00}\=no_utf_check + 0: X + XX\x{dc00} +Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 + XX\x{dc00}\=no_utf_check + 0: X + XX\x{de00} +Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 + XX\x{de00}\=no_utf_check + 0: X + XX\x{dfff} +Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 + XX\x{dfff}\=no_utf_check + 0: X + XX\x{110000} +** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 + XX\x{d800}\x{1234} +Failed: error -25: UTF-16 error: invalid low surrogate at offset 2 + +/badutf/utf + X\xdf +No match + XX\xef +No match + XXX\xef\x80 +No match + X\xf7 +No match + XX\xf7\x80 +No match + XXX\xf7\x80\x80 +No match + +/shortutf/utf + XX\xdf\=ph +No match + XX\xef\=ph +No match + XX\xef\x80\=ph +No match + \xf7\=ph +No match + \xf7\x80\=ph +No match + +# ---------------------------------------------------- +# UCP and casing tests - except for the first two, these will all fail in 8-bit +# mode because they are testing UCP without UTF and use characters > 255. 
+ +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} +No match + +/\x{c1}+\x{e1}/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + 1: \xc1\xc1 + \x{e1}\x{e1}\x{e1} + 0: \xe1\xe1\xe1 + 1: \xe1\xe1 + +/\x{120}\x{c1}/i,ucp,no_start_optimize + \x{121}\x{e1} + 0: \x{121}\xe1 + +/\x{120}\x{c1}/i,ucp + \x{121}\x{e1} + 0: \x{121}\xe1 + +/[^\x{120}]/i,no_start_optimize + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +\= Expect no match + \x{121} +No match + +/[^\x{120}]/i + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp +\= Expect no match + \x{121} +No match + +/\x{120}{2}/i,ucp + \x{121}\x{121} + 0: \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp +\= Expect no match + \x{121}\x{121} +No match + +# ---------------------------------------------------- + +# ---------------------------------------------------- +# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit +# mode; for the other widths they will fail. 
+ +/k*\x{ffffffff}/caseless,ucp +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z +** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +No match + +# ---------------------------------------------------- + +# End of testinput14 diff --git a/testdata/testoutput14-32 b/testdata/testoutput14-32 new file mode 100644 index 0000000..dc21569 --- /dev/null +++ b/testdata/testoutput14-32 @@ -0,0 +1,159 @@ +# These test special UTF and UCP features of DFA matching. The output is +# different for the different widths. + +#subject dfa + +# ---------------------------------------------------- +# These are a selection of the more comprehensive tests that are run for +# non-DFA matching. 
+ +/X/utf + XX\x{d800} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{d800}\=offset=3 +No match + XX\x{d800}\=no_utf_check + 0: X + XX\x{da00} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{da00}\=no_utf_check + 0: X + XX\x{dc00} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{dc00}\=no_utf_check + 0: X + XX\x{de00} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{de00}\=no_utf_check + 0: X + XX\x{dfff} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{dfff}\=no_utf_check + 0: X + XX\x{110000} +Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 2 + XX\x{d800}\x{1234} +Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 + +/badutf/utf + X\xdf +No match + XX\xef +No match + XXX\xef\x80 +No match + X\xf7 +No match + XX\xf7\x80 +No match + XXX\xf7\x80\x80 +No match + +/shortutf/utf + XX\xdf\=ph +No match + XX\xef\=ph +No match + XX\xef\x80\=ph +No match + \xf7\=ph +No match + \xf7\x80\=ph +No match + +# ---------------------------------------------------- +# UCP and casing tests - except for the first two, these will all fail in 8-bit +# mode because they are testing UCP without UTF and use characters > 255. 
+ +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} +No match + +/\x{c1}+\x{e1}/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + 1: \xc1\xc1 + \x{e1}\x{e1}\x{e1} + 0: \xe1\xe1\xe1 + 1: \xe1\xe1 + +/\x{120}\x{c1}/i,ucp,no_start_optimize + \x{121}\x{e1} + 0: \x{121}\xe1 + +/\x{120}\x{c1}/i,ucp + \x{121}\x{e1} + 0: \x{121}\xe1 + +/[^\x{120}]/i,no_start_optimize + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +\= Expect no match + \x{121} +No match + +/[^\x{120}]/i + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp +\= Expect no match + \x{121} +No match + +/\x{120}{2}/i,ucp + \x{121}\x{121} + 0: \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp +\= Expect no match + \x{121}\x{121} +No match + +# ---------------------------------------------------- + +# ---------------------------------------------------- +# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit +# mode; for the other widths they will fail. 
+ +/k*\x{ffffffff}/caseless,ucp + \x{ffffffff} + 0: \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} + 0: K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} +No match + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} +No match + +/k\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} + 0: K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} +No match + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z +No match + +# ---------------------------------------------------- + +# End of testinput14 diff --git a/testdata/testoutput14-8 b/testdata/testoutput14-8 new file mode 100644 index 0000000..69285db --- /dev/null +++ b/testdata/testoutput14-8 @@ -0,0 +1,163 @@ +# These test special UTF and UCP features of DFA matching. The output is +# different for the different widths. + +#subject dfa + +# ---------------------------------------------------- +# These are a selection of the more comprehensive tests that are run for +# non-DFA matching. 
+ +/X/utf + XX\x{d800} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{d800}\=offset=3 +Error -36 (bad UTF-8 offset) + XX\x{d800}\=no_utf_check + 0: X + XX\x{da00} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{da00}\=no_utf_check + 0: X + XX\x{dc00} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{dc00}\=no_utf_check + 0: X + XX\x{de00} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{de00}\=no_utf_check + 0: X + XX\x{dfff} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 + XX\x{dfff}\=no_utf_check + 0: X + XX\x{110000} +Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2 + XX\x{d800}\x{1234} +Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 + +/badutf/utf + X\xdf +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1 + XX\xef +Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 + XXX\xef\x80 +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3 + X\xf7 +Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1 + XX\xf7\x80 +Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 + XXX\xf7\x80\x80 +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3 + +/shortutf/utf + XX\xdf\=ph +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2 + XX\xef\=ph +Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 + XX\xef\x80\=ph +Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2 + \xf7\=ph +Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 + \xf7\x80\=ph +Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 + +# ---------------------------------------------------- +# UCP and casing tests - except for the first two, these will all fail in 8-bit +# 
mode because they are testing UCP without UTF and use characters > 255. + +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} +No match + +/\x{c1}+\x{e1}/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + 1: \xc1\xc1 + \x{e1}\x{e1}\x{e1} + 0: \xe1\xe1\xe1 + 1: \xe1\xe1 + +/\x{120}\x{c1}/i,ucp,no_start_optimize +Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large + \x{121}\x{e1} + +/\x{120}\x{c1}/i,ucp +Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large + \x{121}\x{e1} + +/[^\x{120}]/i,no_start_optimize +Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large + \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large +\= Expect no match + \x{121} + +/[^\x{120}]/i +Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large + \x{121} + +/[^\x{120}]/i,ucp +Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large +\= Expect no match + \x{121} + +/\x{120}{2}/i,ucp +Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large + \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp +Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large +\= Expect no match + \x{121}\x{121} + +# ---------------------------------------------------- + +# ---------------------------------------------------- +# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit +# mode; for the other widths they will fail. 
+ +/k*\x{ffffffff}/caseless,ucp +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z +** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled. +** Truncation will probably give the wrong result. +No match + +# ---------------------------------------------------- + +# End of testinput14 diff --git a/testdata/testoutput15 b/testdata/testoutput15 new file mode 100644 index 0000000..943ab72 --- /dev/null +++ b/testdata/testoutput15 @@ -0,0 +1,540 @@ +# These are: +# +# (1) Tests of the match-limiting features. The results are different for +# interpretive or JIT matching, so this test should not be run with JIT. The +# same tests are run using JIT in test 17. + +# (2) Other tests that must not be run with JIT. + +# These tests are first so that they don't inherit a large enough heap frame +# vector from a previous test. 
+ +/(*LIMIT_HEAP=21)\[(a)]{60}/expand + \[a]{60} +Failed: error -63: heap limit exceeded + +"(*LIMIT_HEAP=21)()((?))()()()()()()()()()()()()()()()()()()()()()()()(())()()()()()()()()()()()()()()()()()()()()()(())()()()()()()()()()()()()()" + xx +Failed: error -63: heap limit exceeded + +# ----------------------------------------------------------------------- + +/(a+)*zz/I +Capture group count = 1 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits_noheap +Minimum match limit = 7 +Minimum depth limit = 7 + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz + 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + aaaaaaaaaaaaaz\=find_limits_noheap +Minimum match limit = 20481 +Minimum depth limit = 30 +No match + +!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I +Capture group count = 1 +May match empty string +Subject length lower bound = 0 + /* this is a C style comment */\=find_limits_noheap +Minimum match limit = 64 +Minimum depth limit = 7 + 0: /* this is a C style comment */ + 1: /* this is a C style comment */ + +/^(?>a)++/ + aa\=find_limits_noheap +Minimum match limit = 5 +Minimum depth limit = 3 + 0: aa + aaaaaaaaa\=find_limits_noheap +Minimum match limit = 12 +Minimum depth limit = 3 + 0: aaaaaaaaa + +/(a)(?1)++/ + aa\=find_limits_noheap +Minimum match limit = 7 +Minimum depth limit = 5 + 0: aa + 1: a + aaaaaaaaa\=find_limits_noheap +Minimum match limit = 21 +Minimum depth limit = 5 + 0: aaaaaaaaa + 1: a + +/a(?:.)*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits_noheap +Minimum match limit = 24 +Minimum depth limit = 3 + 0: abbbbbbbbbbbbbbbbbbbbba + +/a(?:.(*THEN))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits_noheap +Minimum match limit = 66 +Minimum depth limit = 45 + 0: abbbbbbbbbbbbbbbbbbbbba + +/a(?:.(*THEN:ABC))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits_noheap +Minimum match limit = 66 +Minimum depth limit = 45 
+ 0: abbbbbbbbbbbbbbbbbbbbba + +/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ + aabbccddee\=find_limits_noheap +Minimum match limit = 7 +Minimum depth limit = 7 + 0: aabbccddee + +/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ + aabbccddee\=find_limits_noheap +Minimum match limit = 12 +Minimum depth limit = 12 + 0: aabbccddee + 1: aa + 2: bb + 3: cc + 4: dd + 5: ee + +/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ + aabbccddee\=find_limits_noheap +Minimum match limit = 10 +Minimum depth limit = 10 + 0: aabbccddee + 1: aa + 2: cc + 3: ee + +/(*LIMIT_MATCH=12bc)abc/ +Failed: error 160 at offset 17: (*VERB) not recognized or malformed + +/(*LIMIT_MATCH=4294967290)abc/ +Failed: error 160 at offset 24: (*VERB) not recognized or malformed + +/(*LIMIT_DEPTH=4294967280)abc/I +Capture group count = 0 +Depth limit = 4294967280 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/(a+)*zz/ +\= Expect no match + aaaaaaaaaaaaaz +No match +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=3000 +Failed: error -47: match limit exceeded + +/(a+)*zz/ +\= Expect limit exceeded + aaaaaaaaaaaaaz\=depth_limit=10 +Failed: error -53: matching depth limit exceeded + +/(*LIMIT_MATCH=3000)(a+)*zz/I +Capture group count = 1 +Match limit = 3000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +\= Expect limit exceeded + aaaaaaaaaaaaaz +Failed: error -47: match limit exceeded +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=60000 +Failed: error -47: match limit exceeded + +/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I +Capture group count = 1 +Match limit = 3000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +\= Expect limit exceeded + aaaaaaaaaaaaaz +Failed: error -47: match limit exceeded + +/(*LIMIT_MATCH=60000)(a+)*zz/I +Capture group count = 1 +Match limit = 60000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +\= Expect no match + aaaaaaaaaaaaaz +No match +\= Expect limit 
exceeded + aaaaaaaaaaaaaz\=match_limit=3000 +Failed: error -47: match limit exceeded + +/(*LIMIT_DEPTH=10)(a+)*zz/I +Capture group count = 1 +Depth limit = 10 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +\= Expect limit exceeded + aaaaaaaaaaaaaz +Failed: error -53: matching depth limit exceeded +\= Expect limit exceeded + aaaaaaaaaaaaaz\=depth_limit=1000 +Failed: error -53: matching depth limit exceeded + +/(*LIMIT_DEPTH=10)(*LIMIT_DEPTH=1000)(a+)*zz/I +Capture group count = 1 +Depth limit = 1000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +\= Expect no match + aaaaaaaaaaaaaz +No match + +/(*LIMIT_DEPTH=1000)(a+)*zz/I +Capture group count = 1 +Depth limit = 1000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +\= Expect no match + aaaaaaaaaaaaaz +No match +\= Expect limit exceeded + aaaaaaaaaaaaaz\=depth_limit=10 +Failed: error -53: matching depth limit exceeded + +# These three have infinitely nested recursions. + +/((?2))((?1))/ + abc +Failed: error -52: nested recursion at the same subject position + +/((?(R2)a+|(?1)b))()/ + aaaabcde +Failed: error -52: nested recursion at the same subject position + +/(?(R)a*(?1)|((?R))b)/ + aaaabcde +Failed: error -52: nested recursion at the same subject position + +# The allusedtext modifier does not work with JIT, which does not maintain +# the leftchar/rightchar data. + +/abc(?=xyz)/allusedtext + abcxyzpqr + 0: abcxyz + >>> + abcxyzpqr\=aftertext + 0: abcxyz + >>> + 0+ xyzpqr + +/(?<=pqr)abc(?=xyz)/allusedtext + xyzpqrabcxyzpqr + 0: pqrabcxyz + <<< >>> + xyzpqrabcxyzpqr\=aftertext + 0: pqrabcxyz + <<< >>> + 0+ xyzpqr + +/a\b/ + a.\=allusedtext + 0: a. 
+ > + a\=allusedtext + 0: a + +/abc\Kxyz/ + abcxyz\=allusedtext + 0: abcxyz + <<< + +/abc(?=xyz(*ACCEPT))/ + abcxyz\=allusedtext + 0: abcxyz + >>> + +/abc(?=abcde)(?=ab)/allusedtext + abcabcdefg + 0: abcabcde + >>>>> + +#subject allusedtext + +/(?<=abc)123/ + xyzabc123pqr + 0: abc123 + <<< + xyzabc12\=ps +Partial match: abc12 + <<< + xyzabc12\=ph +Partial match: abc12 + <<< + +/\babc\b/ + +++abc+++ + 0: +abc+ + < > + +++ab\=ps +Partial match: +ab + < + +++ab\=ph +Partial match: +ab + < + +/(?<=abc)def/ + abc\=ph +Partial match: abc + <<< + +/(?<=123)(*MARK:xx)abc/mark + xxxx123a\=ph +Partial match, mark=xx: 123a + <<< + xxxx123a\=ps +Partial match, mark=xx: 123a + <<< + +/(?<=(?<=a)b)c.*/I +Capture group count = 0 +Max lookbehind = 1 +First code unit = 'c' +Subject length lower bound = 1 + abc\=ph +Partial match: abc + << +\= Expect no match + xbc\=ph +No match + +/(?<=ab)c.*/I +Capture group count = 0 +Max lookbehind = 2 +First code unit = 'c' +Subject length lower bound = 1 + abc\=ph +Partial match: abc + << +\= Expect no match + xbc\=ph +No match + +/abc(?<=bc)def/ + xxxabcd\=ph +Partial match: abcd + +/(?<=ab)cdef/ + xxabcd\=ph +Partial match: abcd + << + +/(?<=(?<=(?<=a)b)c)./I +Capture group count = 0 +Max lookbehind = 1 +Subject length lower bound = 1 + 123abcXYZ + 0: abcX + <<< + +/(?<=ab(cd(?<=...)))./I +Capture group count = 1 +Max lookbehind = 4 +Subject length lower bound = 1 + abcdX + 0: abcdX + <<<< + 1: cd + +/(?<=ab((?<=...)cd))./I +Capture group count = 1 +Max lookbehind = 4 +Subject length lower bound = 1 + ZabcdX + 0: ZabcdX + <<<<< + 1: cd + +/(?<=((?<=(?<=ab).))(?1)(?1))./I +Capture group count = 1 +Max lookbehind = 2 +Subject length lower bound = 1 + abxZ + 0: abxZ + <<< + 1: + +#subject +# ------------------------------------------------------------------- + +# These tests provoke recursion loops, which give a different error message +# when JIT is used. 
+ +/(?R)/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + abcd +Failed: error -52: nested recursion at the same subject position + +/(a|(?R))/I +Capture group count = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: a + 1: a + defg +Failed: error -52: nested recursion at the same subject position + +/(ab|(bc|(de|(?R))))/I +Capture group count = 3 +May match empty string +Subject length lower bound = 0 + abcd + 0: ab + 1: ab + fghi +Failed: error -52: nested recursion at the same subject position + +/(ab|(bc|(de|(?1))))/I +Capture group count = 3 +May match empty string +Subject length lower bound = 0 + abcd + 0: ab + 1: ab + fghi +Failed: error -52: nested recursion at the same subject position + +/x(ab|(bc|(de|(?1)x)x)x)/I +Capture group count = 3 +First code unit = 'x' +Subject length lower bound = 3 + xab123 + 0: xab + 1: ab + xfghi +Failed: error -52: nested recursion at the same subject position + +/(?!\w)(?R)/ + abcd +Failed: error -52: nested recursion at the same subject position + =abc +Failed: error -52: nested recursion at the same subject position + +/(?=\w)(?R)/ + =abc +Failed: error -52: nested recursion at the same subject position + abcd +Failed: error -52: nested recursion at the same subject position + +/(?abc + 1 ^ ^ End of pattern + 1 ^ ^ End of pattern + 1 ^^ End of pattern + 1 ^ ^ End of pattern + 1 ^^ End of pattern + 1 ^^ End of pattern +No match + +/(*NO_AUTO_POSSESS)\w+(?C1)/BI +------------------------------------------------------------------ + Bra + \w+ + Callout 1 26 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: no_auto_possess +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + abc\=callout_fail=1 +--->abc + 1 ^ ^ End of pattern + 1 ^ ^ End of pattern + 
1 ^^ End of pattern + 1 ^ ^ End of pattern + 1 ^^ End of pattern + 1 ^^ End of pattern +No match + +# This test breaks the JIT stack limit + +/(|]+){2,2452}/ + (|]+){2,2452} + 0: + 1: + +/b(? + abcz + 0: abcz + < >> + +# This test triggers the recursion limit in the interpreter, but completes in +# JIT. It's in testinput2 with disable_recurse_loop_check to get it to work +# in the interpreter. + +/(a(?1)z||(?1)++)$/ + abcd +Failed: error -52: nested recursion at the same subject position + +# End of testinput15 diff --git a/testdata/testoutput16 b/testdata/testoutput16 new file mode 100644 index 0000000..54c9f18 --- /dev/null +++ b/testdata/testoutput16 @@ -0,0 +1,18 @@ +# This test is run only when JIT support is not available. It checks that an +# attempt to use it has the expected behaviour. It also tests things that +# are different without JIT. + +/abc/I,jit,jitverify +JIT compilation was not successful (bad JIT option) +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 +JIT support is not available in this version of PCRE2 + +/a*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + +# End of testinput16 diff --git a/testdata/testoutput17 b/testdata/testoutput17 new file mode 100644 index 0000000..00c4bd4 --- /dev/null +++ b/testdata/testoutput17 @@ -0,0 +1,570 @@ +# This test is run only when JIT support is available. It checks JIT complete +# and partial modes, and things that are different with JIT. + +#pattern jitverify + +# JIT does not support this pattern (callout at start of condition). + +/(?(?C1)(?=a)a)/I +JIT compilation was not successful (no more memory) +Capture group count = 0 +May match empty string +Subject length lower bound = 0 +JIT compilation was not successful (no more memory) + +# The following pattern cannot be compiled by JIT. 
+ +/b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*b*
b*b*/I +JIT compilation was not successful (no more memory) +Capture group count = 0 +May match empty string +Subject length lower bound = 0 +JIT compilation was not successful (no more memory) + +# Check that an infinite recursion loop is caught. + +/(?(R)a*(?1)|((?R))b)/ + aaaabcde +Failed: error -46: JIT stack limit reached + +/abcd/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 4 +JIT compilation was successful + abcd + 0: abcd (JIT) +\= Expect no match + xyz +No match (JIT) + +/(*NO_JIT)abcd/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 4 +JIT compilation was not successful + abcd + 0: abcd +\= Expect no match + xyz +No match + +/abcd/ + abcd + 0: abcd (JIT) + ab\=ps +Partial match: ab (JIT) + ab\=ph +Partial match: ab (JIT) +\= Expect no match + xyz +No match (JIT) + +/abcd/jitfast + abcd + 0: abcd (JIT) + ab\=ps +Partial match: ab (JIT) + ab\=ph +Partial match: ab (JIT) +\= Expect no match + xyz +No match (JIT) + +/abcd/jit=1 + abcd + 0: abcd (JIT) + ab\=ps +Partial match: ab + ab\=ph +Partial match: ab +\= Expect no match + xyz +No match (JIT) + xyz\=ps +No match + +/abcd/jit=1,jitfast + abcd + 0: abcd (JIT) + ab\=ps +Failed: error -45: bad JIT option + ab\=ph +Failed: error -45: bad JIT option + xyz\=ps +Failed: error -45: bad JIT option +\= Expect no match + xyz +No match (JIT) + +/abcd/jit=2 + abcd + 0: abcd + ab\=ps +Partial match: ab (JIT) + ab\=ph +Partial match: ab +\= Expect no match + xyz +No match + +/abcd/jit=2,jitfast + abcd +Failed: error -45: bad JIT option + ab\=ps +Partial match: ab (JIT) + ab\=ph +Failed: error -45: bad JIT option + xyz +Failed: error -45: bad JIT option + +/abcd/jit=3 + abcd + 0: abcd (JIT) + ab\=ps +Partial match: ab (JIT) + ab\=ph +Partial match: ab +\= Expect no match + xyz +No match (JIT) + +/abcd/jit=4 + abcd + 0: abcd + ab\=ps +Partial match: ab + ab\=ph +Partial match: ab (JIT) +\= Expect no match + xyz +No 
match + +/abcd/jit=5 + abcd + 0: abcd (JIT) + ab\=ps +Partial match: ab + ab\=ph +Partial match: ab (JIT) +\= Expect no match + xyz +No match (JIT) + +/abcd/jit=6 + abcd + 0: abcd + ab\=ps +Partial match: ab (JIT) + ab\=ph +Partial match: ab (JIT) +\= Expect no match + xyz +No match + +/abcd/jit=7 + abcd + 0: abcd (JIT) + ab\=ps +Partial match: ab (JIT) + ab\=ph +Partial match: ab (JIT) +\= Expect no match + xyz +No match (JIT) + +/abcd/I,jit=2 +Capture group count = 0 +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 4 +JIT compilation was successful + +/(*NO_START_OPT)a(*:m)b/mark +\= Expect no match + a +No match, mark = m (JIT) + +/^12345678abcd/m + 12345678abcd + 0: 12345678abcd (JIT) + +# Limits tests that give different output with JIT. + +/(a+)*zz/I +Capture group count = 1 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +JIT compilation was successful + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits +Minimum match limit = 2 + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz (JIT) + 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +\= Expect no match + aaaaaaaaaaaaaz\=find_limits +Minimum match limit = 16383 +No match (JIT) + +!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I +Capture group count = 1 +May match empty string +Subject length lower bound = 0 +JIT compilation was successful + /* this is a C style comment */\=find_limits +Minimum match limit = 29 + 0: /* this is a C style comment */ (JIT) + 1: /* this is a C style comment */ + +/^(?>a)++/ + aa\=find_limits +Minimum match limit = 1 + 0: aa (JIT) + aaaaaaaaa\=find_limits +Minimum match limit = 1 + 0: aaaaaaaaa (JIT) + +/(a)(?1)++/ + aa\=find_limits +Minimum match limit = 1 + 0: aa (JIT) + 1: a + aaaaaaaaa\=find_limits +Minimum match limit = 1 + 0: aaaaaaaaa (JIT) + 1: a + +/a(?:.)*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits +Minimum match limit = 22 + 0: 
abbbbbbbbbbbbbbbbbbbbba (JIT) + +/a(?:.(*THEN))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits +Minimum match limit = 22 + 0: abbbbbbbbbbbbbbbbbbbbba (JIT) + +/a(?:.(*THEN:ABC))*?a/ims + abbbbbbbbbbbbbbbbbbbbba\=find_limits +Minimum match limit = 22 + 0: abbbbbbbbbbbbbbbbbbbbba (JIT) + +/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ + aabbccddee\=find_limits +Minimum match limit = 5 + 0: aabbccddee (JIT) + +/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ + aabbccddee\=find_limits +Minimum match limit = 5 + 0: aabbccddee (JIT) + 1: aa + 2: bb + 3: cc + 4: dd + 5: ee + +/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ + aabbccddee\=find_limits +Minimum match limit = 5 + 0: aabbccddee (JIT) + 1: aa + 2: cc + 3: ee + +/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/jitfast + aabbccddee\=find_limits +Minimum match limit = 5 + 0: aabbccddee (JIT) + 1: aa + 2: cc + 3: ee + aabbccddee\=jitstack=1 + 0: aabbccddee (JIT) + 1: aa + 2: cc + 3: ee + +/(a+)*zz/ +\= Expect no match + aaaaaaaaaaaaaz +No match (JIT) +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=3000 +Failed: error -47: match limit exceeded + +/(*LIMIT_MATCH=3000)(a+)*zz/I +Capture group count = 1 +Match limit = 3000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +JIT compilation was successful +\= Expect limit exceeded + aaaaaaaaaaaaaz +Failed: error -47: match limit exceeded +\= Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=60000 +Failed: error -47: match limit exceeded + +/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I +Capture group count = 1 +Match limit = 3000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +JIT compilation was successful +\= Expect limit exceeded + aaaaaaaaaaaaaz +Failed: error -47: match limit exceeded + +/(*LIMIT_MATCH=60000)(a+)*zz/I +Capture group count = 1 +Match limit = 60000 +Starting code units: a z +Last code unit = 'z' +Subject length lower bound = 2 +JIT compilation was successful +\= Expect no match + aaaaaaaaaaaaaz +No match (JIT) +\= 
Expect limit exceeded + aaaaaaaaaaaaaz\=match_limit=3000 +Failed: error -47: match limit exceeded + +# These three have infinitely nested recursions. + +/((?2))((?1))/ +\= Expect JIT stack limit reached + abc +Failed: error -46: JIT stack limit reached + +/((?(R2)a+|(?1)b))()/ +\= Expect JIT stack limit reached + aaaabcde +Failed: error -46: JIT stack limit reached + +/(?(R)a*(?1)|((?R))b)/ +\= Expect JIT stack limit reached + aaaabcde +Failed: error -46: JIT stack limit reached + +# Invalid options disable JIT when called via pcre2_match(), causing the +# match to happen via the interpreter, but for fast JIT invalid options are +# ignored, so an unanchored match happens. + +/abcd/ + abcd\=anchored + 0: abcd +\= Expect no match + fail abcd\=anchored +No match + +/abcd/jitfast + abcd\=anchored + 0: abcd (JIT) + succeed abcd\=anchored + 0: abcd (JIT) + +# Push/pop does not lose the JIT information, though jitverify applies only to +# compilation, but serializing (save/load) discards JIT data completely. 
+ +/^abc\Kdef/info,push +** Applies only to compile when pattern is stacked with 'push': jitverify +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 6 +JIT compilation was successful +#pop jitverify + abcdef + 0: def (JIT) + +/^abc\Kdef/info,push +** Applies only to compile when pattern is stacked with 'push': jitverify +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 6 +JIT compilation was successful +#save testsaved1 +#load testsaved1 +#pop jitverify + abcdef + 0: def + +#load testsaved1 +#pop jit,jitverify + abcdef + 0: def (JIT) + +/abcd/pushcopy,jitverify +** Applies only to compile when pattern is stacked with 'push': jitverify + abcd + 0: abcd (JIT) + +#pop jitverify + abcd + 0: abcd + +# Test pattern compilation + +/(?:a|b|c|d|e)(?R)/jit=1 + +/(?:a|b|c|d|e)(?R)(?R)/jit=1 + +/(a(?:a|b|c|d|e)b){8,16}/jit=1 + +/(?:|a|){100}x/jit=1 + +# These tests provoke recursion loops, which give a different error message +# when JIT is used. 
+ +/(?R)/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 +JIT compilation was successful + abcd +Failed: error -46: JIT stack limit reached + +/(a|(?R))/I +Capture group count = 1 +May match empty string +Subject length lower bound = 0 +JIT compilation was successful + abcd + 0: a (JIT) + 1: a + defg +Failed: error -46: JIT stack limit reached + +/(ab|(bc|(de|(?R))))/I +Capture group count = 3 +May match empty string +Subject length lower bound = 0 +JIT compilation was successful + abcd + 0: ab (JIT) + 1: ab + fghi +Failed: error -46: JIT stack limit reached + +/(ab|(bc|(de|(?1))))/I +Capture group count = 3 +May match empty string +Subject length lower bound = 0 +JIT compilation was successful + abcd + 0: ab (JIT) + 1: ab + fghi +Failed: error -46: JIT stack limit reached + +/x(ab|(bc|(de|(?1)x)x)x)/I +Capture group count = 3 +First code unit = 'x' +Subject length lower bound = 3 +JIT compilation was successful + xab123 + 0: xab (JIT) + 1: ab + xfghi +Failed: error -46: JIT stack limit reached + +/(?!\w)(?R)/ + abcd +Failed: error -46: JIT stack limit reached + =abc +Failed: error -46: JIT stack limit reached + +/(?=\w)(?R)/ + =abc +Failed: error -46: JIT stack limit reached + abcd +Failed: error -46: JIT stack limit reached + +/(? 
+ 3: def + +/the quick brown fox/ + the quick brown fox + 0: the quick brown fox +\= Expect no match + The Quick Brown Fox +No match: POSIX code 17: match failed + +/the quick brown fox/i + the quick brown fox + 0: the quick brown fox + The Quick Brown Fox + 0: The Quick Brown Fox + +/(*LF)abc.def/ +\= Expect no match + abc\ndef +No match: POSIX code 17: match failed + +/(*LF)abc$/ + abc + 0: abc + abc\n + 0: abc + +/(abc)\2/ +Failed: POSIX code 15: bad back reference at offset 6 + +/(abc\1)/ +\= Expect no match + abc +No match: POSIX code 17: match failed + +/a*(b+)(z)(z)/ + aaaabbbbzzzz + 0: aaaabbbbzz + 1: bbbb + 2: z + 3: z + aaaabbbbzzzz\=ovector=0 +Matched without capture + aaaabbbbzzzz\=ovector=1 + 0: aaaabbbbzz + aaaabbbbzzzz\=ovector=2 + 0: aaaabbbbzz + 1: bbbb + +/(*ANY)ab.cd/ + ab-cd + 0: ab-cd + ab=cd + 0: ab=cd +\= Expect no match + ab\ncd +No match: POSIX code 17: match failed + +/ab.cd/s + ab-cd + 0: ab-cd + ab=cd + 0: ab=cd + ab\ncd + 0: ab\x0acd + +/a(b)c/posix_nosub + abc +Matched with REG_NOSUB + +/a(?Pb)c/posix_nosub + abc +Matched with REG_NOSUB + +/(a)\1/posix_nosub + zaay +Matched with REG_NOSUB + +/a?|b?/ + abc + 0: a +\= Expect no match + ddd\=notempty +No match: POSIX code 17: match failed + +/\w+A/ + CDAAAAB + 0: CDAAAA + +/\w+A/ungreedy + CDAAAAB + 0: CDA + +/\Biss\B/I,aftertext +** Ignored with POSIX interface: info + Mississippi + 0: iss + 0+ issippi + +/abc/\ +Failed: POSIX code 9: bad escape sequence at offset 4 + +"(?(?C)" +Failed: POSIX code 11: unbalanced () at offset 6 + +"(?(?C))" +Failed: POSIX code 3: pattern error at offset 6 + +/abcd/substitute_extended +** Ignored with POSIX interface: substitute_extended + +/\[A]{1000000}**/expand,regerror_buffsize=31 +Failed: POSIX code 4: ? * + invalid at offset 100000 +** regerror() message truncated + +/\[A]{1000000}**/expand,regerror_buffsize=32 +Failed: POSIX code 4: ? 
* + invalid at offset 1000001 + +//posix_nosub + \=offset=70000 +** Ignored with POSIX interface: offset +Matched with REG_NOSUB + +/^d(e)$/posix + acdef\=posix_startend=2:4 + 0: de + 1: e + acde\=posix_startend=2 + 0: de + 1: e +\= Expect no match + acdef +No match: POSIX code 17: match failed + acdef\=posix_startend=2 +No match: POSIX code 17: match failed + +/^a\x{00}b$/posix + a\x{00}b\=posix_startend=0:3 + 0: a\x00b + +/"A" 00 "B"/hex + A\x{00}B\=posix_startend=0:3 + 0: A\x00B + +/ABC/use_length + ABC + 0: ABC + +/a\b(c/literal,posix + a\\b(c + 0: a\b(c + +/a\b(c/literal,posix,dotall +Failed: POSIX code 16: bad argument at offset 0 + +/((a)(b)?(c))/posix + 123ace + 0: ac + 1: ac + 2: a + 3: + 4: c + 123ace\=posix_startend=2:6 + 0: ac + 1: ac + 2: a + 3: + 4: c + +//posix +\= Expect errors + \=null_subject +No match: POSIX code 16: bad argument + abc\=null_subject +No match: POSIX code 16: bad argument + +/(*LIMIT_HEAP=0)xx/posix +\= Expect error + xxxx +No match: POSIX code 14: failed to get memory + +# End of testdata/testinput18 diff --git a/testdata/testoutput19 b/testdata/testoutput19 new file mode 100644 index 0000000..25aa67d --- /dev/null +++ b/testdata/testoutput19 @@ -0,0 +1,30 @@ +# This set of tests is run only with the 8-bit library. It tests the POSIX +# interface with UTF/UCP support, which is supported only with the 8-bit +# library. This test should not be run with JIT (which is not available for the +# POSIX interface). 
+ +#pattern posix + +/a\x{1234}b/utf + a\x{1234}b + 0: a\x{1234}b + +/\w/ +\= Expect no match + +++\x{c2} +No match: POSIX code 17: match failed + +/\w/ucp + +++\x{c2} + 0: \xc2 + +/"^AB" 00 "\x{1234}$"/hex,utf + AB\x{00}\x{1234}\=posix_startend=0:6 + 0: AB\x{00}\x{1234} + +/\w/utf +\= Expect UTF error + A\xabB +No match: POSIX code 16: bad argument + +# End of testdata/testinput19 diff --git a/testdata/testoutput2 b/testdata/testoutput2 new file mode 100644 index 0000000..8375668 --- /dev/null +++ b/testdata/testoutput2 @@ -0,0 +1,18070 @@ +# This set of tests is not Perl-compatible. It checks on special features +# of PCRE2's API, error diagnostics, and the compiled code of some patterns. +# It also checks the non-Perl syntax that PCRE2 supports (Python, .NET, +# Oniguruma). There are also some tests where PCRE2 and Perl differ, +# either because PCRE2 can't be compatible, or there is a possible Perl +# bug. + +# NOTE: This is a non-UTF set of tests. When UTF support is needed, use +# test 5. 
+ +#forbid_utf +#newline_default lf any anycrlf + +# Test binary zeroes in the pattern + +# /a\0B/ where 0 is a binary zero +/61 5c 00 62/B,hex +------------------------------------------------------------------ + Bra + a\x00b + Ket + End +------------------------------------------------------------------ + a\x{0}b + 0: a\x00b + +# /a0b/ where 0 is a binary zero +/61 00 62/B,hex +------------------------------------------------------------------ + Bra + a\x00b + Ket + End +------------------------------------------------------------------ + a\x{0}b + 0: a\x00b + +# /(?#B0C)DE/ where 0 is a binary zero +/28 3f 23 42 00 43 29 44 45/B,hex +------------------------------------------------------------------ + Bra + DE + Ket + End +------------------------------------------------------------------ + DE + 0: DE + +/(a)b|/I +Capture group count = 1 +May match empty string +Subject length lower bound = 0 + +/abc/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + abc + 0: abc + defabc + 0: abc + abc\=anchored + 0: abc +\= Expect no match + defabc\=anchored +No match + ABC +No match + +/^abc/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + abc + 0: abc + abc\=anchored + 0: abc +\= Expect no match + defabc +No match + defabc\=anchored +No match + +/a+bc/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/a*bc/I +Capture group count = 0 +Starting code units: a b +Last code unit = 'c' +Subject length lower bound = 2 + +/a{3}bc/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 5 + +/(abc|a+z)/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 2 + +/^abc$/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + abc + 0: abc +\= Expect no match 
+ def\nabc +No match + +/ab\idef/ +Failed: error 103 at offset 3: unrecognized character follows \ + +/(?X)ab\idef/ +Failed: error 111 at offset 2: unrecognized character after (? or (?- + +/x{5,4}/ +Failed: error 104 at offset 5: numbers out of order in {} quantifier + +/z{65536}/ +Failed: error 105 at offset 7: number too big in {} quantifier + +/[abcd/ +Failed: error 106 at offset 5: missing terminating ] for character class + +/[\B]/B +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\R]/B +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\X]/B +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[z-a]/ +Failed: error 108 at offset 3: range out of order in character class + +/^*/ +Failed: error 109 at offset 1: quantifier does not follow a repeatable item + +/(abc/ +Failed: error 114 at offset 4: missing closing parenthesis + +/(?# abc/ +Failed: error 118 at offset 7: missing ) after (?# comment + +/(?z)abc/ +Failed: error 111 at offset 2: unrecognized character after (? 
or (?- + +/.*b/I +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 'b' +Subject length lower bound = 1 + +/.*?b/I +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 'b' +Subject length lower bound = 1 + +/cat|dog|elephant/I +Capture group count = 0 +Starting code units: c d e +Subject length lower bound = 3 + this sentence eventually mentions a cat + 0: cat + this sentences rambles on and on for a while and then reaches elephant + 0: elephant + +/cat|dog|elephant/I +Capture group count = 0 +Starting code units: c d e +Subject length lower bound = 3 + this sentence eventually mentions a cat + 0: cat + this sentences rambles on and on for a while and then reaches elephant + 0: elephant + +/cat|dog|elephant/Ii +Capture group count = 0 +Options: caseless +Starting code units: C D E c d e +Subject length lower bound = 3 + this sentence eventually mentions a CAT cat + 0: CAT + this sentences rambles on and on for a while to elephant ElePhant + 0: elephant + +/a|[bcd]/I +Capture group count = 0 +Starting code units: a b c d +Subject length lower bound = 1 + +/(a|[^\dZ])/I +Capture group count = 1 +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y [ \ ] ^ _ ` a b c d + e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 + \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 + \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 + \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 + \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf + \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce + \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd + \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec + \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb + \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/(a|b)*[\s]/I +Capture group count = 1 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 a b +Subject length lower bound = 1 + +/(ab\2)/ +Failed: error 115 at offset 4: reference to non-existent subpattern + +/{4,5}abc/ +Failed: error 109 at offset 4: quantifier does not follow a repeatable item + +/(a)(b)(c)\2/I +Capture group count = 3 +Max back reference = 2 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 4 + abcb + 0: abcb + 1: a + 2: b + 3: c + abcb\=ovector=0 + 0: abcb + 1: a + 2: b + 3: c + abcb\=ovector=1 +Matched, but too many substrings + 0: abcb + abcb\=ovector=2 +Matched, but too many substrings + 0: abcb + 1: a + abcb\=ovector=3 +Matched, but too many substrings + 0: abcb + 1: a + 2: b + abcb\=ovector=4 + 0: abcb + 1: a + 2: b + 3: c + +/(a)bc|(a)(b)\2/I +Capture group count = 3 +Max back reference = 2 +First code unit = 'a' +Subject length lower bound = 3 + abc + 0: abc + 1: a + abc\=ovector=0 + 0: abc + 1: a + abc\=ovector=1 +Matched, but too many substrings + 0: abc + abc\=ovector=2 + 0: abc + 1: a + aba + 0: aba + 1: + 2: a + 3: b + aba\=ovector=0 + 0: aba + 1: + 2: a + 3: b + aba\=ovector=1 +Matched, but too 
many substrings + 0: aba + aba\=ovector=2 +Matched, but too many substrings + 0: aba + 1: + aba\=ovector=3 +Matched, but too many substrings + 0: aba + 1: + 2: a + aba\=ovector=4 + 0: aba + 1: + 2: a + 3: b + +/abc$/I,dollar_endonly +Capture group count = 0 +Options: dollar_endonly +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + abc + 0: abc +\= Expect no match + abc\n +No match + abc\ndef +No match + +/(a)(b)(c)(d)(e)\6/ +Failed: error 115 at offset 16: reference to non-existent subpattern + +/the quick brown fox/I +Capture group count = 0 +First code unit = 't' +Last code unit = 'x' +Subject length lower bound = 19 + the quick brown fox + 0: the quick brown fox + this is a line with the quick brown fox + 0: the quick brown fox + +/the quick brown fox/I,anchored +Capture group count = 0 +Options: anchored +First code unit = 't' +Subject length lower bound = 19 + the quick brown fox + 0: the quick brown fox +\= Expect no match + this is a line with the quick brown fox +No match + +/ab(?z)cd/ +Failed: error 111 at offset 4: unrecognized character after (? or (?- + +/^abc|def/I +Capture group count = 0 +Starting code units: a d +Subject length lower bound = 3 + abcdef + 0: abc + abcdef\=notbol + 0: def + +/.*((abc)$|(def))/I +Capture group count = 3 +First code unit at start or follows newline +Subject length lower bound = 3 + defabc + 0: defabc + 1: abc + 2: abc + defabc\=noteol + 0: def + 1: def + 2: + 3: def + +/)/ +Failed: error 122 at offset 0: unmatched closing parenthesis + +/a[]b/ +Failed: error 106 at offset 4: missing terminating ] for character class + +/[^aeiou ]{3,}/I +Capture group count = 0 +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 + 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ + \ ] ^ _ ` b c d f g h j k l m n p q r s t v w x y z { | } ~ \x7f \x80 \x81 + \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 + \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f + \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae + \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd + \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc + \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb + \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea + \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 + \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 3 + co-processors, and for + 0: -pr + +/<.*>/I +Capture group count = 0 +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + abcghinop + 0: ghi + +/<.*?>/I +Capture group count = 0 +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + abcghinop + 0: + +/<.*>/I,ungreedy +Capture group count = 0 +Options: ungreedy +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + abcghinop + 0: + +/(?U)<.*>/I +Capture group count = 0 +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + abcghinop + 0: + +/<.*?>/I,ungreedy +Capture group count = 0 +Options: ungreedy +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + abcghinop + 0: ghi + +/={3,}/I,ungreedy +Capture group count = 0 +Options: ungreedy +First code unit = '=' +Last code unit = '=' +Subject length lower bound = 3 + abc========def + 0: === + +/(?U)={3,}?/I +Capture group count = 0 +First code unit = '=' +Last code unit = '=' +Subject length lower bound = 3 + abc========def + 0: ======== + +/(? 
+Overall options: anchored +First code unit = '1' +Subject length lower bound = 4 + +/(^b|(?i)^d)/I +Capture group count = 1 +Compile options: +Overall options: anchored +Starting code units: D b d +Subject length lower bound = 1 + +/(?s).*/I +Capture group count = 0 +May match empty string +Compile options: +Overall options: anchored +Subject length lower bound = 0 + +/[abcd]/I +Capture group count = 0 +Starting code units: a b c d +Subject length lower bound = 1 + +/(?i)[abcd]/I +Capture group count = 0 +Starting code units: A B C D a b c d +Subject length lower bound = 1 + +/(?m)[xy]|(b|c)/I +Capture group count = 1 +Starting code units: b c x y +Subject length lower bound = 1 + +/(^a|^b)/Im +Capture group count = 1 +Options: multiline +First code unit at start or follows newline +Subject length lower bound = 1 + +/(?i)(^a|^b)/Im +Capture group count = 1 +Options: multiline +First code unit at start or follows newline +Subject length lower bound = 1 + +/(a)(?(1)a|b|c)/ +Failed: error 127 at offset 3: conditional subpattern contains more than two branches + +/(?(?=a)a|b|c)/ +Failed: error 127 at offset 0: conditional subpattern contains more than two branches + +/(?(1a)/ +Failed: error 124 at offset 4: missing closing parenthesis for condition + +/(?(1a))/ +Failed: error 124 at offset 4: missing closing parenthesis for condition + +/(?(?i))/ +Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) + +/(?(abc))/ +Failed: error 115 at offset 3: reference to non-existent subpattern + +/(?(? 
+Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + aaaaabbbbbcccccdef + 0: aaaaabbbbbcccccdef + 1: aaaaabbbbbcccccdef + 2: aaaaa + 3: b + 4: bbbbccccc + 5: def + +/(?<=foo)[ab]/I +Capture group count = 0 +Max lookbehind = 3 +Starting code units: a b +Subject length lower bound = 1 + +/(?^abc)/Im +Capture group count = 0 +Options: multiline +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + abc + 0: abc + def\nabc + 0: abc +\= Expect no match + defabc +No match + +/(?<=ab(c+)d)ef/ +Failed: error 125 at offset 0: length of lookbehind assertion is not limited + +/(?<=ab(?<=c+)d)ef/ +Failed: error 125 at offset 6: length of lookbehind assertion is not limited + +/The next three are in testinput2 because they have variable length branches/ + +/(?<=bullock|donkey)-cart/I +Capture group count = 0 +Max lookbehind = 7 +First code unit = '-' +Last code unit = 't' +Subject length lower bound = 5 + the bullock-cart + 0: -cart + a donkey-cart race + 0: -cart +\= Expect no match + cart +No match + horse-and-cart +No match + +/(?<=ab(?i)x|y|z)/I +Capture group count = 0 +Max lookbehind = 3 +May match empty string +Subject length lower bound = 0 + +/(?>.*)(?<=(abcd)|(xyz))/I +Capture group count = 2 +Max lookbehind = 4 +May match empty string +Subject length lower bound = 0 + alphabetabcd + 0: alphabetabcd + 1: abcd + endingxyz + 0: endingxyz + 1: + 2: xyz + +/(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ/I +Capture group count = 0 +Max lookbehind = 4 +First code unit = 'Z' +Last code unit = 'Z' +Subject length lower bound = 2 + abxyZZ + 0: ZZ + abXyZZ + 0: ZZ + ZZZ + 0: ZZ + zZZ + 0: ZZ + bZZ + 0: ZZ + BZZ + 0: ZZ +\= Expect no match + ZZ +No match + abXYZZ +No match + zzz +No match + bzz +No match + +/(? 
+Overall options: anchored +Starting code units: a b +Subject length lower bound = 4 + adef\=get=1,get=2,get=3,get=4,getall + 0: adef + 1: a + 2: + 3: f + 1G a (1) +Get substring 2 failed (-55): requested value is not set + 3G f (1) +Get substring 4 failed (-49): unknown substring + 0L adef + 1L a + 2L + 3L f + bcdef\=get=1,get=2,get=3,get=4,getall + 0: bcdef + 1: bc + 2: bc + 3: f + 1G bc (2) + 2G bc (2) + 3G f (1) +Get substring 4 failed (-49): unknown substring + 0L bcdef + 1L bc + 2L bc + 3L f + adefghijk\=copy=0 + 0: adef + 1: a + 2: + 3: f + 0C adef (4) + +/^abc\00def/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 7 + abc\00def\=copy=0,getall + 0: abc\x00def + 0C abc\x00def (7) + 0L abc\x00def + +/word ((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ +)((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ +)?)?)?)?)?)?)?)?)?otherword/I +Capture group count = 8 +Contains explicit CR or LF match +First code unit = 'w' +Last code unit = 'd' +Subject length lower bound = 14 + +/.*X/IB +------------------------------------------------------------------ + Bra + Any* + X + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 'X' +Subject length lower bound = 1 + +/.*X/IBs +------------------------------------------------------------------ + Bra + AllAny* + X + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'X' +Subject length lower bound = 1 + +/(.*X|^B)/IB +------------------------------------------------------------------ + Bra + CBra 1 + Any* + X + Alt + ^ + B + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +First code unit at start or follows 
newline +Subject length lower bound = 1 + +/(.*X|^B)/IBs +------------------------------------------------------------------ + Bra + CBra 1 + AllAny* + X + Alt + ^ + B + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Compile options: dotall +Overall options: anchored dotall +Subject length lower bound = 1 + +/(?s)(.*X|^B)/IB +------------------------------------------------------------------ + Bra + CBra 1 + AllAny* + X + Alt + ^ + B + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Compile options: +Overall options: anchored +Subject length lower bound = 1 + +/(?s:.*X|^B)/IB +------------------------------------------------------------------ + Bra + Bra + AllAny* + X + Alt + ^ + B + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Subject length lower bound = 1 + +/\Biss\B/I,aftertext +Capture group count = 0 +Max lookbehind = 1 +First code unit = 'i' +Last code unit = 's' +Subject length lower bound = 3 + Mississippi + 0: iss + 0+ issippi + +/iss/I,aftertext,altglobal +Capture group count = 0 +First code unit = 'i' +Last code unit = 's' +Subject length lower bound = 3 + Mississippi + 0: iss + 0+ issippi + 0: iss + 0+ ippi + +/\Biss\B/I,aftertext,altglobal +Capture group count = 0 +Max lookbehind = 1 +First code unit = 'i' +Last code unit = 's' +Subject length lower bound = 3 + Mississippi + 0: iss + 0+ issippi + +/\Biss\B/Ig,aftertext +Capture group count = 0 +Max lookbehind = 1 +First code unit = 'i' +Last code unit = 's' +Subject length lower bound = 3 + Mississippi + 0: iss + 0+ issippi + 0: iss + 0+ ippi +\= Expect no match + Mississippi\=anchored +No match + +/(?<=[Ms])iss/Ig,aftertext +Capture group count = 0 +Max lookbehind = 1 +First code unit = 'i' +Last code unit = 's' +Subject length lower bound = 3 + Mississippi + 0: iss + 
0+ issippi + 0: iss + 0+ ippi + +/(?<=[Ms])iss/I,aftertext,altglobal +Capture group count = 0 +Max lookbehind = 1 +First code unit = 'i' +Last code unit = 's' +Subject length lower bound = 3 + Mississippi + 0: iss + 0+ issippi + +/^iss/Ig,aftertext +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'i' +Subject length lower bound = 3 + ississippi + 0: iss + 0+ issippi + +/.*iss/Ig,aftertext +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 's' +Subject length lower bound = 3 + abciss\nxyzisspqr + 0: abciss + 0+ \x0axyzisspqr + 0: xyziss + 0+ pqr + +/.i./Ig,aftertext +Capture group count = 0 +Last code unit = 'i' +Subject length lower bound = 3 + Mississippi + 0: Mis + 0+ sissippi + 0: sis + 0+ sippi + 0: sip + 0+ pi + Mississippi\=anchored + 0: Mis + 0+ sissippi + 0: sis + 0+ sippi + 0: sip + 0+ pi + Missouri river + 0: Mis + 0+ souri river + 0: ri + 0+ river + 0: riv + 0+ er + Missouri river\=anchored + 0: Mis + 0+ souri river + +/^.is/Ig,aftertext +Capture group count = 0 +Compile options: +Overall options: anchored +Subject length lower bound = 3 + Mississippi + 0: Mis + 0+ sissippi + +/^ab\n/Ig,aftertext +Capture group count = 0 +Contains explicit CR or LF match +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + ab\nab\ncd + 0: ab\x0a + 0+ ab\x0acd + +/^ab\n/Igm,aftertext +Capture group count = 0 +Contains explicit CR or LF match +Options: multiline +First code unit at start or follows newline +Last code unit = \x0a +Subject length lower bound = 3 + ab\nab\ncd + 0: ab\x0a + 0+ ab\x0acd + 0: ab\x0a + 0+ cd + +/^/gm,newline=any + a\rb\nc\r\nxyz\=aftertext + 0: + 0+ a\x0db\x0ac\x0d\x0axyz + 0: + 0+ b\x0ac\x0d\x0axyz + 0: + 0+ c\x0d\x0axyz + 0: + 0+ xyz + +/abc/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/abc|bac/I +Capture group count = 0 +Starting code units: a b +Last code 
unit = 'c' +Subject length lower bound = 3 + +/(abc|bac)/I +Capture group count = 1 +Starting code units: a b +Last code unit = 'c' +Subject length lower bound = 3 + +/(abc|(c|dc))/I +Capture group count = 2 +Starting code units: a c d +Last code unit = 'c' +Subject length lower bound = 1 + +/(abc|(d|de)c)/I +Capture group count = 2 +Starting code units: a d +Last code unit = 'c' +Subject length lower bound = 2 + +/a*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + +/a+/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 1 + +/(baa|a+)/I +Capture group count = 1 +Starting code units: a b +Last code unit = 'a' +Subject length lower bound = 1 + +/a{0,3}/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + +/baa{3,}/I +Capture group count = 0 +First code unit = 'b' +Last code unit = 'a' +Subject length lower bound = 5 + +/"([^\\"]+|\\.)*"/I +Capture group count = 1 +First code unit = '"' +Last code unit = '"' +Subject length lower bound = 2 + +/(abc|ab[cd])/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 3 + +/(a|.)/I +Capture group count = 1 +Subject length lower bound = 1 + +/a|ba|\w/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/abc(?=pqr)/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'r' +Subject length lower bound = 3 + +/...(?<=abc)/I +Capture group count = 0 +Max lookbehind = 3 +Subject length lower bound = 3 + +/abc(?!pqr)/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/ab./I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + +/ab[xyz]/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 
+ +/abc*/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + +/ab.c*/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + +/a.c*/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 2 + +/.c*/I +Capture group count = 0 +Subject length lower bound = 1 + +/ac*/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 1 + +/(a.c*|b.c*)/I +Capture group count = 1 +Starting code units: a b +Subject length lower bound = 2 + +/a.c*|aba/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 2 + +/.+a/I +Capture group count = 0 +Last code unit = 'a' +Subject length lower bound = 2 + +/(?=abcda)a.*/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'a' +Subject length lower bound = 2 + +/(?=a)a.*/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 1 + +/a(b)*/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/a\d*/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 1 + +/ab\d*/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + +/a(\d)*/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/abcde{0,0}/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 4 + +/ab\d+/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + +/a(?(1)b)(.)/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' +Subject length lower bound = 2 + +/a(?(1)bag|big)(.)/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' +Last code unit = 'g' +Subject length lower bound = 5 + +/a(?(1)bag|big)*(.)/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' +Subject length lower bound = 2 
+ +/a(?(1)bag|big)+(.)/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' +Last code unit = 'g' +Subject length lower bound = 5 + +/a(?(1)b..|b..)(.)/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 5 + +/ab\d{0}e/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 3 + +/a?b?/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + a + 0: a + b + 0: b + ab + 0: ab + \ + 0: +\= Expect no match + \=notempty +No match + +/|-/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + abcd + 0: + -abc + 0: + ab-c\=notempty + 0: - +\= Expect no match + abc\=notempty +No match + +/^.?abcd/I +Capture group count = 0 +Compile options: +Overall options: anchored +Last code unit = 'd' +Subject length lower bound = 4 + +/\( # ( at start + (?: # Non-capturing bracket + (?>[^()]+) # Either a sequence of non-brackets (no backtracking) + | # Or + (?R) # Recurse - i.e. 
nested bracketed string + )* # Zero or more contents + \) # Closing ) + /Ix +Capture group count = 0 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (abcd) + 0: (abcd) + (abcd)xyz + 0: (abcd) + xyz(abcd) + 0: (abcd) + (ab(xy)cd)pqr + 0: (ab(xy)cd) + (ab(xycd)pqr + 0: (xycd) + () abc () + 0: () + 12(abcde(fsh)xyz(foo(bar))lmno)89 + 0: (abcde(fsh)xyz(foo(bar))lmno) +\= Expect no match + abcd +No match + abcd) +No match + (abcd +No match + +/\( ( (?>[^()]+) | (?R) )* \) /Igx +Capture group count = 1 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(xy)cd)pqr + 0: (ab(xy)cd) + 1: cd + 1(abcd)(x(y)z)pqr + 0: (abcd) + 1: abcd + 0: (x(y)z) + 1: z + +/\( (?: (?>[^()]+) | (?R) ) \) /Ix +Capture group count = 0 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 3 + (abcd) + 0: (abcd) + (ab(xy)cd) + 0: (xy) + (a(b(c)d)e) + 0: (c) + ((ab)) + 0: ((ab)) +\= Expect no match + () +No match + +/\( (?: (?>[^()]+) | (?R) )? \) /Ix +Capture group count = 0 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + () + 0: () + 12(abcde(fsh)xyz(foo(bar))lmno)89 + 0: (fsh) + +/\( ( (?>[^()]+) | (?R) )* \) /Ix +Capture group count = 1 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(xy)cd) + 0: (ab(xy)cd) + 1: cd + +/\( ( ( (?>[^()]+) | (?R) )* ) \) /Ix +Capture group count = 2 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(xy)cd) + 0: (ab(xy)cd) + 1: ab(xy)cd + 2: cd + +/\( (123)? ( ( (?>[^()]+) | (?R) )* ) \) /Ix +Capture group count = 3 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(xy)cd) + 0: (ab(xy)cd) + 1: + 2: ab(xy)cd + 3: cd + (123ab(xy)cd) + 0: (123ab(xy)cd) + 1: 123 + 2: ab(xy)cd + 3: cd + +/\( ( (123)? 
( (?>[^()]+) | (?R) )* ) \) /Ix +Capture group count = 3 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(xy)cd) + 0: (ab(xy)cd) + 1: ab(xy)cd + 2: + 3: cd + (123ab(xy)cd) + 0: (123ab(xy)cd) + 1: 123ab(xy)cd + 2: 123 + 3: cd + +/\( (((((((((( ( (?>[^()]+) | (?R) )* )))))))))) \) /Ix +Capture group count = 11 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(xy)cd) + 0: (ab(xy)cd) + 1: ab(xy)cd + 2: ab(xy)cd + 3: ab(xy)cd + 4: ab(xy)cd + 5: ab(xy)cd + 6: ab(xy)cd + 7: ab(xy)cd + 8: ab(xy)cd + 9: ab(xy)cd +10: ab(xy)cd +11: cd + +/\( ( ( (?>[^()<>]+) | ((?>[^()]+)) | (?R) )* ) \) /Ix +Capture group count = 3 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (abcd(xyz

qrs)123) + 0: (abcd(xyz

qrs)123) + 1: abcd(xyz

qrs)123 + 2: 123 + +/\( ( ( (?>[^()]+) | ((?R)) )* ) \) /Ix +Capture group count = 3 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(cd)ef) + 0: (ab(cd)ef) + 1: ab(cd)ef + 2: ef + 3: (cd) + (ab(cd(ef)gh)ij) + 0: (ab(cd(ef)gh)ij) + 1: ab(cd(ef)gh)ij + 2: ij + 3: (cd(ef)gh) + +/^[[:alnum:]]/IB +------------------------------------------------------------------ + Bra + ^ + [0-9A-Za-z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/^[[:^alnum:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x00-/:-@[-`{-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 + \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 + \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 + \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 + \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 + \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 + \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 + \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 + \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/^[[:alpha:]]/IB +------------------------------------------------------------------ + Bra + ^ + [A-Za-z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/^[[:^alpha:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x00-@[-`{-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? 
@ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 + \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 + \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 + \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 + \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf + \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde + \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed + \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc + \xfd \xfe \xff +Subject length lower bound = 1 + +/[_[:alpha:]]/I +Capture group count = 0 +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/^[[:ascii:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x00-\x7f] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ + \x7f +Subject length lower bound = 1 + +/^[[:^ascii:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x80-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a + \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 + \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 + \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 + \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 + \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 + \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 + \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 + \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/^[[:blank:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x09 ] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x09 \x20 +Subject length lower bound = 1 + +/^[[:^blank:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x00-\x08\x0a-\x1f!-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b + \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a + \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 + : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ + _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 + \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f + \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e + \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad + \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc + \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb + \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda + \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 + \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 + \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/[\n\x0b\x0c\x0d[:blank:]]/I +Capture group count = 0 +Contains explicit CR or LF match +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Subject length lower bound = 1 + +/^[[:cntrl:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x00-\x1f\x7f] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x7f +Subject length lower bound = 1 + +/^[[:digit:]]/IB +------------------------------------------------------------------ + Bra + ^ + [0-9] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Subject length lower bound = 1 + +/^[[:graph:]]/IB +------------------------------------------------------------------ + Bra + ^ + [!-~] + Ket + End 
+------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : + ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ +Subject length lower bound = 1 + +/^[[:lower:]]/IB +------------------------------------------------------------------ + Bra + ^ + [a-z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/^[[:print:]]/IB +------------------------------------------------------------------ + Bra + ^ + [ -~] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 + 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] + ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ +Subject length lower bound = 1 + +/^[[:punct:]]/IB +------------------------------------------------------------------ + Bra + ^ + [!-/:-@[-`{-~] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: ! " # $ % & ' ( ) * + , - . / : ; < = > ? 
@ [ \ ] ^ + _ ` { | } ~ +Subject length lower bound = 1 + +/^[[:space:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x09-\x0d ] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Subject length lower bound = 1 + +/^[[:upper:]]/IB +------------------------------------------------------------------ + Bra + ^ + [A-Z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z +Subject length lower bound = 1 + +/^[[:xdigit:]]/IB +------------------------------------------------------------------ + Bra + ^ + [0-9A-Fa-f] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f +Subject length lower bound = 1 + +/^[[:word:]]/IB +------------------------------------------------------------------ + Bra + ^ + [0-9A-Z_a-z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/^[[:^cntrl:]]/IB +------------------------------------------------------------------ + Bra + ^ + [ -~\x80-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 + 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] + ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x80 \x81 + \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 + \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f + \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae + \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd + \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc + \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb + \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea + \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 + \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/^[12[:^digit:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x00-/12:-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 1 2 : ; < + = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a + b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 + \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 + \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf + \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe + \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd + \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc + \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb + \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa + \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/^[[:^blank:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x00-\x08\x0a-\x1f!-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b + \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a + \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 + : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ + _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 + \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f + \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e + \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad + \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc + \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb + \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda + \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 + \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 + \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/[01[:alpha:]%]/IB +------------------------------------------------------------------ + Bra + [%01A-Za-z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: % 0 1 A B C D E F G H I J K L M N O P Q R S T U V W + X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/[[.ch.]]/I +Failed: error 113 at offset 1: POSIX collating elements are not supported + +/[[=ch=]]/I +Failed: error 113 at offset 1: POSIX collating elements are not supported + +/[[:rhubarb:]]/I +Failed: error 130 at offset 3: unknown POSIX class name + +/[[:upper:]]/Ii +Capture group count = 0 +Options: caseless +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + A + 0: A + a + 0: a + +/[[:lower:]]/Ii +Capture group count = 0 +Options: caseless +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + A + 0: A + a + 0: a + +/((?-i)[[:lower:]])[[:lower:]]/Ii +Capture 
group count = 1 +Options: caseless +Starting code units: a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 2 + ab + 0: ab + 1: a + aB + 0: aB + 1: a +\= Expect no match + Ab +No match + AB +No match + +/[\200-\110]/I +Failed: error 108 at offset 9: range out of order in character class + +/^(?(0)f|b)oo/I +Failed: error 115 at offset 5: reference to non-existent subpattern + +# This one's here because of the large output vector needed + +/(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(
?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\w+)\s+(\270)/I +Capture 
group count = 271 +Max back reference = 270 +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Subject length lower bound = 1 + 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC\=ovector=300 + 0: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC + 1: 1 + 2: 2 + 3: 3 + 4: 4 + 5: 5 + 6: 6 + 7: 7 + 8: 8 + 9: 9 +10: 10 +11: 11 +12: 12 +13: 13 +14: 14 +15: 15 +16: 16 +17: 17 +18: 18 +19: 19 +20: 20 +21: 21 +22: 22 +23: 23 +24: 24 +25: 25 +26: 26 +27: 27 +28: 28 +29: 29 +30: 30 +31: 31 +32: 32 +33: 33 +34: 34 +35: 35 +36: 36 +37: 37 +38: 38 +39: 39 +40: 40 +41: 41 +42: 42 +43: 43 +44: 44 +45: 45 +46: 46 +47: 47 +48: 48 +49: 49 +50: 50 +51: 51 +52: 52 +53: 53 +54: 54 +55: 55 +56: 56 +57: 57 +58: 58 +59: 59 +60: 60 +61: 61 +62: 62 +63: 63 +64: 64 +65: 65 +66: 66 +67: 67 +68: 68 +69: 69 +70: 70 +71: 71 +72: 72 +73: 73 +74: 74 +75: 75 +76: 76 +77: 77 +78: 78 +79: 79 +80: 80 +81: 81 +82: 82 +83: 83 +84: 84 +85: 85 +86: 86 +87: 87 +88: 88 +89: 89 +90: 90 +91: 91 +92: 92 +93: 93 +94: 94 +95: 95 +96: 96 +97: 97 +98: 98 +99: 99 +100: 100 +101: 101 +102: 102 +103: 103 +104: 104 +105: 105 +106: 106 +107: 107 +108: 108 +109: 109 +110: 110 +111: 111 +112: 112 +113: 113 +114: 114 +115: 115 +116: 116 +117: 117 +118: 118 +119: 119 +120: 120 +121: 121 +122: 122 +123: 123 +124: 124 +125: 125 +126: 126 +127: 127 +128: 128 +129: 129 +130: 130 +131: 131 +132: 132 +133: 133 +134: 134 +135: 135 +136: 136 +137: 137 +138: 138 +139: 139 +140: 140 +141: 141 +142: 142 +143: 143 +144: 144 +145: 145 +146: 146 +147: 147 +148: 148 +149: 149 +150: 150 +151: 151 +152: 152 +153: 153 +154: 154 +155: 155 +156: 156 +157: 157 +158: 158 +159: 159 +160: 160 +161: 161 +162: 162 +163: 163 +164: 164 +165: 165 +166: 166 +167: 167 +168: 168 +169: 169 +170: 170 +171: 171 +172: 172 +173: 173 +174: 174 +175: 175 +176: 176 +177: 177 +178: 178 +179: 179 +180: 180 +181: 181 +182: 182 +183: 183 +184: 184 +185: 185 +186: 186 +187: 187 +188: 188 +189: 189 +190: 190 +191: 191 +192: 192 +193: 193 +194: 194 +195: 195 +196: 196 +197: 197 +198: 198 +199: 199 +200: 200 +201: 201 +202: 202 +203: 203 +204: 204 +205: 205 +206: 206 +207: 207 +208: 208 +209: 209 +210: 210 +211: 211 
+212: 212 +213: 213 +214: 214 +215: 215 +216: 216 +217: 217 +218: 218 +219: 219 +220: 220 +221: 221 +222: 222 +223: 223 +224: 224 +225: 225 +226: 226 +227: 227 +228: 228 +229: 229 +230: 230 +231: 231 +232: 232 +233: 233 +234: 234 +235: 235 +236: 236 +237: 237 +238: 238 +239: 239 +240: 240 +241: 241 +242: 242 +243: 243 +244: 244 +245: 245 +246: 246 +247: 247 +248: 248 +249: 249 +250: 250 +251: 251 +252: 252 +253: 253 +254: 254 +255: 255 +256: 256 +257: 257 +258: 258 +259: 259 +260: 260 +261: 261 +262: 262 +263: 263 +264: 264 +265: 265 +266: 266 +267: 267 +268: 268 +269: 269 +270: ABC +271: ABC + +# This one's here because Perl does this differently and PCRE2 can't at present + +/(main(O)?)+/I +Capture group count = 2 +First code unit = 'm' +Last code unit = 'n' +Subject length lower bound = 4 + mainmain + 0: mainmain + 1: main + mainOmain + 0: mainOmain + 1: main + 2: O + +# These are all cases where Perl does it differently (nested captures) + +/^(a(b)?)+$/I +Capture group count = 2 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 1 + aba + 0: aba + 1: a + 2: b + +/^(aa(bb)?)+$/I +Capture group count = 2 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: aa + 2: bb + +/^(aa|aa(bb))+$/I +Capture group count = 2 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: aa + 2: bb + +/^(aa(bb)??)+$/I +Capture group count = 2 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: aa + 2: bb + +/^(?:aa(bb)?)+$/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: bb + +/^(aa(b(b))?)+$/I +Capture group count = 3 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + 
aabbaa + 0: aabbaa + 1: aa + 2: bb + 3: b + +/^(?:aa(b(b))?)+$/I +Capture group count = 2 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: bb + 2: b + +/^(?:aa(b(?:b))?)+$/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: bb + +/^(?:aa(bb(?:b))?)+$/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbbaa + 0: aabbbaa + 1: bbb + +/^(?:aa(b(?:bb))?)+$/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbbaa + 0: aabbbaa + 1: bbb + +/^(?:aa(?:b(b))?)+$/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: b + +/^(?:aa(?:b(bb))?)+$/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbbaa + 0: aabbbaa + 1: bb + +/^(aa(b(bb))?)+$/I +Capture group count = 3 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbbaa + 0: aabbbaa + 1: aa + 2: bbb + 3: bb + +/^(aa(bb(bb))?)+$/I +Capture group count = 3 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbbbaa + 0: aabbbbaa + 1: aa + 2: bbbb + 3: bb + +# ---------------- + +/#/IBx +------------------------------------------------------------------ + Bra + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/IBx +------------------------------------------------------------------ + Bra + a + Ket + End +------------------------------------------------------------------ +Capture group 
count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + +/[\s]/IB +------------------------------------------------------------------ + Bra + [\x09-\x0d ] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Subject length lower bound = 1 + +/[\S]/IB +------------------------------------------------------------------ + Bra + [\x00-\x08\x0e-\x1f!-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 + \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 + \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 + \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 + \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf + \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde + \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed + \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc + \xfd \xfe \xff +Subject length lower bound = 1 + +/a(?i)b/IB +------------------------------------------------------------------ + Bra + a + /i b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' (caseless) +Subject length lower bound = 2 + ab + 0: ab + aB + 0: aB +\= Expect no match + AB +No 
match + +/(a(?i)b)/IB +------------------------------------------------------------------ + Bra + CBra 1 + a + /i b + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +First code unit = 'a' +Last code unit = 'b' (caseless) +Subject length lower bound = 2 + ab + 0: ab + 1: ab + aB + 0: aB + 1: aB +\= Expect no match + AB +No match + +/ (?i)abc/IBx +------------------------------------------------------------------ + Bra + /i abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended +First code unit = 'a' (caseless) +Last code unit = 'c' (caseless) +Subject length lower bound = 3 + +/#this is a comment + (?i)abc/IBx +------------------------------------------------------------------ + Bra + /i abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended +First code unit = 'a' (caseless) +Last code unit = 'c' (caseless) +Subject length lower bound = 3 + +/123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890/IB +------------------------------------------------------------------ + Bra + 123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = '1' +Last code unit = '0' +Subject length lower bound = 300 + 
+/\Q123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890/IB +------------------------------------------------------------------ + Bra + 123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = '1' +Last code unit = '0' +Subject length lower bound = 300 + +/\Q\E/IB +------------------------------------------------------------------ + Bra + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + \ + 0: + +/\Q\Ex/IB +------------------------------------------------------------------ + Bra + x + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'x' +Subject length lower bound = 1 + +/ \Q\E/IB +------------------------------------------------------------------ + Bra + + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = ' ' +Subject length lower bound = 1 + +/a\Q\E/IB +------------------------------------------------------------------ + Bra + a + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 1 + abc + 0: a + bca + 0: a + bac + 0: a + +/a\Q\Eb/IB +------------------------------------------------------------------ + 
Bra + ab + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + abc + 0: ab + +/\Q\Eabc/IB +------------------------------------------------------------------ + Bra + abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/x*+\w/IB +------------------------------------------------------------------ + Bra + x*+ + \w + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 +\= Expect no match + xxxxx +No match + +/x?+/IB +------------------------------------------------------------------ + Bra + x?+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + +/x++/IB +------------------------------------------------------------------ + Bra + x++ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'x' +Subject length lower bound = 1 + +/x{1,3}+/B,no_auto_possess +------------------------------------------------------------------ + Bra + x + x{0,2}+ + Ket + End +------------------------------------------------------------------ + +/x{1,3}+/Bi,no_auto_possess +------------------------------------------------------------------ + Bra + /i x + /i x{0,2}+ + Ket + End +------------------------------------------------------------------ + +/[^x]{1,3}+/B,no_auto_possess +------------------------------------------------------------------ + Bra + [^x] + [^x]{0,2}+ + Ket + End 
+------------------------------------------------------------------ + +/[^x]{1,3}+/Bi,no_auto_possess +------------------------------------------------------------------ + Bra + /i [^x] + /i [^x]{0,2}+ + Ket + End +------------------------------------------------------------------ + +/(x)*+/IB +------------------------------------------------------------------ + Bra + Braposzero + CBraPos 1 + x + KetRpos + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +May match empty string +Subject length lower bound = 0 + +/^(\w++|\s++)*$/I +Capture group count = 1 +May match empty string +Compile options: +Overall options: anchored +Subject length lower bound = 0 + now is the time for all good men to come to the aid of the party + 0: now is the time for all good men to come to the aid of the party + 1: party +\= Expect no match + this is not a line with only words and spaces! +No match + +/(\d++)(\w)/I +Capture group count = 2 +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Subject length lower bound = 2 + 12345a + 0: 12345a + 1: 12345 + 2: a +\= Expect no match + 12345+ +No match + +/a++b/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + aaab + 0: aaab + +/(a++b)/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + aaab + 0: aaab + 1: aaab + +/(a++)b/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + aaab + 0: aaab + 1: aaa + +/([^()]++|\([^()]*\))+/I +Capture group count = 1 +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( * + , - . / 0 1 2 3 4 5 + 6 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e + \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d + \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac + \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb + \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca + \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 + \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 + \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 + \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + ((abc(ade)ufh()()x + 0: abc(ade)ufh()()x + 1: x + +/\(([^()]++|\([^()]+\))+\)/I +Capture group count = 1 +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 3 + (abc) + 0: (abc) + 1: abc + (abc(def)xyz) + 0: (abc(def)xyz) + 1: xyz +\= Expect no match + ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/(abc){1,3}+/IB +------------------------------------------------------------------ + Bra + Once + CBra 1 + abc + Ket + Brazero + Bra + CBra 1 + abc + Ket + Brazero + CBra 1 + abc + Ket + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/a+?+/I +Failed: error 109 at offset 3: quantifier does not follow a repeatable item + +/a{2,3}?+b/I +Failed: error 109 at offset 7: quantifier does not follow a repeatable item + +/(?U)a+?+/I +Failed: error 109 at offset 7: quantifier does not follow a repeatable item + +/a{2,3}?+b/I,ungreedy +Failed: error 109 at offset 7: quantifier does not follow a repeatable item + +/x(?U)a++b/IB 
+------------------------------------------------------------------ + Bra + x + a++ + b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'x' +Last code unit = 'b' +Subject length lower bound = 3 + xaaaab + 0: xaaaab + +/(?U)xa++b/IB +------------------------------------------------------------------ + Bra + x + a++ + b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'x' +Last code unit = 'b' +Subject length lower bound = 3 + xaaaab + 0: xaaaab + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/IB +------------------------------------------------------------------ + Bra + ^ + CBra 1 + CBra 2 + a+ + Ket + CBra 3 + [ab]+? + Ket + CBra 4 + [bc]+ + Ket + CBra 5 + \w*+ + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 5 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + +/^x(?U)a+b/IB +------------------------------------------------------------------ + Bra + ^ + x + a++ + b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'x' +Last code unit = 'b' +Subject length lower bound = 3 + +/^x(?U)(a+)b/IB +------------------------------------------------------------------ + Bra + ^ + x + CBra 1 + a+? 
+ Ket + b + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'x' +Last code unit = 'b' +Subject length lower bound = 3 + +/[.x.]/I +Failed: error 113 at offset 0: POSIX collating elements are not supported + +/[=x=]/I +Failed: error 113 at offset 0: POSIX collating elements are not supported + +/[:x:]/I +Failed: error 112 at offset 0: POSIX named classes are supported only within a class + +/\F/I +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\l/I +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\L/I +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\N{name}/I +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\u/I +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\U/I +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\N{4}/ + abcdefg + 0: abcd + +/\N{,}/ +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\N{25,ab}/ +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/a{1,3}b/ungreedy + ab + 0: ab + +/[/I +Failed: error 106 at offset 1: missing terminating ] for character class + +/[a-/I +Failed: error 106 at offset 3: missing terminating ] for character class + +/[[:space:]/I +Failed: error 106 at offset 10: missing terminating ] for character class + +/[\s]/IB +------------------------------------------------------------------ + Bra + [\x09-\x0d ] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Subject length lower bound = 1 + +/[[:space:]]/IB +------------------------------------------------------------------ + Bra + 
[\x09-\x0d ] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Subject length lower bound = 1 + +/[[:space:]abcde]/IB +------------------------------------------------------------------ + Bra + [\x09-\x0d a-e] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 a b c d e +Subject length lower bound = 1 + +/< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >/Ix +Capture group count = 0 +Options: extended +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + <> + 0: <> + + 0: + hij> + 0: hij> + hij> + 0: + def> + 0: def> + + 0: <> +\= Expect no match + iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b/IB +------------------------------------------------------------------ + Bra + 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X + \b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Max lookbehind = 1 +First code unit = '8' +Last code unit = 'X' +Subject length lower bound = 409 + 
+/\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b/IB +------------------------------------------------------------------ + Bra + $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X + \b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Max lookbehind = 1 +First code unit = '$' +Last code unit = 'X' +Subject length lower bound = 404 + +/(.*)\d+\1/I +Capture group count = 1 +Max back reference = 1 +Subject length lower bound = 1 + +/(.*)\d+/I +Capture group count = 1 +First code unit at start or follows newline +Subject length lower bound = 1 + +/(.*)\d+\1/Is +Capture group count = 1 +Max back reference = 1 +Options: dotall +Subject length lower bound = 1 + +/(.*)\d+/Is +Capture group count = 1 +Compile options: dotall +Overall options: anchored dotall +Subject length lower bound = 1 + +/(.*(xyz))\d+\2/I +Capture group count = 2 +Max back reference = 2 +First code unit at start or follows newline +Last code unit = 'z' +Subject length lower bound = 7 + +/((.*))\d+\1/I +Capture group count = 2 +Max back reference = 1 +Subject length lower bound = 1 + abc123bc + 0: bc123bc + 1: bc + 2: bc + +/a[b]/I +Capture group count = 0 +First 
code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + +/(?=a).*/I +Capture group count = 0 +May match empty string +First code unit = 'a' +Subject length lower bound = 1 + +/(?=abc).xyz/Ii +Capture group count = 0 +Options: caseless +First code unit = 'a' (caseless) +Last code unit = 'z' (caseless) +Subject length lower bound = 4 + +/(?=abc)(?i).xyz/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'z' (caseless) +Subject length lower bound = 4 + +/(?=a)(?=b)/I +Capture group count = 0 +May match empty string +First code unit = 'a' +Subject length lower bound = 1 + +/(?=.)a/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 1 + +/((?=abcda)a)/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'a' +Subject length lower bound = 2 + +/((?=abcda)ab)/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + +/()a/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/(?:(?=.)|(?abcdef + 0 ^ ^ d + 0: abcdef + 1234abcdef +--->1234abcdef + 0 ^ ^ d + 0: abcdef +\= Expect no match + abcxyz +No match + abcxyzf +--->abcxyzf + 0 ^ ^ d +No match + +/abc(?C)de(?C1)f/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'f' +Subject length lower bound = 6 + 123abcdef +--->123abcdef + 0 ^ ^ d + 1 ^ ^ f + 0: abcdef + +/(?C1)\dabc(?C2)def/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Last code unit = 'f' +Subject length lower bound = 7 + 1234abcdef +--->1234abcdef + 1 ^ \d + 1 ^ \d + 1 ^ \d + 1 ^ \d + 2 ^ ^ d + 0: 4abcdef +\= Expect no match + abcdef +No match + +/(?C1)\dabc(?C2)def/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Last code unit = 'f' +Subject length lower bound = 7 + 1234abcdef +--->1234abcdef + 1 ^ \d + 1 ^ \d + 1 ^ \d + 1 ^ \d + 2 ^ ^ d + 0: 4abcdef +\= Expect no match + abcdef +No match + +/(?C255)ab/I +Capture group count = 0 +First code unit = 
'a' +Last code unit = 'b' +Subject length lower bound = 2 + +/(?C256)ab/I +Failed: error 138 at offset 6: number after (?C is greater than 255 + +/(?Cab)xx/I +Failed: error 182 at offset 3: unrecognized string delimiter follows (?C + +/(?C12vr)x/I +Failed: error 139 at offset 5: closing parenthesis for (?C expected + +/abc(?C)def/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'f' +Subject length lower bound = 6 + \x83\x0\x61bcdef +--->\x83\x00abcdef + 0 ^ ^ d + 0: abcdef + +/(abc)(?C)de(?C1)f/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'f' +Subject length lower bound = 6 + 123abcdef +--->123abcdef + 0 ^ ^ d + 1 ^ ^ f + 0: abcdef + 1: abc + 123abcdef\=callout_capture +Callout 0: last capture = 1 + 1: abc +--->123abcdef + ^ ^ d +Callout 1: last capture = 1 + 1: abc +--->123abcdef + ^ ^ f + 0: abcdef + 1: abc + 123abcdefC-\=callout_none + 0: abcdef + 1: abc +\= Expect no match + 123abcdef\=callout_fail=1 +--->123abcdef + 0 ^ ^ d + 1 ^ ^ f +No match + +/(?C0)(abc(?C1))*/I +Capture group count = 1 +May match empty string +Subject length lower bound = 0 + abcabcabc +--->abcabcabc + 0 ^ ( + 1 ^ ^ )* + 1 ^ ^ )* + 1 ^ ^ )* + 0: abcabcabc + 1: abc + abcabc\=callout_fail=1:4 +--->abcabc + 0 ^ ( + 1 ^ ^ )* + 1 ^ ^ )* + 0: abcabc + 1: abc + abcabcabc\=callout_fail=1:4 +--->abcabcabc + 0 ^ ( + 1 ^ ^ )* + 1 ^ ^ )* + 1 ^ ^ )* + 0: abcabc + 1: abc + +/(\d{3}(?C))*/I +Capture group count = 1 +May match empty string +Subject length lower bound = 0 + 123\=callout_capture +Callout 0: last capture = 0 +--->123 + ^ ^ )* + 0: 123 + 1: 123 + 123456\=callout_capture +Callout 0: last capture = 0 +--->123456 + ^ ^ )* +Callout 0: last capture = 1 + 1: 123 +--->123456 + ^ ^ )* + 0: 123456 + 1: 456 + 123456789\=callout_capture +Callout 0: last capture = 0 +--->123456789 + ^ ^ )* +Callout 0: last capture = 1 + 1: 123 +--->123456789 + ^ ^ )* +Callout 0: last capture = 1 + 1: 456 +--->123456789 + ^ ^ )* + 0: 123456789 + 1: 789 + 
+/((xyz)(?C)p|(?C1)xyzabc)/I +Capture group count = 2 +First code unit = 'x' +Subject length lower bound = 4 + xyzabc\=callout_capture +Callout 0: last capture = 2 + 1: + 2: xyz +--->xyzabc + ^ ^ p +Callout 1: last capture = 0 +--->xyzabc + ^ x + 0: xyzabc + 1: xyzabc + +/(X)((xyz)(?C)p|(?C1)xyzabc)/I +Capture group count = 3 +First code unit = 'X' +Last code unit = 'x' +Subject length lower bound = 5 + Xxyzabc\=callout_capture +Callout 0: last capture = 3 + 1: X + 2: + 3: xyz +--->Xxyzabc + ^ ^ p +Callout 1: last capture = 1 + 1: X +--->Xxyzabc + ^^ x + 0: Xxyzabc + 1: X + 2: xyzabc + +/(?=(abc))(?C)abcdef/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'f' +Subject length lower bound = 6 + abcdef\=callout_capture +Callout 0: last capture = 1 + 1: abc +--->abcdef + ^ a + 0: abcdef + 1: abc + +/(?!(abc)(?C1)d)(?C2)abcxyz/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'z' +Subject length lower bound = 6 + abcxyz\=callout_capture +Callout 1: last capture = 1 + 1: abc +--->abcxyz + ^ ^ d +Callout 2: last capture = 0 +--->abcxyz + ^ a + 0: abcxyz + +/(?<=(abc)(?C))xyz/I +Capture group count = 1 +Max lookbehind = 3 +First code unit = 'x' +Last code unit = 'z' +Subject length lower bound = 3 + abcxyz\=callout_capture +Callout 0: last capture = 1 + 1: abc +--->abcxyz + ^ ) + 0: xyz + 1: abc + +/a(b+)(c*)(?C1)/I +Capture group count = 2 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 +\= Expect no match + abbbbbccc\=callout_data=1 +--->abbbbbccc + 1 ^ ^ End of pattern +Callout data = 1 +No match + +/a(b+?)(c*?)(?C1)/I +Capture group count = 2 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 +\= Expect no match + abbbbbccc\=callout_data=1 +--->abbbbbccc + 1 ^ ^ End of pattern +Callout data = 1 + 1 ^ ^ End of pattern +Callout data = 1 + 1 ^ ^ End of pattern +Callout data = 1 + 1 ^ ^ End of pattern +Callout data = 1 + 1 ^ ^ End of pattern +Callout data = 1 + 1 ^ ^ End of pattern 
+Callout data = 1 + 1 ^ ^ End of pattern +Callout data = 1 + 1 ^ ^ End of pattern +Callout data = 1 +No match + +/(?C)abc/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/(?C)^abc/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + +/(?C)a|b/I +Capture group count = 0 +Starting code units: a b +Subject length lower bound = 1 + +/a|(b)(?C)/I +Capture group count = 1 +Starting code units: a b +Subject length lower bound = 1 + b +--->b + 0 ^^ End of pattern + 0: b + 1: b + +/x(ab|(bc|(de|(?R))))/I +Capture group count = 3 +First code unit = 'x' +Subject length lower bound = 3 + xab + 0: xab + 1: ab + xbc + 0: xbc + 1: bc + 2: bc + xde + 0: xde + 1: de + 2: de + 3: de + xxab + 0: xxab + 1: xab + 2: xab + 3: xab + xxxab + 0: xxxab + 1: xxab + 2: xxab + 3: xxab +\= Expect no match + xyab +No match + +/^([^()]|\((?1)*\))*$/I +Capture group count = 1 +May match empty string +Compile options: +Overall options: anchored +Subject length lower bound = 0 + abc + 0: abc + 1: c + a(b)c + 0: a(b)c + 1: c + a(b(c))d + 0: a(b(c))d + 1: d +\= Expect no match) + a(b(c)d +No match + +/^>abc>([^()]|\((?1)*\))* +Overall options: anchored +First code unit = '>' +Last code unit = '<' +Subject length lower bound = 10 + >abc>123abc>123abc>1(2)3abc>1(2)3abc>(1(2)3)abc>(1(2)3) +Overall options: anchored +Starting code units: ( - 0 1 2 3 4 5 6 7 8 9 +Subject length lower bound = 1 + 12 + 0: 12 + 1: 12 + (((2+2)*-3)-7) + 0: (((2+2)*-3)-7) + 1: (((2+2)*-3)-7) + 2: - + -12 + 0: -12 + 1: -12 +\= Expect no match + ((2+2)*-3)-7) +No match + +/^(x(y|(?1){2})z)/I +Capture group count = 2 +Compile options: +Overall options: anchored +First code unit = 'x' +Subject length lower bound = 3 + xyz + 0: xyz + 1: xyz + 2: y + xxyzxyzz + 0: xxyzxyzz + 1: xxyzxyzz + 2: xyzxyz +\= Expect no match + xxyzz +No match + xxyzxyzxyzz +No match + +/((< (?: (?(R) \d++ | [^<>]*+) | (?2)) 
* >))/Ix +Capture group count = 2 +Options: extended +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + <> + 0: <> + 1: <> + 2: <> + + 0: + 1: + 2: + hij> + 0: hij> + 1: hij> + 2: hij> + hij> + 0: + 1: + 2: + def> + 0: def> + 1: def> + 2: def> + + 0: <> + 1: <> + 2: <> +\= Expect no match + +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 9 + abcdefabc + 0: abcdefabc + 1: abc + +/^(a|b|c)=(?1)+/I +Capture group count = 1 +Compile options: +Overall options: anchored +Starting code units: a b c +Subject length lower bound = 2 + a=a + 0: a=a + 1: a + a=b + 0: a=b + 1: a + a=bc + 0: a=bc + 1: a + +/^(a|b|c)=((?1))+/I +Capture group count = 2 +Compile options: +Overall options: anchored +Starting code units: a b c +Subject length lower bound = 2 + a=a + 0: a=a + 1: a + 2: a + a=b + 0: a=b + 1: a + 2: b + a=bc + 0: a=bc + 1: a + 2: c + +/a(?Pb|c)d(?Pe)/IB +------------------------------------------------------------------ + Bra + a + CBra 1 + b + Alt + c + Ket + d + CBra 2 + e + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +Named capture groups: + longername2 2 + name1 1 +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 4 + abde + 0: abde + 1: b + 2: e + acde + 0: acde + 1: c + 2: e + +/(?:a(?Pc(?Pd)))(?Pa)/IB +------------------------------------------------------------------ + Bra + Bra + a + CBra 1 + c + CBra 2 + d + Ket + Ket + Ket + CBra 3 + a + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 3 +Named capture groups: + a 3 + c 1 + d 2 +First code unit = 'a' +Last code unit = 'a' +Subject length lower bound = 4 + +/(?Pa)...(?P=a)bbb(?P>a)d/IB +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Any + Any + Any + \1 + bbb + Recurse + d + Ket + End +------------------------------------------------------------------ +Capture 
group count = 1 +Max back reference = 1 +Named capture groups: + a 1 +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 10 + +/^\W*(?:(?P(?P.)\W*(?P>one)\W*(?P=two)|)|(?P(?P.)\W*(?P>three)\W*(?P=four)|\W*.\W*))\W*$/Ii +Capture group count = 4 +Max back reference = 4 +Named capture groups: + four 4 + one 1 + three 3 + two 2 +May match empty string +Compile options: caseless +Overall options: anchored caseless +Subject length lower bound = 0 + 1221 + 0: 1221 + 1: 1221 + 2: 1 + Satan, oscillate my metallic sonatas! + 0: Satan, oscillate my metallic sonatas! + 1: + 2: + 3: Satan, oscillate my metallic sonatas + 4: S + A man, a plan, a canal: Panama! + 0: A man, a plan, a canal: Panama! + 1: + 2: + 3: A man, a plan, a canal: Panama + 4: A + Able was I ere I saw Elba. + 0: Able was I ere I saw Elba. + 1: + 2: + 3: Able was I ere I saw Elba + 4: A +\= Expect no match + The quick brown fox +No match + +/((?(R)a|b))\1(?1)?/I +Capture group count = 1 +Max back reference = 1 +Subject length lower bound = 2 + bb + 0: bb + 1: b + bbaa + 0: bba + 1: b + +/(.*)a/Is +Capture group count = 1 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'a' +Subject length lower bound = 1 + +/(.*)a\1/Is +Capture group count = 1 +Max back reference = 1 +Options: dotall +Last code unit = 'a' +Subject length lower bound = 1 + +/(.*)a(b)\2/Is +Capture group count = 2 +Max back reference = 2 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'b' +Subject length lower bound = 3 + +/((.*)a|(.*)b)z/Is +Capture group count = 3 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'z' +Subject length lower bound = 2 + +/((.*)a|(.*)b)z\1/Is +Capture group count = 3 +Max back reference = 1 +Options: dotall +Last code unit = 'z' +Subject length lower bound = 3 + +/((.*)a|(.*)b)z\2/Is +Capture group count = 3 +Max back reference = 2 +Options: dotall +Last code unit = 'z' +Subject length lower bound = 2 + 
+/((.*)a|(.*)b)z\3/Is +Capture group count = 3 +Max back reference = 3 +Options: dotall +Last code unit = 'z' +Subject length lower bound = 2 + +/((.*)a|^(.*)b)z\3/Is +Capture group count = 3 +Max back reference = 3 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'z' +Subject length lower bound = 2 + +/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a/Is +Capture group count = 31 +May match empty string +Compile options: dotall +Overall options: anchored dotall +Subject length lower bound = 0 + +/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\31/Is +Capture group count = 31 +Max back reference = 31 +May match empty string +Options: dotall +Subject length lower bound = 0 + +/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\32/Is +Capture group count = 32 +Max back reference = 32 +May match empty string +Options: dotall +Subject length lower bound = 0 + +/(a)(bc)/IB,no_auto_capture +------------------------------------------------------------------ + Bra + Bra + a + Ket + Bra + bc + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: no_auto_capture +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + abc + 0: abc + +/(?Pa)(bc)/IB,no_auto_capture +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Bra + bc + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Named capture groups: + one 1 +Options: no_auto_capture +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + abc + 0: abc + 1: a + 
+/(a)(?Pbc)/IB,no_auto_capture +------------------------------------------------------------------ + Bra + Bra + a + Ket + CBra 1 + bc + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Named capture groups: + named 1 +Options: no_auto_capture +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/(aaa(?C1)bbb|ab)/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + aaabbb +--->aaabbb + 1 ^ ^ b + 0: aaabbb + 1: aaabbb + aaabbb\=callout_data=0 +--->aaabbb + 1 ^ ^ b + 0: aaabbb + 1: aaabbb + aaabbb\=callout_data=1 +--->aaabbb + 1 ^ ^ b +Callout data = 1 + 0: ab + 1: ab +\= Expect no match + aaabbb\=callout_data=-1 +--->aaabbb + 1 ^ ^ b +Callout data = -1 +No match + +/ab(?Pcd)ef(?Pgh)/I +Capture group count = 2 +Named capture groups: + one 1 + two 2 +First code unit = 'a' +Last code unit = 'h' +Subject length lower bound = 8 + abcdefgh + 0: abcdefgh + 1: cd + 2: gh + abcdefgh\=copy=1,get=two + 0: abcdefgh + 1: cd + 2: gh + 1C cd (2) + G gh (2) two (group 2) + abcdefgh\=copy=one,copy=two + 0: abcdefgh + 1: cd + 2: gh + C cd (2) one (group 1) + C gh (2) two (group 2) + abcdefgh\=copy=three + 0: abcdefgh + 1: cd + 2: gh +Number not found for group 'three' +Copy substring 'three' failed (-49): unknown substring + +/(?P)(?P)/IB +------------------------------------------------------------------ + Bra + CBra 1 + Ket + CBra 2 + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +Named capture groups: + Tes 1 + Test 2 +May match empty string +Subject length lower bound = 0 + +/(?P)(?P)/IB +------------------------------------------------------------------ + Bra + CBra 1 + Ket + CBra 2 + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +Named capture groups: + Tes 2 + Test 1 +May match empty string +Subject length lower 
bound = 0 + +/(?Pzz)(?Paa)/I +Capture group count = 2 +Named capture groups: + A 2 + Z 1 +First code unit = 'z' +Last code unit = 'a' +Subject length lower bound = 4 + zzaa\=copy=Z + 0: zzaa + 1: zz + 2: aa + C zz (2) Z (group 1) + zzaa\=copy=A + 0: zzaa + 1: zz + 2: aa + C aa (2) A (group 2) + +/(?Peks)(?Peccs)/I +Failed: error 143 at offset 16: two named subpatterns have the same name (PCRE2_DUPNAMES not set) + +/(?Pabc(?Pdef)(?Pxyz))/I +Failed: error 143 at offset 31: two named subpatterns have the same name (PCRE2_DUPNAMES not set) + +"\[((?P\d+)(,(?P>elem))*)\]"I +Capture group count = 3 +Named capture groups: + elem 2 +First code unit = '[' +Last code unit = ']' +Subject length lower bound = 3 + [10,20,30,5,5,4,4,2,43,23,4234] + 0: [10,20,30,5,5,4,4,2,43,23,4234] + 1: 10,20,30,5,5,4,4,2,43,23,4234 + 2: 10 + 3: ,4234 +\= Expect no match + [] +No match + +"\[((?P\d+)(,(?P>elem))*)?\]"I +Capture group count = 3 +Named capture groups: + elem 2 +First code unit = '[' +Last code unit = ']' +Subject length lower bound = 2 + [10,20,30,5,5,4,4,2,43,23,4234] + 0: [10,20,30,5,5,4,4,2,43,23,4234] + 1: 10,20,30,5,5,4,4,2,43,23,4234 + 2: 10 + 3: ,4234 + [] + 0: [] + +/(a(b(?2)c))?/IB +------------------------------------------------------------------ + Bra + Brazero + CBra 1 + a + CBra 2 + b + Recurse + c + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +May match empty string +Subject length lower bound = 0 + +/(a(b(?2)c))*/IB +------------------------------------------------------------------ + Bra + Brazero + CBra 1 + a + CBra 2 + b + Recurse + c + Ket + KetRmax + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +May match empty string +Subject length lower bound = 0 + +/(a(b(?2)c)){0,2}/IB +------------------------------------------------------------------ + Bra + Brazero + Bra + CBra 1 + a + CBra 2 + b + Recurse + c + Ket + Ket + Brazero + CBra 1 + 
a + CBra 2 + b + Recurse + c + Ket + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +May match empty string +Subject length lower bound = 0 + +/[ab]{1}+/B +------------------------------------------------------------------ + Bra + [ab] + Ket + End +------------------------------------------------------------------ + +/()(?1){1}/B +------------------------------------------------------------------ + Bra + CBra 1 + Ket + Recurse + Ket + End +------------------------------------------------------------------ + +/()(?1)/B +------------------------------------------------------------------ + Bra + CBra 1 + Ket + Recurse + Ket + End +------------------------------------------------------------------ + +/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii +Capture group count = 3 +Options: caseless +Last code unit = 'g' (caseless) +Subject length lower bound = 8 + Baby Bjorn Active Carrier - With free SHIPPING!! + 0: Baby Bjorn Active Carrier - With free SHIPPING!! + 1: Baby Bjorn Active Carrier - With free SHIPPING!! + +/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii +Capture group count = 3 +Options: caseless +Last code unit = 'g' (caseless) +Subject length lower bound = 8 + Baby Bjorn Active Carrier - With free SHIPPING!! + 0: Baby Bjorn Active Carrier - With free SHIPPING!! + 1: Baby Bjorn Active Carrier - With free SHIPPING!! + +/a*.*b/IB +------------------------------------------------------------------ + Bra + a* + Any* + b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Last code unit = 'b' +Subject length lower bound = 1 + +/(a|b)*.?c/IB +------------------------------------------------------------------ + Bra + Brazero + CBra 1 + a + Alt + b + KetRmax + Any? 
+ c + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Last code unit = 'c' +Subject length lower bound = 1 + +/abc(?C255)de(?C)f/IB +------------------------------------------------------------------ + Bra + abc + Callout 255 10 1 + de + Callout 0 16 1 + f + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'a' +Last code unit = 'f' +Subject length lower bound = 6 + +/abcde/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 1 + a + Callout 255 1 1 + b + Callout 255 2 1 + c + Callout 255 3 1 + d + Callout 255 4 1 + e + Callout 255 5 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: auto_callout +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 5 + abcde +--->abcde + +0 ^ a + +1 ^^ b + +2 ^ ^ c + +3 ^ ^ d + +4 ^ ^ e + +5 ^ ^ End of pattern + 0: abcde +\= Expect no match + abcdfe +--->abcdfe + +0 ^ a + +1 ^^ b + +2 ^ ^ c + +3 ^ ^ d + +4 ^ ^ e +No match + +/a*b/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 2 + a*+ + Callout 255 2 1 + b + Callout 255 3 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: auto_callout +Starting code units: a b +Last code unit = 'b' +Subject length lower bound = 1 + ab +--->ab + +0 ^ a* + +2 ^^ b + +3 ^ ^ End of pattern + 0: ab + aaaab +--->aaaab + +0 ^ a* + +2 ^ ^ b + +3 ^ ^ End of pattern + 0: aaaab + aaaacb +--->aaaacb + +0 ^ a* + +2 ^ ^ b + +0 ^ a* + +2 ^ ^ b + +0 ^ a* + +2 ^ ^ b + +0 ^ a* + +2 ^^ b + +0 ^ a* + +2 ^ b + +3 ^^ End of pattern + 0: b + +/a*b/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 2 + a*+ + Callout 255 2 1 + b + Callout 255 3 0 + Ket + End 
+------------------------------------------------------------------ +Capture group count = 0 +Options: auto_callout +Starting code units: a b +Last code unit = 'b' +Subject length lower bound = 1 + ab +--->ab + +0 ^ a* + +2 ^^ b + +3 ^ ^ End of pattern + 0: ab + aaaab +--->aaaab + +0 ^ a* + +2 ^ ^ b + +3 ^ ^ End of pattern + 0: aaaab + aaaacb +--->aaaacb + +0 ^ a* + +2 ^ ^ b + +0 ^ a* + +2 ^ ^ b + +0 ^ a* + +2 ^ ^ b + +0 ^ a* + +2 ^^ b + +0 ^ a* + +2 ^ b + +3 ^^ End of pattern + 0: b + +/a+b/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 2 + a++ + Callout 255 2 1 + b + Callout 255 3 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: auto_callout +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + ab +--->ab + +0 ^ a+ + +2 ^^ b + +3 ^ ^ End of pattern + 0: ab + aaaab +--->aaaab + +0 ^ a+ + +2 ^ ^ b + +3 ^ ^ End of pattern + 0: aaaab +\= Expect no match + aaaacb +--->aaaacb + +0 ^ a+ + +2 ^ ^ b + +0 ^ a+ + +2 ^ ^ b + +0 ^ a+ + +2 ^ ^ b + +0 ^ a+ + +2 ^^ b +No match + +/(abc|def)x/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 1 + CBra 1 + Callout 255 1 1 + a + Callout 255 2 1 + b + Callout 255 3 1 + c + Callout 255 4 1 + Alt + Callout 255 5 1 + d + Callout 255 6 1 + e + Callout 255 7 1 + f + Callout 255 8 1 + Ket + Callout 255 9 1 + x + Callout 255 10 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: auto_callout +Starting code units: a d +Last code unit = 'x' +Subject length lower bound = 4 + abcx +--->abcx + +0 ^ ( + +1 ^ a + +2 ^^ b + +3 ^ ^ c + +4 ^ ^ | + +9 ^ ^ x ++10 ^ ^ End of pattern + 0: abcx + 1: abc + defx +--->defx + +0 ^ ( + +1 ^ a + +5 ^ d + +6 ^^ e + +7 ^ ^ f + +8 ^ ^ ) + +9 ^ ^ x ++10 ^ ^ End of pattern + 0: defx + 1: def +\= Expect no match + abcdefzx +--->abcdefzx + +0 ^ ( + 
+1 ^ a + +2 ^^ b + +3 ^ ^ c + +4 ^ ^ | + +9 ^ ^ x + +5 ^ d + +0 ^ ( + +1 ^ a + +5 ^ d + +6 ^^ e + +7 ^ ^ f + +8 ^ ^ ) + +9 ^ ^ x +No match + +/(abc|def)x/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 1 + CBra 1 + Callout 255 1 1 + a + Callout 255 2 1 + b + Callout 255 3 1 + c + Callout 255 4 1 + Alt + Callout 255 5 1 + d + Callout 255 6 1 + e + Callout 255 7 1 + f + Callout 255 8 1 + Ket + Callout 255 9 1 + x + Callout 255 10 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: auto_callout +Starting code units: a d +Last code unit = 'x' +Subject length lower bound = 4 + abcx +--->abcx + +0 ^ ( + +1 ^ a + +2 ^^ b + +3 ^ ^ c + +4 ^ ^ | + +9 ^ ^ x ++10 ^ ^ End of pattern + 0: abcx + 1: abc + defx +--->defx + +0 ^ ( + +1 ^ a + +5 ^ d + +6 ^^ e + +7 ^ ^ f + +8 ^ ^ ) + +9 ^ ^ x ++10 ^ ^ End of pattern + 0: defx + 1: def +\= Expect no match + abcdefzx +--->abcdefzx + +0 ^ ( + +1 ^ a + +2 ^^ b + +3 ^ ^ c + +4 ^ ^ | + +9 ^ ^ x + +5 ^ d + +0 ^ ( + +1 ^ a + +5 ^ d + +6 ^^ e + +7 ^ ^ f + +8 ^ ^ ) + +9 ^ ^ x +No match + +/(ab|cd){3,4}/I,auto_callout +Capture group count = 1 +Options: auto_callout +Starting code units: a c +Subject length lower bound = 6 + ababab +--->ababab + +0 ^ ( + +1 ^ a + +2 ^^ b + +3 ^ ^ | + +1 ^ ^ a + +2 ^ ^ b + +3 ^ ^ | + +1 ^ ^ a + +2 ^ ^ b + +3 ^ ^ | + +1 ^ ^ a + +4 ^ ^ c ++12 ^ ^ End of pattern + 0: ababab + 1: ab + abcdabcd +--->abcdabcd + +0 ^ ( + +1 ^ a + +2 ^^ b + +3 ^ ^ | + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} + +1 ^ ^ a + +2 ^ ^ b + +3 ^ ^ | + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} ++12 ^ ^ End of pattern + 0: abcdabcd + 1: cd + abcdcdcdcdcd +--->abcdcdcdcdcd + +0 ^ ( + +1 ^ a + +2 ^^ b + +3 ^ ^ | + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} ++12 ^ ^ End of pattern + 0: abcdcdcd + 1: cd + 
+/([ab]{,}c|xy)/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 1 + CBra 1 + Callout 255 1 4 + [ab] + Callout 255 5 1 + { + Callout 255 6 1 + , + Callout 255 7 1 + } + Callout 255 8 1 + c + Callout 255 9 1 + Alt + Callout 255 10 1 + x + Callout 255 11 1 + y + Callout 255 12 1 + Ket + Callout 255 13 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: auto_callout +Starting code units: a b x +Subject length lower bound = 2 +\= Expect no match + Note: that {,} does NOT introduce a quantifier +--->Note: that {,} does NOT introduce a quantifier + +0 ^ ( + +1 ^ [ab] + +5 ^^ { ++10 ^ x + +0 ^ ( + +1 ^ [ab] + +5 ^^ { ++10 ^ x + +0 ^ ( + +1 ^ [ab] + +5 ^^ { ++10 ^ x +No match + +/([ab]{,}c|xy)/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 1 + CBra 1 + Callout 255 1 4 + [ab] + Callout 255 5 1 + { + Callout 255 6 1 + , + Callout 255 7 1 + } + Callout 255 8 1 + c + Callout 255 9 1 + Alt + Callout 255 10 1 + x + Callout 255 11 1 + y + Callout 255 12 1 + Ket + Callout 255 13 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: auto_callout +Starting code units: a b x +Subject length lower bound = 2 +\= Expect no match + Note: that {,} does NOT introduce a quantifier +--->Note: that {,} does NOT introduce a quantifier + +0 ^ ( + +1 ^ [ab] + +5 ^^ { ++10 ^ x + +0 ^ ( + +1 ^ [ab] + +5 ^^ { ++10 ^ x + +0 ^ ( + +1 ^ [ab] + +5 ^^ { ++10 ^ x +No match + +/([ab]{1,4}c|xy){4,5}?123/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 1 + CBra 1 + Callout 255 1 9 + [ab]{1,4}+ + Callout 255 10 1 + c + Callout 255 11 1 + Alt + Callout 255 12 1 + x + Callout 255 13 1 + y + Callout 255 14 7 + Ket + CBra 1 + Callout 255 1 9 + [ab]{1,4}+ + Callout 255 10 1 + c + Callout 255 11 1 + Alt + Callout 255 
12 1 + x + Callout 255 13 1 + y + Callout 255 14 7 + Ket + CBra 1 + Callout 255 1 9 + [ab]{1,4}+ + Callout 255 10 1 + c + Callout 255 11 1 + Alt + Callout 255 12 1 + x + Callout 255 13 1 + y + Callout 255 14 7 + Ket + CBra 1 + Callout 255 1 9 + [ab]{1,4}+ + Callout 255 10 1 + c + Callout 255 11 1 + Alt + Callout 255 12 1 + x + Callout 255 13 1 + y + Callout 255 14 7 + Ket + Braminzero + CBra 1 + Callout 255 1 9 + [ab]{1,4}+ + Callout 255 10 1 + c + Callout 255 11 1 + Alt + Callout 255 12 1 + x + Callout 255 13 1 + y + Callout 255 14 7 + Ket + Callout 255 21 1 + 1 + Callout 255 22 1 + 2 + Callout 255 23 1 + 3 + Callout 255 24 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: auto_callout +Starting code units: a b x +Last code unit = '3' +Subject length lower bound = 11 + aacaacaacaacaac123 +--->aacaacaacaacaac123 + +0 ^ ( + +1 ^ [ab]{1,4} ++10 ^ ^ c ++11 ^ ^ | + +1 ^ ^ [ab]{1,4} ++10 ^ ^ c ++11 ^ ^ | + +1 ^ ^ [ab]{1,4} ++10 ^ ^ c ++11 ^ ^ | + +1 ^ ^ [ab]{1,4} ++10 ^ ^ c ++11 ^ ^ | ++21 ^ ^ 1 + +1 ^ ^ [ab]{1,4} ++10 ^ ^ c ++11 ^ ^ | ++21 ^ ^ 1 ++22 ^ ^ 2 ++23 ^ ^ 3 ++24 ^ ^ End of pattern + 0: aacaacaacaacaac123 + 1: aac + +/\b.*/I +Capture group count = 0 +Max lookbehind = 1 +May match empty string +Subject length lower bound = 0 + ab cd\=offset=1 + 0: cd + +/\b.*/Is +Capture group count = 0 +Max lookbehind = 1 +May match empty string +Options: dotall +Subject length lower bound = 0 + ab cd\=startoffset=1 + 0: cd + +/(?!.bcd).*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + Xbcd12345 + 0: bcd12345 + +/abcde/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 5 + ab\=ps +Partial match: ab + abc\=ps +Partial match: abc + abcd\=ps +Partial match: abcd + abcde\=ps + 0: abcde + the quick brown abc\=ps +Partial match: abc +\= Expect no match\=ps + the quick brown abxyz fox\=ps +No match + 
+"^(0?[1-9]|[12][0-9]|3[01])/(0?[1-9]|1[012])/(20)?\d\d$"I +Capture group count = 3 +Compile options: +Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Last code unit = '/' +Subject length lower bound = 6 + 13/05/04\=ps + 0: 13/05/04 + 1: 13 + 2: 05 + 13/5/2004\=ps + 0: 13/5/2004 + 1: 13 + 2: 5 + 3: 20 + 02/05/09\=ps + 0: 02/05/09 + 1: 02 + 2: 05 + 1\=ps +Partial match: 1 + 1/2\=ps +Partial match: 1/2 + 1/2/0\=ps +Partial match: 1/2/0 + 1/2/04\=ps + 0: 1/2/04 + 1: 1 + 2: 2 + 0\=ps +Partial match: 0 + 02/\=ps +Partial match: 02/ + 02/0\=ps +Partial match: 02/0 + 02/1\=ps +Partial match: 02/1 +\= Expect no match\=ps + \=ps +No match + 123\=ps +No match + 33/4/04\=ps +No match + 3/13/04\=ps +No match + 0/1/2003\=ps +No match + 0/\=ps +No match + 02/0/\=ps +No match + 02/13\=ps +No match + +/0{0,2}ABC/I +Capture group count = 0 +Starting code units: 0 A +Last code unit = 'C' +Subject length lower bound = 3 + +/\d{3,}ABC/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Last code unit = 'C' +Subject length lower bound = 6 + +/\d*ABC/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A +Last code unit = 'C' +Subject length lower bound = 3 + +/[abc]+DE/I +Capture group count = 0 +Starting code units: a b c +Last code unit = 'E' +Subject length lower bound = 3 + +/[abc]?123/I +Capture group count = 0 +Starting code units: 1 a b c +Last code unit = '3' +Subject length lower bound = 3 + 123\=ps + 0: 123 + a\=ps +Partial match: a + b\=ps +Partial match: b + c\=ps +Partial match: c + c12\=ps +Partial match: c12 + c123\=ps + 0: c123 + +/^(?:\d){3,5}X/I +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Last code unit = 'X' +Subject length lower bound = 4 + 1\=ps +Partial match: 1 + 123\=ps +Partial match: 123 + 123X + 0: 123X + 1234\=ps +Partial match: 1234 + 1234X + 0: 1234X + 12345\=ps +Partial match: 12345 + 12345X + 0: 12345X +\= Expect no match + 1X +No 
match + 123456\=ps +No match + +"<(\w+)/?>(.)*"Igms +Capture group count = 3 +Max back reference = 1 +Options: dotall multiline +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 7 + \n\n\nPartner der LCO\nde\nPartner der LINEAS Consulting\nGmbH\nLINEAS Consulting GmbH Hamburg\nPartnerfirmen\n30 days\nindex,follow\n\nja\n3\nPartner\n\n\nLCO\nLINEAS Consulting\n15.10.2003\n\n\n\n\nDie Partnerfirmen der LINEAS Consulting\nGmbH\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\=jitstack=1024 + 0: \x0a\x0aPartner der LCO\x0ade\x0aPartner der LINEAS Consulting\x0aGmbH\x0aLINEAS Consulting GmbH Hamburg\x0aPartnerfirmen\x0a30 days\x0aindex,follow\x0a\x0aja\x0a3\x0aPartner\x0a\x0a\x0aLCO\x0aLINEAS Consulting\x0a15.10.2003\x0a\x0a\x0a\x0a\x0aDie Partnerfirmen der LINEAS Consulting\x0aGmbH\x0a\x0a\x0a \x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a + 1: seite + 2: \x0a + 3: seite + +/line\nbreak/I +Capture group count = 0 +Contains explicit CR or LF match +First code unit = 'l' +Last code unit = 'k' +Subject length lower bound = 10 + this is a line\nbreak + 0: line\x0abreak + line one\nthis is a line\nbreak in the second line + 0: line\x0abreak + +/line\nbreak/I,firstline +Capture group count = 0 +Contains explicit CR or LF match +Options: firstline +First code unit = 'l' +Last code unit = 'k' +Subject length lower bound = 10 + this is a line\nbreak + 0: line\x0abreak +\= Expect no match + line one\nthis is a line\nbreak in the second line +No match + +/line\nbreak/Im,firstline +Capture group count = 0 +Contains explicit CR or LF match +Options: firstline multiline +First code unit = 'l' +Last code unit = 'k' +Subject length lower bound = 10 + this is a line\nbreak + 0: line\x0abreak +\= Expect no match + line one\nthis is a line\nbreak in the second line +No match + +/(?i)(?-i)AbCd/I +Capture group count = 0 +First code unit = 'A' +Last code unit = 'd' +Subject length lower bound = 4 + AbCd + 0: AbCd +\= 
Expect no match + abcd +No match + +/a{11111111111111111111}/I +Failed: error 105 at offset 8: number too big in {} quantifier + +/(){64294967295}/I +Failed: error 105 at offset 9: number too big in {} quantifier + +/(){2,4294967295}/I +Failed: error 105 at offset 11: number too big in {} quantifier + +"(?i:a)(?i:b)(?i:c)(?i:d)(?i:e)(?i:f)(?i:g)(?i:h)(?i:i)(?i:j)(k)(?i:l)A\1B"I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' (caseless) +Last code unit = 'B' +Subject length lower bound = 15 + abcdefghijklAkB + 0: abcdefghijklAkB + 1: k + +"(?Pa)(?Pb)(?Pc)(?Pd)(?Pe)(?Pf)(?Pg)(?Ph)(?Pi)(?Pj)(?Pk)(?Pl)A\11B"I +Capture group count = 12 +Max back reference = 11 +Named capture groups: + n0 1 + n1 2 + n10 11 + n11 12 + n2 3 + n3 4 + n4 5 + n5 6 + n6 7 + n7 8 + n8 9 + n9 10 +First code unit = 'a' +Last code unit = 'B' +Subject length lower bound = 15 + abcdefghijklAkB + 0: abcdefghijklAkB + 1: a + 2: b + 3: c + 4: d + 5: e + 6: f + 7: g + 8: h + 9: i +10: j +11: k +12: l + +"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)A\11B"I +Capture group count = 12 +Max back reference = 11 +First code unit = 'a' +Last code unit = 'B' +Subject length lower bound = 15 + abcdefghijklAkB + 0: abcdefghijklAkB + 1: a + 2: b + 3: c + 4: d + 5: e + 6: f + 7: g + 8: h + 9: i +10: j +11: k +12: l + +"(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)"I +Capture group count = 101 +Named capture groups: + name0 1 + name1 2 + name10 11 + name100 101 + name11 12 + name12 13 + name13 14 + name14 15 + name15 16 + name16 17 + name17 18 + 
name18 19 + name19 20 + name2 3 + name20 21 + name21 22 + name22 23 + name23 24 + name24 25 + name25 26 + name26 27 + name27 28 + name28 29 + name29 30 + name3 4 + name30 31 + name31 32 + name32 33 + name33 34 + name34 35 + name35 36 + name36 37 + name37 38 + name38 39 + name39 40 + name4 5 + name40 41 + name41 42 + name42 43 + name43 44 + name44 45 + name45 46 + name46 47 + name47 48 + name48 49 + name49 50 + name5 6 + name50 51 + name51 52 + name52 53 + name53 54 + name54 55 + name55 56 + name56 57 + name57 58 + name58 59 + name59 60 + name6 7 + name60 61 + name61 62 + name62 63 + name63 64 + name64 65 + name65 66 + name66 67 + name67 68 + name68 69 + name69 70 + name7 8 + name70 71 + name71 72 + name72 73 + name73 74 + name74 75 + name75 76 + name76 77 + name77 78 + name78 79 + name79 80 + name8 9 + name80 81 + name81 82 + name82 83 + name83 84 + name84 85 + name85 86 + name86 87 + name87 88 + name88 89 + name89 90 + name9 10 + name90 91 + name91 92 + name92 93 + name93 94 + name94 95 + name95 96 + name96 97 + name97 98 + name98 99 + name99 100 +First code unit = 'a' +Last code unit = 'a' +Subject length lower bound = 101 + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +Matched, but too many substrings + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: a + 2: a + 3: a + 4: a + 5: a + 6: a + 7: a + 8: a + 9: a +10: a +11: a +12: a +13: a +14: a + +"(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)"I +Capture group count = 101 +First code unit = 'a' +Last code unit = 'a' +Subject length lower bound = 101 + 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +Matched, but too many substrings + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: a + 2: a + 3: a + 4: a + 5: a + 6: a + 7: a + 8: a + 9: a +10: a +11: a +12: a +13: a +14: a + +/[^()]*(?:\((?R)\)[^()]*)*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + (this(and)that + 0: + (this(and)that) + 0: (this(and)that) + (this(and)that)stuff + 0: (this(and)that)stuff + +/[^()]*(?:\((?>(?R))\)[^()]*)*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + (this(and)that + 0: + (this(and)that) + 0: (this(and)that) + +/[^()]*(?:\((?R)\))*[^()]*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + (this(and)that + 0: + (this(and)that) + 0: (this(and)that) + +/(?:\((?R)\))*[^()]*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + (this(and)that + 0: + (this(and)that) + 0: + ((this)) + 0: ((this)) + +/(?:\((?R)\))|[^()]*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + (this(and)that + 0: + (this(and)that) + 0: + (this) + 0: (this) + ((this)) + 0: ((this)) + +/\x{0000ff}/I +Capture group count = 0 +First code unit = \xff +Subject length lower bound = 1 + +/^((?Pa1)|(?Pa2)b)/I +Failed: error 143 at offset 18: two named subpatterns have the same name (PCRE2_DUPNAMES not set) + +/^((?Pa1)|(?Pa2)b)/I,dupnames +Capture group count = 3 +Named capture groups: + A 2 + A 3 +Compile options: dupnames +Overall options: anchored dupnames +First code unit = 'a' +Subject length lower bound = 2 + a1b\=copy=A + 0: a1 + 1: a1 + 2: a1 + C a1 (2) A (non-unique) + a2b\=copy=A + 0: a2b + 1: a2b + 2: + 3: a2 + C a2 (2) A (non-unique) + a1b\=copy=Z,copy=A + 0: a1 + 1: a1 + 2: a1 +Number not found for group 'Z' +Copy substring 'Z' failed (-49): unknown substring + C a1 (2) A 
(non-unique) + +/(?|(?)(?)(?)|(?)(?)(?))/I,dupnames +Capture group count = 3 +Named capture groups: + a 1 + a 3 + b 2 +May match empty string +Options: dupnames +Subject length lower bound = 0 + +/^(?Pa)(?Pb)/I,dupnames +Capture group count = 2 +Named capture groups: + A 1 + A 2 +Compile options: dupnames +Overall options: anchored dupnames +First code unit = 'a' +Subject length lower bound = 2 + ab\=copy=A + 0: ab + 1: a + 2: b + C a (1) A (non-unique) + +/^(?Pa)(?Pb)|cd/I,dupnames +Capture group count = 2 +Named capture groups: + A 1 + A 2 +Options: dupnames +Starting code units: a c +Subject length lower bound = 2 + ab\=copy=A + 0: ab + 1: a + 2: b + C a (1) A (non-unique) + cd\=copy=A + 0: cd +Copy substring 'A' failed (-55): requested value is not set + +/^(?Pa)(?Pb)|cd(?Pef)(?Pgh)/I,dupnames +Capture group count = 4 +Named capture groups: + A 1 + A 2 + A 3 + A 4 +Options: dupnames +Starting code units: a c +Subject length lower bound = 2 + cdefgh\=copy=A + 0: cdefgh + 1: + 2: + 3: ef + 4: gh + C ef (2) A (non-unique) + +/^((?Pa1)|(?Pa2)b)/I,dupnames +Capture group count = 3 +Named capture groups: + A 2 + A 3 +Compile options: dupnames +Overall options: anchored dupnames +First code unit = 'a' +Subject length lower bound = 2 + a1b\=get=A + 0: a1 + 1: a1 + 2: a1 + G a1 (2) A (non-unique) + a2b\=get=A + 0: a2b + 1: a2b + 2: + 3: a2 + G a2 (2) A (non-unique) + a1b\=get=Z,get=A + 0: a1 + 1: a1 + 2: a1 +Number not found for group 'Z' +Get substring 'Z' failed (-49): unknown substring + G a1 (2) A (non-unique) + +/^(?Pa)(?Pb)/I,dupnames +Capture group count = 2 +Named capture groups: + A 1 + A 2 +Compile options: dupnames +Overall options: anchored dupnames +First code unit = 'a' +Subject length lower bound = 2 + ab\=get=A + 0: ab + 1: a + 2: b + G a (1) A (non-unique) + +/^(?Pa)(?Pb)|cd/I,dupnames +Capture group count = 2 +Named capture groups: + A 1 + A 2 +Options: dupnames +Starting code units: a c +Subject length lower bound = 2 + ab\=get=A + 0: ab + 1: a + 2: b 
+ G a (1) A (non-unique) + cd\=get=A + 0: cd +Get substring 'A' failed (-55): requested value is not set + +/^(?Pa)(?Pb)|cd(?Pef)(?Pgh)/I,dupnames +Capture group count = 4 +Named capture groups: + A 1 + A 2 + A 3 + A 4 +Options: dupnames +Starting code units: a c +Subject length lower bound = 2 + cdefgh\=get=A + 0: cdefgh + 1: + 2: + 3: ef + 4: gh + G ef (2) A (non-unique) + +/(?J)^((?Pa1)|(?Pa2)b)/I +Capture group count = 3 +Named capture groups: + A 2 + A 3 +Compile options: +Overall options: anchored +Duplicate name status changes +First code unit = 'a' +Subject length lower bound = 2 + a1b\=copy=A + 0: a1 + 1: a1 + 2: a1 + C a1 (2) A (non-unique) + a2b\=copy=A + 0: a2b + 1: a2b + 2: + 3: a2 + C a2 (2) A (non-unique) + +/^(?Pa) (?J:(?Pb)(?Pc)) (?Pd)/I +Failed: error 143 at offset 38: two named subpatterns have the same name (PCRE2_DUPNAMES not set) + +# In this next test, J is not set at the outer level; consequently it isn't set +# in the pattern's options; consequently pcre2_substring_get_byname() produces +# a random value. 
+ +/^(?Pa) (?J:(?Pb)(?Pc)) (?Pd)/I +Capture group count = 4 +Named capture groups: + A 1 + B 2 + B 3 + C 4 +Compile options: +Overall options: anchored +Duplicate name status changes +First code unit = 'a' +Subject length lower bound = 6 + a bc d\=copy=A,copy=B,copy=C + 0: a bc d + 1: a + 2: b + 3: c + 4: d + C a (1) A (group 1) + C b (1) B (non-unique) + C d (1) C (group 4) + +/^(?Pa)?(?(A)a|b)/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + A 1 +Compile options: +Overall options: anchored +Subject length lower bound = 1 + aabc + 0: aa + 1: a + bc + 0: b +\= Expect no match + abc +No match + +/(?:(?(ZZ)a|b)(?PX))+/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + ZZ 1 +Last code unit = 'X' +Subject length lower bound = 2 + bXaX + 0: bXaX + 1: X + +/(?:(?(2y)a|b)(X))+/I +Failed: error 124 at offset 7: missing closing parenthesis for condition + +/(?:(?(ZA)a|b)(?PX))+/I +Failed: error 115 at offset 6: reference to non-existent subpattern + +/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?PX))+/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + ZZ 1 +Last code unit = 'X' +Subject length lower bound = 3 + bbXaaX + 0: bbXaaX + 1: X + +/(?:(?(ZZ)a|\(b\))\\(?PX))+/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + ZZ 1 +Last code unit = 'X' +Subject length lower bound = 3 + (b)\\Xa\\X + 0: (b)\Xa\X + 1: X + +/(?PX|Y))+/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + A 1 +Subject length lower bound = 2 + bXXaYYaY + 0: bXXaYYaY + 1: Y + bXYaXXaX + 0: bX + 1: X + +/()()()()()()()()()(?:(?(A)(?P=A)a|b)(?PX|Y))+/I +Capture group count = 10 +Max back reference = 10 +Named capture groups: + A 10 +Subject length lower bound = 2 + bXXaYYaY + 0: bXXaYYaY + 1: + 2: + 3: + 4: + 5: + 6: + 7: + 8: + 9: +10: Y + +/\s*,\s*/I +Capture group count = 0 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 , +Last code unit = ',' +Subject length lower bound = 1 + \x0b,\x0b + 0: 
\x0b,\x0b + \x0c,\x0d + 0: \x0c,\x0d + +/^abc/Im,newline=lf +Capture group count = 0 +Options: multiline +Forced newline is LF +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + xyz\nabc + 0: abc + xyz\r\nabc + 0: abc +\= Expect no match + xyz\rabc +No match + xyzabc\r +No match + xyzabc\rpqr +No match + xyzabc\r\n +No match + xyzabc\r\npqr +No match + +/^abc/Im,newline=crlf +Capture group count = 0 +Options: multiline +Forced newline is CRLF +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + xyz\r\nabclf> + 0: abc +\= Expect no match + xyz\nabclf +No match + xyz\rabclf +No match + +/^abc/Im,newline=cr +Capture group count = 0 +Options: multiline +Forced newline is CR +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + xyz\rabc + 0: abc +\= Expect no match + xyz\nabc +No match + xyz\r\nabc +No match + +/^abc/Im,newline=bad +** Invalid value in 'newline=bad' + +/.*/I,newline=lf +Capture group count = 0 +May match empty string +Forced newline is LF +First code unit at start or follows newline +Subject length lower bound = 0 + abc\ndef + 0: abc + abc\rdef + 0: abc\x0ddef + abc\r\ndef + 0: abc\x0d + +/.*/I,newline=cr +Capture group count = 0 +May match empty string +Forced newline is CR +First code unit at start or follows newline +Subject length lower bound = 0 + abc\ndef + 0: abc\x0adef + abc\rdef + 0: abc + abc\r\ndef + 0: abc + +/.*/I,newline=crlf +Capture group count = 0 +May match empty string +Forced newline is CRLF +First code unit at start or follows newline +Subject length lower bound = 0 + abc\ndef + 0: abc\x0adef + abc\rdef + 0: abc\x0ddef + abc\r\ndef + 0: abc + +/\w+(.)(.)?def/Is +Capture group count = 2 +Options: dotall +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Last code unit = 'f' +Subject length 
lower bound = 5 + abc\ndef + 0: abc\x0adef + 1: \x0a + abc\rdef + 0: abc\x0ddef + 1: \x0d + abc\r\ndef + 0: abc\x0d\x0adef + 1: \x0d + 2: \x0a + +/(?P25[0-5]|2[0-4]\d|[01]?\d?\d)(?:\.(?P>B)){3}/I +Capture group count = 1 +Named capture groups: + B 1 +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Last code unit = '.' +Subject length lower bound = 7 + +/()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + (.(.))/Ix +Capture group count = 102 +Options: extended +Subject length lower bound = 2 + XY\=ovector=133 + 0: XY + 1: + 2: + 3: + 4: + 5: + 6: + 7: + 8: + 9: +10: +11: +12: +13: +14: +15: +16: +17: +18: +19: +20: +21: +22: +23: +24: +25: +26: +27: +28: +29: +30: +31: +32: +33: +34: +35: +36: +37: +38: +39: +40: +41: +42: +43: +44: +45: +46: +47: +48: +49: +50: +51: +52: +53: +54: +55: +56: +57: +58: +59: +60: +61: +62: +63: +64: +65: +66: +67: +68: +69: +70: +71: +72: +73: +74: +75: +76: +77: +78: +79: +80: +81: +82: +83: +84: +85: +86: +87: +88: +89: +90: +91: +92: +93: +94: +95: +96: +97: +98: +99: +100: +101: XY +102: Y + +/(a*b|(?i:c*(?-i)d))/I +Capture group count = 1 +Starting code units: C a b c d +Subject length lower bound = 1 + +/()[ab]xyz/I +Capture group count = 1 +Starting code units: a b +Last code unit = 'z' +Subject length lower bound = 4 + +/(|)[ab]xyz/I +Capture group count = 1 +Starting code units: a b +Last code unit = 'z' +Subject length lower bound = 4 + +/(|c)[ab]xyz/I +Capture group count = 1 +Starting code units: a b c +Last code unit = 'z' +Subject length lower bound = 4 + +/(|c?)[ab]xyz/I +Capture group count = 1 +Starting code units: a b c +Last code unit = 'z' +Subject length lower bound = 4 + +/(d?|c?)[ab]xyz/I +Capture group count = 1 +Starting code units: a b c d +Last code unit = 'z' +Subject length lower bound = 4 + +/(d?|c)[ab]xyz/I +Capture group count = 1 +Starting code 
units: a b c d +Last code unit = 'z' +Subject length lower bound = 4 + +/^a*b\d/IB +------------------------------------------------------------------ + Bra + ^ + a*+ + b + \d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: a b +Last code unit = 'b' +Subject length lower bound = 2 + +/^a*+b\d/IB +------------------------------------------------------------------ + Bra + ^ + a*+ + b + \d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: a b +Last code unit = 'b' +Subject length lower bound = 2 + +/^a*?b\d/IB +------------------------------------------------------------------ + Bra + ^ + a*+ + b + \d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: a b +Last code unit = 'b' +Subject length lower bound = 2 + +/^a+A\d/IB +------------------------------------------------------------------ + Bra + ^ + a++ + A + \d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Last code unit = 'A' +Subject length lower bound = 3 + aaaA5 + 0: aaaA5 +\= Expect no match + aaaa5 +No match + +/^a*A\d/IBi +------------------------------------------------------------------ + Bra + ^ + /i a* + /i A + \d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: caseless +Overall options: anchored caseless +Starting code units: A a +Last code unit = 'A' (caseless) +Subject length lower bound = 2 + aaaA5 + 0: aaaA5 + aaaa5 + 0: aaaa5 + a5 + 0: a5 + +/(a*|b*)[cd]/I +Capture group count = 1 +Starting code units: a b c d +Subject length lower bound = 
1 + +/(a+|b*)[cd]/I +Capture group count = 1 +Starting code units: a b c d +Subject length lower bound = 1 + +/(a*|b+)[cd]/I +Capture group count = 1 +Starting code units: a b c d +Subject length lower bound = 1 + +/(a+|b+)[cd]/I +Capture group count = 1 +Starting code units: a b +Subject length lower bound = 2 + +/(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((( + (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((( + ((( + a + )))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) + )))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) + ))) +/Ix +Capture group count = 203 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + large nest +Matched, but too many substrings + 0: a + 1: a + 2: a + 3: a + 4: a + 5: a + 6: a + 7: a + 8: a + 9: a +10: a +11: a +12: a +13: a +14: a + +/a*\d/B +------------------------------------------------------------------ + Bra + a*+ + \d + Ket + End +------------------------------------------------------------------ + +/a*\D/B +------------------------------------------------------------------ + Bra + a* + \D + Ket + End +------------------------------------------------------------------ + +/0*\d/B +------------------------------------------------------------------ + Bra + 0* + \d + Ket + End +------------------------------------------------------------------ + +/0*\D/B +------------------------------------------------------------------ + Bra + 0*+ + \D + Ket + End +------------------------------------------------------------------ + +/a*\s/B +------------------------------------------------------------------ + Bra + a*+ + \s + Ket + End +------------------------------------------------------------------ + +/a*\S/B +------------------------------------------------------------------ + Bra + a* + \S + Ket + End 
+------------------------------------------------------------------ + +/ *\s/B +------------------------------------------------------------------ + Bra + * + \s + Ket + End +------------------------------------------------------------------ + +/ *\S/B +------------------------------------------------------------------ + Bra + *+ + \S + Ket + End +------------------------------------------------------------------ + +/a*\w/B +------------------------------------------------------------------ + Bra + a* + \w + Ket + End +------------------------------------------------------------------ + +/a*\W/B +------------------------------------------------------------------ + Bra + a*+ + \W + Ket + End +------------------------------------------------------------------ + +/=*\w/B +------------------------------------------------------------------ + Bra + =*+ + \w + Ket + End +------------------------------------------------------------------ + +/=*\W/B +------------------------------------------------------------------ + Bra + =* + \W + Ket + End +------------------------------------------------------------------ + +/\d*a/B +------------------------------------------------------------------ + Bra + \d*+ + a + Ket + End +------------------------------------------------------------------ + +/\d*2/B +------------------------------------------------------------------ + Bra + \d* + 2 + Ket + End +------------------------------------------------------------------ + +/\d*\d/B +------------------------------------------------------------------ + Bra + \d* + \d + Ket + End +------------------------------------------------------------------ + +/\d*\D/B +------------------------------------------------------------------ + Bra + \d*+ + \D + Ket + End +------------------------------------------------------------------ + +/\d*\s/B +------------------------------------------------------------------ + Bra + \d*+ + \s + Ket + End 
+------------------------------------------------------------------ + +/\d*\S/B +------------------------------------------------------------------ + Bra + \d* + \S + Ket + End +------------------------------------------------------------------ + +/\d*\w/B +------------------------------------------------------------------ + Bra + \d* + \w + Ket + End +------------------------------------------------------------------ + +/\d*\W/B +------------------------------------------------------------------ + Bra + \d*+ + \W + Ket + End +------------------------------------------------------------------ + +/\D*a/B +------------------------------------------------------------------ + Bra + \D* + a + Ket + End +------------------------------------------------------------------ + +/\D*2/B +------------------------------------------------------------------ + Bra + \D*+ + 2 + Ket + End +------------------------------------------------------------------ + +/\D*\d/B +------------------------------------------------------------------ + Bra + \D*+ + \d + Ket + End +------------------------------------------------------------------ + +/\D*\D/B +------------------------------------------------------------------ + Bra + \D* + \D + Ket + End +------------------------------------------------------------------ + +/\D*\s/B +------------------------------------------------------------------ + Bra + \D* + \s + Ket + End +------------------------------------------------------------------ + +/\D*\S/B +------------------------------------------------------------------ + Bra + \D* + \S + Ket + End +------------------------------------------------------------------ + +/\D*\w/B +------------------------------------------------------------------ + Bra + \D* + \w + Ket + End +------------------------------------------------------------------ + +/\D*\W/B +------------------------------------------------------------------ + Bra + \D* + \W + Ket + End 
+------------------------------------------------------------------ + +/\s*a/B +------------------------------------------------------------------ + Bra + \s*+ + a + Ket + End +------------------------------------------------------------------ + +/\s*2/B +------------------------------------------------------------------ + Bra + \s*+ + 2 + Ket + End +------------------------------------------------------------------ + +/\s*\d/B +------------------------------------------------------------------ + Bra + \s*+ + \d + Ket + End +------------------------------------------------------------------ + +/\s*\D/B +------------------------------------------------------------------ + Bra + \s* + \D + Ket + End +------------------------------------------------------------------ + +/\s*\s/B +------------------------------------------------------------------ + Bra + \s* + \s + Ket + End +------------------------------------------------------------------ + +/\s*\S/B +------------------------------------------------------------------ + Bra + \s*+ + \S + Ket + End +------------------------------------------------------------------ + +/\s*\w/B +------------------------------------------------------------------ + Bra + \s*+ + \w + Ket + End +------------------------------------------------------------------ + +/\s*\W/B +------------------------------------------------------------------ + Bra + \s* + \W + Ket + End +------------------------------------------------------------------ + +/\S*a/B +------------------------------------------------------------------ + Bra + \S* + a + Ket + End +------------------------------------------------------------------ + +/\S*2/B +------------------------------------------------------------------ + Bra + \S* + 2 + Ket + End +------------------------------------------------------------------ + +/\S*\d/B +------------------------------------------------------------------ + Bra + \S* + \d + Ket + End 
+------------------------------------------------------------------ + +/\S*\D/B +------------------------------------------------------------------ + Bra + \S* + \D + Ket + End +------------------------------------------------------------------ + +/\S*\s/B +------------------------------------------------------------------ + Bra + \S*+ + \s + Ket + End +------------------------------------------------------------------ + +/\S*\S/B +------------------------------------------------------------------ + Bra + \S* + \S + Ket + End +------------------------------------------------------------------ + +/\S*\w/B +------------------------------------------------------------------ + Bra + \S* + \w + Ket + End +------------------------------------------------------------------ + +/\S*\W/B +------------------------------------------------------------------ + Bra + \S* + \W + Ket + End +------------------------------------------------------------------ + +/\w*a/B +------------------------------------------------------------------ + Bra + \w* + a + Ket + End +------------------------------------------------------------------ + +/\w*2/B +------------------------------------------------------------------ + Bra + \w* + 2 + Ket + End +------------------------------------------------------------------ + +/\w*\d/B +------------------------------------------------------------------ + Bra + \w* + \d + Ket + End +------------------------------------------------------------------ + +/\w*\D/B +------------------------------------------------------------------ + Bra + \w* + \D + Ket + End +------------------------------------------------------------------ + +/\w*\s/B +------------------------------------------------------------------ + Bra + \w*+ + \s + Ket + End +------------------------------------------------------------------ + +/\w*\S/B +------------------------------------------------------------------ + Bra + \w* + \S + Ket + End 
+------------------------------------------------------------------ + +/\w*\w/B +------------------------------------------------------------------ + Bra + \w* + \w + Ket + End +------------------------------------------------------------------ + +/\w*\W/B +------------------------------------------------------------------ + Bra + \w*+ + \W + Ket + End +------------------------------------------------------------------ + +/\W*a/B +------------------------------------------------------------------ + Bra + \W*+ + a + Ket + End +------------------------------------------------------------------ + +/\W*2/B +------------------------------------------------------------------ + Bra + \W*+ + 2 + Ket + End +------------------------------------------------------------------ + +/\W*\d/B +------------------------------------------------------------------ + Bra + \W*+ + \d + Ket + End +------------------------------------------------------------------ + +/\W*\D/B +------------------------------------------------------------------ + Bra + \W* + \D + Ket + End +------------------------------------------------------------------ + +/\W*\s/B +------------------------------------------------------------------ + Bra + \W* + \s + Ket + End +------------------------------------------------------------------ + +/\W*\S/B +------------------------------------------------------------------ + Bra + \W* + \S + Ket + End +------------------------------------------------------------------ + +/\W*\w/B +------------------------------------------------------------------ + Bra + \W*+ + \w + Ket + End +------------------------------------------------------------------ + +/\W*\W/B +------------------------------------------------------------------ + Bra + \W* + \W + Ket + End +------------------------------------------------------------------ + +/[^a]+a/B +------------------------------------------------------------------ + Bra + [^a]++ + a + Ket + End 
+------------------------------------------------------------------ + +/[^a]+a/Bi +------------------------------------------------------------------ + Bra + /i [^a]++ + /i a + Ket + End +------------------------------------------------------------------ + +/[^a]+A/Bi +------------------------------------------------------------------ + Bra + /i [^a]++ + /i A + Ket + End +------------------------------------------------------------------ + +/[^a]+b/B +------------------------------------------------------------------ + Bra + [^a]+ + b + Ket + End +------------------------------------------------------------------ + +/[^a]+\d/B +------------------------------------------------------------------ + Bra + [^a]+ + \d + Ket + End +------------------------------------------------------------------ + +/a*[^a]/B +------------------------------------------------------------------ + Bra + a*+ + [^a] + Ket + End +------------------------------------------------------------------ + +/(?Px)(?Py)/I +Capture group count = 2 +Named capture groups: + abc 1 + xyz 2 +First code unit = 'x' +Last code unit = 'y' +Subject length lower bound = 2 + xy\=copy=abc,copy=xyz + 0: xy + 1: x + 2: y + C x (1) abc (group 1) + C y (1) xyz (group 2) + +/(?x)(?'xyz'y)/I +Capture group count = 2 +Named capture groups: + abc 1 + xyz 2 +First code unit = 'x' +Last code unit = 'y' +Subject length lower bound = 2 + xy\=copy=abc,copy=xyz + 0: xy + 1: x + 2: y + C x (1) abc (group 1) + C y (1) xyz (group 2) + +/(?x)(?'xyz>y)/I +Failed: error 142 at offset 15: syntax error in subpattern name (missing terminator?) 
+ +/(?P'abc'x)(?Py)/I +Failed: error 141 at offset 3: unrecognized character after (?P + +/^(?:(?(ZZ)a|b)(?X))+/ + bXaX + 0: bXaX + 1: X + bXbX + 0: bX + 1: X +\= Expect no match + aXaX +No match + aXbX +No match + +/^(?P>abc)(?xxx)/ +Failed: error 115 at offset 5: reference to non-existent subpattern + +/^(?P>abc)(?x|y)/ + xx + 0: xx + 1: x + xy + 0: xy + 1: y + yy + 0: yy + 1: y + yx + 0: yx + 1: x + +/^(?P>abc)(?Px|y)/ + xx + 0: xx + 1: x + xy + 0: xy + 1: y + yy + 0: yy + 1: y + yx + 0: yx + 1: x + +/^((?(abc)a|b)(?x|y))+/ + bxay + 0: bxay + 1: ay + 2: y + bxby + 0: bx + 1: bx + 2: x +\= Expect no match + axby +No match + +/^(((?P=abc)|X)(?x|y))+/ + XxXxxx + 0: XxXxxx + 1: xx + 2: x + 3: x + XxXyyx + 0: XxXyyx + 1: yx + 2: y + 3: x + XxXyxx + 0: XxXy + 1: Xy + 2: X + 3: y +\= Expect no match + x +No match + +/^(?1)(abc)/ + abcabc + 0: abcabc + 1: abc + +/^(?:(?:\1|X)(a|b))+/ + Xaaa + 0: Xaaa + 1: a + Xaba + 0: Xa + 1: a + +/^[\E\Qa\E-\Qz\E]+/B +------------------------------------------------------------------ + Bra + ^ + [a-z]++ + Ket + End +------------------------------------------------------------------ + +/^[a\Q]bc\E]/B +------------------------------------------------------------------ + Bra + ^ + [\]a-c] + Ket + End +------------------------------------------------------------------ + +/^[a-\Q\E]/B +------------------------------------------------------------------ + Bra + ^ + [\-a] + Ket + End +------------------------------------------------------------------ + +/^(?P>abc)[()](?)/B +------------------------------------------------------------------ + Bra + ^ + Recurse + [()] + CBra 1 + Ket + Ket + End +------------------------------------------------------------------ + +/^((?(abc)y)[()](?Px))+/B +------------------------------------------------------------------ + Bra + ^ + CBra 1 + Cond + 2 Cond ref + y + Ket + [()] + CBra 2 + x + Ket + KetRmax + Ket + End +------------------------------------------------------------------ + (xy)x + 0: (xy)x + 1: 
y)x + 2: x + +/^(?P>abc)\Q()\E(?)/B +------------------------------------------------------------------ + Bra + ^ + Recurse + () + CBra 1 + Ket + Ket + End +------------------------------------------------------------------ + +/^(?P>abc)[a\Q(]\E(](?)/B +------------------------------------------------------------------ + Bra + ^ + Recurse + [(\]a] + CBra 1 + Ket + Ket + End +------------------------------------------------------------------ + +/^(?P>abc) # this is (a comment) + (?)/Bx +------------------------------------------------------------------ + Bra + ^ + Recurse + CBra 1 + Ket + Ket + End +------------------------------------------------------------------ + +/^\W*(?:(?(?.)\W*(?&one)\W*\k|)|(?(?.)\W*(?&three)\W*\k'four'|\W*.\W*))\W*$/Ii +Capture group count = 4 +Max back reference = 4 +Named capture groups: + four 4 + one 1 + three 3 + two 2 +May match empty string +Compile options: caseless +Overall options: anchored caseless +Subject length lower bound = 0 + 1221 + 0: 1221 + 1: 1221 + 2: 1 + Satan, oscillate my metallic sonatas! + 0: Satan, oscillate my metallic sonatas! + 1: + 2: + 3: Satan, oscillate my metallic sonatas + 4: S + A man, a plan, a canal: Panama! + 0: A man, a plan, a canal: Panama! + 1: + 2: + 3: A man, a plan, a canal: Panama + 4: A + Able was I ere I saw Elba. + 0: Able was I ere I saw Elba. 
+ 1: + 2: + 3: Able was I ere I saw Elba + 4: A +\= Expect no match + The quick brown fox +No match + +/(?=(\w+))\1:/I +Capture group count = 1 +Max back reference = 1 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Last code unit = ':' +Subject length lower bound = 2 + abcd: + 0: abcd: + 1: abcd + +/(?=(?'abc'\w+))\k:/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + abc 1 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Last code unit = ':' +Subject length lower bound = 2 + abcd: + 0: abcd: + 1: abcd + +/(?'abc'a|b)(?d|e)\k{2}/dupnames + adaa + 0: adaa + 1: a + 2: d +\= Expect no match + addd +No match + adbb +No match + +/(?'abc'a|b)(?d|e)(?&abc){2}/dupnames + bdaa + 0: bdaa + 1: b + 2: d + bdab + 0: bdab + 1: b + 2: d +\= Expect no match + bddd +No match + +/(?( (?'B' abc (?(R) (?(R&A)1) (?(R&B)2) X | (?1) (?2) (?R) ))) /x + abcabc1Xabc2XabcXabcabc + 0: abcabc1Xabc2XabcX + 1: abcabc1Xabc2XabcX + 2: abcabc1Xabc2XabcX + +/(? 
(?'B' abc (?(R) (?(R&C)1) (?(R&B)2) X | (?1) (?2) (?R) ))) /x +Failed: error 115 at offset 27: reference to non-existent subpattern + +/^(?(DEFINE) abc | xyz ) /x +Failed: error 154 at offset 4: DEFINE subpattern contains more than one branch + +/(?(DEFINE) abc) xyz/Ix +Capture group count = 0 +Options: extended +First code unit = 'x' +Last code unit = 'z' +Subject length lower bound = 3 + +/(a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4\=ovector=0 + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 + 1: +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\=ovector=0 +No match + +/^a.b/newline=lf + a\rb + 0: a\x0db +\= Expect no match + a\nb +No match + +/^a.b/newline=cr + a\nb + 0: a\x0ab +\= Expect no match + a\rb +No match + +/^a.b/newline=anycrlf + a\x85b + 0: a\x85b +\= Expect no match + a\rb +No match + +/^a.b/newline=any +\= Expect no match + a\nb +No match + a\rb +No match + a\x85b +No match + +/^abc./gmx,newline=any + abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 JUNK + 0: abc1 + 0: abc2 + 0: abc3 + 0: abc4 + 0: abc5 + 0: abc6 + 0: abc7 + +/abc.$/gmx,newline=any + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7 abc9 + 0: abc1 + 0: abc2 + 0: abc3 + 0: abc4 + 0: abc5 + 0: abc6 + 0: abc9 + +/^a\Rb/bsr=unicode + a\nb + 0: a\x0ab + a\rb + 0: a\x0db + a\r\nb + 0: a\x0d\x0ab + a\x0bb + 0: a\x0bb + a\x0cb + 0: a\x0cb + a\x85b + 0: a\x85b +\= Expect no match + a\n\rb +No match + +/^a\R*b/bsr=unicode + ab + 0: ab + a\nb + 0: a\x0ab + a\rb + 0: a\x0db + a\r\nb + 0: a\x0d\x0ab + a\x0bb + 0: a\x0bb + a\x0cb + 0: a\x0cb + a\x85b + 0: a\x85b + a\n\rb + 0: a\x0a\x0db + a\n\r\x85\x0cb + 0: a\x0a\x0d\x85\x0cb + +/^a\R+b/bsr=unicode + a\nb + 0: a\x0ab + a\rb + 0: a\x0db + a\r\nb + 0: a\x0d\x0ab + a\x0bb + 0: a\x0bb + a\x0cb + 0: a\x0cb + a\x85b + 0: a\x85b + a\n\rb + 0: a\x0a\x0db + a\n\r\x85\x0cb + 0: a\x0a\x0d\x85\x0cb +\= Expect no match + ab +No match + 
+/^a\R{1,3}b/bsr=unicode + a\nb + 0: a\x0ab + a\n\rb + 0: a\x0a\x0db + a\n\r\x85b + 0: a\x0a\x0d\x85b + a\r\n\r\nb + 0: a\x0d\x0a\x0d\x0ab + a\r\n\r\n\r\nb + 0: a\x0d\x0a\x0d\x0a\x0d\x0ab + a\n\r\n\rb + 0: a\x0a\x0d\x0a\x0db + a\n\n\r\nb + 0: a\x0a\x0a\x0d\x0ab +\= Expect no match + a\n\n\n\rb +No match + a\r +No match + +/(?&abc)X(?P)/I +Capture group count = 1 +Named capture groups: + abc 1 +Last code unit = 'P' +Subject length lower bound = 3 + abcPXP123 + 0: PXP + 1: P + +/(?1)X(?P)/I +Capture group count = 1 +Named capture groups: + abc 1 +Last code unit = 'P' +Subject length lower bound = 3 + abcPXP123 + 0: PXP + 1: P + +/(?:a(?&abc)b)*(?x)/ + 123axbaxbaxbx456 + 0: axbaxbaxbx + 1: x + 123axbaxbaxb456 + 0: x + 1: x + +/(?:a(?&abc)b){1,5}(?x)/ + 123axbaxbaxbx456 + 0: axbaxbaxbx + 1: x + +/(?:a(?&abc)b){2,5}(?x)/ + 123axbaxbaxbx456 + 0: axbaxbaxbx + 1: x + +/(?:a(?&abc)b){2,}(?x)/ + 123axbaxbaxbx456 + 0: axbaxbaxbx + 1: x + +/(abc)(?i:(?1))/ + defabcabcxyz + 0: abcabc + 1: abc +\= Expect no match + DEFabcABCXYZ +No match + +/(abc)(?:(?i)(?1))/ + defabcabcxyz + 0: abcabc + 1: abc +\= Expect no match + DEFabcABCXYZ +No match + +/^(a)\g-2/ +Failed: error 115 at offset 8: reference to non-existent subpattern + +/^(a)\g/ +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/^(a)\g{0}/ +Failed: error 115 at offset 9: reference to non-existent subpattern + +/^(a)\g{3/ +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/^(a)\g{aa}/ +Failed: error 115 at offset 7: reference to non-existent subpattern + +/^a.b/newline=lf + a\rb + 0: a\x0db +\= Expect no match + a\nb +No match + +/.+foo/ + afoo + 0: afoo +\= Expect no match + \r\nfoo +No match + \nfoo +No match + +/.+foo/newline=crlf + afoo + 0: afoo + \nfoo + 0: \x0afoo +\= Expect no match + \r\nfoo +No match + +/.+foo/newline=any + afoo + 0: afoo +\= Expect no match + \nfoo 
+No match + \r\nfoo +No match + +/.+foo/s + afoo + 0: afoo + \r\nfoo + 0: \x0d\x0afoo + \nfoo + 0: \x0afoo + +/^$/gm,newline=any + abc\r\rxyz + 0: + abc\n\rxyz + 0: +\= Expect no match + abc\r\nxyz +No match + +/(?m)^$/g,newline=any,aftertext + abc\r\n\r\n + 0: + 0+ \x0d\x0a + +/(?m)^$|^\r\n/g,newline=any,aftertext + abc\r\n\r\n + 0: + 0+ \x0d\x0a + 0: \x0d\x0a + 0+ + +/(?m)$/g,newline=any,aftertext + abc\r\n\r\n + 0: + 0+ \x0d\x0a\x0d\x0a + 0: + 0+ \x0d\x0a + 0: + 0+ + +/abc.$/gmx,newline=anycrlf + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc9 + 0: abc1 + 0: abc4 + 0: abc5 + 0: abc9 + +/^X/m + XABC + 0: X +\= Expect no match + XABC\=notbol +No match + +/(ab|c)(?-1)/B +------------------------------------------------------------------ + Bra + CBra 1 + ab + Alt + c + Ket + Recurse + Ket + End +------------------------------------------------------------------ + abc + 0: abc + 1: ab + +/xy(?+1)(abc)/B +------------------------------------------------------------------ + Bra + xy + Recurse + CBra 1 + abc + Ket + Ket + End +------------------------------------------------------------------ + xyabcabc + 0: xyabcabc + 1: abc +\= Expect no match + xyabc +No match + +/x(?-0)y/ +Failed: error 126 at offset 5: a relative value of zero is not allowed + +/x(?-1)y/ +Failed: error 115 at offset 5: reference to non-existent subpattern + +/x(?+0)y/ +Failed: error 126 at offset 5: a relative value of zero is not allowed + +/x(?+1)y/ +Failed: error 115 at offset 5: reference to non-existent subpattern + +/^(abc)?(?(-1)X|Y)/B +------------------------------------------------------------------ + Bra + ^ + Brazero + CBra 1 + abc + Ket + Cond + 1 Cond ref + X + Alt + Y + Ket + Ket + End +------------------------------------------------------------------ + abcX + 0: abcX + 1: abc + Y + 0: Y +\= Expect no match + abcY +No match + +/^((?(+1)X|Y)(abc))+/B +------------------------------------------------------------------ + Bra + ^ + CBra 1 + Cond + 2 Cond ref + X + Alt + Y 
+ Ket + CBra 2 + abc + Ket + KetRmax + Ket + End +------------------------------------------------------------------ + YabcXabc + 0: YabcXabc + 1: Xabc + 2: abc + YabcXabcXabc + 0: YabcXabcXabc + 1: Xabc + 2: abc +\= Expect no match + XabcXabc +No match + +/(?(-1)a)/B +Failed: error 115 at offset 5: reference to non-existent subpattern + +/((?(-1)a))/B +------------------------------------------------------------------ + Bra + CBra 1 + Cond + 1 Cond ref + a + Ket + Ket + Ket + End +------------------------------------------------------------------ + +/((?(-2)a))/B +Failed: error 115 at offset 6: reference to non-existent subpattern + +/^(?(+1)X|Y)(.)/B +------------------------------------------------------------------ + Bra + ^ + Cond + 1 Cond ref + X + Alt + Y + Ket + CBra 1 + Any + Ket + Ket + End +------------------------------------------------------------------ + Y! + 0: Y! + 1: ! + +/(?tom|bon)-\k{A}/ + tom-tom + 0: tom-tom + 1: tom + bon-bon + 0: bon-bon + 1: bon +\= Expect no match + tom-bon +No match + +/\g{A/ +Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) 
+ +/(?|(abc)|(xyz))/B +------------------------------------------------------------------ + Bra + Bra + CBra 1 + abc + Ket + Alt + CBra 1 + xyz + Ket + Ket + Ket + End +------------------------------------------------------------------ + >abc< + 0: abc + 1: abc + >xyz< + 0: xyz + 1: xyz + +/(x)(?|(abc)|(xyz))(x)/B +------------------------------------------------------------------ + Bra + CBra 1 + x + Ket + Bra + CBra 2 + abc + Ket + Alt + CBra 2 + xyz + Ket + Ket + CBra 3 + x + Ket + Ket + End +------------------------------------------------------------------ + xabcx + 0: xabcx + 1: x + 2: abc + 3: x + xxyzx + 0: xxyzx + 1: x + 2: xyz + 3: x + +/(x)(?|(abc)(pqr)|(xyz))(x)/B +------------------------------------------------------------------ + Bra + CBra 1 + x + Ket + Bra + CBra 2 + abc + Ket + CBra 3 + pqr + Ket + Alt + CBra 2 + xyz + Ket + Ket + CBra 4 + x + Ket + Ket + End +------------------------------------------------------------------ + xabcpqrx + 0: xabcpqrx + 1: x + 2: abc + 3: pqr + 4: x + xxyzx + 0: xxyzx + 1: x + 2: xyz + 3: + 4: x + +/\H++X/B +------------------------------------------------------------------ + Bra + \H++ + X + Ket + End +------------------------------------------------------------------ +\= Expect no match + XXXX +No match + +/\H+\hY/B +------------------------------------------------------------------ + Bra + \H++ + \h + Y + Ket + End +------------------------------------------------------------------ + XXXX Y + 0: XXXX Y + +/\H+ Y/B +------------------------------------------------------------------ + Bra + \H++ + Y + Ket + End +------------------------------------------------------------------ + +/\h+A/B +------------------------------------------------------------------ + Bra + \h++ + A + Ket + End +------------------------------------------------------------------ + +/\v*B/B +------------------------------------------------------------------ + Bra + \v*+ + B + Ket + End 
+------------------------------------------------------------------ + +/\V+\x0a/B +------------------------------------------------------------------ + Bra + \V++ + \x0a + Ket + End +------------------------------------------------------------------ + +/A+\h/B +------------------------------------------------------------------ + Bra + A++ + \h + Ket + End +------------------------------------------------------------------ + +/ *\H/B +------------------------------------------------------------------ + Bra + *+ + \H + Ket + End +------------------------------------------------------------------ + +/A*\v/B +------------------------------------------------------------------ + Bra + A*+ + \v + Ket + End +------------------------------------------------------------------ + +/\x0b*\V/B +------------------------------------------------------------------ + Bra + \x0b*+ + \V + Ket + End +------------------------------------------------------------------ + +/\d+\h/B +------------------------------------------------------------------ + Bra + \d++ + \h + Ket + End +------------------------------------------------------------------ + +/\d*\v/B +------------------------------------------------------------------ + Bra + \d*+ + \v + Ket + End +------------------------------------------------------------------ + +/S+\h\S+\v/B +------------------------------------------------------------------ + Bra + S++ + \h + \S++ + \v + Ket + End +------------------------------------------------------------------ + +/\w{3,}\h\w+\v/B +------------------------------------------------------------------ + Bra + \w{3} + \w*+ + \h + \w++ + \v + Ket + End +------------------------------------------------------------------ + +/\h+\d\h+\w\h+\S\h+\H/B +------------------------------------------------------------------ + Bra + \h++ + \d + \h++ + \w + \h++ + \S + \h++ + \H + Ket + End +------------------------------------------------------------------ + +/\v+\d\v+\w\v+\S\v+\V/B 
+------------------------------------------------------------------ + Bra + \v++ + \d + \v++ + \w + \v++ + \S + \v++ + \V + Ket + End +------------------------------------------------------------------ + +/\H+\h\H+\d/B +------------------------------------------------------------------ + Bra + \H++ + \h + \H+ + \d + Ket + End +------------------------------------------------------------------ + +/\V+\v\V+\w/B +------------------------------------------------------------------ + Bra + \V++ + \v + \V+ + \w + Ket + End +------------------------------------------------------------------ + +/\( (?: [^()]* | (?R) )* \)/x +(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0
(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(00)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)
0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)\=jitstack=1024 + 0: (0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(
0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(00)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0
)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0) + +/[\E]AAA/ +Failed: error 106 at offset 7: missing terminating ] for character class + +/[\Q\E]AAA/ +Failed: error 106 at offset 9: missing terminating ] for character class + +/[^\E]AAA/ +Failed: error 106 at offset 8: missing terminating ] for character class + +/[^\Q\E]AAA/ +Failed: error 106 at offset 10: missing terminating ] for character class + +/[\E^]AAA/ +Failed: error 106 at offset 8: missing terminating ] for character class + +/[\Q\E^]AAA/ +Failed: error 106 at offset 10: missing terminating ] for character class + +/A(*PRUNE)B(*SKIP)C(*THEN)D(*COMMIT)E(*F)F(*FAIL)G(?!)H(*ACCEPT)I/B +------------------------------------------------------------------ + Bra + A + *PRUNE + B + *SKIP + C + *THEN + D + *COMMIT + E + *FAIL + F + *FAIL + G + *FAIL + H + *ACCEPT + I + Ket + End +------------------------------------------------------------------ + +/^a+(*FAIL)/auto_callout +\= Expect no match + aaaaaa +--->aaaaaa + +0 ^ ^ + +1 ^ a+ + +3 ^ ^ (*FAIL) + +3 ^ ^ (*FAIL) + +3 ^ ^ (*FAIL) + +3 ^ ^ (*FAIL) + +3 ^ ^ (*FAIL) + +3 ^^ (*FAIL) +No match + +/a+b?c+(*FAIL)/auto_callout +\= Expect no match + aaabccc +--->aaabccc + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ c+ + +6 ^ ^ (*FAIL) + +6 ^ ^ (*FAIL) + +6 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ c+ + +6 ^ ^ (*FAIL) + +6 ^ ^ (*FAIL) + +6 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^^ b? 
+ +4 ^ ^ c+ + +6 ^ ^ (*FAIL) + +6 ^ ^ (*FAIL) + +6 ^ ^ (*FAIL) +No match + +/a+b?(*PRUNE)c+(*FAIL)/auto_callout +\= Expect no match + aaabccc +--->aaabccc + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*PRUNE) ++12 ^ ^ c+ ++14 ^ ^ (*FAIL) ++14 ^ ^ (*FAIL) ++14 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*PRUNE) ++12 ^ ^ c+ ++14 ^ ^ (*FAIL) ++14 ^ ^ (*FAIL) ++14 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^^ b? + +4 ^ ^ (*PRUNE) ++12 ^ ^ c+ ++14 ^ ^ (*FAIL) ++14 ^ ^ (*FAIL) ++14 ^ ^ (*FAIL) +No match + +/a+b?(*COMMIT)c+(*FAIL)/auto_callout +\= Expect no match + aaabccc +--->aaabccc + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*COMMIT) ++13 ^ ^ c+ ++15 ^ ^ (*FAIL) ++15 ^ ^ (*FAIL) ++15 ^ ^ (*FAIL) +No match + +/a+b?(*SKIP)c+(*FAIL)/auto_callout +\= Expect no match + aaabcccaaabccc +--->aaabcccaaabccc + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*SKIP) ++11 ^ ^ c+ ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*SKIP) ++11 ^ ^ c+ ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) +No match + +/a+b?(*THEN)c+(*FAIL)/auto_callout +\= Expect no match + aaabccc +--->aaabccc + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*THEN) ++11 ^ ^ c+ ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*THEN) ++11 ^ ^ c+ ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^^ b? 
+ +4 ^ ^ (*THEN) ++11 ^ ^ c+ ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) +No match + +/a(*MARK)b/ +Failed: error 166 at offset 7: (*MARK) must have an argument + +/\g6666666666/ +Failed: error 161 at offset 7: subpattern number is too big + +/[\g6666666666]/B +------------------------------------------------------------------ + Bra + [6g] + Ket + End +------------------------------------------------------------------ + +/(?1)\c[/ +Failed: error 115 at offset 3: reference to non-existent subpattern + +/.+A/newline=crlf +\= Expect no match + \r\nA +No match + +/\nA/newline=crlf + \r\nA + 0: \x0aA + +/[\r\n]A/newline=crlf + \r\nA + 0: \x0aA + +/(\r|\n)A/newline=crlf + \r\nA + 0: \x0aA + 1: \x0a + +/a(*CR)b/ +Failed: error 160 at offset 5: (*VERB) not recognized or malformed + +/(*CR)a.b/ + a\nb + 0: a\x0ab +\= Expect no match + a\rb +No match + +/(*CR)a.b/newline=lf + a\nb + 0: a\x0ab +\= Expect no match + a\rb +No match + +/(*LF)a.b/newline=CRLF + a\rb + 0: a\x0db +\= Expect no match + a\nb +No match + +/(*CRLF)a.b/ + a\rb + 0: a\x0db + a\nb + 0: a\x0ab +\= Expect no match + a\r\nb +No match + +/(*ANYCRLF)a.b/newline=CR +\= Expect no match + a\rb +No match + a\nb +No match + a\r\nb +No match + +/(*ANY)a.b/newline=cr +\= Expect no match + a\rb +No match + a\nb +No match + a\r\nb +No match + a\x85b +No match + +/(*ANY).*/g + abc\r\ndef + 0: abc + 0: + 0: def + 0: + +/(*ANYCRLF).*/g + abc\r\ndef + 0: abc + 0: + 0: def + 0: + +/(*CRLF).*/g + abc\r\ndef + 0: abc + 0: + 0: def + 0: + +/(*NUL)^.*/ + a\nb\x00ccc + 0: a\x0ab + +/(*NUL)^.*/s + a\nb\x00ccc + 0: a\x0ab\x00ccc + +/^x/m,newline=NUL + ab\x00xy + 0: x + +/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex + x\nyz + 0: x\x0ay + +/(*NUL)^X\NY/ + X\nY + 0: X\x0aY + X\rY + 0: X\x0dY +\= Expect no match + X\x00Y +No match + +/a\Rb/I,bsr=anycrlf +Capture group count = 0 +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\rb + 0: a\x0db + a\nb + 0: a\x0ab + 
a\r\nb + 0: a\x0d\x0ab +\= Expect no match + a\x85b +No match + a\x0bb +No match + +/a\Rb/I,bsr=unicode +Capture group count = 0 +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\rb + 0: a\x0db + a\nb + 0: a\x0ab + a\r\nb + 0: a\x0d\x0ab + a\x85b + 0: a\x85b + a\x0bb + 0: a\x0bb + +/a\R?b/I,bsr=anycrlf +Capture group count = 0 +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + a\rb + 0: a\x0db + a\nb + 0: a\x0ab + a\r\nb + 0: a\x0d\x0ab +\= Expect no match + a\x85b +No match + a\x0bb +No match + +/a\R?b/I,bsr=unicode +Capture group count = 0 +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + a\rb + 0: a\x0db + a\nb + 0: a\x0ab + a\r\nb + 0: a\x0d\x0ab + a\x85b + 0: a\x85b + a\x0bb + 0: a\x0bb + +/a\R{2,4}b/I,bsr=anycrlf +Capture group count = 0 +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 4 + a\r\n\nb + 0: a\x0d\x0a\x0ab + a\n\r\rb + 0: a\x0a\x0d\x0db + a\r\n\r\n\r\n\r\nb + 0: a\x0d\x0a\x0d\x0a\x0d\x0a\x0d\x0ab +\= Expect no match + a\x85\x85b +No match + a\x0b\x0bb +No match + +/a\R{2,4}b/I,bsr=unicode +Capture group count = 0 +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 4 + a\r\rb + 0: a\x0d\x0db + a\n\n\nb + 0: a\x0a\x0a\x0ab + a\r\n\n\r\rb + 0: a\x0d\x0a\x0a\x0d\x0db + a\x85\x85b + 0: a\x85\x85b + a\x0b\x0bb + 0: a\x0b\x0bb +\= Expect no match + a\r\r\r\r\rb +No match + +/(*BSR_ANYCRLF)a\Rb/I +Capture group count = 0 +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\nb + 0: a\x0ab + a\rb + 0: a\x0db + +/(*BSR_UNICODE)a\Rb/I +Capture group count = 0 +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\x85b + 0: a\x85b + +/(*BSR_ANYCRLF)(*CRLF)a\Rb/I 
+Capture group count = 0 +\R matches CR, LF, or CRLF +Forced newline is CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\nb + 0: a\x0ab + a\rb + 0: a\x0db + +/(*CRLF)(*BSR_UNICODE)a\Rb/I +Capture group count = 0 +\R matches any Unicode newline +Forced newline is CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\x85b + 0: a\x85b + +/(*CRLF)(*BSR_ANYCRLF)(*CR)ab/I +Capture group count = 0 +\R matches CR, LF, or CRLF +Forced newline is CR +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + +/(?)(?&)/ +Failed: error 162 at offset 9: subpattern name expected + +/(?)(?&a)/ +Failed: error 115 at offset 11: reference to non-existent subpattern + +/(?)(?&aaaaaaaaaaaaaaaaaaaaaaa)/ +Failed: error 115 at offset 9: reference to non-existent subpattern + +/(?+-a)/ +Failed: error 129 at offset 2: digit expected after (?+ or (?- + +/(?-+a)/ +Failed: error 111 at offset 3: unrecognized character after (? or (?- + +/(?(-1))/ +Failed: error 115 at offset 5: reference to non-existent subpattern + +/(?(+10))/ +Failed: error 115 at offset 4: reference to non-existent subpattern + +/(?(10))/ +Failed: error 115 at offset 3: reference to non-existent subpattern + +/(?(+2))()()/ + +/(?(2))()()/ + +/\k''/ +Failed: error 162 at offset 3: subpattern name expected + +/\k<>/ +Failed: error 162 at offset 3: subpattern name expected + +/\k{}/ +Failed: error 162 at offset 3: subpattern name expected + +/\k/ +Failed: error 169 at offset 2: \k is not followed by a braced, angle-bracketed, or quoted name + +/\kabc/ +Failed: error 169 at offset 2: \k is not followed by a braced, angle-bracketed, or quoted name + +/(?P=)/ +Failed: error 162 at offset 4: subpattern name expected + +/(?P>)/ +Failed: error 162 at offset 4: subpattern name expected + +/[[:foo:]]/ +Failed: error 130 at offset 3: unknown POSIX class name + +/[[:1234:]]/ +Failed: error 130 at offset 3: unknown POSIX class name + +/[[:f\oo:]]/ 
+Failed: error 130 at offset 3: unknown POSIX class name + +/[[: :]]/ +Failed: error 130 at offset 3: unknown POSIX class name + +/[[:...:]]/ +Failed: error 130 at offset 3: unknown POSIX class name + +/[[:l\ower:]]/ +Failed: error 130 at offset 3: unknown POSIX class name + +/[[:abc\:]]/ +Failed: error 130 at offset 3: unknown POSIX class name + +/[abc[:x\]pqr:]]/ +Failed: error 130 at offset 6: unknown POSIX class name + +/[[:a\dz:]]/ +Failed: error 130 at offset 3: unknown POSIX class name + +/(^(a|b\g<-1'c))/ +Failed: error 157 at offset 8: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/^(?+1)(?x|y){0}z/ + xzxx + 0: xz + yzyy + 0: yz +\= Expect no match + xxz +No match + +/(\3)(\1)(a)/ +\= Expect no match + cat +No match + +/(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames + cat + 0: a + 1: + 2: + 3: a + +/TA]/ + The ACTA] comes + 0: TA] + +/TA]/allow_empty_class,match_unset_backref,dupnames + The ACTA] comes + 0: TA] + +/(?2)[]a()b](abc)/ +Failed: error 115 at offset 3: reference to non-existent subpattern + abcbabc + +/(?2)[^]a()b](abc)/ +Failed: error 115 at offset 3: reference to non-existent subpattern + abcbabc + +/(?1)[]a()b](abc)/ + abcbabc + 0: abcbabc + 1: abc +\= Expect no match + abcXabc +No match + +/(?1)[^]a()b](abc)/ + abcXabc + 0: abcXabc + 1: abc +\= Expect no match + abcbabc +No match + +/(?2)[]a()b](abc)(xyz)/ + xyzbabcxyz + 0: xyzbabcxyz + 1: abc + 2: xyz + +/(?&N)[]a(?)](?abc)/ +Failed: error 115 at offset 3: reference to non-existent subpattern + abc)](abc)/ +Failed: error 115 at offset 3: reference to non-existent subpattern + abcadc + +0 ^ (? + +2 ^ (?= + +5 ^ .* + +7 ^ ^ b + +7 ^ ^ b + +7 ^^ b + +7 ^ b ++11 ^ ^ ++12 ^ ) ++13 ^ End of pattern + 0: + abc +--->abc + +0 ^ (? + +2 ^ (?= + +5 ^ .* + +7 ^ ^ b + +7 ^ ^ b + +7 ^^ b + +8 ^ ^ ) + +9 ^ b + +0 ^ (? 
+ +2 ^ (?= + +5 ^ .* + +7 ^ ^ b + +7 ^^ b + +7 ^ b + +8 ^^ ) + +9 ^ b ++10 ^^ | ++13 ^^ End of pattern + 0: b + +/(?(?=b).*b|^d)/I +Capture group count = 0 +Subject length lower bound = 1 + +/(?(?=.*b).*b|^d)/I +Capture group count = 0 +Subject length lower bound = 1 + +/xyz/auto_callout + xyz +--->xyz + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +3 ^ ^ End of pattern + 0: xyz + abcxyz +--->abcxyz + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +3 ^ ^ End of pattern + 0: xyz +\= Expect no match + abc +No match + abcxypqr +No match + +/xyz/auto_callout,no_start_optimize + abcxyz +--->abcxyz + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +3 ^ ^ End of pattern + 0: xyz +\= Expect no match + abc +--->abc + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x +No match + abcxypqr +--->abcxypqr + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x +No match + +/(*NO_START_OPT)xyz/auto_callout + abcxyz +--->abcxyz ++15 ^ x ++15 ^ x ++15 ^ x ++15 ^ x ++16 ^^ y ++17 ^ ^ z ++18 ^ ^ End of pattern + 0: xyz + +/(*NO_AUTO_POSSESS)a+b/B +------------------------------------------------------------------ + Bra + a+ + b + Ket + End +------------------------------------------------------------------ + +/xyz/auto_callout,no_start_optimize + abcxyz +--->abcxyz + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +3 ^ ^ End of pattern + 0: xyz + +/^"((?(?=[a])[^"])|b)*"$/auto_callout + "ab" +--->"ab" + +0 ^ ^ + +1 ^ " + +2 ^^ ( + +3 ^^ (? + +5 ^^ (?= + +8 ^^ [a] ++11 ^ ^ ) ++12 ^^ [^"] ++16 ^ ^ ) ++17 ^ ^ | + +3 ^ ^ (? + +5 ^ ^ (?= + +8 ^ ^ [a] ++17 ^ ^ | ++21 ^ ^ " ++18 ^ ^ b ++19 ^ ^ )* + +3 ^ ^ (? 
+ +5 ^ ^ (?= + +8 ^ ^ [a] ++17 ^ ^ | ++21 ^ ^ " ++22 ^ ^ $ ++23 ^ ^ End of pattern + 0: "ab" + 1: + +/^"((?(?=[a])[^"])|b)*"$/ + "ab" + 0: "ab" + 1: + +/^X(?5)(a)(?|(b)|(q))(c)(d)Y/ +Failed: error 115 at offset 5: reference to non-existent subpattern + XYabcdY + +/^X(?&N)(a)(?|(b)|(q))(c)(d)(?Y)/ + XYabcdY + 0: XYabcdY + 1: a + 2: b + 3: c + 4: d + 5: Y + +/Xa{2,4}b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/Xa{2,4}?b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/Xa{2,4}+b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X\d{2,4}b/ + X\=ps +Partial match: X + X3\=ps +Partial match: X3 + X33\=ps +Partial match: X33 + X333\=ps +Partial match: X333 + X3333\=ps +Partial match: X3333 + +/X\d{2,4}?b/ + X\=ps +Partial match: X + X3\=ps +Partial match: X3 + X33\=ps +Partial match: X33 + X333\=ps +Partial match: X333 + X3333\=ps +Partial match: X3333 + +/X\d{2,4}+b/ + X\=ps +Partial match: X + X3\=ps +Partial match: X3 + X33\=ps +Partial match: X33 + X333\=ps +Partial match: X333 + X3333\=ps +Partial match: X3333 + +/X\D{2,4}b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X\D{2,4}?b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X\D{2,4}+b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X[abc]{2,4}b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial 
match: Xaaaa + +/X[abc]{2,4}?b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X[abc]{2,4}+b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X[^a]{2,4}b/ + X\=ps +Partial match: X + Xz\=ps +Partial match: Xz + Xzz\=ps +Partial match: Xzz + Xzzz\=ps +Partial match: Xzzz + Xzzzz\=ps +Partial match: Xzzzz + +/X[^a]{2,4}?b/ + X\=ps +Partial match: X + Xz\=ps +Partial match: Xz + Xzz\=ps +Partial match: Xzz + Xzzz\=ps +Partial match: Xzzz + Xzzzz\=ps +Partial match: Xzzzz + +/X[^a]{2,4}+b/ + X\=ps +Partial match: X + Xz\=ps +Partial match: Xz + Xzz\=ps +Partial match: Xzz + Xzzz\=ps +Partial match: Xzzz + Xzzzz\=ps +Partial match: Xzzzz + +/(Y)X\1{2,4}b/ + YX\=ps +Partial match: YX + YXY\=ps +Partial match: YXY + YXYY\=ps +Partial match: YXYY + YXYYY\=ps +Partial match: YXYYY + YXYYYY\=ps +Partial match: YXYYYY + +/(Y)X\1{2,4}?b/ + YX\=ps +Partial match: YX + YXY\=ps +Partial match: YXY + YXYY\=ps +Partial match: YXYY + YXYYY\=ps +Partial match: YXYYY + YXYYYY\=ps +Partial match: YXYYYY + +/(Y)X\1{2,4}+b/ + YX\=ps +Partial match: YX + YXY\=ps +Partial match: YXY + YXYY\=ps +Partial match: YXYY + YXYYY\=ps +Partial match: YXYYY + YXYYYY\=ps +Partial match: YXYYYY + +/\++\KZ|\d+X|9+Y/startchar + ++++123999\=ps +Partial match: 123999 + ++++123999Y\=ps + 0: 999Y + ++++Z1234\=ps + 0: ++++Z + ^^^^ + +/Z(*F)/ +\= Expect no match + Z\=ps +No match + ZA\=ps +No match + +/Z(?!)/ +\= Expect no match + Z\=ps +No match + ZA\=ps +No match + +/dog(sbody)?/ + dogs\=ps + 0: dog + dogs\=ph +Partial match: dogs + +/dog(sbody)??/ + dogs\=ps + 0: dog + dogs\=ph + 0: dog + +/dog|dogsbody/ + dogs\=ps + 0: dog + dogs\=ph + 0: dog + +/dogsbody|dog/ + dogs\=ps + 0: dog + dogs\=ph +Partial match: dogs + +/\bthe cat\b/ + the cat\=ps + 0: the cat + the cat\=ph +Partial match: the cat + 
+/abc/ + abc\=ps + 0: abc + abc\=ph + 0: abc + +/abc\K123/startchar + xyzabc123pqr + 0: abc123 + ^^^ + xyzabc12\=ps +Partial match: abc12 + xyzabc12\=ph +Partial match: abc12 + +/(?<=abc)123/ + xyzabc123pqr + 0: 123 + xyzabc12\=ps +Partial match: 12 + xyzabc12\=ph +Partial match: 12 + +/\babc\b/ + +++abc+++ + 0: abc + +++ab\=ps +Partial match: ab + +++ab\=ph +Partial match: ab + +/(?&word)(?&element)(?(DEFINE)(?<[^m][^>]>[^<])(?\w*+))/B +------------------------------------------------------------------ + Bra + Recurse + Recurse + Cond + Cond false + CBra 1 + < + [^m] + [^>] + > + [^<] + Ket + CBra 2 + \w*+ + Ket + Ket + Ket + End +------------------------------------------------------------------ + +/(?&word)(?&element)(?(DEFINE)(?<[^\d][^>]>[^<])(?\w*+))/B +------------------------------------------------------------------ + Bra + Recurse + Recurse + Cond + Cond false + CBra 1 + < + [\x00-/:-\xff] (neg) + [^>] + > + [^<] + Ket + CBra 2 + \w*+ + Ket + Ket + Ket + End +------------------------------------------------------------------ + +/(ab)(x(y)z(cd(*ACCEPT)))pq/B +------------------------------------------------------------------ + Bra + CBra 1 + ab + Ket + CBra 2 + x + CBra 3 + y + Ket + z + CBra 4 + cd + Close 4 + Close 2 + *ACCEPT + Ket + Ket + pq + Ket + End +------------------------------------------------------------------ + +/abc\K/aftertext,startchar + abcdef + 0: abc + ^^^ + 0+ def + abcdef\=notempty_atstart + 0: abc + ^^^ + 0+ def + xyzabcdef\=notempty_atstart + 0: abc + ^^^ + 0+ def +\= Expect no match + abcdef\=notempty +No match + xyzabcdef\=notempty +No match + +/^(?:(?=abc)|abc\K)/aftertext,startchar + abcdef + 0: + 0+ abcdef + abcdef\=notempty_atstart + 0: abc + ^^^ + 0+ def +\= Expect no match + abcdef\=notempty +No match + +/a?b?/aftertext + xyz + 0: + 0+ xyz + xyzabc + 0: + 0+ xyzabc + xyzabc\=notempty + 0: ab + 0+ c + xyzabc\=notempty_atstart + 0: + 0+ yzabc + xyz\=notempty_atstart + 0: + 0+ yz +\= Expect no match + xyz\=notempty +No match + 
+/^a?b?/aftertext + xyz + 0: + 0+ xyz + xyzabc + 0: + 0+ xyzabc +\= Expect no match + xyzabc\=notempty +No match + xyzabc\=notempty_atstart +No match + xyz\=notempty_atstart +No match + xyz\=notempty +No match + +/^(?a|b\gc)/ + aaaa + 0: a + 1: a + bacxxx + 0: bac + 1: bac + bbaccxxx + 0: bbacc + 1: bbacc + bbbacccxx + 0: bbbaccc + 1: bbbaccc + +/^(?a|b\g'name'c)/ + aaaa + 0: a + 1: a + bacxxx + 0: bac + 1: bac + bbaccxxx + 0: bbacc + 1: bbacc + bbbacccxx + 0: bbbaccc + 1: bbbaccc + +/^(a|b\g<1>c)/ + aaaa + 0: a + 1: a + bacxxx + 0: bac + 1: bac + bbaccxxx + 0: bbacc + 1: bbacc + bbbacccxx + 0: bbbaccc + 1: bbbaccc + +/^(a|b\g'1'c)/ + aaaa + 0: a + 1: a + bacxxx + 0: bac + 1: bac + bbaccxxx + 0: bbacc + 1: bbacc + bbbacccxx + 0: bbbaccc + 1: bbbaccc + +/^(a|b\g'-1'c)/ + aaaa + 0: a + 1: a + bacxxx + 0: bac + 1: bac + bbaccxxx + 0: bbacc + 1: bbacc + bbbacccxx + 0: bbbaccc + 1: bbbaccc + +/(^(a|b\g<-1>c))/ + aaaa + 0: a + 1: a + 2: a + bacxxx + 0: bac + 1: bac + 2: bac + bbaccxxx + 0: bbacc + 1: bbacc + 2: bbacc + bbbacccxx + 0: bbbaccc + 1: bbbaccc + 2: bbbaccc + +/(?-i:\g)(?i:(?a))/ + XaaX + 0: aa + 1: a + XAAX + 0: AA + 1: A + +/(?i:\g)(?-i:(?a))/ + XaaX + 0: aa + 1: a +\= Expect no match + XAAX +No match + +/(?-i:\g<+1>)(?i:(a))/ + XaaX + 0: aa + 1: a + XAAX + 0: AA + 1: A + +/(?=(?(?#simplesyntax)\$(?[a-zA-Z_\x{7f}-\x{ff}][a-zA-Z0-9_\x{7f}-\x{ff}]*)(?:\[(?[a-zA-Z0-9_\x{7f}-\x{ff}]+|\$\g)\]|->\g(\(.*?\))?)?|(?#simple syntax withbraces)\$\{(?:\g(?\[(?:\g|'(?:\\.|[^'\\])*'|"(?:\g|\\.|[^"\\])*")\])?|\g|\$\{\g\})\}|(?#complexsyntax)\{(?\$(?\g(\g*|\(.*?\))?)(?:->\g)*|\$\g|\$\{\g\})\}))\{/ + +/(?a|b|c)\g*/ + abc + 0: abc + 1: a + accccbbb + 0: accccbbb + 1: a + +/^X(?7)(a)(?|(b)|(q)(r)(s))(c)(d)(Y)/ + XYabcdY + 0: XYabcdY + 1: a + 2: b + 3: + 4: + 5: c + 6: d + 7: Y + +/(?<=b(?1)|zzz)(a)/ + xbaax + 0: a + 1: a + xzzzax + 0: a + 1: a + +/(a)(?<=b\1)/ + +/(a)(?<=b+(?1))/ +Failed: error 125 at offset 3: length of lookbehind assertion is not limited + +/(a+)(?<=b(?1))/ 
+Failed: error 125 at offset 4: length of lookbehind assertion is not limited + +/(a(?<=b(?1)))/ +Failed: error 125 at offset 2: length of lookbehind assertion is not limited + +/(?<=b(?1))xyz/ +Failed: error 115 at offset 8: reference to non-existent subpattern + +/(?<=b(?1))xyz(b+)pqrstuvew/ +Failed: error 125 at offset 0: length of lookbehind assertion is not limited + +/(a|bc)\1/I +Capture group count = 1 +Max back reference = 1 +Starting code units: a b +Subject length lower bound = 2 + +/(a|bc)\1{2,3}/I +Capture group count = 1 +Max back reference = 1 +Starting code units: a b +Subject length lower bound = 3 + +/(a|bc)(?1)/I +Capture group count = 1 +Starting code units: a b +Subject length lower bound = 2 + +/(a|b\1)(a|b\1)/I +Capture group count = 2 +Max back reference = 1 +Starting code units: a b +Subject length lower bound = 2 + +/(a|b\1){2}/I +Capture group count = 1 +Max back reference = 1 +Starting code units: a b +Subject length lower bound = 2 + +/(a|bbbb\1)(a|bbbb\1)/I +Capture group count = 2 +Max back reference = 1 +Starting code units: a b +Subject length lower bound = 2 + +/(a|bbbb\1){2}/I +Capture group count = 1 +Max back reference = 1 +Starting code units: a b +Subject length lower bound = 2 + +/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'F' +Last code unit = ':' +Subject length lower bound = 22 + +/]{0,})>]{0,})>([\d]{0,}\.)(.*)((
([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/Iis +Capture group count = 11 +Options: caseless dotall +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 47 + +"(?>.*/)foo"I +Capture group count = 0 +Last code unit = 'o' +Subject length lower bound = 4 + +/(?(?=[^a-z]+[a-z]) \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) /Ix +Capture group count = 0 +Options: extended +Last code unit = '-' +Subject length lower bound = 8 + +/(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))/Ii +Capture group count = 1 +Options: caseless +Starting code units: A B C a b c +Subject length lower bound = 1 + +/(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))/I +Capture group count = 0 +Starting code units: c d +Last code unit = 'b' +Subject length lower bound = 41 + +/A)|(?
B))/I +Capture group count = 1 +Named capture groups: + a 1 +Starting code units: A B +Subject length lower bound = 1 + AB\=copy=a + 0: A + 1: A + C A (1) a (group 1) + BA\=copy=a + 0: B + 1: B + C B (1) a (group 1) + +/(?|(?A)|(?B))/ +Failed: error 165 at offset 16: different names for subpatterns of the same number are not allowed + +/(?:a(? (?')|(?")) | + b(? (?')|(?")) ) + (?('quote')[a-z]+|[0-9]+)/Ix,dupnames +Capture group count = 6 +Max back reference = 4 +Named capture groups: + apostrophe 2 + apostrophe 5 + quote 1 + quote 4 + realquote 3 + realquote 6 +Options: dupnames extended +Starting code units: a b +Subject length lower bound = 3 + a"aaaaa + 0: a"aaaaa + 1: " + 2: + 3: " + b"aaaaa + 0: b"aaaaa + 1: + 2: + 3: + 4: " + 5: + 6: " +\= Expect no match + b"11111 +No match + a"11111 +No match + +/^(?|(a)(b)(c)(?d)|(?e)) (?('D')X|Y)/IBx,dupnames +------------------------------------------------------------------ + Bra + ^ + Bra + CBra 1 + a + Ket + CBra 2 + b + Ket + CBra 3 + c + Ket + CBra 4 + d + Ket + Alt + CBra 1 + e + Ket + Ket + Cond + Cond ref 2 + X + Alt + Y + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 4 +Max back reference = 4 +Named capture groups: + D 4 + D 1 +Compile options: dupnames extended +Overall options: anchored dupnames extended +Starting code units: a e +Subject length lower bound = 2 + abcdX + 0: abcdX + 1: a + 2: b + 3: c + 4: d + eX + 0: eX + 1: e +\= Expect no match + abcdY +No match + ey +No match + +/(?a) (b)(c) (?d (?(R&A)$ | (?4)) )/IBx,dupnames +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + CBra 2 + b + Ket + CBra 3 + c + Ket + CBra 4 + d + Cond + Cond recurse 2 + $ + Alt + Recurse + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 4 +Max back reference = 4 +Named capture groups: + A 1 + A 4 +Options: dupnames extended +First code unit = 'a' +Last code 
unit = 'd' +Subject length lower bound = 4 + abcdd + 0: abcdd + 1: a + 2: b + 3: c + 4: dd +\= Expect no match + abcdde +No match + +/abcd*/ + xxxxabcd\=ps + 0: abcd + xxxxabcd\=ph +Partial match: abcd + +/abcd*/i + xxxxabcd\=ps + 0: abcd + xxxxabcd\=ph +Partial match: abcd + XXXXABCD\=ps + 0: ABCD + XXXXABCD\=ph +Partial match: ABCD + +/abc\d*/ + xxxxabc1\=ps + 0: abc1 + xxxxabc1\=ph +Partial match: abc1 + +/(a)bc\1*/ + xxxxabca\=ps + 0: abca + 1: a + xxxxabca\=ph +Partial match: abca + +/abc[de]*/ + xxxxabcde\=ps + 0: abcde + xxxxabcde\=ph +Partial match: abcde + +/(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames + cat + 0: a + 1: + 2: + 3: a + +/(\3)(\1)(a)/I,allow_empty_class,match_unset_backref,dupnames +Capture group count = 3 +Max back reference = 3 +Options: allow_empty_class dupnames match_unset_backref +Last code unit = 'a' +Subject length lower bound = 1 + cat + 0: a + 1: + 2: + 3: a + +/(\3)(\1)(a)/I +Capture group count = 3 +Max back reference = 3 +Last code unit = 'a' +Subject length lower bound = 3 +\= Expect no match + cat +No match + +/i(?(DEFINE)(?a))/I +Capture group count = 1 +Named capture groups: + s 1 +First code unit = 'i' +Subject length lower bound = 1 + i + 0: i + +/()i(?(1)a)/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'i' +Subject length lower bound = 1 + ia + 0: ia + 1: + +/(?i)a(?-i)b|c/B +------------------------------------------------------------------ + Bra + /i a + b + Alt + c + Ket + End +------------------------------------------------------------------ + XabX + 0: ab + XAbX + 0: Ab + CcC + 0: c +\= Expect no match + XABX +No match + +/(?i)a(?s)b|c/B +------------------------------------------------------------------ + Bra + /i ab + Alt + /i c + Ket + End +------------------------------------------------------------------ + +/(?i)a(?s-i)b|c/B +------------------------------------------------------------------ + Bra + /i a + b + Alt + c + Ket + End 
+------------------------------------------------------------------ + +/^(ab(c\1)d|x){2}$/B +------------------------------------------------------------------ + Bra + ^ + CBra 1 + ab + CBra 2 + c + \1 + Ket + d + Alt + x + Ket + CBra 1 + ab + CBra 2 + c + \1 + Ket + d + Alt + x + Ket + $ + Ket + End +------------------------------------------------------------------ + xabcxd + 0: xabcxd + 1: abcxd + 2: cx + +/^(?&t)*+(?(DEFINE)(?.))$/B +------------------------------------------------------------------ + Bra + ^ + Braposzero + SBraPos + Recurse + KetRpos + Cond + Cond false + CBra 1 + Any + Ket + Ket + $ + Ket + End +------------------------------------------------------------------ + +/^(?&t)*(?(DEFINE)(?.))$/B +------------------------------------------------------------------ + Bra + ^ + Brazero + SBra + Recurse + KetRmax + Cond + Cond false + CBra 1 + Any + Ket + Ket + $ + Ket + End +------------------------------------------------------------------ + +# This one is here because Perl gives the match as "b" rather than "ab". I +# believe this to be a Perl bug. + +/(?>a\Kb)z|(ab)/ + ab\=startchar + 0: ab + 1: ab + +/(?P(?P0|)|(?P>L2)(?P>L1))/ + abcd + 0: + 1: + 2: + 0abc + 0: 0 + 1: 0 + 2: 0 + +/abc(*MARK:)pqr/ +Failed: error 166 at offset 10: (*MARK) must have an argument + +/abc(*:)pqr/ +Failed: error 166 at offset 6: (*MARK) must have an argument + +/(*COMMIT:X)/B +------------------------------------------------------------------ + Bra + *COMMIT X + Ket + End +------------------------------------------------------------------ + +# This should, and does, fail. In Perl, it does not, which I think is a +# bug because replacing the B in the pattern by (B|D) does make it fail. +# Turning off Perl's optimization by inserting (??{""}) also makes it fail. + +/A(*COMMIT)B/aftertext,mark +\= Expect no match + ACABX +No match + +# These should be different, but in Perl they are not, which I think +# is a bug in Perl. 
+ +/A(*THEN)B|A(*THEN)C/mark + AC + 0: AC + +/A(*PRUNE)B|A(*PRUNE)C/mark +\= Expect no match + AC +No match + +# Mark names can be duplicated. Perl doesn't give a mark for this one, +# though PCRE2 does. + +/^A(*:A)B|^X(*:A)Y/mark +\= Expect no match + XAQQ +No match, mark = A + +# COMMIT at the start of a pattern should be the same as an anchor. Perl +# optimizations defeat this. So does the PCRE2 optimization unless we disable +# it. + +/(*COMMIT)ABC/ + ABCDEFG + 0: ABC + +/(*COMMIT)ABC/no_start_optimize +\= Expect no match + DEFGABC +No match + +/^(ab (c+(*THEN)cd) | xyz)/x +\= Expect no match + abcccd +No match + +/^(ab (c+(*PRUNE)cd) | xyz)/x +\= Expect no match + abcccd +No match + +/^(ab (c+(*FAIL)cd) | xyz)/x +\= Expect no match + abcccd +No match + +# Perl gets some of these wrong + +/(?>.(*ACCEPT))*?5/ + abcde + 0: a + +/(.(*ACCEPT))*?5/ + abcde + 0: a + 1: a + +/(.(*ACCEPT))5/ + abcde + 0: a + 1: a + +/(.(*ACCEPT))*5/ + abcde + 0: a + 1: a + +/A\NB./B +------------------------------------------------------------------ + Bra + A + Any + B + Any + Ket + End +------------------------------------------------------------------ + ACBD + 0: ACBD +\= Expect no match + A\nB +No match + ACB\n +No match + +/A\NB./Bs +------------------------------------------------------------------ + Bra + A + Any + B + AllAny + Ket + End +------------------------------------------------------------------ + ACBD + 0: ACBD + ACB\n + 0: ACB\x0a +\= Expect no match + A\nB +No match + +/A\NB/newline=crlf + A\nB + 0: A\x0aB + A\rB + 0: A\x0dB +\= Expect no match + A\r\nB +No match + +/\R+b/B +------------------------------------------------------------------ + Bra + \R++ + b + Ket + End +------------------------------------------------------------------ + +/\R+\n/B +------------------------------------------------------------------ + Bra + \R+ + \x0a + Ket + End +------------------------------------------------------------------ + +/\R+\d/B 
+------------------------------------------------------------------ + Bra + \R++ + \d + Ket + End +------------------------------------------------------------------ + +/\d*\R/B +------------------------------------------------------------------ + Bra + \d*+ + \R + Ket + End +------------------------------------------------------------------ + +/\s*\R/B +------------------------------------------------------------------ + Bra + \s* + \R + Ket + End +------------------------------------------------------------------ + \x20\x0a + 0: \x0a + \x20\x0d + 0: \x0d + \x20\x0d\x0a + 0: \x0d\x0a + +/\S*\R/B +------------------------------------------------------------------ + Bra + \S*+ + \R + Ket + End +------------------------------------------------------------------ + a\x0a + 0: a\x0a + +/X\h*\R/B +------------------------------------------------------------------ + Bra + X + \h*+ + \R + Ket + End +------------------------------------------------------------------ + X\x20\x0a + 0: X \x0a + +/X\H*\R/B +------------------------------------------------------------------ + Bra + X + \H* + \R + Ket + End +------------------------------------------------------------------ + X\x0d\x0a + 0: X\x0d\x0a + +/X\H+\R/B +------------------------------------------------------------------ + Bra + X + \H+ + \R + Ket + End +------------------------------------------------------------------ + X\x0d\x0a + 0: X\x0d\x0a + +/X\H++\R/B +------------------------------------------------------------------ + Bra + X + \H++ + \R + Ket + End +------------------------------------------------------------------ +\= Expect no match + X\x0d\x0a +No match + +/(?<=abc)def/ + abc\=ph +Partial match: + +/abc$/ + abc + 0: abc + abc\=ps + 0: abc + abc\=ph +Partial match: abc + +/abc$/m + abc + 0: abc + abc\n + 0: abc + abc\=ph +Partial match: abc + abc\n\=ph + 0: abc + abc\=ps + 0: abc + abc\n\=ps + 0: abc + +/abc\z/ + abc + 0: abc + abc\=ps + 0: abc + abc\=ph +Partial match: abc + +/abc\Z/ + abc + 0: abc + 
abc\=ps + 0: abc + abc\=ph +Partial match: abc + +/abc\b/ + abc + 0: abc + abc\=ps + 0: abc + abc\=ph +Partial match: abc + +/abc\B/ + abc\=ps +Partial match: abc + abc\=ph +Partial match: abc +\= Expect no match + abc +No match + +/.+/ +\= Bad offsets + abc\=offset=4 +Failed: error -33: bad offset value + abc\=offset=-4 +** Invalid value in 'offset=-4' +\= Valid data + abc\=offset=0 + 0: abc + abc\=offset=1 + 0: bc + abc\=offset=2 + 0: c +\= Expect no match + abc\=offset=3 +No match + +/^\cÄ£/ +Failed: error 168 at offset 3: \c must be followed by a printable ASCII character + +/(?P(?P=abn)xxx)/B +------------------------------------------------------------------ + Bra + CBra 1 + \1 + xxx + Ket + Ket + End +------------------------------------------------------------------ + +/(a\1z)/B +------------------------------------------------------------------ + Bra + CBra 1 + a + \1 + z + Ket + Ket + End +------------------------------------------------------------------ + +/(?P(?P=abn)(?(?P=axn)xxx)/B +Failed: error 115 at offset 12: reference to non-existent subpattern + +/(?P(?P=axn)xxx)(?yy)/B +------------------------------------------------------------------ + Bra + CBra 1 + \2 + xxx + Ket + CBra 2 + yy + Ket + Ket + End +------------------------------------------------------------------ + +# These tests are here because Perl gets the first one wrong. 
+ +/(\R*)(.)/s + \r\n + 0: \x0d + 1: + 2: \x0d + \r\r\n\n\r + 0: \x0d\x0d\x0a\x0a\x0d + 1: \x0d\x0d\x0a\x0a + 2: \x0d + \r\r\n\n\r\n + 0: \x0d\x0d\x0a\x0a\x0d + 1: \x0d\x0d\x0a\x0a + 2: \x0d + +/(\R)*(.)/s + \r\n + 0: \x0d + 1: + 2: \x0d + \r\r\n\n\r + 0: \x0d\x0d\x0a\x0a\x0d + 1: \x0a + 2: \x0d + \r\r\n\n\r\n + 0: \x0d\x0d\x0a\x0a\x0d + 1: \x0a + 2: \x0d + +/((?>\r\n|\n|\x0b|\f|\r|\x85)*)(.)/s + \r\n + 0: \x0d + 1: + 2: \x0d + \r\r\n\n\r + 0: \x0d\x0d\x0a\x0a\x0d + 1: \x0d\x0d\x0a\x0a + 2: \x0d + \r\r\n\n\r\n + 0: \x0d\x0d\x0a\x0a\x0d + 1: \x0d\x0d\x0a\x0a + 2: \x0d + +# ------------- + +/^abc$/B +------------------------------------------------------------------ + Bra + ^ + abc + $ + Ket + End +------------------------------------------------------------------ + +/^abc$/Bm +------------------------------------------------------------------ + Bra + /m ^ + abc + /m $ + Ket + End +------------------------------------------------------------------ + +/^(a)*+(\w)/ + aaaaX + 0: aaaaX + 1: a + 2: X +\= Expect no match + aaaa +No match + +/^(?:a)*+(\w)/ + aaaaX + 0: aaaaX + 1: X +\= Expect no match + aaaa +No match + +/(a)++1234/IB +------------------------------------------------------------------ + Bra + CBraPos 1 + a + KetRpos + 1234 + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +First code unit = 'a' +Last code unit = '4' +Subject length lower bound = 5 + +/([abc])++1234/I +Capture group count = 1 +Starting code units: a b c +Last code unit = '4' +Subject length lower bound = 5 + +/(?<=(abc)+)X/ +Failed: error 125 at offset 0: length of lookbehind assertion is not limited + +/(^ab)/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + +/(^ab)++/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + +/(^ab|^)+/I +Capture group count = 1 +May match empty string 
+Compile options: +Overall options: anchored +Subject length lower bound = 0 + +/(^ab|^)++/I +Capture group count = 1 +May match empty string +Compile options: +Overall options: anchored +Subject length lower bound = 0 + +/(?:^ab)/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + +/(?:^ab)++/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + +/(?:^ab|^)+/I +Capture group count = 0 +May match empty string +Compile options: +Overall options: anchored +Subject length lower bound = 0 + +/(?:^ab|^)++/I +Capture group count = 0 +May match empty string +Compile options: +Overall options: anchored +Subject length lower bound = 0 + +/(.*ab)/I +Capture group count = 1 +First code unit at start or follows newline +Last code unit = 'b' +Subject length lower bound = 2 + +/(.*ab)++/I +Capture group count = 1 +First code unit at start or follows newline +Last code unit = 'b' +Subject length lower bound = 2 + +/(.*ab|.*)+/I +Capture group count = 1 +May match empty string +First code unit at start or follows newline +Subject length lower bound = 0 + +/(.*ab|.*)++/I +Capture group count = 1 +May match empty string +First code unit at start or follows newline +Subject length lower bound = 0 + +/(?:.*ab)/I +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 'b' +Subject length lower bound = 2 + +/(?:.*ab)++/I +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 'b' +Subject length lower bound = 2 + +/(?:.*ab|.*)+/I +Capture group count = 0 +May match empty string +First code unit at start or follows newline +Subject length lower bound = 0 + +/(?:.*ab|.*)++/I +Capture group count = 0 +May match empty string +First code unit at start or follows newline +Subject length lower bound = 0 + +/(?=a)[bcd]/I +Capture group count = 0 +First code unit = 'a' +Subject 
length lower bound = 1 + +/((?=a))[bcd]/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/((?=a))+[bcd]/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/((?=a))++[bcd]/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/(?=a+)[bcd]/Ii +Capture group count = 0 +Options: caseless +First code unit = 'a' (caseless) +Subject length lower bound = 1 + +/(?=a+?)[bcd]/Ii +Capture group count = 0 +Options: caseless +First code unit = 'a' (caseless) +Subject length lower bound = 1 + +/(?=a++)[bcd]/Ii +Capture group count = 0 +Options: caseless +First code unit = 'a' (caseless) +Subject length lower bound = 1 + +/(?=a{3})[bcd]/Ii +Capture group count = 0 +Options: caseless +First code unit = 'a' (caseless) +Last code unit = 'a' (caseless) +Subject length lower bound = 2 + +/(abc)\1+/ + +# Perl doesn't get these right IMO (the 3rd is PCRE2-specific) + +/(?1)(?:(b(*ACCEPT))){0}/ + b + 0: b + +/(?1)(?:(b(*ACCEPT))){0}c/ + bc + 0: bc +\= Expect no match + b +No match + +/(?1)(?:((*ACCEPT))){0}c/ + c + 0: c + c\=notempty + 0: c + +/^.*?(?(?=a)a|b(*THEN)c)/ +\= Expect no match + ba +No match + +/^.*?(?(?=a)a|bc)/ + ba + 0: ba + +/^.*?(?(?=a)a(*THEN)b|c)/ +\= Expect no match + ac +No match + +/^.*?(?(?=a)a(*THEN)b)c/ +\= Expect no match + ac +No match + +/^.*?(a(*THEN)b)c/ +\= Expect no match + aabc +No match + +/^.*? (?1) c (?(DEFINE)(a(*THEN)b))/x + aabc + 0: aabc + +/^.*?(a(*THEN)b|z)c/ + aabc + 0: aabc + 1: ab + +/^.*?(z|a(*THEN)b)c/ + aabc + 0: aabc + 1: ab + +# These are here because they are not Perl-compatible; the studying means the +# mark is not seen. 
+ +/(*MARK:A)(*SKIP:B)(C|X)/mark + C + 0: C + 1: C +MK: A +\= Expect no match + D +No match, mark = A + +/(*:A)A+(*SKIP:A)(B|Z)/mark +\= Expect no match + AAAC +No match, mark = A + +# ---------------------------- + +"(?=a*(*ACCEPT)b)c" + c + 0: c + c\=notempty + 0: c + +/(?1)c(?(DEFINE)((*ACCEPT)b))/ + c + 0: c + c\=notempty + 0: c + +/(?>(*ACCEPT)b)c/ + c + 0: +\= Expect no match + c\=notempty +No match + +/(?:(?>(a)))+a%/allaftertext + %aa% + 0: aa% + 0+ + 1: a + 1+ a% + +/(a)b|ac/allaftertext + ac\=ovector=1 + 0: ac + 0+ + +/(a)(b)x|abc/allaftertext + abc\=ovector=2 + 0: abc + 0+ + +/(a)bc|(a)(b)\2/ + abc\=ovector=1 +Matched, but too many substrings + 0: abc + abc\=ovector=2 + 0: abc + 1: a + aba\=ovector=1 +Matched, but too many substrings + 0: aba + aba\=ovector=2 +Matched, but too many substrings + 0: aba + 1: + aba\=ovector=3 +Matched, but too many substrings + 0: aba + 1: + 2: a + aba\=ovector=4 + 0: aba + 1: + 2: a + 3: b + +/(?(DEFINE)(a(?2)|b)(b(?1)|a))(?:(?1)|(?2))/I +Capture group count = 2 +May match empty string +Subject length lower bound = 0 + +/(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/I +Capture group count = 2 +Starting code units: a b +Subject length lower bound = 3 + +/(a(?2)|b)(b(?1)|a)(?1)(?2)/I +Capture group count = 2 +Starting code units: a b +Subject length lower bound = 4 + +/(abc)(?1)/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 6 + +/(?:(foo)|(bar)|(baz))X/allcaptures + bazfooX + 0: fooX + 1: foo + 2: + 3: + foobazbarX + 0: barX + 1: + 2: bar + 3: + barfooX + 0: fooX + 1: foo + 2: + 3: + bazX + 0: bazX + 1: + 2: + 3: baz + foobarbazX + 0: bazX + 1: + 2: + 3: baz + bazfooX\=ovector=0 + 0: fooX + 1: foo + 2: + 3: + bazfooX\=ovector=1 +Matched, but too many substrings + 0: fooX + bazfooX\=ovector=2 + 0: fooX + 1: foo + bazfooX\=ovector=3 + 0: fooX + 1: foo + 2: + +/(?=abc){3}abc/B +------------------------------------------------------------------ + Bra + Assert + abc + Ket + Assert + abc + 
Ket + Assert + abc + Ket + abc + Ket + End +------------------------------------------------------------------ + +/(?=abc)+abc/B +------------------------------------------------------------------ + Bra + Assert + abc + Ket + Brazero + Assert + abc + Ket + abc + Ket + End +------------------------------------------------------------------ + +/(?=abc)++abc/B +------------------------------------------------------------------ + Bra + Once + Assert + abc + Ket + Brazero + Assert + abc + Ket + Ket + abc + Ket + End +------------------------------------------------------------------ + +/(?=abc){0}xyz/B +------------------------------------------------------------------ + Bra + Skip zero + Assert + abc + Ket + xyz + Ket + End +------------------------------------------------------------------ + +/(?=(a))?./B +------------------------------------------------------------------ + Bra + Brazero + Assert + CBra 1 + a + Ket + Ket + Any + Ket + End +------------------------------------------------------------------ + +/(?=(a))??./B +------------------------------------------------------------------ + Bra + Braminzero + Assert + CBra 1 + a + Ket + Ket + Any + Ket + End +------------------------------------------------------------------ + +/^(?=(a)){0}b(?1)/B +------------------------------------------------------------------ + Bra + ^ + Skip zero + Assert + CBra 1 + a + Ket + Ket + b + Recurse + Ket + End +------------------------------------------------------------------ + +/(?(DEFINE)(a))?b(?1)/B +------------------------------------------------------------------ + Bra + Cond + Cond false + CBra 1 + a + Ket + Ket + b + Recurse + Ket + End +------------------------------------------------------------------ + +/^(?=(?1))?[az]([abc])d/B +------------------------------------------------------------------ + Bra + ^ + Brazero + Assert + Recurse + Ket + [az] + CBra 1 + [a-c] + Ket + d + Ket + End +------------------------------------------------------------------ + +/^(?!a){0}\w+/B 
+------------------------------------------------------------------ + Bra + ^ + Skip zero + Assert not + a + Ket + \w++ + Ket + End +------------------------------------------------------------------ + +/(?<=(abc))?xyz/B +------------------------------------------------------------------ + Bra + Brazero + Assert back + Reverse + CBra 1 + abc + Ket + Ket + xyz + Ket + End +------------------------------------------------------------------ + +/[:a[:abc]b:]/B +------------------------------------------------------------------ + Bra + [:[a-c] + b:] + Ket + End +------------------------------------------------------------------ + +/^(a(*:A)(d|e(*:B))z|aeq)/auto_callout + adz +--->adz + +0 ^ ^ + +1 ^ ( + +2 ^ a + +3 ^^ (*:A) + +8 ^^ ( +Latest Mark: A + +9 ^^ d ++10 ^ ^ | ++18 ^ ^ z ++19 ^ ^ | ++24 ^ ^ End of pattern + 0: adz + 1: adz + 2: d + aez +--->aez + +0 ^ ^ + +1 ^ ( + +2 ^ a + +3 ^^ (*:A) + +8 ^^ ( +Latest Mark: A + +9 ^^ d ++11 ^^ e ++12 ^ ^ (*:B) ++17 ^ ^ ) +Latest Mark: B ++18 ^ ^ z ++19 ^ ^ | ++24 ^ ^ End of pattern + 0: aez + 1: aez + 2: e + aeqwerty +--->aeqwerty + +0 ^ ^ + +1 ^ ( + +2 ^ a + +3 ^^ (*:A) + +8 ^^ ( +Latest Mark: A + +9 ^^ d ++11 ^^ e ++12 ^ ^ (*:B) ++17 ^ ^ ) +Latest Mark: B ++18 ^ ^ z ++20 ^ a ++21 ^^ e ++22 ^ ^ q ++23 ^ ^ ) ++24 ^ ^ End of pattern + 0: aeq + 1: aeq + +/.(*F)/ +\= Expect no match + abc\=ph +No match + +/\btype\b\W*?\btext\b\W*?\bjavascript\b/I +Capture group count = 0 +Max lookbehind = 1 +First code unit = 't' +Last code unit = 't' +Subject length lower bound = 18 + +/\btype\b\W*?\btext\b\W*?\bjavascript\b|\burl\b\W*?\bshell:|a+)(?>(z+))\w/B +------------------------------------------------------------------ + Bra + ^ + Once + a++ + Ket + Once + CBra 1 + z++ + Ket + Ket + \w + Ket + End +------------------------------------------------------------------ + aaaazzzzb + 0: aaaazzzzb + 1: zzzz +\= Expect no match + aazz +No match + +/(.)(\1|a(?2))/ + bab + 0: bab + 1: b + 2: ab + +/\1|(.)(?R)\1/ + cbbbc + 0: cbbbc + 1: c + 
+/(.)((?(1)c|a)|a(?2))/ +\= Expect no match + baa +No match + +/(?P(?P=abn)xxx)/B +------------------------------------------------------------------ + Bra + CBra 1 + \1 + xxx + Ket + Ket + End +------------------------------------------------------------------ + +/(a\1z)/B +------------------------------------------------------------------ + Bra + CBra 1 + a + \1 + z + Ket + Ket + End +------------------------------------------------------------------ + +/^a\x41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz + 0: aAz +\= Expect no match + ax41z +No match + +/^a[m\x41]z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz + 0: aAz + +/^a\x1z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + ax1z + 0: ax1z + +/^a\u0041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz + 0: aAz +\= Expect no match + au0041z +No match + +/^a[m\u0041]z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz + 0: aAz + +/^a\u041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + au041z + 0: au041z +\= Expect no match + aAz +No match + +/^a\U0041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aU0041z + 0: aU0041z +\= Expect no match + aAz +No match + +/^\u{7a}/alt_bsux + u{7a} + 0: u{7a} +\= Expect no match + zoo +No match + +/^\u{7a}/extra_alt_bsux + zoo + 0: z + +/\u{}/extra_alt_bsux + u{} + 0: u{} + +/\u{Q12}/extra_alt_bsux + --u{Q12}-- + 0: u{Q12} + +/\u{ 12}/extra_alt_bsux + --u{ 12}-- + 0: u{ 12} + +/\u{{3}}/extra_alt_bsux + --u{{{}-- + 0: u{{{} + +/(?(?=c)c|d)++Y/B +------------------------------------------------------------------ + Bra + BraPos + Cond + Assert + c + Ket + c + Alt + d + Ket + KetRpos + Y + Ket + End +------------------------------------------------------------------ + +/(?(?=c)c|d)*+Y/B +------------------------------------------------------------------ + Bra + Braposzero + BraPos + Cond + Assert + c + Ket + c + Alt + d + Ket + KetRpos + Y + Ket + End 
+------------------------------------------------------------------ + +/a[\NB]c/ +Failed: error 171 at offset 4: \N is not supported in a class + aNc + +/a[B-\Nc]/ +Failed: error 150 at offset 6: invalid range in character class + +/a[B\Nc]/ +Failed: error 171 at offset 5: \N is not supported in a class + +/(a)(?2){0,1999}?(b)/ + +/(a)(?(DEFINE)(b))(?2){0,1999}?(?2)/ + +# This test, with something more complicated than individual letters, causes +# different behaviour in Perl. Perhaps it disables some optimization; no tag is +# passed back for the failures, whereas in PCRE2 there is a tag. + +/(A|P)(*:A)(B|P) | (X|P)(X|P)(*:B)(Y|P)/x,mark + AABC + 0: AB + 1: A + 2: B +MK: A + XXYZ + 0: XXY + 1: + 2: + 3: X + 4: X + 5: Y +MK: B +\= Expect no match + XAQQ +No match, mark = A + XAQQXZZ +No match, mark = A + AXQQQ +No match, mark = A + AXXQQQ +No match, mark = B + +# Perl doesn't give marks for these, though it does if the alternatives are +# replaced by single letters. + +/(b|q)(*:m)f|a(*:n)w/mark + aw + 0: aw +MK: n +\= Expect no match + abc +No match, mark = m + +/(q|b)(*:m)f|a(*:n)w/mark + aw + 0: aw +MK: n +\= Expect no match + abc +No match, mark = m + +# After a partial match, the behaviour is as for a failure. + +/^a(*:X)bcde/mark + abc\=ps +Partial match, mark=X: abc + +# These are here because Perl doesn't return a mark, except for the first. 
+ +/(?=(*:x))(q|)/aftertext,mark + abc + 0: + 0+ abc + 1: +MK: x + +/(?=(*:x))((*:y)q|)/aftertext,mark + abc + 0: + 0+ abc + 1: +MK: x + +/(?=(*:x))(?:(*:y)q|)/aftertext,mark + abc + 0: + 0+ abc +MK: x + +/(?=(*:x))(?>(*:y)q|)/aftertext,mark + abc + 0: + 0+ abc +MK: x + +/(?=a(*:x))(?!a(*:y)c)/aftertext,mark + ab + 0: + 0+ ab +MK: x + +/(?=a(*:x))(?=a(*:y)c|)/aftertext,mark + ab + 0: + 0+ ab +MK: x + +/(..)\1/ + ab\=ps +Partial match: ab + aba\=ps +Partial match: aba + abab\=ps + 0: abab + 1: ab + +/(..)\1/i + ab\=ps +Partial match: ab + abA\=ps +Partial match: abA + aBAb\=ps + 0: aBAb + 1: aB + +/(..)\1{2,}/ + ab\=ps +Partial match: ab + aba\=ps +Partial match: aba + abab\=ps +Partial match: abab + ababa\=ps +Partial match: ababa + ababab\=ps + 0: ababab + 1: ab + ababab\=ph +Partial match: ababab + abababa\=ps + 0: ababab + 1: ab + abababa\=ph +Partial match: abababa + +/(..)\1{2,}/i + ab\=ps +Partial match: ab + aBa\=ps +Partial match: aBa + aBAb\=ps +Partial match: aBAb + AbaBA\=ps +Partial match: AbaBA + abABAb\=ps + 0: abABAb + 1: ab + aBAbaB\=ph +Partial match: aBAbaB + abABabA\=ps + 0: abABab + 1: ab + abaBABa\=ph +Partial match: abaBABa + +/(..)\1{2,}?x/i + ab\=ps +Partial match: ab + abA\=ps +Partial match: abA + aBAb\=ps +Partial match: aBAb + abaBA\=ps +Partial match: abaBA + abAbaB\=ps +Partial match: abAbaB + abaBabA\=ps +Partial match: abaBabA + abAbABaBx\=ps + 0: abAbABaBx + 1: ab + +/^(..)\1/ + aba\=ps +Partial match: aba + +/^(..)\1{2,3}x/ + aba\=ps +Partial match: aba + ababa\=ps +Partial match: ababa + ababa\=ph +Partial match: ababa + abababx + 0: abababx + 1: ab + ababababx + 0: ababababx + 1: ab + +/^(..)\1{2,3}?x/ + aba\=ps +Partial match: aba + ababa\=ps +Partial match: ababa + ababa\=ph +Partial match: ababa + abababx + 0: abababx + 1: ab + ababababx + 0: ababababx + 1: ab + +/^(..)(\1{2,3})ab/ + abababab + 0: abababab + 1: ab + 2: abab + +/^\R/ + \r\=ps + 0: \x0d + \r\=ph +Partial match: \x0d + +/^\R{2,3}x/ + \r\=ps +Partial match: \x0d + 
\r\=ph +Partial match: \x0d + \r\r\=ps +Partial match: \x0d\x0d + \r\r\=ph +Partial match: \x0d\x0d + \r\r\r\=ps +Partial match: \x0d\x0d\x0d + \r\r\r\=ph +Partial match: \x0d\x0d\x0d + \r\rx + 0: \x0d\x0dx + \r\r\rx + 0: \x0d\x0d\x0dx + +/^\R{2,3}?x/ + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + \r\r\=ps +Partial match: \x0d\x0d + \r\r\=ph +Partial match: \x0d\x0d + \r\r\r\=ps +Partial match: \x0d\x0d\x0d + \r\r\r\=ph +Partial match: \x0d\x0d\x0d + \r\rx + 0: \x0d\x0dx + \r\r\rx + 0: \x0d\x0d\x0dx + +/^\R?x/ + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + x + 0: x + \rx + 0: \x0dx + +/^\R+x/ + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + \r\n\=ps +Partial match: \x0d\x0a + \r\n\=ph +Partial match: \x0d\x0a + \rx + 0: \x0dx + +/^a$/newline=crlf + a\r\=ps +Partial match: a\x0d + a\r\=ph +Partial match: a\x0d + +/^a$/m,newline=crlf + a\r\=ps +Partial match: a\x0d + a\r\=ph +Partial match: a\x0d + +/^(a$|a\r)/newline=crlf + a\r\=ps + 0: a\x0d + 1: a\x0d + a\r\=ph +Partial match: a\x0d + +/^(a$|a\r)/m,newline=crlf + a\r\=ps + 0: a\x0d + 1: a\x0d + a\r\=ph +Partial match: a\x0d + +/./newline=crlf + \r\=ps + 0: \x0d + \r\=ph +Partial match: \x0d + +/.{2,3}/newline=crlf + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + \r\r\=ps + 0: \x0d\x0d + \r\r\=ph +Partial match: \x0d\x0d + \r\r\r\=ps + 0: \x0d\x0d\x0d + \r\r\r\=ph +Partial match: \x0d\x0d\x0d + +/.{2,3}?/newline=crlf + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + \r\r\=ps + 0: \x0d\x0d + \r\r\=ph +Partial match: \x0d\x0d + \r\r\r\=ps + 0: \x0d\x0d + \r\r\r\=ph + 0: \x0d\x0d + +"AB(C(D))(E(F))?(?(?=\2)(?=\4))" + ABCDGHI\=ovector=01 +Matched, but too many substrings + 0: ABCD + +# These are all run as real matches in test 1; here we are just checking the +# settings of the anchored and startline bits. 
+ +/(?>.*?a)(?<=ba)/I +Capture group count = 0 +Max lookbehind = 2 +Last code unit = 'a' +Subject length lower bound = 1 + +/(?:.*?a)(?<=ba)/I +Capture group count = 0 +Max lookbehind = 2 +First code unit at start or follows newline +Last code unit = 'a' +Subject length lower bound = 1 + +/.*?a(*PRUNE)b/I +Capture group count = 0 +Last code unit = 'b' +Subject length lower bound = 2 + +/.*?a(*PRUNE)b/Is +Capture group count = 0 +Options: dotall +Last code unit = 'b' +Subject length lower bound = 2 + +/^a(*PRUNE)b/Is +Capture group count = 0 +Compile options: dotall +Overall options: anchored dotall +First code unit = 'a' +Subject length lower bound = 2 + +/.*?a(*SKIP)b/I +Capture group count = 0 +Last code unit = 'b' +Subject length lower bound = 2 + +/(?>.*?a)b/Is +Capture group count = 0 +Options: dotall +Last code unit = 'b' +Subject length lower bound = 2 + +/(?>.*?a)b/I +Capture group count = 0 +Last code unit = 'b' +Subject length lower bound = 2 + +/(?>^a)b/Is +Capture group count = 0 +Compile options: dotall +Overall options: anchored dotall +First code unit = 'a' +Subject length lower bound = 2 + +/(?>.*?)(?<=(abcd)|(wxyz))/I +Capture group count = 2 +Max lookbehind = 4 +May match empty string +Subject length lower bound = 0 + +/(?>.*)(?<=(abcd)|(wxyz))/I +Capture group count = 2 +Max lookbehind = 4 +May match empty string +Subject length lower bound = 0 + +"(?>.*)foo"I +Capture group count = 0 +Last code unit = 'o' +Subject length lower bound = 3 + +"(?>.*?)foo"I +Capture group count = 0 +Last code unit = 'o' +Subject length lower bound = 3 + +/(?>^abc)/Im +Capture group count = 0 +Options: multiline +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + +/(?>.*abc)/Im +Capture group count = 0 +Options: multiline +Last code unit = 'c' +Subject length lower bound = 3 + +/(?:.*abc)/Im +Capture group count = 0 +Options: multiline +First code unit at start or follows newline +Last code unit = 'c' +Subject length 
lower bound = 3 + +/(?:(a)+(?C1)bb|aa(?C2)b)/ + aab\=callout_capture +Callout 1: last capture = 1 + 1: a +--->aab + ^ ^ b +Callout 1: last capture = 1 + 1: a +--->aab + ^^ b +Callout 2: last capture = 0 +--->aab + ^ ^ b + 0: aab + +/(?:(a)++(?C1)bb|aa(?C2)b)/ + aab\=callout_capture +Callout 1: last capture = 1 + 1: a +--->aab + ^ ^ b +Callout 2: last capture = 0 +--->aab + ^ ^ b + 0: aab + +/(?:(?>(a))(?C1)bb|aa(?C2)b)/ + aab\=callout_capture +Callout 1: last capture = 1 + 1: a +--->aab + ^^ b +Callout 2: last capture = 0 +--->aab + ^ ^ b + 0: aab + +/(?:(?1)(?C1)x|ab(?C2))((a)){0}/ + aab\=callout_capture +Callout 1: last capture = 0 +--->aab + ^^ x +Callout 1: last capture = 0 +--->aab + ^^ x +Callout 2: last capture = 0 +--->aab + ^ ^ ) + 0: ab + +/(?1)(?C1)((a)(?C2)){0}/ + aab\=callout_capture +Callout 2: last capture = 2 + 1: + 2: a +--->aab + ^^ ){0} +Callout 1: last capture = 0 +--->aab + ^^ ( + 0: a + +/(?:(a)+(?C1)bb|aa(?C2)b)++/ + aab\=callout_capture +Callout 1: last capture = 1 + 1: a +--->aab + ^ ^ b +Callout 1: last capture = 1 + 1: a +--->aab + ^^ b +Callout 2: last capture = 0 +--->aab + ^ ^ b + 0: aab + aab\=callout_capture,ovector=1 +Callout 1: last capture = 1 + 1: a +--->aab + ^ ^ b +Callout 1: last capture = 1 + 1: a +--->aab + ^^ b +Callout 2: last capture = 0 +--->aab + ^ ^ b + 0: aab + +/(ab)x|ab/ + ab\=ovector=0 + 0: ab + ab\=ovector=1 + 0: ab + +/(?<=123)(*MARK:xx)abc/mark + xxxx123a\=ph +Partial match, mark=xx: a + xxxx123a\=ps +Partial match, mark=xx: a + +/123\Kabc/startchar + xxxx123a\=ph +Partial match: 123a + xxxx123a\=ps +Partial match: 123a + +/^(?(?=a)aa|bb)/auto_callout + bb +--->bb + +0 ^ ^ + +1 ^ (? + +3 ^ (?= + +6 ^ a ++11 ^ b ++12 ^^ b ++13 ^ ^ ) ++14 ^ ^ End of pattern + 0: bb + +/(?C1)^(?C2)(?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10))(?C11)/ + bb +--->bb + 1 ^ ^ + 2 ^ (? + 99 ^ (?= + 3 ^ a + 8 ^ b + 9 ^^ b + 10 ^ ^ ) + 11 ^ ^ End of pattern + 0: bb + +# Perl seems to have a bug with this one. 
+ +/aaaaa(*COMMIT)(*PRUNE)b|a+c/ + aaaaaac + 0: aaaac + +# Here are some that Perl treats differently because of the way it handles +# backtracking verbs. + +/(?!a(*COMMIT)b)ac|ad/ + ac + 0: ac + ad + 0: ad + +/^(?!a(*THEN)b|ac)../ + ad + 0: ad +\= Expect no match + ac +No match + +/^(?=a(*THEN)b|ac)/ + ac + 0: + +/\A.*?(?:a|b(*THEN)c)/ + ba + 0: ba + +/\A.*?(?:a|b(*THEN)c)++/ + ba + 0: ba + +/\A.*?(?:a|b(*THEN)c|d)/ + ba + 0: ba + +/(?:(a(*MARK:X)a+(*SKIP:X)b)){0}(?:(?1)|aac)/ + aac + 0: aac + +/\A.*?(a|b(*THEN)c)/ + ba + 0: ba + 1: a + +/^(A(*THEN)B|A(*THEN)D)/ + AD + 0: AD + 1: AD + +/(?!b(*THEN)a)bn|bnn/ + bnn + 0: bn + +/(?(?=b(*SKIP)a)bn|bnn)/ + bnn + 0: bnn + +/(?=b(*THEN)a|)bn|bnn/ + bnn + 0: bn + +# This test causes a segfault with Perl 5.18.0 + +/^(?=(a)){0}b(?1)/ + backgammon + 0: ba + +/(?|(?f)|(?b))/I,dupnames +Capture group count = 1 +Named capture groups: + n 1 +Options: dupnames +Starting code units: b f +Subject length lower bound = 1 + +/(?abc)(?z)\k()/IB,dupnames +------------------------------------------------------------------ + Bra + CBra 1 + abc + Ket + CBra 2 + z + Ket + \k2 + CBra 3 + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 3 +Max back reference = 2 +Named capture groups: + a 1 + a 2 +Options: dupnames +First code unit = 'a' +Last code unit = 'z' +Subject length lower bound = 5 + +/a*[bcd]/B +------------------------------------------------------------------ + Bra + a*+ + [b-d] + Ket + End +------------------------------------------------------------------ + +/[bcd]*a/B +------------------------------------------------------------------ + Bra + [b-d]*+ + a + Ket + End +------------------------------------------------------------------ + +# A complete set of tests for auto-possessification of character types, but +# omitting \C because it might be disabled (it has its own tests). + +/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. 
\D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx +------------------------------------------------------------------ + Bra + \D+ + \D + \D++ + \d + \D+ + \S + \D+ + \s + \D+ + \W + \D+ + \w + \D+ + Any + \D+ + \R + \D+ + \H + \D+ + \h + \D+ + \V + \D+ + \v + \D+ + \Z + \D++ + \z + \D+ + $ + Ket + End +------------------------------------------------------------------ + +/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx +------------------------------------------------------------------ + Bra + \d++ + \D + \d+ + \d + \d+ + \S + \d++ + \s + \d++ + \W + \d+ + \w + \d+ + Any + \d++ + \R + \d+ + \H + \d++ + \h + \d+ + \V + \d++ + \v + \d++ + \Z + \d++ + \z + \d++ + $ + Ket + End +------------------------------------------------------------------ + +/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx +------------------------------------------------------------------ + Bra + \S+ + \D + \S+ + \d + \S+ + \S + \S++ + \s + \S+ + \W + \S+ + \w + \S+ + Any + \S++ + \R + \S+ + \H + \S++ + \h + \S+ + \V + \S++ + \v + \S++ + \Z + \S++ + \z + \S++ + $ + Ket + End +------------------------------------------------------------------ + +/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx +------------------------------------------------------------------ + Bra + \s+ + \D + \s++ + \d + \s++ + \S + \s+ + \s + \s+ + \W + \s++ + \w + \s+ + Any + \s+ + \R + \s+ + \H + \s+ + \h + \s+ + \V + \s+ + \v + \s+ + \Z + \s++ + \z + \s+ + $ + Ket + End +------------------------------------------------------------------ + +/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. 
\W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx +------------------------------------------------------------------ + Bra + \W+ + \D + \W++ + \d + \W+ + \S + \W+ + \s + \W+ + \W + \W++ + \w + \W+ + Any + \W+ + \R + \W+ + \H + \W+ + \h + \W+ + \V + \W+ + \v + \W+ + \Z + \W++ + \z + \W+ + $ + Ket + End +------------------------------------------------------------------ + +/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx +------------------------------------------------------------------ + Bra + \w+ + \D + \w+ + \d + \w+ + \S + \w++ + \s + \w++ + \W + \w+ + \w + \w+ + Any + \w++ + \R + \w+ + \H + \w++ + \h + \w+ + \V + \w++ + \v + \w++ + \Z + \w++ + \z + \w++ + $ + Ket + End +------------------------------------------------------------------ + +/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx +------------------------------------------------------------------ + Bra + \R+ + \D + \R++ + \d + \R+ + \S + \R++ + \s + \R+ + \W + \R++ + \w + \R++ + Any + \R+ + \R + \R+ + \H + \R++ + \h + \R+ + \V + \R+ + \v + \R+ + \Z + \R++ + \z + \R+ + $ + Ket + End +------------------------------------------------------------------ + +/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx +------------------------------------------------------------------ + Bra + \H+ + \D + \H+ + \d + \H+ + \S + \H+ + \s + \H+ + \W + \H+ + \w + \H+ + Any + \H+ + \R + \H+ + \H + \H++ + \h + \H+ + \V + \H+ + \v + \H+ + \Z + \H++ + \z + \H+ + $ + Ket + End +------------------------------------------------------------------ + +/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. 
\h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx +------------------------------------------------------------------ + Bra + \h+ + \D + \h++ + \d + \h++ + \S + \h+ + \s + \h+ + \W + \h++ + \w + \h+ + Any + \h++ + \R + \h++ + \H + \h+ + \h + \h+ + \V + \h++ + \v + \h+ + \Z + \h++ + \z + \h+ + $ + Ket + End +------------------------------------------------------------------ + +/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx +------------------------------------------------------------------ + Bra + \V+ + \D + \V+ + \d + \V+ + \S + \V+ + \s + \V+ + \W + \V+ + \w + \V+ + Any + \V++ + \R + \V+ + \H + \V+ + \h + \V+ + \V + \V++ + \v + \V+ + \Z + \V++ + \z + \V+ + $ + Ket + End +------------------------------------------------------------------ + +/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx +------------------------------------------------------------------ + Bra + \v+ + \D + \v++ + \d + \v++ + \S + \v+ + \s + \v+ + \W + \v++ + \w + \v+ + Any + \v+ + \R + \v+ + \H + \v++ + \h + \v++ + \V + \v+ + \v + \v+ + \Z + \v++ + \z + \v+ + $ + Ket + End +------------------------------------------------------------------ + +/ a+\D a+\d a+\S a+\s a+\W a+\w a+. a+\R a+\H a+\h a+\V a+\v a+\Z a+\z a+$/Bx +------------------------------------------------------------------ + Bra + a+ + \D + a++ + \d + a+ + \S + a++ + \s + a++ + \W + a+ + \w + a+ + Any + a++ + \R + a+ + \H + a++ + \h + a+ + \V + a++ + \v + a++ + \Z + a++ + \z + a++ + $ + Ket + End +------------------------------------------------------------------ + +/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. 
\n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx +------------------------------------------------------------------ + Bra + \x0a+ + \D + \x0a++ + \d + \x0a++ + \S + \x0a+ + \s + \x0a+ + \W + \x0a++ + \w + \x0a+ + Any + \x0a+ + \R + \x0a+ + \H + \x0a++ + \h + \x0a++ + \V + \x0a+ + \v + \x0a+ + \Z + \x0a++ + \z + \x0a+ + $ + Ket + End +------------------------------------------------------------------ + +/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bx +------------------------------------------------------------------ + Bra + Any+ + \D + Any+ + \d + Any+ + \S + Any+ + \s + Any+ + \W + Any+ + \w + Any+ + Any + Any++ + \R + Any+ + \H + Any+ + \h + Any+ + \V + Any+ + \v + Any+ + \Z + Any++ + \z + Any+ + $ + Ket + End +------------------------------------------------------------------ + +/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bsx +------------------------------------------------------------------ + Bra + AllAny+ + \D + AllAny+ + \d + AllAny+ + \S + AllAny+ + \s + AllAny+ + \W + AllAny+ + \w + AllAny+ + AllAny + AllAny+ + \R + AllAny+ + \H + AllAny+ + \h + AllAny+ + \V + AllAny+ + \v + AllAny+ + \Z + AllAny++ + \z + AllAny+ + $ + Ket + End +------------------------------------------------------------------ + +/ \D+$ \d+$ \S+$ \s+$ \W+$ \w+$ \R+$ \H+$ \h+$ \V+$ \v+$ a+$ \n+$ .+$ .+$/Bmx +------------------------------------------------------------------ + Bra + \D+ + /m $ + \d++ + /m $ + \S++ + /m $ + \s+ + /m $ + \W+ + /m $ + \w++ + /m $ + \R+ + /m $ + \H+ + /m $ + \h+ + /m $ + \V+ + /m $ + \v+ + /m $ + a+ + /m $ + \x0a+ + /m $ + Any+ + /m $ + Any+ + /m $ + Ket + End +------------------------------------------------------------------ + +/(?=a+)a(a+)++a/B +------------------------------------------------------------------ + Bra + Assert + a++ + Ket + a + CBraPos 1 + a+ + KetRpos + a + Ket + End +------------------------------------------------------------------ + 
+/a+(bb|cc)a+(?:bb|cc)a+(?>bb|cc)a+(?:bb|cc)+a+(aa)a+(?:bb|aa)/B +------------------------------------------------------------------ + Bra + a++ + CBra 1 + bb + Alt + cc + Ket + a++ + Bra + bb + Alt + cc + Ket + a++ + Once + bb + Alt + cc + Ket + a++ + Bra + bb + Alt + cc + KetRmax + a+ + CBra 2 + aa + Ket + a+ + Bra + bb + Alt + aa + Ket + Ket + End +------------------------------------------------------------------ + +/a+(bb|cc)?#a+(?:bb|cc)??#a+(?:bb|cc)?+#a+(?:bb|cc)*#a+(bb|cc)?a#a+(?:aa)?/B +------------------------------------------------------------------ + Bra + a++ + Brazero + CBra 1 + bb + Alt + cc + Ket + # + a++ + Braminzero + Bra + bb + Alt + cc + Ket + # + a++ + Once + Brazero + Bra + bb + Alt + cc + Ket + Ket + # + a++ + Brazero + Bra + bb + Alt + cc + KetRmax + # + a+ + Brazero + CBra 2 + bb + Alt + cc + Ket + a# + a+ + Brazero + Bra + aa + Ket + Ket + End +------------------------------------------------------------------ + +/a+(?:bb)?a#a+(?:|||)#a+(?:|b)a#a+(?:|||)?a/B +------------------------------------------------------------------ + Bra + a+ + Brazero + Bra + bb + Ket + a# + a++ + Bra + Alt + Alt + Alt + Ket + # + a+ + Bra + Alt + b + Ket + a# + a+ + Brazero + Bra + Alt + Alt + Alt + Ket + a + Ket + End +------------------------------------------------------------------ + +/[ab]*/B +------------------------------------------------------------------ + Bra + [ab]*+ + Ket + End +------------------------------------------------------------------ + aaaa + 0: aaaa + +/[ab]*?/B +------------------------------------------------------------------ + Bra + [ab]*? + Ket + End +------------------------------------------------------------------ + aaaa + 0: + +/[ab]?/B +------------------------------------------------------------------ + Bra + [ab]?+ + Ket + End +------------------------------------------------------------------ + aaaa + 0: a + +/[ab]??/B +------------------------------------------------------------------ + Bra + [ab]?? 
+ Ket + End +------------------------------------------------------------------ + aaaa + 0: + +/[ab]+/B +------------------------------------------------------------------ + Bra + [ab]++ + Ket + End +------------------------------------------------------------------ + aaaa + 0: aaaa + +/[ab]+?/B +------------------------------------------------------------------ + Bra + [ab]+? + Ket + End +------------------------------------------------------------------ + aaaa + 0: a + +/[ab]{2,3}/B +------------------------------------------------------------------ + Bra + [ab]{2,3}+ + Ket + End +------------------------------------------------------------------ + aaaa + 0: aaa + +/[ab]{2,3}?/B +------------------------------------------------------------------ + Bra + [ab]{2,3}? + Ket + End +------------------------------------------------------------------ + aaaa + 0: aa + +/[ab]{2,}/B +------------------------------------------------------------------ + Bra + [ab]{2,}+ + Ket + End +------------------------------------------------------------------ + aaaa + 0: aaaa + +/[ab]{2,}?/B +------------------------------------------------------------------ + Bra + [ab]{2,}? + Ket + End +------------------------------------------------------------------ + aaaa + 0: aa + +/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B +------------------------------------------------------------------ + Bra + \d++ + \s{0,5}+ + = + \s*+ + \S? 
+ = + \w{0,4}+ + \W*+ + Ket + End +------------------------------------------------------------------ + +/[a-d]{5,12}[e-z0-9]*#[^a-z]+[b-y]*a[2-7]?[^0-9a-z]+/B +------------------------------------------------------------------ + Bra + [a-d]{5,12}+ + [0-9e-z]*+ + # + [\x00-`{-\xff] (neg)++ + [b-y]*+ + a + [2-7]?+ + [\x00-/:-`{-\xff] (neg)++ + Ket + End +------------------------------------------------------------------ + +/[a-z]*\s#[ \t]?\S#[a-c]*\S#[C-G]+?\d#[4-8]*\D#[4-9,]*\D#[!$]{0,5}\w#[M-Xf-l]+\W#[a-c,]?\W/B +------------------------------------------------------------------ + Bra + [a-z]*+ + \s + # + [\x09 ]?+ + \S + # + [a-c]* + \S + # + [C-G]++ + \d + # + [4-8]*+ + \D + # + [,4-9]* + \D + # + [!$]{0,5}+ + \w + # + [M-Xf-l]++ + \W + # + [,a-c]? + \W + Ket + End +------------------------------------------------------------------ + +/a+(aa|bb)*c#a*(bb|cc)*a#a?(bb|cc)*d#[a-f]*(g|hh)*f/B +------------------------------------------------------------------ + Bra + a+ + Brazero + CBra 1 + aa + Alt + bb + KetRmax + c# + a* + Brazero + CBra 2 + bb + Alt + cc + KetRmax + a# + a?+ + Brazero + CBra 3 + bb + Alt + cc + KetRmax + d# + [a-f]* + Brazero + CBra 4 + g + Alt + hh + KetRmax + f + Ket + End +------------------------------------------------------------------ + +/[a-f]*(g|hh|i)*i#[a-x]{4,}(y{0,6})*y#[a-k]+(ll|mm)+n/B +------------------------------------------------------------------ + Bra + [a-f]*+ + Brazero + CBra 1 + g + Alt + hh + Alt + i + KetRmax + i# + [a-x]{4,} + Brazero + SCBra 2 + y{0,6} + KetRmax + y# + [a-k]++ + CBra 3 + ll + Alt + mm + KetRmax + n + Ket + End +------------------------------------------------------------------ + +/[a-f]*(?>gg|hh)+#[a-f]*(?>gg|hh)?#[a-f]*(?>gg|hh)*a#[a-f]*(?>gg|hh)*h/B +------------------------------------------------------------------ + Bra + [a-f]*+ + Once + gg + Alt + hh + KetRmax + # + [a-f]*+ + Brazero + Once + gg + Alt + hh + Ket + # + [a-f]* + Brazero + Once + gg + Alt + hh + KetRmax + a# + [a-f]*+ + Brazero + 
Once + gg + Alt + hh + KetRmax + h + Ket + End +------------------------------------------------------------------ + +/[a-c]*d/IB +------------------------------------------------------------------ + Bra + [a-c]*+ + d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: a b c d +Last code unit = 'd' +Subject length lower bound = 1 + +/[a-c]+d/IB +------------------------------------------------------------------ + Bra + [a-c]++ + d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: a b c +Last code unit = 'd' +Subject length lower bound = 2 + +/[a-c]?d/IB +------------------------------------------------------------------ + Bra + [a-c]?+ + d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: a b c d +Last code unit = 'd' +Subject length lower bound = 1 + +/[a-c]{4,6}d/IB +------------------------------------------------------------------ + Bra + [a-c]{4,6}+ + d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: a b c +Last code unit = 'd' +Subject length lower bound = 5 + +/[a-c]{0,6}d/IB +------------------------------------------------------------------ + Bra + [a-c]{0,6}+ + d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: a b c d +Last code unit = 'd' +Subject length lower bound = 1 + +# End of special auto-possessive tests + +/^A\o{1239}B/ +Failed: error 164 at offset 8: non-octal character in \o{} (closing brace missing?) + A\123B + +/^A\oB/ +Failed: error 155 at offset 4: missing opening brace after \o + +/^A\x{zz}B/ +Failed: error 167 at offset 5: non-hex character in \x{} (closing brace missing?) 
+ +/^A\x{12Z/ +Failed: error 167 at offset 7: non-hex character in \x{} (closing brace missing?) + +/^A\x{/ +Failed: error 178 at offset 5: digits missing in \x{} or \o{} or \N{U+} + +/[ab]++/B,no_auto_possess +------------------------------------------------------------------ + Bra + [ab]++ + Ket + End +------------------------------------------------------------------ + +/[^ab]*+/B,no_auto_possess +------------------------------------------------------------------ + Bra + [\x00-`c-\xff] (neg)*+ + Ket + End +------------------------------------------------------------------ + +/a{4}+/B,no_auto_possess +------------------------------------------------------------------ + Bra + a{4} + Ket + End +------------------------------------------------------------------ + +/a{4}+/Bi,no_auto_possess +------------------------------------------------------------------ + Bra + /i a{4} + Ket + End +------------------------------------------------------------------ + +/[a-[:digit:]]+/ +Failed: error 150 at offset 4: invalid range in character class + +/[A-[:digit:]]+/ +Failed: error 150 at offset 4: invalid range in character class + +/[a-[.xxx.]]+/ +Failed: error 150 at offset 4: invalid range in character class + +/[a-[=xxx=]]+/ +Failed: error 150 at offset 4: invalid range in character class + +/[a-[!xxx!]]+/ +Failed: error 108 at offset 3: range out of order in character class + +/[A-[!xxx!]]+/ + A]]] + 0: A]]] + +/[a-\d]+/ +Failed: error 150 at offset 5: invalid range in character class + +/(?<0abc>xx)/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/(?&1abc)xx(?<1abc>y)/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/(?xx)/ +Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?) 
+ +/(?'0abc'xx)/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/(?P<0abc>xx)/ +Failed: error 144 at offset 4: subpattern name must start with a non-digit + +/\k<5ghj>/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/\k'5ghj'/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/\k{2fgh}/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/(?P=8yuki)/ +Failed: error 144 at offset 4: subpattern name must start with a non-digit + +/\g{4df}/ +Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/(?&1abc)xx(?<1abc>y)/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/(?P>1abc)xx(?<1abc>y)/ +Failed: error 144 at offset 4: subpattern name must start with a non-digit + +/\g'3gh'/ +Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/\g<5fg>/ +Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/(?(<4gh>)abc)/ +Failed: error 144 at offset 4: subpattern name must start with a non-digit + +/(?('4gh')abc)/ +Failed: error 144 at offset 4: subpattern name must start with a non-digit + +/(?(4gh)abc)/ +Failed: error 124 at offset 4: missing closing parenthesis for condition + +/(?(R&6yh)abc)/ +Failed: error 144 at offset 5: subpattern name must start with a non-digit + +/(((a\2)|(a*)\g<-1>))*a?/B +------------------------------------------------------------------ + Bra + Brazero + SCBra 1 + CBra 2 + CBra 3 + a + \2 + Ket + Alt + CBra 4 + a* + Ket + Recurse + Ket + KetRmax + a?+ + Ket + End +------------------------------------------------------------------ + +# Test the ugly "start or end of word" compatibility syntax. 
+ +/[[:<:]]red[[:>:]]/B +------------------------------------------------------------------ + Bra + \b + Assert + \w + Ket + red + \b + Assert back + Reverse + \w + Ket + Ket + End +------------------------------------------------------------------ + little red riding hood + 0: red + a /red/ thing + 0: red + red is a colour + 0: red + put it all on red + 0: red +\= Expect no match + no reduction +No match + Alfred Winifred +No match + +/[a[:<:]] should give error/ +Failed: error 130 at offset 4: unknown POSIX class name + +/(?=ab\K)/aftertext,allow_lookaround_bsk + abcd\=startchar +Start of matched string is beyond its end - displaying from end to start. + 0: ab + 0+ abcd + +/abcd/newline=lf,firstline +\= Expect no match + xx\nxabcd +No match + +# Test stack guard external calls. + +/(((a)))/stackguard=1 +Failed: error 133 at offset 7: parentheses are too deeply nested (stack check) + +/(((a)))/stackguard=2 +Failed: error 133 at offset 7: parentheses are too deeply nested (stack check) + +/(((a)))/stackguard=3 + +/(((((a)))))/ + +# End stack guard tests + +/^\w+(?>\s*)(?<=\w)/B +------------------------------------------------------------------ + Bra + ^ + \w+ + Once + \s*+ + Ket + Assert back + Reverse + \w + Ket + Ket + End +------------------------------------------------------------------ + +/\othing/ +Failed: error 155 at offset 2: missing opening brace after \o + +/\o{}/ +Failed: error 178 at offset 3: digits missing in \x{} or \o{} or \N{U+} + +/\o{whatever}/ +Failed: error 164 at offset 3: non-octal character in \o{} (closing brace missing?) + +/\xthing/ + +/\x{}/ +Failed: error 178 at offset 3: digits missing in \x{} or \o{} or \N{U+} + +/\x{whatever}/ +Failed: error 167 at offset 3: non-hex character in \x{} (closing brace missing?) 
+ +/A\8B/ +Failed: error 115 at offset 2: reference to non-existent subpattern + +/A\9B/ +Failed: error 115 at offset 2: reference to non-existent subpattern + +# This one is here because Perl fails to match "12" for this pattern when the $ +# is present. + +/^(?(?=abc)\w{3}:|\d\d)$/ + abc: + 0: abc: + 12 + 0: 12 +\= Expect no match + 123 +No match + xyz +No match + +# Perl gets this one wrong, giving "a" as the after text for ca and failing to +# match for cd. + +/(?(?=ab)ab)/aftertext + abxxx + 0: ab + 0+ xxx + ca + 0: + 0+ ca + cd + 0: + 0+ cd + +# This should test both paths for processing OP_RECURSE. + +/(?(R)a+|(?R)b)/ + aaaabcde + 0: aaaab + aaaabcde\=ovector=100 + 0: aaaab + +/a*?b*?/ + ab + 0: + +/(*NOTEMPTY)a*?b*?/ + ab + 0: a + ba + 0: b + cb + 0: b + +/(*NOTEMPTY_ATSTART)a*?b*?/aftertext + ab + 0: a + 0+ b + cdab + 0: + 0+ dab + +/(?(VERSION>=10.0)yes|no)/I +Capture group count = 0 +Subject length lower bound = 2 + yesno + 0: yes + +/(?(VERSION>=10.04)yes|no)/ + yesno + 0: yes + +/(?(VERSION=8)yes){3}/BI,aftertext +------------------------------------------------------------------ + Bra + Cond + Cond false + yes + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + yesno + 0: + 0+ yesno + +/(?(VERSION=8)yes|no){3}/I +Capture group count = 0 +Subject length lower bound = 6 + yesnononoyes + 0: nonono +\= Expect no match + yesno +No match + +/(?:(?abc)|xyz)(?(VERSION)yes|no)/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + VERSION 1 +Starting code units: a x +Subject length lower bound = 5 + abcyes + 0: abcyes + 1: abc + xyzno + 0: xyzno +\= Expect no match + abcno +No match + xyzyes +No match + +/(?(VERSION<10)yes|no)/ +Failed: error 179 at offset 10: syntax error or number too big in (?(VERSION condition + +/(?(VERSION>10)yes|no)/ +Failed: error 179 at offset 11: syntax error or number too big in (?(VERSION 
condition + +/(?(VERSION>=10.0.0)yes|no)/ +Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition + +/(?(VERSION=10.101)yes|no)/ +Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition + +/abcd/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 4 + +/abcd/I,no_start_optimize +Capture group count = 0 +Options: no_start_optimize + +/(|ab)*?d/I +Capture group count = 1 +Starting code units: a d +Last code unit = 'd' +Subject length lower bound = 1 + abd + 0: abd + 1: ab + xyd + 0: d + +/(|ab)*?d/I,no_start_optimize +Capture group count = 1 +Options: no_start_optimize + abd + 0: abd + 1: ab + xyd + 0: d + +/\k*(?aa)(?bb)/match_unset_backref,dupnames + aabb + 0: aabb + 1: aa + 2: bb + +/(((((a)))))/parens_nest_limit=2 +Failed: error 119 at offset 3: parentheses are too deeply nested + +/abc/replace=XYZ + 123123 + 0: 123123 + 123abc123 + 1: 123XYZ123 + 123abc123abc123 + 1: 123XYZ123abc123 + 123123\=zero_terminate + 0: 123123 + 123abc123\=zero_terminate + 1: 123XYZ123 + 123abc123abc123\=zero_terminate + 1: 123XYZ123abc123 + +/abc/g,replace=XYZ + 123abc123 + 1: 123XYZ123 + 123abc123abc123 + 2: 123XYZ123XYZ123 + +/abc/replace=X$$Z + 123abc123 + 1: 123X$Z123 + +/abc/g,replace=X$$Z + 123abc123abc123 + 2: 123X$Z123X$Z123 + +/a(b)c(d)e/replace=X$1Y${2}Z + "abcde" + 1: "XbYdZ" + +/a(b)c(d)e/replace=X$1Y${2}Z,global + "abcde-abcde" + 2: "XbYdZ-XbYdZ" + +/a(?b)c(?d)e/replace=X$ONE+${TWO}Z + "abcde" + 1: "Xb+dZ" + +/a(?b)c(?d)e/g,replace=X$ONE+${TWO}Z + "abcde-abcde-" + 2: "Xb+dZ-Xb+dZ-" + +/abc/replace=a$++ + 123abc +Failed: error -35 at offset 2 in replacement: invalid replacement string + +/abc/replace=a$bad + 123abc +Failed: error -49 at offset 5 in replacement: unknown substring + +/abc/replace=a${A234567890123456789_123456789012}z + 123abc +Failed: error -49 at offset 36 in replacement: unknown substring + +/abc/replace=a${A23456789012345678901234567890123}z + 
123abc +Failed: error -35 at offset 35 in replacement: invalid replacement string + +/abc/replace=a${bcd + 123abc +Failed: error -58 at offset 6 in replacement: expected closing curly bracket in replacement string + +/abc/replace=a${b+d}z + 123abc +Failed: error -58 at offset 4 in replacement: expected closing curly bracket in replacement string + +/abc/replace=[10]XYZ + 123abc123 + 1: 123XYZ123 + +/abc/replace=[9]XYZ + 123abc123 +Failed: error -48: no more memory + +/abc/replace=xyz + 1abc2\=partial_hard +Failed: error -34: bad option value + +/abc/replace=xyz + 123abc456 + 1: 123xyz456 + 123abc456\=replace=pqr + 1: 123pqr456 + 123abc456abc789 + 1: 123xyz456abc789 + 123abc456abc789\=g + 2: 123xyz456xyz789 + +/(?<=abc)(|def)/g,replace=<$0> + 123abcxyzabcdef789abcpqr + 4: 123abc<>xyzabc<>789abc<>pqr + +/./replace=$0 + a + 1: a + +/(.)(.)/replace=$2+$1 + abc + 1: b+ac + +/(?.)(?.)/replace=$B+$A + abc + 1: b+ac + +/(.)(.)/g,replace=$2$1 + abcdefgh + 4: badcfehg + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK} + apple lemon blackberry + 3: pear orange strawberry + apple strudel + 1: pear strudel + fruitless + 0: fruitless + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK} sauce, + apple lemon blackberry + 1: pear sauce lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK> + apple lemon blackberry + 3: + apple strudel + 1: strudel + fruitless + 0: fruitless + +/(*:pear)apple/g,replace=${*MARKING} + apple lemon blackberry +Failed: error -35 at offset 11 in replacement: invalid replacement string + +/(*:pear)apple/g,replace=${*MARK-time + apple lemon blackberry +Failed: error -58 at offset 7 in replacement: expected closing curly bracket in replacement string + +/(*:pear)apple/g,replace=${*mark} + apple lemon blackberry +Failed: error -35 at offset 8 in replacement: invalid replacement string + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET> + apple 
lemon blackberry +Failed: error -35 at offset 9 in replacement: invalid replacement string + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK} + apple lemon blackberry +Failed: error -48: no more memory + apple lemon blackberry\=substitute_overflow_length +Failed: error -48: no more memory: 23 code units are needed + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK} + apple lemon blackberry + 3: pear orange strawberry + +/abc/ + 123abc123\=replace=[9]XYZ +Failed: error -48: no more memory + 123abc123\=substitute_overflow_length,replace=[9]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[6]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[1]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[0]XYZ +Failed: error -48: no more memory: 10 code units are needed + +/a(b)c/ + 123abc123\=replace=[9]x$1z +Failed: error -48: no more memory + 123abc123\=substitute_overflow_length,replace=[9]x$1z +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[6]x$1z +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[1]x$1z +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[0]x$1z +Failed: error -48: no more memory: 10 code units are needed + +"((?=(?(?=(?(?=(?(?=()))))))))" + a + 0: + 1: + 2: + +"(?(?=)==)(((((((((?=)))))))))" +\= Expect no match + a +No match + +/(a)(b)|(c)/ + XcX\=ovector=2,get=1,get=2,get=3,get=4,getall +Matched, but too many substrings + 0: c + 1: +Get substring 1 failed (-55): requested value is not set +Get substring 2 failed (-54): requested value is not available +Get substring 3 failed (-54): requested value is not available +Get substring 4 
failed (-49): unknown substring + 0L c + 1L + +/x(?=ab\K)/allow_lookaround_bsk + xab\=get=0 +Start of matched string is beyond its end - displaying from end to start. + 0: ab + 0G (0) + xab\=copy=0 +Start of matched string is beyond its end - displaying from end to start. + 0: ab + 0C (0) + xab\=getall +Start of matched string is beyond its end - displaying from end to start. + 0: ab + 0L + +/(?a)|(?b)/dupnames + a\=ovector=1,copy=A,get=A,get=2 +Matched, but too many substrings + 0: a +Copy substring 'A' failed (-54): requested value is not available +Get substring 2 failed (-54): requested value is not available +Get substring 'A' failed (-54): requested value is not available + a\=ovector=2,copy=A,get=A,get=2 + 0: a + 1: a + C a (1) A (non-unique) +Get substring 2 failed (-54): requested value is not available + G a (1) A (non-unique) + b\=ovector=2,copy=A,get=A,get=2 +Matched, but too many substrings + 0: b + 1: +Copy substring 'A' failed (-55): requested value is not set +Get substring 2 failed (-54): requested value is not available +Get substring 'A' failed (-55): requested value is not set + +/a(b)c(d)/ + abc\=ph,copy=0,copy=1,getall +Partial match: abc + 0C abc (3) +Copy substring 1 failed (-2): partial match +get substring list failed (-2): partial match + +/^abc/info +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + +/^abc/info,no_dotstar_anchor +Capture group count = 0 +Compile options: no_dotstar_anchor +Overall options: anchored no_dotstar_anchor +First code unit = 'a' +Subject length lower bound = 3 + +/.*\d/info,auto_callout +Capture group count = 0 +Options: auto_callout +First code unit at start or follows newline +Subject length lower bound = 1 +\= Expect no match + aaa +--->aaa + +0 ^ .* + +2 ^ ^ \d + +2 ^ ^ \d + +2 ^^ \d + +2 ^ \d +No match + +/.*\d/info,no_dotstar_anchor,auto_callout +Capture group count = 0 +Options: auto_callout no_dotstar_anchor +Subject length 
lower bound = 1 +\= Expect no match + aaa +--->aaa + +0 ^ .* + +2 ^ ^ \d + +2 ^ ^ \d + +2 ^^ \d + +2 ^ \d + +0 ^ .* + +2 ^ ^ \d + +2 ^^ \d + +2 ^ \d + +0 ^ .* + +2 ^^ \d + +2 ^ \d +No match + +/.*\d/dotall,info +Capture group count = 0 +Compile options: dotall +Overall options: anchored dotall +Subject length lower bound = 1 + +/.*\d/dotall,no_dotstar_anchor,info +Capture group count = 0 +Options: dotall no_dotstar_anchor +Subject length lower bound = 1 + +/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info +Capture group count = 0 +Compile options: +Overall options: no_dotstar_anchor +Subject length lower bound = 1 + +'^(?:(a)|b)(?(1)A|B)' + aA123\=ovector=1 +Matched, but too many substrings + 0: aA + aA123\=ovector=2 + 0: aA + 1: a + +'^(?:(?a)|b)(?()A|B)' + aA123\=ovector=1 +Matched, but too many substrings + 0: aA + aA123\=ovector=2 + 0: aA + 1: a + +'^(?)(?:(?a)|b)(?()A|B)'dupnames + aA123\=ovector=1 +Matched, but too many substrings + 0: aA + aA123\=ovector=2 +Matched, but too many substrings + 0: aA + 1: + aA123\=ovector=3 + 0: aA + 1: + 2: a + +'^(?:(?X)|)(?:(?a)|b)\k{AA}'dupnames + aa123\=ovector=1 +Matched, but too many substrings + 0: aa + aa123\=ovector=2 +Matched, but too many substrings + 0: aa + 1: + aa123\=ovector=3 + 0: aa + 1: + 2: a + +/(?(?J)(?1(111111)11|)1|1|)(?()1)/ + +/(?(?J)(?))(?-J)\k/ + +# Quantifiers are not allowed on condition assertions, but are otherwise +# OK in conditions. + +/(?(?=0)?)+/ +Failed: error 109 at offset 7: quantifier does not follow a repeatable item + +/(?(?=0)(?=00)?00765)/ + 00765 + 0: 00765 + +/(?(?=0)(?=00)?00765|(?!3).56)/ + 00765 + 0: 00765 + 456 + 0: 456 +\= Expect no match + 356 +No match + +'^(a)*+(\w)' + g + 0: g + 1: + 2: g + g\=ovector=1 +Matched, but too many substrings + 0: g + +'^(?:a)*+(\w)' + g + 0: g + 1: g + g\=ovector=1 +Matched, but too many substrings + 0: g + +# These two pattern showeds up compile-time bugs + +"((?2){0,1999}())?" 
+ +/((?+1)(\1))/B +------------------------------------------------------------------ + Bra + CBra 1 + Recurse + CBra 2 + \1 + Ket + Ket + Ket + End +------------------------------------------------------------------ + +# Callouts with string arguments + +/a(?C"/ +Failed: error 181 at offset 4: missing terminating delimiter for callout with string argument + +/a(?C"a/ +Failed: error 181 at offset 4: missing terminating delimiter for callout with string argument + +/a(?C"a"/ +Failed: error 139 at offset 7: closing parenthesis for (?C expected + +/a(?C"a"bcde(?C"b")xyz/ +Failed: error 139 at offset 7: closing parenthesis for (?C expected + +/a(?C"a)b""c")/B +------------------------------------------------------------------ + Bra + a + CalloutStr "a)b"c" 5 13 0 + Ket + End +------------------------------------------------------------------ + +/ab(?C" any text with spaces ")cde/B +------------------------------------------------------------------ + Bra + ab + CalloutStr " any text with spaces " 6 30 1 + cde + Ket + End +------------------------------------------------------------------ + abcde +Callout (6): " any text with spaces " +--->abcde + ^ ^ c + 0: abcde + 12abcde +Callout (6): " any text with spaces " +--->12abcde + ^ ^ c + 0: abcde + +/^a(b)c(?C1)def/ + abcdef +--->abcdef + 1 ^ ^ d + 0: abcdef + 1: b + +/^a(b)c(?C"AB")def/ + abcdef +Callout (10): "AB" +--->abcdef + ^ ^ d + 0: abcdef + 1: b + +/^a(b)c(?C1)def/ + abcdef\=callout_capture +Callout 1: last capture = 1 + 1: b +--->abcdef + ^ ^ d + 0: abcdef + 1: b + +/^a(b)c(?C{AB})def/B +------------------------------------------------------------------ + Bra + ^ + a + CBra 1 + b + Ket + c + CalloutStr {AB} 10 14 1 + def + Ket + End +------------------------------------------------------------------ + abcdef\=callout_capture +Callout (10): {AB} last capture = 1 + 1: b +--->abcdef + ^ ^ d + 0: abcdef + 1: b + +/(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B,callout_info 
+------------------------------------------------------------------ + Bra + CalloutStr `a`b` 4 10 0 + CalloutStr 'a'b' 14 20 0 + CalloutStr "a"b" 24 30 0 + CalloutStr ^a^b^ 34 40 0 + CalloutStr %a%b% 44 50 0 + CalloutStr #a#b# 54 60 0 + CalloutStr $a$b$ 64 70 0 + CalloutStr {a}b} 74 80 0 + Ket + End +------------------------------------------------------------------ +Callout `a`b` ( +Callout 'a'b' ( +Callout "a"b" ( +Callout ^a^b^ ( +Callout %a%b% ( +Callout #a#b# ( +Callout $a$b$ ( +Callout {a}b} + +/(?:a(?C`code`)){3}/B +------------------------------------------------------------------ + Bra + Bra + a + CalloutStr `code` 8 14 4 + Ket + Bra + a + CalloutStr `code` 8 14 4 + Ket + Bra + a + CalloutStr `code` 8 14 4 + Ket + Ket + End +------------------------------------------------------------------ + +/^(?(?C25)(?=abc)abcd|xyz)/B,callout_info +------------------------------------------------------------------ + Bra + ^ + Cond + Callout 25 9 3 + Assert + abc + Ket + abcd + Alt + xyz + Ket + Ket + End +------------------------------------------------------------------ +Callout 25 (?= + abcdefg +--->abcdefg + 25 ^ (?= + 0: abcd + xyz123 +--->xyz123 + 25 ^ (?= + 0: xyz + +/^(?(?C$abc$)(?=abc)abcd|xyz)/B +------------------------------------------------------------------ + Bra + ^ + Cond + CalloutStr $abc$ 7 12 3 + Assert + abc + Ket + abcd + Alt + xyz + Ket + Ket + End +------------------------------------------------------------------ + abcdefg +Callout (7): $abc$ +--->abcdefg + ^ (?= + 0: abcd + xyz123 +Callout (7): $abc$ +--->xyz123 + ^ (?= + 0: xyz + +/^ab(?C'first')cd(?C"second")ef/ + abcdefg +Callout (7): 'first' +--->abcdefg + ^ ^ c +Callout (20): "second" +--->abcdefg + ^ ^ e + 0: abcdef + +/(?:a(?C`code`)){3}X/ + aaaXY +Callout (8): `code` +--->aaaXY + ^^ ){3} +Callout (8): `code` +--->aaaXY + ^ ^ ){3} +Callout (8): `code` +--->aaaXY + ^ ^ ){3} + 0: aaaX + +# Binary zero in callout string +# a ( ? 
C ' x z ' ) b +/ 61 28 3f 43 27 78 00 7a 27 29 62/hex,callout_info +Callout 'x\x00z' b + abcdefgh +Callout (5): 'x\x00z' +--->abcdefgh + ^^ b + 0: ab + +/(?(?!)^)/ + +/(?(?!)a|b)/ + bbb + 0: b +\= Expect no match + aaa +No match + +# JIT gives a different error message for the infinite recursion + +"(*NO_JIT)((?2)+)((?1)){" + abcd{ +Failed: error -52: nested recursion at the same subject position + +# Perl fails to diagnose the absence of an assertion + +"(?(?.*!.*)?)" +Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) + +"X((?2)()*+){2}+"B +------------------------------------------------------------------ + Bra + X + Once + CBra 1 + Recurse + Braposzero + SCBraPos 2 + KetRpos + Ket + CBra 1 + Recurse + Braposzero + SCBraPos 2 + KetRpos + Ket + Ket + Ket + End +------------------------------------------------------------------ + +"X((?2)()*+){2}"B +------------------------------------------------------------------ + Bra + X + CBra 1 + Recurse + Braposzero + SCBraPos 2 + KetRpos + Ket + CBra 1 + Recurse + Braposzero + SCBraPos 2 + KetRpos + Ket + Ket + End +------------------------------------------------------------------ + +/(?<=\bABQ(3(?-7)))/ +Failed: error 115 at offset 15: reference to non-existent subpattern + +/(?<=\bABQ(3(?+7)))/ +Failed: error 115 at offset 15: reference to non-existent subpattern + +";(?<=()((?3))((?2)))" +Failed: error 125 at offset 1: length of lookbehind assertion is not limited + +# Perl loops on this (PCRE2 used to!) 
+ +/(?<=\Ka)/g,aftertext,allow_lookaround_bsk + aaaaa + 0: a + 0+ aaaa + 0: a + 0+ aaa + 0: a + 0+ aa + 0: a + 0+ a + 0: a + 0+ + +/(?<=\Ka)/altglobal,aftertext,allow_lookaround_bsk + aaaaa + 0: a + 0+ aaaa + 0: a + 0+ aaa + 0: a + 0+ aa + 0: a + 0+ a + 0: a + 0+ + +/((?2){73}(?2))((?1))/info +Capture group count = 2 +May match empty string +Subject length lower bound = 0 + +/abc/ +\= Expect no match + \[9x!xxx(]{9999} +No match + +/(abc)*/ + \[abc]{5} + 0: abcabcabcabcabc + 1: abc + +/^/gm + \n\n\n + 0: + 0: + 0: + +/^/gm,alt_circumflex + \n\n\n + 0: + 0: + 0: + 0: + +/((((((((x))))))))\81/ +Failed: error 115 at offset 19: reference to non-existent subpattern + xx1 + +/((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))\80/ + xx +Matched, but too many substrings + 0: xx + 1: x + 2: x + 3: x + 4: x + 5: x + 6: x + 7: x + 8: x + 9: x +10: x +11: x +12: x +13: x +14: x + +/\80/ +Failed: error 115 at offset 2: reference to non-existent subpattern + +/A\8B\9C/ +Failed: error 115 at offset 2: reference to non-existent subpattern + A8B9C + +/(?x:((?'a')) # comment (with parentheses) and | vertical +(?-x:#not a comment (?'b')) # this is a comment () +(?'c')) # not a comment (?'d')/info +Capture group count = 5 +Named capture groups: + a 2 + b 3 + c 4 + d 5 +First code unit = '#' +Last code unit = ' ' +Subject length lower bound = 32 + +/(?|(?'a')(2)(?'b')|(?'a')(?'a')(3))/I,dupnames +Capture group count = 3 +Named capture groups: + a 1 + a 2 + b 3 +Options: dupnames +Starting code units: 2 3 +Subject length lower bound = 1 + A23B + 0: 2 + 1: + 2: 2 + 3: + B32A + 0: 3 + 1: + 2: + 3: 3 + +# These are some patterns that used to cause buffer overflows or other errors +# while compiling. 
+ +/.((?2)(?R)|\1|$)()/B +------------------------------------------------------------------ + Bra + Any + CBra 1 + Recurse + Recurse + Alt + \1 + Alt + $ + Ket + CBra 2 + Ket + Ket + End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/B +------------------------------------------------------------------ + Bra + Any + CBra 1 + Recurse + Recurse + CBra 2 + Ket + Recurse + Alt + \1 + Alt + $ + Ket + CBra 3 + Ket + Ket + End +------------------------------------------------------------------ + +/(\9*+(?2);\3++()2|)++{/ +Failed: error 115 at offset 2: reference to non-existent subpattern + +/\V\x85\9*+((?2)\3++()2)*:2/ +Failed: error 115 at offset 7: reference to non-existent subpattern + +/(((?(R)){0,2}) (?'x'((?'R')((?'R')))))/dupnames + +/(((?(X)){0,2}) (?'x'((?'X')((?'X')))))/dupnames + +/(((?(R)){0,2}) (?'x'((?'X')((?'R')))))/ + +"(?J)(?'d'(?'d'\g{d}))" + +"(?=!((?2)(?))({8(?<=(?1){29}8bbbb\x16\xd\xc6^($(\xa9H4){4}h}?1)B))\x15')" +Failed: error 125 at offset 16: length of lookbehind assertion is not limited + +/A(?'')Z/ +Failed: error 162 at offset 4: subpattern name expected + +"(?J:(?|(?'R')(\k'R')|((?'R'))))" + +/(?<=|(\,\$(?73591620449005828816)\xa8.{7}){6}\x09)/ +Failed: error 161 at offset 17: subpattern number is too big + +/^(?:(?(1)x|)+)+$()/B +------------------------------------------------------------------ + Bra + ^ + SBra + SCond + 1 Cond ref + x + Alt + KetRmax + KetRmax + $ + CBra 1 + Ket + Ket + End +------------------------------------------------------------------ + +/[[:>:]](?<)/ +Failed: error 162 at offset 10: subpattern name expected + +/((?x)(*:0))#(?'/ +Failed: error 162 at offset 15: subpattern name expected + +/(?C$[$)(?<]/ +Failed: error 162 at offset 10: subpattern name expected + +/(?C$)$)(?<]/ +Failed: error 162 at offset 10: subpattern name expected + +/(?(R))*+/B +------------------------------------------------------------------ + Bra + Braposzero + SBraPos + SCond + Cond recurse 
any + Ket + KetRpos + Ket + End +------------------------------------------------------------------ + abcd + 0: + +/((?x)(?#))#(?'/ +Failed: error 162 at offset 14: subpattern name expected + +/((?x)(?#))#(?'abc')/I +Capture group count = 2 +Named capture groups: + abc 2 +First code unit = '#' +Subject length lower bound = 1 + +/[[:\\](?<[::]/ +Failed: error 162 at offset 9: subpattern name expected + +/[[:\\](?'abc')[a:]/I +Capture group count = 1 +Named capture groups: + abc 1 +Starting code units: : [ \ +Subject length lower bound = 2 + +"[[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[:::::::::::::::::[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[[[:::E[[[:[:[[:[:::[[:::E[[[:[:[[:'[:::::E[[[:[::::::[[[:[[[[[[[::E[[[:[::::::[[[:[[[[[[[[:[[::[::::[[:::::::[[:[[[[[[[:[[::[:[[:[~" +Failed: error 106 at offset 353: missing terminating ] for character class + +/()(?(R)0)*+/B +------------------------------------------------------------------ + Bra + CBra 1 + Ket + Braposzero + SBraPos + SCond + Cond recurse any + 0 + Ket + KetRpos + Ket + End +------------------------------------------------------------------ + +/(?R-:(?>abcd<< + 1: >>w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$)<< + +/abcd/g,replace=\$1$2\,substitute_literal + XabcdYabcdZ + 2: X\$1$2\Y\$1$2\Z + +/a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended + abcDE + 1: aBcBCbcdEdeabAByzDone + +/abcd/replace=xy\kz,substitute_extended + abcd +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + +/a(?:(b)|(c))/substitute_extended,replace=X${1:+1:-1}X${2:+2:-2} + ab + 1: X1X-2 + ac + 1: X-1X2 + ab\=replace=${1:+$1\:$1:$2} + 1: b:b + ac\=replace=${1:+$1\:$1:$2} + 1: c + >>ac<<\=replace=${1:+$1\:$1:$2},substitute_literal + 1: >>${1:+$1\:$1:$2}<< + +/a(?:(b)|(c))/substitute_extended,replace=X${1:-1:-1}X${2:-2:-2} + ab + 1: XbX2:-2 + ac + 1: X1:-1Xc + 
+/(a)/substitute_extended,replace=>${1:+\Q$1:{}$$\E+\U$1}< + a + 1: >$1:{}$$+A< + +/X(b)Y/substitute_extended + XbY\=replace=x${1:+$1\U$1}y + 1: xbBY + XbY\=replace=\Ux${1:+$1$1}y + 1: XBBY + +/a/substitute_extended,replace=${*MARK:+a:b} + a +Failed: error -58 at offset 7 in replacement: expected closing curly bracket in replacement string + +/(abcd)/replace=${1:+xy\kz},substitute_extended + abcd +Failed: error -57 at offset 8 in replacement: bad escape sequence in replacement string + +/(abcd)/ + abcd\=replace=${1:+xy\kz},substitute_extended +Failed: error -57 at offset 8 in replacement: bad escape sequence in replacement string + +/abcd/substitute_extended,replace=>$1< + abcd +Failed: error -49 at offset 3 in replacement: unknown substring + +/abcd/substitute_extended,replace=>xxx${xyz}<<< + abcd +Failed: error -49 at offset 10 in replacement: unknown substring + +/(?J)(?:(?a)|(?b))/replace=<$A> + [a] + 1: [] + [b] + 1: [] +\= Expect error + (a)\=ovector=1 +Failed: error -54 at offset 3 in replacement: requested value is not available + +/(a)|(b)/replace=<$1> +\= Expect error + b +Failed: error -55 at offset 3 in replacement: requested value is not set + +/(aa)(BB)/substitute_extended,replace=\U$1\L$2\E$1..\U$1\l$2$1 + aaBB + 1: AAbbaa..AAbBaa + +/abcd/replace=wxyz,substitute_matched + abcd + 1: wxyz + pqrs + 0: pqrs + +/abcd/g + >abcd1234abcd5678<\=replace=wxyz,substitute_matched + 2: >wxyz1234wxyz5678< + +/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I +Capture group count = 2 +Max back reference = 1 +Compile options: +Overall options: anchored +First code unit = 'o' +Last code unit = '}' +Subject length lower bound = 65535 + +/((p(?'K/ +Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator?) + +/((p(?'K/no_auto_capture +Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator?) 
+ +/abc/replace=A$3123456789Z + abc +Failed: error -49 at offset 3 in replacement: unknown substring + +/(?a[bc]d + +0 ^ ( + +1 ^ )\Q\E* + +7 ^ ] + +8 ^^ End of pattern + 0: ] + 1: + +/\x8a+f|;T?(*:;.'?`(\xeap ){![^()!y*''C*(?';]{1;(\x08)/B,alt_verbnames,dupnames,extended +------------------------------------------------------------------ + Bra + \x{8a}++ + f + Alt + ; + T? + *MARK ;.'?`(\x{ea}p + {! + [\x00- "-&+-:<->@-BD-xz-\xff] (neg) + {1; + CBra 1 + \x08 + Ket + Ket + End +------------------------------------------------------------------ + +# Tests for NULL characters in comments and verb "names" and callouts + +# /A#B\x00C\x0aZ/ +/41 23 42 00 43 0a 5a/Bx,hex +------------------------------------------------------------------ + Bra + AZ + Ket + End +------------------------------------------------------------------ + +# /A+#B\x00C\x0a+/ +/41 2b 23 42 00 43 0a 2b/Bx,hex +------------------------------------------------------------------ + Bra + A++ + Ket + End +------------------------------------------------------------------ + +# /A(*:B\x00W#X\00Y\x0aC)Z/ +/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex,alt_verbnames +------------------------------------------------------------------ + Bra + A + *MARK B\x{0}WC + Z + Ket + End +------------------------------------------------------------------ + +# /A(*:B\x00W#X\00Y\x0aC)Z/ +/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex +------------------------------------------------------------------ + Bra + A + *MARK B\x{0}W#X\x{0}Y\x{a}C + Z + Ket + End +------------------------------------------------------------------ + +# /A(?C{X\x00Y})B/ +/41 28 3f 43 7b 58 00 59 7d 29 42/B,hex +------------------------------------------------------------------ + Bra + A + CalloutStr {X\x{0}Y} 5 10 1 + B + Ket + End +------------------------------------------------------------------ + +# /A(?#X\x00Y)B/ +/41 28 3f 23 7b 00 7d 29 42/B,hex +------------------------------------------------------------------ + Bra + AB + Ket + 
End +------------------------------------------------------------------ + +# Tests for leading comment in extended patterns + +/ (?-x):?/extended + +/ (?-x):?/extended + +/0b 28 3f 2d 78 29 3a/hex,extended + +/#comment +(?-x):?/extended + +/(8(*:6^\x09x\xa6l\)6!|\xd0:[^:|)\x09d\Z\d{85*m(?'(?<1!)*\W[*\xff]!!h\w]*\xbe;/alt_bsux,alt_verbnames,allow_empty_class,dollar_endonly,extended,multiline,never_utf,no_dotstar_anchor,no_start_optimize +Failed: error 162 at offset 49: subpattern name expected + +/a|(b)c/replace=>$1<,substitute_unset_empty + cat + 1: c>b$1< +Failed: error -55 at offset 3 in replacement: requested value is not set + cat\=replace=>$1<,substitute_unset_empty + 1: c>$1<,substitute_unset_empty + 1: x>b${2:-xx}< +Failed: error -49 at offset 9 in replacement: unknown substring + cat\=replace=>${2:-xx}<,substitute_unknown_unset + 1: c>xx${X:-xx}<,substitute_unknown_unset + 1: c>xx$X<,substitute_unset_empty + cat + 1: c>b$Y<,substitute_unset_empty + cat +Failed: error -49 at offset 3 in replacement: unknown substring + cat\=substitute_unknown_unset + 1: c>$2<,substitute_unset_empty + cat +Failed: error -49 at offset 3 in replacement: unknown substring + cat\=substitute_unknown_unset + 1: c>9010 + 0 ^ 0 + 0 ^ 0 + 0: + 1: 0 +\= Expect no match + abc +--->abc + 0 ^ 0 + 0 ^ 0 + 0 ^ 0 +No match + +/aaa/ +\[abc]{10000000000000000000000000000} +** Repeat count too large +\[a]{3} + 0: aaa + +/\[AB]{6000000000000000000000}/expand +** Pattern repeat count too large + +# Hex uses pattern length, not zero-terminated. This tests for overrunning +# the given length of a pattern. + +/'(*U'/hex +Failed: error 160 at offset 3: (*VERB) not recognized or malformed + +/'(*'/hex +Failed: error 109 at offset 1: quantifier does not follow a repeatable item + +/'('/hex +Failed: error 114 at offset 1: missing closing parenthesis + +//hex + +# These tests are here because Perl never allows a back reference in a +# lookbehind. PCRE2 supports some limited cases. 
+ +/([ab])...(?<=\1)z/ + a11az + 0: a11az + 1: a + b11bz + 0: b11bz + 1: b +\= Expect no match + b11az +No match + +/(?|([ab]))...(?<=\1)z/ +Failed: error 125 at offset 13: length of lookbehind assertion is not limited + +/([ab])(\1)...(?<=\2)z/ + aa11az + 0: aa11az + 1: a + 2: a + +/(a\2)(b\1)(?<=\2)/ +Failed: error 125 at offset 10: length of lookbehind assertion is not limited + +/(?[ab])...(?<=\k'A')z/ + a11az + 0: a11az + 1: a + b11bz + 0: b11bz + 1: b +\= Expect no match + b11az +No match + +/(?[ab])...(?<=\k'A')(?)z/dupnames +Failed: error 125 at offset 13: length of lookbehind assertion is not limited + +# Perl does not support \g+n + +/((\g+1X)?([ab]))+/ + aaXbbXa + 0: aaXbbXa + 1: bXa + 2: bX + 3: a + +/ab(?C1)c/auto_callout + abc +--->abc + +0 ^ a + +1 ^^ b + 1 ^ ^ c + +8 ^ ^ End of pattern + 0: abc + +/'ab(?C1)c'/hex,auto_callout + abc +--->abc + +0 ^ a + +1 ^^ b + 1 ^ ^ c + +8 ^ ^ End of pattern + 0: abc + +# Perl accepts these, but gives a warning. We can't warn, so give an error. 
+ +/[a-[:digit:]]+/ +Failed: error 150 at offset 4: invalid range in character class + a-a9-a + +/[A-[:digit:]]+/ +Failed: error 150 at offset 4: invalid range in character class + A-A9-A + +/[a-\d]+/ +Failed: error 150 at offset 5: invalid range in character class + a-a9-a + +/(?abc)(?(R)xyz)/B +------------------------------------------------------------------ + Bra + CBra 1 + abc + Ket + Cond + Cond recurse any + xyz + Ket + Ket + End +------------------------------------------------------------------ + +/(?abc)(?(R)xyz)/B +------------------------------------------------------------------ + Bra + CBra 1 + abc + Ket + Cond + 1 Cond ref + xyz + Ket + Ket + End +------------------------------------------------------------------ + +/(?=.*[A-Z])/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + +/()(?<=(?0))/ +Failed: error 125 at offset 2: length of lookbehind assertion is not limited + +/(?*?\g'0/use_length +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/.>*?\g'0/ +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/{„Í„Í̈́Í{'{22{2{{2{'{22{{22{2{'{22{2{{2{{222{{2{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{222{2Ą̈́Í̈́Í{'{22{2{{2{'{22{{11{2{'{22{2{{2{{'{22{2{{2{'{22{{22{1{'{22{2{{2{{222{{2{'{22{2{22{2{'{/auto_callout + +// +\=get=i00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +** Group name in 'get' is too long +\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012 +** Too many characters in named 'get' modifiers + +"(?(?C))" +Failed: error 128 at offset 6: assertion expected after (?( or (?(?C) + +/(?(?(?(?(?(?))))))/ +Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) + +/(?<=(?1))((?s))/anchored + +/(*:ab)*/ 
+Failed: error 109 at offset 6: quantifier does not follow a repeatable item + +%(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout + +/./newline=crlf + \=ph +No match + +/(\x0e00\000000\xc)/replace=\P,substitute_extended + \x0e00\000000\xc +Failed: error -57 at offset 2 in replacement: bad escape sequence in replacement string + +//replace=0 + \=offset=7 +Failed: error -33: bad offset value + +/(?<=\G.)/g,replace=+ + abc + 3: a+b+c+ + +".+\QX\E+"B,no_auto_possess +------------------------------------------------------------------ + Bra + Any+ + X+ + Ket + End +------------------------------------------------------------------ + +".+\QX\E+"B,auto_callout,no_auto_possess +------------------------------------------------------------------ + Bra + Callout 255 0 4 + Any+ + Callout 255 4 4 + X+ + Callout 255 8 0 + Ket + End +------------------------------------------------------------------ + +# This one is here because Perl gives an 'unmatched )' error which goes away +# if one of the \) sequences is removed - which is weird. PCRE finds it too +# complicated to find a minimum matching length. 
+ +"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I +Capture group count = 108 +Max back reference = 22 +Contains explicit CR or LF match +Subject length lower bound = 1 + +# This checks that new code for handling groups that may match an empty string +# works on a very large number of alternatives. This pattern used to provoke a +# complaint that it was too complicated. + +/(?:\[A|B|C|D|E|F|G|H|I|J|]{200}Z)/expand + +# This one used to compile rubbish instead of a compile error, and then +# behave unpredictably at match time. 
+ +/.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X/ +Failed: error 128 at offset 63: assertion expected after (?( or (?(?C) + .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X + +/[:[:alnum:]-[[a:lnum:]+/ +Failed: error 150 at offset 11: invalid range in character class + +/((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ +Failed: error 128 at offset 11: assertion expected after (?( or (?(?C) + +/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + +/abcd/auto_callout + abcd\=callout_error=255:2 +--->abcd + +0 ^ a + +1 ^^ b +Failed: error -37: callout error code + +/()(\g+65534)/ +Failed: error 161 at offset 11: subpattern number is too big + +/()(\g+65533)/ +Failed: error 115 at offset 10: reference to non-existent subpattern + +/Á\x00\x00\x00š(\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\x00k\d+\x00‎\x00\x00\x00\x00\x00\2*\x00\x00\1*.){36}int^\x00\x00ÿÿ\x00š(\1{50779}?)J\w2/I +Capture group count = 2 +Max back reference = 2 +First code unit = \xc1 +Last code unit = '2' +Subject length lower bound = 65535 + +/(a)(b)\2\1\1\1\1/I +Capture group count = 2 +Max back reference = 2 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 7 + +/(?a)(?b)\g{b}\g{a}\g{a}\g{a}\g{a}(?xx)(?zz)/I,dupnames +Capture group count = 4 +Max back reference = 4 +Named capture groups: + a 1 + a 3 + b 2 + b 4 +Options: dupnames +First code unit = 'a' +Last code unit = 'z' +Subject 
length lower bound = 11 + +// + \=ovector=7777777777 +** Invalid value in 'ovector=7777777777' + +# This is here because Perl matches, even though a COMMIT is encountered +# outside of the recursion. + +/(?1)(A(*COMMIT)|B)D/ + BAXBAD +No match + +"(?1){2}(a)"B +------------------------------------------------------------------ + Bra + Recurse + Recurse + CBra 1 + a + Ket + Ket + End +------------------------------------------------------------------ + +"(?1){2,4}(a)"B +------------------------------------------------------------------ + Bra + Recurse + Recurse + Brazero + Bra + Bra + Recurse + Ket + Brazero + Bra + Recurse + Ket + Ket + CBra 1 + a + Ket + Ket + End +------------------------------------------------------------------ + +# This test differs from Perl for the first subject. Perl ends up with +# $1 set to 'B'; PCRE2 has it unset (which I think is right). + +/^(?: +(?:A| (?:B|B(*ACCEPT)) (?<=(.)) D) +(Z) +)+$/x + AZB + 0: AZB + 1: + 2: Z + AZBDZ + 0: AZBDZ + 1: B + 2: Z + +# The first of these, when run by Perl, gives the mark 'aa', which is wrong. + +'(?>a(*:aa))b|ac' mark + ac + 0: ac + +'(?:a(*:aa))b|ac' mark + ac + 0: ac + +/(R?){65}/ + (R?){65} + 0: + 1: + +/\[(a)]{60}/expand + aaaa +No match + +/(?abcdabcd + ^^ ( +Callout 1: last capture = 1 + 1: abcd + 2: b + 3: c +--->abcdabcd + ^ ^ ( + 0: abcdabcd + 1: abcd + 2: b + 3: c + +# Perl matches this one, but PCRE does not because (*ACCEPT) clears out any +# pending backtracks in the recursion. + +/^ (?(DEFINE) (..(*ACCEPT)|...) 
) (?1)$/x +\= Expect no match + abc +No match + +# Perl gives no match for this one + +/(a(*MARK:m)(*ACCEPT)){0}(?1)/mark + abc + 0: a +MK: m + +/abc/endanchored + xyzabc + 0: abc +\= Expect no match + xyzabcdef +No match +\= Expect error + xyzabc\=ph +Failed: error -34: bad option value + +/abc/ + xyzabc\=endanchored + 0: abc +\= Expect no match + xyzabcdef\=endanchored +No match +\= Expect error + xyzabc\=ps,endanchored +Failed: error -34: bad option value + +/abc(*ACCEPT)d/endanchored + xyzabc + 0: abc +\= Expect no match + xyzabcdef +No match + +/abc|bcd/endanchored + xyzabcd + 0: bcd +\= Expect no match + xyzabcdef +No match + +/a(*ACCEPT)x|aa/endanchored + aaa + 0: a + +# Check auto-anchoring when there is a group that is never obeyed at +# the start of a branch. + +/(?(DEFINE)(a))^bc/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'b' +Subject length lower bound = 2 + +/(a){0}.*bc/sI +Capture group count = 1 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'c' +Subject length lower bound = 2 + +# This should be anchored, as the condition is always false and there is +# no alternative branch. + +/(?(VERSION>=999)yes)^bc/I +Capture group count = 0 +Compile options: +Overall options: anchored +Subject length lower bound = 2 + +# This should not be anchored. + +/(?(VERSION>=999)yes|no)^bc/I +Capture group count = 0 +Last code unit = 'c' +Subject length lower bound = 4 + +/(*LIMIT_HEAP=0)xxx/I +Capture group count = 0 +Heap limit = 0 +First code unit = 'x' +Last code unit = 'x' +Subject length lower bound = 3 + +/\d{0,3}(*:abc)(?C1)xxx/callout_info +Callout 1 x + +# ---------------------------------------------------------------------- + +# These are a whole pile of tests that touch lines of code that are not +# used by any other tests (at least when these were created). 
+ +/^a+?x/i,no_start_optimize,no_auto_possess +\= Expect no match + aaa +No match + +/^[^a]{3,}?x/i,no_start_optimize,no_auto_possess +\= Expect no match + bbb +No match + cc +No match + +/^X\S/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\W/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\H/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\h/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\V/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\v/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\h/no_start_optimize,no_auto_possess +\= Expect no match + XY +No match + +/^X\V/no_start_optimize,no_auto_possess +\= Expect no match + X\n +No match + +/^X\v/no_start_optimize,no_auto_possess +\= Expect no match + XX +No match + +/^X.+?/s,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\R+?/no_start_optimize,no_auto_possess +\= Expect no match + XX +No match + +/^X\H+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\h+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\V+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + X\n +No match + +/^X\D+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + X9 +No match + +/^X\S+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + X\n +No match + +/^X\W+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + XX +No match + +/^X.+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + +/(*CRLF)^X.+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\r\=ps +Partial match: XY\x0d + +/^X\R+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\nX +No match + X\n\r\n +No match + X\n\rY +No match + X\n\nY +No match + X\n\x{0c}Y +No match + +/(*BSR_ANYCRLF)^X\R+?Z/no_start_optimize,no_auto_possess +\= 
Expect no match + X\nX +No match + X\n\r\n +No match + X\n\rY +No match + X\n\nY +No match + X\n\x{0c}Y +No match + +/^X\H+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\t +No match + XYY +No match + +/^X\h+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\t\t +No match + X\tY +No match + +/^X\V+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + XYY +No match + +/^X\v+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\n\n +No match + X\nY +No match + +/^X\D+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY9 +No match + XYY +No match + +/^X\d+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X99 +No match + X9Y +No match + +/^X\S+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + XYY +No match + +/^X\s+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\n\n +No match + X\nY +No match + +/^X\W+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X.A +No match + X++ +No match + +/^X\w+?Z/no_start_optimize,no_auto_possess +\= Expect no match + Xa. 
+No match + Xaa +No match + +/^X.{1,3}Z/s,no_start_optimize,no_auto_possess +\= Expect no match + Xa.bd +No match + +/^X\h+Z/no_start_optimize,no_auto_possess +\= Expect no match + X\t\t +No match + X\tY +No match + +/^X\V+Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + XYY +No match + +/^(X(*THEN)Y|AB){0}(?1)/ + ABX + 0: AB +\= Expect no match + XAB +No match + +/^(?!A(?C1)B)C/ + ABC\=callout_error=1,no_jit +No match + +/^(?!A(?C1)B)C/no_start_optimize + ABC\=callout_error=1 +--->ABC + 1 ^^ B +Failed: error -37: callout error code + +/^(?(?!A(?C1)B)C)/ + ABC\=callout_error=1 +--->ABC + 1 ^^ B +Failed: error -37: callout error code + +# ---------------------------------------------------------------------- + +/[a b c]/BxxI +------------------------------------------------------------------ + Bra + [a-c] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended_more +Starting code units: a b c +Subject length lower bound = 1 + +/[a b c]/BxxxI +------------------------------------------------------------------ + Bra + [a-c] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended extended_more +Starting code units: a b c +Subject length lower bound = 1 + +/[a b c]/B,extended_more +------------------------------------------------------------------ + Bra + [a-c] + Ket + End +------------------------------------------------------------------ + +/[ a b c ]/B,extended_more +------------------------------------------------------------------ + Bra + [a-c] + Ket + End +------------------------------------------------------------------ + +/[a b](?xx: [ 12 ] (?-xx:[ 34 ]) )y z/B +------------------------------------------------------------------ + Bra + [ ab] + Bra + [12] + Bra + [ 34] + Ket + Ket + y z + Ket + End +------------------------------------------------------------------ + +# Unsetting /x also unsets /xx + 
+/[a b](?xx: [ 12 ] (?-x:[ 34 ]) )y z/B +------------------------------------------------------------------ + Bra + [ ab] + Bra + [12] + Bra + [ 34] + Ket + Ket + y z + Ket + End +------------------------------------------------------------------ + +/(a)(?-n:(b))(c)/nB +------------------------------------------------------------------ + Bra + Bra + a + Ket + Bra + CBra 1 + b + Ket + Ket + Bra + c + Ket + Ket + End +------------------------------------------------------------------ + +# ---------------------------------------------------------------------- +# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option. + +/\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal +** Unrecognized modifier '\' in '\bad_escape_is_literal' + +/\N{\c/IB,bad_escape_is_literal +------------------------------------------------------------------ + Bra + N{c + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Extra options: bad_escape_is_literal +First code unit = 'N' +Last code unit = 'c' +Subject length lower bound = 3 + +/[\j\x{z}\o\gAb\g]/B,bad_escape_is_literal +------------------------------------------------------------------ + Bra + [Abgjoxz{}] + Ket + End +------------------------------------------------------------------ + +/[Q-\N]/B,bad_escape_is_literal +Failed: error 150 at offset 5: invalid range in character class + +/[\s-_]/bad_escape_is_literal +Failed: error 150 at offset 3: invalid range in character class + +/[_-\s]/bad_escape_is_literal +Failed: error 150 at offset 5: invalid range in character class + +/[\B\R\X]/B +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\B\R\X]/B,bad_escape_is_literal +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[A-\BP-\RV-\X]/B +Failed: error 107 at offset 4: escape sequence is invalid in character class + +/[A-\BP-\RV-\X]/B,bad_escape_is_literal +Failed: error 107 at offset 4: escape sequence is 
invalid in character class + +# ---------------------------------------------------------------------- + +/a\b(c/literal + a\\b(c + 0: a\b(c + +/a\b(c/literal,caseless + a\\b(c + 0: a\b(c + a\\B(c + 0: a\B(c + +/a\b(c/literal,firstline + XYYa\\b(c + 0: a\b(c +\= Expect no match + X\na\\b(c +No match + +/a\b?c/literal,use_offset_limit + XXXXa\\b?c\=offset_limit=4 + 0: a\b?c +\= Expect no match + XXXXa\\b?c\=offset_limit=3 +No match + +/a\b(c/literal,anchored,endanchored + a\\b(c + 0: a\b(c +\= Expect no match + Xa\\b(c +No match + a\\b(cX +No match + Xa\\b(cX +No match + +//literal,extended +Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL + +/a\b(c/literal,auto_callout,no_start_optimize + XXXXa\\b(c +--->XXXXa\b(c + +0 ^ a + +0 ^ a + +0 ^ a + +0 ^ a + +0 ^ a + +1 ^^ \ + +2 ^ ^ b + +3 ^ ^ ( + +4 ^ ^ c + +5 ^ ^ End of pattern + 0: a\b(c + +/a\b(c/literal,auto_callout + XXXXa\\b(c +--->XXXXa\b(c + +0 ^ a + +1 ^^ \ + +2 ^ ^ b + +3 ^ ^ ( + +4 ^ ^ c + +5 ^ ^ End of pattern + 0: a\b(c + +/(*CR)abc/literal + (*CR)abc + 0: (*CR)abc + +/cat|dog/I,match_word +Capture group count = 0 +Max lookbehind = 1 +Extra options: match_word +Starting code units: c d +Subject length lower bound = 3 + the cat sat + 0: cat +\= Expect no match + caterpillar +No match + snowcat +No match + syndicate +No match + +/(cat)|dog/I,match_line,literal +Capture group count = 0 +Compile options: literal +Overall options: anchored literal +Extra options: match_line +First code unit = '(' +Subject length lower bound = 9 + (cat)|dog + 0: (cat)|dog +\= Expect no match + the cat sat +No match + caterpillar +No match + snowcat +No match + syndicate +No match + +/a whole line/match_line,multiline + Rhubarb \na whole line\n custard + 0: a whole line +\= Expect no match + Not a whole line +No match + +# Perl gets this wrong, failing to capture 'b' in group 1. + +/^(b+|a){1,2}?bc/ + bbc + 0: bbc + 1: b + +# And again here, for the "babc" subject string. 
+ +/^(b*|ba){1,2}?bc/ + babc + 0: babc + 1: ba + bbabc + 0: bbabc + 1: ba + bababc + 0: bababc + 1: ba +\= Expect no match + bababbc +No match + babababc +No match + +/[[:digit:]-a]/ +Failed: error 150 at offset 10: invalid range in character class + +/[[:digit:]-[:print:]]/ +Failed: error 150 at offset 10: invalid range in character class + +/[\d-a]/ +Failed: error 150 at offset 3: invalid range in character class + +/[\H-z]/ +Failed: error 150 at offset 3: invalid range in character class + +/[\d-[:print:]]/ +Failed: error 150 at offset 3: invalid range in character class + +# Perl gets the second of these wrong, giving no match. + +"(?<=(a))\1?b"I +Capture group count = 1 +Max back reference = 1 +Max lookbehind = 1 +Last code unit = 'b' +Subject length lower bound = 1 + ab + 0: b + 1: a + aaab + 0: ab + 1: a + +"(?=(a))\1?b"I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + ab + 0: ab + 1: a + aaab + 0: ab + 1: a + +# JIT does not support callout_extra + +/(*NO_JIT)(a+)b/auto_callout,no_start_optimize,no_auto_possess +\= Expect no match + aac\=callout_extra +New match attempt +--->aac + +9 ^ ( ++10 ^ a+ ++12 ^ ^ ) ++13 ^ ^ b +Backtrack +--->aac ++12 ^^ ) ++13 ^^ b +Backtrack +No other matching paths +New match attempt +--->aac + +9 ^ ( ++10 ^ a+ ++12 ^^ ) ++13 ^^ b +Backtrack +No other matching paths +New match attempt +--->aac + +9 ^ ( ++10 ^ a+ +Backtrack +No other matching paths +New match attempt +--->aac + +9 ^ ( ++10 ^ a+ +No match + +/(*NO_JIT)a+(?C'XXX')b/no_start_optimize,no_auto_possess +\= Expect no match + aac\=callout_extra +New match attempt +Callout (15): 'XXX' +--->aac + ^ ^ b +Backtrack +Callout (15): 'XXX' +--->aac + ^^ b +Backtrack +No other matching paths +New match attempt +Callout (15): 'XXX' +--->aac + ^^ b +No match + +/\n/firstline + xyz\nabc + 0: \x0a + +/\nabc/firstline + xyz\nabc + 0: \x0aabc + +/\x{0a}abc/firstline,newline=crlf +\= Expect no match + 
xyz\r\nabc +No match + +/[abc]/firstline +\= Expect no match + \na +No match + +# These tests are matched in test 1 as they are Perl compatible. Here we are +# looking at what does and does not get auto-possessified. + +/(?(DEFINE)(?a?))^(?&optional_a)a$/B +------------------------------------------------------------------ + Bra + Cond + Cond false + CBra 1 + a? + Ket + Ket + ^ + Recurse + a + $ + Ket + End +------------------------------------------------------------------ + +/(?(DEFINE)(?a?)X)^(?&optional_a)a$/B +------------------------------------------------------------------ + Bra + Cond + Cond false + CBra 1 + a? + Ket + X + Ket + ^ + Recurse + a + $ + Ket + End +------------------------------------------------------------------ + +/^(a?)b(?1)a/B +------------------------------------------------------------------ + Bra + ^ + CBra 1 + a? + Ket + b + Recurse + a + Ket + End +------------------------------------------------------------------ + +/^(a?)+b(?1)a/B +------------------------------------------------------------------ + Bra + ^ + SCBra 1 + a? + KetRmax + b + Recurse + a + Ket + End +------------------------------------------------------------------ + +/^(a?)++b(?1)a/B +------------------------------------------------------------------ + Bra + ^ + SCBraPos 1 + a? + KetRpos + b + Recurse + a + Ket + End +------------------------------------------------------------------ + +/^(a?)+b/B +------------------------------------------------------------------ + Bra + ^ + SCBra 1 + a? 
+ KetRmax + b + Ket + End +------------------------------------------------------------------ + +/(?=a+)a(a+)++b/B +------------------------------------------------------------------ + Bra + Assert + a++ + Ket + a + CBraPos 1 + a++ + KetRpos + b + Ket + End +------------------------------------------------------------------ + +/(?<=(?=.){4,5}x)/B +------------------------------------------------------------------ + Bra + Assert back + Reverse + Assert + Any + Ket + Assert + Any + Ket + Assert + Any + Ket + Assert + Any + Ket + Brazero + Assert + Any + Ket + x + Ket + Ket + End +------------------------------------------------------------------ + +# Perl behaves differently with these when optimization is turned off + +/a(*PRUNE:X)bc|qq/mark,no_start_optimize +\= Expect no match + axy +No match, mark = X + +/a(*THEN:X)bc|qq/mark,no_start_optimize +\= Expect no match + axy +No match, mark = X + +/(?^x-i)AB/ +Failed: error 194 at offset 4: invalid hyphen in option setting + +/(?^-i)AB/ +Failed: error 194 at offset 3: invalid hyphen in option setting + +/(?x-i-i)/ +Failed: error 194 at offset 5: invalid hyphen in option setting + +/(?(?=^))b/I +Capture group count = 0 +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: b + +/(?(?=^)|)b/I +Capture group count = 0 +First code unit = 'b' +Subject length lower bound = 1 + abc + 0: b + +/(?(?=^)|^)b/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'b' +Subject length lower bound = 1 + bbc + 0: b +\= Expect no match + abc +No match + +/(?(1)^|^())/I +Capture group count = 1 +Max back reference = 1 +May match empty string +Compile options: +Overall options: anchored +Subject length lower bound = 0 + +/(?(1)^())b/I +Capture group count = 1 +Max back reference = 1 +Last code unit = 'b' +Subject length lower bound = 1 + +/(?(1)^())+b/I,aftertext +Capture group count = 1 +Max back reference = 1 +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: b + 0+ c + 
+/(?(1)^()|^)+b/I,aftertext +Capture group count = 1 +Max back reference = 1 +Compile options: +Overall options: anchored +First code unit = 'b' +Subject length lower bound = 1 + bbc + 0: b + 0+ bc +\= Expect no match + abc +No match + +/(?(1)^()|^)*b/I,aftertext +Capture group count = 1 +Max back reference = 1 +First code unit = 'b' +Subject length lower bound = 1 + bbc + 0: b + 0+ bc + abc + 0: b + 0+ c + xbc + 0: b + 0+ c + +/(?(1)^())+b/I,aftertext +Capture group count = 1 +Max back reference = 1 +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: b + 0+ c + +/(?(1)^a()|^a)+b/I,aftertext +Capture group count = 1 +Max back reference = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + abc + 0: ab + 0+ c +\= Expect no match + bbc +No match + +/(?(1)^|^(a))+b/I,aftertext +Capture group count = 1 +Max back reference = 1 +Compile options: +Overall options: anchored +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: ab + 0+ c + 1: a +\= Expect no match + bbc +No match + +/(?(1)^a()|^a)*b/I,aftertext +Capture group count = 1 +Max back reference = 1 +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: ab + 0+ c + bbc + 0: b + 0+ bc + xbc + 0: b + 0+ c + +/a(b)c|xyz/g,allvector,replace=<$0> + abcdefabcpqr\=ovector=4 + 2: defpqr + 0: 6 9 + 1: 7 8 + 2: + 3: + abxyz\=ovector=4 + 1: ab + 0: 2 5 + 1: + 2: + 3: + abcdefxyz\=ovector=4 + 2: def + 0: 6 9 + 1: + 2: + 3: + +/a(b)c|xyz/allvector + abcdef\=ovector=4 + 0: abc + 1: b + 2: + 3: + abxyz\=ovector=4 + 0: xyz + 1: + 2: + 3: + +/a(b)c|xyz/g,replace=<$0>,substitute_callout + abcdefabcpqr + 1(2) Old 0 3 "abc" New 0 5 "" + 2(2) Old 6 9 "abc" New 8 13 "" + 2: defpqr + abxyzpqrabcxyz + 1(1) Old 2 5 "xyz" New 2 7 "" + 2(2) Old 8 11 "abc" New 10 15 "" + 3(1) Old 11 14 "xyz" New 15 20 "" + 3: abpqr + 12abc34xyz99abc55\=substitute_stop=2 + 1(2) Old 2 5 "abc" New 2 7 "" + 2(1) Old 7 10 "xyz" New 9 14 " STOPPED" + 2: 
1234xyz99abc55 + 12abc34xyz99abc55\=substitute_skip=1 + 1(2) Old 2 5 "abc" New 2 7 " SKIPPED" + 2(1) Old 7 10 "xyz" New 7 12 "" + 3(2) Old 12 15 "abc" New 14 19 "" + 3: 12abc349955 + 12abc34xyz99abc55\=substitute_skip=2 + 1(2) Old 2 5 "abc" New 2 7 "" + 2(1) Old 7 10 "xyz" New 9 14 " SKIPPED" + 3(2) Old 12 15 "abc" New 14 19 "" + 3: 1234xyz9955 + +/a(b)c|xyz/g,replace=<$0> + abcdefabcpqr + 2: defpqr + abxyzpqrabcxyz + 3: abpqr + 12abc34xyz\=substitute_stop=2 + 1(2) Old 2 5 "abc" New 2 7 "" + 2(1) Old 7 10 "xyz" New 9 14 " STOPPED" + 2: 1234xyz + 12abc34xyz\=substitute_skip=1 + 1(2) Old 2 5 "abc" New 2 7 " SKIPPED" + 2(1) Old 7 10 "xyz" New 7 12 "" + 2: 12abc34 + +/a(b)c|xyz/replace=<$0> + abcdefabcpqr + 1: defabcpqr + 12abc34xyz\=substitute_skip=1 + 1(2) Old 2 5 "abc" New 2 7 " SKIPPED" + 1: 12abc34xyz + 12abc34xyz\=substitute_stop=1 + 1(2) Old 2 5 "abc" New 2 7 " STOPPED" + 1: 12abc34xyz + +/abc\rdef/ + abc\ndef +No match + +/abc\rdef\x{0d}xyz/escaped_cr_is_lf + abc\ndef\rxyz + 0: abc\x0adef\x0dxyz +\= Expect no match + abc\ndef\nxyz +No match + +/(?(*ACCEPT)xxx)/ +Failed: error 128 at offset 2: assertion expected after (?( or (?(?C) + +/(?(*atomic:xx)xxx)/ +Failed: error 128 at offset 10: assertion expected after (?( or (?(?C) + +/(?(*script_run:xxx)zzz)/ +Failed: error 128 at offset 14: assertion expected after (?( or (?(?C) + +/foobar/ + the foobar thing\=copy_matched_subject + 0: foobar + the foobar thing\=copy_matched_subject,zero_terminate + 0: foobar + +/foobar/g + the foobar thing foobar again\=copy_matched_subject + 0: foobar + 0: foobar + +/(*:XX)^abc/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + +/(*COMMIT:XX)^abc/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + +/(*ACCEPT:XX)^abc/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + +/abc/replace=xyz + 
abc\=null_context + 1: xyz + +/abc/replace=xyz,substitute_callout + abc + 1(1) Old 0 3 "abc" New 0 3 "xyz" + 1: xyz +\= Expect error message + abc\=null_context +** Replacement callouts are not supported with null_context. + +/\[()]{65535}()/expand +Failed: error 197 at offset 131071: too many capturing groups (maximum 65535) + +/\[()]{65535}(?)/expand +Failed: error 197 at offset 131075: too many capturing groups (maximum 65535) + +/a(?:(*ACCEPT))??bc/ + abc + 0: abc + axy + 0: a + +/a(*ACCEPT)??bc/ + abc + 0: abc + axy + 0: a + +/a(*ACCEPT:XX)??bc/mark + abc + 0: abc + axy + 0: a +MK: XX + +/(*:\)?/ +Failed: error 109 at offset 5: quantifier does not follow a repeatable item + +/(*:\Q \E){5}/alt_verbnames +Failed: error 109 at offset 11: quantifier does not follow a repeatable item + +/(?=abc)/I +Capture group count = 0 +May match empty string +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 2 + +/(?|(X)|(XY))\1abc/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'X' +Last code unit = 'c' +Subject length lower bound = 4 + +/(?|(a)|(bcde))(c)\2/I +Capture group count = 2 +Max back reference = 2 +Starting code units: a b +Last code unit = 'c' +Subject length lower bound = 3 + +/(?|(a)|(bcde))(c)\1/I +Capture group count = 2 +Max back reference = 1 +Starting code units: a b +Last code unit = 'c' +Subject length lower bound = 2 + +/(?|(?'A'a)|(?'A'bcde))(?'B'c)\k'B'(?'A')/I,dupnames +Capture group count = 3 +Max back reference = 2 +Named capture groups: + A 1 + A 3 + B 2 +Options: dupnames +Starting code units: a b +Last code unit = 'c' +Subject length lower bound = 3 + +/(?|(?'A'a)|(?'A'bcde))(?'B'c)\k'A'(?'A')/I,dupnames +Capture group count = 3 +Max back reference = 3 +Named capture groups: + A 1 + A 3 + B 2 +Options: dupnames +Starting code units: a b +Last code unit = 'c' +Subject length lower bound = 2 + +/((a|)+)+Z/I +Capture group count = 2 +Starting code units: Z a +Last code unit = 'Z' +Subject length lower 
bound = 1 + +/((?=a))[abcd]/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/A(?:(*ACCEPT))?B/info +Capture group count = 0 +First code unit = 'A' +Subject length lower bound = 1 + +/(A(*ACCEPT)??B)C/ + ABC + 0: ABC + 1: AB + AXY + 0: A + 1: A + +/(?<=(?<=a)b)c.*/I +Capture group count = 0 +Max lookbehind = 1 +First code unit = 'c' +Subject length lower bound = 1 + abc\=ph +Partial match: c +\= Expect no match + xbc\=ph +No match + +/(?<=ab)c.*/I +Capture group count = 0 +Max lookbehind = 2 +First code unit = 'c' +Subject length lower bound = 1 + abc\=ph +Partial match: c +\= Expect no match + xbc\=ph +No match + +/(?<=a(?<=a|a)c)/I +Capture group count = 0 +Max lookbehind = 2 +May match empty string +Subject length lower bound = 0 + +/(?<=a(?<=a|ba)c)/I +Capture group count = 0 +Max lookbehind = 2 +May match empty string +Subject length lower bound = 0 + +/(?<=(?<=a)b)(?.*?\b\1\b){3}/ + word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 +No match + +/\A(*napla:.*\b(\w++))(?>.*?\b\1\b){3}/ + word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 + 0: word1 word3 word1 word2 word3 word2 word2 word1 word3 + 1: word3 + +/\A(?*.*\b(\w++))(?>.*?\b\1\b){3}/ + word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 + 0: word1 word3 word1 word2 word3 word2 word2 word1 word3 + 1: word3 + +/(*plb:(.)..|(.)...)(\1|\2)/ + abcdb\=offset=4 + 0: b + 1: b + 2: + 3: b + abcda\=offset=4 +No match + +/(*naplb:(.)..|(.)...)(\1|\2)/ + abcdb\=offset=4 + 0: b + 1: b + 2: + 3: b + abcda\=offset=4 + 0: a + 1: + 2: a + 3: a + +/(?<*(.)..|(.)...)(\1|\2)/ + abcdb\=offset=4 + 0: b + 1: b + 2: + 3: b + abcda\=offset=4 + 0: a + 1: + 2: a + 3: a + +/(*non_atomic_positive_lookahead:ab)/B +------------------------------------------------------------------ + Bra + Non-atomic assert + ab + Ket + Ket + End +------------------------------------------------------------------ + +/(*non_atomic_positive_lookbehind:ab)/B 
+------------------------------------------------------------------ + Bra + Non-atomic assert back + Reverse + ab + Ket + Ket + End +------------------------------------------------------------------ + +/(*pla:ab+)/B +------------------------------------------------------------------ + Bra + Assert + a + b++ + Ket + Ket + End +------------------------------------------------------------------ + +/(*napla:ab+)/B +------------------------------------------------------------------ + Bra + Non-atomic assert + a + b+ + Ket + Ket + End +------------------------------------------------------------------ + +/(*napla:)+/ + +/(*naplb:)+/ + +/(*napla:^x|^y)/I +Capture group count = 0 +May match empty string +Compile options: +Overall options: anchored +Starting code units: x y +Subject length lower bound = 1 + +/(*napla:abc|abd)/I +Capture group count = 0 +May match empty string +First code unit = 'a' +Subject length lower bound = 1 + +/(*napla:a|(.)(*ACCEPT)zz)\1../ + abcd + 0: abc + 1: a + +/(*napla:a(*ACCEPT)zz|(.))\1../ + abcd + 0: bcd + 1: b + +/(*napla:a|(*COMMIT)(.))\1\1/ + aabc + 0: aa + 1: a +\= Expect no match + abbc +No match + +/(*napla:a|(.))\1\1/ + aabc + 0: aa + 1: a + abbc + 0: bb + 1: b + +/(*naplb:ab?c|PQ).../g + abcdefgacxyzPQR123 + 0: def + 0: xyz + 0: R12 + +# ---- + +# Expect error (recursion => not fixed length) +/(\2)((?=(?<=\1)))/ +Failed: error 125 at offset 8: length of lookbehind assertion is not limited + +/c*+(?<=[bc])/ + abc\=ph +Partial match: c + ab\=ph +Partial match: + abc\=ps + 0: c + ab\=ps + 0: + +/c++(?<=[bc])/ + abc\=ph +Partial match: c + ab\=ph +Partial match: + +/(?<=(?=.(?<=x)))/ + abx + 0: + ab\=ph +Partial match: + bxyz + 0: + xyz + 0: + +/\z/ + abc\=ph +Partial match: + abc\=ps + 0: + +/\Z/ + abc\=ph +Partial match: + abc\=ps + 0: + abc\n\=ph +Partial match: \x0a + abc\n\=ps + 0: + +/(?![ab]).*/ + ab\=ph +Partial match: + +/c*+/ + ab\=ph,offset=2 +Partial match: + +/\A\s*(a|(?:[^`]{28500}){4})/I +Capture group count = 1 +Max 
lookbehind = 1 +Compile options: +Overall options: anchored +Subject length lower bound = 1 + a + 0: a + 1: a + +/\A\s*((?:[^`]{28500}){4})/I +Capture group count = 1 +Max lookbehind = 1 +Compile options: +Overall options: anchored +Subject length lower bound = 65535 + +/\A\s*((?:[^`]{28500}){4}|a)/I +Capture group count = 1 +Max lookbehind = 1 +Compile options: +Overall options: anchored +Subject length lower bound = 1 + a + 0: a + 1: a + +/(?a)(?()b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Cond + 1 Cond ref + b + Ket + CBra 2 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + +/(?(1)b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + Cond + 1 Cond ref + b + Ket + CBra 1 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + +/(?(R1)b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + Cond + Cond recurse 1 + b + Ket + CBra 1 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + +/(?(DEFINE)b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + Cond + Cond false + b + Ket + CBra 1 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + +/(?(VERSION=10.3)b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + Cond + Cond false + b + Ket + CBra 1 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + +/[aA]b[cC]/IB +------------------------------------------------------------------ + Bra + /i a + b + /i c + Ket + End +------------------------------------------------------------------ 
+Capture group count = 0 +First code unit = 'a' (caseless) +Last code unit = 'c' (caseless) +Subject length lower bound = 3 + +/[cc]abcd/I +Capture group count = 0 +First code unit = 'c' +Last code unit = 'd' +Subject length lower bound = 5 + +/[Cc]abcd/I +Capture group count = 0 +First code unit = 'C' (caseless) +Last code unit = 'd' +Subject length lower bound = 5 + +/[c]abcd/I +Capture group count = 0 +First code unit = 'c' +Last code unit = 'd' +Subject length lower bound = 5 + +/(?:c|C)abcd/I +Capture group count = 0 +First code unit = 'C' (caseless) +Last code unit = 'd' +Subject length lower bound = 5 + +/(a)?a/I +Capture group count = 1 +Starting code units: a +Last code unit = 'a' +Subject length lower bound = 1 + manm + 0: a + +/^(?|(\*)(*napla:\S*_(\2?+.+))|(\w)(?=\S*_(\2?+\1)))+_\2$/ + *abc_12345abc + 0: *abc_12345abc + 1: c + 2: 12345abc + +/^(?|(\*)(*napla:\S*_(\3?+.+))|(\w)(?=\S*_((\2?+\1))))+_\2$/ + *abc_12345abc + 0: *abc_12345abc + 1: c + 2: 12345abc + 3: 12345abc + +/^((\1+)(?C)|\d)+133X$/ + 111133X\=callout_capture +Callout 0: last capture = 2 + 1: 1 + 2: 111 +--->111133X + ^ ^ | +Callout 0: last capture = 2 + 1: 3 + 2: 3 +--->111133X + ^ ^ | +Callout 0: last capture = 2 + 1: 1 + 2: 11 +--->111133X + ^ ^ | +Callout 0: last capture = 2 + 1: 3 + 2: 3 +--->111133X + ^ ^ | + 0: 111133X + 1: 11 + 2: 11 + +/abc/replace=xyz,substitute_replacement_only + 123abc456 + 1: xyz + +/a(?b)c(?d)e/g,replace=X$ONE+${TWO}Z,substitute_replacement_only + "abcde-abcde-" + 2: Xb+dZXb+dZ + +/a(b)c|xyz/g,replace=<$0>,substitute_callout,substitute_replacement_only + abcdefabcpqr + 1(2) Old 0 3 "abc" New 0 5 "" + 2(2) Old 6 9 "abc" New 5 10 "" + 2: + abxyzpqrabcxyz + 1(1) Old 2 5 "xyz" New 0 5 "" + 2(2) Old 8 11 "abc" New 5 10 "" + 3(1) Old 11 14 "xyz" New 10 15 "" + 3: + 12abc34xyz99abc55\=substitute_stop=2 + 1(2) Old 2 5 "abc" New 0 5 "" + 2(1) Old 7 10 "xyz" New 5 10 " STOPPED" + 2: + 12abc34xyz99abc55\=substitute_skip=1 + 1(2) Old 2 5 "abc" New 0 5 " SKIPPED" + 2(1) 
Old 7 10 "xyz" New 0 5 "" + 3(2) Old 12 15 "abc" New 5 10 "" + 3: + 12abc34xyz99abc55\=substitute_skip=2 + 1(2) Old 2 5 "abc" New 0 5 "" + 2(1) Old 7 10 "xyz" New 5 10 " SKIPPED" + 3(2) Old 12 15 "abc" New 5 10 "" + 3: + +/a(..)d/replace=>$1<,substitute_matched + xyzabcdxyzabcdxyz + 1: xyz>bcbc$1<,substitute_matched + xyzabcdxyzabcdxyz + 2: xyz>bcbcbcbc$1<,substitute_matched + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + 3: xyz><>bcbc$1<,substitute_matched + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + 1: xyz>$1< + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + 1: xyz>$1< + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + 3: xyz><>bcbc" 00 "<).."/hex,mark,no_start_optimize + AB + 0: AB +MK: >\x00< + A\=ph +Partial match, mark=>\x00<: A +\= Expect no match + A +No match, mark = >\x00< + +/"(*MARK:>" 00 "<).(?C1)."/hex,mark,no_start_optimize + AB +--->AB + 1 ^^ . +Latest Mark: >\x00< + 0: AB +MK: >\x00< + +/(?(VERSION=0.0/ +Failed: error 179 at offset 14: syntax error or number too big in (?(VERSION condition + +# Perl has made \K in lookarounds an error. PCRE2 now rejects as well, unless +# explicitly authorized. 
+ +/(?=a\Kb)ab/ +Failed: error 199 at offset 10: \K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) + +/(?=a\Kb)ab/allow_lookaround_bsk + ab + 0: b + +/(?!a\Kb)ac/ +Failed: error 199 at offset 10: \K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) + +/(?!a\Kb)ac/allow_lookaround_bsk + ac + 0: ac + +/^abc(?<=b\Kc)d/ +Failed: error 199 at offset 14: \K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) + +/^abc(?<=b\Kc)d/allow_lookaround_bsk + abcd + 0: cd + +/^abc(?X<\=null_replacement +Failed: error -51: NULL argument passed with non-zero length + +/X+/replace=[20] + >XX<\=null_replacement + 1: >< + +# --------- + +/[Aa]{2}/BI +------------------------------------------------------------------ + Bra + /i A{2} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'A' (caseless) +Last code unit = 'A' (caseless) +Subject length lower bound = 2 + aabcd + 0: aa + +/A{2}/iBI +------------------------------------------------------------------ + Bra + /i A{2} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless +First code unit = 'A' (caseless) +Last code unit = 'A' (caseless) +Subject length lower bound = 2 + aabcd + 0: aa + +/[Aa]{2,3}/BI +------------------------------------------------------------------ + Bra + /i A{2} + /i A?+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'A' (caseless) +Last code unit = 'A' (caseless) +Subject length lower bound = 2 + aabcd + 0: aa + +-- + \[X]{-10} +** Zero or negative repeat not allowed + +# Check imposition of maximum by match_data_create(). + +/abcd/ + abcd\=ovector=65536 + 0: abcd + +# Use recurse to test \K and Mark in atomic scope. 
+/(?>this line\s*((?R)|)\K)/ + this line this line this line + 0: + 1: this line this line + +/(?>this line\s*((?R)|)(*MARK:A))/ + this line this line this line + 0: this line this line this line + 1: this line this line + +# Check use of NULL pattern with zero length. + +//null_pattern,use_length + abc + 0: + +//null_pattern +Failed: error 116 at offset 0: pattern passed as NULL with non-zero length + +/bad null pattern/null_pattern,use_length +Failed: error 116 at offset 0: pattern passed as NULL with non-zero length + +/bad null pattern/null_pattern +Failed: error 116 at offset 0: pattern passed as NULL with non-zero length + +# -------- Variable length lookbehinds -------- +/12345(?<=\d{1,256})X/ +Failed: error 200 at offset 5: branch too long in variable-length lookbehind assertion + +/(?<=(\d{1,256}))X/max_varlookbehind=256 + 12345XYZ + 0: X + 1: 12345 + +/12345(?<=a?bc)X/max_varlookbehind=0 +Failed: error 200 at offset 5: branch too long in variable-length lookbehind assertion + +/12345(?<=abc)X/max_varlookbehind=0 + +/(?a)|(?Pb))(?P=same))+/g,dupnames + bbbaaabaabb + 0: bbbaaaba + 1: a + 2: b + 0: bb + 1: + 2: b + +# -------- + +/ +/anchored, firstline + \x0a + 0: \x0a + +/ +/anchored,firstline,no_start_optimize + \x0a + 0: \x0a + +/ +/firstline + \x0a + 0: \x0a + abc\x0adef + 0: \x0a + +# This test is currently broken in the interpreter +# /|a(?0)/endanchored +# aaaa + +/A +/extended + +/(*ACCEPT)+/B,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 10 + SBra + *ACCEPT + KetRmax + Callout 255 10 0 + Ket + End +------------------------------------------------------------------ + +/a\z/ + a + 0: a + a\=noteol + 0: a + +# This matches a character that only exists once in the subject, sort of like a +# hypothetical "(.)(?bd + +0 ^ a? 
+ +2 ^ (?= + +5 ^ b + +8 ^ ) + +9 ^ d ++10 ^^ End of pattern + 0: d + +/a?(?=bc|)\bd/I +Capture group count = 0 +Max lookbehind = 1 +Starting code units: a d +Last code unit = 'd' +Subject length lower bound = 1 + bd +No match + +/(?0)/ + abc\=disable_recurseloop_check,match_limit=100 +Failed: error -47: match limit exceeded + +/(a(?1)z||(?1)++)$/ + abcd\=disable_recurseloop_check + 0: + 1: + +/(((?<=123?456456|ABC)))(?<=\2)../ + ABCDEFG + 0: DE + 1: + 2: + 12345645678910 + 0: 78 + 1: + 2: + +# This test is crashing Perl 5.38.2. + +/[^\S\W]{6}/ + .abc def.. +No match + +# End of testinput2 +Error -70: PCRE2_ERROR_BADDATA (unknown error number) +Error -62: bad serialized data +Error -2: partial match +Error -1: no match +Error 0: PCRE2_ERROR_BADDATA (unknown error number) +Error 100: no error +Error 101: \ at end of pattern +Error 191: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode +Error 300: PCRE2_ERROR_BADDATA (unknown error number) diff --git a/testdata/testoutput20 b/testdata/testoutput20 new file mode 100644 index 0000000..5ce720f --- /dev/null +++ b/testdata/testoutput20 @@ -0,0 +1,161 @@ +# This set of tests exercises the serialization/deserialization and code copy +# functions in the library. It does not use UTF or JIT. + +#forbid_utf + +# Compile several patterns, push them onto the stack, and then write them +# all to a file. + +#pattern push + +/(?(?&NAME_PAT))\s+(?(?&ADDRESS_PAT)) + (?(DEFINE) + (?[a-z]+) + (?\d+) + )/x +/^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i + +#save testsaved1 + +# Do it again for some more patterns. + +/(*MARK:A)(*SKIP:B)(C|X)/mark +** Ignored when compiled pattern is stacked with 'push': mark +/(?:(?foo)|(?bar))\k/dupnames + +#save testsaved2 +#pattern -push + +# Reload the patterns, then pop them one by one and check them. 
+ +#load testsaved1 +#load testsaved2 + +#pop info +Capture group count = 2 +Max back reference = 2 +Named capture groups: + n 1 + n 2 +Options: dupnames +Starting code units: b f +Subject length lower bound = 6 + foofoo + 0: foofoo + 1: foo + barbar + 0: barbar + 1: + 2: bar + +#pop mark + C + 0: C + 1: C +MK: A +\= Expect no match + D +No match, mark = A + +#pop + AmanaplanacanalPanama + 0: AmanaplanacanalPanama + 1: + 2: + 3: AmanaplanacanalPanama + 4: A + +#pop info +Capture group count = 4 +Named capture groups: + ADDR 2 + ADDRESS_PAT 4 + NAME 1 + NAME_PAT 3 +Options: extended +Subject length lower bound = 3 + metcalfe 33 + 0: metcalfe 33 + 1: metcalfe + 2: 33 + +# Check for an error when different tables are used. + +/abc/push,tables=1 +/xyz/push,tables=2 +#save testsaved1 +Serialization failed: error -30: patterns do not all use the same character tables + +#pop + xyz + 0: xyz + +#pop + abc + 0: abc + +#pop should give an error +** Can't pop off an empty stack + pqr + +/abcd/pushcopy + abcd + 0: abcd + +#pop + abcd + 0: abcd + +#pop should give an error +** Can't pop off an empty stack + +/abcd/push +#popcopy + abcd + 0: abcd + +#pop + abcd + 0: abcd + +/abcd/push +#save testsaved1 +#pop should give an error +** Can't pop off an empty stack + +#load testsaved1 +#popcopy + abcd + 0: abcd + +#pop + abcd + 0: abcd + +#pop should give an error +** Can't pop off an empty stack + +/abcd/pushtablescopy + abcd + 0: abcd + +#popcopy + abcd + 0: abcd + +#pop + abcd + 0: abcd + +# Must only specify one of these + +//push,pushcopy +** Not allowed together: push pushcopy + +//push,pushtablescopy +** Not allowed together: push pushtablescopy + +//pushcopy,pushtablescopy +** Not allowed together: pushcopy pushtablescopy + +# End of testinput20 diff --git a/testdata/testoutput21 b/testdata/testoutput21 new file mode 100644 index 0000000..fbd7400 --- /dev/null +++ b/testdata/testoutput21 @@ -0,0 +1,94 @@ +# These are tests of \C that do not involve UTF. 
They are not run when \C is +# disabled by compiling with --enable-never-backslash-C. + +/\C+\D \C+\d \C+\S \C+\s \C+\W \C+\w \C+. \C+\R \C+\H \C+\h \C+\V \C+\v \C+\Z \C+\z \C+$/Bx +------------------------------------------------------------------ + Bra + AllAny+ + \D + AllAny+ + \d + AllAny+ + \S + AllAny+ + \s + AllAny+ + \W + AllAny+ + \w + AllAny+ + Any + AllAny+ + \R + AllAny+ + \H + AllAny+ + \h + AllAny+ + \V + AllAny+ + \v + AllAny+ + \Z + AllAny++ + \z + AllAny+ + $ + Ket + End +------------------------------------------------------------------ + +/\D+\C \d+\C \S+\C \s+\C \W+\C \w+\C .+\C \R+\C \H+\C \h+\C \V+\C \v+\C a+\C \n+\C \C+\C/Bx +------------------------------------------------------------------ + Bra + \D+ + AllAny + \d+ + AllAny + \S+ + AllAny + \s+ + AllAny + \W+ + AllAny + \w+ + AllAny + Any+ + AllAny + \R+ + AllAny + \H+ + AllAny + \h+ + AllAny + \V+ + AllAny + \v+ + AllAny + a+ + AllAny + \x0a+ + AllAny + AllAny+ + AllAny + Ket + End +------------------------------------------------------------------ + +/ab\Cde/never_backslash_c +Failed: error 183 at offset 4: using \C is disabled by the application + +/ab\Cde/info +Capture group count = 0 +Contains \C +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 5 + abXde + 0: abXde + +/(?<=ab\Cde)X/ + abZdeX + 0: X + +# End of testinput21 diff --git a/testdata/testoutput22-16 b/testdata/testoutput22-16 new file mode 100644 index 0000000..5421854 --- /dev/null +++ b/testdata/testoutput22-16 @@ -0,0 +1,182 @@ +# Tests of \C when Unicode support is available. Note that \C is not supported +# for DFA matching in UTF mode, so this test is not run with -dfa. The output +# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match +# in some widths and not in others. 
+ +/ab\Cde/utf,info +Capture group count = 0 +Contains \C +Options: utf +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 2 + abXde + 0: abXde + +# This should produce an error diagnostic (\C in UTF lookbehind) in 8-bit and +# 16-bit modes, but not in 32-bit mode. + +/(?<=ab\Cde)X/utf +Failed: error 136 at offset 0: \C is not allowed in a lookbehind assertion in UTF-16 mode + ab!deXYZ + +# Autopossessification tests + +/\C+\X \X+\C/Bx +------------------------------------------------------------------ + Bra + AllAny+ + extuni + extuni+ + AllAny + Ket + End +------------------------------------------------------------------ + +/\C+\X \X+\C/Bx,utf +------------------------------------------------------------------ + Bra + Anybyte+ + extuni + extuni+ + Anybyte + Ket + End +------------------------------------------------------------------ + +/\C\X*TӅ; +{0,6}\v+ F +/utf +\= Expect no match + Ӆ\x0a +No match + +/\C(\W?Å¿)'?{{/utf +\= Expect no match + \\C(\\W?Å¿)'?{{ +No match + +/X(\C{3})/utf + X\x{1234} +No match + X\x{11234}Y + 0: X\x{11234}Y + 1: \x{11234}Y + X\x{11234}YZ + 0: X\x{11234}Y + 1: \x{11234}Y + +/X(\C{4})/utf + X\x{1234}YZ +No match + X\x{11234}YZ + 0: X\x{11234}YZ + 1: \x{11234}YZ + X\x{11234}YZW + 0: X\x{11234}YZ + 1: \x{11234}YZ + +/X\C*/utf + XYZabcdce + 0: XYZabcdce + +/X\C*?/utf + XYZabcde + 0: X + +/X\C{3,5}/utf + Xabcdefg + 0: Xabcde + X\x{1234} +No match + X\x{1234}YZ + 0: X\x{1234}YZ + X\x{1234}\x{512} +No match + X\x{1234}\x{512}YZ + 0: X\x{1234}\x{512}YZ + X\x{11234}Y + 0: X\x{11234}Y + X\x{11234}YZ + 0: X\x{11234}YZ + X\x{11234}\x{512} + 0: X\x{11234}\x{512} + X\x{11234}\x{512}YZ + 0: X\x{11234}\x{512}YZ + X\x{11234}\x{512}\x{11234}Z + 0: X\x{11234}\x{512}\x{11234} + +/X\C{3,5}?/utf + Xabcdefg + 0: Xabc + X\x{1234} +No match + X\x{1234}YZ + 0: X\x{1234}YZ + X\x{1234}\x{512} +No match + X\x{11234}Y + 0: X\x{11234}Y + X\x{11234}YZ + 0: X\x{11234}Y + X\x{11234}\x{512}YZ + 0: X\x{11234}\x{512} + X\x{11234} +No match + 
+/a\Cb/utf + aXb + 0: aXb + a\nb + 0: a\x{0a}b + a\x{100}b + 0: a\x{100}b + +/a\C\Cb/utf + a\x{100}b +No match + a\x{12257}b + 0: a\x{12257}b + a\x{12257}\x{11234}b +No match + +/ab\Cde/utf + abXde + 0: abXde + +# This one is here not because it's different to Perl, but because the way +# the captured single code unit is displayed. (In Perl it becomes a character, +# and you can't tell the difference.) + +/X(\C)(.*)/utf + X\x{1234} + 0: X\x{1234} + 1: \x{1234} + 2: + X\nabc + 0: X\x{0a}abc + 1: \x{0a} + 2: abc + +# This one is here because Perl gives out a grumbly error message (quite +# correctly, but that messes up comparisons). + +/a\Cb/utf +\= Expect no match in 8-bit mode + a\x{100}b + 0: a\x{100}b + +/^ab\C/utf,no_start_optimize +\= Expect no match - tests \C at end of subject + ab +No match + +/\C[^\v]+\x80/utf + [Aá¿»BŀC] +No match + +/\C[^\d]+\x80/utf + [Aá¿»BŀC] +No match + +# End of testinput22 diff --git a/testdata/testoutput22-32 b/testdata/testoutput22-32 new file mode 100644 index 0000000..e96696a --- /dev/null +++ b/testdata/testoutput22-32 @@ -0,0 +1,180 @@ +# Tests of \C when Unicode support is available. Note that \C is not supported +# for DFA matching in UTF mode, so this test is not run with -dfa. The output +# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match +# in some widths and not in others. + +/ab\Cde/utf,info +Capture group count = 0 +Contains \C +Options: utf +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 5 + abXde + 0: abXde + +# This should produce an error diagnostic (\C in UTF lookbehind) in 8-bit and +# 16-bit modes, but not in 32-bit mode. 
+ +/(?<=ab\Cde)X/utf + ab!deXYZ + 0: X + +# Autopossessification tests + +/\C+\X \X+\C/Bx +------------------------------------------------------------------ + Bra + AllAny+ + extuni + extuni+ + AllAny + Ket + End +------------------------------------------------------------------ + +/\C+\X \X+\C/Bx,utf +------------------------------------------------------------------ + Bra + AllAny+ + extuni + extuni+ + AllAny + Ket + End +------------------------------------------------------------------ + +/\C\X*TӅ; +{0,6}\v+ F +/utf +\= Expect no match + Ӆ\x0a +No match + +/\C(\W?Å¿)'?{{/utf +\= Expect no match + \\C(\\W?Å¿)'?{{ +No match + +/X(\C{3})/utf + X\x{1234} +No match + X\x{11234}Y +No match + X\x{11234}YZ + 0: X\x{11234}YZ + 1: \x{11234}YZ + +/X(\C{4})/utf + X\x{1234}YZ +No match + X\x{11234}YZ +No match + X\x{11234}YZW + 0: X\x{11234}YZW + 1: \x{11234}YZW + +/X\C*/utf + XYZabcdce + 0: XYZabcdce + +/X\C*?/utf + XYZabcde + 0: X + +/X\C{3,5}/utf + Xabcdefg + 0: Xabcde + X\x{1234} +No match + X\x{1234}YZ + 0: X\x{1234}YZ + X\x{1234}\x{512} +No match + X\x{1234}\x{512}YZ + 0: X\x{1234}\x{512}YZ + X\x{11234}Y +No match + X\x{11234}YZ + 0: X\x{11234}YZ + X\x{11234}\x{512} +No match + X\x{11234}\x{512}YZ + 0: X\x{11234}\x{512}YZ + X\x{11234}\x{512}\x{11234}Z + 0: X\x{11234}\x{512}\x{11234}Z + +/X\C{3,5}?/utf + Xabcdefg + 0: Xabc + X\x{1234} +No match + X\x{1234}YZ + 0: X\x{1234}YZ + X\x{1234}\x{512} +No match + X\x{11234}Y +No match + X\x{11234}YZ + 0: X\x{11234}YZ + X\x{11234}\x{512}YZ + 0: X\x{11234}\x{512}Y + X\x{11234} +No match + +/a\Cb/utf + aXb + 0: aXb + a\nb + 0: a\x{0a}b + a\x{100}b + 0: a\x{100}b + +/a\C\Cb/utf + a\x{100}b +No match + a\x{12257}b +No match + a\x{12257}\x{11234}b + 0: a\x{12257}\x{11234}b + +/ab\Cde/utf + abXde + 0: abXde + +# This one is here not because it's different to Perl, but because the way +# the captured single code unit is displayed. (In Perl it becomes a character, +# and you can't tell the difference.) 
+ +/X(\C)(.*)/utf + X\x{1234} + 0: X\x{1234} + 1: \x{1234} + 2: + X\nabc + 0: X\x{0a}abc + 1: \x{0a} + 2: abc + +# This one is here because Perl gives out a grumbly error message (quite +# correctly, but that messes up comparisons). + +/a\Cb/utf +\= Expect no match in 8-bit mode + a\x{100}b + 0: a\x{100}b + +/^ab\C/utf,no_start_optimize +\= Expect no match - tests \C at end of subject + ab +No match + +/\C[^\v]+\x80/utf + [Aá¿»BŀC] +No match + +/\C[^\d]+\x80/utf + [Aá¿»BŀC] +No match + +# End of testinput22 diff --git a/testdata/testoutput22-8 b/testdata/testoutput22-8 new file mode 100644 index 0000000..eab410e --- /dev/null +++ b/testdata/testoutput22-8 @@ -0,0 +1,184 @@ +# Tests of \C when Unicode support is available. Note that \C is not supported +# for DFA matching in UTF mode, so this test is not run with -dfa. The output +# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match +# in some widths and not in others. + +/ab\Cde/utf,info +Capture group count = 0 +Contains \C +Options: utf +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 2 + abXde + 0: abXde + +# This should produce an error diagnostic (\C in UTF lookbehind) in 8-bit and +# 16-bit modes, but not in 32-bit mode. 
+ +/(?<=ab\Cde)X/utf +Failed: error 136 at offset 0: \C is not allowed in a lookbehind assertion in UTF-8 mode + ab!deXYZ + +# Autopossessification tests + +/\C+\X \X+\C/Bx +------------------------------------------------------------------ + Bra + AllAny+ + extuni + extuni+ + AllAny + Ket + End +------------------------------------------------------------------ + +/\C+\X \X+\C/Bx,utf +------------------------------------------------------------------ + Bra + Anybyte+ + extuni + extuni+ + Anybyte + Ket + End +------------------------------------------------------------------ + +/\C\X*TӅ; +{0,6}\v+ F +/utf +\= Expect no match + Ӆ\x0a +No match + +/\C(\W?Å¿)'?{{/utf +\= Expect no match + \\C(\\W?Å¿)'?{{ +No match + +/X(\C{3})/utf + X\x{1234} + 0: X\x{1234} + 1: \x{1234} + X\x{11234}Y + 0: X\x{f0}\x{91}\x{88} + 1: \x{f0}\x{91}\x{88} + X\x{11234}YZ + 0: X\x{f0}\x{91}\x{88} + 1: \x{f0}\x{91}\x{88} + +/X(\C{4})/utf + X\x{1234}YZ + 0: X\x{1234}Y + 1: \x{1234}Y + X\x{11234}YZ + 0: X\x{11234} + 1: \x{11234} + X\x{11234}YZW + 0: X\x{11234} + 1: \x{11234} + +/X\C*/utf + XYZabcdce + 0: XYZabcdce + +/X\C*?/utf + XYZabcde + 0: X + +/X\C{3,5}/utf + Xabcdefg + 0: Xabcde + X\x{1234} + 0: X\x{1234} + X\x{1234}YZ + 0: X\x{1234}YZ + X\x{1234}\x{512} + 0: X\x{1234}\x{512} + X\x{1234}\x{512}YZ + 0: X\x{1234}\x{512} + X\x{11234}Y + 0: X\x{11234}Y + X\x{11234}YZ + 0: X\x{11234}Y + X\x{11234}\x{512} + 0: X\x{11234}\x{d4} + X\x{11234}\x{512}YZ + 0: X\x{11234}\x{d4} + X\x{11234}\x{512}\x{11234}Z + 0: X\x{11234}\x{d4} + +/X\C{3,5}?/utf + Xabcdefg + 0: Xabc + X\x{1234} + 0: X\x{1234} + X\x{1234}YZ + 0: X\x{1234} + X\x{1234}\x{512} + 0: X\x{1234} + X\x{11234}Y + 0: X\x{f0}\x{91}\x{88} + X\x{11234}YZ + 0: X\x{f0}\x{91}\x{88} + X\x{11234}\x{512}YZ + 0: X\x{f0}\x{91}\x{88} + X\x{11234} + 0: X\x{f0}\x{91}\x{88} + +/a\Cb/utf + aXb + 0: aXb + a\nb + 0: a\x{0a}b + a\x{100}b +No match + +/a\C\Cb/utf + a\x{100}b + 0: a\x{100}b + a\x{12257}b +No match + a\x{12257}\x{11234}b +No match + +/ab\Cde/utf + 
abXde + 0: abXde + +# This one is here not because it's different to Perl, but because the way +# the captured single code unit is displayed. (In Perl it becomes a character, +# and you can't tell the difference.) + +/X(\C)(.*)/utf + X\x{1234} + 0: X\x{1234} + 1: \x{e1} + 2: \x{88}\x{b4} + X\nabc + 0: X\x{0a}abc + 1: \x{0a} + 2: abc + +# This one is here because Perl gives out a grumbly error message (quite +# correctly, but that messes up comparisons). + +/a\Cb/utf +\= Expect no match in 8-bit mode + a\x{100}b +No match + +/^ab\C/utf,no_start_optimize +\= Expect no match - tests \C at end of subject + ab +No match + +/\C[^\v]+\x80/utf + [Aá¿»BŀC] +No match + +/\C[^\d]+\x80/utf + [Aá¿»BŀC] +No match + +# End of testinput22 diff --git a/testdata/testoutput23 b/testdata/testoutput23 new file mode 100644 index 0000000..c6f0aa2 --- /dev/null +++ b/testdata/testoutput23 @@ -0,0 +1,8 @@ +# This test is run when PCRE2 has been built with --enable-never-backslash-C, +# which disables the use of \C. All we can do is check that it gives the +# correct error message. + +/a\Cb/ +Failed: error 185 at offset 3: using \C is disabled in this PCRE2 library + +# End of testinput23 diff --git a/testdata/testoutput24 b/testdata/testoutput24 new file mode 100644 index 0000000..9c59893 --- /dev/null +++ b/testdata/testoutput24 @@ -0,0 +1,624 @@ +# This file tests the auxiliary pattern conversion features of the PCRE2 +# library, in non-UTF mode. + +#forbid_utf +#newline_default lf any anycrlf + +# -------- Tests of glob conversion -------- + +# Set the glob separator explicitly so that different OS defaults are not a +# problem. Then test various errors. + +#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/ + +/abc/posix +** The convert and posix modifiers are mutually exclusive + +# Separator must be / \ or . 
+ +/a*b/convert_glob_separator=% +** Invalid glob separator '%' + +# Can't have separator in a class + +"[ab/cd]" +(?s)\A[ab/cd](?/ +(?s)\A<[a-c\-d]>\z + + 0: + + 0: + + 0: + + 0: + <-> + 0: <-> + +/a[[:digit:].]z/ +(?s)\Aa[[:digit:].]z\z + a1z + 0: a1z + a.z + 0: a.z +\= Expect no match + a:z +No match + +/a[[:digit].]z/ +(?s)\Aa[\[:digit]\.\]z\z + a[.]z + 0: a[.]z + a:.]z + 0: a:.]z + ad.]z + 0: ad.]z + +/<[[:a[:digit:]b]>/ +(?s)\A<[\[:a[:digit:]b]>\z + <[> + 0: <[> + <:> + 0: <:> + + 0: + <9> + 0: <9> + + 0: +\= Expect no match + +No match + +/a*b/convert_glob_separator=\ +(?s)\Aa(*COMMIT)[^\\]*?b\z + +/a*b/convert_glob_separator=. +(?s)\Aa(*COMMIT)[^\.]*?b\z + +/a*b/convert_glob_separator=/ +(?s)\Aa(*COMMIT)[^/]*?b\z + +# Non control character checking + +/A\B\\C\D/ +(?s)\AAB\\CD\z + +/\\{}\?\*+\[\]()|.^$/ +(?s)\A\\\{\}\?\*\+\[\]\(\)\|\.\^\$\z + +/*a*\/*b*/ +(?s)\A[^/]*?a(*COMMIT)[^/]*?/(*COMMIT)[^/]*?b(*COMMIT)[^/]*+\z + +/?a?\/?b?/ +(?s)\A[^/]a[^/]/[^/]b[^/]\z + +/[a\\b\c][]][-][\]\-]/ +(?s)\A[a\\bc][\]][\-][\]\-]\z + +/[^a\\b\c][!]][!-][^\]\-]/ +(?s)\A[^/a\\bc][^/\]][^/\-][^/\]\-]\z + +/[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:word:][:xdigit:]]/ +(?s)\A[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:word:][:xdigit:]](?[^/]*?a)(?>[^/]*?b)(?>[^/]*?g)(?>[^/]*?n)(?>[^/]*?t\z) + abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txt + 0: /abcdefghijklmnop.txt + +/**\/*a*\/**/ +(?s)(?:\A|/)(?>[^/]*?a)(?>[^/]*?/) + xx/xx/xx/xax/xx/xb + 0: /xax/ + +/**\/*a*/ +(?s)(?:\A|/)(?>[^/]*?a)(?>[^/]*+\z) + xx/xx/xx/xax + 0: /xax + xx/xx/xx/xax/xx +No match + +/**\/*a*\/**\/*b*/ +(?s)(?:\A|/)(?>[^/]*?a)(?>[^/]*?/)(*COMMIT)(?:.*?/)??(?>[^/]*?b)(?>[^/]*+\z) + xx/xx/xx/xax/xx/xb + 0: /xax/xx/xb + xx/xx/xx/xax/xx/x +No match + +"**a"convert=glob +(?s)a\z + a + 0: a + c/b/a + 0: a + c/b/aaa + 0: a + +"a**/b"convert=glob +(?s)\Aa(*COMMIT).*?/b\z + a/b + 0: a/b + ab +No match + 
+"a/**b"convert=glob +(?s)\Aa/(*COMMIT).*?b\z + a/b + 0: a/b + ab +No match + +#pattern convert=glob:glob_no_starstar + +/***/ +(?s)\A[^/]*+\z + +/**a**/ +(?s)\A[^/]*?a(*COMMIT)[^/]*+\z + +#pattern convert=unset +#pattern convert=glob:glob_no_wild_separator + +/*/ +(?s) + +/*a*/ +(?s)a + +/**a**/ +(?s)a + +/a*b/ +(?s)\Aa(*COMMIT).*?b\z + +/*a*b*/ +(?s)a(*COMMIT).*?b + +/??a??/ +(?s)\A..a..\z + +#pattern convert=unset +#pattern convert=glob,convert_glob_escape=0 + +/a\b\cd/ +(?s)\Aa\\b\\cd\z + +/**\/a/ +(?s)\\/a\z + +/a`*b/convert_glob_escape=` +(?s)\Aa\*b\z + +/a`*b/convert_glob_escape=0 +(?s)\Aa`(*COMMIT)[^/]*?b\z + +/a`*b/convert_glob_escape=x +** Invalid glob escape 'x' + +# -------- Tests of extended POSIX conversion -------- + +#pattern convert=unset:posix_extended + +/<[[:a[:digit:]b]>/ +(*NUL)<[[:a[:digit:]b]> + <[> + 0: <[> + <:> + 0: <:> + + 0: + <9> + 0: <9> + + 0: +\= Expect no match + +No match + +/a+\1b\\c|d[ab\c]/ +(*NUL)a+1b\\c|d[ab\\c] + +/<[]bc]>/ +(*NUL)<[]bc]> + <]> + 0: <]> + + 0: + + 0: + +/<[^]bc]>/ +(*NUL)<[^]bc]> + <.> + 0: <.> +\= Expect no match + <]> +No match + +No match + +/(a)\1b/ +(*NUL)(a)1b + a1b + 0: a1b + 1: a +\= Expect no match + aab +No match + +/(ab)c)d]/ +(*NUL)(ab)c\)d\] + Xabc)d]Y + 0: abc)d] + 1: ab + +/a***b/ +(*NUL)a*b + +# -------- Tests of basic POSIX conversion -------- + +#pattern convert=unset:posix_basic + +/a*b+c\+[def](ab)\(cd\)/ +(*NUL)a*b\+c\+[def]\(ab\)(cd) + +/\(a\)\1b/ +(*NUL)(a)\1b + aab + 0: aab + 1: a +\= Expect no match + a1b +No match + +/how.to how\.to/ +(*NUL)how.to how\.to + how\nto how.to + 0: how\x0ato how.to +\= Expect no match + how\x{0}to how.to +No match + +/^how to \^how to/ +(*NUL)^how to \^how to + +/^*abc/ +(*NUL)^\*abc + +/*abc/ +(*NUL)\*abc + X*abcY + 0: *abc + +/**abc/ +(*NUL)\**abc + XabcY + 0: abc + X*abcY + 0: *abc + X**abcY + 0: **abc + +/*ab\(*cd\)/ +(*NUL)\*ab(\*cd) + +/^b\(c^d\)\(^e^f\)/ +(*NUL)^b(c\^d)(^e\^f) + +/a***b/ +(*NUL)a*b + +# End of testinput24 diff --git 
a/testdata/testoutput25 b/testdata/testoutput25 new file mode 100644 index 0000000..0ffbd13 --- /dev/null +++ b/testdata/testoutput25 @@ -0,0 +1,25 @@ +# This file tests the auxiliary pattern conversion features of the PCRE2 +# library, in UTF mode. + +#newline_default lf any anycrlf + +# -------- Tests of glob conversion -------- + +# Set the glob separator explicitly so that different OS defaults are not a +# problem. Then test various errors. + +#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/ + +# The fact that this one works in 13 bytes in the 8-bit library shows that the +# output is in UTF-8, though pcre2test shows the character as an escape. + +/'>' c4 a3 '<'/hex,utf,convert_length=13 +(?s)\A>\x{123}<\z + +# This expansion creates a string that is too long for the input buffer. + +/\[()]{65535}()/expand +(?s)\A\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(
\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\(\)\z +** Pattern conversion is too long for the buffer + +# End of testinput25 diff --git a/testdata/testoutput26 b/testdata/testoutput26 new file mode 100644 index 0000000..b951360 --- /dev/null +++ b/testdata/testoutput26 @@ -0,0 +1,3511 @@ +# These tests are generated by maint/GenerateTest26.py, do not edit. + +# Unicode Script Extension tests. 
+ +# Base script check +/^\p{sc=Latin}/utf + A + 0: A + +/^\p{Script=Latn}/utf + \x{1df2a} + 0: \x{1df2a} + +# Script extension check +/^\p{Latin}/utf + \x{363} + 0: \x{363} + +/^\p{scx=Latn}/utf + \x{a92e} + 0: \x{a92e} + +# Script extension only character +/^\p{Latin}/utf + \x{363} + 0: \x{363} + +/^\p{sc=Latin}/utf + \x{363} +No match + +# Character not in script +/^\p{Latin}/utf + \x{1df2b} +No match + +# Base script check +/^\p{sc=Greek}/utf + \x{370} + 0: \x{370} + +/^\p{Script=Grek}/utf + \x{1d245} + 0: \x{1d245} + +# Script extension check +/^\p{Greek}/utf + \x{342} + 0: \x{342} + +/^\p{Script_Extensions=Grek}/utf + \x{1dc1} + 0: \x{1dc1} + +# Script extension only character +/^\p{Greek}/utf + \x{342} + 0: \x{342} + +/^\p{sc=Greek}/utf + \x{342} +No match + +# Character not in script +/^\p{Greek}/utf + \x{1d246} +No match + +# Base script check +/^\p{sc=Cyrillic}/utf + \x{400} + 0: \x{400} + +/^\p{Script=Cyrl}/utf + \x{1e08f} + 0: \x{1e08f} + +# Script extension check +/^\p{Cyrillic}/utf + \x{483} + 0: \x{483} + +/^\p{scx=Cyrl}/utf + \x{a66f} + 0: \x{a66f} + +# Script extension only character +/^\p{Cyrillic}/utf + \x{2e43} + 0: \x{2e43} + +/^\p{sc=Cyrillic}/utf + \x{2e43} +No match + +# Character not in script +/^\p{Cyrillic}/utf + \x{1e090} +No match + +# Base script check +/^\p{sc=Arabic}/utf + \x{600} + 0: \x{600} + +/^\p{Script=Arab}/utf + \x{1eef1} + 0: \x{1eef1} + +# Script extension check +/^\p{Arabic}/utf + \x{60c} + 0: \x{60c} + +/^\p{Script_Extensions=Arab}/utf + \x{102fb} + 0: \x{102fb} + +# Script extension only character +/^\p{Arabic}/utf + \x{102e0} + 0: \x{102e0} + +/^\p{sc=Arabic}/utf + \x{102e0} +No match + +# Character not in script +/^\p{Arabic}/utf + \x{1eef2} +No match + +# Base script check +/^\p{sc=Syriac}/utf + \x{700} + 0: \x{700} + +/^\p{Script=Syrc}/utf + \x{86a} + 0: \x{86a} + +# Script extension check +/^\p{Syriac}/utf + \x{60c} + 0: \x{60c} + +/^\p{scx=Syrc}/utf + \x{1dfa} + 0: \x{1dfa} + +# Script extension only character 
+/^\p{Syriac}/utf + \x{1dfa} + 0: \x{1dfa} + +/^\p{sc=Syriac}/utf + \x{1dfa} +No match + +# Character not in script +/^\p{Syriac}/utf + \x{1dfb} +No match + +# Base script check +/^\p{sc=Thaana}/utf + \x{780} + 0: \x{780} + +/^\p{Script=Thaa}/utf + \x{7b1} + 0: \x{7b1} + +# Script extension check +/^\p{Thaana}/utf + \x{60c} + 0: \x{60c} + +/^\p{Script_Extensions=Thaa}/utf + \x{fdfd} + 0: \x{fdfd} + +# Script extension only character +/^\p{Thaana}/utf + \x{fdf2} + 0: \x{fdf2} + +/^\p{sc=Thaana}/utf + \x{fdf2} +No match + +# Character not in script +/^\p{Thaana}/utf + \x{fdfe} +No match + +# Base script check +/^\p{sc=Devanagari}/utf + \x{900} + 0: \x{900} + +/^\p{Script=Deva}/utf + \x{11b09} + 0: \x{11b09} + +# Script extension check +/^\p{Devanagari}/utf + \x{951} + 0: \x{951} + +/^\p{scx=Deva}/utf + \x{a8f3} + 0: \x{a8f3} + +# Script extension only character +/^\p{Devanagari}/utf + \x{1cd1} + 0: \x{1cd1} + +/^\p{sc=Devanagari}/utf + \x{1cd1} +No match + +# Character not in script +/^\p{Devanagari}/utf + \x{11b0a} +No match + +# Base script check +/^\p{sc=Bengali}/utf + \x{980} + 0: \x{980} + +/^\p{Script=Beng}/utf + \x{9fe} + 0: \x{9fe} + +# Script extension check +/^\p{Bengali}/utf + \x{951} + 0: \x{951} + +/^\p{Script_Extensions=Beng}/utf + \x{a8f1} + 0: \x{a8f1} + +# Script extension only character +/^\p{Bengali}/utf + \x{1cf7} + 0: \x{1cf7} + +/^\p{sc=Bengali}/utf + \x{1cf7} +No match + +# Character not in script +/^\p{Bengali}/utf + \x{a8f2} +No match + +# Base script check +/^\p{sc=Gurmukhi}/utf + \x{a01} + 0: \x{a01} + +/^\p{Script=Guru}/utf + \x{a76} + 0: \x{a76} + +# Script extension check +/^\p{Gurmukhi}/utf + \x{951} + 0: \x{951} + +/^\p{scx=Guru}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Gurmukhi}/utf + \x{a836} + 0: \x{a836} + +/^\p{sc=Gurmukhi}/utf + \x{a836} +No match + +# Character not in script +/^\p{Gurmukhi}/utf + \x{a83a} +No match + +# Base script check +/^\p{sc=Gujarati}/utf + \x{a81} + 0: \x{a81} + 
+/^\p{Script=Gujr}/utf + \x{aff} + 0: \x{aff} + +# Script extension check +/^\p{Gujarati}/utf + \x{951} + 0: \x{951} + +/^\p{Script_Extensions=Gujr}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Gujarati}/utf + \x{a836} + 0: \x{a836} + +/^\p{sc=Gujarati}/utf + \x{a836} +No match + +# Character not in script +/^\p{Gujarati}/utf + \x{a83a} +No match + +# Base script check +/^\p{sc=Oriya}/utf + \x{b01} + 0: \x{b01} + +/^\p{Script=Orya}/utf + \x{b77} + 0: \x{b77} + +# Script extension check +/^\p{Oriya}/utf + \x{951} + 0: \x{951} + +/^\p{scx=Orya}/utf + \x{1cf2} + 0: \x{1cf2} + +# Script extension only character +/^\p{Oriya}/utf + \x{1cda} + 0: \x{1cda} + +/^\p{sc=Oriya}/utf + \x{1cda} +No match + +# Character not in script +/^\p{Oriya}/utf + \x{1cf3} +No match + +# Base script check +/^\p{sc=Tamil}/utf + \x{b82} + 0: \x{b82} + +/^\p{Script=Taml}/utf + \x{11fff} + 0: \x{11fff} + +# Script extension check +/^\p{Tamil}/utf + \x{951} + 0: \x{951} + +/^\p{Script_Extensions=Taml}/utf + \x{11fd3} + 0: \x{11fd3} + +# Script extension only character +/^\p{Tamil}/utf + \x{a8f3} + 0: \x{a8f3} + +/^\p{sc=Tamil}/utf + \x{a8f3} +No match + +# Character not in script +/^\p{Tamil}/utf + \x{12000} +No match + +# Base script check +/^\p{sc=Telugu}/utf + \x{c00} + 0: \x{c00} + +/^\p{Script=Telu}/utf + \x{c7f} + 0: \x{c7f} + +# Script extension check +/^\p{Telugu}/utf + \x{951} + 0: \x{951} + +/^\p{scx=Telu}/utf + \x{1cf2} + 0: \x{1cf2} + +# Script extension only character +/^\p{Telugu}/utf + \x{1cda} + 0: \x{1cda} + +/^\p{sc=Telugu}/utf + \x{1cda} +No match + +# Character not in script +/^\p{Telugu}/utf + \x{1cf3} +No match + +# Base script check +/^\p{sc=Kannada}/utf + \x{c80} + 0: \x{c80} + +/^\p{Script=Knda}/utf + \x{cf3} + 0: \x{cf3} + +# Script extension check +/^\p{Kannada}/utf + \x{951} + 0: \x{951} + +/^\p{Script_Extensions=Knda}/utf + \x{a835} + 0: \x{a835} + +# Script extension only character +/^\p{Kannada}/utf + \x{1cf4} + 0: \x{1cf4} + 
+/^\p{sc=Kannada}/utf + \x{1cf4} +No match + +# Character not in script +/^\p{Kannada}/utf + \x{a836} +No match + +# Base script check +/^\p{sc=Malayalam}/utf + \x{d00} + 0: \x{d00} + +/^\p{Script=Mlym}/utf + \x{d7f} + 0: \x{d7f} + +# Script extension check +/^\p{Malayalam}/utf + \x{951} + 0: \x{951} + +/^\p{scx=Mlym}/utf + \x{a832} + 0: \x{a832} + +# Script extension only character +/^\p{Malayalam}/utf + \x{1cda} + 0: \x{1cda} + +/^\p{sc=Malayalam}/utf + \x{1cda} +No match + +# Character not in script +/^\p{Malayalam}/utf + \x{a833} +No match + +# Base script check +/^\p{sc=Sinhala}/utf + \x{d81} + 0: \x{d81} + +/^\p{Script=Sinh}/utf + \x{111f4} + 0: \x{111f4} + +# Script extension check +/^\p{Sinhala}/utf + \x{964} + 0: \x{964} + +/^\p{Script_Extensions=Sinh}/utf + \x{965} + 0: \x{965} + +# Script extension only character +/^\p{Sinhala}/utf + \x{964} + 0: \x{964} + +/^\p{sc=Sinhala}/utf + \x{964} +No match + +# Character not in script +/^\p{Sinhala}/utf + \x{111f5} +No match + +# Base script check +/^\p{sc=Myanmar}/utf + \x{1000} + 0: \x{1000} + +/^\p{Script=Mymr}/utf + \x{aa7f} + 0: \x{aa7f} + +# Script extension check +/^\p{Myanmar}/utf + \x{1040} + 0: \x{1040} + +/^\p{scx=Mymr}/utf + \x{a92e} + 0: \x{a92e} + +# Script extension only character +/^\p{Myanmar}/utf + \x{a92e} + 0: \x{a92e} + +/^\p{sc=Myanmar}/utf + \x{a92e} +No match + +# Character not in script +/^\p{Myanmar}/utf + \x{aa80} +No match + +# Base script check +/^\p{sc=Georgian}/utf + \x{10a0} + 0: \x{10a0} + +/^\p{Script=Geor}/utf + \x{2d2d} + 0: \x{2d2d} + +# Script extension check +/^\p{Georgian}/utf + \x{10fb} + 0: \x{10fb} + +/^\p{Script_Extensions=Geor}/utf + \x{10fb} + 0: \x{10fb} + +# Script extension only character +/^\p{Georgian}/utf + \x{10fb} + 0: \x{10fb} + +/^\p{sc=Georgian}/utf + \x{10fb} +No match + +# Character not in script +/^\p{Georgian}/utf + \x{2d2e} +No match + +# Base script check +/^\p{sc=Hangul}/utf + \x{1100} + 0: \x{1100} + +/^\p{Script=Hang}/utf + \x{ffdc} + 0: \x{ffdc} + 
+# Script extension check +/^\p{Hangul}/utf + \x{3001} + 0: \x{3001} + +/^\p{scx=Hang}/utf + \x{ff65} + 0: \x{ff65} + +# Script extension only character +/^\p{Hangul}/utf + \x{3003} + 0: \x{3003} + +/^\p{sc=Hangul}/utf + \x{3003} +No match + +# Character not in script +/^\p{Hangul}/utf + \x{ffdd} +No match + +# Base script check +/^\p{sc=Mongolian}/utf + \x{1800} + 0: \x{1800} + +/^\p{Script=Mong}/utf + \x{1166c} + 0: \x{1166c} + +# Script extension check +/^\p{Mongolian}/utf + \x{1802} + 0: \x{1802} + +/^\p{Script_Extensions=Mong}/utf + \x{202f} + 0: \x{202f} + +# Script extension only character +/^\p{Mongolian}/utf + \x{202f} + 0: \x{202f} + +/^\p{sc=Mongolian}/utf + \x{202f} +No match + +# Character not in script +/^\p{Mongolian}/utf + \x{1166d} +No match + +# Base script check +/^\p{sc=Hiragana}/utf + \x{3041} + 0: \x{3041} + +/^\p{Script=Hira}/utf + \x{1f200} + 0: \x{1f200} + +# Script extension check +/^\p{Hiragana}/utf + \x{3001} + 0: \x{3001} + +/^\p{scx=Hira}/utf + \x{ff9f} + 0: \x{ff9f} + +# Script extension only character +/^\p{Hiragana}/utf + \x{3031} + 0: \x{3031} + +/^\p{sc=Hiragana}/utf + \x{3031} +No match + +# Character not in script +/^\p{Hiragana}/utf + \x{1f201} +No match + +# Base script check +/^\p{sc=Katakana}/utf + \x{30a1} + 0: \x{30a1} + +/^\p{Script=Kana}/utf + \x{1b167} + 0: \x{1b167} + +# Script extension check +/^\p{Katakana}/utf + \x{3001} + 0: \x{3001} + +/^\p{Script_Extensions=Kana}/utf + \x{ff9f} + 0: \x{ff9f} + +# Script extension only character +/^\p{Katakana}/utf + \x{3031} + 0: \x{3031} + +/^\p{sc=Katakana}/utf + \x{3031} +No match + +# Character not in script +/^\p{Katakana}/utf + \x{1b168} +No match + +# Base script check +/^\p{sc=Bopomofo}/utf + \x{2ea} + 0: \x{2ea} + +/^\p{Script=Bopo}/utf + \x{31bf} + 0: \x{31bf} + +# Script extension check +/^\p{Bopomofo}/utf + \x{3001} + 0: \x{3001} + +/^\p{scx=Bopo}/utf + \x{ff65} + 0: \x{ff65} + +# Script extension only character +/^\p{Bopomofo}/utf + \x{302a} + 0: \x{302a} + 
+/^\p{sc=Bopomofo}/utf + \x{302a} +No match + +# Character not in script +/^\p{Bopomofo}/utf + \x{ff66} +No match + +# Base script check +/^\p{sc=Han}/utf + \x{2e80} + 0: \x{2e80} + +/^\p{Script=Hani}/utf + \x{323af} + 0: \x{323af} + +# Script extension check +/^\p{Han}/utf + \x{3001} + 0: \x{3001} + +/^\p{Script_Extensions=Hani}/utf + \x{1f251} + 0: \x{1f251} + +# Script extension only character +/^\p{Han}/utf + \x{3006} + 0: \x{3006} + +/^\p{sc=Han}/utf + \x{3006} +No match + +# Character not in script +/^\p{Han}/utf + \x{323b0} +No match + +# Base script check +/^\p{sc=Yi}/utf + \x{a000} + 0: \x{a000} + +/^\p{Script=Yiii}/utf + \x{a4c6} + 0: \x{a4c6} + +# Script extension check +/^\p{Yi}/utf + \x{3001} + 0: \x{3001} + +/^\p{scx=Yiii}/utf + \x{ff65} + 0: \x{ff65} + +# Script extension only character +/^\p{Yi}/utf + \x{3001} + 0: \x{3001} + +/^\p{sc=Yi}/utf + \x{3001} +No match + +# Character not in script +/^\p{Yi}/utf + \x{ff66} +No match + +# Base script check +/^\p{sc=Tagalog}/utf + \x{1700} + 0: \x{1700} + +/^\p{Script=Tglg}/utf + \x{171f} + 0: \x{171f} + +# Script extension check +/^\p{Tagalog}/utf + \x{1735} + 0: \x{1735} + +/^\p{Script_Extensions=Tglg}/utf + \x{1736} + 0: \x{1736} + +# Script extension only character +/^\p{Tagalog}/utf + \x{1735} + 0: \x{1735} + +/^\p{sc=Tagalog}/utf + \x{1735} +No match + +# Character not in script +/^\p{Tagalog}/utf + \x{1737} +No match + +# Base script check +/^\p{sc=Hanunoo}/utf + \x{1720} + 0: \x{1720} + +/^\p{Script=Hano}/utf + \x{1734} + 0: \x{1734} + +# Script extension check +/^\p{Hanunoo}/utf + \x{1735} + 0: \x{1735} + +/^\p{scx=Hano}/utf + \x{1736} + 0: \x{1736} + +# Script extension only character +/^\p{Hanunoo}/utf + \x{1735} + 0: \x{1735} + +/^\p{sc=Hanunoo}/utf + \x{1735} +No match + +# Character not in script +/^\p{Hanunoo}/utf + \x{1737} +No match + +# Base script check +/^\p{sc=Buhid}/utf + \x{1740} + 0: \x{1740} + +/^\p{Script=Buhd}/utf + \x{1753} + 0: \x{1753} + +# Script extension check 
+/^\p{Buhid}/utf + \x{1735} + 0: \x{1735} + +/^\p{Script_Extensions=Buhd}/utf + \x{1736} + 0: \x{1736} + +# Script extension only character +/^\p{Buhid}/utf + \x{1735} + 0: \x{1735} + +/^\p{sc=Buhid}/utf + \x{1735} +No match + +# Character not in script +/^\p{Buhid}/utf + \x{1754} +No match + +# Base script check +/^\p{sc=Tagbanwa}/utf + \x{1760} + 0: \x{1760} + +/^\p{Script=Tagb}/utf + \x{1773} + 0: \x{1773} + +# Script extension check +/^\p{Tagbanwa}/utf + \x{1735} + 0: \x{1735} + +/^\p{scx=Tagb}/utf + \x{1736} + 0: \x{1736} + +# Script extension only character +/^\p{Tagbanwa}/utf + \x{1735} + 0: \x{1735} + +/^\p{sc=Tagbanwa}/utf + \x{1735} +No match + +# Character not in script +/^\p{Tagbanwa}/utf + \x{1774} +No match + +# Base script check +/^\p{sc=Limbu}/utf + \x{1900} + 0: \x{1900} + +/^\p{Script=Limb}/utf + \x{194f} + 0: \x{194f} + +# Script extension check +/^\p{Limbu}/utf + \x{965} + 0: \x{965} + +/^\p{Script_Extensions=Limb}/utf + \x{965} + 0: \x{965} + +# Script extension only character +/^\p{Limbu}/utf + \x{965} + 0: \x{965} + +/^\p{sc=Limbu}/utf + \x{965} +No match + +# Character not in script +/^\p{Limbu}/utf + \x{1950} +No match + +# Base script check +/^\p{sc=Tai_Le}/utf + \x{1950} + 0: \x{1950} + +/^\p{Script=Tale}/utf + \x{1974} + 0: \x{1974} + +# Script extension check +/^\p{Tai_Le}/utf + \x{1040} + 0: \x{1040} + +/^\p{scx=Tale}/utf + \x{1049} + 0: \x{1049} + +# Script extension only character +/^\p{Tai_Le}/utf + \x{1040} + 0: \x{1040} + +/^\p{sc=Tai_Le}/utf + \x{1040} +No match + +# Character not in script +/^\p{Tai_Le}/utf + \x{1975} +No match + +# Base script check +/^\p{sc=Linear_B}/utf + \x{10000} + 0: \x{10000} + +/^\p{Script=Linb}/utf + \x{100fa} + 0: \x{100fa} + +# Script extension check +/^\p{Linear_B}/utf + \x{10100} + 0: \x{10100} + +/^\p{Script_Extensions=Linb}/utf + \x{1013f} + 0: \x{1013f} + +# Script extension only character +/^\p{Linear_B}/utf + \x{10102} + 0: \x{10102} + +/^\p{sc=Linear_B}/utf + \x{10102} +No match + +# Character 
not in script +/^\p{Linear_B}/utf + \x{10140} +No match + +# Base script check +/^\p{sc=Cypriot}/utf + \x{10800} + 0: \x{10800} + +/^\p{Script=Cprt}/utf + \x{1083f} + 0: \x{1083f} + +# Script extension check +/^\p{Cypriot}/utf + \x{10100} + 0: \x{10100} + +/^\p{scx=Cprt}/utf + \x{1013f} + 0: \x{1013f} + +# Script extension only character +/^\p{Cypriot}/utf + \x{10102} + 0: \x{10102} + +/^\p{sc=Cypriot}/utf + \x{10102} +No match + +# Character not in script +/^\p{Cypriot}/utf + \x{10840} +No match + +# Base script check +/^\p{sc=Buginese}/utf + \x{1a00} + 0: \x{1a00} + +/^\p{Script=Bugi}/utf + \x{1a1f} + 0: \x{1a1f} + +# Script extension check +/^\p{Buginese}/utf + \x{a9cf} + 0: \x{a9cf} + +/^\p{Script_Extensions=Bugi}/utf + \x{a9cf} + 0: \x{a9cf} + +# Script extension only character +/^\p{Buginese}/utf + \x{a9cf} + 0: \x{a9cf} + +/^\p{sc=Buginese}/utf + \x{a9cf} +No match + +# Character not in script +/^\p{Buginese}/utf + \x{a9d0} +No match + +# Base script check +/^\p{sc=Coptic}/utf + \x{3e2} + 0: \x{3e2} + +/^\p{Script=Copt}/utf + \x{2cff} + 0: \x{2cff} + +# Script extension check +/^\p{Coptic}/utf + \x{102e0} + 0: \x{102e0} + +/^\p{scx=Copt}/utf + \x{102fb} + 0: \x{102fb} + +# Script extension only character +/^\p{Coptic}/utf + \x{102e0} + 0: \x{102e0} + +/^\p{sc=Coptic}/utf + \x{102e0} +No match + +# Character not in script +/^\p{Coptic}/utf + \x{102fc} +No match + +# Base script check +/^\p{sc=Glagolitic}/utf + \x{2c00} + 0: \x{2c00} + +/^\p{Script=Glag}/utf + \x{1e02a} + 0: \x{1e02a} + +# Script extension check +/^\p{Glagolitic}/utf + \x{484} + 0: \x{484} + +/^\p{Script_Extensions=Glag}/utf + \x{a66f} + 0: \x{a66f} + +# Script extension only character +/^\p{Glagolitic}/utf + \x{484} + 0: \x{484} + +/^\p{sc=Glagolitic}/utf + \x{484} +No match + +# Character not in script +/^\p{Glagolitic}/utf + \x{1e02b} +No match + +# Base script check +/^\p{sc=Syloti_Nagri}/utf + \x{a800} + 0: \x{a800} + +/^\p{Script=Sylo}/utf + \x{a82c} + 0: \x{a82c} + +# Script extension 
check +/^\p{Syloti_Nagri}/utf + \x{964} + 0: \x{964} + +/^\p{scx=Sylo}/utf + \x{9ef} + 0: \x{9ef} + +# Script extension only character +/^\p{Syloti_Nagri}/utf + \x{9e6} + 0: \x{9e6} + +/^\p{sc=Syloti_Nagri}/utf + \x{9e6} +No match + +# Character not in script +/^\p{Syloti_Nagri}/utf + \x{a82d} +No match + +# Base script check +/^\p{sc=Phags_Pa}/utf + \x{a840} + 0: \x{a840} + +/^\p{Script=Phag}/utf + \x{a877} + 0: \x{a877} + +# Script extension check +/^\p{Phags_Pa}/utf + \x{1802} + 0: \x{1802} + +/^\p{Script_Extensions=Phag}/utf + \x{1805} + 0: \x{1805} + +# Script extension only character +/^\p{Phags_Pa}/utf + \x{1802} + 0: \x{1802} + +/^\p{sc=Phags_Pa}/utf + \x{1802} +No match + +# Character not in script +/^\p{Phags_Pa}/utf + \x{a878} +No match + +# Base script check +/^\p{sc=Nko}/utf + \x{7c0} + 0: \x{7c0} + +/^\p{Script=Nkoo}/utf + \x{7ff} + 0: \x{7ff} + +# Script extension check +/^\p{Nko}/utf + \x{60c} + 0: \x{60c} + +/^\p{scx=Nkoo}/utf + \x{fd3f} + 0: \x{fd3f} + +# Script extension only character +/^\p{Nko}/utf + \x{fd3e} + 0: \x{fd3e} + +/^\p{sc=Nko}/utf + \x{fd3e} +No match + +# Character not in script +/^\p{Nko}/utf + \x{fd40} +No match + +# Base script check +/^\p{sc=Kayah_Li}/utf + \x{a900} + 0: \x{a900} + +/^\p{Script=Kali}/utf + \x{a92f} + 0: \x{a92f} + +# Script extension check +/^\p{Kayah_Li}/utf + \x{a92e} + 0: \x{a92e} + +/^\p{Script_Extensions=Kali}/utf + \x{a92e} + 0: \x{a92e} + +# Script extension only character +/^\p{Kayah_Li}/utf + \x{a92e} + 0: \x{a92e} + +/^\p{sc=Kayah_Li}/utf + \x{a92e} +No match + +# Character not in script +/^\p{Kayah_Li}/utf + \x{a930} +No match + +# Base script check +/^\p{sc=Javanese}/utf + \x{a980} + 0: \x{a980} + +/^\p{Script=Java}/utf + \x{a9df} + 0: \x{a9df} + +# Script extension check +/^\p{Javanese}/utf + \x{a9cf} + 0: \x{a9cf} + +/^\p{scx=Java}/utf + \x{a9cf} + 0: \x{a9cf} + +# Script extension only character +/^\p{Javanese}/utf + \x{a9cf} + 0: \x{a9cf} + +/^\p{sc=Javanese}/utf + \x{a9cf} +No match + +# 
Character not in script +/^\p{Javanese}/utf + \x{a9e0} +No match + +# Base script check +/^\p{sc=Kaithi}/utf + \x{11080} + 0: \x{11080} + +/^\p{Script=Kthi}/utf + \x{110cd} + 0: \x{110cd} + +# Script extension check +/^\p{Kaithi}/utf + \x{966} + 0: \x{966} + +/^\p{Script_Extensions=Kthi}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Kaithi}/utf + \x{966} + 0: \x{966} + +/^\p{sc=Kaithi}/utf + \x{966} +No match + +# Character not in script +/^\p{Kaithi}/utf + \x{110ce} +No match + +# Base script check +/^\p{sc=Mandaic}/utf + \x{840} + 0: \x{840} + +/^\p{Script=Mand}/utf + \x{85e} + 0: \x{85e} + +# Script extension check +/^\p{Mandaic}/utf + \x{640} + 0: \x{640} + +/^\p{scx=Mand}/utf + \x{640} + 0: \x{640} + +# Script extension only character +/^\p{Mandaic}/utf + \x{640} + 0: \x{640} + +/^\p{sc=Mandaic}/utf + \x{640} +No match + +# Character not in script +/^\p{Mandaic}/utf + \x{85f} +No match + +# Base script check +/^\p{sc=Chakma}/utf + \x{11100} + 0: \x{11100} + +/^\p{Script=Cakm}/utf + \x{11147} + 0: \x{11147} + +# Script extension check +/^\p{Chakma}/utf + \x{9e6} + 0: \x{9e6} + +/^\p{Script_Extensions=Cakm}/utf + \x{1049} + 0: \x{1049} + +# Script extension only character +/^\p{Chakma}/utf + \x{9e6} + 0: \x{9e6} + +/^\p{sc=Chakma}/utf + \x{9e6} +No match + +# Character not in script +/^\p{Chakma}/utf + \x{11148} +No match + +# Base script check +/^\p{sc=Sharada}/utf + \x{11180} + 0: \x{11180} + +/^\p{Script=Shrd}/utf + \x{111df} + 0: \x{111df} + +# Script extension check +/^\p{Sharada}/utf + \x{951} + 0: \x{951} + +/^\p{scx=Shrd}/utf + \x{1ce0} + 0: \x{1ce0} + +# Script extension only character +/^\p{Sharada}/utf + \x{1cd7} + 0: \x{1cd7} + +/^\p{sc=Sharada}/utf + \x{1cd7} +No match + +# Character not in script +/^\p{Sharada}/utf + \x{111e0} +No match + +# Base script check +/^\p{sc=Takri}/utf + \x{11680} + 0: \x{11680} + +/^\p{Script=Takr}/utf + \x{116c9} + 0: \x{116c9} + +# Script extension check +/^\p{Takri}/utf + \x{964} + 0: \x{964} 
+ +/^\p{Script_Extensions=Takr}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Takri}/utf + \x{a836} + 0: \x{a836} + +/^\p{sc=Takri}/utf + \x{a836} +No match + +# Character not in script +/^\p{Takri}/utf + \x{116ca} +No match + +# Base script check +/^\p{sc=Duployan}/utf + \x{1bc00} + 0: \x{1bc00} + +/^\p{Script=Dupl}/utf + \x{1bc9f} + 0: \x{1bc9f} + +# Script extension check +/^\p{Duployan}/utf + \x{1bca0} + 0: \x{1bca0} + +/^\p{scx=Dupl}/utf + \x{1bca3} + 0: \x{1bca3} + +# Script extension only character +/^\p{Duployan}/utf + \x{1bca0} + 0: \x{1bca0} + +/^\p{sc=Duployan}/utf + \x{1bca0} +No match + +# Character not in script +/^\p{Duployan}/utf + \x{1bca4} +No match + +# Base script check +/^\p{sc=Grantha}/utf + \x{11300} + 0: \x{11300} + +/^\p{Script=Gran}/utf + \x{11374} + 0: \x{11374} + +# Script extension check +/^\p{Grantha}/utf + \x{951} + 0: \x{951} + +/^\p{Script_Extensions=Gran}/utf + \x{11fd3} + 0: \x{11fd3} + +# Script extension only character +/^\p{Grantha}/utf + \x{1cd3} + 0: \x{1cd3} + +/^\p{sc=Grantha}/utf + \x{1cd3} +No match + +# Character not in script +/^\p{Grantha}/utf + \x{11fd4} +No match + +# Base script check +/^\p{sc=Khojki}/utf + \x{11200} + 0: \x{11200} + +/^\p{Script=Khoj}/utf + \x{11241} + 0: \x{11241} + +# Script extension check +/^\p{Khojki}/utf + \x{ae6} + 0: \x{ae6} + +/^\p{scx=Khoj}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Khojki}/utf + \x{ae6} + 0: \x{ae6} + +/^\p{sc=Khojki}/utf + \x{ae6} +No match + +# Character not in script +/^\p{Khojki}/utf + \x{11242} +No match + +# Base script check +/^\p{sc=Linear_A}/utf + \x{10600} + 0: \x{10600} + +/^\p{Script=Lina}/utf + \x{10767} + 0: \x{10767} + +# Script extension check +/^\p{Linear_A}/utf + \x{10107} + 0: \x{10107} + +/^\p{Script_Extensions=Lina}/utf + \x{10133} + 0: \x{10133} + +# Script extension only character +/^\p{Linear_A}/utf + \x{10107} + 0: \x{10107} + +/^\p{sc=Linear_A}/utf + \x{10107} +No match + +# Character not in 
script +/^\p{Linear_A}/utf + \x{10768} +No match + +# Base script check +/^\p{sc=Mahajani}/utf + \x{11150} + 0: \x{11150} + +/^\p{Script=Mahj}/utf + \x{11176} + 0: \x{11176} + +# Script extension check +/^\p{Mahajani}/utf + \x{964} + 0: \x{964} + +/^\p{scx=Mahj}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Mahajani}/utf + \x{966} + 0: \x{966} + +/^\p{sc=Mahajani}/utf + \x{966} +No match + +# Character not in script +/^\p{Mahajani}/utf + \x{11177} +No match + +# Base script check +/^\p{sc=Manichaean}/utf + \x{10ac0} + 0: \x{10ac0} + +/^\p{Script=Mani}/utf + \x{10af6} + 0: \x{10af6} + +# Script extension check +/^\p{Manichaean}/utf + \x{640} + 0: \x{640} + +/^\p{Script_Extensions=Mani}/utf + \x{10af2} + 0: \x{10af2} + +# Script extension only character +/^\p{Manichaean}/utf + \x{640} + 0: \x{640} + +/^\p{sc=Manichaean}/utf + \x{640} +No match + +# Character not in script +/^\p{Manichaean}/utf + \x{10af7} +No match + +# Base script check +/^\p{sc=Modi}/utf + \x{11600} + 0: \x{11600} + +/^\p{Script=Modi}/utf + \x{11659} + 0: \x{11659} + +# Script extension check +/^\p{Modi}/utf + \x{a830} + 0: \x{a830} + +/^\p{scx=Modi}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Modi}/utf + \x{a836} + 0: \x{a836} + +/^\p{sc=Modi}/utf + \x{a836} +No match + +# Character not in script +/^\p{Modi}/utf + \x{1165a} +No match + +# Base script check +/^\p{sc=Old_Permic}/utf + \x{10350} + 0: \x{10350} + +/^\p{Script=Perm}/utf + \x{1037a} + 0: \x{1037a} + +# Script extension check +/^\p{Old_Permic}/utf + \x{483} + 0: \x{483} + +/^\p{Script_Extensions=Perm}/utf + \x{483} + 0: \x{483} + +# Script extension only character +/^\p{Old_Permic}/utf + \x{483} + 0: \x{483} + +/^\p{sc=Old_Permic}/utf + \x{483} +No match + +# Character not in script +/^\p{Old_Permic}/utf + \x{1037b} +No match + +# Base script check +/^\p{sc=Psalter_Pahlavi}/utf + \x{10b80} + 0: \x{10b80} + +/^\p{Script=Phlp}/utf + \x{10baf} + 0: \x{10baf} + +# Script extension check 
+/^\p{Psalter_Pahlavi}/utf + \x{640} + 0: \x{640} + +/^\p{scx=Phlp}/utf + \x{640} + 0: \x{640} + +# Script extension only character +/^\p{Psalter_Pahlavi}/utf + \x{640} + 0: \x{640} + +/^\p{sc=Psalter_Pahlavi}/utf + \x{640} +No match + +# Character not in script +/^\p{Psalter_Pahlavi}/utf + \x{10bb0} +No match + +# Base script check +/^\p{sc=Khudawadi}/utf + \x{112b0} + 0: \x{112b0} + +/^\p{Script=Sind}/utf + \x{112f9} + 0: \x{112f9} + +# Script extension check +/^\p{Khudawadi}/utf + \x{964} + 0: \x{964} + +/^\p{Script_Extensions=Sind}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Khudawadi}/utf + \x{a836} + 0: \x{a836} + +/^\p{sc=Khudawadi}/utf + \x{a836} +No match + +# Character not in script +/^\p{Khudawadi}/utf + \x{112fa} +No match + +# Base script check +/^\p{sc=Tirhuta}/utf + \x{11480} + 0: \x{11480} + +/^\p{Script=Tirh}/utf + \x{114d9} + 0: \x{114d9} + +# Script extension check +/^\p{Tirhuta}/utf + \x{951} + 0: \x{951} + +/^\p{scx=Tirh}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Tirhuta}/utf + \x{1cf2} + 0: \x{1cf2} + +/^\p{sc=Tirhuta}/utf + \x{1cf2} +No match + +# Character not in script +/^\p{Tirhuta}/utf + \x{114da} +No match + +# Base script check +/^\p{sc=Multani}/utf + \x{11280} + 0: \x{11280} + +/^\p{Script=Mult}/utf + \x{112a9} + 0: \x{112a9} + +# Script extension check +/^\p{Multani}/utf + \x{a66} + 0: \x{a66} + +/^\p{Script_Extensions=Mult}/utf + \x{a6f} + 0: \x{a6f} + +# Script extension only character +/^\p{Multani}/utf + \x{a66} + 0: \x{a66} + +/^\p{sc=Multani}/utf + \x{a66} +No match + +# Character not in script +/^\p{Multani}/utf + \x{112aa} +No match + +# Base script check +/^\p{sc=Adlam}/utf + \x{1e900} + 0: \x{1e900} + +/^\p{Script=Adlm}/utf + \x{1e95f} + 0: \x{1e95f} + +# Script extension check +/^\p{Adlam}/utf + \x{61f} + 0: \x{61f} + +/^\p{scx=Adlm}/utf + \x{640} + 0: \x{640} + +# Script extension only character +/^\p{Adlam}/utf + \x{61f} + 0: \x{61f} + +/^\p{sc=Adlam}/utf + \x{61f} 
+No match + +# Character not in script +/^\p{Adlam}/utf + \x{1e960} +No match + +# Base script check +/^\p{sc=Masaram_Gondi}/utf + \x{11d00} + 0: \x{11d00} + +/^\p{Script=Gonm}/utf + \x{11d59} + 0: \x{11d59} + +# Script extension check +/^\p{Masaram_Gondi}/utf + \x{964} + 0: \x{964} + +/^\p{Script_Extensions=Gonm}/utf + \x{965} + 0: \x{965} + +# Script extension only character +/^\p{Masaram_Gondi}/utf + \x{964} + 0: \x{964} + +/^\p{sc=Masaram_Gondi}/utf + \x{964} +No match + +# Character not in script +/^\p{Masaram_Gondi}/utf + \x{11d5a} +No match + +# Base script check +/^\p{sc=Dogra}/utf + \x{11800} + 0: \x{11800} + +/^\p{Script=Dogr}/utf + \x{1183b} + 0: \x{1183b} + +# Script extension check +/^\p{Dogra}/utf + \x{964} + 0: \x{964} + +/^\p{scx=Dogr}/utf + \x{a839} + 0: \x{a839} + +# Script extension only character +/^\p{Dogra}/utf + \x{966} + 0: \x{966} + +/^\p{sc=Dogra}/utf + \x{966} +No match + +# Character not in script +/^\p{Dogra}/utf + \x{1183c} +No match + +# Base script check +/^\p{sc=Gunjala_Gondi}/utf + \x{11d60} + 0: \x{11d60} + +/^\p{Script=Gong}/utf + \x{11da9} + 0: \x{11da9} + +# Script extension check +/^\p{Gunjala_Gondi}/utf + \x{964} + 0: \x{964} + +/^\p{Script_Extensions=Gong}/utf + \x{965} + 0: \x{965} + +# Script extension only character +/^\p{Gunjala_Gondi}/utf + \x{964} + 0: \x{964} + +/^\p{sc=Gunjala_Gondi}/utf + \x{964} +No match + +# Character not in script +/^\p{Gunjala_Gondi}/utf + \x{11daa} +No match + +# Base script check +/^\p{sc=Hanifi_Rohingya}/utf + \x{10d00} + 0: \x{10d00} + +/^\p{Script=Rohg}/utf + \x{10d39} + 0: \x{10d39} + +# Script extension check +/^\p{Hanifi_Rohingya}/utf + \x{60c} + 0: \x{60c} + +/^\p{scx=Rohg}/utf + \x{6d4} + 0: \x{6d4} + +# Script extension only character +/^\p{Hanifi_Rohingya}/utf + \x{6d4} + 0: \x{6d4} + +/^\p{sc=Hanifi_Rohingya}/utf + \x{6d4} +No match + +# Character not in script +/^\p{Hanifi_Rohingya}/utf + \x{10d3a} +No match + +# Base script check +/^\p{sc=Sogdian}/utf + \x{10f30} + 0: \x{10f30} + 
+/^\p{Script=Sogd}/utf + \x{10f59} + 0: \x{10f59} + +# Script extension check +/^\p{Sogdian}/utf + \x{640} + 0: \x{640} + +/^\p{Script_Extensions=Sogd}/utf + \x{640} + 0: \x{640} + +# Script extension only character +/^\p{Sogdian}/utf + \x{640} + 0: \x{640} + +/^\p{sc=Sogdian}/utf + \x{640} +No match + +# Character not in script +/^\p{Sogdian}/utf + \x{10f5a} +No match + +# Base script check +/^\p{sc=Nandinagari}/utf + \x{119a0} + 0: \x{119a0} + +/^\p{Script=Nand}/utf + \x{119e4} + 0: \x{119e4} + +# Script extension check +/^\p{Nandinagari}/utf + \x{964} + 0: \x{964} + +/^\p{scx=Nand}/utf + \x{a835} + 0: \x{a835} + +# Script extension only character +/^\p{Nandinagari}/utf + \x{1cfa} + 0: \x{1cfa} + +/^\p{sc=Nandinagari}/utf + \x{1cfa} +No match + +# Character not in script +/^\p{Nandinagari}/utf + \x{119e5} +No match + +# Base script check +/^\p{sc=Yezidi}/utf + \x{10e80} + 0: \x{10e80} + +/^\p{Script=Yezi}/utf + \x{10eb1} + 0: \x{10eb1} + +# Script extension check +/^\p{Yezidi}/utf + \x{60c} + 0: \x{60c} + +/^\p{Script_Extensions=Yezi}/utf + \x{669} + 0: \x{669} + +# Script extension only character +/^\p{Yezidi}/utf + \x{660} + 0: \x{660} + +/^\p{sc=Yezidi}/utf + \x{660} +No match + +# Character not in script +/^\p{Yezidi}/utf + \x{10eb2} +No match + +# Base script check +/^\p{sc=Cypro_Minoan}/utf + \x{12f90} + 0: \x{12f90} + +/^\p{Script=Cpmn}/utf + \x{12ff2} + 0: \x{12ff2} + +# Script extension check +/^\p{Cypro_Minoan}/utf + \x{10100} + 0: \x{10100} + +/^\p{scx=Cpmn}/utf + \x{10101} + 0: \x{10101} + +# Script extension only character +/^\p{Cypro_Minoan}/utf + \x{10100} + 0: \x{10100} + +/^\p{sc=Cypro_Minoan}/utf + \x{10100} +No match + +# Character not in script +/^\p{Cypro_Minoan}/utf + \x{12ff3} +No match + +# Base script check +/^\p{sc=Old_Uyghur}/utf + \x{10f70} + 0: \x{10f70} + +/^\p{Script=Ougr}/utf + \x{10f89} + 0: \x{10f89} + +# Script extension check +/^\p{Old_Uyghur}/utf + \x{640} + 0: \x{640} + +/^\p{Script_Extensions=Ougr}/utf + \x{10af2} + 0: 
\x{10af2} + +# Script extension only character +/^\p{Old_Uyghur}/utf + \x{10af2} + 0: \x{10af2} + +/^\p{sc=Old_Uyghur}/utf + \x{10af2} +No match + +# Character not in script +/^\p{Old_Uyghur}/utf + \x{10f8a} +No match + +# Base script check +/^\p{sc=Common}/utf + \x{00} + 0: \x{00} + +/^\p{Script=Zyyy}/utf + \x{e007f} + 0: \x{e007f} + +# Character not in script +/^\p{Common}/utf + \x{e0080} +No match + +# Base script check +/^\p{sc=Armenian}/utf + \x{531} + 0: \x{531} + +/^\p{Script=Armn}/utf + \x{fb17} + 0: \x{fb17} + +# Character not in script +/^\p{Armenian}/utf + \x{fb18} +No match + +# Base script check +/^\p{sc=Hebrew}/utf + \x{591} + 0: \x{591} + +/^\p{Script=Hebr}/utf + \x{fb4f} + 0: \x{fb4f} + +# Character not in script +/^\p{Hebrew}/utf + \x{fb50} +No match + +# Base script check +/^\p{sc=Thai}/utf + \x{e01} + 0: \x{e01} + +/^\p{Script=Thai}/utf + \x{e5b} + 0: \x{e5b} + +# Character not in script +/^\p{Thai}/utf + \x{e5c} +No match + +# Base script check +/^\p{sc=Lao}/utf + \x{e81} + 0: \x{e81} + +/^\p{Script=Laoo}/utf + \x{edf} + 0: \x{edf} + +# Character not in script +/^\p{Lao}/utf + \x{ee0} +No match + +# Base script check +/^\p{sc=Tibetan}/utf + \x{f00} + 0: \x{f00} + +/^\p{Script=Tibt}/utf + \x{fda} + 0: \x{fda} + +# Character not in script +/^\p{Tibetan}/utf + \x{fdb} +No match + +# Base script check +/^\p{sc=Ethiopic}/utf + \x{1200} + 0: \x{1200} + +/^\p{Script=Ethi}/utf + \x{1e7fe} + 0: \x{1e7fe} + +# Character not in script +/^\p{Ethiopic}/utf + \x{1e7ff} +No match + +# Base script check +/^\p{sc=Cherokee}/utf + \x{13a0} + 0: \x{13a0} + +/^\p{Script=Cher}/utf + \x{abbf} + 0: \x{abbf} + +# Character not in script +/^\p{Cherokee}/utf + \x{abc0} +No match + +# Base script check +/^\p{sc=Canadian_Aboriginal}/utf + \x{1400} + 0: \x{1400} + +/^\p{Script=Cans}/utf + \x{11abf} + 0: \x{11abf} + +# Character not in script +/^\p{Canadian_Aboriginal}/utf + \x{11ac0} +No match + +# Base script check +/^\p{sc=Ogham}/utf + \x{1680} + 0: \x{1680} + 
+/^\p{Script=Ogam}/utf + \x{169c} + 0: \x{169c} + +# Character not in script +/^\p{Ogham}/utf + \x{169d} +No match + +# Base script check +/^\p{sc=Runic}/utf + \x{16a0} + 0: \x{16a0} + +/^\p{Script=Runr}/utf + \x{16f8} + 0: \x{16f8} + +# Character not in script +/^\p{Runic}/utf + \x{16f9} +No match + +# Base script check +/^\p{sc=Khmer}/utf + \x{1780} + 0: \x{1780} + +/^\p{Script=Khmr}/utf + \x{19ff} + 0: \x{19ff} + +# Character not in script +/^\p{Khmer}/utf + \x{1a00} +No match + +# Base script check +/^\p{sc=Old_Italic}/utf + \x{10300} + 0: \x{10300} + +/^\p{Script=Ital}/utf + \x{1032f} + 0: \x{1032f} + +# Character not in script +/^\p{Old_Italic}/utf + \x{10330} +No match + +# Base script check +/^\p{sc=Gothic}/utf + \x{10330} + 0: \x{10330} + +/^\p{Script=Goth}/utf + \x{1034a} + 0: \x{1034a} + +# Character not in script +/^\p{Gothic}/utf + \x{1034b} +No match + +# Base script check +/^\p{sc=Deseret}/utf + \x{10400} + 0: \x{10400} + +/^\p{Script=Dsrt}/utf + \x{1044f} + 0: \x{1044f} + +# Character not in script +/^\p{Deseret}/utf + \x{10450} +No match + +# Base script check +/^\p{sc=Inherited}/utf + \x{300} + 0: \x{300} + +/^\p{Script=Zinh}/utf + \x{e01ef} + 0: \x{e01ef} + +# Character not in script +/^\p{Inherited}/utf + \x{e01f0} +No match + +# Base script check +/^\p{sc=Ugaritic}/utf + \x{10380} + 0: \x{10380} + +/^\p{Script=Ugar}/utf + \x{1039f} + 0: \x{1039f} + +# Character not in script +/^\p{Ugaritic}/utf + \x{103a0} +No match + +# Base script check +/^\p{sc=Shavian}/utf + \x{10450} + 0: \x{10450} + +/^\p{Script=Shaw}/utf + \x{1047f} + 0: \x{1047f} + +# Character not in script +/^\p{Shavian}/utf + \x{10480} +No match + +# Base script check +/^\p{sc=Osmanya}/utf + \x{10480} + 0: \x{10480} + +/^\p{Script=Osma}/utf + \x{104a9} + 0: \x{104a9} + +# Character not in script +/^\p{Osmanya}/utf + \x{104aa} +No match + +# Base script check +/^\p{sc=Braille}/utf + \x{2800} + 0: \x{2800} + +/^\p{Script=Brai}/utf + \x{28ff} + 0: \x{28ff} + +# Character not in script 
+/^\p{Braille}/utf + \x{2900} +No match + +# Base script check +/^\p{sc=New_Tai_Lue}/utf + \x{1980} + 0: \x{1980} + +/^\p{Script=Talu}/utf + \x{19df} + 0: \x{19df} + +# Character not in script +/^\p{New_Tai_Lue}/utf + \x{19e0} +No match + +# Base script check +/^\p{sc=Tifinagh}/utf + \x{2d30} + 0: \x{2d30} + +/^\p{Script=Tfng}/utf + \x{2d7f} + 0: \x{2d7f} + +# Character not in script +/^\p{Tifinagh}/utf + \x{2d80} +No match + +# Base script check +/^\p{sc=Old_Persian}/utf + \x{103a0} + 0: \x{103a0} + +/^\p{Script=Xpeo}/utf + \x{103d5} + 0: \x{103d5} + +# Character not in script +/^\p{Old_Persian}/utf + \x{103d6} +No match + +# Base script check +/^\p{sc=Kharoshthi}/utf + \x{10a00} + 0: \x{10a00} + +/^\p{Script=Khar}/utf + \x{10a58} + 0: \x{10a58} + +# Character not in script +/^\p{Kharoshthi}/utf + \x{10a59} +No match + +# Base script check +/^\p{sc=Balinese}/utf + \x{1b00} + 0: \x{1b00} + +/^\p{Script=Bali}/utf + \x{1b7e} + 0: \x{1b7e} + +# Character not in script +/^\p{Balinese}/utf + \x{1b7f} +No match + +# Base script check +/^\p{sc=Cuneiform}/utf + \x{12000} + 0: \x{12000} + +/^\p{Script=Xsux}/utf + \x{12543} + 0: \x{12543} + +# Character not in script +/^\p{Cuneiform}/utf + \x{12544} +No match + +# Base script check +/^\p{sc=Phoenician}/utf + \x{10900} + 0: \x{10900} + +/^\p{Script=Phnx}/utf + \x{1091f} + 0: \x{1091f} + +# Character not in script +/^\p{Phoenician}/utf + \x{10920} +No match + +# Base script check +/^\p{sc=Sundanese}/utf + \x{1b80} + 0: \x{1b80} + +/^\p{Script=Sund}/utf + \x{1cc7} + 0: \x{1cc7} + +# Character not in script +/^\p{Sundanese}/utf + \x{1cc8} +No match + +# Base script check +/^\p{sc=Lepcha}/utf + \x{1c00} + 0: \x{1c00} + +/^\p{Script=Lepc}/utf + \x{1c4f} + 0: \x{1c4f} + +# Character not in script +/^\p{Lepcha}/utf + \x{1c50} +No match + +# Base script check +/^\p{sc=Ol_Chiki}/utf + \x{1c50} + 0: \x{1c50} + +/^\p{Script=Olck}/utf + \x{1c7f} + 0: \x{1c7f} + +# Character not in script +/^\p{Ol_Chiki}/utf + \x{1c80} +No match + +# Base 
script check +/^\p{sc=Vai}/utf + \x{a500} + 0: \x{a500} + +/^\p{Script=Vaii}/utf + \x{a62b} + 0: \x{a62b} + +# Character not in script +/^\p{Vai}/utf + \x{a62c} +No match + +# Base script check +/^\p{sc=Saurashtra}/utf + \x{a880} + 0: \x{a880} + +/^\p{Script=Saur}/utf + \x{a8d9} + 0: \x{a8d9} + +# Character not in script +/^\p{Saurashtra}/utf + \x{a8da} +No match + +# Base script check +/^\p{sc=Rejang}/utf + \x{a930} + 0: \x{a930} + +/^\p{Script=Rjng}/utf + \x{a95f} + 0: \x{a95f} + +# Character not in script +/^\p{Rejang}/utf + \x{a960} +No match + +# Base script check +/^\p{sc=Lycian}/utf + \x{10280} + 0: \x{10280} + +/^\p{Script=Lyci}/utf + \x{1029c} + 0: \x{1029c} + +# Character not in script +/^\p{Lycian}/utf + \x{1029d} +No match + +# Base script check +/^\p{sc=Carian}/utf + \x{102a0} + 0: \x{102a0} + +/^\p{Script=Cari}/utf + \x{102d0} + 0: \x{102d0} + +# Character not in script +/^\p{Carian}/utf + \x{102d1} +No match + +# Base script check +/^\p{sc=Lydian}/utf + \x{10920} + 0: \x{10920} + +/^\p{Script=Lydi}/utf + \x{1093f} + 0: \x{1093f} + +# Character not in script +/^\p{Lydian}/utf + \x{10940} +No match + +# Base script check +/^\p{sc=Cham}/utf + \x{aa00} + 0: \x{aa00} + +/^\p{Script=Cham}/utf + \x{aa5f} + 0: \x{aa5f} + +# Character not in script +/^\p{Cham}/utf + \x{aa60} +No match + +# Base script check +/^\p{sc=Tai_Tham}/utf + \x{1a20} + 0: \x{1a20} + +/^\p{Script=Lana}/utf + \x{1aad} + 0: \x{1aad} + +# Character not in script +/^\p{Tai_Tham}/utf + \x{1aae} +No match + +# Base script check +/^\p{sc=Tai_Viet}/utf + \x{aa80} + 0: \x{aa80} + +/^\p{Script=Tavt}/utf + \x{aadf} + 0: \x{aadf} + +# Character not in script +/^\p{Tai_Viet}/utf + \x{aae0} +No match + +# Base script check +/^\p{sc=Avestan}/utf + \x{10b00} + 0: \x{10b00} + +/^\p{Script=Avst}/utf + \x{10b3f} + 0: \x{10b3f} + +# Character not in script +/^\p{Avestan}/utf + \x{10b40} +No match + +# Base script check +/^\p{sc=Egyptian_Hieroglyphs}/utf + \x{13000} + 0: \x{13000} + +/^\p{Script=Egyp}/utf + 
\x{13455} + 0: \x{13455} + +# Character not in script +/^\p{Egyptian_Hieroglyphs}/utf + \x{13456} +No match + +# Base script check +/^\p{sc=Samaritan}/utf + \x{800} + 0: \x{800} + +/^\p{Script=Samr}/utf + \x{83e} + 0: \x{83e} + +# Character not in script +/^\p{Samaritan}/utf + \x{83f} +No match + +# Base script check +/^\p{sc=Lisu}/utf + \x{a4d0} + 0: \x{a4d0} + +/^\p{Script=Lisu}/utf + \x{11fb0} + 0: \x{11fb0} + +# Character not in script +/^\p{Lisu}/utf + \x{11fb1} +No match + +# Base script check +/^\p{sc=Bamum}/utf + \x{a6a0} + 0: \x{a6a0} + +/^\p{Script=Bamu}/utf + \x{16a38} + 0: \x{16a38} + +# Character not in script +/^\p{Bamum}/utf + \x{16a39} +No match + +# Base script check +/^\p{sc=Meetei_Mayek}/utf + \x{aae0} + 0: \x{aae0} + +/^\p{Script=Mtei}/utf + \x{abf9} + 0: \x{abf9} + +# Character not in script +/^\p{Meetei_Mayek}/utf + \x{abfa} +No match + +# Base script check +/^\p{sc=Imperial_Aramaic}/utf + \x{10840} + 0: \x{10840} + +/^\p{Script=Armi}/utf + \x{1085f} + 0: \x{1085f} + +# Character not in script +/^\p{Imperial_Aramaic}/utf + \x{10860} +No match + +# Base script check +/^\p{sc=Old_South_Arabian}/utf + \x{10a60} + 0: \x{10a60} + +/^\p{Script=Sarb}/utf + \x{10a7f} + 0: \x{10a7f} + +# Character not in script +/^\p{Old_South_Arabian}/utf + \x{10a80} +No match + +# Base script check +/^\p{sc=Inscriptional_Parthian}/utf + \x{10b40} + 0: \x{10b40} + +/^\p{Script=Prti}/utf + \x{10b5f} + 0: \x{10b5f} + +# Character not in script +/^\p{Inscriptional_Parthian}/utf + \x{10b60} +No match + +# Base script check +/^\p{sc=Inscriptional_Pahlavi}/utf + \x{10b60} + 0: \x{10b60} + +/^\p{Script=Phli}/utf + \x{10b7f} + 0: \x{10b7f} + +# Character not in script +/^\p{Inscriptional_Pahlavi}/utf + \x{10b80} +No match + +# Base script check +/^\p{sc=Old_Turkic}/utf + \x{10c00} + 0: \x{10c00} + +/^\p{Script=Orkh}/utf + \x{10c48} + 0: \x{10c48} + +# Character not in script +/^\p{Old_Turkic}/utf + \x{10c49} +No match + +# Base script check +/^\p{sc=Batak}/utf + \x{1bc0} + 0: 
\x{1bc0} + +/^\p{Script=Batk}/utf + \x{1bff} + 0: \x{1bff} + +# Character not in script +/^\p{Batak}/utf + \x{1c00} +No match + +# Base script check +/^\p{sc=Brahmi}/utf + \x{11000} + 0: \x{11000} + +/^\p{Script=Brah}/utf + \x{1107f} + 0: \x{1107f} + +# Character not in script +/^\p{Brahmi}/utf + \x{11080} +No match + +# Base script check +/^\p{sc=Meroitic_Cursive}/utf + \x{109a0} + 0: \x{109a0} + +/^\p{Script=Merc}/utf + \x{109ff} + 0: \x{109ff} + +# Character not in script +/^\p{Meroitic_Cursive}/utf + \x{10a00} +No match + +# Base script check +/^\p{sc=Meroitic_Hieroglyphs}/utf + \x{10980} + 0: \x{10980} + +/^\p{Script=Mero}/utf + \x{1099f} + 0: \x{1099f} + +# Character not in script +/^\p{Meroitic_Hieroglyphs}/utf + \x{109a0} +No match + +# Base script check +/^\p{sc=Miao}/utf + \x{16f00} + 0: \x{16f00} + +/^\p{Script=Plrd}/utf + \x{16f9f} + 0: \x{16f9f} + +# Character not in script +/^\p{Miao}/utf + \x{16fa0} +No match + +# Base script check +/^\p{sc=Sora_Sompeng}/utf + \x{110d0} + 0: \x{110d0} + +/^\p{Script=Sora}/utf + \x{110f9} + 0: \x{110f9} + +# Character not in script +/^\p{Sora_Sompeng}/utf + \x{110fa} +No match + +# Base script check +/^\p{sc=Caucasian_Albanian}/utf + \x{10530} + 0: \x{10530} + +/^\p{Script=Aghb}/utf + \x{1056f} + 0: \x{1056f} + +# Character not in script +/^\p{Caucasian_Albanian}/utf + \x{10570} +No match + +# Base script check +/^\p{sc=Bassa_Vah}/utf + \x{16ad0} + 0: \x{16ad0} + +/^\p{Script=Bass}/utf + \x{16af5} + 0: \x{16af5} + +# Character not in script +/^\p{Bassa_Vah}/utf + \x{16af6} +No match + +# Base script check +/^\p{sc=Elbasan}/utf + \x{10500} + 0: \x{10500} + +/^\p{Script=Elba}/utf + \x{10527} + 0: \x{10527} + +# Character not in script +/^\p{Elbasan}/utf + \x{10528} +No match + +# Base script check +/^\p{sc=Pahawh_Hmong}/utf + \x{16b00} + 0: \x{16b00} + +/^\p{Script=Hmng}/utf + \x{16b8f} + 0: \x{16b8f} + +# Character not in script +/^\p{Pahawh_Hmong}/utf + \x{16b90} +No match + +# Base script check 
+/^\p{sc=Mende_Kikakui}/utf + \x{1e800} + 0: \x{1e800} + +/^\p{Script=Mend}/utf + \x{1e8d6} + 0: \x{1e8d6} + +# Character not in script +/^\p{Mende_Kikakui}/utf + \x{1e8d7} +No match + +# Base script check +/^\p{sc=Mro}/utf + \x{16a40} + 0: \x{16a40} + +/^\p{Script=Mroo}/utf + \x{16a6f} + 0: \x{16a6f} + +# Character not in script +/^\p{Mro}/utf + \x{16a70} +No match + +# Base script check +/^\p{sc=Old_North_Arabian}/utf + \x{10a80} + 0: \x{10a80} + +/^\p{Script=Narb}/utf + \x{10a9f} + 0: \x{10a9f} + +# Character not in script +/^\p{Old_North_Arabian}/utf + \x{10aa0} +No match + +# Base script check +/^\p{sc=Nabataean}/utf + \x{10880} + 0: \x{10880} + +/^\p{Script=Nbat}/utf + \x{108af} + 0: \x{108af} + +# Character not in script +/^\p{Nabataean}/utf + \x{108b0} +No match + +# Base script check +/^\p{sc=Palmyrene}/utf + \x{10860} + 0: \x{10860} + +/^\p{Script=Palm}/utf + \x{1087f} + 0: \x{1087f} + +# Character not in script +/^\p{Palmyrene}/utf + \x{10880} +No match + +# Base script check +/^\p{sc=Pau_Cin_Hau}/utf + \x{11ac0} + 0: \x{11ac0} + +/^\p{Script=Pauc}/utf + \x{11af8} + 0: \x{11af8} + +# Character not in script +/^\p{Pau_Cin_Hau}/utf + \x{11af9} +No match + +# Base script check +/^\p{sc=Siddham}/utf + \x{11580} + 0: \x{11580} + +/^\p{Script=Sidd}/utf + \x{115dd} + 0: \x{115dd} + +# Character not in script +/^\p{Siddham}/utf + \x{115de} +No match + +# Base script check +/^\p{sc=Warang_Citi}/utf + \x{118a0} + 0: \x{118a0} + +/^\p{Script=Wara}/utf + \x{118ff} + 0: \x{118ff} + +# Character not in script +/^\p{Warang_Citi}/utf + \x{11900} +No match + +# Base script check +/^\p{sc=Ahom}/utf + \x{11700} + 0: \x{11700} + +/^\p{Script=Ahom}/utf + \x{11746} + 0: \x{11746} + +# Character not in script +/^\p{Ahom}/utf + \x{11747} +No match + +# Base script check +/^\p{sc=Anatolian_Hieroglyphs}/utf + \x{14400} + 0: \x{14400} + +/^\p{Script=Hluw}/utf + \x{14646} + 0: \x{14646} + +# Character not in script +/^\p{Anatolian_Hieroglyphs}/utf + \x{14647} +No match + +# Base 
script check +/^\p{sc=Hatran}/utf + \x{108e0} + 0: \x{108e0} + +/^\p{Script=Hatr}/utf + \x{108ff} + 0: \x{108ff} + +# Character not in script +/^\p{Hatran}/utf + \x{10900} +No match + +# Base script check +/^\p{sc=Old_Hungarian}/utf + \x{10c80} + 0: \x{10c80} + +/^\p{Script=Hung}/utf + \x{10cff} + 0: \x{10cff} + +# Character not in script +/^\p{Old_Hungarian}/utf + \x{10d00} +No match + +# Base script check +/^\p{sc=SignWriting}/utf + \x{1d800} + 0: \x{1d800} + +/^\p{Script=Sgnw}/utf + \x{1daaf} + 0: \x{1daaf} + +# Character not in script +/^\p{SignWriting}/utf + \x{1dab0} +No match + +# Base script check +/^\p{sc=Bhaiksuki}/utf + \x{11c00} + 0: \x{11c00} + +/^\p{Script=Bhks}/utf + \x{11c6c} + 0: \x{11c6c} + +# Character not in script +/^\p{Bhaiksuki}/utf + \x{11c6d} +No match + +# Base script check +/^\p{sc=Marchen}/utf + \x{11c70} + 0: \x{11c70} + +/^\p{Script=Marc}/utf + \x{11cb6} + 0: \x{11cb6} + +# Character not in script +/^\p{Marchen}/utf + \x{11cb7} +No match + +# Base script check +/^\p{sc=Newa}/utf + \x{11400} + 0: \x{11400} + +/^\p{Script=Newa}/utf + \x{11461} + 0: \x{11461} + +# Character not in script +/^\p{Newa}/utf + \x{11462} +No match + +# Base script check +/^\p{sc=Osage}/utf + \x{104b0} + 0: \x{104b0} + +/^\p{Script=Osge}/utf + \x{104fb} + 0: \x{104fb} + +# Character not in script +/^\p{Osage}/utf + \x{104fc} +No match + +# Base script check +/^\p{sc=Tangut}/utf + \x{16fe0} + 0: \x{16fe0} + +/^\p{Script=Tang}/utf + \x{18d08} + 0: \x{18d08} + +# Character not in script +/^\p{Tangut}/utf + \x{18d09} +No match + +# Base script check +/^\p{sc=Nushu}/utf + \x{16fe1} + 0: \x{16fe1} + +/^\p{Script=Nshu}/utf + \x{1b2fb} + 0: \x{1b2fb} + +# Character not in script +/^\p{Nushu}/utf + \x{1b2fc} +No match + +# Base script check +/^\p{sc=Soyombo}/utf + \x{11a50} + 0: \x{11a50} + +/^\p{Script=Soyo}/utf + \x{11aa2} + 0: \x{11aa2} + +# Character not in script +/^\p{Soyombo}/utf + \x{11aa3} +No match + +# Base script check +/^\p{sc=Zanabazar_Square}/utf + 
\x{11a00} + 0: \x{11a00} + +/^\p{Script=Zanb}/utf + \x{11a47} + 0: \x{11a47} + +# Character not in script +/^\p{Zanabazar_Square}/utf + \x{11a48} +No match + +# Base script check +/^\p{sc=Makasar}/utf + \x{11ee0} + 0: \x{11ee0} + +/^\p{Script=Maka}/utf + \x{11ef8} + 0: \x{11ef8} + +# Character not in script +/^\p{Makasar}/utf + \x{11ef9} +No match + +# Base script check +/^\p{sc=Medefaidrin}/utf + \x{16e40} + 0: \x{16e40} + +/^\p{Script=Medf}/utf + \x{16e9a} + 0: \x{16e9a} + +# Character not in script +/^\p{Medefaidrin}/utf + \x{16e9b} +No match + +# Base script check +/^\p{sc=Old_Sogdian}/utf + \x{10f00} + 0: \x{10f00} + +/^\p{Script=Sogo}/utf + \x{10f27} + 0: \x{10f27} + +# Character not in script +/^\p{Old_Sogdian}/utf + \x{10f28} +No match + +# Base script check +/^\p{sc=Elymaic}/utf + \x{10fe0} + 0: \x{10fe0} + +/^\p{Script=Elym}/utf + \x{10ff6} + 0: \x{10ff6} + +# Character not in script +/^\p{Elymaic}/utf + \x{10ff7} +No match + +# Base script check +/^\p{sc=Nyiakeng_Puachue_Hmong}/utf + \x{1e100} + 0: \x{1e100} + +/^\p{Script=Hmnp}/utf + \x{1e14f} + 0: \x{1e14f} + +# Character not in script +/^\p{Nyiakeng_Puachue_Hmong}/utf + \x{1e150} +No match + +# Base script check +/^\p{sc=Wancho}/utf + \x{1e2c0} + 0: \x{1e2c0} + +/^\p{Script=Wcho}/utf + \x{1e2ff} + 0: \x{1e2ff} + +# Character not in script +/^\p{Wancho}/utf + \x{1e300} +No match + +# Base script check +/^\p{sc=Chorasmian}/utf + \x{10fb0} + 0: \x{10fb0} + +/^\p{Script=Chrs}/utf + \x{10fcb} + 0: \x{10fcb} + +# Character not in script +/^\p{Chorasmian}/utf + \x{10fcc} +No match + +# Base script check +/^\p{sc=Dives_Akuru}/utf + \x{11900} + 0: \x{11900} + +/^\p{Script=Diak}/utf + \x{11959} + 0: \x{11959} + +# Character not in script +/^\p{Dives_Akuru}/utf + \x{1195a} +No match + +# Base script check +/^\p{sc=Khitan_Small_Script}/utf + \x{16fe4} + 0: \x{16fe4} + +/^\p{Script=Kits}/utf + \x{18cd5} + 0: \x{18cd5} + +# Character not in script +/^\p{Khitan_Small_Script}/utf + \x{18cd6} +No match + +# Base 
script check +/^\p{sc=Tangsa}/utf + \x{16a70} + 0: \x{16a70} + +/^\p{Script=Tnsa}/utf + \x{16ac9} + 0: \x{16ac9} + +# Character not in script +/^\p{Tangsa}/utf + \x{16aca} +No match + +# Base script check +/^\p{sc=Toto}/utf + \x{1e290} + 0: \x{1e290} + +/^\p{Script=Toto}/utf + \x{1e2ae} + 0: \x{1e2ae} + +# Character not in script +/^\p{Toto}/utf + \x{1e2af} +No match + +# Base script check +/^\p{sc=Vithkuqi}/utf + \x{10570} + 0: \x{10570} + +/^\p{Script=Vith}/utf + \x{105bc} + 0: \x{105bc} + +# Character not in script +/^\p{Vithkuqi}/utf + \x{105bd} +No match + +# Base script check +/^\p{sc=Kawi}/utf + \x{11f00} + 0: \x{11f00} + +/^\p{Script=Kawi}/utf + \x{11f59} + 0: \x{11f59} + +# Character not in script +/^\p{Kawi}/utf + \x{11f5a} +No match + +# Base script check +/^\p{sc=Nag_Mundari}/utf + \x{1e4d0} + 0: \x{1e4d0} + +/^\p{Script=Nagm}/utf + \x{1e4f9} + 0: \x{1e4f9} + +# Character not in script +/^\p{Nag_Mundari}/utf + \x{1e4fa} +No match + +# End of testinput26 diff --git a/testdata/testoutput3 b/testdata/testoutput3 new file mode 100644 index 0000000..717fa45 --- /dev/null +++ b/testdata/testoutput3 @@ -0,0 +1,170 @@ +# This set of tests checks local-specific features, using the "fr_FR" locale. +# It is not Perl-compatible. When run via RunTest, the locale is edited to +# be whichever of "fr_FR", "french", or "fr" is found to exist. There is +# different version of this file called wintestinput3 for use on Windows, +# where the locale is called "french" and the tests are run using +# RunTest.bat. 
+ +#forbid_utf + +/^[\w]+/ +\= Expect no match + École +No match + +/^[\w]+/locale=fr_FR + École + 0: École + +/^[\w]+/ +\= Expect no match + École +No match + +/^[\W]+/ + École + 0: \xc9 + +/^[\W]+/locale=fr_FR +\= Expect no match + École +No match + +/[\b]/ + \b + 0: \x08 +\= Expect no match + a +No match + +/[\b]/locale=fr_FR + \b + 0: \x08 +\= Expect no match + a +No match + +/^\w+/ +\= Expect no match + École +No match + +/^\w+/locale=fr_FR + École + 0: École + +/(.+)\b(.+)/ + École + 0: \xc9cole + 1: \xc9 + 2: cole + +/(.+)\b(.+)/locale=fr_FR +\= Expect no match + École +No match + +/École/i + École + 0: \xc9cole +\= Expect no match + école +No match + +/École/i,locale=fr_FR + École + 0: École + école + 0: école + +/\w/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/\w/I,locale=fr_FR +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + ª µ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â + ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Subject length lower bound = 1 + +# All remaining tests are in the fr_FR locale, so set the default. 
+ +#pattern locale=fr_FR + +/^[\xc8-\xc9]/i + École + 0: É + école + 0: é + +/^[\xc8-\xc9]/ + École + 0: É +\= Expect no match + école +No match + +/\xb5/i + µ + 0: µ +\= Expect no match + \x9c +No match + +/\W+/ + >>>\xaa<<< + 0: >>> + >>>\xba<<< + 0: >>> + +/[\W]+/ + >>>\xaa<<< + 0: >>> + >>>\xba<<< + 0: >>> + +/[^[:alpha:]]+/ + >>>\xaa<<< + 0: >>> + >>>\xba<<< + 0: >>> + +/\w+/ + >>>\xaa<<< + 0: ª + >>>\xba<<< + 0: º + +/[\w]+/ + >>>\xaa<<< + 0: ª + >>>\xba<<< + 0: º + +/[[:alpha:]]+/ + >>>\xaa<<< + 0: ª + >>>\xba<<< + 0: º + +/[[:alpha:]][[:lower:]][[:upper:]]/IB +------------------------------------------------------------------ + Bra + [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff] + [a-z\xb5\xdf-\xf6\xf8-\xff] + [A-Z\xc0-\xd6\xd8-\xde] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z ª µ º À Á Â Ã Ä Å Æ Ç + È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í + î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Subject length lower bound = 3 + +# End of testinput3 diff --git a/testdata/testoutput3A b/testdata/testoutput3A new file mode 100644 index 0000000..53f6894 --- /dev/null +++ b/testdata/testoutput3A @@ -0,0 +1,170 @@ +# This set of tests checks local-specific features, using the "fr_FR" locale. +# It is not Perl-compatible. When run via RunTest, the locale is edited to +# be whichever of "fr_FR", "french", or "fr" is found to exist. There is +# different version of this file called wintestinput3 for use on Windows, +# where the locale is called "french" and the tests are run using +# RunTest.bat. 
+ +#forbid_utf + +/^[\w]+/ +\= Expect no match + École +No match + +/^[\w]+/locale=fr_FR + École + 0: École + +/^[\w]+/ +\= Expect no match + École +No match + +/^[\W]+/ + École + 0: \xc9 + +/^[\W]+/locale=fr_FR +\= Expect no match + École +No match + +/[\b]/ + \b + 0: \x08 +\= Expect no match + a +No match + +/[\b]/locale=fr_FR + \b + 0: \x08 +\= Expect no match + a +No match + +/^\w+/ +\= Expect no match + École +No match + +/^\w+/locale=fr_FR + École + 0: École + +/(.+)\b(.+)/ + École + 0: \xc9cole + 1: \xc9 + 2: cole + +/(.+)\b(.+)/locale=fr_FR +\= Expect no match + École +No match + +/École/i + École + 0: \xc9cole +\= Expect no match + école +No match + +/École/i,locale=fr_FR + École + 0: École + école + 0: école + +/\w/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/\w/I,locale=fr_FR +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + ª µ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â + ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Subject length lower bound = 1 + +# All remaining tests are in the fr_FR locale, so set the default. 
+ +#pattern locale=fr_FR + +/^[\xc8-\xc9]/i + École + 0: É + école + 0: é + +/^[\xc8-\xc9]/ + École + 0: É +\= Expect no match + école +No match + +/\xb5/i + µ + 0: µ +\= Expect no match + \x9c +No match + +/\W+/ + >>>\xaa<<< + 0: >>> + >>>\xba<<< + 0: >>> + +/[\W]+/ + >>>\xaa<<< + 0: >>> + >>>\xba<<< + 0: >>> + +/[^[:alpha:]]+/ + >>>\xaa<<< + 0: >>> + >>>\xba<<< + 0: >>> + +/\w+/ + >>>\xaa<<< + 0: ª + >>>\xba<<< + 0: º + +/[\w]+/ + >>>\xaa<<< + 0: ª + >>>\xba<<< + 0: º + +/[[:alpha:]]+/ + >>>\xaa<<< + 0: ª + >>>\xba<<< + 0: º + +/[[:alpha:]][[:lower:]][[:upper:]]/IB +------------------------------------------------------------------ + Bra + [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff] + [a-z\xaa\xb5\xba\xdf-\xf6\xf8-\xff] + [A-Z\xc0-\xd6\xd8-\xde] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z ª µ º À Á Â Ã Ä Å Æ Ç + È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í + î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Subject length lower bound = 3 + +# End of testinput3 diff --git a/testdata/testoutput3B b/testdata/testoutput3B new file mode 100644 index 0000000..1e22efe --- /dev/null +++ b/testdata/testoutput3B @@ -0,0 +1,170 @@ +# This set of tests checks local-specific features, using the "fr_FR" locale. +# It is not Perl-compatible. When run via RunTest, the locale is edited to +# be whichever of "fr_FR", "french", or "fr" is found to exist. There is +# different version of this file called wintestinput3 for use on Windows, +# where the locale is called "french" and the tests are run using +# RunTest.bat. 
+ +#forbid_utf + +/^[\w]+/ +\= Expect no match + École +No match + +/^[\w]+/locale=fr_FR + École + 0: École + +/^[\w]+/ +\= Expect no match + École +No match + +/^[\W]+/ + École + 0: \xc9 + +/^[\W]+/locale=fr_FR +\= Expect no match + École +No match + +/[\b]/ + \b + 0: \x08 +\= Expect no match + a +No match + +/[\b]/locale=fr_FR + \b + 0: \x08 +\= Expect no match + a +No match + +/^\w+/ +\= Expect no match + École +No match + +/^\w+/locale=fr_FR + École + 0: École + +/(.+)\b(.+)/ + École + 0: \xc9cole + 1: \xc9 + 2: cole + +/(.+)\b(.+)/locale=fr_FR +\= Expect no match + École +No match + +/École/i + École + 0: \xc9cole +\= Expect no match + école +No match + +/École/i,locale=fr_FR + École + 0: École + école + 0: école + +/\w/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/\w/I,locale=fr_FR +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + ª µ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â + ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Subject length lower bound = 1 + +# All remaining tests are in the fr_FR locale, so set the default. 
+ +#pattern locale=fr_FR + +/^[\xc8-\xc9]/i + École + 0: É + école + 0: é + +/^[\xc8-\xc9]/ + École + 0: É +\= Expect no match + école +No match + +/\xb5/i + µ + 0: µ +\= Expect no match + \x9c +No match + +/\W+/ + >>>\xaa<<< + 0: >>> + >>>\xba<<< + 0: >>> + +/[\W]+/ + >>>\xaa<<< + 0: >>> + >>>\xba<<< + 0: >>> + +/[^[:alpha:]]+/ + >>>\xaa<<< + 0: >>> + >>>\xba<<< + 0: >>> + +/\w+/ + >>>\xaa<<< + 0: ª + >>>\xba<<< + 0: º + +/[\w]+/ + >>>\xaa<<< + 0: ª + >>>\xba<<< + 0: º + +/[[:alpha:]]+/ + >>>\xaa<<< + 0: ª + >>>\xba<<< + 0: º + +/[[:alpha:]][[:lower:]][[:upper:]]/IB +------------------------------------------------------------------ + Bra + [A-Za-z\x83\x8a\x8c\x8e\x9a\x9c\x9e\x9f\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff] + [a-z\x83\x9a\x9c\x9e\xaa\xb5\xba\xdf-\xf6\xf8-\xff] + [A-Z\x8a\x8c\x8e\x9f\xc0-\xd6\xd8-\xde] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z ª µ º À Á Â Ã Ä Å Æ Ç + È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í + î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Subject length lower bound = 3 + +# End of testinput3 diff --git a/testdata/testoutput4 b/testdata/testoutput4 new file mode 100644 index 0000000..5917ebb --- /dev/null +++ b/testdata/testoutput4 @@ -0,0 +1,4605 @@ +# This set of tests is for UTF support, including Unicode properties. The +# Unicode tests are all compatible with all versions of Perl >= 5.10, but +# some of the property tests may differ because of different versions of +# Unicode in use by PCRE2 and Perl. + +# WARNING: Use only / as the pattern delimiter. Although pcre2test supports +# a number of delimiters, all those other than / give problems with the +# perltest.sh script. 
+ +#newline_default lf anycrlf any +#perltest + +/a.b/utf + acb + 0: acb + a\x7fb + 0: a\x{7f}b + a\x{100}b + 0: a\x{100}b +\= Expect no match + a\nb +No match + +/a(.{3})b/utf + a\x{4000}xyb + 0: a\x{4000}xyb + 1: \x{4000}xy + a\x{4000}\x7fyb + 0: a\x{4000}\x{7f}yb + 1: \x{4000}\x{7f}y + a\x{4000}\x{100}yb + 0: a\x{4000}\x{100}yb + 1: \x{4000}\x{100}y +\= Expect no match + a\x{4000}b +No match + ac\ncb +No match + +/a(.*?)(.)/ + a\xc0\x88b + 0: a\xc0 + 1: + 2: \xc0 + +/a(.*?)(.)/utf + a\x{100}b + 0: a\x{100} + 1: + 2: \x{100} + +/a(.*)(.)/ + a\xc0\x88b + 0: a\xc0\x88b + 1: \xc0\x88 + 2: b + +/a(.*)(.)/utf + a\x{100}b + 0: a\x{100}b + 1: \x{100} + 2: b + +/a(.)(.)/ + a\xc0\x92bcd + 0: a\xc0\x92 + 1: \xc0 + 2: \x92 + +/a(.)(.)/utf + a\x{240}bcd + 0: a\x{240}b + 1: \x{240} + 2: b + +/a(.?)(.)/ + a\xc0\x92bcd + 0: a\xc0\x92 + 1: \xc0 + 2: \x92 + +/a(.?)(.)/utf + a\x{240}bcd + 0: a\x{240}b + 1: \x{240} + 2: b + +/a(.??)(.)/ + a\xc0\x92bcd + 0: a\xc0 + 1: + 2: \xc0 + +/a(.??)(.)/utf + a\x{240}bcd + 0: a\x{240} + 1: + 2: \x{240} + +/a(.{3})b/utf + a\x{1234}xyb + 0: a\x{1234}xyb + 1: \x{1234}xy + a\x{1234}\x{4321}yb + 0: a\x{1234}\x{4321}yb + 1: \x{1234}\x{4321}y + a\x{1234}\x{4321}\x{3412}b + 0: a\x{1234}\x{4321}\x{3412}b + 1: \x{1234}\x{4321}\x{3412} +\= Expect no match + a\x{1234}b +No match + ac\ncb +No match + +/a(.{3,})b/utf + a\x{1234}xyb + 0: a\x{1234}xyb + 1: \x{1234}xy + a\x{1234}\x{4321}yb + 0: a\x{1234}\x{4321}yb + 1: \x{1234}\x{4321}y + a\x{1234}\x{4321}\x{3412}b + 0: a\x{1234}\x{4321}\x{3412}b + 1: \x{1234}\x{4321}\x{3412} + axxxxbcdefghijb + 0: axxxxbcdefghijb + 1: xxxxbcdefghij + a\x{1234}\x{4321}\x{3412}\x{3421}b + 0: a\x{1234}\x{4321}\x{3412}\x{3421}b + 1: \x{1234}\x{4321}\x{3412}\x{3421} +\= Expect no match + a\x{1234}b +No match + +/a(.{3,}?)b/utf + a\x{1234}xyb + 0: a\x{1234}xyb + 1: \x{1234}xy + a\x{1234}\x{4321}yb + 0: a\x{1234}\x{4321}yb + 1: \x{1234}\x{4321}y + a\x{1234}\x{4321}\x{3412}b + 0: a\x{1234}\x{4321}\x{3412}b + 1: 
\x{1234}\x{4321}\x{3412} + axxxxbcdefghijb + 0: axxxxb + 1: xxxx + a\x{1234}\x{4321}\x{3412}\x{3421}b + 0: a\x{1234}\x{4321}\x{3412}\x{3421}b + 1: \x{1234}\x{4321}\x{3412}\x{3421} +\= Expect no match + a\x{1234}b +No match + +/a(.{3,5})b/utf + a\x{1234}xyb + 0: a\x{1234}xyb + 1: \x{1234}xy + a\x{1234}\x{4321}yb + 0: a\x{1234}\x{4321}yb + 1: \x{1234}\x{4321}y + a\x{1234}\x{4321}\x{3412}b + 0: a\x{1234}\x{4321}\x{3412}b + 1: \x{1234}\x{4321}\x{3412} + axxxxbcdefghijb + 0: axxxxb + 1: xxxx + a\x{1234}\x{4321}\x{3412}\x{3421}b + 0: a\x{1234}\x{4321}\x{3412}\x{3421}b + 1: \x{1234}\x{4321}\x{3412}\x{3421} + axbxxbcdefghijb + 0: axbxxb + 1: xbxx + axxxxxbcdefghijb + 0: axxxxxb + 1: xxxxx +\= Expect no match + a\x{1234}b +No match + axxxxxxbcdefghijb +No match + +/a(.{3,5}?)b/utf + a\x{1234}xyb + 0: a\x{1234}xyb + 1: \x{1234}xy + a\x{1234}\x{4321}yb + 0: a\x{1234}\x{4321}yb + 1: \x{1234}\x{4321}y + a\x{1234}\x{4321}\x{3412}b + 0: a\x{1234}\x{4321}\x{3412}b + 1: \x{1234}\x{4321}\x{3412} + axxxxbcdefghijb + 0: axxxxb + 1: xxxx + a\x{1234}\x{4321}\x{3412}\x{3421}b + 0: a\x{1234}\x{4321}\x{3412}\x{3421}b + 1: \x{1234}\x{4321}\x{3412}\x{3421} + axbxxbcdefghijb + 0: axbxxb + 1: xbxx + axxxxxbcdefghijb + 0: axxxxxb + 1: xxxxx +\= Expect no match + a\x{1234}b +No match + axxxxxxbcdefghijb +No match + +/^[a\x{c0}]/utf +\= Expect no match + \x{100} +No match + +/(?<=aXb)cd/utf + aXbcd + 0: cd + +/(?<=a\x{100}b)cd/utf + a\x{100}bcd + 0: cd + +/(?<=a\x{100000}b)cd/utf + a\x{100000}bcd + 0: cd + +/(?:\x{100}){3}b/utf + \x{100}\x{100}\x{100}b + 0: \x{100}\x{100}\x{100}b +\= Expect no match + \x{100}\x{100}b +No match + +/\x{ab}/utf + \x{ab} + 0: \x{ab} + \xc2\xab + 0: \x{ab} +\= Expect no match + \x00{ab} +No match + +/(?<=(.))X/utf + WXYZ + 0: X + 1: W + \x{256}XYZ + 0: X + 1: \x{256} +\= Expect no match + XYZ +No match + +/[^a]+/g,utf + bcd + 0: bcd + \x{100}aY\x{256}Z + 0: \x{100} + 0: Y\x{256}Z + +/^[^a]{2}/utf + \x{100}bc + 0: \x{100}b + +/^[^a]{2,}/utf + \x{100}bcAa + 0: 
\x{100}bcA + +/^[^a]{2,}?/utf + \x{100}bca + 0: \x{100}b + +/[^a]+/gi,utf + bcd + 0: bcd + \x{100}aY\x{256}Z + 0: \x{100} + 0: Y\x{256}Z + +/^[^a]{2}/i,utf + \x{100}bc + 0: \x{100}b + +/^[^a]{2,}/i,utf + \x{100}bcAa + 0: \x{100}bc + +/^[^a]{2,}?/i,utf + \x{100}bca + 0: \x{100}b + +/\x{100}{0,0}/utf + abcd + 0: + +/\x{100}?/utf + abcd + 0: + \x{100}\x{100} + 0: \x{100} + +/\x{100}{0,3}/utf + \x{100}\x{100} + 0: \x{100}\x{100} + \x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100} + +/\x{100}*/utf + abce + 0: + \x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100}\x{100} + +/\x{100}{1,1}/utf + abcd\x{100}\x{100}\x{100}\x{100} + 0: \x{100} + +/\x{100}{1,3}/utf + abcd\x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100} + +/\x{100}+/utf + abcd\x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100}\x{100} + +/\x{100}{3}/utf + abcd\x{100}\x{100}\x{100}XX + 0: \x{100}\x{100}\x{100} + +/\x{100}{3,5}/utf + abcd\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}XX + 0: \x{100}\x{100}\x{100}\x{100}\x{100} + +/\x{100}{3,}/utf + abcd\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}XX + 0: \x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + +/(?<=a\x{100}{2}b)X/utf,aftertext + Xyyya\x{100}\x{100}bXzzz + 0: X + 0+ zzz + +/\D*/utf + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/\D*/utf + 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 0: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + +/\D/utf + 1X2 + 0: X + 1\x{100}2 + 0: \x{100} + +/>\S/utf + > >X Y + 0: >X + > >\x{100} Y + 0: >\x{100} + +/\d/utf + \x{100}3 + 0: 3 + +/\s/utf + \x{100} X + 0: + +/\D+/utf + 12abcd34 + 0: abcd +\= Expect no match + 1234 +No match + +/\D{2,3}/utf + 12abcd34 + 0: abc + 12ab34 + 0: ab +\= Expect no match + 1234 +No match + 12a34 +No match + +/\D{2,3}?/utf + 12abcd34 + 0: ab + 12ab34 + 0: ab +\= Expect no match + 1234 +No match + 12a34 +No 
match + +/\d+/utf + 12abcd34 + 0: 12 + +/\d{2,3}/utf + 12abcd34 + 0: 12 + 1234abcd + 0: 123 +\= Expect no match + 1.4 +No match + +/\d{2,3}?/utf + 12abcd34 + 0: 12 + 1234abcd + 0: 12 +\= Expect no match + 1.4 +No match + +/\S+/utf + 12abcd34 + 0: 12abcd34 +\= Expect no match + \ \ +No match + +/\S{2,3}/utf + 12abcd34 + 0: 12a + 1234abcd + 0: 123 +\= Expect no match + \ \ +No match + +/\S{2,3}?/utf + 12abcd34 + 0: 12 + 1234abcd + 0: 12 +\= Expect no match + \ \ +No match + +/>\s+ <34 + 0: > < + 0+ 34 + +/>\s{2,3} < + 0+ cd + ab> < + 0+ ce +\= Expect no match + ab> \s{2,3}? < + 0+ cd + ab> < + 0+ ce +\= Expect no match + ab> \xff< + 0: \xff + +/[\xff]/utf + >\x{ff}< + 0: \x{ff} + +/[^\xFF]/ + XYZ + 0: X + +/[^\xff]/utf + XYZ + 0: X + \x{123} + 0: \x{123} + +/^[ac]*b/utf +\= Expect no match + xb +No match + +/^[ac\x{100}]*b/utf +\= Expect no match + xb +No match + +/^[^x]*b/i,utf +\= Expect no match + xb +No match + +/^[^x]*b/utf +\= Expect no match + xb +No match + +/^\d*b/utf +\= Expect no match + xb +No match + +/(|a)/g,utf + catac + 0: + 1: + 0: + 1: + 0: a + 1: a + 0: + 1: + 0: + 1: + 0: a + 1: a + 0: + 1: + 0: + 1: + a\x{256}a + 0: + 1: + 0: a + 1: a + 0: + 1: + 0: + 1: + 0: a + 1: a + 0: + 1: + +/^\x{85}$/i,utf + \x{85} + 0: \x{85} + +/^ሴ/utf + ሴ + 0: \x{1234} + +/^\ሴ/utf + ሴ + 0: \x{1234} + +/(?s)(.{1,5})/utf + abcdefg + 0: abcde + 1: abcde + ab + 0: ab + 1: ab + +/a*\x{100}*\w/utf + a + 0: a + +/\S\S/g,utf + A\x{a3}BC + 0: A\x{a3} + 0: BC + +/\S{2}/g,utf + A\x{a3}BC + 0: A\x{a3} + 0: BC + +/\W\W/g,utf + +\x{a3}== + 0: +\x{a3} + 0: == + +/\W{2}/g,utf + +\x{a3}== + 0: +\x{a3} + 0: == + +/\S/g,utf + \x{442}\x{435}\x{441}\x{442} + 0: \x{442} + 0: \x{435} + 0: \x{441} + 0: \x{442} + +/[\S]/g,utf + \x{442}\x{435}\x{441}\x{442} + 0: \x{442} + 0: \x{435} + 0: \x{441} + 0: \x{442} + +/\D/g,utf + \x{442}\x{435}\x{441}\x{442} + 0: \x{442} + 0: \x{435} + 0: \x{441} + 0: \x{442} + +/[\D]/g,utf + \x{442}\x{435}\x{441}\x{442} + 0: \x{442} + 0: \x{435} + 0: \x{441} + 0: 
\x{442} + +/\W/g,utf + \x{2442}\x{2435}\x{2441}\x{2442} + 0: \x{2442} + 0: \x{2435} + 0: \x{2441} + 0: \x{2442} + +/[\W]/g,utf + \x{2442}\x{2435}\x{2441}\x{2442} + 0: \x{2442} + 0: \x{2435} + 0: \x{2441} + 0: \x{2442} + +/[\S\s]*/utf + abc\n\r\x{442}\x{435}\x{441}\x{442}xyz + 0: abc\x{0a}\x{0d}\x{442}\x{435}\x{441}\x{442}xyz + +/[\x{41f}\S]/g,utf + \x{442}\x{435}\x{441}\x{442} + 0: \x{442} + 0: \x{435} + 0: \x{441} + 0: \x{442} + +/.[^\S]./g,utf + abc def\x{442}\x{443}xyz\npqr + 0: c d + 0: z\x{0a}p + +/.[^\S\n]./g,utf + abc def\x{442}\x{443}xyz\npqr + 0: c d + +/[[:^alnum:]]/g,utf + +\x{2442} + 0: + + 0: \x{2442} + +/[[:^alpha:]]/g,utf + +\x{2442} + 0: + + 0: \x{2442} + +/[[:^ascii:]]/g,utf + A\x{442} + 0: \x{442} + +/[[:^blank:]]/g,utf + A\x{442} + 0: A + 0: \x{442} + +/[[:^cntrl:]]/g,utf + A\x{442} + 0: A + 0: \x{442} + +/[[:^digit:]]/g,utf + A\x{442} + 0: A + 0: \x{442} + +/[[:^graph:]]/g,utf + \x19\x{e01ff} + 0: \x{19} + 0: \x{e01ff} + +/[[:^lower:]]/g,utf + A\x{422} + 0: A + 0: \x{422} + +/[[:^print:]]/g,utf + \x{19}\x{e01ff} + 0: \x{19} + 0: \x{e01ff} + +/[[:^punct:]]/g,utf + A\x{442} + 0: A + 0: \x{442} + +/[[:^space:]]/g,utf + A\x{442} + 0: A + 0: \x{442} + +/[[:^upper:]]/g,utf + a\x{442} + 0: a + 0: \x{442} + +/[[:^word:]]/g,utf + +\x{2442} + 0: + + 0: \x{2442} + +/[[:^xdigit:]]/g,utf + M\x{442} + 0: M + 0: \x{442} + 
+/[^ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞĀĂĄĆĈĊČĎĐĒĔĖĘĚĜĞĠĢĤĦĨĪĬĮİIJĴĶĹĻĽĿŁŃŅŇŊŌŎŐŒŔŖŘŚŜŞŠŢŤŦŨŪŬŮŰŲŴŶŸŹŻŽƁƂƄƆƇƉƊƋƎƏƐƑƓƔƖƗƘƜƝƟƠƢƤƦƧƩƬƮƯƱƲƳƵƷƸƼDŽLJNJǍǏǑǓǕǗǙǛǞǠǢǤǦǨǪǬǮDZǴǶǷǸǺǼǾȀȂȄȆȈȊȌȎȐȒȔȖȘȚȜȞȠȢȤȦȨȪȬȮȰȲȺȻȽȾɁΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫϒϓϔϘϚϜϞϠϢϤϦϨϪϬϮϴϷϹϺϽϾϿЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯѠѢѤѦѨѪѬѮѰѲѴѶѸѺѼѾҀҊҌҎҐҒҔҖҘҚҜҞҠҢҤҦҨҪҬҮҰҲҴҶҸҺҼҾӀӁӃӅӇӉӋӍӐӒӔӖӘӚӜӞӠӢӤӦӨӪӬӮӰӲӴӶӸԀԂԄԆԈԊԌԎԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖႠႡႢႣႤႥႦႧႨႩႪႫႬႭႮႯႰႱႲႳႴႵႶႷႸႹႺႻႼႽႾႿჀჁჂჃჄჅḀḂḄḆḈḊḌḎḐḒḔḖḘḚḜḞḠḢḤḦḨḪḬḮḰḲḴḶḸḺḼḾṀṂṄṆṈṊṌṎṐṒṔṖṘṚṜṞṠṢṤṦṨṪṬṮṰṲṴṶṸṺṼṾẀẂẄẆẈẊẌẎẐẒẔẠẢẤẦẨẪẬẮẰẲẴẶẸẺẼẾỀỂỄỆỈỊỌỎỐỒỔỖỘỚỜỞỠỢỤỦỨỪỬỮỰỲỴỶỸἈἉἊἋἌἍἎἏἘἙἚἛἜἝἨἩἪἫἬἭἮἯἸἹἺἻἼἽἾἿὈὉὊὋὌὍὙὛὝὟὨὩὪὫὬὭὮὯᾸᾹᾺΆῈΈῊΉῘῙῚΊῨῩῪΎῬῸΌῺΏabcdefghijklmnopqrstuvwxyzªµºßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķĸĺļľŀłńņňʼnŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷźżžſƀƃƅƈƌƍƒƕƙƚƛƞơƣƥƨƪƫƭưƴƶƹƺƽƾƿdžljnjǎǐǒǔǖǘǚǜǝǟǡǣǥǧǩǫǭǯǰdzǵǹǻǽǿȁȃȅȇȉȋȍȏȑȓȕȗșțȝȟȡȣȥȧȩȫȭȯȱȳȴȵȶȷȸȹȼȿɀɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟɠɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿʀʁʂʃʄʅʆʇʈʉʊʋʌʍʎʏʐʑʒʓʔʕʖʗʘʙʚʛʜʝʞʟʠʡʢʣʤʥʦʧʨʩʪʫʬʭʮʯΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώϐϑϕϖϗϙϛϝϟϡϣϥϧϩϫϭϯϰϱϲϳϵϸϻϼабвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӂӄӆӈӊӌӎӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹԁԃԅԇԉԋԍԏաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆևᴀᴁᴂᴃᴄᴅᴆᴇᴈᴉᴊᴋᴌᴍᴎᴏᴐᴑᴒᴓᴔᴕᴖᴗᴘᴙᴚᴛᴜᴝᴞᴟᴠᴡᴢᴣᴤᴥᴦᴧᴨᴩᴪᴫᵢᵣᵤᵥᵦᵧᵨᵩᵪᵫᵬᵭᵮᵯᵰᵱᵲᵳᵴᵵᵶᵷᵹᵺᵻᵼᵽᵾᵿᶀᶁᶂᶃᶄᶅᶆᶇᶈᶉᶊᶋᶌᶍᶎᶏᶐᶑᶒᶓᶔᶕᶖᶗᶘᶙᶚḁḃḅḇḉḋḍḏḑḓḕḗḙḛḝḟḡḣḥḧḩḫḭḯḱḳḵḷḹḻḽḿṁṃṅṇṉṋṍṏṑṓṕṗṙṛṝṟṡṣṥṧṩṫṭṯṱṳṵṷṹṻṽṿẁẃẅẇẉẋẍẏẑẓẕẖẗẘẙẚẛạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹἀἁἂἃἄἅἆἇἐἑἒἓἔἕἠἡἢἣἤἥἦἧἰἱἲἳἴἵἶἷὀὁὂὃὄὅὐὑὒὓὔὕὖὗὠὡὢὣὤὥὦὧὰάὲέὴήὶίὸόὺύὼώᾀᾁᾂᾃᾄᾅᾆᾇᾐᾑᾒᾓᾔᾕᾖᾗᾠᾡᾢᾣᾤᾥᾦᾧᾰᾱᾲᾳᾴᾶᾷιῂῃῄῆῇῐῑῒΐῖῗῠῡῢΰῤῥῦῧῲῳῴῶῷⲁⲃⲅⲇⲉⲋⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱⲳⲵⲷⲹⲻⲽⲿⳁⳃⳅⳇⳉⳋⳍⳏⳑⳓⳕⳗⳙⳛⳝⳟⳡⳣⳤⴀⴁⴂⴃⴄⴅⴆⴇⴈⴉⴊⴋⴌⴍⴎⴏⴐⴑⴒⴓⴔⴕⴖⴗⴘⴙⴚⴛⴜⴝⴞⴟⴠⴡⴢⴣⴤⴥfffiflffifflſtstﬓﬔﬕﬖﬗ\d_^]/utf + +/^[^d]*?$/ + abc + 0: abc + +/^[^d]*?$/utf + abc + 0: abc + +/^[^d]*?$/i + abc + 0: abc + +/^[^d]*?$/i,utf + abc + 0: abc + +/(?i)[\xc3\xa9\xc3\xbd]|[\xc3\xa9\xc3\xbdA]/utf + +/^[a\x{c0}]b/utf + \x{c0}b + 0: \x{c0}b + +/^([a\x{c0}]*?)aa/utf + a\x{c0}aaaa/ + 0: a\x{c0}aa + 1: a\x{c0} + +/^([a\x{c0}]*?)aa/utf + 
a\x{c0}aaaa/ + 0: a\x{c0}aa + 1: a\x{c0} + a\x{c0}a\x{c0}aaa/ + 0: a\x{c0}a\x{c0}aa + 1: a\x{c0}a\x{c0} + +/^([a\x{c0}]*)aa/utf + a\x{c0}aaaa/ + 0: a\x{c0}aaaa + 1: a\x{c0}aa + a\x{c0}a\x{c0}aaa/ + 0: a\x{c0}a\x{c0}aaa + 1: a\x{c0}a\x{c0}a + +/^([a\x{c0}]*)a\x{c0}/utf + a\x{c0}aaaa/ + 0: a\x{c0} + 1: + a\x{c0}a\x{c0}aaa/ + 0: a\x{c0}a\x{c0} + 1: a\x{c0} + +/A*/g,utf + AAB\x{123}BAA + 0: AA + 0: + 0: + 0: + 0: AA + 0: + +/(abc)\1/i,utf +\= Expect no match + abc +No match + +/(abc)\1/utf +\= Expect no match + abc +No match + +/a(*:a\x{1234}b)/utf,mark + abc + 0: a +MK: a\x{1234}b + +/a(*:a£b)/utf,mark + abc + 0: a +MK: a\x{a3}b + +# Noncharacters + +/./utf + \x{fffe} + 0: \x{fffe} + \x{ffff} + 0: \x{ffff} + \x{1fffe} + 0: \x{1fffe} + \x{1ffff} + 0: \x{1ffff} + \x{2fffe} + 0: \x{2fffe} + \x{2ffff} + 0: \x{2ffff} + \x{3fffe} + 0: \x{3fffe} + \x{3ffff} + 0: \x{3ffff} + \x{4fffe} + 0: \x{4fffe} + \x{4ffff} + 0: \x{4ffff} + \x{5fffe} + 0: \x{5fffe} + \x{5ffff} + 0: \x{5ffff} + \x{6fffe} + 0: \x{6fffe} + \x{6ffff} + 0: \x{6ffff} + \x{7fffe} + 0: \x{7fffe} + \x{7ffff} + 0: \x{7ffff} + \x{8fffe} + 0: \x{8fffe} + \x{8ffff} + 0: \x{8ffff} + \x{9fffe} + 0: \x{9fffe} + \x{9ffff} + 0: \x{9ffff} + \x{afffe} + 0: \x{afffe} + \x{affff} + 0: \x{affff} + \x{bfffe} + 0: \x{bfffe} + \x{bffff} + 0: \x{bffff} + \x{cfffe} + 0: \x{cfffe} + \x{cffff} + 0: \x{cffff} + \x{dfffe} + 0: \x{dfffe} + \x{dffff} + 0: \x{dffff} + \x{efffe} + 0: \x{efffe} + \x{effff} + 0: \x{effff} + \x{ffffe} + 0: \x{ffffe} + \x{fffff} + 0: \x{fffff} + \x{10fffe} + 0: \x{10fffe} + \x{10ffff} + 0: \x{10ffff} + \x{fdd0} + 0: \x{fdd0} + \x{fdd1} + 0: \x{fdd1} + \x{fdd2} + 0: \x{fdd2} + \x{fdd3} + 0: \x{fdd3} + \x{fdd4} + 0: \x{fdd4} + \x{fdd5} + 0: \x{fdd5} + \x{fdd6} + 0: \x{fdd6} + \x{fdd7} + 0: \x{fdd7} + \x{fdd8} + 0: \x{fdd8} + \x{fdd9} + 0: \x{fdd9} + \x{fdda} + 0: \x{fdda} + \x{fddb} + 0: \x{fddb} + \x{fddc} + 0: \x{fddc} + \x{fddd} + 0: \x{fddd} + \x{fdde} + 0: \x{fdde} + \x{fddf} + 0: \x{fddf} + \x{fde0} + 0: 
\x{fde0} + \x{fde1} + 0: \x{fde1} + \x{fde2} + 0: \x{fde2} + \x{fde3} + 0: \x{fde3} + \x{fde4} + 0: \x{fde4} + \x{fde5} + 0: \x{fde5} + \x{fde6} + 0: \x{fde6} + \x{fde7} + 0: \x{fde7} + \x{fde8} + 0: \x{fde8} + \x{fde9} + 0: \x{fde9} + \x{fdea} + 0: \x{fdea} + \x{fdeb} + 0: \x{fdeb} + \x{fdec} + 0: \x{fdec} + \x{fded} + 0: \x{fded} + \x{fdee} + 0: \x{fdee} + \x{fdef} + 0: \x{fdef} + +/^\d*\w{4}/utf + 1234 + 0: 1234 +\= Expect no match + 123 +No match + +/^[^b]*\w{4}/utf + aaaa + 0: aaaa +\= Expect no match + aaa +No match + +/^[^b]*\w{4}/i,utf + aaaa + 0: aaaa +\= Expect no match + aaa +No match + +/^\x{100}*.{4}/utf + \x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100}\x{100} +\= Expect no match + \x{100}\x{100}\x{100} +No match + +/^\x{100}*.{4}/i,utf + \x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100}\x{100} +\= Expect no match + \x{100}\x{100}\x{100} +No match + +/^a+[a\x{200}]/utf + aa + 0: aa + +/^.\B.\B./utf + \x{10123}\x{10124}\x{10125} + 0: \x{10123}\x{10124}\x{10125} + +/^#[^\x{ffff}]#[^\x{ffff}]#[^\x{ffff}]#/utf + #\x{10000}#\x{100}#\x{10ffff}# + 0: #\x{10000}#\x{100}#\x{10ffff}# + +# Unicode property support tests + +/^\pC\pL\pM\pN\pP\pS\pZ\s+/utf,ucp + >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} + 0: > \x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{09}\x{0b} + +/^>\pZ+/utf,ucp + >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} + 0: > \x{a0}\x{1680}\x{2028}\x{2029}\x{202f} + +/^>[[:space:]]*/utf,ucp + >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} + 0: > \x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{09}\x{0b} + +/^>[[:blank:]]*/utf,ucp + >\x{20}\x{a0}\x{1680}\x{2000}\x{202f}\x{9}\x{b}\x{2028} + 0: > \x{a0}\x{1680}\x{2000}\x{202f}\x{09} + +/^[[:alpha:]]*/utf,ucp + Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d} + 0: Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d} + +/^[[:alnum:]]*/utf,ucp + Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee} + 0: 
Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee} + +/^[[:cntrl:]]*/utf,ucp + \x{0}\x{09}\x{1f}\x{7f}\x{9f} + 0: \x{00}\x{09}\x{1f}\x{7f}\x{9f} + +/^[[:graph:]]*/utf,ucp + A\x{a1}\x{a0} + 0: A\x{a1} + +/^[[:print:]]*/utf,ucp + A z\x{a0}\x{a1} + 0: A z\x{a0}\x{a1} + +/^[[:punct:]]*/utf,ucp + .+\x{a1}\x{a0} + 0: .+\x{a1} + +/\p{Zs}*?\R/ +\= Expect no match + a\xFCb +No match + +/\p{Zs}*\R/ +\= Expect no match + a\xFCb +No match + +/â±¥/i,utf + â±¥ + 0: \x{2c65} + Ⱥx + 0: \x{23a} + Ⱥ + 0: \x{23a} + +/[â±¥]/i,utf + â±¥ + 0: \x{2c65} + Ⱥx + 0: \x{23a} + Ⱥ + 0: \x{23a} + +/Ⱥ/i,utf + Ⱥ + 0: \x{23a} + â±¥ + 0: \x{2c65} + +# These are tests for extended grapheme clusters + +/^\X/utf,aftertext + G\x{34e}\x{34e}X + 0: G\x{34e}\x{34e} + 0+ X + \x{34e}\x{34e}X + 0: \x{34e}\x{34e} + 0+ X + \x04X + 0: \x{04} + 0+ X + \x{1100}X + 0: \x{1100} + 0+ X + \x{1100}\x{34e}X + 0: \x{1100}\x{34e} + 0+ X + \x{1b04}\x{1b04}X + 0: \x{1b04}\x{1b04} + 0+ X + *These match up to the roman letters + 0: * + 0+ These match up to the roman letters + \x{1111}\x{1111}L,L + 0: \x{1111}\x{1111} + 0+ L,L + \x{1111}\x{1111}\x{1169}L,L,V + 0: \x{1111}\x{1111}\x{1169} + 0+ L,L,V + \x{1111}\x{ae4c}L, LV + 0: \x{1111}\x{ae4c} + 0+ L, LV + \x{1111}\x{ad89}L, LVT + 0: \x{1111}\x{ad89} + 0+ L, LVT + \x{1111}\x{ae4c}\x{1169}L, LV, V + 0: \x{1111}\x{ae4c}\x{1169} + 0+ L, LV, V + \x{1111}\x{ae4c}\x{1169}\x{1169}L, LV, V, V + 0: \x{1111}\x{ae4c}\x{1169}\x{1169} + 0+ L, LV, V, V + \x{1111}\x{ae4c}\x{1169}\x{11fe}L, LV, V, T + 0: \x{1111}\x{ae4c}\x{1169}\x{11fe} + 0+ L, LV, V, T + \x{1111}\x{ad89}\x{11fe}L, LVT, T + 0: \x{1111}\x{ad89}\x{11fe} + 0+ L, LVT, T + \x{1111}\x{ad89}\x{11fe}\x{11fe}L, LVT, T, T + 0: \x{1111}\x{ad89}\x{11fe}\x{11fe} + 0+ L, LVT, T, T + \x{ad89}\x{11fe}\x{11fe}LVT, T, T + 0: \x{ad89}\x{11fe}\x{11fe} + 0+ LVT, T, T + *These match just the first codepoint (invalid sequence) + 0: * + 0+ These match just the first codepoint (invalid sequence) + \x{1111}\x{11fe}L, T + 0: 
\x{1111} + 0+ \x{11fe}L, T + \x{ae4c}\x{1111}LV, L + 0: \x{ae4c} + 0+ \x{1111}LV, L + \x{ae4c}\x{ae4c}LV, LV + 0: \x{ae4c} + 0+ \x{ae4c}LV, LV + \x{ae4c}\x{ad89}LV, LVT + 0: \x{ae4c} + 0+ \x{ad89}LV, LVT + \x{1169}\x{1111}V, L + 0: \x{1169} + 0+ \x{1111}V, L + \x{1169}\x{ae4c}V, LV + 0: \x{1169} + 0+ \x{ae4c}V, LV + \x{1169}\x{ad89}V, LVT + 0: \x{1169} + 0+ \x{ad89}V, LVT + \x{ad89}\x{1111}LVT, L + 0: \x{ad89} + 0+ \x{1111}LVT, L + \x{ad89}\x{1169}LVT, V + 0: \x{ad89} + 0+ \x{1169}LVT, V + \x{ad89}\x{ae4c}LVT, LV + 0: \x{ad89} + 0+ \x{ae4c}LVT, LV + \x{ad89}\x{ad89}LVT, LVT + 0: \x{ad89} + 0+ \x{ad89}LVT, LVT + \x{11fe}\x{1111}T, L + 0: \x{11fe} + 0+ \x{1111}T, L + \x{11fe}\x{1169}T, V + 0: \x{11fe} + 0+ \x{1169}T, V + \x{11fe}\x{ae4c}T, LV + 0: \x{11fe} + 0+ \x{ae4c}T, LV + \x{11fe}\x{ad89}T, LVT + 0: \x{11fe} + 0+ \x{ad89}T, LVT + *Test extend and spacing mark + 0: * + 0+ Test extend and spacing mark + \x{1111}\x{ae4c}\x{0711}L, LV, extend + 0: \x{1111}\x{ae4c}\x{711} + 0+ L, LV, extend + \x{1111}\x{ae4c}\x{1b04}L, LV, spacing mark + 0: \x{1111}\x{ae4c}\x{1b04} + 0+ L, LV, spacing mark + \x{1111}\x{ae4c}\x{1b04}\x{0711}\x{1b04}L, LV, spacing mark, extend, spacing mark + 0: \x{1111}\x{ae4c}\x{1b04}\x{711}\x{1b04} + 0+ L, LV, spacing mark, extend, spacing mark + *Test CR, LF, and control + 0: * + 0+ Test CR, LF, and control + \x0d\x{0711}CR, extend + 0: \x{0d} + 0+ \x{711}CR, extend + \x0d\x{1b04}CR, spacingmark + 0: \x{0d} + 0+ \x{1b04}CR, spacingmark + \x0a\x{0711}LF, extend + 0: \x{0a} + 0+ \x{711}LF, extend + \x0a\x{1b04}LF, spacingmark + 0: \x{0a} + 0+ \x{1b04}LF, spacingmark + \x0b\x{0711}Control, extend + 0: \x{0b} + 0+ \x{711}Control, extend + \x09\x{1b04}Control, spacingmark + 0: \x{09} + 0+ \x{1b04}Control, spacingmark + *Test Extended Pictographic after bug fix + 0: * + 0+ Test Extended Pictographic after bug fix + \x{261d}\x{261d}B Extended_Pictographic Extended_Pictographic + 0: \x{261d} + 0+ \x{261d}B Extended_Pictographic Extended_Pictographic + 
\x{261D}\x{1F3FB}\x{261d}B Extended_Pictographic Extend E-P + 0: \x{261d}\x{1f3fb} + 0+ \x{261d}B Extended_Pictographic Extend E-P + \x{261D}\x{1F3FB}\x{200d}\x{261d}B Extended_Pictographic Extend ZWJ E-P + 0: \x{261d}\x{1f3fb}\x{200d}\x{261d} + 0+ B Extended_Pictographic Extend ZWJ E-P + \x{1f3f3}\x{fe0f}\x{200d}\x{1f308}\x{1f3f4}\x{200d}\x{2620}\x{fe0f}\x{1f3f3}\x{fe0f}\x{200d}\x{1f308}\x{1f3f4}\x{200d}\x{2620}\x{fe0f} + 0: \x{1f3f3}\x{fe0f}\x{200d}\x{1f308} + 0+ \x{1f3f4}\x{200d}\x{2620}\x{fe0f}\x{1f3f3}\x{fe0f}\x{200d}\x{1f308}\x{1f3f4}\x{200d}\x{2620}\x{fe0f} + A\x{200d}\x{1f308}B + 0: A\x{200d} + 0+ \x{1f308}B + A\x{200d}B A ZWJ + 0: A\x{200d} + 0+ B A ZWJ + \x{261D}\x{1F3FB}B Extended_Pictographic Extend + 0: \x{261d}\x{1f3fb} + 0+ B Extended_Pictographic Extend + \x{1F1E6}\x{1F1E7}B RegionalIndicator RegionalIndicator + 0: \x{1f1e6}\x{1f1e7} + 0+ B RegionalIndicator RegionalIndicator + *There are no Prepend characters, so we can't test Prepend, CR + 0: * + 0+ There are no Prepend characters, so we can't test Prepend, CR + +/^(?>\X{2})X/utf,aftertext + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + +/^\X{2,4}X/utf,aftertext + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + +/^\X{2,4}?X/utf,aftertext + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + +/\X*Z/utf,no_start_optimize +\= Expect no match + A\x{300} +No match + +/\X*(.)/utf,no_start_optimize + 
A\x{1111}\x{ae4c}\x{1169} + 0: A\x{1111} + 1: \x{1111} + +# -------------------------------------------- + +/\x{1e9e}+/i,utf + \x{1e9e}\x{00df} + 0: \x{1e9e}\x{df} + +/[z\x{1e9e}]+/i,utf + \x{1e9e}\x{00df} + 0: \x{1e9e}\x{df} + +/\x{00df}+/i,utf + \x{1e9e}\x{00df} + 0: \x{1e9e}\x{df} + +/[z\x{00df}]+/i,utf + \x{1e9e}\x{00df} + 0: \x{1e9e}\x{df} + +/\x{1f88}+/i,utf + \x{1f88}\x{1f80} + 0: \x{1f88}\x{1f80} + +/[z\x{1f88}]+/i,utf + \x{1f88}\x{1f80} + 0: \x{1f88}\x{1f80} + +# Check a reference with more than one other case + +/^(\x{00b5})\1{2}$/i,utf + \x{00b5}\x{039c}\x{03bc} + 0: \x{b5}\x{39c}\x{3bc} + 1: \x{b5} + +# Characters with more than one other case; test in classes + +/[z\x{00b5}]+/i,utf + \x{00b5}\x{039c}\x{03bc} + 0: \x{b5}\x{39c}\x{3bc} + +/[z\x{039c}]+/i,utf + \x{00b5}\x{039c}\x{03bc} + 0: \x{b5}\x{39c}\x{3bc} + +/[z\x{03bc}]+/i,utf + \x{00b5}\x{039c}\x{03bc} + 0: \x{b5}\x{39c}\x{3bc} + +/[z\x{00c5}]+/i,utf + \x{00c5}\x{00e5}\x{212b} + 0: \x{c5}\x{e5}\x{212b} + +/[z\x{00e5}]+/i,utf + \x{00c5}\x{00e5}\x{212b} + 0: \x{c5}\x{e5}\x{212b} + +/[z\x{212b}]+/i,utf + \x{00c5}\x{00e5}\x{212b} + 0: \x{c5}\x{e5}\x{212b} + +/[z\x{01c4}]+/i,utf + \x{01c4}\x{01c5}\x{01c6} + 0: \x{1c4}\x{1c5}\x{1c6} + +/[z\x{01c5}]+/i,utf + \x{01c4}\x{01c5}\x{01c6} + 0: \x{1c4}\x{1c5}\x{1c6} + +/[z\x{01c6}]+/i,utf + \x{01c4}\x{01c5}\x{01c6} + 0: \x{1c4}\x{1c5}\x{1c6} + +/[z\x{01c7}]+/i,utf + \x{01c7}\x{01c8}\x{01c9} + 0: \x{1c7}\x{1c8}\x{1c9} + +/[z\x{01c8}]+/i,utf + \x{01c7}\x{01c8}\x{01c9} + 0: \x{1c7}\x{1c8}\x{1c9} + +/[z\x{01c9}]+/i,utf + \x{01c7}\x{01c8}\x{01c9} + 0: \x{1c7}\x{1c8}\x{1c9} + +/[z\x{01ca}]+/i,utf + \x{01ca}\x{01cb}\x{01cc} + 0: \x{1ca}\x{1cb}\x{1cc} + +/[z\x{01cb}]+/i,utf + \x{01ca}\x{01cb}\x{01cc} + 0: \x{1ca}\x{1cb}\x{1cc} + +/[z\x{01cc}]+/i,utf + \x{01ca}\x{01cb}\x{01cc} + 0: \x{1ca}\x{1cb}\x{1cc} + +/[z\x{01f1}]+/i,utf + \x{01f1}\x{01f2}\x{01f3} + 0: \x{1f1}\x{1f2}\x{1f3} + +/[z\x{01f2}]+/i,utf + \x{01f1}\x{01f2}\x{01f3} + 0: \x{1f1}\x{1f2}\x{1f3} + 
+/[z\x{01f3}]+/i,utf + \x{01f1}\x{01f2}\x{01f3} + 0: \x{1f1}\x{1f2}\x{1f3} + +/[z\x{0345}]+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + 0: \x{345}\x{399}\x{3b9}\x{1fbe} + +/[z\x{0399}]+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + 0: \x{345}\x{399}\x{3b9}\x{1fbe} + +/[z\x{03b9}]+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + 0: \x{345}\x{399}\x{3b9}\x{1fbe} + +/[z\x{1fbe}]+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + 0: \x{345}\x{399}\x{3b9}\x{1fbe} + +/[z\x{0392}]+/i,utf + \x{0392}\x{03b2}\x{03d0} + 0: \x{392}\x{3b2}\x{3d0} + +/[z\x{03b2}]+/i,utf + \x{0392}\x{03b2}\x{03d0} + 0: \x{392}\x{3b2}\x{3d0} + +/[z\x{03d0}]+/i,utf + \x{0392}\x{03b2}\x{03d0} + 0: \x{392}\x{3b2}\x{3d0} + +/[z\x{0395}]+/i,utf + \x{0395}\x{03b5}\x{03f5} + 0: \x{395}\x{3b5}\x{3f5} + +/[z\x{03b5}]+/i,utf + \x{0395}\x{03b5}\x{03f5} + 0: \x{395}\x{3b5}\x{3f5} + +/[z\x{03f5}]+/i,utf + \x{0395}\x{03b5}\x{03f5} + 0: \x{395}\x{3b5}\x{3f5} + +/[z\x{0398}]+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + 0: \x{398}\x{3b8}\x{3d1}\x{3f4} + +/[z\x{03b8}]+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + 0: \x{398}\x{3b8}\x{3d1}\x{3f4} + +/[z\x{03d1}]+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + 0: \x{398}\x{3b8}\x{3d1}\x{3f4} + +/[z\x{03f4}]+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + 0: \x{398}\x{3b8}\x{3d1}\x{3f4} + +/[z\x{039a}]+/i,utf + \x{039a}\x{03ba}\x{03f0} + 0: \x{39a}\x{3ba}\x{3f0} + +/[z\x{03ba}]+/i,utf + \x{039a}\x{03ba}\x{03f0} + 0: \x{39a}\x{3ba}\x{3f0} + +/[z\x{03f0}]+/i,utf + \x{039a}\x{03ba}\x{03f0} + 0: \x{39a}\x{3ba}\x{3f0} + +/[z\x{03a0}]+/i,utf + \x{03a0}\x{03c0}\x{03d6} + 0: \x{3a0}\x{3c0}\x{3d6} + +/[z\x{03c0}]+/i,utf + \x{03a0}\x{03c0}\x{03d6} + 0: \x{3a0}\x{3c0}\x{3d6} + +/[z\x{03d6}]+/i,utf + \x{03a0}\x{03c0}\x{03d6} + 0: \x{3a0}\x{3c0}\x{3d6} + +/[z\x{03a1}]+/i,utf + \x{03a1}\x{03c1}\x{03f1} + 0: \x{3a1}\x{3c1}\x{3f1} + +/[z\x{03c1}]+/i,utf + \x{03a1}\x{03c1}\x{03f1} + 0: \x{3a1}\x{3c1}\x{3f1} + +/[z\x{03f1}]+/i,utf + \x{03a1}\x{03c1}\x{03f1} + 0: \x{3a1}\x{3c1}\x{3f1} + +/[z\x{03a3}]+/i,utf + 
\x{03A3}\x{03C2}\x{03C3} + 0: \x{3a3}\x{3c2}\x{3c3} + +/[z\x{03c2}]+/i,utf + \x{03A3}\x{03C2}\x{03C3} + 0: \x{3a3}\x{3c2}\x{3c3} + +/[z\x{03c3}]+/i,utf + \x{03A3}\x{03C2}\x{03C3} + 0: \x{3a3}\x{3c2}\x{3c3} + +/[z\x{03a6}]+/i,utf + \x{03a6}\x{03c6}\x{03d5} + 0: \x{3a6}\x{3c6}\x{3d5} + +/[z\x{03c6}]+/i,utf + \x{03a6}\x{03c6}\x{03d5} + 0: \x{3a6}\x{3c6}\x{3d5} + +/[z\x{03d5}]+/i,utf + \x{03a6}\x{03c6}\x{03d5} + 0: \x{3a6}\x{3c6}\x{3d5} + +/[z\x{03c9}]+/i,utf + \x{03c9}\x{03a9}\x{2126} + 0: \x{3c9}\x{3a9}\x{2126} + +/[z\x{03a9}]+/i,utf + \x{03c9}\x{03a9}\x{2126} + 0: \x{3c9}\x{3a9}\x{2126} + +/[z\x{2126}]+/i,utf + \x{03c9}\x{03a9}\x{2126} + 0: \x{3c9}\x{3a9}\x{2126} + +/[z\x{1e60}]+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + 0: \x{1e60}\x{1e61}\x{1e9b} + +/[z\x{1e61}]+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + 0: \x{1e60}\x{1e61}\x{1e9b} + +/[z\x{1e9b}]+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + 0: \x{1e60}\x{1e61}\x{1e9b} + +# Perl 5.12.4 gets these wrong, but 5.15.3 is OK + +/[z\x{004b}]+/i,utf + \x{004b}\x{006b}\x{212a} + 0: Kk\x{212a} + +/[z\x{006b}]+/i,utf + \x{004b}\x{006b}\x{212a} + 0: Kk\x{212a} + +/[z\x{212a}]+/i,utf + \x{004b}\x{006b}\x{212a} + 0: Kk\x{212a} + +/[z\x{0053}]+/i,utf + \x{0053}\x{0073}\x{017f} + 0: Ss\x{17f} + +/[z\x{0073}]+/i,utf + \x{0053}\x{0073}\x{017f} + 0: Ss\x{17f} + +/[z\x{017f}]+/i,utf + \x{0053}\x{0073}\x{017f} + 0: Ss\x{17f} + +# -------------------------------------- + +/(ΣΆΜΟΣ) \1/i,utf + ΣΆΜΟΣ ΣΆΜΟΣ + 0: \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} + 1: \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} + ΣΆΜΟΣ σάμος + 0: \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} + 1: \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} + σάμος σάμος + 0: \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} + 1: \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} + σάμος σάμοσ + 0: \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c3} + 1: \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} + σάμος ΣΆΜΟΣ + 0: 
\x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} + 1: \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} + +/(σάμος) \1/i,utf + ΣΆΜΟΣ ΣΆΜΟΣ + 0: \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} + 1: \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} + ΣΆΜΟΣ σάμος + 0: \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} + 1: \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} + σάμος σάμος + 0: \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} + 1: \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} + σάμος σάμοσ + 0: \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c3} + 1: \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} + σάμος ΣΆΜΟΣ + 0: \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} + 1: \x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} + +/(ΣΆΜΟΣ) \1*/i,utf + ΣΆΜΟΣ\x20 + 0: \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} + 1: \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} + ΣΆΜΟΣ ΣΆΜΟΣσάμοςσάμος + 0: \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3}\x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2}\x{3c3}\x{3ac}\x{3bc}\x{3bf}\x{3c2} + 1: \x{3a3}\x{386}\x{39c}\x{39f}\x{3a3} + +# Perl matches these + +/\x{00b5}+/i,utf + \x{00b5}\x{039c}\x{03bc} + 0: \x{b5}\x{39c}\x{3bc} + +/\x{039c}+/i,utf + \x{00b5}\x{039c}\x{03bc} + 0: \x{b5}\x{39c}\x{3bc} + +/\x{03bc}+/i,utf + \x{00b5}\x{039c}\x{03bc} + 0: \x{b5}\x{39c}\x{3bc} + + +/\x{00c5}+/i,utf + \x{00c5}\x{00e5}\x{212b} + 0: \x{c5}\x{e5}\x{212b} + +/\x{00e5}+/i,utf + \x{00c5}\x{00e5}\x{212b} + 0: \x{c5}\x{e5}\x{212b} + +/\x{212b}+/i,utf + \x{00c5}\x{00e5}\x{212b} + 0: \x{c5}\x{e5}\x{212b} + + +/\x{01c4}+/i,utf + \x{01c4}\x{01c5}\x{01c6} + 0: \x{1c4}\x{1c5}\x{1c6} + +/\x{01c5}+/i,utf + \x{01c4}\x{01c5}\x{01c6} + 0: \x{1c4}\x{1c5}\x{1c6} + +/\x{01c6}+/i,utf + \x{01c4}\x{01c5}\x{01c6} + 0: \x{1c4}\x{1c5}\x{1c6} + + +/\x{01c7}+/i,utf + \x{01c7}\x{01c8}\x{01c9} + 0: \x{1c7}\x{1c8}\x{1c9} + +/\x{01c8}+/i,utf + \x{01c7}\x{01c8}\x{01c9} + 0: \x{1c7}\x{1c8}\x{1c9} + +/\x{01c9}+/i,utf + 
\x{01c7}\x{01c8}\x{01c9} + 0: \x{1c7}\x{1c8}\x{1c9} + + +/\x{01ca}+/i,utf + \x{01ca}\x{01cb}\x{01cc} + 0: \x{1ca}\x{1cb}\x{1cc} + +/\x{01cb}+/i,utf + \x{01ca}\x{01cb}\x{01cc} + 0: \x{1ca}\x{1cb}\x{1cc} + +/\x{01cc}+/i,utf + \x{01ca}\x{01cb}\x{01cc} + 0: \x{1ca}\x{1cb}\x{1cc} + + +/\x{01f1}+/i,utf + \x{01f1}\x{01f2}\x{01f3} + 0: \x{1f1}\x{1f2}\x{1f3} + +/\x{01f2}+/i,utf + \x{01f1}\x{01f2}\x{01f3} + 0: \x{1f1}\x{1f2}\x{1f3} + +/\x{01f3}+/i,utf + \x{01f1}\x{01f2}\x{01f3} + 0: \x{1f1}\x{1f2}\x{1f3} + + +/\x{0345}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + 0: \x{345}\x{399}\x{3b9}\x{1fbe} + +/\x{0399}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + 0: \x{345}\x{399}\x{3b9}\x{1fbe} + +/\x{03b9}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + 0: \x{345}\x{399}\x{3b9}\x{1fbe} + +/\x{1fbe}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + 0: \x{345}\x{399}\x{3b9}\x{1fbe} + + +/\x{0392}+/i,utf + \x{0392}\x{03b2}\x{03d0} + 0: \x{392}\x{3b2}\x{3d0} + +/\x{03b2}+/i,utf + \x{0392}\x{03b2}\x{03d0} + 0: \x{392}\x{3b2}\x{3d0} + +/\x{03d0}+/i,utf + \x{0392}\x{03b2}\x{03d0} + 0: \x{392}\x{3b2}\x{3d0} + + +/\x{0395}+/i,utf + \x{0395}\x{03b5}\x{03f5} + 0: \x{395}\x{3b5}\x{3f5} + +/\x{03b5}+/i,utf + \x{0395}\x{03b5}\x{03f5} + 0: \x{395}\x{3b5}\x{3f5} + +/\x{03f5}+/i,utf + \x{0395}\x{03b5}\x{03f5} + 0: \x{395}\x{3b5}\x{3f5} + + +/\x{0398}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + 0: \x{398}\x{3b8}\x{3d1}\x{3f4} + +/\x{03b8}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + 0: \x{398}\x{3b8}\x{3d1}\x{3f4} + +/\x{03d1}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + 0: \x{398}\x{3b8}\x{3d1}\x{3f4} + +/\x{03f4}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + 0: \x{398}\x{3b8}\x{3d1}\x{3f4} + + +/\x{039a}+/i,utf + \x{039a}\x{03ba}\x{03f0} + 0: \x{39a}\x{3ba}\x{3f0} + +/\x{03ba}+/i,utf + \x{039a}\x{03ba}\x{03f0} + 0: \x{39a}\x{3ba}\x{3f0} + +/\x{03f0}+/i,utf + \x{039a}\x{03ba}\x{03f0} + 0: \x{39a}\x{3ba}\x{3f0} + + +/\x{03a0}+/i,utf + \x{03a0}\x{03c0}\x{03d6} + 0: \x{3a0}\x{3c0}\x{3d6} + +/\x{03c0}+/i,utf + 
\x{03a0}\x{03c0}\x{03d6} + 0: \x{3a0}\x{3c0}\x{3d6} + +/\x{03d6}+/i,utf + \x{03a0}\x{03c0}\x{03d6} + 0: \x{3a0}\x{3c0}\x{3d6} + + +/\x{03a1}+/i,utf + \x{03a1}\x{03c1}\x{03f1} + 0: \x{3a1}\x{3c1}\x{3f1} + +/\x{03c1}+/i,utf + \x{03a1}\x{03c1}\x{03f1} + 0: \x{3a1}\x{3c1}\x{3f1} + +/\x{03f1}+/i,utf + \x{03a1}\x{03c1}\x{03f1} + 0: \x{3a1}\x{3c1}\x{3f1} + + +/\x{03a3}+/i,utf + \x{03A3}\x{03C2}\x{03C3} + 0: \x{3a3}\x{3c2}\x{3c3} + +/\x{03c2}+/i,utf + \x{03A3}\x{03C2}\x{03C3} + 0: \x{3a3}\x{3c2}\x{3c3} + +/\x{03c3}+/i,utf + \x{03A3}\x{03C2}\x{03C3} + 0: \x{3a3}\x{3c2}\x{3c3} + + +/\x{03a6}+/i,utf + \x{03a6}\x{03c6}\x{03d5} + 0: \x{3a6}\x{3c6}\x{3d5} + +/\x{03c6}+/i,utf + \x{03a6}\x{03c6}\x{03d5} + 0: \x{3a6}\x{3c6}\x{3d5} + +/\x{03d5}+/i,utf + \x{03a6}\x{03c6}\x{03d5} + 0: \x{3a6}\x{3c6}\x{3d5} + + +/\x{03c9}+/i,utf + \x{03c9}\x{03a9}\x{2126} + 0: \x{3c9}\x{3a9}\x{2126} + +/\x{03a9}+/i,utf + \x{03c9}\x{03a9}\x{2126} + 0: \x{3c9}\x{3a9}\x{2126} + +/\x{2126}+/i,utf + \x{03c9}\x{03a9}\x{2126} + 0: \x{3c9}\x{3a9}\x{2126} + + +/\x{1e60}+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + 0: \x{1e60}\x{1e61}\x{1e9b} + +/\x{1e61}+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + 0: \x{1e60}\x{1e61}\x{1e9b} + +/\x{1e9b}+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + 0: \x{1e60}\x{1e61}\x{1e9b} + + +/\x{1e9e}+/i,utf + \x{1e9e}\x{00df} + 0: \x{1e9e}\x{df} + +/\x{00df}+/i,utf + \x{1e9e}\x{00df} + 0: \x{1e9e}\x{df} + + +/\x{1f88}+/i,utf + \x{1f88}\x{1f80} + 0: \x{1f88}\x{1f80} + +/\x{1f80}+/i,utf + \x{1f88}\x{1f80} + 0: \x{1f88}\x{1f80} + +# Perl 5.12.4 gets these wrong, but 5.15.3 is OK + +/\x{004b}+/i,utf + \x{004b}\x{006b}\x{212a} + 0: Kk\x{212a} + +/\x{006b}+/i,utf + \x{004b}\x{006b}\x{212a} + 0: Kk\x{212a} + +/\x{212a}+/i,utf + \x{004b}\x{006b}\x{212a} + 0: Kk\x{212a} + + +/\x{0053}+/i,utf + \x{0053}\x{0073}\x{017f} + 0: Ss\x{17f} + +/\x{0073}+/i,utf + \x{0053}\x{0073}\x{017f} + 0: Ss\x{17f} + +/\x{017f}+/i,utf + \x{0053}\x{0073}\x{017f} + 0: Ss\x{17f} + +/^\p{Any}*\d{4}/utf + 1234 + 0: 1234 +\= Expect no match + 123 
+No match + +/^\X*\w{4}/utf + 1234 + 0: 1234 +\= Expect no match + 123 +No match + +/^A\s+Z/utf,ucp + A\x{2005}Z + 0: A\x{2005}Z + A\x{85}\x{2005}Z + 0: A\x{85}\x{2005}Z + +/^A[\s]+Z/utf,ucp + A\x{2005}Z + 0: A\x{2005}Z + A\x{85}\x{2005}Z + 0: A\x{85}\x{2005}Z + +/^[[:graph:]]+$/utf,ucp + Letter:ABC + 0: Letter:ABC + Mark:\x{300}\x{1d172}\x{1d17b} + 0: Mark:\x{300}\x{1d172}\x{1d17b} + Number:9\x{660} + 0: Number:9\x{660} + Punctuation:\x{66a},; + 0: Punctuation:\x{66a},; + Symbol:\x{6de}<>\x{fffc} + 0: Symbol:\x{6de}<>\x{fffc} + Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} + 0: Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} + \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} + 0: \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} + \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} + 0: \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} + \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} + 0: \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} + \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} + 0: \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} + \x{feff} + 0: \x{feff} + \x{fff9}\x{fffa}\x{fffb} + 0: \x{fff9}\x{fffa}\x{fffb} + \x{110bd} + 0: \x{110bd} + \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} + 0: \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} + \x{e0001} + 0: \x{e0001} + \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} + 0: \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} +\= Expect no match + \x{09} +No match + \x{0a} +No match + \x{1D} +No match + \x{20} +No match + \x{85} +No match + \x{a0} +No match + \x{1680} +No match + \x{2028} +No match + \x{2029} +No match + \x{202f} +No match + \x{2065} +No match + \x{3000} +No match + \x{e0002} +No match + \x{e001f} +No match + \x{e0080} +No match + +/^[[:print:]]+$/utf,ucp + Space: \x{a0} + 0: Space: \x{a0} + \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} + 0: \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} + 
\x{2006}\x{2007}\x{2008}\x{2009}\x{200a} + 0: \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} + \x{202f}\x{205f} + 0: \x{202f}\x{205f} + \x{3000} + 0: \x{3000} + Letter:ABC + 0: Letter:ABC + Mark:\x{300}\x{1d172}\x{1d17b} + 0: Mark:\x{300}\x{1d172}\x{1d17b} + Number:9\x{660} + 0: Number:9\x{660} + Punctuation:\x{66a},; + 0: Punctuation:\x{66a},; + Symbol:\x{6de}<>\x{fffc} + 0: Symbol:\x{6de}<>\x{fffc} + Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} + 0: Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} + \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} + 0: \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} + \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} + 0: \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} + \x{202f} + 0: \x{202f} + \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} + 0: \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} + \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} + 0: \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} + \x{feff} + 0: \x{feff} + \x{fff9}\x{fffa}\x{fffb} + 0: \x{fff9}\x{fffa}\x{fffb} + \x{110bd} + 0: \x{110bd} + \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} + 0: \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} + \x{e0001} + 0: \x{e0001} + \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} + 0: \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} +\= Expect no match + \x{09} +No match + \x{1D} +No match + \x{85} +No match + \x{2028} +No match + \x{2029} +No match + \x{2065} +No match + \x{e0002} +No match + \x{e001f} +No match + \x{e0080} +No match + +/^[[:punct:]]+$/utf,ucp + \$+<=>^`|~ + 0: $+<=>^`|~ + !\"#%&'()*,-./:;?@[\\]_{} + 0: !"#%&'()*,-./:;?@[\]_{} + \x{a1}\x{a7} + 0: \x{a1}\x{a7} + \x{37e} + 0: \x{37e} +\= Expect no match + abcde +No match + +/^[[:^graph:]]+$/utf,ucp + \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{1680} + 0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{1680} + \x{2028}\x{2029}\x{202f}\x{2065} + 0: \x{2028}\x{2029}\x{202f}\x{2065} + 
\x{3000}\x{e0002}\x{e001f}\x{e0080} + 0: \x{3000}\x{e0002}\x{e001f}\x{e0080} +\= Expect no match + Letter:ABC +No match + Mark:\x{300}\x{1d172}\x{1d17b} +No match + Number:9\x{660} +No match + Punctuation:\x{66a},; +No match + Symbol:\x{6de}<>\x{fffc} +No match + Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} +No match + \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} +No match + \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} +No match + \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} +No match + \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} +No match + \x{feff} +No match + \x{fff9}\x{fffa}\x{fffb} +No match + \x{110bd} +No match + \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} +No match + \x{e0001} +No match + \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} +No match + +/^[[:^print:]]+$/utf,ucp + \x{09}\x{1D}\x{85}\x{2028}\x{2029}\x{2065} + 0: \x{09}\x{1d}\x{85}\x{2028}\x{2029}\x{2065} + \x{e0002}\x{e001f}\x{e0080} + 0: \x{e0002}\x{e001f}\x{e0080} +\= Expect no match + Space: \x{a0} +No match + \x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005} +No match + \x{2006}\x{2007}\x{2008}\x{2009}\x{200a} +No match + \x{202f}\x{205f} +No match + \x{3000} +No match + Letter:ABC +No match + Mark:\x{300}\x{1d172}\x{1d17b} +No match + Number:9\x{660} +No match + Punctuation:\x{66a},; +No match + Symbol:\x{6de}<>\x{fffc} +No match + Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f} +No match + \x{200b}\x{200c}\x{200d}\x{200e}\x{200f} +No match + \x{202a}\x{202b}\x{202c}\x{202d}\x{202e} +No match + \x{202f} +No match + \x{2060}\x{2061}\x{2062}\x{2063}\x{2064} +No match + \x{206a}\x{206b}\x{206c}\x{206d}\x{206e}\x{206f} +No match + \x{feff} +No match + \x{fff9}\x{fffa}\x{fffb} +No match + \x{110bd} +No match + \x{1d173}\x{1d174}\x{1d175}\x{1d176}\x{1d177}\x{1d178}\x{1d179}\x{1d17a} +No match + \x{e0001} +No match + \x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f} +No match + 
+/^[[:^punct:]]+$/utf,ucp + abcde + 0: abcde +\= Expect no match + \$+<=>^`|~ +No match + !\"#%&'()*,-./:;?@[\\]_{} +No match + \x{a1}\x{a7} +No match + \x{37e} +No match + +/[RST]+/i,utf,ucp + Ss\x{17f} + 0: Ss\x{17f} + +/[R-T]+/i,utf,ucp + Ss\x{17f} + 0: Ss\x{17f} + +/[q-u]+/i,utf,ucp + Ss\x{17f} + 0: Ss\x{17f} + +/^s?c/im,utf + scat + 0: sc + +# The next four tests are for repeated caseless back references when the +# code unit length of the matched text is different to that of the original +# group in the UTF-8 case. + +/^(\x{23a})\1*(.)/i,utf + \x{23a}\x{23a}\x{23a}\x{23a} + 0: \x{23a}\x{23a}\x{23a}\x{23a} + 1: \x{23a} + 2: \x{23a} + \x{23a}\x{2c65}\x{2c65}\x{2c65} + 0: \x{23a}\x{2c65}\x{2c65}\x{2c65} + 1: \x{23a} + 2: \x{2c65} + \x{23a}\x{23a}\x{2c65}\x{23a} + 0: \x{23a}\x{23a}\x{2c65}\x{23a} + 1: \x{23a} + 2: \x{23a} + +/^(\x{23a})\1*(..)/i,utf + \x{23a}\x{2c65}\x{2c65}\x{2c65} + 0: \x{23a}\x{2c65}\x{2c65}\x{2c65} + 1: \x{23a} + 2: \x{2c65}\x{2c65} + \x{23a}\x{23a}\x{2c65}\x{23a} + 0: \x{23a}\x{23a}\x{2c65}\x{23a} + 1: \x{23a} + 2: \x{2c65}\x{23a} + +/^(\x{23a})\1*(...)/i,utf + \x{23a}\x{2c65}\x{2c65}\x{2c65} + 0: \x{23a}\x{2c65}\x{2c65}\x{2c65} + 1: \x{23a} + 2: \x{2c65}\x{2c65}\x{2c65} + \x{23a}\x{23a}\x{2c65}\x{23a} + 0: \x{23a}\x{23a}\x{2c65}\x{23a} + 1: \x{23a} + 2: \x{23a}\x{2c65}\x{23a} + +/^(\x{23a})\1*(....)/i,utf +\= Expect no match + \x{23a}\x{2c65}\x{2c65}\x{2c65} +No match + \x{23a}\x{23a}\x{2c65}\x{23a} +No match + +/[A-`]/i,utf + abcdefghijklmno + 0: a + +/[\S\V\H]/utf + +/[^\p{Any}]*+x/utf + x + 0: x + +/[[:punct:]]/utf,ucp + \x{b4} +No match + +/[[:^ascii:]]/utf,ucp + \x{100} + 0: \x{100} + \x{200} + 0: \x{200} + \x{300} + 0: \x{300} + \x{37e} + 0: \x{37e} +\= Expect no match + aa +No match + 99 +No match + +/[[:^ascii:]\w]/utf,ucp + aa + 0: a + 99 + 0: 9 + gg + 0: g + \x{100} + 0: \x{100} + \x{200} + 0: \x{200} + \x{300} + 0: \x{300} + \x{37e} + 0: \x{37e} + +/[\w[:^ascii:]]/utf,ucp + aa + 0: a + 99 + 0: 9 + gg + 0: g + \x{100} + 0: \x{100} 
+ \x{200} + 0: \x{200} + \x{300} + 0: \x{300} + \x{37e} + 0: \x{37e} + +/[^[:ascii:]\W]/utf,ucp + \x{100} + 0: \x{100} + \x{200} + 0: \x{200} +\= Expect no match + aa +No match + 99 +No match + gg +No match + \x{37e} +No match + +/[^[:^ascii:]\d]/utf,ucp + a + 0: a + ~ + 0: ~ + \a + 0: \x{07} + \x{7f} + 0: \x{7f} +\= Expect no match + 0 +No match + \x{389} +No match + \x{20ac} +No match + +/(?=.*b)\pL/ + 11bb + 0: b + +/(?(?=.*b)(?=.*b)\pL|.*c)/ + 11bb + 0: b + +/^\x{123}+?$/utf,no_auto_possess + \x{123}\x{123}\x{123} + 0: \x{123}\x{123}\x{123} + +/^\x{123}+?$/i,utf,no_auto_possess + \x{123}\x{122}\x{123} + 0: \x{123}\x{122}\x{123} +\= Expect no match + \x{123}\x{124}\x{123} +No match + +/\N{U+1234}/utf + \x{1234} + 0: \x{1234} + +/[\N{U+1234}]/utf + \x{1234} + 0: \x{1234} + +# Test the full list of Unicode "Pattern White Space" characters that are to +# be ignored by /x. The pattern lines below may show up oddly in text editors +# or when listed to the screen. Note that characters such as U+2002, which are +# matched as space by \h and \v are *not* "Pattern White Space". + +/A…‎‏

B/x,utf + AB + 0: AB + +/A B/x,utf + A\x{2002}B + 0: A\x{2002}B +\= Expect no match + AB +No match + +# ------- + +/[^\x{100}-\x{ffff}]*[\x80-\xff]/utf + \x{99}\x{99}\x{99} + 0: \x{99}\x{99}\x{99} + +/[^\x{100}-\x{ffff}ABC]*[\x80-\xff]/utf + \x{99}\x{99}\x{99} + 0: \x{99}\x{99}\x{99} + +/[^\x{100}-\x{ffff}]*[\x80-\xff]/i,utf + \x{99}\x{99}\x{99} + 0: \x{99}\x{99}\x{99} + +# Script run tests + +/^(*script_run:.{4})/utf + abcd Latin x4 + 0: abcd + \x{2e80}\x{2fa1d}\x{3041}\x{30a1} Han Han Hiragana Katakana + 0: \x{2e80}\x{2fa1d}\x{3041}\x{30a1} + \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han + 0: \x{3041}\x{30a1}\x{3007}\x{3007} + \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han + 0: \x{30a1}\x{3041}\x{3007}\x{3007} + \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul + 0: \x{1100}\x{2e80}\x{2e80}\x{1101} + \x{2e80}\x{3105}\x{2e80}\x{3105} Han Bopomofo Han Bopomofo + 0: \x{2e80}\x{3105}\x{2e80}\x{3105} + \x{02ea}\x{2e80}\x{2e80}\x{3105} Bopomofo-Sk Han Han Bopomofo + 0: \x{2ea}\x{2e80}\x{2e80}\x{3105} + \x{3105}\x{2e80}\x{2e80}\x{3105} Bopomofo Han Han Bopomofo + 0: \x{3105}\x{2e80}\x{2e80}\x{3105} + \x{0300}cd! Inherited Latin Latin Common + 0: \x{300}cd! 
+ \x{0391}12\x{03a9} Greek Common-digits Greek + 0: \x{391}12\x{3a9} + \x{0400}12\x{fe2f} Cyrillic Common-digits Cyrillic + 0: \x{400}12\x{fe2f} + \x{0531}12\x{fb17} Armenian Common-digits Armenian + 0: \x{531}12\x{fb17} + \x{0591}12\x{fb4f} Hebrew Common-digits Hebrew + 0: \x{591}12\x{fb4f} + \x{0600}12\x{1eef1} Arabic Common-digits Arabic + 0: \x{600}12\x{1eef1} + \x{0600}\x{0660}\x{0669}\x{1eef1} Arabic Arabic-digits Arabic + 0: \x{600}\x{660}\x{669}\x{1eef1} + \x{0700}12\x{086a} Syriac Common-digits Syriac + 0: \x{700}12\x{86a} + \x{1200}12\x{ab2e} Ethiopic Common-digits Ethiopic + 0: \x{1200}12\x{ab2e} + \x{1680}12\x{169c} Ogham Common-digits Ogham + 0: \x{1680}12\x{169c} + \x{3041}12\x{3041} Hiragana Common-digits Hiragana + 0: \x{3041}12\x{3041} + \x{0980}\x{09e6}\x{09e7}\x{0993} Bengali Bengali-digits Bengali + 0: \x{980}\x{9e6}\x{9e7}\x{993} + !cde Common Latin Latin Latin + 0: !cde + A..B Latin Common Common Latin + 0: A..B + 0abc Ascii-digit Latin Latin Latin + 0: 0abc + 1\x{0700}\x{0700}\x{0700} Ascii-digit Syriac x 3 + 0: 1\x{700}\x{700}\x{700} + \x{1A80}\x{1A80}\x{1a40}\x{1a41} Tai Tham Hora digits, letters + 0: \x{1a80}\x{1a80}\x{1a40}\x{1a41} +\= Expect no match + a\x{370}bcd Latin Greek Latin Latin +No match + \x{1100}\x{02ea}\x{02ea}\x{02ea} Hangul Bopomofo x3 +No match + \x{02ea}\x{02ea}\x{02ea}\x{1100} Bopomofo x3 Hangul +No match + \x{1100}\x{2e80}\x{3041}\x{1101} Hangul Han Hiragana Hangul +No match + \x{0391}\x{09e6}\x{09e7}\x{03a9} Greek Bengali digits Greek +No match + \x{0600}7\x{0669}\x{1eef1} Arabic ascii-digit Arabic-digit Arabic +No match + \x{0600}\x{0669}7\x{1eef1} Arabic Arabic-digit ascii-digit Arabic +No match + A5\x{ff19}B Latin Common-ascii/notascii-digits Latin +No match + \x{0300}cd\x{0391} Inherited Latin Latin Greek +No match + !cd\x{0391} Common Latin Latin Greek +No match + \x{1A80}\x{1A90}\x{1a40}\x{1a41} Tai Tham Hora digit, Tham digit, letters +No match + A\x{1d7ce}\x{1d7ff}B Common fancy-common-2-sets-digits Common +No 
match + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana +No match + +/^(*sr:.{4}|..)/utf + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana + 0: \x{2e80}\x{3105} + +/^(*atomic_script_run:.{4}|..)/utf +\= Expect no match + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana +No match + +/^(*asr:.*)/utf +\= Expect no match + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana +No match + +/^(?>(*sr:.*))/utf + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana + 0: \x{2e80}\x{3105}\x{2e80} + +/^(*sr:.*)/utf + \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana + 0: \x{2e80}\x{3105}\x{2e80} + \x{10fffd}\x{10fffd}\x{10fffd} Private use (Unknown) + 0: \x{10fffd} + +/^(*sr:\x{2e80}*)/utf + \x{2e80}\x{2e80}\x{3105} Han Han Bopomofo + 0: \x{2e80}\x{2e80} + +/^(*sr:\x{2e80}*)\x{2e80}/utf + \x{2e80}\x{2e80}\x{3105} Han Han Bopomofo + 0: \x{2e80}\x{2e80} + +/^(*sr:.*)Test/utf + Test script run on an empty string + 0: Test + +/^(*sr:(.{2})){2}/utf + \x{0600}7\x{0669}\x{1eef1} Arabic ascii-digit Arabic-digit Arabic + 0: \x{600}7\x{669}\x{1eef1} + 1: \x{669}\x{1eef1} + \x{1A80}\x{1A80}\x{1a40}\x{1a41} Tai Tham Hora digits, letters + 0: \x{1a80}\x{1a80}\x{1a40}\x{1a41} + 1: \x{1a40}\x{1a41} + \x{1A80}\x{1a40}\x{1A90}\x{1a41} Tai Tham Hora digit, letter, Tham digit, letter + 0: \x{1a80}\x{1a40}\x{1a90}\x{1a41} + 1: \x{1a90}\x{1a41} +\= Expect no match + \x{1100}\x{2e80}\x{3041}\x{1101} Hangul Han Hiragana Hangul +No match + +/^(*sr:\S*)/utf + \x{1cf4}\x{20f0}\x{900}\x{11305} [Dev,Gran,Kan] [Dev,Gran,Lat] Dev Gran + 0: \x{1cf4}\x{20f0}\x{900} + \x{1cf4}\x{20f0}\x{11305}\x{900} [Dev,Gran,Kan] [Dev,Gran,Lat] Gran Dev + 0: \x{1cf4}\x{20f0}\x{11305} + \x{1cf4}\x{20f0}\x{900}ABC [Dev,Gran,Kan] [Dev,Gran,Lat] Dev Lat + 0: \x{1cf4}\x{20f0}\x{900} + \x{1cf4}\x{20f0}ABC [Dev,Gran,Kan] [Dev,Gran,Lat] Lat + 0: \x{1cf4}\x{20f0} + \x{20f0}ABC [Dev,Gran,Lat] Lat + 0: \x{20f0}ABC + XYZ\x{20f0}ABC Lat [Dev,Gran,Lat] Lat + 0: XYZ\x{20f0}ABC + 
\x{a36}\x{a33}\x{900} [Dev,...] [Dev,...] Dev + 0: \x{a36}\x{a33} + \x{3001}\x{2e80}\x{3041}\x{30a1} [Bopo, Han, etc] Han Hira Kata + 0: \x{3001}\x{2e80}\x{3041}\x{30a1} + \x{3001}\x{30a1}\x{2e80}\x{3041} [Bopo, Han, etc] Kata Han Hira + 0: \x{3001}\x{30a1}\x{2e80}\x{3041} + \x{3001}\x{3105}\x{2e80}\x{1101} [Bopo, Han, etc] Bopomofo Han Hangul + 0: \x{3001}\x{3105}\x{2e80} + \x{3105}\x{3001}\x{2e80}\x{1101} Bopomofo [Bopo, Han, etc] Han Hangul + 0: \x{3105}\x{3001}\x{2e80} + \x{3031}\x{3041}\x{30a1}\x{2e80} [Hira Kata] Hira Kata Han + 0: \x{3031}\x{3041}\x{30a1}\x{2e80} + \x{060c}\x{06d4}\x{0600}\x{10d00}\x{0700} [Arab Rohg Syrc Thaa] [Arab Rohg] Arab Rohg Syrc + 0: \x{60c}\x{6d4}\x{600} + \x{060c}\x{06d4}\x{0700}\x{0600}\x{10d00} [Arab Rohg Syrc Thaa] [Arab Rohg] Syrc Arab Rohg + 0: \x{60c}\x{6d4} + \x{2e80}\x{3041}\x{3001}\x{3031}\x{2e80} Han Hira [Bopo, Han, etc] [Hira Kata] Han + 0: \x{2e80}\x{3041}\x{3001}\x{3031}\x{2e80} + +/(?\x{202c}<-- + 0: \x{202c} + +/\p{bidicontrol}+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/\p{bidic}+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066} + +/\p{bidi_control}++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/[\p{bidi_c}]/utf + -->\x{202c}<-- + 0: \x{202c} + +/[\p{bidicontrol}]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/[\p{bidicontrol}]+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c} + 
-->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066} + +/[\p{bidicontrol}]++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/[\p{bidicontrol}<>]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: >\x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}< + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: >\x{2066}\x{2067}\x{2068}\x{2069}< + +/\P{bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: --> + 0: <-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: --> + 0: <-- + +/\p{^bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: --> + 0: <-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: --> + 0: <-- + +/\p{bidi class = al}/utf + -->\x{061D}<-- + 0: \x{61d} + +/\p{bc = al}+/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d}\x{61e}\x{61f} + +/\p{bidi_class : AL}+?/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d} + +/\p{Bidi_Class : AL}++/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d}\x{61e}\x{61f} + +/\p{b_c = aN}+/utf + -->\x{061D}\x{0602}\x{0604}\x{061f}<-- + 0: \x{602}\x{604} + +/\p{bidi class = B}+/utf + -->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<-- + 0: \x{0a}\x{0d}\x{1c}\x{1e}\x{85}\x{2029} + +/\p{bidi class:BN}+/utf + -->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<-- + 0: \x{00}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff} + +/\p{bidiclass:cs}+/utf + -->,.\x{060c}\x{ff1a}<-- + 0: ,.\x{60c}\x{ff1a} + +/\p{bidiclass:En}+/utf + -->09\x{b2}\x{2074}\x{1fbf9}<-- + 0: 09\x{b2}\x{2074}\x{1fbf9} + +/\p{bidiclass:es}+/utf + ==>+-\x{207a}\x{ff0d}<== + 0: +-\x{207a}\x{ff0d} + +/\p{bidiclass:et}+/utf + -->#\{24}%\x{a2}\x{A838}\x{1e2ff}<-- + 0: # + +/\p{bidiclass:FSI}+/utf + -->\x{2068}<-- + 0: \x{2068} + +/\p{bidi class:L}+/utf + -->ABC<-- + 0: ABC + +/\P{bidi class:L}+/utf + -->ABC<-- + 0: --> + +/\p{bidi 
class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf + -->\x{202a}\x{2066}\x{202d}<-- + 0: \x{202a}\x{2066}\x{202d} + +/\p{bidi class:NSM}+/utf + -->\x{9bc}\x{a71}\x{e31}<-- + 0: \x{9bc}\x{a71}\x{e31} + +/\p{bidi class:ON}+/utf + -->\x{21}'()*;@\x{384}\x{2039}<=- + 0: >!'()*;@\x{384}\x{2039}<= + +/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf + -->\x{202c}\x{2069}<-- + 0: \x{202c}\x{2069} + +/\p{bidi class:R}+/utf + -->\x{590}\x{5c6}\x{200f}\x{10805}<-- + 0: \x{590}\x{5c6}\x{200f}\x{10805} + +/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf + -->\x{202b}\x{2067}\x{202e}<-- + 0: \x{202b}\x{2067}\x{202e} + +/\p{bidi class:S}+\p{bidiclass:WS}+/utf + -->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<-- + 0: \x{09}\x{0b}\x{1f} \x{0c} \x{2000} \x{3000} + +# ----------------------------------------------------------------------------- + +/[\p{taml}\p{sc:ugar}]+/utf + \x{0b82}\x{10380} + 0: \x{b82}\x{10380} + +/^[\p{sc:Arabic}]/utf +\= Expect no match + \x{650} +No match + \x{651} +No match + \x{652} +No match + \x{653} +No match + \x{654} +No match + \x{655} +No match + +# ----------------------------------------------------------------------------- +# Tests for newly-added Boolean Properties + +/\p{ahex}\p{asciihexdigit}/utf + >4F< + 0: 4F + +/\p{alpha}\p{alphabetic}/g,utf + >AB<>\x{148}\x{1234} + 0: AB + 0: \x{148}\x{1234} + +/\p{ascii}\p{ascii}/g,utf + >AB<>\x{148}\x{1234} + 0: >A + 0: B< + +/\p{Bidi_C}\p{bidicontrol}/g,utf + >\x{202d}\x{2069}< + 0: \x{202d}\x{2069} + +/\p{Bidi_M}\p{bidimirrored}/g,utf + >\x{202d}\x{2069}<>\x{298b}\x{bb}< + 0: <> + 0: \x{298b}\x{bb} + +/\p{cased}\p{cased}/g,utf + >AN<>\x{149}\x{120}< + 0: AN + 0: \x{149}\x{120} + +/\p{caseignorable}\p{ci}/g,utf + >AN<>\x{60}\x{859}< + 0: `\x{859} + +/\p{changeswhencasefolded}\p{cwcf}/g,utf + >AN<>\x{149}\x{120}< + 0: AN + 0: \x{149}\x{120} + +/\p{changeswhencasemapped}\p{cwcm}/g,utf + >AN<>\x{149}\x{120}< + 0: AN + 0: \x{149}\x{120} + +/\p{changeswhenlowercased}\p{cwl}/g,utf + >AN<>\x{149}\x{120}<>yz< + 
0: AN + +/\p{changeswhenuppercased}\p{cwu}/g,utf + >AN<>\x{149}\x{120}<>yz< + 0: yz + +/\p{changeswhentitlecased}\p{cwt}/g,utf + >AN<>\x{149}\x{120}<>yz< + 0: yz + +/\p{dash}\p{dash}/g,utf + >\x{2d}\x{1400}<>yz< + 0: -\x{1400} + +/\p{defaultignorablecodepoint}\p{di}/g,utf + >AN<>\x{ad}\x{e0fff}<>yz< + 0: \x{ad}\x{e0fff} + +/\p{deprecated}\p{dep}/g,utf + >AN<>\x{149}\x{e0001}<>yz< + 0: \x{149}\x{e0001} + +/\p{diacritic}\p{dia}/g,utf + >AN<>\x{f84}\x{5e}<>yz< + 0: \x{f84}^ + +/\p{emojicomponent}\p{ecomp}/g,utf + >AN<>\x{200d}\x{e007f}<>yz< + 0: \x{200d}\x{e007f} + +/\p{emojimodifier}\p{emod}/g,utf + >AN<>\x{1f3fb}\x{1f3ff}<>yz< + 0: \x{1f3fb}\x{1f3ff} + +/\p{emojipresentation}\p{epres}/g,utf + >AN<>\x{2653}\x{1f6d2}<>yz< + 0: \x{2653}\x{1f6d2} + +/\p{extender}\p{ext}/g,utf + >AN<>\x{1e944}\x{b7}<>yz< + 0: \x{1e944}\x{b7} + +/\p{extendedpictographic}\p{extpict}/g,utf + >AN<>\x{26cf}\x{ae}<>yz< + 0: \x{26cf}\x{ae} + +/\p{graphemebase}\p{grbase}/g,utf + >AN<>\x{10f}\x{60}<>yz< + 0: >A + 0: N< + 0: >\x{10f} + 0: `< + 0: >y + 0: z< + +/\p{graphemeextend}\p{grext}/g,utf + >AN<>\x{300}\x{b44}<>yz< + 0: \x{300}\x{b44} + +/\p{hexdigit}\p{hex}/g,utf + >AF23<>\x{ff46}\x{ff10}<>yz< + 0: AF + 0: 23 + 0: \x{ff46}\x{ff10} + +/\p{idcontinue}\p{idc}/g,utf + >AF23<>\x{146}\x{7a}<>yz< + 0: AF + 0: 23 + 0: \x{146}z + 0: yz + +/\p{ideographic}\p{ideo}/g,utf + >AF23<>\x{30000}\x{3006}<>yz< + 0: \x{30000}\x{3006} + +/\p{idstart}\p{ids}/g,utf + >AF23<>\x{146}\x{7a}<>yz< + 0: AF + 0: \x{146}z + 0: yz + +/\p{idsbinaryoperator}\p{idsb}/g,utf + >AF23<>\x{2ff0}\x{2ffb}<>yz<\x{2ff2}\x{2ff1} + 0: \x{2ff0}\x{2ffb} + +/\p{idstrinaryoperator}\p{idst}/g,utf + >AF23<>\x{2ff2}\x{2ff3}<>yz< + 0: \x{2ff2}\x{2ff3} + +/\p{Join Control}\p{joinc}/g,utf + >AF23<>\x{200c}\x{200d}<>yz< + 0: \x{200c}\x{200d} + +/\p{logical_order_exception}\p{loe}/g,utf + >AF23<>\x{e40}\x{aabc}<>yz< + 0: \x{e40}\x{aabc} + +/\p{Lowercase}\p{lower}/g,utf + >AF23<>\x{146}\x{7a}<>yz< + 0: \x{146}z + 0: yz + +/\p{math}\p{math}/g,utf + 
>AF23<>\x{2215}\x{2b}<>yz< + 0: <> + 0: \x{2215}+ + 0: <> + +/\p{Non Character Code Point}\p{nchar}/g,utf + >AF23<>\x{10ffff}\x{fdd0}<>yz< + 0: \x{10ffff}\x{fdd0} + +/\p{patternsyntax}\p{patsyn}/g,utf + >AF23<>\x{21cd}\x{21}<>yz< + 0: <> + 0: \x{21cd}! + 0: <> + +/\p{patternwhitespace}\p{patws}/g,utf + >AF23<>\x{2029}\x{85}<>yz< + 0: \x{2029}\x{85} + +/\p{prependedconcatenationmark}\p{pcm}/g,utf + >AF23<>\x{600}\x{110cd}<>yz< + 0: \x{600}\x{110cd} + +/\p{quotationmark}\p{qmark}/g,utf + >AF23<>\x{ff63}\x{22}<>yz< + 0: \x{ff63}" + +/\p{radical}\p{radical}/g,utf + >AF23<>\x{2fd5}\x{2e80}<>yz< + 0: \x{2fd5}\x{2e80} + +/\p{regionalindicator}\p{ri}/g,utf + >AF23<>\x{1f1e6}\x{1f1ff}<>yz< + 0: \x{1f1e6}\x{1f1ff} + +/=\p{whitespace}\p{space}\p{wspace}=/g,utf + >AF23<=\x{d}\x{1680}\x{3000}=>yz< + 0: =\x{0d}\x{1680}\x{3000}= + +/\p{sentenceterminal}\p{sterm}/g,utf + >AF23<>\x{1da88}\x{2e}<>yz< + 0: \x{1da88}. + +/\p{terminalpunctuation}\p{term}/g,utf + >AF23<>\x{1da88}\x{2e}<>yz< + 0: \x{1da88}. + +/\p{unified ideograph}\p{uideo}/g,utf + >AF23<>\x{30000}\x{3400}<>yz< + 0: \x{30000}\x{3400} + +/\p{UPPERcase}\p{upper}/g,utf + >AF23<>\x{146}\x{7a}<>yz< + 0: AF + +/\p{variationselector}\p{vs}/g,utf + >AF23<>\x{180b}\x{e01ef}<>yz< + 0: \x{180b}\x{e01ef} + +/\p{xidcontinue}\p{xidc}/g,utf + >AF23<>\x{146}\x{30}<>yz< + 0: AF + 0: 23 + 0: \x{146}0 + 0: yz + +# ----------------------------------------------------------------------------- +# Variable-length lookbehinds. 
+ +/(?<=áb?c).../g,utf + ábcdèfgácxyz + 0: d\x{e8}f + 0: xyz + +/(?<=PQR|áb?c).../g,utf + ábcdèfgácxyzPQR123 + 0: d\x{e8}f + 0: xyz + 0: 123 + +/(?<=áb?c|PQR).../g,utf + ábcdèfgácxyzPQR123 + 0: d\x{e8}f + 0: xyz + 0: 123 + +/(?<=PQ|áb?c).../g,utf + ábcdèfgácxyzPQR123 + 0: d\x{e8}f + 0: xyz + 0: R12 + +/(?<=áb?c|PQ).../g,utf + ábcdèfgácxyzPQR123 + 0: d\x{e8}f + 0: xyz + 0: R12 + +/(?<=á(b?c|d?è?è)f)X./g,utf + ácfX1zzzáèfX2zzzádèèfX3zzzX4zzz + 0: X1 + 1: c + 0: X2 + 1: \x{e8} + 0: X3 + 1: d\x{e8}\x{e8} + +/(?[[:blank:]]*/utf,ucp + >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} + 0: > \x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{09} + +/^A\s+Z/utf,ucp + A\x{85}\x{180e}\x{2005}Z + 0: A\x{85}\x{180e}\x{2005}Z + +/^A[\s]+Z/utf,ucp + A\x{2005}Z + 0: A\x{2005}Z + A\x{85}\x{2005}Z + 0: A\x{85}\x{2005}Z + +/^[[:graph:]]+$/utf,ucp +\= Expect no match + \x{180e} +No match + +/^[[:print:]]+$/utf,ucp + \x{180e} + 0: \x{180e} + +/^[[:^graph:]]+$/utf,ucp + \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e} + 0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680}\x{180e} + +/^[[:^print:]]+$/utf,ucp +\= Expect no match + \x{180e} +No match + +# End of U+180E tests. 
+ +# --------------------------------------------------------------------- + +/\x{110000}/IB,utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/\o{4200000}/IB,utf +Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large + +/\x{ffffffff}/utf +Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large + +/\o{37777777777}/utf +Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large + +/\x{100000000}/utf +Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + +/\o{77777777777}/utf +Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large + +/\x{d800}/utf +Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) + +/\o{154000}/utf +Failed: error 173 at offset 9: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) + +/\x{dfff}/utf +Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) + +/\o{157777}/utf +Failed: error 173 at offset 9: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) + +/\x{d7ff}/utf + +/\o{153777}/utf + +/\x{e000}/utf + +/\o{170000}/utf + +/^\x{100}a\x{1234}/utf + \x{100}a\x{1234}bcd + 0: \x{100}a\x{1234} + +/\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf +------------------------------------------------------------------ + Bra + A\x{2262}\x{391}. + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' +Subject length lower bound = 4 + \x{0041}\x{2262}\x{0391}\x{002e} + 0: A\x{2262}\x{391}. 
+ +/.{3,5}X/IB,utf +------------------------------------------------------------------ + Bra + Any{3} + Any{0,2} + X + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Last code unit = 'X' +Subject length lower bound = 4 + \x{212ab}\x{212ab}\x{212ab}\x{861}X + 0: \x{212ab}\x{212ab}\x{212ab}\x{861}X + +/.{3,5}?/IB,utf +------------------------------------------------------------------ + Bra + Any{3} + Any{0,2}? + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Subject length lower bound = 3 + \x{212ab}\x{212ab}\x{212ab}\x{861} + 0: \x{212ab}\x{212ab}\x{212ab} + +/^[ab]/IB,utf +------------------------------------------------------------------ + Bra + ^ + [ab] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: utf +Overall options: anchored utf +Starting code units: a b +Subject length lower bound = 1 + bar + 0: b +\= Expect no match + c +No match + \x{ff} +No match + \x{100} +No match + +/\x{100}*(\d+|"(?1)")/utf + 1234 + 0: 1234 + 1: 1234 + "1234" + 0: "1234" + 1: "1234" + \x{100}1234 + 0: \x{100}1234 + 1: 1234 + "\x{100}1234" + 0: \x{100}1234 + 1: 1234 + \x{100}\x{100}12ab + 0: \x{100}\x{100}12 + 1: 12 + \x{100}\x{100}"12" + 0: \x{100}\x{100}"12" + 1: "12" +\= Expect no match + \x{100}\x{100}abcd +No match + +/\x{100}*/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Options: utf +Subject length lower bound = 0 + +/a\x{100}*/IB,utf +------------------------------------------------------------------ + Bra + a + \x{100}*+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'a' +Subject length lower bound 
= 1 + +/ab\x{100}*/IB,utf +------------------------------------------------------------------ + Bra + ab + \x{100}*+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + +/[\x{200}-\x{100}]/utf +Failed: error 108 at offset 15: range out of order in character class + +/[Ā-Ą]/utf + \x{100} + 0: \x{100} + \x{104} + 0: \x{104} +\= Expect no match + \x{105} +No match + \x{ff} +No match + +/[\xFF]/IB +------------------------------------------------------------------ + Bra + \x{ff} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = \xff +Subject length lower bound = 1 + >\xff< + 0: \xff + +/[^\xFF]/IB +------------------------------------------------------------------ + Bra + [^\x{ff}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Subject length lower bound = 1 + +/[Ä-Ü]/utf + Ö # Matches without Study + 0: \x{d6} + \x{d6} + 0: \x{d6} + +/[Ä-Ü]/utf + Ö <-- Same with Study + 0: \x{d6} + \x{d6} + 0: \x{d6} + +/[\x{c4}-\x{dc}]/utf + Ö # Matches without Study + 0: \x{d6} + \x{d6} + 0: \x{d6} + +/[\x{c4}-\x{dc}]/utf + Ö <-- Same with Study + 0: \x{d6} + \x{d6} + 0: \x{d6} + +/[^\x{100}]abc(xyz(?1))/IB,utf +------------------------------------------------------------------ + Bra + [^\x{100}] + abc + CBra 1 + xyz + Recurse + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: utf +Last code unit = 'z' +Subject length lower bound = 7 + +/(\x{100}(b(?2)c))?/IB,utf +------------------------------------------------------------------ + Bra + Brazero + CBra 1 + \x{100} + CBra 2 + b + Recurse + c + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +May match empty string +Options: 
utf +Subject length lower bound = 0 + +/(\x{100}(b(?2)c)){0,2}/IB,utf +------------------------------------------------------------------ + Bra + Brazero + Bra + CBra 1 + \x{100} + CBra 2 + b + Recurse + c + Ket + Ket + Brazero + CBra 1 + \x{100} + CBra 2 + b + Recurse + c + Ket + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +May match empty string +Options: utf +Subject length lower bound = 0 + +/(\x{100}(b(?1)c))?/IB,utf +------------------------------------------------------------------ + Bra + Brazero + CBra 1 + \x{100} + CBra 2 + b + Recurse + c + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +May match empty string +Options: utf +Subject length lower bound = 0 + +/(\x{100}(b(?1)c)){0,2}/IB,utf +------------------------------------------------------------------ + Bra + Brazero + Bra + CBra 1 + \x{100} + CBra 2 + b + Recurse + c + Ket + Ket + Brazero + CBra 1 + \x{100} + CBra 2 + b + Recurse + c + Ket + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +May match empty string +Options: utf +Subject length lower bound = 0 + +/\W/utf + A.B + 0: . + A\x{100}B + 0: \x{100} + +/\w/utf + \x{100}X + 0: X + +# Use no_start_optimize because the first code unit is different in 8-bit from +# the wider modes. 
+ +/^\ሴ/IB,utf,no_start_optimize +------------------------------------------------------------------ + Bra + ^ + \x{1234} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: no_start_optimize utf +Overall options: anchored no_start_optimize utf + +/()()()()()()()()()() + ()()()()()()()()()() + ()()()()()()()()()() + ()()()()()()()()()() + A (x) (?41) B/x,utf + AxxB +Matched, but too many substrings + 0: AxxB + 1: + 2: + 3: + 4: + 5: + 6: + 7: + 8: + 9: +10: +11: +12: +13: +14: + +/^[\x{100}\E-\Q\E\x{150}]/B,utf +------------------------------------------------------------------ + Bra + ^ + [\x{100}-\x{150}] + Ket + End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E]/B,utf +------------------------------------------------------------------ + Bra + ^ + [\x{100}-\x{150}] + Ket + End +------------------------------------------------------------------ + +/^abc./gmx,newline=any,utf + abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK + 0: abc1 + 0: abc2 + 0: abc3 + 0: abc4 + 0: abc5 + 0: abc6 + 0: abc7 + 0: abc8 + 0: abc9 + +/abc.$/gmx,newline=any,utf + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9 + 0: abc1 + 0: abc2 + 0: abc3 + 0: abc4 + 0: abc5 + 0: abc6 + 0: abc7 + 0: abc8 + 0: abc9 + +/^a\Rb/bsr=unicode,utf + a\nb + 0: a\x{0a}b + a\rb + 0: a\x{0d}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x0bb + 0: a\x{0b}b + a\x0cb + 0: a\x{0c}b + a\x{85}b + 0: a\x{85}b + a\x{2028}b + 0: a\x{2028}b + a\x{2029}b + 0: a\x{2029}b +\= Expect no match + a\n\rb +No match + +/^a\R*b/bsr=unicode,utf + ab + 0: ab + a\nb + 0: a\x{0a}b + a\rb + 0: a\x{0d}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x0bb + 0: a\x{0b}b + a\x0c\x{2028}\x{2029}b + 0: a\x{0c}\x{2028}\x{2029}b + a\x{85}b + 0: a\x{85}b + a\n\rb + 0: a\x{0a}\x{0d}b + a\n\r\x{85}\x0cb + 0: a\x{0a}\x{0d}\x{85}\x{0c}b + +/^a\R+b/bsr=unicode,utf + 
a\nb + 0: a\x{0a}b + a\rb + 0: a\x{0d}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x0bb + 0: a\x{0b}b + a\x0c\x{2028}\x{2029}b + 0: a\x{0c}\x{2028}\x{2029}b + a\x{85}b + 0: a\x{85}b + a\n\rb + 0: a\x{0a}\x{0d}b + a\n\r\x{85}\x0cb + 0: a\x{0a}\x{0d}\x{85}\x{0c}b +\= Expect no match + ab +No match + +/^a\R{1,3}b/bsr=unicode,utf + a\nb + 0: a\x{0a}b + a\n\rb + 0: a\x{0a}\x{0d}b + a\n\r\x{85}b + 0: a\x{0a}\x{0d}\x{85}b + a\r\n\r\nb + 0: a\x{0d}\x{0a}\x{0d}\x{0a}b + a\r\n\r\n\r\nb + 0: a\x{0d}\x{0a}\x{0d}\x{0a}\x{0d}\x{0a}b + a\n\r\n\rb + 0: a\x{0a}\x{0d}\x{0a}\x{0d}b + a\n\n\r\nb + 0: a\x{0a}\x{0a}\x{0d}\x{0a}b +\= Expect no match + a\n\n\n\rb +No match + a\r +No match + +/\H\h\V\v/utf + X X\x0a + 0: X X\x{0a} + X\x09X\x0b + 0: X\x{09}X\x{0b} +\= Expect no match + \x{a0} X\x0a +No match + +/\H*\h+\V?\v{3,4}/utf + \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a + 0: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}\x{0d} + \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a + 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}\x{0d} + \x09\x20\x{a0}\x0a\x0b\x0c + 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c} +\= Expect no match + \x09\x20\x{a0}\x0a\x0b +No match + +/\H\h\V\v/utf + \x{3001}\x{3000}\x{2030}\x{2028} + 0: \x{3001}\x{3000}\x{2030}\x{2028} + X\x{180e}X\x{85} + 0: X\x{180e}X\x{85} +\= Expect no match + \x{2009} X\x0a +No match + +/\H*\h+\V?\v{3,4}/utf + \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a + 0: \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x{0c}\x{0d} + \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a + 0: \x{09}\x{205f}\x{a0}\x{0a}\x{2029}\x{0c}\x{2028} + \x09\x20\x{202f}\x0a\x0b\x0c + 0: \x{09} \x{202f}\x{0a}\x{0b}\x{0c} +\= Expect no match + \x09\x{200a}\x{a0}\x{2028}\x0b +No match + +/[\h]/B,utf +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] + Ket + End +------------------------------------------------------------------ + >\x{1680} + 0: \x{1680} + +/[\h]{3,}/B,utf +------------------------------------------------------------------ 
+ Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]{3,}+ + Ket + End +------------------------------------------------------------------ + >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}< + 0: \x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000} + +/[\v]/B,utf +------------------------------------------------------------------ + Bra + [\x0a-\x0d\x85\x{2028}-\x{2029}] + Ket + End +------------------------------------------------------------------ + +/[\H]/B,utf +------------------------------------------------------------------ + Bra + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] + Ket + End +------------------------------------------------------------------ + +/[\V]/B,utf +------------------------------------------------------------------ + Bra + [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}] + Ket + End +------------------------------------------------------------------ + +/.*$/newline=any,utf + \x{1ec5} + 0: \x{1ec5} + +/a\Rb/I,bsr=anycrlf,utf +Capture group count = 0 +Options: utf +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\rb + 0: a\x{0d}b + a\nb + 0: a\x{0a}b + a\r\nb + 0: a\x{0d}\x{0a}b +\= Expect no match + a\x{85}b +No match + a\x0bb +No match + +/a\Rb/I,bsr=unicode,utf +Capture group count = 0 +Options: utf +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\rb + 0: a\x{0d}b + a\nb + 0: a\x{0a}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x{85}b + 0: a\x{85}b + a\x0bb + 0: a\x{0b}b + +/a\R?b/I,bsr=anycrlf,utf +Capture group count = 0 +Options: utf +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + a\rb + 0: a\x{0d}b + a\nb + 0: a\x{0a}b + a\r\nb + 0: a\x{0d}\x{0a}b +\= Expect no match + a\x{85}b +No match + 
a\x0bb +No match + +/a\R?b/I,bsr=unicode,utf +Capture group count = 0 +Options: utf +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + a\rb + 0: a\x{0d}b + a\nb + 0: a\x{0a}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x{85}b + 0: a\x{85}b + a\x0bb + 0: a\x{0b}b + +/.*a.*=.b.*/utf,newline=any + QQQ\x{2029}ABCaXYZ=!bPQR + 0: ABCaXYZ=!bPQR +\= Expect no match + a\x{2029}b +No match + \x61\xe2\x80\xa9\x62 +No match + +/[[:a\x{100}b:]]/utf +Failed: error 130 at offset 3: unknown POSIX class name + +/a[^]b/utf,allow_empty_class,match_unset_backref + a\x{1234}b + 0: a\x{1234}b + a\nb + 0: a\x{0a}b +\= Expect no match + ab +No match + +/a[^]+b/utf,allow_empty_class,match_unset_backref + aXb + 0: aXb + a\nX\nX\x{1234}b + 0: a\x{0a}X\x{0a}X\x{1234}b +\= Expect no match + ab +No match + +/(\x{de})\1/ + \x{de}\x{de} + 0: \xde\xde + 1: \xde + +/X/newline=any,utf,firstline + A\x{1ec5}ABCXYZ + 0: X + +/Xa{2,4}b/utf + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/Xa{2,4}?b/utf + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/Xa{2,4}+b/utf + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X\x{123}{2,4}b/utf + X\=ps +Partial match: X + X\x{123}\=ps +Partial match: X\x{123} + X\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123} + X\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123} + X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123}\x{123} + +/X\x{123}{2,4}?b/utf + X\=ps +Partial match: X + X\x{123}\=ps +Partial match: X\x{123} + X\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123} + X\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123} + X\x{123}\x{123}\x{123}\x{123}\=ps +Partial 
match: X\x{123}\x{123}\x{123}\x{123} + +/X\x{123}{2,4}+b/utf + X\=ps +Partial match: X + X\x{123}\=ps +Partial match: X\x{123} + X\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123} + X\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123} + X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123}\x{123} + +/X\x{123}{2,4}b/utf +\= Expect no match + Xx\=ps +No match + X\x{123}x\=ps +No match + X\x{123}\x{123}x\=ps +No match + X\x{123}\x{123}\x{123}x\=ps +No match + X\x{123}\x{123}\x{123}\x{123}x\=ps +No match + +/X\x{123}{2,4}?b/utf +\= Expect no match + Xx\=ps +No match + X\x{123}x\=ps +No match + X\x{123}\x{123}x\=ps +No match + X\x{123}\x{123}\x{123}x\=ps +No match + X\x{123}\x{123}\x{123}\x{123}x\=ps +No match + +/X\x{123}{2,4}+b/utf +\= Expect no match + Xx\=ps +No match + X\x{123}x\=ps +No match + X\x{123}\x{123}x\=ps +No match + X\x{123}\x{123}\x{123}x\=ps +No match + X\x{123}\x{123}\x{123}\x{123}x\=ps +No match + +/X\d{2,4}b/utf + X\=ps +Partial match: X + X3\=ps +Partial match: X3 + X33\=ps +Partial match: X33 + X333\=ps +Partial match: X333 + X3333\=ps +Partial match: X3333 + +/X\d{2,4}?b/utf + X\=ps +Partial match: X + X3\=ps +Partial match: X3 + X33\=ps +Partial match: X33 + X333\=ps +Partial match: X333 + X3333\=ps +Partial match: X3333 + +/X\d{2,4}+b/utf + X\=ps +Partial match: X + X3\=ps +Partial match: X3 + X33\=ps +Partial match: X33 + X333\=ps +Partial match: X333 + X3333\=ps +Partial match: X3333 + +/X\D{2,4}b/utf + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X\D{2,4}?b/utf + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X\D{2,4}+b/utf + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X\D{2,4}b/utf + X\=ps 
+Partial match: X + X\x{123}\=ps +Partial match: X\x{123} + X\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123} + X\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123} + X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123}\x{123} + +/X\D{2,4}?b/utf + X\=ps +Partial match: X + X\x{123}\=ps +Partial match: X\x{123} + X\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123} + X\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123} + X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123}\x{123} + +/X\D{2,4}+b/utf + X\=ps +Partial match: X + X\x{123}\=ps +Partial match: X\x{123} + X\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123} + X\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123} + X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123}\x{123} + +/X[abc]{2,4}b/utf + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X[abc]{2,4}?b/utf + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X[abc]{2,4}+b/utf + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X[abc\x{123}]{2,4}b/utf + X\=ps +Partial match: X + X\x{123}\=ps +Partial match: X\x{123} + X\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123} + X\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123} + X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123}\x{123} + +/X[abc\x{123}]{2,4}?b/utf + X\=ps +Partial match: X + X\x{123}\=ps +Partial match: X\x{123} + X\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123} + X\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123} + X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123}\x{123} + +/X[abc\x{123}]{2,4}+b/utf + X\=ps +Partial 
match: X + X\x{123}\=ps +Partial match: X\x{123} + X\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123} + X\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123} + X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123}\x{123} + +/X[^a]{2,4}b/utf + X\=ps +Partial match: X + Xz\=ps +Partial match: Xz + Xzz\=ps +Partial match: Xzz + Xzzz\=ps +Partial match: Xzzz + Xzzzz\=ps +Partial match: Xzzzz + +/X[^a]{2,4}?b/utf + X\=ps +Partial match: X + Xz\=ps +Partial match: Xz + Xzz\=ps +Partial match: Xzz + Xzzz\=ps +Partial match: Xzzz + Xzzzz\=ps +Partial match: Xzzzz + +/X[^a]{2,4}+b/utf + X\=ps +Partial match: X + Xz\=ps +Partial match: Xz + Xzz\=ps +Partial match: Xzz + Xzzz\=ps +Partial match: Xzzz + Xzzzz\=ps +Partial match: Xzzzz + +/X[^a]{2,4}b/utf + X\=ps +Partial match: X + X\x{123}\=ps +Partial match: X\x{123} + X\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123} + X\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123} + X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123}\x{123} + +/X[^a]{2,4}?b/utf + X\=ps +Partial match: X + X\x{123}\=ps +Partial match: X\x{123} + X\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123} + X\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123} + X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123}\x{123} + +/X[^a]{2,4}+b/utf + X\=ps +Partial match: X + X\x{123}\=ps +Partial match: X\x{123} + X\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123} + X\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123} + X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: X\x{123}\x{123}\x{123}\x{123} + +/(Y)X\1{2,4}b/utf + YX\=ps +Partial match: YX + YXY\=ps +Partial match: YXY + YXYY\=ps +Partial match: YXYY + YXYYY\=ps +Partial match: YXYYY + YXYYYY\=ps +Partial match: YXYYYY + +/(Y)X\1{2,4}?b/utf + YX\=ps +Partial match: YX + YXY\=ps +Partial match: YXY + YXYY\=ps +Partial match: YXYY + YXYYY\=ps +Partial match: YXYYY + YXYYYY\=ps +Partial match: YXYYYY 
+ +/(Y)X\1{2,4}+b/utf + YX\=ps +Partial match: YX + YXY\=ps +Partial match: YXY + YXYY\=ps +Partial match: YXYY + YXYYY\=ps +Partial match: YXYYY + YXYYYY\=ps +Partial match: YXYYYY + +/(\x{123})X\1{2,4}b/utf + \x{123}X\=ps +Partial match: \x{123}X + \x{123}X\x{123}\=ps +Partial match: \x{123}X\x{123} + \x{123}X\x{123}\x{123}\=ps +Partial match: \x{123}X\x{123}\x{123} + \x{123}X\x{123}\x{123}\x{123}\=ps +Partial match: \x{123}X\x{123}\x{123}\x{123} + \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123} + +/(\x{123})X\1{2,4}?b/utf + \x{123}X\=ps +Partial match: \x{123}X + \x{123}X\x{123}\=ps +Partial match: \x{123}X\x{123} + \x{123}X\x{123}\x{123}\=ps +Partial match: \x{123}X\x{123}\x{123} + \x{123}X\x{123}\x{123}\x{123}\=ps +Partial match: \x{123}X\x{123}\x{123}\x{123} + \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123} + +/(\x{123})X\1{2,4}+b/utf + \x{123}X\=ps +Partial match: \x{123}X + \x{123}X\x{123}\=ps +Partial match: \x{123}X\x{123} + \x{123}X\x{123}\x{123}\=ps +Partial match: \x{123}X\x{123}\x{123} + \x{123}X\x{123}\x{123}\x{123}\=ps +Partial match: \x{123}X\x{123}\x{123}\x{123} + \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps +Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123} + +/\bthe cat\b/utf + the cat\=ps + 0: the cat + the cat\=ph +Partial match: the cat + +/abcd*/utf + xxxxabcd\=ps + 0: abcd + xxxxabcd\=ph +Partial match: abcd + +/abcd*/i,utf + xxxxabcd\=ps + 0: abcd + xxxxabcd\=ph +Partial match: abcd + XXXXABCD\=ps + 0: ABCD + XXXXABCD\=ph +Partial match: ABCD + +/abc\d*/utf + xxxxabc1\=ps + 0: abc1 + xxxxabc1\=ph +Partial match: abc1 + +/(a)bc\1*/utf + xxxxabca\=ps + 0: abca + 1: a + xxxxabca\=ph +Partial match: abca + +/abc[de]*/utf + xxxxabcde\=ps + 0: abcde + xxxxabcde\=ph +Partial match: abcde + +/X\W{3}X/utf + X\=ps +Partial match: X + +/\sxxx\s/utf,tables=2 + AB\x{85}xxx\x{a0}XYZ + 0: \x{85}xxx\x{a0} + AB\x{a0}xxx\x{85}XYZ + 0: \x{a0}xxx\x{85} + +/\S 
\S/utf,tables=2 + \x{a2} \x{84} + 0: \x{a2} \x{84} + +'A#хц'Bx,newline=any,utf +------------------------------------------------------------------ + Bra + A + Ket + End +------------------------------------------------------------------ + +'A#хц + PQ'Bx,newline=any,utf +------------------------------------------------------------------ + Bra + APQ + Ket + End +------------------------------------------------------------------ + +/a+#хaa + z#XX?/Bx,newline=any,utf +------------------------------------------------------------------ + Bra + a++ + z + Ket + End +------------------------------------------------------------------ + +/a+#хaa + z#х?/Bx,newline=any,utf +------------------------------------------------------------------ + Bra + a++ + z + Ket + End +------------------------------------------------------------------ + +/\g{A}xxx#bXX(?'A'123) (?'A'456)/Bx,newline=any,utf +------------------------------------------------------------------ + Bra + \1 + xxx + CBra 1 + 456 + Ket + Ket + End +------------------------------------------------------------------ + +/\g{A}xxx#bх(?'A'123) (?'A'456)/Bx,newline=any,utf +------------------------------------------------------------------ + Bra + \1 + xxx + CBra 1 + 456 + Ket + Ket + End +------------------------------------------------------------------ + +/^\cÄ£/utf +Failed: error 168 at offset 3: \c must be followed by a printable ASCII character + +/(\R*)(.)/s,utf + \r\n + 0: \x{0d} + 1: + 2: \x{0d} + \r\r\n\n\r + 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} + 1: \x{0d}\x{0d}\x{0a}\x{0a} + 2: \x{0d} + \r\r\n\n\r\n + 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} + 1: \x{0d}\x{0d}\x{0a}\x{0a} + 2: \x{0d} + +/(\R)*(.)/s,utf + \r\n + 0: \x{0d} + 1: + 2: \x{0d} + \r\r\n\n\r + 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} + 1: \x{0a} + 2: \x{0d} + \r\r\n\n\r\n + 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} + 1: \x{0a} + 2: \x{0d} + +/[^\x{1234}]+/Ii,utf +Capture group count = 0 +Options: caseless utf +Subject length lower bound = 1 + +/[^\x{1234}]+?/Ii,utf +Capture group 
count = 0 +Options: caseless utf +Subject length lower bound = 1 + +/[^\x{1234}]++/Ii,utf +Capture group count = 0 +Options: caseless utf +Subject length lower bound = 1 + +/[^\x{1234}]{2}/Ii,utf +Capture group count = 0 +Options: caseless utf +Subject length lower bound = 2 + +/f.*/ + for\=ph +Partial match: for + +/f.*/s + for\=ph +Partial match: for + +/f.*/utf + for\=ph +Partial match: for + +/f.*/s,utf + for\=ph +Partial match: for + +/\x{d7ff}\x{e000}/utf + +/\x{d800}/utf +Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) + +/\x{dfff}/utf +Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) + +/\h+/utf + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + 0: \x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} + 0: \x{200a}\x{a0}\x{2000} + +/[\h\x{e000}]+/B,utf +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{e000}]++ + Ket + End +------------------------------------------------------------------ + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + 0: \x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} + 0: \x{200a}\x{a0}\x{2000} + +/\H+/utf + \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + 0: \x{167f}\x{1681}\x{180d}\x{180f} + \x{2000}\x{200a}\x{1fff}\x{200b} + 0: \x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + 0: \x{202e}\x{2030}\x{205e}\x{2060} + \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} + 0: \x{9f}\x{a1}\x{2fff}\x{3001} + +/[\H\x{d7ff}]+/B,utf +------------------------------------------------------------------ + Bra + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]++ + Ket + End +------------------------------------------------------------------ + \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + 0: 
\x{167f}\x{1681}\x{180d}\x{180f} + \x{2000}\x{200a}\x{1fff}\x{200b} + 0: \x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + 0: \x{202e}\x{2030}\x{205e}\x{2060} + \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} + 0: \x{9f}\x{a1}\x{2fff}\x{3001} + +/\v+/utf + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d + 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d} + +/[\v\x{e000}]+/B,utf +------------------------------------------------------------------ + Bra + [\x0a-\x0d\x85\x{2028}-\x{2029}\x{e000}]++ + Ket + End +------------------------------------------------------------------ + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d + 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d} + +/\V+/utf + \x{2028}\x{2029}\x{2027}\x{2030} + 0: \x{2027}\x{2030} + \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} + 0: \x{09}\x{0e}\x{84}\x{86} + +/[\V\x{d7ff}]+/B,utf +------------------------------------------------------------------ + Bra + [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]++ + Ket + End +------------------------------------------------------------------ + \x{2028}\x{2029}\x{2027}\x{2030} + 0: \x{2027}\x{2030} + \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} + 0: \x{09}\x{0e}\x{84}\x{86} + +/\R+/bsr=unicode,utf + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d + 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d} + +/(..)\1/utf + ab\=ps +Partial match: ab + aba\=ps +Partial match: aba + abab\=ps + 0: abab + 1: ab + +/(..)\1/i,utf + ab\=ps +Partial match: ab + abA\=ps +Partial match: abA + aBAb\=ps + 0: aBAb + 1: aB + +/(..)\1{2,}/utf + ab\=ps +Partial match: ab + aba\=ps +Partial match: aba + abab\=ps +Partial match: abab + ababa\=ps +Partial match: ababa + ababab\=ps + 0: ababab + 1: ab + ababab\=ph +Partial match: ababab + abababa\=ps + 0: ababab + 1: ab + abababa\=ph +Partial match: abababa + +/(..)\1{2,}/i,utf + ab\=ps +Partial match: 
ab + aBa\=ps +Partial match: aBa + aBAb\=ps +Partial match: aBAb + AbaBA\=ps +Partial match: AbaBA + abABAb\=ps + 0: abABAb + 1: ab + aBAbaB\=ph +Partial match: aBAbaB + abABabA\=ps + 0: abABab + 1: ab + abaBABa\=ph +Partial match: abaBABa + +/(..)\1{2,}?x/i,utf + ab\=ps +Partial match: ab + abA\=ps +Partial match: abA + aBAb\=ps +Partial match: aBAb + abaBA\=ps +Partial match: abaBA + abAbaB\=ps +Partial match: abAbaB + abaBabA\=ps +Partial match: abaBabA + abAbABaBx\=ps + 0: abAbABaBx + 1: ab + +/./utf,newline=crlf + \r\=ps + 0: \x{0d} + \r\=ph +Partial match: \x{0d} + +/.{2,3}/utf,newline=crlf + \r\=ps +Partial match: \x{0d} + \r\=ph +Partial match: \x{0d} + \r\r\=ps + 0: \x{0d}\x{0d} + \r\r\=ph +Partial match: \x{0d}\x{0d} + \r\r\r\=ps + 0: \x{0d}\x{0d}\x{0d} + \r\r\r\=ph +Partial match: \x{0d}\x{0d}\x{0d} + +/.{2,3}?/utf,newline=crlf + \r\=ps +Partial match: \x{0d} + \r\=ph +Partial match: \x{0d} + \r\r\=ps + 0: \x{0d}\x{0d} + \r\r\=ph +Partial match: \x{0d}\x{0d} + \r\r\r\=ps + 0: \x{0d}\x{0d} + \r\r\r\=ph + 0: \x{0d}\x{0d} + +/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf +------------------------------------------------------------------ + Bra + [^\x{100}] + [^\x{1234}] + [^\x{ffff}] + [^\x{10000}] + [^\x{10ffff}] + Ket + End +------------------------------------------------------------------ + +/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf +------------------------------------------------------------------ + Bra + /i [^\x{100}] + /i [^\x{1234}] + /i [^\x{ffff}] + /i [^\x{10000}] + /i [^\x{10ffff}] + Ket + End +------------------------------------------------------------------ + +/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf +------------------------------------------------------------------ + Bra + [^\x{100}]* + [^\x{10000}]+ + [^\x{10ffff}]?? + [^\x{8000}]{4} + [^\x{8000}]* + [^\x{7fff}]{2} + [^\x{7fff}]{0,7}? 
+ [^\x{fffff}]{5} + [^\x{fffff}]?+ + Ket + End +------------------------------------------------------------------ + +/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf +------------------------------------------------------------------ + Bra + /i [^\x{100}]* + /i [^\x{10000}]+ + /i [^\x{10ffff}]?? + /i [^\x{8000}]{4} + /i [^\x{8000}]* + /i [^\x{7fff}]{2} + /i [^\x{7fff}]{0,7}? + /i [^\x{fffff}]{5} + /i [^\x{fffff}]?+ + Ket + End +------------------------------------------------------------------ + +/(?<=\x{1234}\x{1234})\bxy/I,utf +Capture group count = 0 +Max lookbehind = 2 +Options: utf +First code unit = 'x' +Last code unit = 'y' +Subject length lower bound = 2 + +/(?= 0xd800 && <= 0xdfff) + +/^\u{0000000000010ffff}/utf,extra_alt_bsux + \x{10ffff} + 0: \x{10ffff} + +/\u{ 1bb1}/utf,extra_alt_bsux + u{ 1bb1} + 0: u{ 1bb1} +\= Expect no match + \x{1bb1} +No match + +/\u/utf,alt_bsux + \\u + 0: u + +/^a+[a\x{200}]/B,utf +------------------------------------------------------------------ + Bra + ^ + a+ + [a\x{200}] + Ket + End +------------------------------------------------------------------ + aa + 0: aa + +/[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf +------------------------------------------------------------------ + Bra + [b-d\x{200}-\x{250}]*+ + [ae-h]?+ + # + [\x{200}-\x{250}]{0,8}+ + [\x00-\xff]* + # + [\x{200}-\x{250}]++ + [a-z] + Ket + End +------------------------------------------------------------------ + +/[\p{L}]/IB +------------------------------------------------------------------ + Bra + [\p{L}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Subject length lower bound = 1 + +/[\p{^L}]/IB +------------------------------------------------------------------ + Bra + [\P{L}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Subject length lower 
bound = 1 + +/[\P{L}]/IB +------------------------------------------------------------------ + Bra + [\P{L}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Subject length lower bound = 1 + +/[\P{^L}]/IB +------------------------------------------------------------------ + Bra + [\p{L}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Subject length lower bound = 1 + +/[abc\p{L}\x{0660}]/IB,utf +------------------------------------------------------------------ + Bra + [a-c\p{L}\x{660}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Subject length lower bound = 1 + +/[\p{Nd}]/IB,utf +------------------------------------------------------------------ + Bra + [\p{Nd}] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Subject length lower bound = 1 + 1234 + 0: 1 + +/[\p{Nd}+-]+/IB,utf +------------------------------------------------------------------ + Bra + [+\-\p{Nd}]++ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +Subject length lower bound = 1 + 1234 + 0: 1234 + 12-34 + 0: 12-34 + 12+\x{661}-34 + 0: 12+\x{661}-34 +\= Expect no match + abcd +No match + +/(?:[\PPa*]*){8,}/ + +/[\P{Any}]/B +------------------------------------------------------------------ + Bra + [\P{Any}] + Ket + End +------------------------------------------------------------------ + +/[\P{Any}\E]/B +------------------------------------------------------------------ + Bra + [\P{Any}] + Ket + End +------------------------------------------------------------------ + +/(\P{Yi}+\277)/ + +/(\P{Yi}+\277)?/ + +/(?<=\P{Yi}{3}A)X/ + +/\p{Yi}+(\P{Yi}+)(?1)/ + +/(\P{Yi}{2}\277)?/ + +/[\P{Yi}A]/ + +/[\P{Yi}\P{Yi}\P{Yi}A]/ + +/[^\P{Yi}A]/ + +/[^\P{Yi}\P{Yi}\P{Yi}A]/ + 
+/(\P{Yi}*\277)*/ + +/(\P{Yi}*?\277)*/ + +/(\p{Yi}*+\277)*/ + +/(\P{Yi}?\277)*/ + +/(\P{Yi}??\277)*/ + +/(\p{Yi}?+\277)*/ + +/(\P{Yi}{0,3}\277)*/ + +/(\P{Yi}{0,3}?\277)*/ + +/(\p{Yi}{0,3}+\277)*/ + +/\p{Zl}{2,3}+/B,utf +------------------------------------------------------------------ + Bra + prop Zl {2} + prop Zl ?+ + Ket + End +------------------------------------------------------------------ + 

 + 0: \x{2028}\x{2028} + \x{2028}\x{2028}\x{2028} + 0: \x{2028}\x{2028}\x{2028} + +/\p{Zl}/B,utf +------------------------------------------------------------------ + Bra + prop Zl + Ket + End +------------------------------------------------------------------ + +/\p{Lu}{3}+/B,utf +------------------------------------------------------------------ + Bra + prop Lu {3} + Ket + End +------------------------------------------------------------------ + +/\pL{2}+/B,utf +------------------------------------------------------------------ + Bra + prop L {2} + Ket + End +------------------------------------------------------------------ + +/\p{Cc}{2}+/B,utf +------------------------------------------------------------------ + Bra + prop Cc {2} + Ket + End +------------------------------------------------------------------ + +/^\p{Cf}/utf + \x{180e} + 0: \x{180e} + \x{061c} + 0: \x{61c} + \x{2066} + 0: \x{2066} + \x{2067} + 0: \x{2067} + \x{2068} + 0: \x{2068} + \x{2069} + 0: \x{2069} + +/^\p{Cs}/utf + \x{dfff}\=no_utf_check + 0: \x{dfff} +\= Expect no match + \x{09f} +No match + +/^\p{Mn}/utf + \x{1a1b} + 0: \x{1a1b} + +/^\p{Pe}/utf + \x{2309} + 0: \x{2309} + \x{230b} + 0: \x{230b} + +/^\p{Ps}/utf + \x{2308} + 0: \x{2308} + \x{230a} + 0: \x{230a} + +/^\p{Sc}+/utf + $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} + 0: $\x{a2}\x{a3}\x{a4}\x{a5} + \x{9f2} + 0: \x{9f2} +\= Expect no match + X +No match + \x{2c2} +No match + +/^\p{Zs}/utf + \ \ + 0: + \x{a0} + 0: \x{a0} + \x{1680} + 0: \x{1680} + \x{2000} + 0: \x{2000} + \x{2001} + 0: \x{2001} +\= Expect no match + \x{2028} +No match + \x{200d} +No match + +# These are here because Perl has problems with the negative versions of the +# properties and has changed how it behaves for caseless matching. 
+ +/\p{^Lu}/i,utf + 1234 + 0: 1 +\= Expect no match + ABC +No match + +/\P{Lu}/i,utf + 1234 + 0: 1 +\= Expect no match + ABC +No match + +/\p{Ll}/i,utf + a + 0: a + Az + 0: z +\= Expect no match + ABC +No match + +/\p{Lu}/i,utf + A + 0: A + a\x{10a0}B + 0: \x{10a0} +\= Expect no match + a +No match + \x{1d00} +No match + +/\p{Lu}/i,utf + A + 0: A + aZ + 0: Z +\= Expect no match + abc +No match + +/[\x{c0}\x{391}]/i,utf + \x{c0} + 0: \x{c0} + \x{e0} + 0: \x{e0} + +# The next two are special cases where the lengths of the different cases of +# the same character differ. The first went wrong with heap frame storage; the +# second was broken in all cases. + +/^\x{023a}+?(\x{0130}+)/i,utf + \x{023a}\x{2c65}\x{0130} + 0: \x{23a}\x{2c65}\x{130} + 1: \x{130} + +/^\x{023a}+([^X])/i,utf + \x{023a}\x{2c65}X + 0: \x{23a}\x{2c65} + 1: \x{2c65} + +/\x{c0}+\x{116}+/i,utf + \x{c0}\x{e0}\x{116}\x{117} + 0: \x{c0}\x{e0}\x{116}\x{117} + +/[\x{c0}\x{116}]+/i,utf + \x{c0}\x{e0}\x{116}\x{117} + 0: \x{c0}\x{e0}\x{116}\x{117} + +/(\x{de})\1/i,utf + \x{de}\x{de} + 0: \x{de}\x{de} + 1: \x{de} + \x{de}\x{fe} + 0: \x{de}\x{fe} + 1: \x{de} + \x{fe}\x{fe} + 0: \x{fe}\x{fe} + 1: \x{fe} + \x{fe}\x{de} + 0: \x{fe}\x{de} + 1: \x{fe} + +/^\x{c0}$/i,utf + \x{c0} + 0: \x{c0} + \x{e0} + 0: \x{e0} + +/^\x{e0}$/i,utf + \x{c0} + 0: \x{c0} + \x{e0} + 0: \x{e0} + +# The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE +# will match it only with UCP support, because without that it has no notion +# of case for anything other than the ASCII letters. + +/((?i)[\x{c0}])/utf + \x{c0} + 0: \x{c0} + 1: \x{c0} + \x{e0} + 0: \x{e0} + 1: \x{e0} + +/(?i:[\x{c0}])/utf + \x{c0} + 0: \x{c0} + \x{e0} + 0: \x{e0} + +# These are PCRE's extra properties to help with Unicodizing \d etc. 
+ +/^\p{Xan}/utf + ABCD + 0: A + 1234 + 0: 1 + \x{6ca} + 0: \x{6ca} + \x{a6c} + 0: \x{a6c} + \x{10a7} + 0: \x{10a7} +\= Expect no match + _ABC +No match + +/^\p{Xan}+/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} +\= Expect no match + _ABC +No match + +/^\p{Xan}+?/utf + \x{6ca}\x{a6c}\x{10a7}_ + 0: \x{6ca} + +/^\p{Xan}*/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} + +/^\p{Xan}{2,9}/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca} + +/^\p{Xan}{2,9}?/utf + \x{6ca}\x{a6c}\x{10a7}_ + 0: \x{6ca}\x{a6c} + +/^[\p{Xan}]/utf + ABCD1234_ + 0: A + 1234abcd_ + 0: 1 + \x{6ca} + 0: \x{6ca} + \x{a6c} + 0: \x{a6c} + \x{10a7} + 0: \x{10a7} +\= Expect no match + _ABC +No match + +/^[\p{Xan}]+/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} +\= Expect no match + _ABC +No match + +/^>\p{Xsp}/utf + >\x{1680}\x{2028}\x{0b} + 0: >\x{1680} + >\x{a0} + 0: >\x{a0} +\= Expect no match + \x{0b} +No match + +/^>\p{Xsp}+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}+?/utf + >\x{1680}\x{2028}\x{0b} + 0: >\x{1680} + +/^>\p{Xsp}*/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}{2,9}/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}{2,9}?/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09} + +/^>[\p{Xsp}]/utf + >\x{2028}\x{0b} + 0: >\x{2028} + +/^>[\p{Xsp}]+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}/utf + >\x{1680}\x{2028}\x{0b} + 0: >\x{1680} + >\x{a0} + 0: >\x{a0} +\= Expect no match + \x{0b} +No match + +/^>\p{Xps}+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 
+ +/^>\p{Xps}+?/utf + >\x{1680}\x{2028}\x{0b} + 0: >\x{1680} + +/^>\p{Xps}*/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}?/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09} + +/^>[\p{Xps}]/utf + >\x{2028}\x{0b} + 0: >\x{2028} + +/^>[\p{Xps}]+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^\p{Xwd}/utf + ABCD + 0: A + 1234 + 0: 1 + \x{6ca} + 0: \x{6ca} + \x{a6c} + 0: \x{a6c} + \x{10a7} + 0: \x{10a7} + _ABC + 0: _ +\= Expect no match + [] +No match + +/^\p{Xwd}+/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}+?/utf + \x{6ca}\x{a6c}\x{10a7}_ + 0: \x{6ca} + +/^\p{Xwd}*/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}{2,9}/utf + A_B12\x{6ca}\x{a6c}\x{10a7} + 0: A_B12\x{6ca}\x{a6c}\x{10a7} + +/^\p{Xwd}{2,9}?/utf + \x{6ca}\x{a6c}\x{10a7}_ + 0: \x{6ca}\x{a6c} + +/^[\p{Xwd}]/utf + ABCD1234_ + 0: A + 1234abcd_ + 0: 1 + \x{6ca} + 0: \x{6ca} + \x{a6c} + 0: \x{a6c} + \x{10a7} + 0: \x{10a7} + _ABC + 0: _ +\= Expect no match + [] +No match + +/^[\p{Xwd}]+/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +# A check not in UTF-8 mode + +/^[\p{Xwd}]+/ + ABCD1234_ + 0: ABCD1234_ + +# Some negative checks + +/^[\P{Xwd}]+/utf + !.+\x{019}\x{482}AB + 0: !.+\x{19}\x{482} + +/^[\p{^Xwd}]+/utf + !.+\x{019}\x{589}AB + 0: !.+\x{19}\x{589} + +/[\D]/B,utf,ucp +------------------------------------------------------------------ + Bra + [\P{Nd}] + Ket + End +------------------------------------------------------------------ + 1\x{3c8}2 + 0: \x{3c8} + +/[\d]/B,utf,ucp +------------------------------------------------------------------ + Bra + [\p{Nd}] + Ket + 
End +------------------------------------------------------------------ + >\x{6f4}< + 0: \x{6f4} + +/[\S]/B,utf,ucp +------------------------------------------------------------------ + Bra + [\P{Xsp}] + Ket + End +------------------------------------------------------------------ + \x{1680}\x{6f4}\x{1680} + 0: \x{6f4} + +/[\s]/B,utf,ucp +------------------------------------------------------------------ + Bra + [\p{Xsp}] + Ket + End +------------------------------------------------------------------ + >\x{1680}< + 0: \x{1680} + +/[\W]/B,utf,ucp +------------------------------------------------------------------ + Bra + [\P{Xwd}] + Ket + End +------------------------------------------------------------------ + A\x{1735}B + 0: \x{1735} + +/[\w]/B,utf,ucp +------------------------------------------------------------------ + Bra + [\p{Xwd}] + Ket + End +------------------------------------------------------------------ + >\x{1723}< + 0: \x{1723} + +/\D/B,utf,ucp +------------------------------------------------------------------ + Bra + notprop Nd + Ket + End +------------------------------------------------------------------ + 1\x{3c8}2 + 0: \x{3c8} + +/\d/B,utf,ucp +------------------------------------------------------------------ + Bra + prop Nd + Ket + End +------------------------------------------------------------------ + >\x{6f4}< + 0: \x{6f4} + +/\S/B,utf,ucp +------------------------------------------------------------------ + Bra + notprop Xsp + Ket + End +------------------------------------------------------------------ + \x{1680}\x{6f4}\x{1680} + 0: \x{6f4} + +/\s/B,utf,ucp +------------------------------------------------------------------ + Bra + prop Xsp + Ket + End +------------------------------------------------------------------ + >\x{1680}> + 0: \x{1680} + +/\W/B,utf,ucp +------------------------------------------------------------------ + Bra + notprop Xwd + Ket + End +------------------------------------------------------------------ + 
A\x{1735}B + 0: \x{1735} + +/\w/B,utf,ucp +------------------------------------------------------------------ + Bra + prop Xwd + Ket + End +------------------------------------------------------------------ + >\x{1723}< + 0: \x{1723} + +/[[:alpha:]]/B,ucp +------------------------------------------------------------------ + Bra + [\p{L}] + Ket + End +------------------------------------------------------------------ + +/[[:lower:]]/B,ucp +------------------------------------------------------------------ + Bra + [\p{Ll}] + Ket + End +------------------------------------------------------------------ + +/[[:upper:]]/B,ucp +------------------------------------------------------------------ + Bra + [\p{Lu}] + Ket + End +------------------------------------------------------------------ + +/[[:alnum:]]/B,ucp +------------------------------------------------------------------ + Bra + [\p{Xan}] + Ket + End +------------------------------------------------------------------ + +/[[:ascii:]]/B,ucp +------------------------------------------------------------------ + Bra + [\x00-\x7f] + Ket + End +------------------------------------------------------------------ + +/[[:cntrl:]]/B,ucp +------------------------------------------------------------------ + Bra + [\p{Cc}] + Ket + End +------------------------------------------------------------------ + +/[[:digit:]]/B,ucp +------------------------------------------------------------------ + Bra + [\p{Nd}] + Ket + End +------------------------------------------------------------------ + +/[[:digit:]]/B,ucp,ascii_digit +------------------------------------------------------------------ + Bra + [0-9] + Ket + End +------------------------------------------------------------------ + +/[[:graph:]]/B,ucp +------------------------------------------------------------------ + Bra + [[:graph:]] + Ket + End +------------------------------------------------------------------ + +/[[:print:]]/B,ucp 
+------------------------------------------------------------------ + Bra + [[:print:]] + Ket + End +------------------------------------------------------------------ + +/[[:punct:]]/B,ucp +------------------------------------------------------------------ + Bra + [[:punct:]] + Ket + End +------------------------------------------------------------------ + +/[[:space:]]/B,ucp +------------------------------------------------------------------ + Bra + [\p{Xps}] + Ket + End +------------------------------------------------------------------ + +/[[:word:]]/B,ucp +------------------------------------------------------------------ + Bra + [\p{Xwd}] + Ket + End +------------------------------------------------------------------ + +/[[:xdigit:]]/B,ucp +------------------------------------------------------------------ + Bra + [[:xdigit:]] + Ket + End +------------------------------------------------------------------ + +/[[:xdigit:]]/B,ucp,ascii_digit +------------------------------------------------------------------ + Bra + [0-9A-Fa-f] + Ket + End +------------------------------------------------------------------ + +# Unicode properties for \b and \B + +/\b...\B/utf,ucp + abc_ + 0: abc + \x{37e}abc\x{376} + 0: abc + \x{37e}\x{376}\x{371}\x{393}\x{394} + 0: \x{376}\x{371}\x{393} + !\x{c0}++\x{c1}\x{c2} + 0: ++\x{c1} + !\x{c0}+++++ + 0: \x{c0}++ + +# Without PCRE_UCP, non-ASCII always fail, even if < 256 + +/\b...\B/utf + abc_ + 0: abc +\= Expect no match + \x{37e}abc\x{376} +No match + \x{37e}\x{376}\x{371}\x{393}\x{394} +No match + !\x{c0}++\x{c1}\x{c2} +No match + !\x{c0}+++++ +No match + +# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties + +/\b...\B/ucp + abc_ + 0: abc + !\x{c0}++\x{c1}\x{c2} + 0: ++\xc1 + !\x{c0}+++++ + 0: \xc0++ + +# Some of these are silly, but they check various combinations + +/[[:^alpha:][:^cntrl:]]+/B,utf,ucp +------------------------------------------------------------------ + Bra + [\P{L}\P{Cc}]++ + Ket + End 
+------------------------------------------------------------------ + 123 + 0: 123 + abc + 0: abc + +/[[:^cntrl:][:^alpha:]]+/B,utf,ucp +------------------------------------------------------------------ + Bra + [\P{Cc}\P{L}]++ + Ket + End +------------------------------------------------------------------ + 123 + 0: 123 + abc + 0: abc + +/[[:alpha:]]+/B,utf,ucp +------------------------------------------------------------------ + Bra + [\p{L}]++ + Ket + End +------------------------------------------------------------------ + abc + 0: abc + +/[[:^alpha:]\S]+/B,utf,ucp +------------------------------------------------------------------ + Bra + [\P{L}\P{Xsp}]++ + Ket + End +------------------------------------------------------------------ + 123 + 0: 123 + abc + 0: abc + +/[^\d]+/B,utf,ucp +------------------------------------------------------------------ + Bra + [^\p{Nd}]++ + Ket + End +------------------------------------------------------------------ + abc123 + 0: abc + abc\x{123} + 0: abc\x{123} + \x{660}abc + 0: abc + +/\p{Lu}+9\p{Lu}+B\p{Lu}+b/B +------------------------------------------------------------------ + Bra + prop Lu ++ + 9 + prop Lu + + B + prop Lu ++ + b + Ket + End +------------------------------------------------------------------ + +/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B +------------------------------------------------------------------ + Bra + notprop Lu + + 9 + notprop Lu ++ + B + notprop Lu + + b + Ket + End +------------------------------------------------------------------ + +/\P{Lu}+9\P{Lu}+B\P{Lu}+b/B +------------------------------------------------------------------ + Bra + notprop Lu + + 9 + notprop Lu ++ + B + notprop Lu + + b + Ket + End +------------------------------------------------------------------ + +/\p{Han}+X\p{Greek}+\x{370}/B,utf +------------------------------------------------------------------ + Bra + prop Han ++ + X + prop Greek + + \x{370} + Ket + End +------------------------------------------------------------------ + 
+/\p{Xan}+!\p{Xan}+A/B +------------------------------------------------------------------ + Bra + prop Xan ++ + ! + prop Xan + + A + Ket + End +------------------------------------------------------------------ + +/\p{Xsp}+!\p{Xsp}\t/B +------------------------------------------------------------------ + Bra + prop Xsp ++ + ! + prop Xsp + \x09 + Ket + End +------------------------------------------------------------------ + +/\p{Xps}+!\p{Xps}\t/B +------------------------------------------------------------------ + Bra + prop Xps ++ + ! + prop Xps + \x09 + Ket + End +------------------------------------------------------------------ + +/\p{Xwd}+!\p{Xwd}_/B +------------------------------------------------------------------ + Bra + prop Xwd ++ + ! + prop Xwd + _ + Ket + End +------------------------------------------------------------------ + +/A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp +------------------------------------------------------------------ + Bra + A++ + prop N + A++ + prop Nd + B+ + prop N *+ + B++ + prop Nd *+ + Ket + End +------------------------------------------------------------------ + +# These behaved oddly in Perl, so they are kept in this test + +/(\x{23a}\x{23a}\x{23a})?\1/i,utf +\= Expect no match + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} +No match + +/(ȺȺȺ)?\1/i,utf +\= Expect no match + ȺȺȺⱥⱥ +No match + +/(\x{23a}\x{23a}\x{23a})?\1/i,utf + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + 1: \x{23a}\x{23a}\x{23a} + +/(ȺȺȺ)?\1/i,utf + ȺȺȺⱥⱥⱥ + 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + 1: \x{23a}\x{23a}\x{23a} + +/(\x{23a}\x{23a}\x{23a})\1/i,utf +\= Expect no match + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} +No match + +/(ȺȺȺ)\1/i,utf +\= Expect no match + ȺȺȺⱥⱥ +No match + +/(\x{23a}\x{23a}\x{23a})\1/i,utf + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + 1: \x{23a}\x{23a}\x{23a} + +/(ȺȺȺ)\1/i,utf + ȺȺȺⱥⱥⱥ + 0: 
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + 1: \x{23a}\x{23a}\x{23a} + +/(\x{2c65}\x{2c65})\1/i,utf + \x{2c65}\x{2c65}\x{23a}\x{23a} + 0: \x{2c65}\x{2c65}\x{23a}\x{23a} + 1: \x{2c65}\x{2c65} + +/(ⱥⱥ)\1/i,utf + ⱥⱥȺȺ + 0: \x{2c65}\x{2c65}\x{23a}\x{23a} + 1: \x{2c65}\x{2c65} + +/(\x{23a}\x{23a}\x{23a})\1Y/i,utf + X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ + 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}Y + 1: \x{23a}\x{23a}\x{23a} + +/(\x{2c65}\x{2c65})\1Y/i,utf + X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ + 0: \x{2c65}\x{2c65}\x{23a}\x{23a}Y + 1: \x{2c65}\x{2c65} + +# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE + +/^[\p{Batak}]/utf + \x{1bc0} + 0: \x{1bc0} + \x{1bff} + 0: \x{1bff} +\= Expect no match + \x{1bf4} +No match + +/^[\p{Brahmi}]/utf + \x{11000} + 0: \x{11000} + \x{1106f} + 0: \x{1106f} +\= Expect no match + \x{1104e} +No match + +/^[\p{Mandaic}]/utf + \x{840} + 0: \x{840} + \x{85e} + 0: \x{85e} +\= Expect no match + \x{85c} +No match + \x{85d} +No match + +/(\X*)(.)/s,utf + A\x{300} + 0: A + 1: + 2: A + +/^S(\X*)e(\X*)$/utf + Stéréo + 0: Ste\x{301}re\x{301}o + 1: te\x{301}r + 2: \x{301}o + +/^\X/utf + ́réo + 0: \x{301} + +/^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aX41z + 0: aX41z +\= Expect no match + aAz +No match + +/\X/ + a\=ps + 0: a + a\=ph +Partial match: a + +/\Xa/ + aa\=ps + 0: aa + aa\=ph + 0: aa + +/\X{2}/ + aa\=ps + 0: aa + aa\=ph +Partial match: aa + +/\X+a/ + a\=ps +Partial match: a + aa\=ps + 0: aa + aa\=ph +Partial match: aa + +/\X+?a/ + a\=ps +Partial match: a + ab\=ps +Partial match: ab + aa\=ps + 0: aa + aa\=ph + 0: aa + aba\=ps + 0: aba + +# These Unicode 6.1.0 scripts are not known to Perl. 
+ +/\p{Chakma}\d/utf,ucp + \x{11100}\x{1113c} + 0: \x{11100}\x{1113c} + +/\p{Takri}\d/utf,ucp + \x{11680}\x{116c0} + 0: \x{11680}\x{116c0} + +/^\X/utf + A\=ps + 0: A + A\=ph +Partial match: A + A\x{300}\x{301}\=ps + 0: A\x{300}\x{301} + A\x{300}\x{301}\=ph +Partial match: A\x{300}\x{301} + A\x{301}\=ps + 0: A\x{301} + A\x{301}\=ph +Partial match: A\x{301} + +/^\X{2,3}/utf + A\=ps +Partial match: A + A\=ph +Partial match: A + AA\=ps + 0: AA + AA\=ph +Partial match: AA + A\x{300}\x{301}\=ps +Partial match: A\x{300}\x{301} + A\x{300}\x{301}\=ph +Partial match: A\x{300}\x{301} + A\x{300}\x{301}A\x{300}\x{301}\=ps + 0: A\x{300}\x{301}A\x{300}\x{301} + A\x{300}\x{301}A\x{300}\x{301}\=ph +Partial match: A\x{300}\x{301}A\x{300}\x{301} + +/^\X{2}/utf + AA\=ps + 0: AA + AA\=ph +Partial match: AA + A\x{300}\x{301}A\x{300}\x{301}\=ps + 0: A\x{300}\x{301}A\x{300}\x{301} + A\x{300}\x{301}A\x{300}\x{301}\=ph +Partial match: A\x{300}\x{301}A\x{300}\x{301} + +/^\X+/utf + AA\=ps + 0: AA + AA\=ph +Partial match: AA + +/^\X+?Z/utf + AA\=ps +Partial match: AA + AA\=ph +Partial match: AA + +/A\x{3a3}B/IBi,utf +------------------------------------------------------------------ + Bra + /i A + clist 03a3 03c2 03c3 + /i B + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless utf +First code unit = 'A' (caseless) +Last code unit = 'B' (caseless) +Subject length lower bound = 3 + +/[\x{3a3}]/Bi,utf +------------------------------------------------------------------ + Bra + clist 03a3 03c2 03c3 + Ket + End +------------------------------------------------------------------ + +/[^\x{3a3}]/Bi,utf +------------------------------------------------------------------ + Bra + not clist 03a3 03c2 03c3 + Ket + End +------------------------------------------------------------------ + +/[\x{3a3}]+/Bi,utf +------------------------------------------------------------------ + Bra + clist 03a3 03c2 03c3 ++ + Ket + End 
+------------------------------------------------------------------ + +/[^\x{3a3}]+/Bi,utf +------------------------------------------------------------------ + Bra + not clist 03a3 03c2 03c3 ++ + Ket + End +------------------------------------------------------------------ + +/a*\x{3a3}/Bi,utf +------------------------------------------------------------------ + Bra + /i a*+ + clist 03a3 03c2 03c3 + Ket + End +------------------------------------------------------------------ + +/\x{3a3}+a/Bi,utf +------------------------------------------------------------------ + Bra + clist 03a3 03c2 03c3 ++ + /i a + Ket + End +------------------------------------------------------------------ + +/\x{3a3}*\x{3c2}/Bi,utf +------------------------------------------------------------------ + Bra + clist 03a3 03c2 03c3 * + clist 03a3 03c2 03c3 + Ket + End +------------------------------------------------------------------ + +/\x{3a3}{3}/i,utf,aftertext + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + 0: \x{3a3}\x{3c3}\x{3c2} + 0+ \x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}{2,4}/i,utf,aftertext + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + 0: \x{3a3}\x{3c3}\x{3c2}\x{3a3} + 0+ \x{3c3}\x{3c2} + +/\x{3a3}{2,4}?/i,utf,aftertext + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + 0: \x{3a3}\x{3c3} + 0+ \x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}+./i,utf,aftertext + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + 0: \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + 0+ + +/\x{3a3}++./i,utf,aftertext +\= Expect no match + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} +No match + +/\x{3a3}*\x{3c2}/Bi,utf +------------------------------------------------------------------ + Bra + clist 03a3 03c2 03c3 * + clist 03a3 03c2 03c3 + Ket + End +------------------------------------------------------------------ + +/[^\x{3a3}]*\x{3c2}/Bi,utf +------------------------------------------------------------------ + Bra + not clist 03a3 03c2 03c3 *+ + clist 03a3 03c2 03c3 + Ket + End 
+------------------------------------------------------------------ + +/[^a]*\x{3c2}/Bi,utf +------------------------------------------------------------------ + Bra + /i [^a]* + clist 03a3 03c2 03c3 + Ket + End +------------------------------------------------------------------ + +/ist/Bi,utf +------------------------------------------------------------------ + Bra + /i i + clist 0053 0073 017f + /i t + Ket + End +------------------------------------------------------------------ +\= Expect no match + ikt +No match + +/is+t/i,utf + iSs\x{17f}t + 0: iSs\x{17f}t +\= Expect no match + ikt +No match + +/is+?t/i,utf +\= Expect no match + ikt +No match + +/is?t/i,utf +\= Expect no match + ikt +No match + +/is{2}t/i,utf +\= Expect no match + iskt +No match + +# This property is a PCRE special + +/^\p{Xuc}/utf + $abc + 0: $ + @abc + 0: @ + `abc + 0: ` + \x{1234}abc + 0: \x{1234} +\= Expect no match + abc +No match + +/^\p{Xuc}+/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $@`\x{a0}\x{1234}\x{e000} +\= Expect no match + \x{9f} +No match + +/^\p{Xuc}+?/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $ +\= Expect no match + \x{9f} +No match + +/^\p{Xuc}+?\*/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $@`\x{a0}\x{1234}\x{e000}* +\= Expect no match + \x{9f} +No match + +/^\p{Xuc}++/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $@`\x{a0}\x{1234}\x{e000} +\= Expect no match + \x{9f} +No match + +/^\p{Xuc}{3,5}/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $@`\x{a0}\x{1234} +\= Expect no match + \x{9f} +No match + +/^\p{Xuc}{3,5}?/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $@` +\= Expect no match + \x{9f} +No match + +/^[\p{Xuc}]/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $ +\= Expect no match + \x{9f} +No match + +/^[\p{Xuc}]+/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $@`\x{a0}\x{1234}\x{e000} +\= Expect no match + \x{9f} +No match + +/^\P{Xuc}/utf + abc + 0: a +\= Expect no match + $abc +No match + @abc +No match + `abc +No match + \x{1234}abc +No match + +/^[\P{Xuc}]/utf + abc + 0: a +\= Expect no match + $abc +No match 
+ @abc +No match + `abc +No match + \x{1234}abc +No match + +# Some auto-possessification tests + +/\pN+\z/B +------------------------------------------------------------------ + Bra + prop N ++ + \z + Ket + End +------------------------------------------------------------------ + +/\PN+\z/B +------------------------------------------------------------------ + Bra + notprop N ++ + \z + Ket + End +------------------------------------------------------------------ + +/\pN+/B +------------------------------------------------------------------ + Bra + prop N ++ + Ket + End +------------------------------------------------------------------ + +/\PN+/B +------------------------------------------------------------------ + Bra + notprop N ++ + Ket + End +------------------------------------------------------------------ + +/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp +------------------------------------------------------------------ + Bra + AllAny+ + AllAny + AllAny+ + notprop Any + AllAny+ + prop Lc + AllAny+ + prop L + AllAny+ + prop Lu + AllAny+ + prop Han + AllAny+ + prop Xan + AllAny+ + prop Xsp + AllAny+ + prop Xps + prop Xwd + + AllAny + AllAny+ + prop Xuc + Ket + End +------------------------------------------------------------------ + +/\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp +------------------------------------------------------------------ + Bra + prop Lc + + AllAny + prop Lc + + prop Lc + notprop Lc ++ + prop Lc + prop Lc + + prop L + prop Lc + + prop Lu + prop Lc + + prop Han + prop Lc + + prop Xan + prop Lc ++ + notprop Xan + prop Lc ++ + prop Xsp + prop Lc ++ + prop Xps + prop Xwd + + prop Lc + prop Lc + + prop Xuc + Ket + End 
+------------------------------------------------------------------ + +/\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp +------------------------------------------------------------------ + Bra + prop N + + AllAny + prop N + + prop Lc + prop N ++ + prop L + prop N + + notprop L + prop N ++ + notprop N + prop N ++ + prop Lu + prop N + + prop Han + prop N + + prop Xan + prop N ++ + prop Xsp + prop N ++ + prop Xps + prop Xwd + + prop N + prop N + + prop Xuc + Ket + End +------------------------------------------------------------------ + +/\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp +------------------------------------------------------------------ + Bra + prop Lu + + AllAny + prop Lu + + prop Lc + prop Lu + + prop L + prop Lu + + prop Lu + notprop Lu ++ + prop Lu + prop Lu ++ + prop Nd + prop Lu + + notprop Nd + prop Lu + + prop Han + prop Lu + + prop Xan + prop Lu ++ + prop Xsp + prop Lu ++ + prop Xps + prop Xwd + + prop Lu + prop Lu + + prop Xuc + Ket + End +------------------------------------------------------------------ + +/\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp +------------------------------------------------------------------ + Bra + prop Han + + prop Lu + prop Han + + prop Lc + prop Han + + prop L + prop Han + + prop Lu + prop Han ++ + prop Arabic + prop Arabic + + prop Arabic + prop Han + + prop Xan + prop Han + + prop Xsp + prop Han + + prop Xps + prop Xwd + + prop Han + prop Han + + prop Xuc + Ket + End +------------------------------------------------------------------ + +/\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} 
\p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp +------------------------------------------------------------------ + Bra + prop Xan + + AllAny + prop Xan + + prop Lc + notprop Xan ++ + prop Lc + prop Xan + + prop L + prop Xan + + prop Lu + prop Xan + + prop Han + prop Xan + + prop Xan + prop Xan ++ + notprop Xan + prop Xan ++ + prop Xsp + prop Xan ++ + prop Xps + prop Xwd + + prop Xan + prop Xan + + prop Xuc + Ket + End +------------------------------------------------------------------ + +/\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp +------------------------------------------------------------------ + Bra + prop Xsp + + AllAny + prop Xsp ++ + prop Lc + prop Xsp ++ + prop L + prop Xsp ++ + prop Lu + prop Xsp + + prop Han + prop Xsp ++ + prop Xan + prop Xsp + + prop Xsp + notprop Xsp ++ + prop Xsp + prop Xsp + + prop Xps + prop Xwd ++ + prop Xsp + prop Xsp + + prop Xuc + Ket + End +------------------------------------------------------------------ + +/\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp +------------------------------------------------------------------ + Bra + prop Xwd + + AllAny + prop Xwd + + prop Lc + prop Xwd + + prop L + prop Xwd + + prop Lu + prop Xwd + + prop Han + prop Xwd + + prop Xan + prop Xwd ++ + prop Xsp + prop Xwd ++ + prop Xps + prop Xwd + + prop Xwd + prop Xwd ++ + notprop Xwd + prop Xwd + + prop Xuc + Ket + End +------------------------------------------------------------------ + +/\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp 
+------------------------------------------------------------------ + Bra + prop Xuc + + AllAny + prop Xuc + + prop Lc + prop Xuc + + prop L + prop Xuc + + prop Lu + prop Xuc + + prop Han + prop Xuc + + prop Xan + prop Xuc + + prop Xsp + prop Xuc + + prop Xps + prop Xwd + + prop Xuc + prop Xuc + + prop Xuc + prop Xuc ++ + notprop Xuc + Ket + End +------------------------------------------------------------------ + +/\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp +------------------------------------------------------------------ + Bra + prop N ++ + prop Ll + prop N + + prop Nd + prop N + + notprop Nd + Ket + End +------------------------------------------------------------------ + +/\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp +------------------------------------------------------------------ + Bra + prop Xan + + prop L + prop Xan + + prop N + prop Xan ++ + prop C + prop Xan + + notprop L + notprop Xan ++ + prop N + prop Xan + + notprop C + Ket + End +------------------------------------------------------------------ + +/\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp +------------------------------------------------------------------ + Bra + prop L + + prop Xan + prop N + + prop Xan + prop C ++ + prop Xan + notprop L + + prop Xan + prop N + + prop Xan + notprop C + + prop Xan + prop L ++ + notprop Xan + Ket + End +------------------------------------------------------------------ + +/\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp +------------------------------------------------------------------ + Bra + prop Xan + + prop Lu + prop Xan + + prop Nd + prop Xan ++ + prop Cc + prop Xan + + notprop Ll + notprop Xan ++ + prop No + prop Xan + + notprop Cf + Ket + End +------------------------------------------------------------------ + +/\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} 
\P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp +------------------------------------------------------------------ + Bra + prop Lu + + prop Xan + prop Nd + + prop Xan + prop Cs ++ + prop Xan + notprop Lt + + prop Xan + prop Nl + + prop Xan + notprop Cc + + prop Xan + prop Lt ++ + notprop Xan + Ket + End +------------------------------------------------------------------ + +/\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp +------------------------------------------------------------------ + Bra + prop Xwd + + prop P + prop Xwd + + prop Po + prop Xwd ++ + prop Xsp + prop Xan ++ + prop Xsp + prop Xsp ++ + prop Xan + prop Xsp ++ + prop Xwd + Ket + End +------------------------------------------------------------------ + +/\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp +------------------------------------------------------------------ + Bra + prop Xwd + + notprop P + notprop Xwd + + prop Po + prop Xwd + + notprop Xsp + notprop Xan + + prop Xsp + prop Xsp + + notprop Xan + prop Xsp + + notprop Xwd + Ket + End +------------------------------------------------------------------ + +/\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp +------------------------------------------------------------------ + Bra + prop Xwd + + prop Po + prop Xwd ++ + prop Pc + notprop Xwd + + prop Po + notprop Xwd + + prop Pc + prop Xwd + + notprop Po + prop Xwd + + notprop Pc + Ket + End +------------------------------------------------------------------ + +/\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp +------------------------------------------------------------------ + Bra + prop Nl + + prop Xan + notprop Nl + + prop Xan + prop Nl ++ + notprop Xan + notprop Nl + + notprop Xan + Ket + End +------------------------------------------------------------------ + +/\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp +------------------------------------------------------------------ + Bra + prop Xan + + prop Nl + notprop Xan ++ + 
prop Nl + prop Xan + + notprop Nl + notprop Xan + + notprop Nl + Ket + End +------------------------------------------------------------------ + +/\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp +------------------------------------------------------------------ + Bra + prop Xan + + prop Nd + notprop Xan ++ + prop Nd + prop Xan + + notprop Nd + notprop Xan + + notprop Nd + Ket + End +------------------------------------------------------------------ + +# End auto-possessification tests + +/\w+/B,utf,ucp,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 3 + prop Xwd ++ + Callout 255 3 0 + Ket + End +------------------------------------------------------------------ + abcd +--->abcd + +0 ^ \w+ + +3 ^ ^ End of pattern + 0: abcd + +/[\p{N}]?+/B,no_auto_possess +------------------------------------------------------------------ + Bra + [\p{N}]?+ + Ket + End +------------------------------------------------------------------ + +/[\p{L}ab]{2,3}+/B,no_auto_possess +------------------------------------------------------------------ + Bra + [ab\p{L}]{2,3}+ + Ket + End +------------------------------------------------------------------ + +/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx +------------------------------------------------------------------ + Bra + \D+ + extuni + \d+ + extuni + \S+ + extuni + \s+ + extuni + \W+ + extuni + \w+ + extuni + \R+ + extuni + \H+ + extuni + \h+ + extuni + \V+ + extuni + \v+ + extuni + a+ + extuni + \x0a+ + extuni + Any+ + extuni + Ket + End +------------------------------------------------------------------ + +/.+\X/Bsx +------------------------------------------------------------------ + Bra + AllAny+ + extuni + Ket + End +------------------------------------------------------------------ + +/\X+$/Bmx +------------------------------------------------------------------ + Bra + extuni+ + /m $ + Ket + End 
+------------------------------------------------------------------ + +/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx +------------------------------------------------------------------ + Bra + extuni+ + \D + extuni+ + \d + extuni+ + \S + extuni+ + \s + extuni+ + \W + extuni+ + \w + extuni+ + Any + extuni+ + \R + extuni+ + \H + extuni+ + \h + extuni+ + \V + extuni+ + \v + extuni+ + extuni + extuni+ + \Z + extuni++ + \z + extuni+ + $ + Ket + End +------------------------------------------------------------------ + +/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp +------------------------------------------------------------------ + Bra + prop Nd ++ + prop Xsp {0,5}+ + = + prop Xsp *+ + notprop Xsp ? + = + prop Xwd {0,4}+ + notprop Xwd *+ + Ket + End +------------------------------------------------------------------ + +/[RST]+/Bi,utf,ucp +------------------------------------------------------------------ + Bra + [R-Tr-t\x{17f}]++ + Ket + End +------------------------------------------------------------------ + +/[R-T]+/Bi,utf,ucp +------------------------------------------------------------------ + Bra + [R-Tr-t\x{17f}]++ + Ket + End +------------------------------------------------------------------ + +/[Q-U]+/Bi,utf,ucp +------------------------------------------------------------------ + Bra + [Q-Uq-u\x{17f}]++ + Ket + End +------------------------------------------------------------------ + +/^s?c/Iim,utf +Capture group count = 0 +Options: caseless multiline utf +First code unit at start or follows newline +Last code unit = 'c' (caseless) +Subject length lower bound = 1 + scat + 0: sc + +/\X?abc/utf,no_start_optimize + \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 + 0: A\x{300}abc + +/\x{100}\x{200}\K\x{300}/utf,startchar + \x{100}\x{200}\x{300} + 0: \x{100}\x{200}\x{300} + ^^^^^^^^^^^^^^ + +# Test UTF characters in a substitution + +/ábc/utf,replace=XሴZ + 123ábc123 + 1: 
123X\x{1234}Z123 + +/(?<=abc)(|def)/g,utf,replace=<$0> + 123abcáyzabcdef789abcሴqr + 4: 123abc<>\x{e1}yzabc<>789abc<>\x{1234}qr + +/[A-`]/iB,utf +------------------------------------------------------------------ + Bra + [A-z\x{212a}\x{17f}] + Ket + End +------------------------------------------------------------------ + abcdefghijklmno + 0: a + +/(?<=\K\x{17f})/g,utf,aftertext,allow_lookaround_bsk + \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f}\x{17f}\x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f}\x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f} + 0: \x{17f} + 0+ + +/(?<=\K\x{17f})/altglobal,utf,aftertext,allow_lookaround_bsk + \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f}\x{17f}\x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f}\x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f}\x{17f} + 0: \x{17f} + 0+ \x{17f} + 0: \x{17f} + 0+ + +"\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?())(?))(?\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5" +Failed: error 122 at offset 1227: unmatched closing parenthesis + +/$(&.+[\p{Me}].\s\xdcC*?(?())(?)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/ + +"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'" +Failed: error 162 at offset 113: subpattern name expected + 
+/[\pS#moq]/ + = + 0: = + +/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark + cxxxz + 0: xxx +MK: a\x{12345}b\x{09}(d)c + +/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended + abcd + 1: x\x{824}y\x{6db}z(12\$34$$\x345$) + +/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended + a\x{e0}\x{101}\x{c0}\x{102} + 1: a\x{c0}\x{101}\x{c0}\x{100}\x{e0}\x{101}\x{e0}\x{102}\x{e0}\x{103}ab\x{c0}\x{100}\x{f0}\x{161}Done + +/((?\d)|(?\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}> + ab12cde + 7: + +/(*UCP)(*UTF)[[:>:]]X/B +------------------------------------------------------------------ + Bra + \b (ucp) + Assert back + Reverse + prop Xwd + Ket + X + Ket + End +------------------------------------------------------------------ + +/abc/utf,replace=xyz + abc\=zero_terminate + 1: xyz + +/a[[:punct:]b]/ucp,bincode +------------------------------------------------------------------ + Bra + a + [b[:punct:]] + Ket + End +------------------------------------------------------------------ + +/a[[:punct:]b]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + a + [b[:punct:]] + Ket + End +------------------------------------------------------------------ + +/a[b[:punct:]]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + a + [b[:punct:]] + Ket + End +------------------------------------------------------------------ + +/[[:^ascii:]]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + [\x80-\xff] (neg) + Ket + End +------------------------------------------------------------------ + +/[[:^ascii:]\w]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + [\x80-\xff\p{Xwd}\x{100}-\x{10ffff}] + Ket + End +------------------------------------------------------------------ + 
+/[\w[:^ascii:]]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + [\x80-\xff\p{Xwd}\x{100}-\x{10ffff}] + Ket + End +------------------------------------------------------------------ + +/[^[:ascii:]\W]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + [^\x00-\x7f\P{Xwd}] + Ket + End +------------------------------------------------------------------ + \x{de} + 0: \x{de} + \x{200} + 0: \x{200} +\= Expect no match + \x{589} +No match + \x{37e} +No match + +/[[:^ascii:]a]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + [a\x80-\xff] (neg) + Ket + End +------------------------------------------------------------------ + +/L(?#(|++\x{0a}\x{123}\x{123}\x{123}\x{123} + +0 ^ . + +0 ^ . + +1 ^ ^ . + +2 ^ ^ End of pattern + 0: \x{123}\x{123} + +# This tests processing wide characters in extended mode. + +/XȀ/x,utf + +# These three test a bug fix that was not clearing up after a locale setting +# when the test or a subsequent one matched a wide character. + +//locale=C + +/[\P{Yi}]/utf +\x{2f000} + 0: \x{2f000} + +/[\P{Yi}]/utf,locale=C +\x{2f000} + 0: \x{2f000} + +/^(? 
+Overall options: anchored +Last code unit = 'z' +Subject length lower bound = 3 + +/(|ß)7/caseless,ucp + +/(\xc1)\1/i,ucp + \xc1\xe1\=no_jit + 0: \xc1\xe1 + 1: \xc1 + +/\p{L&}+\p{bidi_control}/B +------------------------------------------------------------------ + Bra + prop Lc + + prop Bidicontrol + Ket + End +------------------------------------------------------------------ + +/\p{bidi_control}+\p{L&}/B +------------------------------------------------------------------ + Bra + prop Bidicontrol + + prop Lc + Ket + End +------------------------------------------------------------------ + +/\p{han}/B +------------------------------------------------------------------ + Bra + prop Han + Ket + End +------------------------------------------------------------------ + +/\p{script:han}/B +------------------------------------------------------------------ + Bra + prop script:Han + Ket + End +------------------------------------------------------------------ + +/\p{sc:han}/B +------------------------------------------------------------------ + Bra + prop script:Han + Ket + End +------------------------------------------------------------------ + +/\p{script extensions:han}/B +------------------------------------------------------------------ + Bra + prop Han + Ket + End +------------------------------------------------------------------ + +/\p{scx:han}/B +------------------------------------------------------------------ + Bra + prop Han + Ket + End +------------------------------------------------------------------ + +# Test error - invalid script name + +/\p{sc:L}/ +Failed: error 147 at offset 8: unknown property after \P or \p + +# Some Boolean property tests that differ from Perl + +/\p{emojimodifierbase}\p{ebase}/g,utf + >AN<>\x{261d}\x{1faf6}<>yz< + 0: \x{261d}\x{1faf6} + +/\p{graphemelink}\p{grlink}/g,utf + >AN<>\x{11d97}\x{94d}<>yz< + 0: \x{11d97}\x{94d} + +/\p{soft dotted}\p{sd}/g,utf + >AF23<>\x{1df1a}\x{69}<>yz< + 0: \x{1df1a}i + +# 
------------------------------------------------ + +/\p{\2b[:xäigi:t:_/ +Failed: error 146 at offset 17: malformed \P or \p sequence + +# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without +# the restriction. + +/AskZ/i,utf,caseless_restrict + AskZ + 0: AskZ + aSKz + 0: aSKz +\= Expect no match + A\x{17f}kZ +No match + As\x{212a}Z +No match + +/AskZ/i,utf + AskZ + 0: AskZ + aSKz + 0: aSKz + A\x{17f}kZ + 0: A\x{17f}kZ + As\x{212a}Z + 0: As\x{212a}Z + +/A\x{17f}\x{212a}Z/ir,utf + \= Expect no match + AskZ +No match + +/A\x{17f}\x{212a}Z/i,utf + AskZ + 0: AskZ + +/[AskZ]+/i,utf,caseless_restrict + AskZ + 0: AskZ + aSKz + 0: aSKz + A\x{17f}kZ + 0: A + As\x{212a}Z + 0: As + +/[AskZ]+/i,utf + AskZ + 0: AskZ + aSKz + 0: aSKz + A\x{17f}kZ + 0: A\x{17f}kZ + As\x{212a}Z + 0: As\x{212a}Z + +/[\x{17f}\x{212a}]+/ir,utf +\= Expect no match + AskZ +No match + +/[\x{17f}\x{212a}]+/i,utf + AskZ + 0: sk + +/[^s]+/ir,utf + A\x{17f}Z + 0: A\x{17f}Z + +/[^s]+/i,utf + A\x{17f}Z + 0: A + +/[^k]+/ir,utf + A\x{212a}Z + 0: A\x{212a}Z + +/[^k]+/i,utf + A\x{212a}Z + 0: A + +/[^sk]+/ir,utf + A\x{17f}\x{212a}Z + 0: A\x{17f}\x{212a}Z + +/[^sk]+/i,utf + A\x{17f}\x{212a}Z + 0: A + +/[^\x{17f}]+/ir,utf + AsSZ + 0: AsSZ + +/[^\x{17f}]+/i,utf + AsSZ + 0: A + +/[Ss]+/irB,utf +------------------------------------------------------------------ + Bra + /i S++ + Ket + End +------------------------------------------------------------------ + Sss\x{17f}ss + 0: Sss + +/[Ss]+/iB,utf +------------------------------------------------------------------ + Bra + [Ss\x{17f}\x{17f}]++ + Ket + End +------------------------------------------------------------------ + Sss\x{17f}ss + 0: Sss\x{17f}ss + +/[S\x{17f}]/irB,utf +------------------------------------------------------------------ + Bra + [Ss\x{17f}] + Ket + End +------------------------------------------------------------------ + +/[S\x{17f}]/iB,utf +------------------------------------------------------------------ + Bra + 
[Ss\x{17f}\x{17f}] + Ket + End +------------------------------------------------------------------ + +/[\x{17f}s]/irB,utf +------------------------------------------------------------------ + Bra + [Ss\x{17f}] + Ket + End +------------------------------------------------------------------ + +/[\x{17f}s]/iB,utf +------------------------------------------------------------------ + Bra + [Ss\x{17f}\x{17f}] + Ket + End +------------------------------------------------------------------ + +/[\x{4b}\x{6b}]/irB,utf +------------------------------------------------------------------ + Bra + /i K + Ket + End +------------------------------------------------------------------ + +/[\x{4b}\x{6b}]/iB,utf +------------------------------------------------------------------ + Bra + [Kk\x{212a}\x{212a}] + Ket + End +------------------------------------------------------------------ + +/s(?r)s(?-r)s(?r:s)s/i,utf + \x{17f}S\x{17f}S\x{17f} + 0: \x{17f}S\x{17f}S\x{17f} +\= Expect no match + \x{17f}\x{17f}\x{17f}S\x{17f} +No match + \x{17f}S\x{17f}\x{17f}\x{17f} +No match + +/k(?^i)k/ir,utf + K\x{212a} + 0: K\x{212a} +\= Expect no match + \x{212a}\x{212a} +No match + +# End caseless restrict tests + +# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. 
+ +# DIGITS + +/\d+/i,utf + 123\x{660}456 + 0: 123 + +/\d+/i,utf,ucp + 123\x{660}456 + 0: 123\x{660}456 + +/\d+/i,utf,ucp,ascii_bsd + 123\x{660}456 + 0: 123 + +/[\d]+/i,utf + 123\x{660}456 + 0: 123 + +/[\d]+/i,utf,ucp + 123\x{660}456 + 0: 123\x{660}456 + +/[\d]+/i,utf,ucp,ascii_bsd + 123\x{660}456 + 0: 123 + +/\d(?aD)\d(?-aD)\d/utf,ucp + \x{660}9\x{660} + 0: \x{660}9\x{660} +\= Expect no match + \x{660}\x{660}\x{660} +No match + +/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd + 999 + 0: 999 + 9\x{660}9 + 0: 9\x{660}9 + +/\d(?a)\d(?-a)\d/utf,ucp + \x{660}9\x{660} + 0: \x{660}9\x{660} +\= Expect no match + \x{660}\x{660}\x{660} +No match + +/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd + 999 + 0: 999 + 9\x{660}9 + 0: 9\x{660}9 + +# SPACES + +/>\s+ < + 0: > < +\= Expect no match + >\x{a0} < +No match + +/>\s+ < + 0: > < + >\x{a0} < + 0: >\x{a0} < + +/>\s+ < + 0: > < +\= Expect no match + >\x{a0} < +No match + +/>[\s]+ < + 0: > < +\= Expect no match + >\x{a0} < +No match + +/>[\s]+ < + 0: > < + >\x{a0} < + 0: >\x{a0} < + +/>[\s]+ < + 0: > < +\= Expect no match + >\x{a0} < +No match + +/>\s(?aS)\s(?-aS)\s\x{a0} \x{a0}< + 0: >\x{a0} \x{a0}< +\= Expect no match + >\x{a0}\x{a0}\x{a0}< +No match + +/>\s(?a)\s(?-a)\s\x{a0} \x{a0}< + 0: >\x{a0} \x{a0}< +\= Expect no match + >\x{a0}\x{a0}\x{a0}< +No match + +# WORDS + +/\w+/i,utf + 123\x{660}abc + 0: 123 + +/\w+/i,utf,ucp + 123\x{660}abc + 0: 123\x{660}abc + +/\w+/i,utf,ucp,ascii_bsw + 123\x{660}abc + 0: 123 + +/[\w]+/i,utf + 123\x{660}abc + 0: 123 + +/[\w]+/i,utf,ucp + 123\x{660}abc + 0: 123\x{660}abc + +/[\w]+/i,utf,ucp,ascii_bsw + 123\x{660}abc + 0: 123 + +/\w(?aW)\w(?-aW)\w/utf,ucp + \x{660}A\x{c0} + 0: \x{660}A\x{c0} +\= Expect no match + \x{660}\x{c0}\x{c0} +No match + +/\w(?a)\w(?-a)\w/utf,ucp + \x{660}A\x{c0} + 0: \x{660}A\x{c0} +\= Expect no match + \x{660}\x{c0}\x{c0} +No match + +# WORD BOUNDARY + +/\bABC\b/utf + \x{c0}ABC\x{d0} + 0: ABC + +/\bABC\b/utf,ucp +\= Expect no match + \x{c0}ABC\x{d0} +No match + +/\bABC\b/utf,ucp,ascii_bsw 
+ \x{c0}ABC\x{d0} + 0: ABC + +/\bABC\b/utf,ucp,ascii_all + \x{c0}ABC\x{d0} + 0: ABC + +# POSIX + +/^[[:digit:]]+$/utf,ucp + 123456 + 0: 123456 + 123\x{660}456 + 0: 123\x{660}456 + +/^[[:digit:]]+$/utf,ucp,ascii_digit + 123456 + 0: 123456 +\= Expect no match + 123\x{660}456 +No match + +/[[:digit:]]+/g,utf,ucp,ascii_digit + 123\x{660}456 + 0: 123 + 0: 456 + +/(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit + 11 + 0: 11 + \x{ff11}1 + 0: \x{ff11}1 +\= Expect no match + 1\x{ff11} +No match + +/(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit + 11 + 0: 11 + \x{ff11}1 + 0: \x{ff11}1 +\= Expect no match + 1\x{ff11} +No match + +/(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit + 11 + 0: 11 +\= Expect no match + \x{ff11}1 +No match + 1\x{ff11} +No match + +/[[:digit:]]+/utf,ucp,ascii_posix + 123\x{660}456 + 0: 123 + +/(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix + 11 + 0: 11 + \x{ff11}1 + 0: \x{ff11}1 +\= Expect no match + 1\x{ff11} +No match + +/(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix + 11 + 0: 11 + \x{ff11}1 + 0: \x{ff11}1 +\= Expect no match + 1\x{ff11} +No match + +/(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp + 11 + 0: 11 + \x{ff11}1 + 0: \x{ff11}1 +\= Expect no match + 1\x{ff11} +No match + +/^[[:xdigit:]]+$/utf,ucp + f0 + 0: f0 + 1A + 0: 1A + d\x{ff10} + 0: d\x{ff10} + \x{ff26}8 + 0: \x{ff26}8 +\= Expect no match + 8g\=no_jit +No match + +/^[[:xdigit:]]+$/utf,ucp,ascii_digit + f0 + 0: f0 + 1A + 0: 1A +\= Expect no match + d\x{ff10} +No match + \x{ff26}8 +No match + 8g +No match + +/>[[:space:]]+\x{a0} \x{a0}< + 0: >\x{a0} \x{a0}< + >\x{a0}\x{a0}\x{a0}< + 0: >\x{a0}\x{a0}\x{a0}< + +/>[[:space:]]+\x{a0} \x{a0}< +No match + +/(?aP)[[:alnum:]]+/i,ucp,utf + abcáxyz + 0: abc + abc\x{660}xyz + 0: abc + +/(?aP)[[:alnum:]\d]+/i,ucp,utf + abc\x{660}xyz + 0: abc\x{660}xyz + +/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/ + \x{660}A\x{660} + 0: \x{660}A\x{660} +\= Expect no match + \x{660}\x{660}\x{660} +No match + +# VARIOUS + 
+/[\d\s\w]+/a,ucp,utf + 9 A\x{660}À + 0: 9 A + 9 AÀ\x{660} + 0: 9 A + +# End PCRE2_EXTRA_ASCII_xxx tests + +/(?abc>([^()]|\((?1)*\))*abc>123abc>123abc>1(2)3abc>1(2)3abc>(1(2)3)abc>(1(2)3)a*)\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa9876 + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa9 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >/x + <> + 0: <> + + 0: + hij> + 0: hij> + hij> + 0: + def> + 0: def> + + 0: <> +\= Expect no match + abcxyz + 1 ^ ^ x + 0: abcxyz + 123abcxyz999 +--->123abcxyz999 + 1 ^ ^ x + 0: abcxyz + +/(ab|cd){3,4}/auto_callout + ababab +--->ababab + +0 ^ ( + +1 ^ a + +4 ^ c + +2 ^^ b + +3 ^ ^ | + +1 ^ ^ a + +4 ^ ^ c + +2 ^ ^ b + +3 ^ ^ | + +1 ^ ^ a + +4 ^ ^ c + +2 ^ ^ b + +3 ^ ^ | ++12 ^ ^ End of pattern + +1 ^ ^ a + +4 ^ ^ c + 0: ababab + abcdabcd +--->abcdabcd + +0 ^ ( + +1 ^ a + +4 ^ c + +2 ^^ b + +3 ^ ^ | + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} + +1 ^ ^ a + +4 ^ ^ c + +2 ^ ^ b + +3 ^ ^ | ++12 ^ ^ End of pattern + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} ++12 ^ ^ End of pattern + 0: abcdabcd + 1: abcdab + abcdcdcdcdcd +--->abcdcdcdcdcd + +0 ^ ( + +1 ^ a + +4 ^ c + +2 ^^ b + +3 ^ ^ | + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} ++12 ^ ^ End of pattern + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} ++12 ^ ^ End of pattern + 0: abcdcdcd + 1: abcdcd + +/^abc/ + abcdef + 0: abc +\= Expect no match + abcdef\=notbol +No match + +/^(a*|xyz)/ + bcd + 0: + aaabcd + 0: aaa + xyz + 0: xyz + 1: + xyz\=notempty + 0: xyz +\= Expect no match + bcd\=notempty +No match + +/xyz$/ + xyz + 0: xyz + xyz\n + 0: xyz +\= Expect no match + xyz\=noteol +No match + xyz\n\=noteol +No match + +/xyz$/m + xyz + 0: xyz + xyz\n + 0: xyz + abcxyz\npqr + 0: xyz + abcxyz\npqr\=noteol + 0: xyz + xyz\n\=noteol + 0: xyz +\= Expect no match + xyz\=noteol +No match + +/\Gabc/ + abcdef + 0: abc + 
defabcxyz\=offset=3 + 0: abc +\= Expect no match + defabcxyz +No match + +/^abcdef/ + ab\=ps +Partial match: ab + abcde\=ps +Partial match: abcde + abcdef\=ps + 0: abcdef +\= Expect no match + abx\=ps +No match + +/^a{2,4}\d+z/ + a\=ps +Partial match: a + aa\=ps +Partial match: aa + aa2\=ps +Partial match: aa2 + aaa\=ps +Partial match: aaa + aaa23\=ps +Partial match: aaa23 + aaaa12345\=ps +Partial match: aaaa12345 + aa0z\=ps + 0: aa0z + aaaa4444444444444z\=ps + 0: aaaa4444444444444z +\= Expect no match + az\=ps +No match + aaaaa\=ps +No match + a56\=ps +No match + +/^abcdef/ + abc\=ps +Partial match: abc + def\=dfa_restart + 0: def + +/(?<=foo)bar/ + foob\=ps,offset=2,allusedtext +Partial match: foob + <<< + foobar...\=ps,dfa_restart,offset=4 + 0: ar + foobar\=offset=2 + 0: bar +\= Expect no match + xyzfo\=ps +No match + obar\=dfa_restart +No match + +/(ab*(cd|ef))+X/ + lkjhlkjhlkjhlkjhabbbbbbcdaefabbbbbbbefa\=ps,notbol,noteol +Partial match: abbbbbbcdaefabbbbbbbefa + cdabbbbbbbb\=ps,notbol,dfa_restart,noteol +Partial match: cdabbbbbbbb + efabbbbbbbbbbbbbbbb\=ps,notbol,dfa_restart,noteol +Partial match: efabbbbbbbbbbbbbbbb + bbbbbbbbbbbbcdXyasdfadf\=ps,notbol,dfa_restart,noteol + 0: bbbbbbbbbbbbcdX +\= Expect no match + adfadadaklhlkalkajhlkjahdfasdfasdfladsfjkj\=ps,noteol +No match + +/the quick brown fox/ + the quick brown fox + 0: the quick brown fox + What do you know about the quick brown fox? + 0: the quick brown fox +\= Expect no match + The quick brown FOX +No match + What do you know about THE QUICK BROWN FOX? +No match + +/The quick brown fox/i + the quick brown fox + 0: the quick brown fox + The quick brown FOX + 0: The quick brown FOX + What do you know about the quick brown fox? + 0: the quick brown fox + What do you know about THE QUICK BROWN FOX? 
+ 0: THE QUICK BROWN FOX + +/abcd\t\n\r\f\a\e\071\x3b\$\\\?caxyz/ + abcd\t\n\r\f\a\e9;\$\\?caxyz + 0: abcd\x09\x0a\x0d\x0c\x07\x1b9;$\?caxyz + +/a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz/ + abxyzpqrrrabbxyyyypqAzz + 0: abxyzpqrrrabbxyyyypqAzz + abxyzpqrrrabbxyyyypqAzz + 0: abxyzpqrrrabbxyyyypqAzz + aabxyzpqrrrabbxyyyypqAzz + 0: aabxyzpqrrrabbxyyyypqAzz + aaabxyzpqrrrabbxyyyypqAzz + 0: aaabxyzpqrrrabbxyyyypqAzz + aaaabxyzpqrrrabbxyyyypqAzz + 0: aaaabxyzpqrrrabbxyyyypqAzz + abcxyzpqrrrabbxyyyypqAzz + 0: abcxyzpqrrrabbxyyyypqAzz + aabcxyzpqrrrabbxyyyypqAzz + 0: aabcxyzpqrrrabbxyyyypqAzz + aaabcxyzpqrrrabbxyyyypAzz + 0: aaabcxyzpqrrrabbxyyyypAzz + aaabcxyzpqrrrabbxyyyypqAzz + 0: aaabcxyzpqrrrabbxyyyypqAzz + aaabcxyzpqrrrabbxyyyypqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqAzz + aaabcxyzpqrrrabbxyyyypqqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqqAzz + aaabcxyzpqrrrabbxyyyypqqqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqqqAzz + aaabcxyzpqrrrabbxyyyypqqqqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqqqqAzz + aaabcxyzpqrrrabbxyyyypqqqqqqAzz + 0: aaabcxyzpqrrrabbxyyyypqqqqqqAzz + aaaabcxyzpqrrrabbxyyyypqAzz + 0: aaaabcxyzpqrrrabbxyyyypqAzz + abxyzzpqrrrabbxyyyypqAzz + 0: abxyzzpqrrrabbxyyyypqAzz + aabxyzzzpqrrrabbxyyyypqAzz + 0: aabxyzzzpqrrrabbxyyyypqAzz + aaabxyzzzzpqrrrabbxyyyypqAzz + 0: aaabxyzzzzpqrrrabbxyyyypqAzz + aaaabxyzzzzpqrrrabbxyyyypqAzz + 0: aaaabxyzzzzpqrrrabbxyyyypqAzz + abcxyzzpqrrrabbxyyyypqAzz + 0: abcxyzzpqrrrabbxyyyypqAzz + aabcxyzzzpqrrrabbxyyyypqAzz + 0: aabcxyzzzpqrrrabbxyyyypqAzz + aaabcxyzzzzpqrrrabbxyyyypqAzz + 0: aaabcxyzzzzpqrrrabbxyyyypqAzz + aaaabcxyzzzzpqrrrabbxyyyypqAzz + 0: aaaabcxyzzzzpqrrrabbxyyyypqAzz + aaaabcxyzzzzpqrrrabbbxyyyypqAzz + 0: aaaabcxyzzzzpqrrrabbbxyyyypqAzz + aaaabcxyzzzzpqrrrabbbxyyyyypqAzz + 0: aaaabcxyzzzzpqrrrabbbxyyyyypqAzz + aaabcxyzpqrrrabbxyyyypABzz + 0: aaabcxyzpqrrrabbxyyyypABzz + aaabcxyzpqrrrabbxyyyypABBzz + 0: aaabcxyzpqrrrabbxyyyypABBzz + >>>aaabxyzpqrrrabbxyyyypqAzz + 0: aaabxyzpqrrrabbxyyyypqAzz + >aaaabxyzpqrrrabbxyyyypqAzz + 0: 
aaaabxyzpqrrrabbxyyyypqAzz + >>>>abcxyzpqrrrabbxyyyypqAzz + 0: abcxyzpqrrrabbxyyyypqAzz +\= Expect no match + abxyzpqrrabbxyyyypqAzz +No match + abxyzpqrrrrabbxyyyypqAzz +No match + abxyzpqrrrabxyyyypqAzz +No match + aaaabcxyzzzzpqrrrabbbxyyyyyypqAzz +No match + aaaabcxyzzzzpqrrrabbbxyyypqAzz +No match + aaabcxyzpqrrrabbxyyyypqqqqqqqAzz +No match + +/^(abc){1,2}zz/ + abczz + 0: abczz + abcabczz + 0: abcabczz +\= Expect no match + zz +No match + abcabcabczz +No match + >>abczz +No match + +/^(b+?|a){1,2}?c/ + bc + 0: bc + bbc + 0: bbc + bbbc + 0: bbbc + bac + 0: bac + bbac + 0: bbac + aac + 0: aac + abbbbbbbbbbbc + 0: abbbbbbbbbbbc + bbbbbbbbbbbac + 0: bbbbbbbbbbbac +\= Expect no match + aaac +No match + abbbbbbbbbbbac +No match + +/^(b+|a){1,2}c/ + bc + 0: bc + bbc + 0: bbc + bbbc + 0: bbbc + bac + 0: bac + bbac + 0: bbac + aac + 0: aac + abbbbbbbbbbbc + 0: abbbbbbbbbbbc + bbbbbbbbbbbac + 0: bbbbbbbbbbbac +\= Expect no match + aaac +No match + abbbbbbbbbbbac +No match + +/^(b+|a){1,2}?bc/ + bbc + 0: bbc + +/^(b*|ba){1,2}?bc/ + babc + 0: babc + bbabc + 0: bbabc + bababc + 0: bababc +\= Expect no match + bababbc +No match + babababc +No match + +/^(ba|b*){1,2}?bc/ + babc + 0: babc + bbabc + 0: bbabc + bababc + 0: bababc +\= Expect no match + bababbc +No match + babababc +No match + +/^\ca\cA\c[\c{\c:/ + \x01\x01\e;z + 0: \x01\x01\x1b;z + +/^[ab\]cde]/ + athing + 0: a + bthing + 0: b + ]thing + 0: ] + cthing + 0: c + dthing + 0: d + ething + 0: e +\= Expect no match + fthing +No match + [thing +No match + \\thing +No match + +/^[]cde]/ + ]thing + 0: ] + cthing + 0: c + dthing + 0: d + ething + 0: e +\= Expect no match + athing +No match + fthing +No match + +/^[^ab\]cde]/ + fthing + 0: f + [thing + 0: [ + \\thing + 0: \ +\= Expect no match + athing +No match + bthing +No match + ]thing +No match + cthing +No match + dthing +No match + ething +No match + +/^[^]cde]/ + athing + 0: a + fthing + 0: f +\= Expect no match + ]thing +No match + cthing +No match + dthing +No 
match + ething +No match + +/^\/ +  + 0: \x81 + +/^ÿ/ + ÿ + 0: \xff + +/^[0-9]+$/ + 0 + 0: 0 + 1 + 0: 1 + 2 + 0: 2 + 3 + 0: 3 + 4 + 0: 4 + 5 + 0: 5 + 6 + 0: 6 + 7 + 0: 7 + 8 + 0: 8 + 9 + 0: 9 + 10 + 0: 10 + 100 + 0: 100 +\= Expect no match + abc +No match + +/^.*nter/ + enter + 0: enter + inter + 0: inter + uponter + 0: uponter + +/^xxx[0-9]+$/ + xxx0 + 0: xxx0 + xxx1234 + 0: xxx1234 +\= Expect no match + xxx +No match + +/^.+[0-9][0-9][0-9]$/ + x123 + 0: x123 + xx123 + 0: xx123 + 123456 + 0: 123456 + x1234 + 0: x1234 +\= Expect no match + 123 +No match + +/^.+?[0-9][0-9][0-9]$/ + x123 + 0: x123 + xx123 + 0: xx123 + 123456 + 0: 123456 + x1234 + 0: x1234 +\= Expect no match + 123 +No match + +/^([^!]+)!(.+)=apquxz\.ixr\.zzz\.ac\.uk$/ + abc!pqr=apquxz.ixr.zzz.ac.uk + 0: abc!pqr=apquxz.ixr.zzz.ac.uk +\= Expect no match + !pqr=apquxz.ixr.zzz.ac.uk +No match + abc!=apquxz.ixr.zzz.ac.uk +No match + abc!pqr=apquxz:ixr.zzz.ac.uk +No match + abc!pqr=apquxz.ixr.zzz.ac.ukk +No match + +/:/ + Well, we need a colon: somewhere + 0: : +\= Expect no match + No match without a colon +No match + +/([\da-f:]+)$/i + 0abc + 0: 0abc + abc + 0: abc + fed + 0: fed + E + 0: E + :: + 0: :: + 5f03:12C0::932e + 0: 5f03:12C0::932e + fed def + 0: def + Any old stuff + 0: ff +\= Expect no match + 0zzz +No match + gzzz +No match + fed\x20 +No match + Any old rubbish +No match + +/^.*\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/ + .1.2.3 + 0: .1.2.3 + A.12.123.0 + 0: A.12.123.0 +\= Expect no match + .1.2.3333 +No match + 1.2.3 +No match + 1234.2.3 +No match + +/^(\d+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/ + 1 IN SOA non-sp1 non-sp2( + 0: 1 IN SOA non-sp1 non-sp2( + 1 IN SOA non-sp1 non-sp2 ( + 0: 1 IN SOA non-sp1 non-sp2 ( +\= Expect no match + 1IN SOA non-sp1 non-sp2( +No match + +/^[a-zA-Z\d][a-zA-Z\d\-]*(\.[a-zA-Z\d][a-zA-z\d\-]*)*\.$/ + a. + 0: a. + Z. + 0: Z. + 2. + 0: 2. + ab-c.pq-r. + 0: ab-c.pq-r. + sxk.zzz.ac.uk. + 0: sxk.zzz.ac.uk. + x-.y-. + 0: x-.y-. +\= Expect no match + -abc.peq. 
+No match + +/^\*\.[a-z]([a-z\-\d]*[a-z\d]+)?(\.[a-z]([a-z\-\d]*[a-z\d]+)?)*$/ + *.a + 0: *.a + *.b0-a + 0: *.b0-a + *.c3-b.c + 0: *.c3-b.c + *.c-a.b-c + 0: *.c-a.b-c +\= Expect no match + *.0 +No match + *.a- +No match + *.a-b.c- +No match + *.c-a.0-c +No match + +/^(?=ab(de))(abd)(e)/ + abde + 0: abde + +/^(?!(ab)de|x)(abd)(f)/ + abdf + 0: abdf + +/^(?=(ab(cd)))(ab)/ + abcd + 0: ab + +/^[\da-f](\.[\da-f])*$/i + a.b.c.d + 0: a.b.c.d + A.B.C.D + 0: A.B.C.D + a.b.c.1.2.3.C + 0: a.b.c.1.2.3.C + +/^\".*\"\s*(;.*)?$/ + \"1234\" + 0: "1234" + \"abcd\" ; + 0: "abcd" ; + \"\" ; rhubarb + 0: "" ; rhubarb +\= Expect no match + \"1234\" : things +No match + +/^$/ + \ + 0: + +/ ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/x + ab c + 0: ab c +\= Expect no match + abc +No match + ab cde +No match + +/(?x) ^ a (?# begins with a) b\sc (?# then b c) $ (?# then end)/ + ab c + 0: ab c +\= Expect no match + abc +No match + ab cde +No match + +/^ a\ b[c ]d $/x + a bcd + 0: a bcd + a b d + 0: a b d +\= Expect no match + abcd +No match + ab d +No match + +/^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$/ + abcdefhijklm + 0: abcdefhijklm + +/^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$/ + abcdefhijklm + 0: abcdefhijklm + +/^[\w][\W][\s][\S][\d][\D][\b][\n][\c]][\022]/ + a+ Z0+\x08\n\x1d\x12 + 0: a+ Z0+\x08\x0a\x1d\x12 + +/^[.^$|()*+?{,}]+/ + .^\$(*+)|{?,?} + 0: .^$(*+)|{?,?} + +/^a*\w/ + z + 0: z + az + 0: az + 1: a + aaaz + 0: aaaz + 1: aaa + 2: aa + 3: a + a + 0: a + aa + 0: aa + 1: a + aaaa + 0: aaaa + 1: aaa + 2: aa + 3: a + a+ + 0: a + aa+ + 0: aa + 1: a + +/^a*?\w/ + z + 0: z + az + 0: az + 1: a + aaaz + 0: aaaz + 1: aaa + 2: aa + 3: a + a + 0: a + aa + 0: aa + 1: a + aaaa + 0: aaaa + 1: aaa + 2: aa + 3: a + a+ + 0: a + aa+ + 0: aa + 1: a + +/^a+\w/ + az + 0: az + aaaz + 0: aaaz + 1: aaa + 2: aa + aa + 0: aa + aaaa + 0: aaaa + 1: aaa + 2: aa + aa+ + 0: aa + +/^a+?\w/ + az + 0: az + aaaz + 0: aaaz + 1: aaa + 2: aa + aa + 0: aa + aaaa + 0: aaaa + 1: aaa + 2: aa + aa+ + 0: aa + 
+/^\d{8}\w{2,}/ + 1234567890 + 0: 1234567890 + 12345678ab + 0: 12345678ab + 12345678__ + 0: 12345678__ +\= Expect no match + 1234567 +No match + +/^[aeiou\d]{4,5}$/ + uoie + 0: uoie + 1234 + 0: 1234 + 12345 + 0: 12345 + aaaaa + 0: aaaaa +\= Expect no match + 123456 +No match + +/^[aeiou\d]{4,5}?/ + uoie + 0: uoie + 1234 + 0: 1234 + 12345 + 0: 12345 + 1: 1234 + aaaaa + 0: aaaaa + 1: aaaa + 123456 + 0: 12345 + 1: 1234 + +/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/ + From abcd Mon Sep 01 12:33:02 1997 + 0: From abcd Mon Sep 01 12:33 + +/^From\s+\S+\s+([a-zA-Z]{3}\s+){2}\d{1,2}\s+\d\d:\d\d/ + From abcd Mon Sep 01 12:33:02 1997 + 0: From abcd Mon Sep 01 12:33 + From abcd Mon Sep 1 12:33:02 1997 + 0: From abcd Mon Sep 1 12:33 +\= Expect no match + From abcd Sep 01 12:33:02 1997 +No match + +/^12.34/s + 12\n34 + 0: 12\x0a34 + 12\r34 + 0: 12\x0d34 + +/\w+(?=\t)/ + the quick brown\t fox + 0: brown + +/foo(?!bar)(.*)/ + foobar is foolish see? + 0: foolish see? 
+ +/(?:(?!foo)...|^.{0,2})bar(.*)/ + foobar crowbar etc + 0: rowbar etc + barrel + 0: barrel + 2barrel + 0: 2barrel + A barrel + 0: A barrel + +/^(\D*)(?=\d)(?!123)/ + abc456 + 0: abc +\= Expect no match + abc123 +No match + +/^1234(?# test newlines + inside)/ + 1234 + 0: 1234 + +/^1234 #comment in extended re + /x + 1234 + 0: 1234 + +/#rhubarb + abcd/x + abcd + 0: abcd + +/^abcd#rhubarb/x + abcd + 0: abcd + +/(?!^)abc/ + the abc + 0: abc +\= Expect no match + abc +No match + +/(?=^)abc/ + abc + 0: abc +\= Expect no match + the abc +No match + +/^[ab]{1,3}(ab*|b)/no_auto_possess + aabbbbb + 0: aabbbbb + 1: aabbbb + 2: aabbb + 3: aabb + 4: aab + 5: aa + +/^[ab]{1,3}?(ab*|b)/no_auto_possess + aabbbbb + 0: aabbbbb + 1: aabbbb + 2: aabbb + 3: aabb + 4: aab + 5: aa + +/^[ab]{1,3}?(ab*?|b)/no_auto_possess + aabbbbb + 0: aabbbbb + 1: aabbbb + 2: aabbb + 3: aabb + 4: aab + 5: aa + +/^[ab]{1,3}(ab*?|b)/no_auto_possess + aabbbbb + 0: aabbbbb + 1: aabbbb + 2: aabbb + 3: aabb + 4: aab + 5: aa + +/ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional leading comment +(?: (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address +| # or +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # one word, optionally followed by.... +(?: +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or... 
+\( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) | # comments, or... + +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +# quoted strings +)* +< (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # leading < +(?: @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* + +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* , (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +)* # further okay, if led by comma +: # closing colon +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* )? # optional route +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address spec +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* > # trailing > +# name and address +) (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional trailing comment +/x + Alan Other + 0: Alan Other + + 0: user@dom.ain + 1: user@dom + user\@dom.ain + 0: user@dom.ain + 1: user@dom + \"A. Other\" (a comment) + 0: "A. Other" (a comment) + 1: "A. Other" + 2: "A. Other" + A. 
Other (a comment) + 0: Other (a comment) + 1: Other + 2: Other + \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay + 0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay + 1: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re + A missing angle @,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# additional words +)* +@ +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)* +# address +| # or +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +# leading word +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # "normal" atoms and or spaces +(?: +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +| +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +) # "special" comment or quoted string +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # more "normal" +)* +< +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# < +(?: +@ +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. 
+# optional trailing comments +)* +(?: , +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +@ +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)* +)* # additional domains +: +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)? # optional route +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +\. +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +# Atom +| # or +" # " +[^\\\x80-\xff\n\015"] * # normal +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )* +" # " +# Quoted string +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# additional words +)* +@ +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. 
+(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +(?: +\. +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +\[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) +[\040\t]* # Nab whitespace. +(?: +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: # ( +(?: \\ [^\x80-\xff] | +\( # ( +[^\\\x80-\xff\n\015()] * # normal* +(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)* +\) # ) +) # special +[^\\\x80-\xff\n\015()] * # normal* +)* # )* +\) # ) +[\040\t]* )* # If comment found, allow more spaces. +# optional trailing comments +)* +# address spec +> # > +# name and address +) +/x + Alan Other + 0: Alan Other + + 0: user@dom.ain + 1: user@dom + user\@dom.ain + 0: user@dom.ain + 1: user@dom + \"A. Other\" (a comment) + 0: "A. Other" + A. 
Other (a comment) + 0: Other + \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay + 0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay + 1: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re + A missing angle ?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff + +/P[^*]TAIRE[^*]{1,6}?LL/ + xxxxxxxxxxxPSTAIREISLLxxxxxxxxx + 0: PSTAIREISLL + +/P[^*]TAIRE[^*]{1,}?LL/ + xxxxxxxxxxxPSTAIREISLLxxxxxxxxx + 0: PSTAIREISLL + +/(\.\d\d[1-9]?)\d+/ + 1.230003938 + 0: .230003938 + 1.875000282 + 0: .875000282 + 1.235 + 0: .235 + +/(\.\d\d((?=0)|\d(?=\d)))/ + 1.230003938 + 0: .230 + 1: .23 + 1.875000282 + 0: .875 +\= Expect no match + 1.235 +No match + +/a(?)b/ + ab + 0: ab + +/\b(foo)\s+(\w+)/i + Food is on the foo table + 0: foo table + +/foo(.*)bar/ + The food is under the bar in the barn. + 0: food is under the bar in the bar + 1: food is under the bar + +/foo(.*?)bar/ + The food is under the bar in the barn. 
+ 0: food is under the bar in the bar + 1: food is under the bar + +/(.*)(\d*)/no_auto_possess + I have 2 numbers: 53147 +Matched, but offsets vector is too small to show all matches + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: 5314 + 2: I have 2 numbers: 531 + 3: I have 2 numbers: 53 + 4: I have 2 numbers: 5 + 5: I have 2 numbers: + 6: I have 2 numbers: + 7: I have 2 numbers + 8: I have 2 number + 9: I have 2 numbe +10: I have 2 numb +11: I have 2 num +12: I have 2 nu +13: I have 2 n +14: I have 2 + +/(.*)(\d+)/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + 1: I have 2 + +/(.*?)(\d*)/no_auto_possess + I have 2 numbers: 53147 +Matched, but offsets vector is too small to show all matches + 0: I have 2 numbers: 53147 + 1: I have 2 numbers: 5314 + 2: I have 2 numbers: 531 + 3: I have 2 numbers: 53 + 4: I have 2 numbers: 5 + 5: I have 2 numbers: + 6: I have 2 numbers: + 7: I have 2 numbers + 8: I have 2 number + 9: I have 2 numbe +10: I have 2 numb +11: I have 2 num +12: I have 2 nu +13: I have 2 n +14: I have 2 + +/(.*?)(\d+)/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + 1: I have 2 + +/(.*)(\d+)$/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + +/(.*?)(\d+)$/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + +/(.*)\b(\d+)$/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + +/(.*\D)(\d+)$/ + I have 2 numbers: 53147 + 0: I have 2 numbers: 53147 + +/^\D*(?!123)/ + ABC123 + 0: AB + 1: A + 2: + +/^(\D*)(?=\d)(?!123)/ + ABC445 + 0: ABC +\= Expect no match + ABC123 +No match + +/^[W-]46]/ + W46]789 + 0: W46] + -46]789 + 0: -46] +\= Expect no match + Wall +No match + Zebra +No match + 42 +No match + [abcd] +No match + ]abcd[ +No match + +/^[W-\]46]/ + W46]789 + 0: W + Wall + 0: W + Zebra + 0: Z + Xylophone + 0: X + 42 + 0: 4 + [abcd] + 0: [ + ]abcd[ + 0: ] + \\backslash + 0: \ +\= Expect no match + -46]789 +No match + well +No match + +/\d\d\/\d\d\/\d\d\d\d/ + 01/01/2000 + 0: 01/01/2000 + +/word (?:[a-zA-Z0-9]+ 
){0,10}otherword/ + word cat dog elephant mussel cow horse canary baboon snake shark otherword + 0: word cat dog elephant mussel cow horse canary baboon snake shark otherword +\= Expect no match + word cat dog elephant mussel cow horse canary baboon snake shark +No match + +/word (?:[a-zA-Z0-9]+ ){0,300}otherword/ +\= Expect no match + word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope +No match + +/^(a){0,0}/ + bcd + 0: + abc + 0: + aab + 0: + +/^(a){0,1}/ + bcd + 0: + abc + 0: a + 1: + aab + 0: a + 1: + +/^(a){0,2}/ + bcd + 0: + abc + 0: a + 1: + aab + 0: aa + 1: a + 2: + +/^(a){0,3}/ + bcd + 0: + abc + 0: a + 1: + aab + 0: aa + 1: a + 2: + aaa + 0: aaa + 1: aa + 2: a + 3: + +/^(a){0,}/ + bcd + 0: + abc + 0: a + 1: + aab + 0: aa + 1: a + 2: + aaa + 0: aaa + 1: aa + 2: a + 3: + aaaaaaaa + 0: aaaaaaaa + 1: aaaaaaa + 2: aaaaaa + 3: aaaaa + 4: aaaa + 5: aaa + 6: aa + 7: a + 8: + +/^(a){1,1}/ + abc + 0: a + aab + 0: a +\= Expect no match + bcd +No match + +/^(a){1,2}/ + abc + 0: a + aab + 0: aa + 1: a +\= Expect no match + bcd +No match + +/^(a){1,3}/ + abc + 0: a + aab + 0: aa + 1: a + aaa + 0: aaa + 1: aa + 2: a +\= Expect no match + bcd +No match + +/^(a){1,}/ + abc + 0: a + aab + 0: aa + 1: a + aaa + 0: aaa + 1: aa + 2: a + aaaaaaaa + 0: aaaaaaaa + 1: aaaaaaa + 2: aaaaaa + 3: aaaaa + 4: aaaa + 5: aaa + 6: aa + 7: a +\= Expect no match + bcd +No match + +/.*\.gif/ + borfle\nbib.gif\nno + 0: bib.gif + +/.{0,}\.gif/ + borfle\nbib.gif\nno + 0: bib.gif + +/.*\.gif/m + borfle\nbib.gif\nno + 0: bib.gif + +/.*\.gif/s + borfle\nbib.gif\nno + 0: borfle\x0abib.gif + +/.*\.gif/ms + borfle\nbib.gif\nno + 0: borfle\x0abib.gif + +/.*$/ + borfle\nbib.gif\nno + 0: no + +/.*$/m + borfle\nbib.gif\nno + 0: borfle + +/.*$/s + borfle\nbib.gif\nno + 0: borfle\x0abib.gif\x0ano + +/.*$/ms + borfle\nbib.gif\nno + 0: borfle\x0abib.gif\x0ano + 1: borfle\x0abib.gif + 2: borfle + 
+/.*$/ + borfle\nbib.gif\nno\n + 0: no + +/.*$/m + borfle\nbib.gif\nno\n + 0: borfle + +/.*$/s + borfle\nbib.gif\nno\n + 0: borfle\x0abib.gif\x0ano\x0a + 1: borfle\x0abib.gif\x0ano + +/.*$/ms + borfle\nbib.gif\nno\n + 0: borfle\x0abib.gif\x0ano\x0a + 1: borfle\x0abib.gif\x0ano + 2: borfle\x0abib.gif + 3: borfle + +/(.*X|^B)/ + abcde\n1234Xyz + 0: 1234X + BarFoo + 0: B +\= Expect no match + abcde\nBar +No match + +/(.*X|^B)/m + abcde\n1234Xyz + 0: 1234X + BarFoo + 0: B + abcde\nBar + 0: B + +/(.*X|^B)/s + abcde\n1234Xyz + 0: abcde\x0a1234X + BarFoo + 0: B +\= Expect no match + abcde\nBar +No match + +/(.*X|^B)/ms + abcde\n1234Xyz + 0: abcde\x0a1234X + BarFoo + 0: B + abcde\nBar + 0: B + +/(?s)(.*X|^B)/ + abcde\n1234Xyz + 0: abcde\x0a1234X + BarFoo + 0: B +\= Expect no match + abcde\nBar +No match + +/(?s:.*X|^B)/ + abcde\n1234Xyz + 0: abcde\x0a1234X + BarFoo + 0: B +\= Expect no match + abcde\nBar +No match + +/^.*B/ +\= Expect no match + abc\nB +No match + +/(?s)^.*B/ + abc\nB + 0: abc\x0aB + +/(?m)^.*B/ + abc\nB + 0: B + +/(?ms)^.*B/ + abc\nB + 0: abc\x0aB + +/(?ms)^B/ + abc\nB + 0: B + +/(?s)B$/ + B\n + 0: B + +/^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]/ + 123456654321 + 0: 123456654321 + +/^\d\d\d\d\d\d\d\d\d\d\d\d/ + 123456654321 + 0: 123456654321 + +/^[\d][\d][\d][\d][\d][\d][\d][\d][\d][\d][\d][\d]/ + 123456654321 + 0: 123456654321 + +/^[abc]{12}/ + abcabcabcabc + 0: abcabcabcabc + +/^[a-c]{12}/ + abcabcabcabc + 0: abcabcabcabc + +/^(a|b|c){12}/ + abcabcabcabc + 0: abcabcabcabc + +/^[abcdefghijklmnopqrstuvwxy0123456789]/ + n + 0: n +\= Expect no match + z +No match + +/abcde{0,0}/ + abcd + 0: abcd +\= Expect no match + abce +No match + +/ab[cd]{0,0}e/ + abe + 0: abe +\= Expect no match + abcde +No match + +/ab(c){0,0}d/ + abd + 0: abd +\= Expect no match + abcd +No match + +/a(b*)/ + a + 0: a + ab + 0: ab + abbbb + 0: abbbb +\= Expect no match + bbbbb +No match + +/ab\d{0}e/ + abe + 0: abe +\= Expect no match + ab1e +No match + 
+/"([^\\"]+|\\.)*"/ + the \"quick\" brown fox + 0: "quick" + \"the \\\"quick\\\" brown fox\" + 0: "the \"quick\" brown fox" + +/.*?/g,aftertext + abc + 0: abc + 0+ + 1: ab + 2: a + 3: + 0: + 0+ + +/\b/g,aftertext + abc + 0: + 0+ abc + 0: + 0+ + +/\b/g,aftertext + abc + 0: + 0+ abc + 0: + 0+ + +//g + abc + 0: + 0: + 0: + 0: + +/]{0,})>]{0,})>([\d]{0,}\.)(.*)((
([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/is + 43.
Word Processor
(N-1286)
Lega lstaff.comCA - Statewide + 0: 43.Word Processor
(N-1286)
Lega lstaff.comCA - Statewide + +/a[^a]b/ + acb + 0: acb + a\nb + 0: a\x0ab + +/a.b/ + acb + 0: acb +\= Expect no match + a\nb +No match + +/a[^a]b/s + acb + 0: acb + a\nb + 0: a\x0ab + +/a.b/s + acb + 0: acb + a\nb + 0: a\x0ab + +/^(b+?|a){1,2}?c/ + bac + 0: bac + bbac + 0: bbac + bbbac + 0: bbbac + bbbbac + 0: bbbbac + bbbbbac + 0: bbbbbac + +/^(b+|a){1,2}?c/ + bac + 0: bac + bbac + 0: bbac + bbbac + 0: bbbac + bbbbac + 0: bbbbac + bbbbbac + 0: bbbbbac + +/(?!\A)x/m + a\bx\n + 0: x +\= Expect no match + x\nb\n +No match + +/\x0{ab}/ + \0{ab} + 0: \x00{ab} + +/(A|B)*?CD/ + CD + 0: CD + +/(A|B)*CD/ + CD + 0: CD + +/(?.*/)foo" +\= Expect no match + /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ +No match + +"(?>.*/)foo" + /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo + 0: /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo + +/(?>(\.\d\d[1-9]?))\d+/ + 1.230003938 + 0: .230003938 + 1.875000282 + 0: .875000282 +\= Expect no match + 1.235 +No match + +/^((?>\w+)|(?>\s+))*$/ + now is the time for all good men to come to the aid of the party + 0: now is the time for all good men to come to the aid of the party +\= Expect no match + this is not a line with only words and spaces! 
+No match + +/(\d+)(\w)/ + 12345a + 0: 12345a + 1: 12345 + 2: 1234 + 3: 123 + 4: 12 + 12345+ + 0: 12345 + 1: 1234 + 2: 123 + 3: 12 + +/((?>\d+))(\w)/ + 12345a + 0: 12345a +\= Expect no match + 12345+ +No match + +/(?>a+)b/ + aaab + 0: aaab + +/((?>a+)b)/ + aaab + 0: aaab + +/(?>(a+))b/ + aaab + 0: aaab + +/(?>b)+/ + aaabbbccc + 0: bbb + 1: bb + 2: b + +/(?>a+|b+|c+)*c/ + aaabbbbccccd + 0: aaabbbbcccc + 1: aaabbbbc + +/(a+|b+|c+)*c/ + aaabbbbccccd + 0: aaabbbbcccc + 1: aaabbbbccc + 2: aaabbbbcc + 3: aaabbbbc + +/((?>[^()]+)|\([^()]*\))+/ + ((abc(ade)ufh()()x + 0: abc(ade)ufh()()x + 1: abc(ade)ufh()() + 2: abc(ade)ufh() + 3: abc(ade)ufh + 4: abc(ade) + 5: abc + +/\(((?>[^()]+)|\([^()]+\))+\)/ + (abc) + 0: (abc) + (abc(def)xyz) + 0: (abc(def)xyz) +\= Expect no match + ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/a(?-i)b/i + ab + 0: ab + Ab + 0: Ab +\= Expect no match + aB +No match + AB +No match + +/(a (?x)b c)d e/ + a bcd e + 0: a bcd e +\= Expect no match + a b cd e +No match + abcd e +No match + a bcde +No match + +/(a b(?x)c d (?-x)e f)/ + a bcde f + 0: a bcde f +\= Expect no match + abcdef +No match + +/(a(?i)b)c/ + abc + 0: abc + aBc + 0: aBc +\= Expect no match + abC +No match + aBC +No match + Abc +No match + ABc +No match + ABC +No match + AbC +No match + +/a(?i:b)c/ + abc + 0: abc + aBc + 0: aBc +\= Expect no match + ABC +No match + abC +No match + aBC +No match + +/a(?i:b)*c/ + aBc + 0: aBc + aBBc + 0: aBBc +\= Expect no match + aBC +No match + aBBC +No match + +/a(?=b(?i)c)\w\wd/ + abcd + 0: abcd + abCd + 0: abCd +\= Expect no match + aBCd +No match + abcD +No match + +/(?s-i:more.*than).*million/i + more than million + 0: more than million + more than MILLION + 0: more than MILLION + more \n than Million + 0: more \x0a than Million +\= Expect no match + MORE THAN MILLION +No match + more \n than \n million +No match + +/(?:(?s-i)more.*than).*million/i + more than million + 0: more than million + more than MILLION + 0: more than MILLION 
+ more \n than Million + 0: more \x0a than Million +\= Expect no match + MORE THAN MILLION +No match + more \n than \n million +No match + +/(?>a(?i)b+)+c/ + abc + 0: abc + aBbc + 0: aBbc + aBBc + 0: aBBc +\= Expect no match + Abc +No match + abAb +No match + abbC +No match + +/(?=a(?i)b)\w\wc/ + abc + 0: abc + aBc + 0: aBc +\= Expect no match + Ab +No match + abC +No match + aBC +No match + +/(?<=a(?i)b)(\w\w)c/ + abxxc + 0: xxc + aBxxc + 0: xxc +\= Expect no match + Abxxc +No match + ABxxc +No match + abxxC +No match + +/^(?(?=abc)\w{3}:|\d\d)$/ + abc: + 0: abc: + 12 + 0: 12 +\= Expect no match + 123 +No match + xyz +No match + +/^(?(?!abc)\d\d|\w{3}:)$/ + abc: + 0: abc: + 12 + 0: 12 +\= Expect no match + 123 +No match + xyz +No match + +/(?(?<=foo)bar|cat)/ + foobar + 0: bar + cat + 0: cat + fcat + 0: cat + focat + 0: cat +\= Expect no match + foocat +No match + +/(?(?a*)*/ + a + 0: a + 1: + aa + 0: aa + 1: + aaaa + 0: aaaa + 1: + +/(abc|)+/ + abc + 0: abc + 1: + abcabc + 0: abcabc + 1: abc + 2: + abcabcabc + 0: abcabcabc + 1: abcabc + 2: abc + 3: + xyz + 0: + +/([a]*)*/ + a + 0: a + 1: + aaaaa + 0: aaaaa + 1: aaaa + 2: aaa + 3: aa + 4: a + 5: + +/([ab]*)*/ + a + 0: a + 1: + b + 0: b + 1: + ababab + 0: ababab + 1: ababa + 2: abab + 3: aba + 4: ab + 5: a + 6: + aaaabcde + 0: aaaab + 1: aaaa + 2: aaa + 3: aa + 4: a + 5: + bbbb + 0: bbbb + 1: bbb + 2: bb + 3: b + 4: + +/([^a]*)*/ + b + 0: b + 1: + bbbb + 0: bbbb + 1: bbb + 2: bb + 3: b + 4: + aaa + 0: + +/([^ab]*)*/ + cccc + 0: cccc + 1: ccc + 2: cc + 3: c + 4: + abab + 0: + +/([a]*?)*/ + a + 0: a + 1: + aaaa + 0: aaaa + 1: aaa + 2: aa + 3: a + 4: + +/([ab]*?)*/ + a + 0: a + 1: + b + 0: b + 1: + abab + 0: abab + 1: aba + 2: ab + 3: a + 4: + baba + 0: baba + 1: bab + 2: ba + 3: b + 4: + +/([^a]*?)*/ + b + 0: b + 1: + bbbb + 0: bbbb + 1: bbb + 2: bb + 3: b + 4: + aaa + 0: + +/([^ab]*?)*/ + c + 0: c + 1: + cccc + 0: cccc + 1: ccc + 2: cc + 3: c + 4: + baba + 0: + +/(?>a*)*/ + a + 0: a + 1: + aaabcde + 0: aaa + 1: + 
+/((?>a*))*/ + aaaaa + 0: aaaaa + 1: + aabbaa + 0: aa + 1: + +/((?>a*?))*/ + aaaaa + 0: aaaaa + 1: + aabbaa + 0: aa + 1: + +/(?(?=[^a-z]+[a-z]) \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) /x + 12-sep-98 + 0: 12-sep-98 + 12-09-98 + 0: 12-09-98 +\= Expect no match + sep-12-98 +No match + +/(?i:saturday|sunday)/ + saturday + 0: saturday + sunday + 0: sunday + Saturday + 0: Saturday + Sunday + 0: Sunday + SATURDAY + 0: SATURDAY + SUNDAY + 0: SUNDAY + SunDay + 0: SunDay + +/(a(?i)bc|BB)x/ + abcx + 0: abcx + aBCx + 0: aBCx + bbx + 0: bbx + BBx + 0: BBx +\= Expect no match + abcX +No match + aBCX +No match + bbX +No match + BBX +No match + +/^([ab](?i)[cd]|[ef])/ + ac + 0: ac + aC + 0: aC + bD + 0: bD + elephant + 0: e + Europe + 0: E + frog + 0: f + France + 0: F +\= Expect no match + Africa +No match + +/^(ab|a(?i)[b-c](?m-i)d|x(?i)y|z)/ + ab + 0: ab + aBd + 0: aBd + xy + 0: xy + xY + 0: xY + zebra + 0: z + Zambesi + 0: Z +\= Expect no match + aCD +No match + XY +No match + +/(?<=foo\n)^bar/m + foo\nbar + 0: bar +\= Expect no match + bar +No match + baz\nbar +No match + +/(?<=(?]&/ + <&OUT + 0: <& + +/(?:(f)(o)(o)|(b)(a)(r))*/ + foobar + 0: foobar + 1: foo + 2: + +/(?<=a)b/ + ab + 0: b +\= Expect no match + cb +No match + b +No match + +/(?a+)ab/ + +/(?>a+)b/ + aaab + 0: aaab + +/([[:]+)/ + a:[b]: + 0: :[ + +/([[=]+)/ + a=[b]= + 0: =[ + +/([[.]+)/ + a.[b]. 
+ 0: .[ + +/((?>a+)b)/ + aaab + 0: aaab + +/(?>(a+))b/ + aaab + 0: aaab + +/((?>[^()]+)|\([^()]*\))+/ + ((abc(ade)ufh()()x + 0: abc(ade)ufh()()x + 1: abc(ade)ufh()() + 2: abc(ade)ufh() + 3: abc(ade)ufh + 4: abc(ade) + 5: abc + +/a\Z/ +\= Expect no match + aaab +No match + a\nb\n +No match + +/b\Z/ + a\nb\n + 0: b + +/b\z/ + +/b\Z/ + a\nb + 0: b + +/b\z/ + a\nb + 0: b + +/(?>.*)(?<=(abcd|wxyz))/ + alphabetabcd + 0: alphabetabcd + endingwxyz + 0: endingwxyz +\= Expect no match + a rather long string that doesn't end with one of them +No match + +/word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword/ + word cat dog elephant mussel cow horse canary baboon snake shark otherword + 0: word cat dog elephant mussel cow horse canary baboon snake shark otherword +\= Expect no match + word cat dog elephant mussel cow horse canary baboon snake shark +No match + +/word (?>[a-zA-Z0-9]+ ){0,30}otherword/ +\= Expect no match + word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope +No match + +/(?<=\d{3}(?!999))foo/ + 999foo + 0: foo + 123999foo + 0: foo +\= Expect no match + 123abcfoo +No match + +/(?<=(?!...999)\d{3})foo/ + 999foo + 0: foo + 123999foo + 0: foo +\= Expect no match + 123abcfoo +No match + +/(?<=\d{3}(?!999)...)foo/ + 123abcfoo + 0: foo + 123456foo + 0: foo +\= Expect no match + 123999foo +No match + +/(?<=\d{3}...)(?Z)+|A)*/ + ZABCDEFG + 0: ZA + 1: Z + 2: + +/((?>)+|A)*/ + ZABCDEFG + 0: + +/a*/g + abbab + 0: a + 0: + 0: + 0: a + 0: + 0: + +/[[:space:]]+/ + > \x09\x0a\x0c\x0d\x0b< + 0: \x09\x0a\x0c\x0d\x0b + +/[[:blank:]]+/ + > \x09\x0a\x0c\x0d\x0b< + 0: \x09 + +/[\s]+/ + > \x09\x0a\x0c\x0d\x0b< + 0: \x09\x0a\x0c\x0d\x0b + +/\s+/ + > \x09\x0a\x0c\x0d\x0b< + 0: \x09\x0a\x0c\x0d\x0b + +/a b/x + ab + 0: ab + +/(?!\A)x/m + a\nxb\n + 0: x + +/(?!^)x/m +\= Expect no match + a\nxb\n +No match + +/abc\Qabc\Eabc/ + abcabcabc + 0: abcabcabc + +/abc\Q(*+|\Eabc/ + 
abc(*+|abc + 0: abc(*+|abc + +/ abc\Q abc\Eabc/x + abc abcabc + 0: abc abcabc +\= Expect no match + abcabcabc +No match + +/abc#comment + \Q#not comment + literal\E/x + abc#not comment\n literal + 0: abc#not comment\x0a literal + +/abc#comment + \Q#not comment + literal/x + abc#not comment\n literal + 0: abc#not comment\x0a literal + +/abc#comment + \Q#not comment + literal\E #more comment + /x + abc#not comment\n literal + 0: abc#not comment\x0a literal + +/abc#comment + \Q#not comment + literal\E #more comment/x + abc#not comment\n literal + 0: abc#not comment\x0a literal + +/\Qabc\$xyz\E/ + abc\\\$xyz + 0: abc\$xyz + +/\Qabc\E\$\Qxyz\E/ + abc\$xyz + 0: abc$xyz + +/\Gabc/ + abc + 0: abc +\= Expect no match + xyzabc +No match + +/\Gabc./g + abc1abc2xyzabc3 + 0: abc1 + 0: abc2 + +/abc./g + abc1abc2xyzabc3 + 0: abc1 + 0: abc2 + 0: abc3 + +/a(?x: b c )d/ + XabcdY + 0: abcd +\= Expect no match + Xa b c d Y +No match + +/((?x)x y z | a b c)/ + XabcY + 0: abc + AxyzB + 0: xyz + +/(?i)AB(?-i)C/ + XabCY + 0: abC +\= Expect no match + XabcY +No match + +/((?i)AB(?-i)C|D)E/ + abCE + 0: abCE + DE + 0: DE +\= Expect no match + abcE +No match + abCe +No match + dE +No match + De +No match + +/[z\Qa-d]\E]/ + z + 0: z + a + 0: a + - + 0: - + d + 0: d + ] + 0: ] +\= Expect no match + b +No match + +/(a+)*b/ +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/(?i)reg(?:ul(?:[aä]|ae)r|ex)/ + REGular + 0: REGular + regulaer + 0: regulaer + Regex + 0: Regex + regulär + 0: regul\xe4r + +/Åæåä[à-ÿÀ-ß]+/ + Åæåäà + 0: \xc5\xe6\xe5\xe4\xe0 + Åæåäÿ + 0: \xc5\xe6\xe5\xe4\xff + ÅæåäÀ + 0: \xc5\xe6\xe5\xe4\xc0 + Åæåäß + 0: \xc5\xe6\xe5\xe4\xdf + +/(?<=Z)X./ + \x84XAZXB + 0: XB + +/^(?(2)a|(1)(2))+$/ + 123a +Failed: error -40: backreference condition or recursion test is not supported for DFA matching + +/(?<=a|bbbb)c/ + ac + 0: c + bbbbc + 0: c + +/line\nbreak/ + this is a line\nbreak + 0: line\x0abreak + line one\nthis is a line\nbreak 
in the second line + 0: line\x0abreak + +/line\nbreak/firstline + this is a line\nbreak + 0: line\x0abreak +\= Expect no match + line one\nthis is a line\nbreak in the second line +No match + +/line\nbreak/m,firstline + this is a line\nbreak + 0: line\x0abreak +\= Expect no match + line one\nthis is a line\nbreak in the second line +No match + +/1234/ + 123\=ps +Partial match: 123 +\= Expect no match + a4\=ps,dfa_restart +No match + +/1234/ + 123\=ps +Partial match: 123 + 4\=ps,dfa_restart + 0: 4 + +/^/gm + a\nb\nc\n + 0: + 0: + 0: + \ + 0: + +/(?<=C\n)^/gm + A\nC\nC\n + 0: + +/(?s)A?B/ + AB + 0: AB + aB + 0: B + +/(?s)A*B/ + AB + 0: AB + aB + 0: B + +/(?m)A?B/ + AB + 0: AB + aB + 0: B + +/(?m)A*B/ + AB + 0: AB + aB + 0: B + +/Content-Type\x3A[^\r\n]{6,}/ + Content-Type:xxxxxyyy + 0: Content-Type:xxxxxyyy + +/Content-Type\x3A[^\r\n]{6,}z/ + Content-Type:xxxxxyyyz + 0: Content-Type:xxxxxyyyz + +/Content-Type\x3A[^a]{6,}/ + Content-Type:xxxyyy + 0: Content-Type:xxxyyy + +/Content-Type\x3A[^a]{6,}z/ + Content-Type:xxxyyyz + 0: Content-Type:xxxyyyz + +/^abc/Im,newline=lf +Capture group count = 0 +Options: multiline +Forced newline is LF +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + xyz\nabc + 0: abc + xyz\r\nabc + 0: abc +\= Expect no match + xyz\rabc +No match + xyzabc\r +No match + xyzabc\rpqr +No match + xyzabc\r\n +No match + xyzabc\r\npqr +No match + +/^abc/Im,newline=crlf +Capture group count = 0 +Options: multiline +Forced newline is CRLF +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + xyz\r\nabclf> + 0: abc +\= Expect no match + xyz\nabclf +No match + xyz\rabclf +No match + +/^abc/Im,newline=cr +Capture group count = 0 +Options: multiline +Forced newline is CR +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + xyz\rabc + 0: abc +\= Expect no match + xyz\nabc +No match + xyz\r\nabc +No match + 
+/.*/I,newline=lf +Capture group count = 0 +May match empty string +Forced newline is LF +First code unit at start or follows newline +Subject length lower bound = 0 + abc\ndef + 0: abc + abc\rdef + 0: abc\x0ddef + abc\r\ndef + 0: abc\x0d + +/.*/I,newline=cr +Capture group count = 0 +May match empty string +Forced newline is CR +First code unit at start or follows newline +Subject length lower bound = 0 + abc\ndef + 0: abc\x0adef + abc\rdef + 0: abc + abc\r\ndef + 0: abc + +/.*/I,newline=crlf +Capture group count = 0 +May match empty string +Forced newline is CRLF +First code unit at start or follows newline +Subject length lower bound = 0 + abc\ndef + 0: abc\x0adef + abc\rdef + 0: abc\x0ddef + abc\r\ndef + 0: abc + +/\w+(.)(.)?def/Is +Capture group count = 2 +Options: dotall +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Last code unit = 'f' +Subject length lower bound = 5 + abc\ndef + 0: abc\x0adef + abc\rdef + 0: abc\x0ddef + abc\r\ndef + 0: abc\x0d\x0adef + +/\w+(.)(.)?def/s + abc\ndef + 0: abc\x0adef + abc\rdef + 0: abc\x0ddef + abc\r\ndef + 0: abc\x0d\x0adef + +/^\w+=.*(\\\n.*)*/ + abc=xyz\\\npqr + 0: abc=xyz\\x0apqr + 1: abc=xyz\\x0apq + 2: abc=xyz\\x0ap + 3: abc=xyz\\x0a + 4: abc=xyz\ + 5: abc=xyz + 6: abc=xy + 7: abc=x + 8: abc= + +/^(a()*)*/ + aaaa + 0: aaaa + 1: aaa + 2: aa + 3: a + 4: + +/^(?:a(?:(?:))*)*/ + aaaa + 0: aaaa + 1: aaa + 2: aa + 3: a + 4: + +/^(a()+)+/ + aaaa + 0: aaaa + 1: aaa + 2: aa + 3: a + +/^(?:a(?:(?:))+)+/ + aaaa + 0: aaaa + 1: aaa + 2: aa + 3: a + +/(a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/(?>a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= 
Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/(?:a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/^a.b/newline=lf + a\rb + 0: a\x0db +\= Expect no match + a\nb +No match + +/^a.b/newline=cr + a\nb + 0: a\x0ab +\= Expect no match + a\rb +No match + +/^a.b/newline=anycrlf + a\x85b + 0: a\x85b +\= Expect no match + a\rb +No match + +/^a.b/newline=any +\= Expect no match + a\nb +No match + a\rb +No match + a\x85b +No match + +/^abc./gmx,newline=any + abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 JUNK + 0: abc1 + 0: abc2 + 0: abc3 + 0: abc4 + 0: abc5 + 0: abc6 + 0: abc7 + +/abc.$/gmx,newline=any + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc9 + 0: abc1 + 0: abc2 + 0: abc3 + 0: abc4 + 0: abc5 + 0: abc6 + 0: abc9 + +/^a\Rb/bsr=unicode + a\nb + 0: a\x0ab + a\rb + 0: a\x0db + a\r\nb + 0: a\x0d\x0ab + a\x0bb + 0: a\x0bb + a\x0cb + 0: a\x0cb + a\x85b + 0: a\x85b +\= Expect no match + a\n\rb +No match + +/^a\R*b/bsr=unicode + ab + 0: ab + a\nb + 0: a\x0ab + a\rb + 0: a\x0db + a\r\nb + 0: a\x0d\x0ab + a\x0bb + 0: a\x0bb + a\x0cb + 0: a\x0cb + a\x85b + 0: a\x85b + a\n\rb + 0: a\x0a\x0db + a\n\r\x85\x0cb + 0: a\x0a\x0d\x85\x0cb + +/^a\R+b/bsr=unicode + a\nb + 0: a\x0ab + a\rb + 0: a\x0db + a\r\nb + 0: a\x0d\x0ab + a\x0bb + 0: a\x0bb + a\x0cb + 0: a\x0cb + a\x85b + 0: a\x85b + a\n\rb + 0: a\x0a\x0db + a\n\r\x85\x0cb + 0: a\x0a\x0d\x85\x0cb +\= Expect no match + ab +No match + +/^a\R{1,3}b/bsr=unicode + a\nb + 0: a\x0ab + a\n\rb + 0: a\x0a\x0db + a\n\r\x85b + 0: a\x0a\x0d\x85b + a\r\n\r\nb + 0: a\x0d\x0a\x0d\x0ab + a\r\n\r\n\r\nb + 0: a\x0d\x0a\x0d\x0a\x0d\x0ab + a\n\r\n\rb + 0: a\x0a\x0d\x0a\x0db + a\n\n\r\nb + 0: a\x0a\x0a\x0d\x0ab +\= Expect no match + a\n\n\n\rb +No match + a\r +No match + +/.+foo/ + afoo + 0: 
afoo +\= Expect no match + \r\nfoo +No match + \nfoo +No match + +/.+foo/newline=crlf + afoo + 0: afoo + \nfoo + 0: \x0afoo +\= Expect no match + \r\nfoo +No match + +/.+foo/newline=any + afoo + 0: afoo +\= Expect no match + \nfoo +No match + \r\nfoo +No match + +/.+foo/s + afoo + 0: afoo + \r\nfoo + 0: \x0d\x0afoo + \nfoo + 0: \x0afoo + +/^$/gm,newline=any + abc\r\rxyz + 0: + abc\n\rxyz + 0: +\= Expect no match + abc\r\nxyz +No match + +/^X/m + XABC + 0: X +\= Expect no match + XABC\=notbol +No match + +/(?m)^$/g,newline=any,aftertext + abc\r\n\r\n + 0: + 0+ \x0d\x0a + +/(?m)^$|^\r\n/g,newline=any,aftertext + abc\r\n\r\n + 0: \x0d\x0a + 0+ + 1: + +/(?m)$/g,newline=any,aftertext + abc\r\n\r\n + 0: + 0+ \x0d\x0a\x0d\x0a + 0: + 0+ \x0d\x0a + 0: + 0+ + +/(?|(abc)|(xyz))/ + >abc< + 0: abc + >xyz< + 0: xyz + +/(x)(?|(abc)|(xyz))(x)/ + xabcx + 0: xabcx + xxyzx + 0: xxyzx + +/(x)(?|(abc)(pqr)|(xyz))(x)/ + xabcpqrx + 0: xabcpqrx + xxyzx + 0: xxyzx + +/(?|(abc)|(xyz))(?1)/ + abcabc + 0: abcabc + xyzabc + 0: xyzabc +\= Expect no match + xyzxyz +No match + +/\H\h\V\v/ + X X\x0a + 0: X X\x0a + X\x09X\x0b + 0: X\x09X\x0b +\= Expect no match + \xa0 X\x0a +No match + +/\H*\h+\V?\v{3,4}/ + \x09\x20\xa0X\x0a\x0b\x0c\x0d\x0a + 0: \x09 \xa0X\x0a\x0b\x0c\x0d + \x09\x20\xa0\x0a\x0b\x0c\x0d\x0a + 0: \x09 \xa0\x0a\x0b\x0c\x0d + \x09\x20\xa0\x0a\x0b\x0c + 0: \x09 \xa0\x0a\x0b\x0c +\= Expect no match + \x09\x20\xa0\x0a\x0b +No match + +/\H{3,4}/ + XY ABCDE + 0: ABCD + XY PQR ST + 0: PQR + +/.\h{3,4}./ + XY AB PQRS + 0: B P + 1: B + +/\h*X\h?\H+Y\H?Z/ + >XNNNYZ + 0: XNNNYZ + > X NYQZ + 0: X NYQZ +\= Expect no match + >XYZ +No match + > X NY Z +No match + +/\v*X\v?Y\v+Z\V*\x0a\V+\x0b\V{2,3}\x0c/ + >XY\x0aZ\x0aA\x0bNN\x0c + 0: XY\x0aZ\x0aA\x0bNN\x0c + >\x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c + 0: \x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c + +/.+A/newline=crlf +\= Expect no match + \r\nA +No match + +/\nA/newline=crlf + \r\nA + 0: \x0aA + +/[\r\n]A/newline=crlf + \r\nA + 0: \x0aA + 
+/(\r|\n)A/newline=crlf + \r\nA + 0: \x0aA + +/a\Rb/I,bsr=anycrlf +Capture group count = 0 +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\rb + 0: a\x0db + a\nb + 0: a\x0ab + a\r\nb + 0: a\x0d\x0ab +\= Expect no match + a\x85b +No match + a\x0bb +No match + +/a\Rb/I,bsr=unicode +Capture group count = 0 +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\rb + 0: a\x0db + a\nb + 0: a\x0ab + a\r\nb + 0: a\x0d\x0ab + a\x85b + 0: a\x85b + a\x0bb + 0: a\x0bb + +/a\R?b/I,bsr=anycrlf +Capture group count = 0 +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + a\rb + 0: a\x0db + a\nb + 0: a\x0ab + a\r\nb + 0: a\x0d\x0ab +\= Expect no match + a\x85b +No match + a\x0bb +No match + +/a\R?b/I,bsr=unicode +Capture group count = 0 +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + a\rb + 0: a\x0db + a\nb + 0: a\x0ab + a\r\nb + 0: a\x0d\x0ab + a\x85b + 0: a\x85b + a\x0bb + 0: a\x0bb + +/a\R{2,4}b/I,bsr=anycrlf +Capture group count = 0 +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 4 + a\r\n\nb + 0: a\x0d\x0a\x0ab + a\n\r\rb + 0: a\x0a\x0d\x0db + a\r\n\r\n\r\n\r\nb + 0: a\x0d\x0a\x0d\x0a\x0d\x0a\x0d\x0ab +\= Expect no match + a\x0b\x0bb +No match + a\x85\x85b +No match + +/a\R{2,4}b/I,bsr=unicode +Capture group count = 0 +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 4 + a\r\rb + 0: a\x0d\x0db + a\n\n\nb + 0: a\x0a\x0a\x0ab + a\r\n\n\r\rb + 0: a\x0d\x0a\x0a\x0d\x0db + a\x85\x85b + 0: a\x85\x85b + a\x0b\x0bb + 0: a\x0b\x0bb +\= Expect no match + a\r\r\r\r\rb +No match + +/a(?!)|\wbc/ + abc + 0: abc + +/a[]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames +\= Expect no match + ab +No match + 
+/a[]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames +\= Expect no match + ab +No match + +/a[]*+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames +\= Expect no match + ab +No match + +/a[^]b/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aXb + 0: aXb + a\nb + 0: a\x0ab +\= Expect no match + ab +No match + +/a[^]+b/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aXb + 0: aXb + a\nX\nXb + 0: a\x0aX\x0aXb +\= Expect no match + ab +No match + +/X$/dollar_endonly + X + 0: X +\= Expect no match + X\n +No match + +/X$/ + X + 0: X + X\n + 0: X + +/xyz/auto_callout + xyz +--->xyz + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +3 ^ ^ End of pattern + 0: xyz + abcxyz +--->abcxyz + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +3 ^ ^ End of pattern + 0: xyz +\= Expect no match + abc +No match + abcxypqr +No match + +/xyz/auto_callout,no_start_optimize + abcxyz +--->abcxyz + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +3 ^ ^ End of pattern + 0: xyz +\= Expect no match + abc +--->abc + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x +No match + abcxypqr +--->abcxypqr + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x +No match + +/(*NO_START_OPT)xyz/auto_callout + abcxyz +--->abcxyz ++15 ^ x ++15 ^ x ++15 ^ x ++15 ^ x ++16 ^^ y ++17 ^ ^ z ++18 ^ ^ End of pattern + 0: xyz + +/(?C)ab/ + ab +--->ab + 0 ^ a + 0: ab + ab\=callout_none + 0: ab + +/ab/auto_callout + ab +--->ab + +0 ^ a + +1 ^^ b + +2 ^ ^ End of pattern + 0: ab + ab\=callout_none + 0: ab + +/^"((?(?=[a])[^"])|b)*"$/auto_callout + "ab" +--->"ab" + +0 ^ ^ + +1 ^ " + +2 ^^ ( ++21 ^^ " + +3 ^^ (? ++18 ^^ b + +5 ^^ (?= + +8 ^ [a] ++11 ^^ ) ++12 ^^ [^"] ++16 ^ ^ ) ++17 ^ ^ | ++21 ^ ^ " + +3 ^ ^ (? ++18 ^ ^ b + +5 ^ ^ (?= + +8 ^ [a] ++19 ^ ^ )* ++21 ^ ^ " + +3 ^ ^ (? 
++18 ^ ^ b + +5 ^ ^ (?= + +8 ^ [a] ++17 ^ ^ | ++22 ^ ^ $ ++23 ^ ^ End of pattern + 0: "ab" + "ab"\=callout_none + 0: "ab" + +/\d+X|9+Y/ + ++++123999\=ps +Partial match: 123999 + ++++123999Y\=ps + 0: 999Y + +/Z(*F)/ +\= Expect no match + Z\=ps +No match + ZA\=ps +No match + +/Z(?!)/ +\= Expect no match + Z\=ps +No match + ZA\=ps +No match + +/dog(sbody)?/ + dogs\=ps + 0: dog + dogs\=ph +Partial match: dogs + +/dog(sbody)??/ + dogs\=ps + 0: dog + dogs\=ph +Partial match: dogs + +/dog|dogsbody/ + dogs\=ps + 0: dog + dogs\=ph +Partial match: dogs + +/dogsbody|dog/ + dogs\=ps + 0: dog + dogs\=ph +Partial match: dogs + +/Z(*F)Q|ZXY/ + Z\=ps +Partial match: Z +\= Expect no match + ZA\=ps +No match + X\=ps +No match + +/\bthe cat\b/ + the cat\=ps + 0: the cat + the cat\=ph +Partial match: the cat + +/dog(sbody)?/ + dogs\=ps + 0: dog + body\=dfa_restart + 0: body + +/dog(sbody)?/ + dogs\=ph +Partial match: dogs + body\=dfa_restart + 0: body + +/abc/ + abc\=ps + 0: abc + abc\=ph + 0: abc + +/abc\K123/ + xyzabc123pqr +Failed: error -42: pattern contains an item that is not supported for DFA matching + +/(?<=abc)123/allusedtext + xyzabc123pqr + 0: abc123 + <<< + xyzabc12\=ps +Partial match: abc12 + <<< + xyzabc12\=ph +Partial match: abc12 + <<< + +/\babc\b/allusedtext + +++abc+++ + 0: +abc+ + < > + +++ab\=ps +Partial match: +ab + < + +++ab\=ph +Partial match: +ab + < + +/(?=C)/g,aftertext + ABCDECBA + 0: + 0+ CDECBA + 0: + 0+ CBA + +/(abc|def|xyz)/I +Capture group count = 1 +Starting code units: a d x +Subject length lower bound = 3 + terhjk;abcdaadsfe + 0: abc + the quick xyz brown fox + 0: xyz +\= Expect no match + thejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd +No match + +/(abc|def|xyz)/I,no_start_optimize +Capture group count = 1 +Options: no_start_optimize + terhjk;abcdaadsfe + 0: abc + the quick xyz brown fox + 0: xyz +\= Expect no match + thejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd +No match + +/abcd*/aftertext + xxxxabcd\=ps + 0: abcd + 0+ + xxxxabcd\=ph +Partial 
match: abcd + dddxxx\=dfa_restart + 0: ddd + 0+ xxx + xxxxabcd\=ph +Partial match: abcd + xxx\=dfa_restart + 0: + 0+ xxx + +/abcd*/i + xxxxabcd\=ps + 0: abcd + xxxxabcd\=ph +Partial match: abcd + XXXXABCD\=ps + 0: ABCD + XXXXABCD\=ph +Partial match: ABCD + +/abc\d*/ + xxxxabc1\=ps + 0: abc1 + xxxxabc1\=ph +Partial match: abc1 + +/abc[de]*/ + xxxxabcde\=ps + 0: abcde + xxxxabcde\=ph +Partial match: abcde + +/(?:(?1)|B)(A(*F)|C)/ + ABCD + 0: BC + CCD + 0: CC +\= Expect no match + CAD +No match + +/^(?:(?1)|B)(A(*F)|C)/ + CCD + 0: CC + BCD + 0: BC +\= Expect no match + ABCD +No match + CAD +No match + BAD +No match + +/^(?!a(*SKIP)b)/ + ac +Failed: error -42: pattern contains an item that is not supported for DFA matching + +/^(?=a(*SKIP)b|ac)/ + ac +Failed: error -42: pattern contains an item that is not supported for DFA matching + +/^(?=a(*THEN)b|ac)/ + ac +Failed: error -42: pattern contains an item that is not supported for DFA matching + +/^(?=a(*PRUNE)b)/ + ab +Failed: error -42: pattern contains an item that is not supported for DFA matching + +/^(?(?!a(*SKIP)b))/ + ac +Failed: error -42: pattern contains an item that is not supported for DFA matching + +/(?<=abc)def/allusedtext + abc\=ph +Partial match: abc + <<< + +/abc$/ + abc + 0: abc + abc\=ps + 0: abc + abc\=ph +Partial match: abc + +/abc$/m + abc + 0: abc + abc\n + 0: abc + abc\=ph +Partial match: abc + abc\n\=ph + 0: abc + abc\=ps + 0: abc + abc\n\=ps + 0: abc + +/abc\z/ + abc + 0: abc + abc\=ps + 0: abc + abc\=ph +Partial match: abc + +/abc\Z/ + abc + 0: abc + abc\=ps + 0: abc + abc\=ph +Partial match: abc + +/abc\b/ + abc + 0: abc + abc\=ps + 0: abc + abc\=ph +Partial match: abc + +/abc\B/ + abc\=ps +Partial match: abc + abc\=ph +Partial match: abc +\= Expect no match + abc +No match + +/.+/ + abc\=offset=0 + 0: abc + abc\=offset=1 + 0: bc + abc\=offset=2 + 0: c +\= Bad offsets + abc\=offset=4 +Failed: error -33: bad offset value + abc\=offset=-4 +** Invalid value in 'offset=-4' +\= Expect no match + 
abc\=offset=3 +No match + +/^(?:a)++\w/ + aaaab + 0: aaaab +\= Expect no match + aaaa +No match + bbb +No match + +/^(?:aa|(?:a)++\w)/ + aaaab + 0: aaaab + 1: aa + aaaa + 0: aa +\= Expect no match + bbb +No match + +/^(?:a)*+\w/ + aaaab + 0: aaaab + bbb + 0: b +\= Expect no match + aaaa +No match + +/^(a)++\w/ + aaaab + 0: aaaab +\= Expect no match + aaaa +No match + bbb +No match + +/^(a|)++\w/ + aaaab + 0: aaaab +\= Expect no match + aaaa +No match + bbb +No match + +/(?=abc){3}abc/aftertext + abcabcabc + 0: abc + 0+ abcabc +\= Expect no match + xyz +No match + +/(?=abc)+abc/aftertext + abcabcabc + 0: abc + 0+ abcabc +\= Expect no match + xyz +No match + +/(?=abc)++abc/aftertext + abcabcabc + 0: abc + 0+ abcabc +\= Expect no match + xyz +No match + +/(?=abc){0}xyz/ + xyz + 0: xyz + +/(?=abc){1}xyz/ +\= Expect no match + xyz +No match + +/(?=(a))?./ + ab + 0: a + bc + 0: b + +/(?=(a))??./ + ab + 0: a + bc + 0: b + +/^(?=(a)){0}b(?1)/ + backgammon + 0: ba + +/^(?=(?1))?[az]([abc])d/ + abd + 0: abd + zcdxx + 0: zcd + +/^(?!a){0}\w+/ + aaaaa + 0: aaaaa + +/(?<=(abc))?xyz/ + abcxyz + 0: xyz + pqrxyz + 0: xyz + +/((?2))((?1))/ + abc +Failed: error -52: nested recursion at the same subject position + +/(?(R)a+|(?R)b)/ + aaaabcde + 0: aaaab + +/(?(R)a+|((?R))b)/ + aaaabcde + 0: aaaab + +/((?(R)a+|(?1)b))/ + aaaabcde + 0: aaaab + +/((?(R2)a+|(?1)b))()/ + aaaabcde +Failed: error -40: backreference condition or recursion test is not supported for DFA matching + +/(?(R)a*(?1)|((?R))b)/ + aaaabcde +Failed: error -52: nested recursion at the same subject position + +/(a+)/no_auto_possess + aaaa\=ovector=3 +Matched, but offsets vector is too small to show all matches + 0: aaaa + 1: aaa + 2: aa + aaaa\=ovector=4 + 0: aaaa + 1: aaa + 2: aa + 3: a + +/^\R/ + \r\=ps + 0: \x0d + \r\=ph +Partial match: \x0d + +/^\R{2,3}x/ + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + \r\r\=ps +Partial match: \x0d\x0d + \r\r\=ph +Partial match: \x0d\x0d + \r\r\r\=ps +Partial match: 
\x0d\x0d\x0d + \r\r\r\=ph +Partial match: \x0d\x0d\x0d + \r\rx + 0: \x0d\x0dx + \r\r\rx + 0: \x0d\x0d\x0dx + +/^\R{2,3}?x/ + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + \r\r\=ps +Partial match: \x0d\x0d + \r\r\=ph +Partial match: \x0d\x0d + \r\r\r\=ps +Partial match: \x0d\x0d\x0d + \r\r\r\=ph +Partial match: \x0d\x0d\x0d + \r\rx + 0: \x0d\x0dx + \r\r\rx + 0: \x0d\x0d\x0dx + +/^\R?x/ + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + x + 0: x + \rx + 0: \x0dx + +/^\R+x/ + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + \r\n\=ps +Partial match: \x0d\x0a + \r\n\=ph +Partial match: \x0d\x0a + \rx + 0: \x0dx + +/^a$/newline=crlf + a\r\=ps +Partial match: a\x0d + a\r\=ph +Partial match: a\x0d + +/^a$/m,newline=crlf + a\r\=ps +Partial match: a\x0d + a\r\=ph +Partial match: a\x0d + +/^(a$|a\r)/newline=crlf + a\r\=ps + 0: a\x0d + a\r\=ph +Partial match: a\x0d + +/^(a$|a\r)/m,newline=crlf + a\r\=ps + 0: a\x0d + a\r\=ph +Partial match: a\x0d + +/./newline=crlf + \r\=ps + 0: \x0d + \r\=ph +Partial match: \x0d + +/.{2,3}/newline=crlf + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + \r\r\=ps + 0: \x0d\x0d + \r\r\=ph +Partial match: \x0d\x0d + \r\r\r\=ps + 0: \x0d\x0d\x0d + \r\r\r\=ph +Partial match: \x0d\x0d\x0d + +/.{2,3}?/newline=crlf + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + \r\r\=ps + 0: \x0d\x0d + \r\r\=ph +Partial match: \x0d\x0d + \r\r\r\=ps + 0: \x0d\x0d\x0d + 1: \x0d\x0d + \r\r\r\=ph +Partial match: \x0d\x0d\x0d + +# Test simple validity check for restarts + +/abcdef/ + abc\=dfa_restart +Failed: error -38: invalid data in workspace for DFA restart + +/)(.)|(?R))++)*F>/ + text text xxxxx text F> text2 more text. 
+ 0: text xxxxx text F> + +/^(?>.{4})abc|^\w\w.xabcd/ + xxxxabcd + 0: xxxxabcd + 1: xxxxabc + xx\xa0xabcd + 0: xx\xa0xabcd + 1: xx\xa0xabc + +/^(.{4}){2}+abc|^\w\w.x\w\w\w\wabcd/ + xxxxxxxxabcd + 0: xxxxxxxxabcd + 1: xxxxxxxxabc + xx\xa0xxxxxabcd + 0: xx\xa0xxxxxabcd + 1: xx\xa0xxxxxabc + +/abcd/ + abcd\=ovector=0 + 0: abcd + +# These tests show up auto-possessification + +/[ab]*/ + aaaa + 0: aaaa + +/[ab]*?/ + aaaa + 0: aaaa + 1: aaa + 2: aa + 3: a + 4: + +/[ab]?/ + aaaa + 0: a + +/[ab]??/ + aaaa + 0: a + 1: + +/[ab]+/ + aaaa + 0: aaaa + +/[ab]+?/ + aaaa + 0: aaaa + 1: aaa + 2: aa + 3: a + +/[ab]{2,3}/ + aaaa + 0: aaa + +/[ab]{2,3}?/ + aaaa + 0: aaa + 1: aa + +/[ab]{2,}/ + aaaa + 0: aaaa + +/[ab]{2,}?/ + aaaa + 0: aaaa + 1: aaa + 2: aa + +'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++' + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + 0: NON QUOTED "QUOT""ED" AFTER + +'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++' + NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED + 0: NON QUOTED "QUOT""ED" AFTER + +/abc(?=xyz)/allusedtext + abcxyzpqr + 0: abcxyz + >>> + abcxyzpqr\=aftertext + 0: abcxyz + >>> + 0+ xyzpqr + +/(?<=pqr)abc(?=xyz)/allusedtext + xyzpqrabcxyzpqr + 0: pqrabcxyz + <<< >>> + xyzpqrabcxyzpqr\=aftertext + 0: pqrabcxyz + <<< >>> + 0+ xyzpqr + +/a\b/ + a.\=allusedtext + 0: a. 
+ > + a\=allusedtext + 0: a + +/abc(?=abcde)(?=ab)/allusedtext + abcabcdefg + 0: abcabcde + >>>>> + +/a*?b*?/ + ab + 0: ab + 1: a + 2: + +/(*NOTEMPTY)a*?b*?/ + ab + 0: ab + 1: a + ba + 0: b + cb + 0: b + +/(*NOTEMPTY_ATSTART)a*?b*?/aftertext + ab + 0: ab + 0+ + 1: a + cdab + 0: + 0+ dab + +/(a)(b)|(c)/ + XcX\=ovector=2,get=1,get=2,get=3,get=4,getall + 0: c +Get substring 1 failed (-55): requested value is not set +Get substring 2 failed (-54): requested value is not available +Get substring 3 failed (-54): requested value is not available +Get substring 4 failed (-54): requested value is not available + 0L c + +/(?aa)/ + aa\=get=A + 0: aa +Get substring 'A' failed (-41): function is not supported for DFA matching + aa\=copy=A + 0: aa +Copy substring 'A' failed (-41): function is not supported for DFA matching + +/a+/no_auto_possess + a\=ovector=2,get=1,get=2,getall + 0: a +Get substring 1 failed (-55): requested value is not set +Get substring 2 failed (-54): requested value is not available + 0L a + aaa\=ovector=2,get=1,get=2,getall +Matched, but offsets vector is too small to show all matches + 0: aaa + 1: aa + 1G aa (2) +Get substring 2 failed (-54): requested value is not available + 0L aaa + 1L aa + +/a(b)c(d)/ + abc\=ph,copy=0,copy=1,getall +Partial match: abc + 0C abc (3) +Copy substring 1 failed (-2): partial match +get substring list failed (-2): partial match + +/ab(?C" any text with spaces ")cde/B +------------------------------------------------------------------ + Bra + ab + CalloutStr " any text with spaces " 6 30 1 + cde + Ket + End +------------------------------------------------------------------ + abcde +Callout (6): " any text with spaces " +--->abcde + ^ ^ c + 0: abcde + 12abcde +Callout (6): " any text with spaces " +--->12abcde + ^ ^ c + 0: abcde + +/^a(b)c(?C1)def/ + abcdef +--->abcdef + 1 ^ ^ d + 0: abcdef + +/^a(b)c(?C"AB")def/ + abcdef +Callout (10): "AB" +--->abcdef + ^ ^ d + 0: abcdef + +/^a(b)c(?C1)def/ + abcdef\=callout_capture 
+Callout 1: last capture = 0 +--->abcdef + ^ ^ d + 0: abcdef + +/^a(b)c(?C{AB})def/B +------------------------------------------------------------------ + Bra + ^ + a + CBra 1 + b + Ket + c + CalloutStr {AB} 10 14 1 + def + Ket + End +------------------------------------------------------------------ + abcdef\=callout_capture +Callout (10): {AB} last capture = 0 +--->abcdef + ^ ^ d + 0: abcdef + +/^(?(?C25)(?=abc)abcd|xyz)/B +------------------------------------------------------------------ + Bra + ^ + Cond + Callout 25 9 3 + Assert + abc + Ket + abcd + Alt + xyz + Ket + Ket + End +------------------------------------------------------------------ + abcdefg +--->abcdefg + 25 ^ (?= + 0: abcd + xyz123 +--->xyz123 + 25 ^ (?= + 0: xyz + +/^(?(?C$abc$)(?=abc)abcd|xyz)/B +------------------------------------------------------------------ + Bra + ^ + Cond + CalloutStr $abc$ 7 12 3 + Assert + abc + Ket + abcd + Alt + xyz + Ket + Ket + End +------------------------------------------------------------------ + abcdefg +Callout (7): $abc$ +--->abcdefg + ^ (?= + 0: abcd + xyz123 +Callout (7): $abc$ +--->xyz123 + ^ (?= + 0: xyz + +/^ab(?C'first')cd(?C"second")ef/ + abcdefg +Callout (7): 'first' +--->abcdefg + ^ ^ c +Callout (20): "second" +--->abcdefg + ^ ^ e + 0: abcdef + +/(?:a(?C`code`)){3}X/ + aaaXY +Callout (8): `code` +--->aaaXY + ^^ ){3} +Callout (8): `code` +--->aaaXY + ^ ^ ){3} +Callout (8): `code` +--->aaaXY + ^ ^ ){3} + 0: aaaX + +# Binary zero in callout string +/"a(?C'x" 00 "z')b"/hex + abcdefgh +Callout (5): 'x\x00z' +--->abcdefgh + ^^ b + 0: ab + +/(?(?!)a|b)/ + bbb + 0: b +\= Expect no match + aaa +No match + +/^/gm + \n\n\n + 0: + 0: + 0: + +/^/gm,alt_circumflex + \n\n\n + 0: + 0: + 0: + 0: + +/abc/use_offset_limit + 1234abcde\=offset_limit=100 + 0: abc + 1234abcde\=offset_limit=9 + 0: abc + 1234abcde\=offset_limit=4 + 0: abc + 1234abcde\=offset_limit=4,offset=4 + 0: abc +\= Expect no match + 1234abcde\=offset_limit=4,offset=5 +No match + 
1234abcde\=offset_limit=3 +No match + +/(?<=abc)/use_offset_limit + 1234abc\=offset_limit=7 + 0: +\= Expect no match + 1234abc\=offset_limit=6 +No match + +/abcd/null_context + abcd\=null_context + 0: abcd + +/()()a+/no_auto_possess + aaa\=allcaptures +** Ignored for DFA matching: allcaptures + 0: aaa + 1: aa + 2: a + a\=allcaptures +** Ignored for DFA matching: allcaptures + 0: a + +/(*LIMIT_DEPTH=100)^((.)(?1)|.)$/ +\= Expect depth limit exceeded + a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00] +Failed: error -53: matching depth limit exceeded + +/(*LIMIT_HEAP=0)^((.)(?1)|.)$/ +\= Expect heap limit exceeded + a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00] +Failed: error -63: heap limit exceeded + +/(*LIMIT_HEAP=50000)^((.)(?1)|.)$/ +\= Expect success + a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00] + 0: a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00] + +/(02-)?[0-9]{3}-[0-9]{3}/ + 02-123-123 + 0: 02-123-123 + +/^(a(?2))(b)(?1)/ + abbab\=find_limits +Minimum heap limit = 0 +Minimum match limit = 4 +Minimum depth limit = 2 + 0: abbab + +/abc/endanchored + xyzabc + 0: abc +\= Expect no match + xyzabcdef +No match +\= Expect error + xyzabc\=ph +Failed: error -34: bad option value + +/abc/ + xyzabc\=endanchored + 0: abc +\= Expect no match + xyzabcdef\=endanchored +No match +\= Expect error + xyzabc\=ps,endanchored +Failed: error -34: bad option value + +/abc|bcd/endanchored + xyzabcd + 0: bcd +\= Expect no match + xyzabcdef +No match + +/(*NUL)^.*/ + a\nb\x00ccc + 0: a\x0ab + +/(*NUL)^.*/s + a\nb\x00ccc + 0: a\x0ab\x00ccc + +/^x/m,newline=nul + ab\x00xy + 0: x + +/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex + x\nyz + 0: x\x0ay + +/(*NUL)^X\NY/ + X\nY + 0: X\x0aY + X\rY + 
0: X\x0dY +\= Expect no match + X\x00Y +No match + +/(?<=abc|)/ + abcde\=aftertext + 0: + 0+ abcde + +/(?<=|abc)/ + abcde\=aftertext + 0: + 0+ abcde + +/(?<=abc|)/endanchored + abcde\=aftertext + 0: + 0+ + +/(?<=|abc)/endanchored + abcde\=aftertext + 0: + 0+ + +/(*LIMIT_MATCH=100).*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00 \x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););/no_dotstar_anchor +\= Expect limit exceeded +.*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00 \x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?);); +Failed: error -47: match limit exceeded + +/\n/firstline + xyz\nabc + 0: \x0a + +/\nabc/firstline + xyz\nabc + 0: \x0aabc + +/\x{0a}abc/firstline,newline=crlf +\= Expect no match + xyz\r\nabc +No match + +/[abc]/firstline +\= Expect no match + \na +No match + +/foobar/ + the foobar thing\=copy_matched_subject + 0: foobar + the foobar thing\=copy_matched_subject,zero_terminate + 0: foobar + +/foobar/g + the foobar thing foobar again\=copy_matched_subject + 0: foobar + 0: foobar + +/(?(VERSION>=0)^B0W)/ + B0W-W0W + 0: B0W +\= Expect no match + 0 +No match + +/(?(VERSION>=1000)^B0W|W0W)/ + B0W-W0W + 0: W0W +\= Expect no match + 0 +No match + +/(?<=pqr)abc(?=xyz)/ + 123pqrabcxy\=ps,allusedtext +Partial 
match: pqrabcxy + <<< + 123pqrabcxyz\=ps,allusedtext + 0: pqrabcxyz + <<< >>> + +/(?>a+b)/ + aaaa\=ps +Partial match: aaaa + aaaab\=ps + 0: aaaab + +/(abc)(?1)/ + abca\=ps +Partial match: abca + abcabc\=ps + 0: abcabc + +/(?(?=abc).*|Z)/ + ab\=ps +Partial match: ab + abcxyz\=ps + 0: abcxyz + +/(abc)++x/ + abcab\=ps +Partial match: abcab + abc\=ps +Partial match: abc + ab\=ps +Partial match: ab + abcx + 0: abcx + +/\z/ + abc\=ph +Partial match: + abc\=ps + 0: + +/\Z/ + abc\=ph +Partial match: + abc\=ps + 0: + abc\n\=ph +Partial match: \x0a + abc\n\=ps + 0: + +/c*+(?<=[bc])/ + abc\=ph +Partial match: c + ab\=ph +Partial match: + abc\=ps + 0: c + ab\=ps + 0: + +/c++(?<=[bc])/ + abc\=ph +Partial match: c + ab\=ph +Partial match: + +/(?<=(?=.(?<=x)))/ + abx + 0: + ab\=ph +Partial match: + bxyz + 0: + xyz + 0: + +/(?![ab]).*/ + ab\=ph +Partial match: + +/c*+/ + ab\=ph,offset=2 +Partial match: + +/ +/anchored, firstline + \x0a + 0: \x0a + +/ +/anchored,firstline,no_start_optimize + \x0a + 0: \x0a + +/ +/firstline + \x0a + 0: \x0a + abc\x0adef + 0: \x0a + +/|a(?0)/endanchored + aaaa + 0: aaaa + 1: aaa + 2: aa + 3: a + 4: + +# End of testinput6 diff --git a/testdata/testoutput7 b/testdata/testoutput7 new file mode 100644 index 0000000..cfa1881 --- /dev/null +++ b/testdata/testoutput7 @@ -0,0 +1,4230 @@ +# This set of tests checks UTF and Unicode property support with the DFA +# matching functionality of pcre2_dfa_match(). A default subject modifier is +# used to force DFA matching for all tests. 
+ +#subject dfa +#newline_default LF any anyCRLF + +/\x{100}ab/utf + \x{100}ab + 0: \x{100}ab + +/a\x{100}*b/utf + ab + 0: ab + a\x{100}b + 0: a\x{100}b + a\x{100}\x{100}b + 0: a\x{100}\x{100}b + +/a\x{100}+b/utf + a\x{100}b + 0: a\x{100}b + a\x{100}\x{100}b + 0: a\x{100}\x{100}b +\= Expect no match + ab +No match + +/\bX/utf + Xoanon + 0: X + +Xoanon + 0: X + \x{300}Xoanon + 0: X +\= Expect no match + YXoanon +No match + +/\BX/utf + YXoanon + 0: X +\= Expect no match + Xoanon +No match + +Xoanon +No match + \x{300}Xoanon +No match + +/X\b/utf + X+oanon + 0: X + ZX\x{300}oanon + 0: X + FAX + 0: X +\= Expect no match + Xoanon +No match + +/X\B/utf + Xoanon + 0: X +\= Expect no match + X+oanon +No match + ZX\x{300}oanon +No match + FAX +No match + +/[^a]/utf + abcd + 0: b + a\x{100} + 0: \x{100} + +/^[abc\x{123}\x{400}-\x{402}]{2,3}\d/utf + ab99 + 0: ab9 + \x{123}\x{123}45 + 0: \x{123}\x{123}4 + \x{400}\x{401}\x{402}6 + 0: \x{400}\x{401}\x{402}6 +\= Expect no match + d99 +No match + \x{123}\x{122}4 +No match + \x{400}\x{403}6 +No match + \x{400}\x{401}\x{402}\x{402}6 +No match + +/a.b/utf + acb + 0: acb + a\x7fb + 0: a\x{7f}b + a\x{100}b + 0: a\x{100}b +\= Expect no match + a\nb +No match + +/a(.{3})b/utf + a\x{4000}xyb + 0: a\x{4000}xyb + a\x{4000}\x7fyb + 0: a\x{4000}\x{7f}yb + a\x{4000}\x{100}yb + 0: a\x{4000}\x{100}yb +\= Expect no match + a\x{4000}b +No match + ac\ncb +No match + +/a(.*?)(.)/ + a\xc0\x88b + 0: a\xc0\x88b + 1: a\xc0\x88 + 2: a\xc0 + +/a(.*?)(.)/utf + a\x{100}b + 0: a\x{100}b + 1: a\x{100} + +/a(.*)(.)/ + a\xc0\x88b + 0: a\xc0\x88b + 1: a\xc0\x88 + 2: a\xc0 + +/a(.*)(.)/utf + a\x{100}b + 0: a\x{100}b + 1: a\x{100} + +/a(.)(.)/ + a\xc0\x92bcd + 0: a\xc0\x92 + +/a(.)(.)/utf + a\x{240}bcd + 0: a\x{240}b + +/a(.?)(.)/ + a\xc0\x92bcd + 0: a\xc0\x92 + 1: a\xc0 + +/a(.?)(.)/utf + a\x{240}bcd + 0: a\x{240}b + 1: a\x{240} + +/a(.??)(.)/ + a\xc0\x92bcd + 0: a\xc0\x92 + 1: a\xc0 + +/a(.??)(.)/utf + a\x{240}bcd + 0: a\x{240}b + 1: a\x{240} + +/a(.{3})b/utf + 
a\x{1234}xyb + 0: a\x{1234}xyb + a\x{1234}\x{4321}yb + 0: a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b + 0: a\x{1234}\x{4321}\x{3412}b +\= Expect no match + a\x{1234}b +No match + ac\ncb +No match + +/a(.{3,})b/utf + a\x{1234}xyb + 0: a\x{1234}xyb + a\x{1234}\x{4321}yb + 0: a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b + 0: a\x{1234}\x{4321}\x{3412}b + axxxxbcdefghijb + 0: axxxxbcdefghijb + 1: axxxxb + a\x{1234}\x{4321}\x{3412}\x{3421}b + 0: a\x{1234}\x{4321}\x{3412}\x{3421}b +\= Expect no match + a\x{1234}b +No match + +/a(.{3,}?)b/utf + a\x{1234}xyb + 0: a\x{1234}xyb + a\x{1234}\x{4321}yb + 0: a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b + 0: a\x{1234}\x{4321}\x{3412}b + axxxxbcdefghijb + 0: axxxxbcdefghijb + 1: axxxxb + a\x{1234}\x{4321}\x{3412}\x{3421}b + 0: a\x{1234}\x{4321}\x{3412}\x{3421}b +\= Expect no match + a\x{1234}b +No match + +/a(.{3,5})b/utf + a\x{1234}xyb + 0: a\x{1234}xyb + a\x{1234}\x{4321}yb + 0: a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b + 0: a\x{1234}\x{4321}\x{3412}b + axxxxbcdefghijb + 0: axxxxb + a\x{1234}\x{4321}\x{3412}\x{3421}b + 0: a\x{1234}\x{4321}\x{3412}\x{3421}b + axbxxbcdefghijb + 0: axbxxb + axxxxxbcdefghijb + 0: axxxxxb +\= Expect no match + a\x{1234}b +No match + axxxxxxbcdefghijb +No match + +/a(.{3,5}?)b/utf + a\x{1234}xyb + 0: a\x{1234}xyb + a\x{1234}\x{4321}yb + 0: a\x{1234}\x{4321}yb + a\x{1234}\x{4321}\x{3412}b + 0: a\x{1234}\x{4321}\x{3412}b + axxxxbcdefghijb + 0: axxxxb + a\x{1234}\x{4321}\x{3412}\x{3421}b + 0: a\x{1234}\x{4321}\x{3412}\x{3421}b + axbxxbcdefghijb + 0: axbxxb + axxxxxbcdefghijb + 0: axxxxxb +\= Expect no match + a\x{1234}b +No match + axxxxxxbcdefghijb +No match + +/^[a\x{c0}]/utf +\= Expect no match + \x{100} +No match + +/(?<=aXb)cd/utf + aXbcd + 0: cd + +/(?<=a\x{100}b)cd/utf + a\x{100}bcd + 0: cd + +/(?<=a\x{100000}b)cd/utf + a\x{100000}bcd + 0: cd + +/(?:\x{100}){3}b/utf + \x{100}\x{100}\x{100}b + 0: \x{100}\x{100}\x{100}b +\= Expect no match + \x{100}\x{100}b +No match + 
+/\x{ab}/utf + \x{ab} + 0: \x{ab} + \xc2\xab + 0: \x{ab} +\= Expect no match + \x00{ab} +No match + +/(?<=(.))X/utf + WXYZ + 0: X + \x{256}XYZ + 0: X +\= Expect no match + XYZ +No match + +/[^a]+/g,utf + bcd + 0: bcd + \x{100}aY\x{256}Z + 0: \x{100} + 0: Y\x{256}Z + +/^[^a]{2}/utf + \x{100}bc + 0: \x{100}b + +/^[^a]{2,}/utf + \x{100}bcAa + 0: \x{100}bcA + +/^[^a]{2,}?/utf + \x{100}bca + 0: \x{100}bc + 1: \x{100}b + +/[^a]+/gi,utf + bcd + 0: bcd + \x{100}aY\x{256}Z + 0: \x{100} + 0: Y\x{256}Z + +/^[^a]{2}/i,utf + \x{100}bc + 0: \x{100}b + +/^[^a]{2,}/i,utf + \x{100}bcAa + 0: \x{100}bc + +/^[^a]{2,}?/i,utf + \x{100}bca + 0: \x{100}bc + 1: \x{100}b + +/\x{100}{0,0}/utf + abcd + 0: + +/\x{100}?/utf + abcd + 0: + \x{100}\x{100} + 0: \x{100} + +/\x{100}{0,3}/utf + \x{100}\x{100} + 0: \x{100}\x{100} + \x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100} + +/\x{100}*/utf + abce + 0: + \x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100}\x{100} + +/\x{100}{1,1}/utf + abcd\x{100}\x{100}\x{100}\x{100} + 0: \x{100} + +/\x{100}{1,3}/utf + abcd\x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100} + +/\x{100}+/utf + abcd\x{100}\x{100}\x{100}\x{100} + 0: \x{100}\x{100}\x{100}\x{100} + +/\x{100}{3}/utf + abcd\x{100}\x{100}\x{100}XX + 0: \x{100}\x{100}\x{100} + +/\x{100}{3,5}/utf + abcd\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}XX + 0: \x{100}\x{100}\x{100}\x{100}\x{100} + +/\x{100}{3,}/utf,no_auto_possess + abcd\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}XX + 0: \x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 1: \x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 2: \x{100}\x{100}\x{100}\x{100}\x{100} + 3: \x{100}\x{100}\x{100}\x{100} + 4: \x{100}\x{100}\x{100} + +/(?<=a\x{100}{2}b)X/utf + Xyyya\x{100}\x{100}bXzzz + 0: X + +/\D*/utf,no_auto_possess + 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +Matched, but offsets vector is too small to show all matches + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 2: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 3: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 4: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 5: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 6: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 7: 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 8: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 9: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +10: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +11: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +12: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +13: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +14: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/\D*/utf,no_auto_possess + 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} +Matched, but offsets vector is too small to show all matches + 0: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 1: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 2: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 3: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 4: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 5: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 6: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 7: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 8: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + 9: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} +10: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} +11: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} +12: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} +13: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} +14: 
\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100}\x{100} + +/\D/utf + 1X2 + 0: X + 1\x{100}2 + 0: \x{100} + +/>\S/utf + > >X Y + 0: >X + > >\x{100} Y + 0: >\x{100} + +/\d/utf + \x{100}3 + 0: 3 + +/\s/utf + \x{100} X + 0: + +/\D+/utf + 12abcd34 + 0: abcd +\= Expect no match + 1234 +No match + +/\D{2,3}/utf + 12abcd34 + 0: abc + 12ab34 + 0: ab +\= Expect no match + 1234 +No match + 12a34 +No match + +/\D{2,3}?/utf + 12abcd34 + 0: abc + 1: ab + 12ab34 + 0: ab +\= Expect no match + 1234 +No match + 12a34 +No match + +/\d+/utf + 12abcd34 + 0: 12 + +/\d{2,3}/utf + 12abcd34 + 0: 12 + 1234abcd + 0: 
123 +\= Expect no match + 1.4 +No match + +/\d{2,3}?/utf + 12abcd34 + 0: 12 + 1234abcd + 0: 123 + 1: 12 +\= Expect no match + 1.4 +No match + +/\S+/utf + 12abcd34 + 0: 12abcd34 +\= Expect no match + \ \ +No match + +/\S{2,3}/utf + 12abcd34 + 0: 12a + 1234abcd + 0: 123 +\= Expect no match + \ \ +No match + +/\S{2,3}?/utf + 12abcd34 + 0: 12a + 1: 12 + 1234abcd + 0: 123 + 1: 12 +\= Expect no match + \ \ +No match + +/>\s+ <34 + 0: > < + +/>\s{2,3} < + ab> < +\= Expect no match + ab> \s{2,3}? < + ab> < +\= Expect no match + ab> \xff< + 0: \xff + +/[\xff]/utf + >\x{ff}< + 0: \x{ff} + +/[^\xFF]/ + XYZ + 0: X + +/[^\xff]/utf + XYZ + 0: X + \x{123} + 0: \x{123} + +/^[ac]*b/utf +\= Expect no match + xb +No match + +/^[ac\x{100}]*b/utf +\= Expect no match + xb +No match + +/^[^x]*b/i,utf +\= Expect no match + xb +No match + +/^[^x]*b/utf +\= Expect no match + xb +No match + +/^\d*b/utf +\= Expect no match + xb +No match + +/(|a)/g,utf + catac + 0: + 0: a + 1: + 0: + 0: a + 1: + 0: + 0: + a\x{256}a + 0: a + 1: + 0: + 0: a + 1: + 0: + +/^\x{85}$/i,utf + \x{85} + 0: \x{85} + +/^abc./gmx,newline=any,utf + abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK + 0: abc1 + 0: abc2 + 0: abc3 + 0: abc4 + 0: abc5 + 0: abc6 + 0: abc7 + 0: abc8 + 0: abc9 + +/abc.$/gmx,newline=any,utf + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9 + 0: abc1 + 0: abc2 + 0: abc3 + 0: abc4 + 0: abc5 + 0: abc6 + 0: abc7 + 0: abc8 + 0: abc9 + +/^a\Rb/bsr=unicode,utf + a\nb + 0: a\x{0a}b + a\rb + 0: a\x{0d}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x0bb + 0: a\x{0b}b + a\x0cb + 0: a\x{0c}b + a\x{85}b + 0: a\x{85}b + a\x{2028}b + 0: a\x{2028}b + a\x{2029}b + 0: a\x{2029}b +\= Expect no match + a\n\rb +No match + +/^a\R*b/bsr=unicode,utf + ab + 0: ab + a\nb + 0: a\x{0a}b + a\rb + 0: a\x{0d}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x0bb + 0: a\x{0b}b + a\x0c\x{2028}\x{2029}b + 0: a\x{0c}\x{2028}\x{2029}b + a\x{85}b + 0: a\x{85}b + 
a\n\rb + 0: a\x{0a}\x{0d}b + a\n\r\x{85}\x0cb + 0: a\x{0a}\x{0d}\x{85}\x{0c}b + +/^a\R+b/bsr=unicode,utf + a\nb + 0: a\x{0a}b + a\rb + 0: a\x{0d}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x0bb + 0: a\x{0b}b + a\x0c\x{2028}\x{2029}b + 0: a\x{0c}\x{2028}\x{2029}b + a\x{85}b + 0: a\x{85}b + a\n\rb + 0: a\x{0a}\x{0d}b + a\n\r\x{85}\x0cb + 0: a\x{0a}\x{0d}\x{85}\x{0c}b +\= Expect no match + ab +No match + +/^a\R{1,3}b/bsr=unicode,utf + a\nb + 0: a\x{0a}b + a\n\rb + 0: a\x{0a}\x{0d}b + a\n\r\x{85}b + 0: a\x{0a}\x{0d}\x{85}b + a\r\n\r\nb + 0: a\x{0d}\x{0a}\x{0d}\x{0a}b + a\r\n\r\n\r\nb + 0: a\x{0d}\x{0a}\x{0d}\x{0a}\x{0d}\x{0a}b + a\n\r\n\rb + 0: a\x{0a}\x{0d}\x{0a}\x{0d}b + a\n\n\r\nb + 0: a\x{0a}\x{0a}\x{0d}\x{0a}b +\= Expect no match + a\n\n\n\rb +No match + a\r +No match + +/\h+\V?\v{3,4}/utf,no_auto_possess + \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a + 0: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}\x{0d} + 1: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c} + +/\V?\v{3,4}/utf,no_auto_possess + \x20\x{a0}X\x0a\x0b\x0c\x0d\x0a + 0: X\x{0a}\x{0b}\x{0c}\x{0d} + 1: X\x{0a}\x{0b}\x{0c} + +/\h+\V?\v{3,4}/utf,no_auto_possess + >\x09\x20\x{a0}X\x0a\x0a\x0a< + 0: \x{09} \x{a0}X\x{0a}\x{0a}\x{0a} + +/\V?\v{3,4}/utf,no_auto_possess + >\x09\x20\x{a0}X\x0a\x0a\x0a< + 0: X\x{0a}\x{0a}\x{0a} + +/\H\h\V\v/utf + X X\x0a + 0: X X\x{0a} + X\x09X\x0b + 0: X\x{09}X\x{0b} +\= Expect no match + \x{a0} X\x0a +No match + +/\H*\h+\V?\v{3,4}/utf,no_auto_possess + \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a + 0: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}\x{0d} + 1: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c} + \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a + 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}\x{0d} + 1: \x{09} \x{a0}\x{0a}\x{0b}\x{0c} + \x09\x20\x{a0}\x0a\x0b\x0c + 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c} +\= Expect no match + \x09\x20\x{a0}\x0a\x0b +No match + +/\H\h\V\v/utf + \x{3001}\x{3000}\x{2030}\x{2028} + 0: \x{3001}\x{3000}\x{2030}\x{2028} + X\x{180e}X\x{85} + 0: X\x{180e}X\x{85} +\= Expect no match + \x{2009} X\x0a +No match + +/\H*\h+\V?\v{3,4}/utf,no_auto_possess + 
\x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a + 0: \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x{0c}\x{0d} + 1: \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x{0c} + \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a + 0: \x{09}\x{205f}\x{a0}\x{0a}\x{2029}\x{0c}\x{2028} + 1: \x{09}\x{205f}\x{a0}\x{0a}\x{2029}\x{0c} + \x09\x20\x{202f}\x0a\x0b\x0c + 0: \x{09} \x{202f}\x{0a}\x{0b}\x{0c} +\= Expect no match + \x09\x{200a}\x{a0}\x{2028}\x0b +No match + +/a\Rb/I,bsr=anycrlf,utf +Capture group count = 0 +Options: utf +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\rb + 0: a\x{0d}b + a\nb + 0: a\x{0a}b + a\r\nb + 0: a\x{0d}\x{0a}b +\= Expect no match + a\x{85}b +No match + a\x0bb +No match + +/a\Rb/I,bsr=unicode,utf +Capture group count = 0 +Options: utf +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\rb + 0: a\x{0d}b + a\nb + 0: a\x{0a}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x{85}b + 0: a\x{85}b + a\x0bb + 0: a\x{0b}b + +/a\R?b/I,bsr=anycrlf,utf +Capture group count = 0 +Options: utf +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + a\rb + 0: a\x{0d}b + a\nb + 0: a\x{0a}b + a\r\nb + 0: a\x{0d}\x{0a}b +\= Expect no match + a\x{85}b +No match + a\x0bb +No match + +/a\R?b/I,bsr=unicode,utf +Capture group count = 0 +Options: utf +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + a\rb + 0: a\x{0d}b + a\nb + 0: a\x{0a}b + a\r\nb + 0: a\x{0d}\x{0a}b + a\x{85}b + 0: a\x{85}b + a\x0bb + 0: a\x{0b}b + +/X/newline=any,utf,firstline + A\x{1ec5}ABCXYZ + 0: X + +/abcd*/utf + xxxxabcd\=ps + 0: abcd + xxxxabcd\=ph +Partial match: abcd + +/abcd*/i,utf + xxxxabcd\=ps + 0: abcd + xxxxabcd\=ph +Partial match: abcd + XXXXABCD\=ps + 0: ABCD + XXXXABCD\=ph +Partial match: ABCD + +/abc\d*/utf + xxxxabc1\=ps + 0: abc1 + xxxxabc1\=ph +Partial match: abc1 + 
+/abc[de]*/utf + xxxxabcde\=ps + 0: abcde + xxxxabcde\=ph +Partial match: abcde + +/\bthe cat\b/utf + the cat\=ps + 0: the cat + the cat\=ph +Partial match: the cat + +/./newline=crlf,utf + \r\=ps + 0: \x{0d} + \r\=ph +Partial match: \x{0d} + +/.{2,3}/newline=crlf,utf + \r\=ps +Partial match: \x{0d} + \r\=ph +Partial match: \x{0d} + \r\r\=ps + 0: \x{0d}\x{0d} + \r\r\=ph +Partial match: \x{0d}\x{0d} + \r\r\r\=ps + 0: \x{0d}\x{0d}\x{0d} + \r\r\r\=ph +Partial match: \x{0d}\x{0d}\x{0d} + +/.{2,3}?/newline=crlf,utf + \r\=ps +Partial match: \x{0d} + \r\=ph +Partial match: \x{0d} + \r\r\=ps + 0: \x{0d}\x{0d} + \r\r\=ph +Partial match: \x{0d}\x{0d} + \r\r\r\=ps + 0: \x{0d}\x{0d}\x{0d} + 1: \x{0d}\x{0d} + \r\r\r\=ph +Partial match: \x{0d}\x{0d}\x{0d} + +/[^\x{100}]/utf + \x{100}\x{101}X + 0: \x{101} + +/[^\x{100}]+/utf + \x{100}\x{101}X + 0: \x{101}X + +/\pL\P{Nd}/utf + AB + 0: AB +\= Expect no match + A0 +No match + 00 +No match + +/\X./utf + AB + 0: AB + A\x{300}BC + 0: A\x{300}B + A\x{300}\x{301}\x{302}BC + 0: A\x{300}\x{301}\x{302}B +\= Expect no match + \x{300} +No match + +/\X\X/utf + ABC + 0: AB + A\x{300}B\x{300}\x{301}C + 0: A\x{300}B\x{300}\x{301} + A\x{300}\x{301}\x{302}BC + 0: A\x{300}\x{301}\x{302}B +\= Expect no match + \x{300} +No match + +/^\pL+/utf + abcd + 0: abcd + a + 0: a + +/^\PL+/utf + 1234 + 0: 1234 + = + 0: = +\= Expect no match + abcd +No match + +/^\X+/utf + abcdA\x{300}\x{301}\x{302} + 0: abcdA\x{300}\x{301}\x{302} + A\x{300}\x{301}\x{302} + 0: A\x{300}\x{301}\x{302} + A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302} + 0: A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302} + a + 0: a + \x{300}\x{301}\x{302} + 0: \x{300}\x{301}\x{302} + +/\X?abc/utf + abc + 0: abc + A\x{300}abc + 0: A\x{300}abc + A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abcxyz + 0: A\x{300}abc + \x{300}abc + 0: \x{300}abc + +/^\X?abc/utf + abc + 0: abc + A\x{300}abc + 0: A\x{300}abc + \x{300}abc + 0: \x{300}abc +\= Expect no match + A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abcxyz 
+No match + +/\X*abc/utf + abc + 0: abc + A\x{300}abc + 0: A\x{300}abc + A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abcxyz + 0: A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abc + \x{300}abc + 0: \x{300}abc + +/^\X*abc/utf + abc + 0: abc + A\x{300}abc + 0: A\x{300}abc + A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abcxyz + 0: A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abc + \x{300}abc + 0: \x{300}abc + +/^\pL?=./utf + A=b + 0: A=b + =c + 0: =c +\= Expect no match + 1=2 +No match + AAAA=b +No match + +/^\pL*=./utf + AAAA=b + 0: AAAA=b + =c + 0: =c +\= Expect no match + 1=2 +No match + +/^\X{2,3}X/utf + A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}X + 0: A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}X + A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}X + 0: A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}X +\= Expect no match + X +No match + A\x{300}\x{301}\x{302}X +No match + A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}X +No match + +/^\pC\pL\pM\pN\pP\pS\pZ\p{Xsp}/utf + >\x{1680}\x{2028}\x{0b} + 0: >\x{1680} +\= Expect no match + \x{0b} +No match + +/^>\p{Xsp}+/utf,no_auto_possess + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 1: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028} + 2: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680} + 3: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0} + 4: > \x{09}\x{0a}\x{0c}\x{0d} + 5: > \x{09}\x{0a}\x{0c} + 6: > \x{09}\x{0a} + 7: > \x{09} + 8: > + +/^>\p{Xsp}*/utf,no_auto_possess + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 1: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028} + 2: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680} + 3: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0} + 4: > \x{09}\x{0a}\x{0c}\x{0d} + 5: > \x{09}\x{0a}\x{0c} + 6: > \x{09}\x{0a} + 7: > \x{09} + 8: > + 9: > + +/^>\p{Xsp}{2,9}/utf,no_auto_possess + > 
\x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 1: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028} + 2: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680} + 3: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0} + 4: > \x{09}\x{0a}\x{0c}\x{0d} + 5: > \x{09}\x{0a}\x{0c} + 6: > \x{09}\x{0a} + 7: > \x{09} + +/^>[\p{Xsp}]/utf,no_auto_possess + >\x{2028}\x{0b} + 0: >\x{2028} + +/^>[\p{Xsp}]+/utf,no_auto_possess + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 1: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028} + 2: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680} + 3: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0} + 4: > \x{09}\x{0a}\x{0c}\x{0d} + 5: > \x{09}\x{0a}\x{0c} + 6: > \x{09}\x{0a} + 7: > \x{09} + 8: > + +/^>\p{Xps}/utf + >\x{1680}\x{2028}\x{0b} + 0: >\x{1680} + >\x{a0} + 0: >\x{a0} +\= Expect no match + \x{0b} +No match + +/^>\p{Xps}+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}+?/utf + >\x{1680}\x{2028}\x{0b} + 0: >\x{1680}\x{2028}\x{0b} + 1: >\x{1680}\x{2028} + 2: >\x{1680} + +/^>\p{Xps}*/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}?/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 1: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028} + 2: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680} + 3: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0} + 4: > \x{09}\x{0a}\x{0c}\x{0d} + 5: > \x{09}\x{0a}\x{0c} + 6: > \x{09}\x{0a} + 7: > \x{09} + +/^>[\p{Xps}]/utf + >\x{2028}\x{0b} + 0: >\x{2028} + +/^>[\p{Xps}]+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > 
\x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^\p{Xwd}/utf + ABCD + 0: A + 1234 + 0: 1 + \x{6ca} + 0: \x{6ca} + \x{a6c} + 0: \x{a6c} + \x{10a7} + 0: \x{10a7} + _ABC + 0: _ +\= Expect no match + [] +No match + +/^\p{Xwd}+/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}*/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}{2,9}/utf + A_12\x{6ca}\x{a6c}\x{10a7} + 0: A_12\x{6ca}\x{a6c}\x{10a7} + +/^[\p{Xwd}]/utf + ABCD1234_ + 0: A + 1234abcd_ + 0: 1 + \x{6ca} + 0: \x{6ca} + \x{a6c} + 0: \x{a6c} + \x{10a7} + 0: \x{10a7} + _ABC + 0: _ +\= Expect no match + [] +No match + +/^[\p{Xwd}]+/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +# Unicode properties for \b and \B + +/\b...\B/utf,ucp + abc_ + 0: abc + \x{37e}abc\x{376} + 0: abc + \x{37e}\x{376}\x{371}\x{393}\x{394} + 0: \x{376}\x{371}\x{393} + !\x{c0}++\x{c1}\x{c2} + 0: ++\x{c1} + !\x{c0}+++++ + 0: \x{c0}++ + +# Without PCRE_UCP, non-ASCII always fail, even if < 256 + +/\b...\B/utf + abc_ + 0: abc +\= Expect no match + \x{37e}abc\x{376} +No match + \x{37e}\x{376}\x{371}\x{393}\x{394} +No match + !\x{c0}++\x{c1}\x{c2} +No match + !\x{c0}+++++ +No match + +# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties + +/\b...\B/ucp + abc_ + 0: abc + !\x{c0}++\x{c1}\x{c2} + 0: ++\xc1 + !\x{c0}+++++ + 0: \xc0++ + +# Caseless single negated characters > 127 need UCP support + +/[^\x{100}]/i,utf + \x{100}\x{101}X + 0: X + +/[^\x{100}]+/i,utf + \x{100}\x{101}XX + 0: XX + +/^\X/utf + A\=ps + 0: A + A\=ph +Partial match: A + A\x{300}\x{301}\=ps + 0: A\x{300}\x{301} + A\x{300}\x{301}\=ph +Partial match: A\x{300}\x{301} + A\x{301}\=ps + 0: A\x{301} + A\x{301}\=ph +Partial match: A\x{301} + +/^\X{2,3}/utf + A\=ps +Partial match: A + A\=ph +Partial match: A + AA\=ps + 0: AA + AA\=ph +Partial match: AA + A\x{300}\x{301}\=ps +Partial match: A\x{300}\x{301} + A\x{300}\x{301}\=ph +Partial match: A\x{300}\x{301} + 
A\x{300}\x{301}A\x{300}\x{301}\=ps + 0: A\x{300}\x{301}A\x{300}\x{301} + A\x{300}\x{301}A\x{300}\x{301}\=ph +Partial match: A\x{300}\x{301}A\x{300}\x{301} + +/^\X{2}/utf + AA\=ps + 0: AA + AA\=ph +Partial match: AA + A\x{300}\x{301}A\x{300}\x{301}\=ps + 0: A\x{300}\x{301}A\x{300}\x{301} + A\x{300}\x{301}A\x{300}\x{301}\=ph +Partial match: A\x{300}\x{301}A\x{300}\x{301} + +/^\X+/utf + AA\=ps + 0: AA + AA\=ph +Partial match: AA + +/^\X+?Z/utf + AA\=ps +Partial match: AA + AA\=ph +Partial match: AA + +# These are tests for extended grapheme clusters + +/^\X/utf,aftertext + G\x{34e}\x{34e}X + 0: G\x{34e}\x{34e} + 0+ X + \x{34e}\x{34e}X + 0: \x{34e}\x{34e} + 0+ X + \x04X + 0: \x{04} + 0+ X + \x{1100}X + 0: \x{1100} + 0+ X + \x{1100}\x{34e}X + 0: \x{1100}\x{34e} + 0+ X + \x{1b04}\x{1b04}X + 0: \x{1b04}\x{1b04} + 0+ X +\= These match up to the roman letters + \x{1111}\x{1111}L,L + 0: \x{1111}\x{1111} + 0+ L,L + \x{1111}\x{1111}\x{1169}L,L,V + 0: \x{1111}\x{1111}\x{1169} + 0+ L,L,V + \x{1111}\x{ae4c}L, LV + 0: \x{1111}\x{ae4c} + 0+ L, LV + \x{1111}\x{ad89}L, LVT + 0: \x{1111}\x{ad89} + 0+ L, LVT + \x{1111}\x{ae4c}\x{1169}L, LV, V + 0: \x{1111}\x{ae4c}\x{1169} + 0+ L, LV, V + \x{1111}\x{ae4c}\x{1169}\x{1169}L, LV, V, V + 0: \x{1111}\x{ae4c}\x{1169}\x{1169} + 0+ L, LV, V, V + \x{1111}\x{ae4c}\x{1169}\x{11fe}L, LV, V, T + 0: \x{1111}\x{ae4c}\x{1169}\x{11fe} + 0+ L, LV, V, T + \x{1111}\x{ad89}\x{11fe}L, LVT, T + 0: \x{1111}\x{ad89}\x{11fe} + 0+ L, LVT, T + \x{1111}\x{ad89}\x{11fe}\x{11fe}L, LVT, T, T + 0: \x{1111}\x{ad89}\x{11fe}\x{11fe} + 0+ L, LVT, T, T + \x{ad89}\x{11fe}\x{11fe}LVT, T, T + 0: \x{ad89}\x{11fe}\x{11fe} + 0+ LVT, T, T +\= These match just the first codepoint (invalid sequence) + \x{1111}\x{11fe}L, T + 0: \x{1111} + 0+ \x{11fe}L, T + \x{ae4c}\x{1111}LV, L + 0: \x{ae4c} + 0+ \x{1111}LV, L + \x{ae4c}\x{ae4c}LV, LV + 0: \x{ae4c} + 0+ \x{ae4c}LV, LV + \x{ae4c}\x{ad89}LV, LVT + 0: \x{ae4c} + 0+ \x{ad89}LV, LVT + \x{1169}\x{1111}V, L + 0: \x{1169} + 0+ \x{1111}V, L + 
\x{1169}\x{ae4c}V, LV + 0: \x{1169} + 0+ \x{ae4c}V, LV + \x{1169}\x{ad89}V, LVT + 0: \x{1169} + 0+ \x{ad89}V, LVT + \x{ad89}\x{1111}LVT, L + 0: \x{ad89} + 0+ \x{1111}LVT, L + \x{ad89}\x{1169}LVT, V + 0: \x{ad89} + 0+ \x{1169}LVT, V + \x{ad89}\x{ae4c}LVT, LV + 0: \x{ad89} + 0+ \x{ae4c}LVT, LV + \x{ad89}\x{ad89}LVT, LVT + 0: \x{ad89} + 0+ \x{ad89}LVT, LVT + \x{11fe}\x{1111}T, L + 0: \x{11fe} + 0+ \x{1111}T, L + \x{11fe}\x{1169}T, V + 0: \x{11fe} + 0+ \x{1169}T, V + \x{11fe}\x{ae4c}T, LV + 0: \x{11fe} + 0+ \x{ae4c}T, LV + \x{11fe}\x{ad89}T, LVT + 0: \x{11fe} + 0+ \x{ad89}T, LVT +\= Test extend and spacing mark + \x{1111}\x{ae4c}\x{0711}L, LV, extend + 0: \x{1111}\x{ae4c}\x{711} + 0+ L, LV, extend + \x{1111}\x{ae4c}\x{1b04}L, LV, spacing mark + 0: \x{1111}\x{ae4c}\x{1b04} + 0+ L, LV, spacing mark + \x{1111}\x{ae4c}\x{1b04}\x{0711}\x{1b04}L, LV, spacing mark, extend, spacing mark + 0: \x{1111}\x{ae4c}\x{1b04}\x{711}\x{1b04} + 0+ L, LV, spacing mark, extend, spacing mark +\= Test CR, LF, and control + \x0d\x{0711}CR, extend + 0: \x{0d} + 0+ \x{711}CR, extend + \x0d\x{1b04}CR, spacingmark + 0: \x{0d} + 0+ \x{1b04}CR, spacingmark + \x0a\x{0711}LF, extend + 0: \x{0a} + 0+ \x{711}LF, extend + \x0a\x{1b04}LF, spacingmark + 0: \x{0a} + 0+ \x{1b04}LF, spacingmark + \x0b\x{0711}Control, extend + 0: \x{0b} + 0+ \x{711}Control, extend + \x09\x{1b04}Control, spacingmark + 0: \x{09} + 0+ \x{1b04}Control, spacingmark +\= There are no Prepend characters, so we can't test Prepend, CR + +/^(?>\X{2})X/utf,aftertext + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + +/^\X{2,4}X/utf,aftertext + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + +/^\X{2,4}?X/utf,aftertext + 
\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0: \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + 0+ + +/\x{1e9e}+/i,utf + \x{1e9e}\x{00df} + 0: \x{1e9e}\x{df} + +/[z\x{1e9e}]+/i,utf + \x{1e9e}\x{00df} + 0: \x{1e9e}\x{df} + +/\x{00df}+/i,utf + \x{1e9e}\x{00df} + 0: \x{1e9e}\x{df} + +/[z\x{00df}]+/i,utf + \x{1e9e}\x{00df} + 0: \x{1e9e}\x{df} + +/\x{1f88}+/i,utf + \x{1f88}\x{1f80} + 0: \x{1f88}\x{1f80} + +/[z\x{1f88}]+/i,utf + \x{1f88}\x{1f80} + 0: \x{1f88}\x{1f80} + +# Perl matches these + +/\x{00b5}+/i,utf + \x{00b5}\x{039c}\x{03bc} + 0: \x{b5}\x{39c}\x{3bc} + +/\x{039c}+/i,utf + \x{00b5}\x{039c}\x{03bc} + 0: \x{b5}\x{39c}\x{3bc} + +/\x{03bc}+/i,utf + \x{00b5}\x{039c}\x{03bc} + 0: \x{b5}\x{39c}\x{3bc} + + +/\x{00c5}+/i,utf + \x{00c5}\x{00e5}\x{212b} + 0: \x{c5}\x{e5}\x{212b} + +/\x{00e5}+/i,utf + \x{00c5}\x{00e5}\x{212b} + 0: \x{c5}\x{e5}\x{212b} + +/\x{212b}+/i,utf + \x{00c5}\x{00e5}\x{212b} + 0: \x{c5}\x{e5}\x{212b} + +/\x{01c4}+/i,utf + \x{01c4}\x{01c5}\x{01c6} + 0: \x{1c4}\x{1c5}\x{1c6} + +/\x{01c5}+/i,utf + \x{01c4}\x{01c5}\x{01c6} + 0: \x{1c4}\x{1c5}\x{1c6} + +/\x{01c6}+/i,utf + \x{01c4}\x{01c5}\x{01c6} + 0: \x{1c4}\x{1c5}\x{1c6} + +/\x{01c7}+/i,utf + \x{01c7}\x{01c8}\x{01c9} + 0: \x{1c7}\x{1c8}\x{1c9} + +/\x{01c8}+/i,utf + \x{01c7}\x{01c8}\x{01c9} + 0: \x{1c7}\x{1c8}\x{1c9} + +/\x{01c9}+/i,utf + \x{01c7}\x{01c8}\x{01c9} + 0: \x{1c7}\x{1c8}\x{1c9} + + +/\x{01ca}+/i,utf + \x{01ca}\x{01cb}\x{01cc} + 0: \x{1ca}\x{1cb}\x{1cc} + +/\x{01cb}+/i,utf + \x{01ca}\x{01cb}\x{01cc} + 0: \x{1ca}\x{1cb}\x{1cc} + +/\x{01cc}+/i,utf + \x{01ca}\x{01cb}\x{01cc} + 0: \x{1ca}\x{1cb}\x{1cc} + +/\x{01f1}+/i,utf + \x{01f1}\x{01f2}\x{01f3} + 0: \x{1f1}\x{1f2}\x{1f3} + +/\x{01f2}+/i,utf + \x{01f1}\x{01f2}\x{01f3} + 0: \x{1f1}\x{1f2}\x{1f3} + 
+/\x{01f3}+/i,utf + \x{01f1}\x{01f2}\x{01f3} + 0: \x{1f1}\x{1f2}\x{1f3} + +/\x{0345}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + 0: \x{345}\x{399}\x{3b9}\x{1fbe} + +/\x{0399}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + 0: \x{345}\x{399}\x{3b9}\x{1fbe} + +/\x{03b9}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + 0: \x{345}\x{399}\x{3b9}\x{1fbe} + +/\x{1fbe}+/i,utf + \x{0345}\x{0399}\x{03b9}\x{1fbe} + 0: \x{345}\x{399}\x{3b9}\x{1fbe} + +/\x{0392}+/i,utf + \x{0392}\x{03b2}\x{03d0} + 0: \x{392}\x{3b2}\x{3d0} + +/\x{03b2}+/i,utf + \x{0392}\x{03b2}\x{03d0} + 0: \x{392}\x{3b2}\x{3d0} + +/\x{03d0}+/i,utf + \x{0392}\x{03b2}\x{03d0} + 0: \x{392}\x{3b2}\x{3d0} + + +/\x{0395}+/i,utf + \x{0395}\x{03b5}\x{03f5} + 0: \x{395}\x{3b5}\x{3f5} + +/\x{03b5}+/i,utf + \x{0395}\x{03b5}\x{03f5} + 0: \x{395}\x{3b5}\x{3f5} + +/\x{03f5}+/i,utf + \x{0395}\x{03b5}\x{03f5} + 0: \x{395}\x{3b5}\x{3f5} + +/\x{0398}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + 0: \x{398}\x{3b8}\x{3d1}\x{3f4} + +/\x{03b8}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + 0: \x{398}\x{3b8}\x{3d1}\x{3f4} + +/\x{03d1}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + 0: \x{398}\x{3b8}\x{3d1}\x{3f4} + +/\x{03f4}+/i,utf + \x{0398}\x{03b8}\x{03d1}\x{03f4} + 0: \x{398}\x{3b8}\x{3d1}\x{3f4} + +/\x{039a}+/i,utf + \x{039a}\x{03ba}\x{03f0} + 0: \x{39a}\x{3ba}\x{3f0} + +/\x{03ba}+/i,utf + \x{039a}\x{03ba}\x{03f0} + 0: \x{39a}\x{3ba}\x{3f0} + +/\x{03f0}+/i,utf + \x{039a}\x{03ba}\x{03f0} + 0: \x{39a}\x{3ba}\x{3f0} + +/\x{03a0}+/i,utf + \x{03a0}\x{03c0}\x{03d6} + 0: \x{3a0}\x{3c0}\x{3d6} + +/\x{03c0}+/i,utf + \x{03a0}\x{03c0}\x{03d6} + 0: \x{3a0}\x{3c0}\x{3d6} + +/\x{03d6}+/i,utf + \x{03a0}\x{03c0}\x{03d6} + 0: \x{3a0}\x{3c0}\x{3d6} + +/\x{03a1}+/i,utf + \x{03a1}\x{03c1}\x{03f1} + 0: \x{3a1}\x{3c1}\x{3f1} + +/\x{03c1}+/i,utf + \x{03a1}\x{03c1}\x{03f1} + 0: \x{3a1}\x{3c1}\x{3f1} + +/\x{03f1}+/i,utf + \x{03a1}\x{03c1}\x{03f1} + 0: \x{3a1}\x{3c1}\x{3f1} + +/\x{03a3}+/i,utf + \x{03A3}\x{03C2}\x{03C3} + 0: \x{3a3}\x{3c2}\x{3c3} + +/\x{03c2}+/i,utf + 
\x{03A3}\x{03C2}\x{03C3} + 0: \x{3a3}\x{3c2}\x{3c3} + +/\x{03c3}+/i,utf + \x{03A3}\x{03C2}\x{03C3} + 0: \x{3a3}\x{3c2}\x{3c3} + +/\x{03a6}+/i,utf + \x{03a6}\x{03c6}\x{03d5} + 0: \x{3a6}\x{3c6}\x{3d5} + +/\x{03c6}+/i,utf + \x{03a6}\x{03c6}\x{03d5} + 0: \x{3a6}\x{3c6}\x{3d5} + +/\x{03d5}+/i,utf + \x{03a6}\x{03c6}\x{03d5} + 0: \x{3a6}\x{3c6}\x{3d5} + +/\x{03c9}+/i,utf + \x{03c9}\x{03a9}\x{2126} + 0: \x{3c9}\x{3a9}\x{2126} + +/\x{03a9}+/i,utf + \x{03c9}\x{03a9}\x{2126} + 0: \x{3c9}\x{3a9}\x{2126} + +/\x{2126}+/i,utf + \x{03c9}\x{03a9}\x{2126} + 0: \x{3c9}\x{3a9}\x{2126} + +/\x{1e60}+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + 0: \x{1e60}\x{1e61}\x{1e9b} + +/\x{1e61}+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + 0: \x{1e60}\x{1e61}\x{1e9b} + +/\x{1e9b}+/i,utf + \x{1e60}\x{1e61}\x{1e9b} + 0: \x{1e60}\x{1e61}\x{1e9b} + +/\x{1e9e}+/i,utf + \x{1e9e}\x{00df} + 0: \x{1e9e}\x{df} + +/\x{00df}+/i,utf + \x{1e9e}\x{00df} + 0: \x{1e9e}\x{df} + +/\x{1f88}+/i,utf + \x{1f88}\x{1f80} + 0: \x{1f88}\x{1f80} + +/\x{1f80}+/i,utf + \x{1f88}\x{1f80} + 0: \x{1f88}\x{1f80} + +/\x{004b}+/i,utf + \x{004b}\x{006b}\x{212a} + 0: Kk\x{212a} + +/\x{006b}+/i,utf + \x{004b}\x{006b}\x{212a} + 0: Kk\x{212a} + +/\x{212a}+/i,utf + \x{004b}\x{006b}\x{212a} + 0: Kk\x{212a} + +/\x{0053}+/i,utf + \x{0053}\x{0073}\x{017f} + 0: Ss\x{17f} + +/\x{0073}+/i,utf + \x{0053}\x{0073}\x{017f} + 0: Ss\x{17f} + +/\x{017f}+/i,utf + \x{0053}\x{0073}\x{017f} + 0: Ss\x{17f} + +/ist/i,utf +\= Expect no match + ikt +No match + +/is+t/i,utf + iSs\x{17f}t + 0: iSs\x{17f}t +\= Expect no match + ikt +No match + +/is+?t/i,utf +\= Expect no match + ikt +No match + +/is?t/i,utf +\= Expect no match + ikt +No match + +/is{2}t/i,utf +\= Expect no match + iskt +No match + +/^\p{Xuc}/utf + $abc + 0: $ + @abc + 0: @ + `abc + 0: ` + \x{1234}abc + 0: \x{1234} +\= Expect no match + abc +No match + +/^\p{Xuc}+/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $@`\x{a0}\x{1234}\x{e000} +\= Expect no match + \x{9f} +No match + +/^\p{Xuc}+?/utf + $@`\x{a0}\x{1234}\x{e000}** + 
0: $@`\x{a0}\x{1234}\x{e000} + 1: $@`\x{a0}\x{1234} + 2: $@`\x{a0} + 3: $@` + 4: $@ + 5: $ +\= Expect no match + \x{9f} +No match + +/^\p{Xuc}+?\*/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $@`\x{a0}\x{1234}\x{e000}* +\= Expect no match + \x{9f} +No match + +/^\p{Xuc}++/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $@`\x{a0}\x{1234}\x{e000} +\= Expect no match + \x{9f} +No match + +/^\p{Xuc}{3,5}/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $@`\x{a0}\x{1234} +\= Expect no match + \x{9f} +No match + +/^\p{Xuc}{3,5}?/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $@`\x{a0}\x{1234} + 1: $@`\x{a0} + 2: $@` +\= Expect no match + \x{9f} +No match + +/^[\p{Xuc}]/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $ +\= Expect no match + \x{9f} +No match + +/^[\p{Xuc}]+/utf + $@`\x{a0}\x{1234}\x{e000}** + 0: $@`\x{a0}\x{1234}\x{e000} +\= Expect no match + \x{9f} +No match + +/^\P{Xuc}/utf + abc + 0: a +\= Expect no match + $abc +No match + @abc +No match + `abc +No match + \x{1234}abc +No match + +/^[\P{Xuc}]/utf + abc + 0: a +\= Expect no match + $abc +No match + @abc +No match + `abc +No match + \x{1234}abc +No match + +/^A\s+Z/utf,ucp + A\x{2005}Z + 0: A\x{2005}Z + A\x{85}\x{180e}\x{2005}Z + 0: A\x{85}\x{180e}\x{2005}Z + +/^A[\s]+Z/utf,ucp + A\x{2005}Z + 0: A\x{2005}Z + A\x{85}\x{180e}\x{2005}Z + 0: A\x{85}\x{180e}\x{2005}Z + +/(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext + \x{100}\x{200}\x{300} + 0: \x{100}\x{200}\x{300} + <<<<<<< >>>>>>> + +# ----------------------------------------------------------------------------- +# Tests for bidi control and bidi class properties + +/\p{ bidi_control }/utf + -->\x{202c}<-- + 0: \x{202c} + +/\p{bidicontrol}+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/\p{bidicontrol}+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + 
1: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c} + 2: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b} + 3: \x{61c}\x{200e}\x{200f}\x{202a} + 4: \x{61c}\x{200e}\x{200f} + 5: \x{61c}\x{200e} + 6: \x{61c} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + 1: \x{2066}\x{2067}\x{2068} + 2: \x{2066}\x{2067} + 3: \x{2066} + +/\p{bidicontrol}++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/[\p{bidi_control}]/utf + -->\x{202c}<-- + 0: \x{202c} + +/[\p{bidicontrol}]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/[\p{bidicontrol}]+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + 1: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c} + 2: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b} + 3: \x{61c}\x{200e}\x{200f}\x{202a} + 4: \x{61c}\x{200e}\x{200f} + 5: \x{61c}\x{200e} + 6: \x{61c} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + 1: \x{2066}\x{2067}\x{2068} + 2: \x{2066}\x{2067} + 3: \x{2066} + +/[\p{bidicontrol}]++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/[\p{bidicontrol}<>]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: >\x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}< + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: >\x{2066}\x{2067}\x{2068}\x{2069}< + +/\P{bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: --> + 0: <-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: --> + 0: <-- 
+ +/\p{^bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: --> + 0: <-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: --> + 0: <-- + +/\p{bidi class = al}/utf + -->\x{061D}<-- + 0: \x{61d} + +/\p{bidi class = al}+/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d}\x{61e}\x{61f} + +/\p{bidi_class : AL}+?/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d}\x{61e}\x{61f} + 1: \x{61d}\x{61e} + 2: \x{61d} + +/\p{Bidi_Class : AL}++/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d}\x{61e}\x{61f} + +/\p{bidi class = aN}+/utf + -->\x{061D}\x{0602}\x{0604}\x{061f}<-- + 0: \x{602}\x{604} + +/\p{bidi class = B}+/utf + -->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<-- + 0: \x{0a}\x{0d}\x{1c}\x{1e}\x{85}\x{2029} + +/\p{bidi class:BN}+/utf + -->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<-- + 0: \x{00}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff} + +/\p{bidiclass:cs}+/utf + -->,.\x{060c}\x{ff1a}<-- + 0: ,.\x{60c}\x{ff1a} + +/\p{bidiclass:En}+/utf + -->09\x{b2}\x{2074}\x{1fbf9}<-- + 0: 09\x{b2}\x{2074}\x{1fbf9} + +/\p{bidiclass:es}+/utf + ==>+-\x{207a}\x{ff0d}<== + 0: +-\x{207a}\x{ff0d} + +/\p{bidiclass:et}+/utf + -->#\{24}%\x{a2}\x{A838}\x{1e2ff}<-- + 0: # + +/\p{bidiclass:FSI}+/utf + -->\x{2068}<-- + 0: \x{2068} + +/\p{bidi class:L}+/utf + -->ABC<-- + 0: ABC + +/\P{bidi class:L}+/utf + -->ABC<-- + 0: --> + +/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf + -->\x{202a}\x{2066}\x{202d}<-- + 0: \x{202a}\x{2066}\x{202d} + +/\p{bidi class:NSM}+/utf + -->\x{9bc}\x{a71}\x{e31}<-- + 0: \x{9bc}\x{a71}\x{e31} + +/\p{bidi class:ON}+/utf + -->\x{21}'()*;@\x{384}\x{2039}<=- + 0: >!'()*;@\x{384}\x{2039}<= + +/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf + -->\x{202c}\x{2069}<-- + 0: \x{202c}\x{2069} + +/\p{bidi class:R}+/utf + -->\x{590}\x{5c6}\x{200f}\x{10805}<-- + 0: \x{590}\x{5c6}\x{200f}\x{10805} + +/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf + -->\x{202b}\x{2067}\x{202e}<-- + 0: \x{202b}\x{2067}\x{202e} + +/\p{bidi 
class:S}+\p{bidiclass:WS}+/utf + -->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<-- + 0: \x{09}\x{0b}\x{1f} \x{0c} \x{2000} \x{3000} + +# ----------------------------------------------------------------------------- + +/\p{katakana}/utf + \x{30a1} + 0: \x{30a1} + \x{3001} + 0: \x{3001} + +/\p{scx:katakana}/utf + \x{30a1} + 0: \x{30a1} + \x{3001} + 0: \x{3001} + +/\p{script extensions:katakana}/utf + \x{30a1} + 0: \x{30a1} + \x{3001} + 0: \x{3001} + +/\p{sc:katakana}/utf + \x{30a1} + 0: \x{30a1} +\= Expect no match + \x{3001} +No match + +/\p{script:katakana}/utf + \x{30a1} + 0: \x{30a1} +\= Expect no match + \x{3001} +No match + +/\p{sc:katakana}{3,}/utf + \x{30a1}\x{30fa}\x{32d0}\x{1b122}\x{ff66}\x{3001}ABC + 0: \x{30a1}\x{30fa}\x{32d0}\x{1b122}\x{ff66} + +/\p{sc:katakana}{3,}?/utf + \x{30a1}\x{30fa}\x{32d0}\x{1b122}\x{ff66}\x{3001}ABC + 0: \x{30a1}\x{30fa}\x{32d0}\x{1b122}\x{ff66} + 1: \x{30a1}\x{30fa}\x{32d0}\x{1b122} + 2: \x{30a1}\x{30fa}\x{32d0} + +# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without +# the restriction. 
+ +/AskZ/i,utf,caseless_restrict + AskZ + 0: AskZ + aSKz + 0: aSKz +\= Expect no match + A\x{17f}kZ +No match + As\x{212a}Z +No match + +/AskZ/i,utf + AskZ + 0: AskZ + aSKz + 0: aSKz + A\x{17f}kZ + 0: A\x{17f}kZ + As\x{212a}Z + 0: As\x{212a}Z + +/A\x{17f}\x{212a}Z/ir,utf + \= Expect no match + AskZ +No match + +/A\x{17f}\x{212a}Z/i,utf + AskZ + 0: AskZ + +/[AskZ]+/i,utf,caseless_restrict + AskZ + 0: AskZ + aSKz + 0: aSKz + A\x{17f}kZ + 0: A + As\x{212a}Z + 0: As + +/[AskZ]+/i,utf + AskZ + 0: AskZ + aSKz + 0: aSKz + A\x{17f}kZ + 0: A\x{17f}kZ + As\x{212a}Z + 0: As\x{212a}Z + +/[\x{17f}\x{212a}]+/ir,utf +\= Expect no match + AskZ +No match + +/[\x{17f}\x{212a}]+/i,utf + AskZ + 0: sk + +/[^s]+/ir,utf + A\x{17f}Z + 0: A\x{17f}Z + +/[^s]+/i,utf + A\x{17f}Z + 0: A + +/[^k]+/ir,utf + A\x{212a}Z + 0: A\x{212a}Z + +/[^k]+/i,utf + A\x{212a}Z + 0: A + +/[^sk]+/ir,utf + A\x{17f}\x{212a}Z + 0: A\x{17f}\x{212a}Z + +/[^sk]+/i,utf + A\x{17f}\x{212a}Z + 0: A + +/[^\x{17f}]+/ir,utf + AsSZ + 0: AsSZ + +/[^\x{17f}]+/i,utf + AsSZ + 0: A + +/[Ss]+/irB,utf +------------------------------------------------------------------ + Bra + /i S++ + Ket + End +------------------------------------------------------------------ + Sss\x{17f}ss + 0: Sss + +/[Ss]+/iB,utf +------------------------------------------------------------------ + Bra + [Ss\x{17f}\x{17f}]++ + Ket + End +------------------------------------------------------------------ + Sss\x{17f}ss + 0: Sss\x{17f}ss + +/[S\x{17f}]/irB,utf +------------------------------------------------------------------ + Bra + [Ss\x{17f}] + Ket + End +------------------------------------------------------------------ + +/[S\x{17f}]/iB,utf +------------------------------------------------------------------ + Bra + [Ss\x{17f}\x{17f}] + Ket + End +------------------------------------------------------------------ + +/[\x{17f}s]/irB,utf +------------------------------------------------------------------ + Bra + [Ss\x{17f}] + Ket + End 
+------------------------------------------------------------------ + +/[\x{17f}s]/iB,utf +------------------------------------------------------------------ + Bra + [Ss\x{17f}\x{17f}] + Ket + End +------------------------------------------------------------------ + +/[\x{4b}\x{6b}]/irB,utf +------------------------------------------------------------------ + Bra + /i K + Ket + End +------------------------------------------------------------------ + +/[\x{4b}\x{6b}]/iB,utf +------------------------------------------------------------------ + Bra + [Kk\x{212a}\x{212a}] + Ket + End +------------------------------------------------------------------ + +/s(?r)s(?-r)s(?r:s)s/i,utf + \x{17f}S\x{17f}S\x{17f} + 0: \x{17f}S\x{17f}S\x{17f} +\= Expect no match + \x{17f}\x{17f}\x{17f}S\x{17f} +No match + \x{17f}S\x{17f}\x{17f}\x{17f} +No match + +/k(?^i)k/ir,utf + K\x{212a} + 0: K\x{212a} +\= Expect no match + \x{212a}\x{212a} +No match + +# End caseless restrict tests + +# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without. 
+ +# DIGITS + +/\d+/i,utf + 123\x{660}456 + 0: 123 + +/\d+/i,utf,ucp + 123\x{660}456 + 0: 123\x{660}456 + +/\d+/i,utf,ucp,ascii_bsd + 123\x{660}456 + 0: 123 + +/[\d]+/i,utf + 123\x{660}456 + 0: 123 + +/[\d]+/i,utf,ucp + 123\x{660}456 + 0: 123\x{660}456 + +/[\d]+/i,utf,ucp,ascii_bsd + 123\x{660}456 + 0: 123 + +/\d(?aD)\d(?-aD)\d/utf,ucp + \x{660}9\x{660} + 0: \x{660}9\x{660} +\= Expect no match + \x{660}\x{660}\x{660} +No match + +/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd + 999 + 0: 999 + 9\x{660}9 + 0: 9\x{660}9 + +/\d(?a)\d(?-a)\d/utf,ucp + \x{660}9\x{660} + 0: \x{660}9\x{660} +\= Expect no match + \x{660}\x{660}\x{660} +No match + +/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd + 999 + 0: 999 + 9\x{660}9 + 0: 9\x{660}9 + +# SPACES + +/>\s+ < + 0: > < +\= Expect no match + >\x{a0} < +No match + +/>\s+ < + 0: > < + >\x{a0} < + 0: >\x{a0} < + +/>\s+ < + 0: > < +\= Expect no match + >\x{a0} < +No match + +/>[\s]+ < + 0: > < +\= Expect no match + >\x{a0} < +No match + +/>[\s]+ < + 0: > < + >\x{a0} < + 0: >\x{a0} < + +/>[\s]+ < + 0: > < +\= Expect no match + >\x{a0} < +No match + +/>\s(?aS)\s(?-aS)\s\x{a0} \x{a0}< + 0: >\x{a0} \x{a0}< +\= Expect no match + >\x{a0}\x{a0}\x{a0}< +No match + +/>\s(?a)\s(?-a)\s\x{a0} \x{a0}< + 0: >\x{a0} \x{a0}< +\= Expect no match + >\x{a0}\x{a0}\x{a0}< +No match + +# WORDS + +/\w+/i,utf + 123\x{660}abc + 0: 123 + +/\w+/i,utf,ucp + 123\x{660}abc + 0: 123\x{660}abc + +/\w+/i,utf,ucp,ascii_bsw + 123\x{660}abc + 0: 123 + +/[\w]+/i,utf + 123\x{660}abc + 0: 123 + +/[\w]+/i,utf,ucp + 123\x{660}abc + 0: 123\x{660}abc + +/[\w]+/i,utf,ucp,ascii_bsw + 123\x{660}abc + 0: 123 + +/\w(?aW)\w(?-aW)\w/utf,ucp + \x{660}A\x{c0} + 0: \x{660}A\x{c0} +\= Expect no match + \x{660}\x{c0}\x{c0} +No match + +/\w(?a)\w(?-a)\w/utf,ucp + \x{660}A\x{c0} + 0: \x{660}A\x{c0} +\= Expect no match + \x{660}\x{c0}\x{c0} +No match + +# POSIX + +/^[[:digit:]]+$/utf,ucp + 123456 + 0: 123456 + 123\x{660}456 + 0: 123\x{660}456 + +/^[[:digit:]]+$/utf,ucp,ascii_digit + 123456 + 0: 123456 +\= 
Expect no match + 123\x{660}456 +No match + +/[[:digit:]]+/g,utf,ucp,ascii_digit + 123\x{660}456 + 0: 123 + 0: 456 + +/(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit + 11 + 0: 11 + \x{ff11}1 + 0: \x{ff11}1 +\= Expect no match + 1\x{ff11} +No match + +/(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit + 11 + 0: 11 + \x{ff11}1 + 0: \x{ff11}1 +\= Expect no match + 1\x{ff11} +No match + +/(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit + 11 + 0: 11 +\= Expect no match + \x{ff11}1 +No match + 1\x{ff11} +No match + +/[[:digit:]]+/utf,ucp,ascii_posix + 123\x{660}456 + 0: 123 + +/(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix + 11 + 0: 11 + \x{ff11}1 + 0: \x{ff11}1 +\= Expect no match + 1\x{ff11} +No match + +/(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix + 11 + 0: 11 + \x{ff11}1 + 0: \x{ff11}1 +\= Expect no match + 1\x{ff11} +No match + +/(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp + 11 + 0: 11 + \x{ff11}1 + 0: \x{ff11}1 +\= Expect no match + 1\x{ff11} +No match + +/>[[:space:]]+\x{a0} \x{a0}< + 0: >\x{a0} \x{a0}< + >\x{a0}\x{a0}\x{a0}< + 0: >\x{a0}\x{a0}\x{a0}< + +/>[[:space:]]+\x{a0} \x{a0}< +No match + +/(?aP)[[:alnum:]]+/i,ucp,utf + abcáxyz + 0: abc + abc\x{660}xyz + 0: abc + +/(?aP)[[:alnum:]\d]+/i,ucp,utf + abc\x{660}xyz + 0: abc\x{660}xyz + +/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/ + \x{660}A\x{660} + 0: \x{660}A\x{660} +\= Expect no match + \x{660}\x{660}\x{660} +No match + +# VARIOUS + +/[\d\s\w]+/a,ucp,utf + 9 A\x{660}À + 0: 9 A + 9 AÀ\x{660} + 0: 9 A + +# End PCRE2_EXTRA_ASCII_xxx tests + +/\w+/utf,ucp + --cafe\x{300}_au\x{203f}lait! + 0: cafe\x{300}_au\x{203f}lait + +/[\w]+/utf,ucp + --cafe\x{300}_au\x{203f}lait! + 0: cafe\x{300}_au\x{203f}lait + +/\b.+?\b/utf,ucp + --cafe\x{300}_au\x{203f}lait! + 0: cafe\x{300}_au\x{203f}lait + +/caf\B.+?\B/utf,ucp + --cafe\x{300}_au\x{203f}lait! + 0: cafe\x{300}_au\x{203f}lait! 
+ 1: cafe\x{300}_au\x{203f}lai + 2: cafe\x{300}_au\x{203f}la + 3: cafe\x{300}_au\x{203f}l + 4: cafe\x{300}_au\x{203f} + 5: cafe\x{300}_au + 6: cafe\x{300}_a + 7: cafe\x{300}_ + 8: cafe\x{300} + 9: cafe + +# End of testinput7 diff --git a/testdata/testoutput8-16-2 b/testdata/testoutput8-16-2 new file mode 100644 index 0000000..bcb9e17 --- /dev/null +++ b/testdata/testoutput8-16-2 @@ -0,0 +1,1076 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. + +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation - compiled block : 160 +Memory allocation - code portion : 24 +------------------------------------------------------------------ + 0 9 Bra + 2 5 CBra 1 + 5 /i b + 7 5 Ket + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation - compiled block : 174 +Memory allocation - code portion : 38 +------------------------------------------------------------------ + 0 16 Bra + 2 7 CBra 1 + 5 AllAny* + 7 X + 9 5 Alt + 11 ^ + 12 B + 14 12 Ket + 16 16 Ket + 18 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation - compiled block : 172 +Memory allocation - code portion : 36 +------------------------------------------------------------------ + 0 15 Bra + 2 6 Bra + 4 AllAny* + 6 X + 8 5 Alt + 10 ^ + 11 B + 13 11 Ket + 15 15 Ket + 17 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation - compiled block : 182 +Memory allocation - code 
portion : 46 +------------------------------------------------------------------ + 0 20 Bra + 2 ^ + 3 [0-9A-Za-z] + 20 20 Ket + 22 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation - compiled block : 146 +Memory allocation - code portion : 10 +------------------------------------------------------------------ + 0 2 Bra + 2 2 Ket + 4 End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + +/x?+/ +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 x?+ + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/x++/ +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 x++ + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation - compiled block : 156 +Memory allocation - code portion : 20 +------------------------------------------------------------------ + 0 7 Bra + 2 x + 4 x{0,2}+ + 7 7 Ket + 9 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation - compiled block : 162 +Memory allocation - code portion : 26 +------------------------------------------------------------------ + 0 10 Bra + 2 Braposzero + 3 5 CBraPos 1 + 6 x + 8 5 KetRpos + 10 10 Ket + 12 End 
+------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation - compiled block : 278 +Memory allocation - code portion : 142 +------------------------------------------------------------------ + 0 68 Bra + 2 ^ + 3 63 CBra 1 + 6 5 CBra 2 + 9 a+ + 11 5 Ket + 13 21 CBra 3 + 16 [ab]+? + 34 21 Ket + 36 21 CBra 4 + 39 [bc]+ + 57 21 Ket + 59 5 CBra 5 + 62 \w*+ + 64 5 Ket + 66 63 Ket + 68 68 Ket + 70 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 1784 +Memory allocation - code portion : 1648 +------------------------------------------------------------------ + 0 821 Bra + 2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +820 \b +821 821 Ket +823 End +------------------------------------------------------------------ + 
+"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 1764 +Memory allocation - code portion : 1628 +------------------------------------------------------------------ + 0 811 Bra + 2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +810 \b +811 811 Ket +813 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation - compiled block : 168 +Memory allocation - code portion : 32 +------------------------------------------------------------------ + 0 13 Bra + 2 9 CBra 1 + 5 a + 7 2 Recurse + 9 b + 11 9 Ket + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation - compiled block : 176 +Memory allocation - code portion : 40 +------------------------------------------------------------------ + 0 17 Bra + 2 13 CBra 1 + 5 a + 7 4 SBra + 9 2 Recurse + 11 4 KetRmax + 13 b + 15 13 Ket + 17 17 Ket + 19 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation - compiled block : 242 +Memory allocation - code portion : 54 +------------------------------------------------------------------ + 0 24 
Bra + 2 a + 4 5 CBra 1 + 7 b + 9 4 Alt + 11 c + 13 9 Ket + 15 d + 17 5 CBra 2 + 20 e + 22 5 Ket + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation - compiled block : 218 +Memory allocation - code portion : 64 +------------------------------------------------------------------ + 0 29 Bra + 2 18 Bra + 4 a + 6 12 CBra 1 + 9 c + 11 5 CBra 2 + 14 d + 16 5 Ket + 18 12 Ket + 20 18 Ket + 22 5 CBra 3 + 25 a + 27 5 Ket + 29 29 Ket + 31 End +------------------------------------------------------------------ + +/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation - compiled block : 196 +Memory allocation - code portion : 54 +------------------------------------------------------------------ + 0 24 Bra + 2 5 CBra 1 + 5 a + 7 5 Ket + 9 Any + 10 Any + 11 Any + 12 \1 + 14 bbb + 20 2 Recurse + 22 d + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation - compiled block : 186 +Memory allocation - code portion : 50 +------------------------------------------------------------------ + 0 22 Bra + 2 abc + 8 Callout 255 10 1 + 12 de + 16 Callout 0 16 1 + 20 f + 22 22 Ket + 24 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation - compiled block : 214 +Memory allocation - code portion : 78 +------------------------------------------------------------------ + 0 36 Bra + 2 Callout 255 0 1 + 6 a + 8 Callout 255 1 1 + 12 b + 14 Callout 255 2 1 + 18 c + 20 Callout 255 3 1 + 24 d + 26 Callout 255 4 1 + 30 e + 32 Callout 255 5 0 + 36 36 Ket + 38 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + 
+/\x{1000}/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{1000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation - compiled block : 152 +Memory allocation - code portion : 16 +------------------------------------------------------------------ + 0 5 Bra + 2 \x{10000} + 5 5 Ket + 7 End +------------------------------------------------------------------ + +/\x{100000}/utf +Memory allocation - compiled block : 152 +Memory allocation - code portion : 16 +------------------------------------------------------------------ + 0 5 Bra + 2 \x{100000} + 5 5 Ket + 7 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation - compiled block : 152 +Memory allocation - code portion : 16 +------------------------------------------------------------------ + 0 5 Bra + 2 \x{10ffff} + 5 5 Ket + 7 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/[\x{ff}]/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{ff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{80} + 4 4 Ket + 6 End 
+------------------------------------------------------------------ + +/\xff/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{ff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation - compiled block : 162 +Memory allocation - code portion : 26 +------------------------------------------------------------------ + 0 10 Bra + 2 A\x{2262}\x{391}. + 10 10 Ket + 12 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' +Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation - compiled block : 158 +Memory allocation - code portion : 22 +------------------------------------------------------------------ + 0 8 Bra + 2 \x{d55c}\x{ad6d}\x{c5b4} + 8 8 Ket + 10 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{d55c} +Last code unit = \x{c5b4} +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation - compiled block : 158 +Memory allocation - code portion : 22 +------------------------------------------------------------------ + 0 8 Bra + 2 \x{65e5}\x{672c}\x{8a9e} + 8 8 Ket + 10 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{65e5} +Last code unit = \x{8a9e} +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation - compiled block : 190 +Memory allocation - 
code portion : 54 +------------------------------------------------------------------ + 0 24 Bra + 2 [Z\x{100}] + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation - compiled block : 162 +Memory allocation - code portion : 26 +------------------------------------------------------------------ + 0 10 Bra + 2 ^ + 3 [\x{100}-\x{150}] + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E]/utf +Memory allocation - compiled block : 162 +Memory allocation - code portion : 26 +------------------------------------------------------------------ + 0 10 Bra + 2 ^ + 3 [\x{100}-\x{150}] + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E/utf +Failed: error 106 at offset 13: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation - compiled block : 160 +Memory allocation - code portion : 24 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation - compiled block : 160 +Memory allocation - code portion : 24 +------------------------------------------------------------------ + 0 9 Bra + 2 [\P{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation - compiled block : 160 +Memory allocation - code portion : 24 +------------------------------------------------------------------ + 0 9 Bra + 2 [\P{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation - compiled block : 160 +Memory allocation - code portion : 24 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + 
+/[abc\p{L}\x{0660}]/utf +Memory allocation - compiled block : 196 +Memory allocation - code portion : 60 +------------------------------------------------------------------ + 0 27 Bra + 2 [a-c\p{L}\x{660}] + 27 27 Ket + 29 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation - compiled block : 160 +Memory allocation - code portion : 24 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{Nd}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation - compiled block : 194 +Memory allocation - code portion : 58 +------------------------------------------------------------------ + 0 26 Bra + 2 [+\-\p{Nd}]++ + 26 26 Ket + 28 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation - compiled block : 168 +Memory allocation - code portion : 32 +------------------------------------------------------------------ + 0 13 Bra + 2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation - compiled block : 168 +Memory allocation - code portion : 32 +------------------------------------------------------------------ + 0 13 Bra + 2 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation - compiled block : 160 +Memory allocation - code portion : 24 +------------------------------------------------------------------ + 0 9 Bra + 2 [\x{104}-\x{109}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation - compiled block : 188 +Memory allocation - code portion : 52 +------------------------------------------------------------------ + 0 23 
Bra + 2 19 CBra 1 + 5 Brazero + 6 13 SCBra 2 + 9 6 Cond + 11 1 Cond ref + 13 0 + 15 2 Alt + 17 8 Ket + 19 13 KetRmax + 21 19 Ket + 23 23 Ket + 25 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation - compiled block : 178 +Memory allocation - code portion : 42 +------------------------------------------------------------------ + 0 18 Bra + 2 14 CBra 1 + 5 Brazero + 6 6 SCond + 8 1 Cond ref + 10 0 + 12 2 Alt + 14 8 KetRmax + 16 14 Ket + 18 18 Ket + 20 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{aa} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{aa} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^a]/ +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 [^a] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion 
: 14 +------------------------------------------------------------------ + 0 4 Bra + 2 [^a] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 [^\x{aa}] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 4 Bra + 2 [^\x{aa}] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 9 Bra + 2 [^\p{Nd}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{L}\P{Cc}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{Cc}\P{L}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 10 Bra + 2 [\p{L}]++ + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{L}\P{Xsp}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 60 Bra + 2 abc + 8 5 CBra 1 + 11 d + 13 4 Alt + 15 e + 17 9 
Ket + 19 *THEN + 20 x + 22 12 CBra 2 + 25 123 + 31 *THEN + 32 4 + 34 24 Alt + 36 567 + 42 5 CBra 3 + 45 b + 47 4 Alt + 49 q + 51 9 Ket + 53 *THEN + 54 xx + 58 36 Ket + 60 60 Ket + 62 End +------------------------------------------------------------------ + +/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 35 Bra + 2 Brazero + 3 28 SCBra 1 + 6 12 CBra 2 + 9 7 CBra 3 + 12 a + 14 \2 + 16 7 Ket + 18 11 Alt + 20 5 CBra 4 + 23 a* + 25 5 Ket + 27 20 Recurse + 29 23 Ket + 31 28 KetRmax + 33 a?+ + 35 35 Ket + 37 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 16 Bra + 2 12 CBra 1 + 5 7 Recurse + 7 5 CBra 2 + 10 \1 + 12 5 Ket + 14 12 Ket + 16 16 Ket + 18 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 13 Bra + 2 6 Recurse + 4 6 Recurse + 6 5 CBra 1 + 9 a + 11 5 Ket + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 24 Bra + 2 Any + 3 7 CBra 1 + 6 19 Recurse + 8 0 Recurse + 10 4 Alt + 12 \1 + 14 3 Alt + 16 $ + 17 14 Ket + 19 3 CBra 2 + 22 3 Ket + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 31 Bra + 2 Any + 3 14 CBra 1 + 6 26 Recurse + 8 0 Recurse + 10 3 CBra 2 + 13 3 Ket + 15 10 Recurse + 17 4 Alt + 19 \1 + 21 3 Alt + 23 $ + 24 21 Ket + 26 3 CBra 3 + 29 3 Ket + 31 31 Ket + 33 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 50 Bra + 2 4 Recurse + 4 3 CBra 1 + 7 3 Ket + 9 39 CBra 2 + 12 32 CBra 3 + 15 
27 CBra 4 + 18 22 CBra 5 + 21 15 CBra 6 + 24 10 CBra 7 + 27 5 Once + 29 \1+ + 32 5 Ket + 34 10 Ket + 36 15 Ket + 38 \x{85} + 40 22 KetRmax + 42 27 Ket + 44 2 Alt + 46 34 Ket + 48 39 Ket + 50 50 Ket + 52 End +------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. + +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(
?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| +))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode +Failed: error 184 at offset 1129: (?| and/or (?J: or (?x: parentheses are too deeply nested + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 79 Bra + 2 70 Once + 4 6 Cond + 6 1 Cond ref + 8 74 Recurse + 10 6 Ket + 12 6 Cond + 14 1 Cond ref + 16 74 Recurse + 18 6 Ket + 20 6 Cond + 22 1 Cond ref + 24 74 Recurse + 26 6 Ket + 28 6 Cond + 30 1 Cond ref + 32 74 Recurse + 34 6 Ket + 36 6 Cond + 38 1 Cond ref + 40 74 Recurse + 42 6 Ket + 44 6 Cond + 46 1 Cond ref + 48 74 Recurse + 50 6 Ket + 52 6 Cond + 54 1 Cond ref + 56 74 Recurse + 58 6 Ket + 60 10 SBraPos + 62 6 SCond + 64 1 Cond ref + 66 74 Recurse + 68 6 Ket + 70 10 KetRpos + 72 70 Ket + 74 3 CBra 1 + 77 3 Ket + 79 79 Ket + 81 End 
+------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 43 Bra + 2 34 Once + 4 4 Cond + 6 1 Cond ref + 8 8 Alt + 10 a + 12 38 Recurse + 14 b + 16 12 Ket + 18 16 SBraPos + 20 4 SCond + 22 1 Cond ref + 24 8 Alt + 26 a + 28 38 Recurse + 30 b + 32 12 Ket + 34 16 KetRpos + 36 34 Ket + 38 3 CBra 1 + 41 3 Ket + 43 43 Ket + 45 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 133 Bra + 2 41 CBra 1 + 5 2 Recurse + 7 88 Recurse + 9 93 Recurse + 11 98 Recurse + 13 103 Recurse + 15 108 Recurse + 17 113 Recurse + 19 118 Recurse + 21 123 Recurse + 23 123 Recurse + 25 118 Recurse + 27 113 Recurse + 29 108 Recurse + 31 103 Recurse + 33 98 Recurse + 35 93 Recurse + 37 88 Recurse + 39 2 Recurse + 41 0 Recurse + 43 41 Ket + 45 41 SCBra 1 + 48 2 Recurse + 50 88 Recurse + 52 93 Recurse + 54 98 Recurse + 56 103 Recurse + 58 108 Recurse + 60 113 Recurse + 62 118 Recurse + 64 123 Recurse + 66 123 Recurse + 68 118 Recurse + 70 113 Recurse + 72 108 Recurse + 74 103 Recurse + 76 98 Recurse + 78 93 Recurse + 80 88 Recurse + 82 2 Recurse + 84 0 Recurse + 86 41 KetRmax + 88 3 CBra 2 + 91 3 Ket + 93 3 CBra 3 + 96 3 Ket + 98 3 CBra 4 +101 3 Ket +103 3 CBra 5 +106 3 Ket +108 3 CBra 6 +111 3 Ket +113 3 CBra 7 +116 3 Ket +118 3 CBra 8 +121 3 Ket +123 3 CBra 9 +126 3 Ket +128 3 CBra 10 +131 3 Ket +133 133 Ket +135 End +------------------------------------------------------------------ +Capture group count = 10 +May match empty string +Subject 
length lower bound = 0 + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +#pattern -fullbincode + +/\[()]{65535}/expand +Failed: error 120 at offset 131070: regular expression is too large + +# End of testinput8 diff --git a/testdata/testoutput8-16-3 b/testdata/testoutput8-16-3 new file mode 100644 index 0000000..4ec13ea --- /dev/null +++ b/testdata/testoutput8-16-3 @@ -0,0 +1,1074 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. 
There are separate +# output files for each code unit size and link size. + +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation - compiled block : 168 +Memory allocation - code portion : 32 +------------------------------------------------------------------ + 0 12 Bra + 3 6 CBra 1 + 7 /i b + 9 6 Ket + 12 12 Ket + 15 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 20 Bra + 3 8 CBra 1 + 7 AllAny* + 9 X + 11 6 Alt + 14 ^ + 15 B + 17 14 Ket + 20 20 Ket + 23 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation - compiled block : 182 +Memory allocation - code portion : 46 +------------------------------------------------------------------ + 0 19 Bra + 3 7 Bra + 6 AllAny* + 8 X + 10 6 Alt + 13 ^ + 14 B + 16 13 Ket + 19 19 Ket + 22 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation - compiled block : 186 +Memory allocation - code portion : 50 +------------------------------------------------------------------ + 0 21 Bra + 3 ^ + 4 [0-9A-Za-z] + 21 21 Ket + 24 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 3 Bra + 3 3 Ket + 6 End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ +Capture group count = 0 
+Options: extended +First code unit = 'a' +Subject length lower bound = 1 + +/x?+/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 x?+ + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/x++/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 x++ + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation - compiled block : 160 +Memory allocation - code portion : 24 +------------------------------------------------------------------ + 0 8 Bra + 3 x + 5 x{0,2}+ + 8 8 Ket + 11 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation - compiled block : 170 +Memory allocation - code portion : 34 +------------------------------------------------------------------ + 0 13 Bra + 3 Braposzero + 4 6 CBraPos 1 + 8 x + 10 6 KetRpos + 13 13 Ket + 16 End +------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation - compiled block : 302 +Memory allocation - code portion : 166 +------------------------------------------------------------------ + 0 79 Bra + 3 ^ + 4 72 CBra 1 + 8 6 CBra 2 + 12 a+ + 14 6 Ket + 17 22 CBra 3 + 21 [ab]+? 
+ 39 22 Ket + 42 22 CBra 4 + 46 [bc]+ + 64 22 Ket + 67 6 CBra 5 + 71 \w*+ + 73 6 Ket + 76 72 Ket + 79 79 Ket + 82 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 1788 +Memory allocation - code portion : 1652 +------------------------------------------------------------------ + 0 822 Bra + 3 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +821 \b +822 822 Ket +825 End +------------------------------------------------------------------ + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 1768 +Memory allocation - code portion : 1632 
+------------------------------------------------------------------ + 0 812 Bra + 3 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +811 \b +812 812 Ket +815 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation - compiled block : 178 +Memory allocation - code portion : 42 +------------------------------------------------------------------ + 0 17 Bra + 3 11 CBra 1 + 7 a + 9 3 Recurse + 12 b + 14 11 Ket + 17 17 Ket + 20 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation - compiled block : 190 +Memory allocation - code portion : 54 +------------------------------------------------------------------ + 0 23 Bra + 3 17 CBra 1 + 7 a + 9 6 SBra + 12 3 Recurse + 15 6 KetRmax + 18 b + 20 17 Ket + 23 23 Ket + 26 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation - compiled block : 256 +Memory allocation - code portion : 68 +------------------------------------------------------------------ + 0 30 Bra + 3 a + 5 6 CBra 1 + 9 b + 11 5 Alt + 14 c + 16 11 Ket + 19 d + 21 6 CBra 2 + 25 e + 27 6 Ket + 30 30 Ket + 33 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation - compiled block : 238 +Memory allocation - code portion : 84 +------------------------------------------------------------------ + 0 38 Bra + 3 23 Bra + 6 a + 8 15 CBra 1 + 12 c + 14 6 CBra 2 + 18 d + 20 6 Ket + 23 15 Ket + 26 23 Ket + 29 6 CBra 3 + 33 a + 35 6 Ket + 38 38 Ket + 41 End +------------------------------------------------------------------ + 
+/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation - compiled block : 206 +Memory allocation - code portion : 64 +------------------------------------------------------------------ + 0 28 Bra + 3 6 CBra 1 + 7 a + 9 6 Ket + 12 Any + 13 Any + 14 Any + 15 \1 + 17 bbb + 23 3 Recurse + 26 d + 28 28 Ket + 31 End +------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation - compiled block : 198 +Memory allocation - code portion : 62 +------------------------------------------------------------------ + 0 27 Bra + 3 abc + 9 Callout 255 10 1 + 15 de + 19 Callout 0 16 1 + 25 f + 27 27 Ket + 30 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation - compiled block : 242 +Memory allocation - code portion : 106 +------------------------------------------------------------------ + 0 49 Bra + 3 Callout 255 0 1 + 9 a + 11 Callout 255 1 1 + 17 b + 19 Callout 255 2 1 + 25 c + 27 Callout 255 3 1 + 33 d + 35 Callout 255 4 1 + 41 e + 43 Callout 255 5 0 + 49 49 Ket + 52 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{100} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\x{1000}/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{1000} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation - compiled block : 156 +Memory allocation - code portion : 20 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{10000} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/\x{100000}/utf +Memory 
allocation - compiled block : 156 +Memory allocation - code portion : 20 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{100000} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation - compiled block : 156 +Memory allocation - code portion : 20 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{10ffff} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/[\x{ff}]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{ff} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{100} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{80} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\xff/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{ff} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 A\x{2262}\x{391}. 
+ 11 11 Ket + 14 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' +Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation - compiled block : 162 +Memory allocation - code portion : 26 +------------------------------------------------------------------ + 0 9 Bra + 3 \x{d55c}\x{ad6d}\x{c5b4} + 9 9 Ket + 12 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{d55c} +Last code unit = \x{c5b4} +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation - compiled block : 162 +Memory allocation - code portion : 26 +------------------------------------------------------------------ + 0 9 Bra + 3 \x{65e5}\x{672c}\x{8a9e} + 9 9 Ket + 12 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{65e5} +Last code unit = \x{8a9e} +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{100} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation - compiled block : 196 +Memory allocation - code portion : 60 +------------------------------------------------------------------ + 0 26 Bra + 3 [Z\x{100}] + 26 26 Ket + 29 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation - compiled block : 168 +Memory allocation - code portion : 32 +------------------------------------------------------------------ + 0 12 Bra + 3 ^ + 4 [\x{100}-\x{150}] + 12 12 Ket + 15 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E]/utf +Memory allocation - compiled 
block : 168 +Memory allocation - code portion : 32 +------------------------------------------------------------------ + 0 12 Bra + 3 ^ + 4 [\x{100}-\x{150}] + 12 12 Ket + 15 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E/utf +Failed: error 106 at offset 13: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\p{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\P{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\P{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\p{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[abc\p{L}\x{0660}]/utf +Memory allocation - compiled block : 202 +Memory allocation - code portion : 66 +------------------------------------------------------------------ + 0 29 Bra + 3 [a-c\p{L}\x{660}] + 29 29 Ket + 32 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\p{Nd}] + 11 11 Ket + 14 End 
+------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation - compiled block : 200 +Memory allocation - code portion : 64 +------------------------------------------------------------------ + 0 28 Bra + 3 [+\-\p{Nd}]++ + 28 28 Ket + 31 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation - compiled block : 172 +Memory allocation - code portion : 36 +------------------------------------------------------------------ + 0 14 Bra + 3 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 14 14 Ket + 17 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation - compiled block : 172 +Memory allocation - code portion : 36 +------------------------------------------------------------------ + 0 14 Bra + 3 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 14 14 Ket + 17 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\x{104}-\x{109}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation - compiled block : 206 +Memory allocation - code portion : 70 +------------------------------------------------------------------ + 0 31 Bra + 3 25 CBra 1 + 7 Brazero + 8 17 SCBra 2 + 12 7 Cond + 15 1 Cond ref + 17 0 + 19 3 Alt + 22 10 Ket + 25 17 KetRmax + 28 25 Ket + 31 31 Ket + 34 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation - compiled block : 192 +Memory allocation - code portion : 56 +------------------------------------------------------------------ + 0 24 Bra + 3 18 CBra 1 + 7 Brazero + 8 7 SCond + 11 1 Cond ref + 13 0 + 15 3 Alt + 18 10 KetRmax + 21 18 Ket + 
24 24 Ket + 27 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{aa} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{aa} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^a]/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 [^a] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 [^a] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 [^\x{aa}] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation - compiled 
block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 [^\x{aa}] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 11 Bra + 3 [^\p{Nd}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 3 [\P{L}\P{Cc}]++ + 15 15 Ket + 18 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 3 [\P{Cc}\P{L}]++ + 15 15 Ket + 18 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 12 Bra + 3 [\p{L}]++ + 12 12 Ket + 15 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 3 [\P{L}\P{Xsp}]++ + 15 15 Ket + 18 End +------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 70 Bra + 3 abc + 9 6 CBra 1 + 13 d + 15 5 Alt + 18 e + 20 11 Ket + 23 *THEN + 24 x + 26 13 CBra 2 + 30 123 + 36 *THEN + 37 4 + 39 28 Alt + 42 567 + 48 6 CBra 3 + 52 b + 54 5 Alt + 57 q + 59 11 Ket + 62 *THEN + 63 xx + 67 41 Ket + 70 70 Ket + 73 End +------------------------------------------------------------------ + +/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 46 Bra + 3 Brazero + 4 37 SCBra 1 + 8 15 CBra 2 + 12 8 CBra 3 + 16 a + 18 \2 + 20 8 Ket + 23 15 Alt + 26 6 CBra 4 + 30 a* + 32 6 Ket + 
35 26 Recurse + 38 30 Ket + 41 37 KetRmax + 44 a?+ + 46 46 Ket + 49 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 22 Bra + 3 16 CBra 1 + 7 10 Recurse + 10 6 CBra 2 + 14 \1 + 16 6 Ket + 19 16 Ket + 22 22 Ket + 25 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 18 Bra + 3 9 Recurse + 6 9 Recurse + 9 6 CBra 1 + 13 a + 15 6 Ket + 18 18 Ket + 21 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 33 Bra + 3 Any + 4 10 CBra 1 + 8 26 Recurse + 11 0 Recurse + 14 5 Alt + 17 \1 + 19 4 Alt + 22 $ + 23 19 Ket + 26 4 CBra 2 + 30 4 Ket + 33 33 Ket + 36 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 43 Bra + 3 Any + 4 20 CBra 1 + 8 36 Recurse + 11 0 Recurse + 14 4 CBra 2 + 18 4 Ket + 21 14 Recurse + 24 5 Alt + 27 \1 + 29 4 Alt + 32 $ + 33 29 Ket + 36 4 CBra 3 + 40 4 Ket + 43 43 Ket + 46 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 69 Bra + 3 6 Recurse + 6 4 CBra 1 + 10 4 Ket + 13 53 CBra 2 + 17 43 CBra 3 + 21 36 CBra 4 + 25 29 CBra 5 + 29 20 CBra 6 + 33 13 CBra 7 + 37 6 Once + 40 \1+ + 43 6 Ket + 46 13 Ket + 49 20 Ket + 52 \x{85} + 54 29 KetRmax + 57 36 Ket + 60 3 Alt + 63 46 Ket + 66 53 Ket + 69 69 Ket + 72 End +------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. 
It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. + +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| 
+))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 110 Bra + 3 97 Once + 6 8 Cond + 9 1 Cond ref + 11 103 Recurse + 14 8 Ket + 17 8 Cond + 20 1 Cond ref + 22 103 Recurse + 25 8 Ket + 28 8 Cond + 31 1 Cond ref + 33 103 Recurse + 36 8 Ket + 39 8 Cond + 42 1 Cond ref + 44 103 Recurse + 47 8 Ket + 50 8 Cond + 53 1 Cond ref + 55 103 Recurse + 58 8 Ket + 61 8 Cond + 64 1 Cond ref + 66 103 Recurse + 69 8 Ket + 72 8 Cond + 75 1 Cond ref + 77 103 Recurse + 80 8 Ket + 83 14 SBraPos + 86 8 SCond + 89 1 Cond ref + 91 103 Recurse + 94 8 Ket + 97 14 KetRpos +100 97 Ket +103 4 CBra 1 +107 4 Ket +110 110 Ket +113 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match 
empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 58 Bra + 3 45 Once + 6 5 Cond + 9 1 Cond ref + 11 10 Alt + 14 a + 16 51 Recurse + 19 b + 21 15 Ket + 24 21 SBraPos + 27 5 SCond + 30 1 Cond ref + 32 10 Alt + 35 a + 37 51 Recurse + 40 b + 42 15 Ket + 45 21 KetRpos + 48 45 Ket + 51 4 CBra 1 + 55 4 Ket + 58 58 Ket + 61 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 194 Bra + 3 61 CBra 1 + 7 3 Recurse + 10 131 Recurse + 13 138 Recurse + 16 145 Recurse + 19 152 Recurse + 22 159 Recurse + 25 166 Recurse + 28 173 Recurse + 31 180 Recurse + 34 180 Recurse + 37 173 Recurse + 40 166 Recurse + 43 159 Recurse + 46 152 Recurse + 49 145 Recurse + 52 138 Recurse + 55 131 Recurse + 58 3 Recurse + 61 0 Recurse + 64 61 Ket + 67 61 SCBra 1 + 71 3 Recurse + 74 131 Recurse + 77 138 Recurse + 80 145 Recurse + 83 152 Recurse + 86 159 Recurse + 89 166 Recurse + 92 173 Recurse + 95 180 Recurse + 98 180 Recurse +101 173 Recurse +104 166 Recurse +107 159 Recurse +110 152 Recurse +113 145 Recurse +116 138 Recurse +119 131 Recurse +122 3 Recurse +125 0 Recurse +128 61 KetRmax +131 4 CBra 2 +135 4 Ket +138 4 CBra 3 +142 4 Ket +145 4 CBra 4 +149 4 Ket +152 4 CBra 5 +156 4 Ket +159 4 CBra 6 +163 4 Ket +166 4 CBra 7 +170 4 Ket +173 4 CBra 8 +177 4 Ket +180 4 CBra 9 +184 4 Ket +187 4 CBra 10 +191 4 Ket +194 194 Ket +197 End +------------------------------------------------------------------ +Capture group count = 10 +May match empty string +Subject length lower bound = 0 + 
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +#pattern -fullbincode + +/\[()]{65535}/expand + +# End of testinput8 diff --git a/testdata/testoutput8-16-4 b/testdata/testoutput8-16-4 new file mode 100644 index 0000000..4ec13ea --- /dev/null +++ b/testdata/testoutput8-16-4 @@ -0,0 +1,1074 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. 
+ +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation - compiled block : 168 +Memory allocation - code portion : 32 +------------------------------------------------------------------ + 0 12 Bra + 3 6 CBra 1 + 7 /i b + 9 6 Ket + 12 12 Ket + 15 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 20 Bra + 3 8 CBra 1 + 7 AllAny* + 9 X + 11 6 Alt + 14 ^ + 15 B + 17 14 Ket + 20 20 Ket + 23 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation - compiled block : 182 +Memory allocation - code portion : 46 +------------------------------------------------------------------ + 0 19 Bra + 3 7 Bra + 6 AllAny* + 8 X + 10 6 Alt + 13 ^ + 14 B + 16 13 Ket + 19 19 Ket + 22 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation - compiled block : 186 +Memory allocation - code portion : 50 +------------------------------------------------------------------ + 0 21 Bra + 3 ^ + 4 [0-9A-Za-z] + 21 21 Ket + 24 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 3 Bra + 3 3 Ket + 6 End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + 
+/x?+/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 x?+ + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/x++/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 x++ + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation - compiled block : 160 +Memory allocation - code portion : 24 +------------------------------------------------------------------ + 0 8 Bra + 3 x + 5 x{0,2}+ + 8 8 Ket + 11 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation - compiled block : 170 +Memory allocation - code portion : 34 +------------------------------------------------------------------ + 0 13 Bra + 3 Braposzero + 4 6 CBraPos 1 + 8 x + 10 6 KetRpos + 13 13 Ket + 16 End +------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation - compiled block : 302 +Memory allocation - code portion : 166 +------------------------------------------------------------------ + 0 79 Bra + 3 ^ + 4 72 CBra 1 + 8 6 CBra 2 + 12 a+ + 14 6 Ket + 17 22 CBra 3 + 21 [ab]+? 
+ 39 22 Ket + 42 22 CBra 4 + 46 [bc]+ + 64 22 Ket + 67 6 CBra 5 + 71 \w*+ + 73 6 Ket + 76 72 Ket + 79 79 Ket + 82 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 1788 +Memory allocation - code portion : 1652 +------------------------------------------------------------------ + 0 822 Bra + 3 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +821 \b +822 822 Ket +825 End +------------------------------------------------------------------ + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 1768 +Memory allocation - code portion : 1632 
+------------------------------------------------------------------ + 0 812 Bra + 3 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +811 \b +812 812 Ket +815 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation - compiled block : 178 +Memory allocation - code portion : 42 +------------------------------------------------------------------ + 0 17 Bra + 3 11 CBra 1 + 7 a + 9 3 Recurse + 12 b + 14 11 Ket + 17 17 Ket + 20 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation - compiled block : 190 +Memory allocation - code portion : 54 +------------------------------------------------------------------ + 0 23 Bra + 3 17 CBra 1 + 7 a + 9 6 SBra + 12 3 Recurse + 15 6 KetRmax + 18 b + 20 17 Ket + 23 23 Ket + 26 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation - compiled block : 256 +Memory allocation - code portion : 68 +------------------------------------------------------------------ + 0 30 Bra + 3 a + 5 6 CBra 1 + 9 b + 11 5 Alt + 14 c + 16 11 Ket + 19 d + 21 6 CBra 2 + 25 e + 27 6 Ket + 30 30 Ket + 33 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation - compiled block : 238 +Memory allocation - code portion : 84 +------------------------------------------------------------------ + 0 38 Bra + 3 23 Bra + 6 a + 8 15 CBra 1 + 12 c + 14 6 CBra 2 + 18 d + 20 6 Ket + 23 15 Ket + 26 23 Ket + 29 6 CBra 3 + 33 a + 35 6 Ket + 38 38 Ket + 41 End +------------------------------------------------------------------ + 
+/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation - compiled block : 206 +Memory allocation - code portion : 64 +------------------------------------------------------------------ + 0 28 Bra + 3 6 CBra 1 + 7 a + 9 6 Ket + 12 Any + 13 Any + 14 Any + 15 \1 + 17 bbb + 23 3 Recurse + 26 d + 28 28 Ket + 31 End +------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation - compiled block : 198 +Memory allocation - code portion : 62 +------------------------------------------------------------------ + 0 27 Bra + 3 abc + 9 Callout 255 10 1 + 15 de + 19 Callout 0 16 1 + 25 f + 27 27 Ket + 30 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation - compiled block : 242 +Memory allocation - code portion : 106 +------------------------------------------------------------------ + 0 49 Bra + 3 Callout 255 0 1 + 9 a + 11 Callout 255 1 1 + 17 b + 19 Callout 255 2 1 + 25 c + 27 Callout 255 3 1 + 33 d + 35 Callout 255 4 1 + 41 e + 43 Callout 255 5 0 + 49 49 Ket + 52 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{100} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\x{1000}/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{1000} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation - compiled block : 156 +Memory allocation - code portion : 20 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{10000} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/\x{100000}/utf +Memory 
allocation - compiled block : 156 +Memory allocation - code portion : 20 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{100000} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation - compiled block : 156 +Memory allocation - code portion : 20 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{10ffff} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/[\x{ff}]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{ff} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{100} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{80} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\xff/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{ff} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 A\x{2262}\x{391}. 
+ 11 11 Ket + 14 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' +Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation - compiled block : 162 +Memory allocation - code portion : 26 +------------------------------------------------------------------ + 0 9 Bra + 3 \x{d55c}\x{ad6d}\x{c5b4} + 9 9 Ket + 12 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{d55c} +Last code unit = \x{c5b4} +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation - compiled block : 162 +Memory allocation - code portion : 26 +------------------------------------------------------------------ + 0 9 Bra + 3 \x{65e5}\x{672c}\x{8a9e} + 9 9 Ket + 12 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{65e5} +Last code unit = \x{8a9e} +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{100} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation - compiled block : 196 +Memory allocation - code portion : 60 +------------------------------------------------------------------ + 0 26 Bra + 3 [Z\x{100}] + 26 26 Ket + 29 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation - compiled block : 168 +Memory allocation - code portion : 32 +------------------------------------------------------------------ + 0 12 Bra + 3 ^ + 4 [\x{100}-\x{150}] + 12 12 Ket + 15 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E]/utf +Memory allocation - compiled 
block : 168 +Memory allocation - code portion : 32 +------------------------------------------------------------------ + 0 12 Bra + 3 ^ + 4 [\x{100}-\x{150}] + 12 12 Ket + 15 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E/utf +Failed: error 106 at offset 13: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\p{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\P{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\P{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\p{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[abc\p{L}\x{0660}]/utf +Memory allocation - compiled block : 202 +Memory allocation - code portion : 66 +------------------------------------------------------------------ + 0 29 Bra + 3 [a-c\p{L}\x{660}] + 29 29 Ket + 32 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\p{Nd}] + 11 11 Ket + 14 End 
+------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation - compiled block : 200 +Memory allocation - code portion : 64 +------------------------------------------------------------------ + 0 28 Bra + 3 [+\-\p{Nd}]++ + 28 28 Ket + 31 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation - compiled block : 172 +Memory allocation - code portion : 36 +------------------------------------------------------------------ + 0 14 Bra + 3 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 14 14 Ket + 17 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation - compiled block : 172 +Memory allocation - code portion : 36 +------------------------------------------------------------------ + 0 14 Bra + 3 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 14 14 Ket + 17 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 11 Bra + 3 [\x{104}-\x{109}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation - compiled block : 206 +Memory allocation - code portion : 70 +------------------------------------------------------------------ + 0 31 Bra + 3 25 CBra 1 + 7 Brazero + 8 17 SCBra 2 + 12 7 Cond + 15 1 Cond ref + 17 0 + 19 3 Alt + 22 10 Ket + 25 17 KetRmax + 28 25 Ket + 31 31 Ket + 34 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation - compiled block : 192 +Memory allocation - code portion : 56 +------------------------------------------------------------------ + 0 24 Bra + 3 18 CBra 1 + 7 Brazero + 8 7 SCond + 11 1 Cond ref + 13 0 + 15 3 Alt + 18 10 KetRmax + 21 18 Ket + 
24 24 Ket + 27 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{aa} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{aa} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^a]/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 [^a] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 [^a] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 [^\x{aa}] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation - compiled 
block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 5 Bra + 3 [^\x{aa}] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 11 Bra + 3 [^\p{Nd}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 3 [\P{L}\P{Cc}]++ + 15 15 Ket + 18 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 3 [\P{Cc}\P{L}]++ + 15 15 Ket + 18 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 12 Bra + 3 [\p{L}]++ + 12 12 Ket + 15 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 3 [\P{L}\P{Xsp}]++ + 15 15 Ket + 18 End +------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 70 Bra + 3 abc + 9 6 CBra 1 + 13 d + 15 5 Alt + 18 e + 20 11 Ket + 23 *THEN + 24 x + 26 13 CBra 2 + 30 123 + 36 *THEN + 37 4 + 39 28 Alt + 42 567 + 48 6 CBra 3 + 52 b + 54 5 Alt + 57 q + 59 11 Ket + 62 *THEN + 63 xx + 67 41 Ket + 70 70 Ket + 73 End +------------------------------------------------------------------ + +/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 46 Bra + 3 Brazero + 4 37 SCBra 1 + 8 15 CBra 2 + 12 8 CBra 3 + 16 a + 18 \2 + 20 8 Ket + 23 15 Alt + 26 6 CBra 4 + 30 a* + 32 6 Ket + 
35 26 Recurse + 38 30 Ket + 41 37 KetRmax + 44 a?+ + 46 46 Ket + 49 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 22 Bra + 3 16 CBra 1 + 7 10 Recurse + 10 6 CBra 2 + 14 \1 + 16 6 Ket + 19 16 Ket + 22 22 Ket + 25 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 18 Bra + 3 9 Recurse + 6 9 Recurse + 9 6 CBra 1 + 13 a + 15 6 Ket + 18 18 Ket + 21 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 33 Bra + 3 Any + 4 10 CBra 1 + 8 26 Recurse + 11 0 Recurse + 14 5 Alt + 17 \1 + 19 4 Alt + 22 $ + 23 19 Ket + 26 4 CBra 2 + 30 4 Ket + 33 33 Ket + 36 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 43 Bra + 3 Any + 4 20 CBra 1 + 8 36 Recurse + 11 0 Recurse + 14 4 CBra 2 + 18 4 Ket + 21 14 Recurse + 24 5 Alt + 27 \1 + 29 4 Alt + 32 $ + 33 29 Ket + 36 4 CBra 3 + 40 4 Ket + 43 43 Ket + 46 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 69 Bra + 3 6 Recurse + 6 4 CBra 1 + 10 4 Ket + 13 53 CBra 2 + 17 43 CBra 3 + 21 36 CBra 4 + 25 29 CBra 5 + 29 20 CBra 6 + 33 13 CBra 7 + 37 6 Once + 40 \1+ + 43 6 Ket + 46 13 Ket + 49 20 Ket + 52 \x{85} + 54 29 KetRmax + 57 36 Ket + 60 3 Alt + 63 46 Ket + 66 53 Ket + 69 69 Ket + 72 End +------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. 
It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. + +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| 
+))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 110 Bra + 3 97 Once + 6 8 Cond + 9 1 Cond ref + 11 103 Recurse + 14 8 Ket + 17 8 Cond + 20 1 Cond ref + 22 103 Recurse + 25 8 Ket + 28 8 Cond + 31 1 Cond ref + 33 103 Recurse + 36 8 Ket + 39 8 Cond + 42 1 Cond ref + 44 103 Recurse + 47 8 Ket + 50 8 Cond + 53 1 Cond ref + 55 103 Recurse + 58 8 Ket + 61 8 Cond + 64 1 Cond ref + 66 103 Recurse + 69 8 Ket + 72 8 Cond + 75 1 Cond ref + 77 103 Recurse + 80 8 Ket + 83 14 SBraPos + 86 8 SCond + 89 1 Cond ref + 91 103 Recurse + 94 8 Ket + 97 14 KetRpos +100 97 Ket +103 4 CBra 1 +107 4 Ket +110 110 Ket +113 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match 
empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 58 Bra + 3 45 Once + 6 5 Cond + 9 1 Cond ref + 11 10 Alt + 14 a + 16 51 Recurse + 19 b + 21 15 Ket + 24 21 SBraPos + 27 5 SCond + 30 1 Cond ref + 32 10 Alt + 35 a + 37 51 Recurse + 40 b + 42 15 Ket + 45 21 KetRpos + 48 45 Ket + 51 4 CBra 1 + 55 4 Ket + 58 58 Ket + 61 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 194 Bra + 3 61 CBra 1 + 7 3 Recurse + 10 131 Recurse + 13 138 Recurse + 16 145 Recurse + 19 152 Recurse + 22 159 Recurse + 25 166 Recurse + 28 173 Recurse + 31 180 Recurse + 34 180 Recurse + 37 173 Recurse + 40 166 Recurse + 43 159 Recurse + 46 152 Recurse + 49 145 Recurse + 52 138 Recurse + 55 131 Recurse + 58 3 Recurse + 61 0 Recurse + 64 61 Ket + 67 61 SCBra 1 + 71 3 Recurse + 74 131 Recurse + 77 138 Recurse + 80 145 Recurse + 83 152 Recurse + 86 159 Recurse + 89 166 Recurse + 92 173 Recurse + 95 180 Recurse + 98 180 Recurse +101 173 Recurse +104 166 Recurse +107 159 Recurse +110 152 Recurse +113 145 Recurse +116 138 Recurse +119 131 Recurse +122 3 Recurse +125 0 Recurse +128 61 KetRmax +131 4 CBra 2 +135 4 Ket +138 4 CBra 3 +142 4 Ket +145 4 CBra 4 +149 4 Ket +152 4 CBra 5 +156 4 Ket +159 4 CBra 6 +163 4 Ket +166 4 CBra 7 +170 4 Ket +173 4 CBra 8 +177 4 Ket +180 4 CBra 9 +184 4 Ket +187 4 CBra 10 +191 4 Ket +194 194 Ket +197 End +------------------------------------------------------------------ +Capture group count = 10 +May match empty string +Subject length lower bound = 0 + 
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +#pattern -fullbincode + +/\[()]{65535}/expand + +# End of testinput8 diff --git a/testdata/testoutput8-32-2 b/testdata/testoutput8-32-2 new file mode 100644 index 0000000..d76f3aa --- /dev/null +++ b/testdata/testoutput8-32-2 @@ -0,0 +1,1074 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. 
+ +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 5 CBra 1 + 5 /i b + 7 5 Ket + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation - compiled block : 212 +Memory allocation - code portion : 76 +------------------------------------------------------------------ + 0 16 Bra + 2 7 CBra 1 + 5 AllAny* + 7 X + 9 5 Alt + 11 ^ + 12 B + 14 12 Ket + 16 16 Ket + 18 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation - compiled block : 208 +Memory allocation - code portion : 72 +------------------------------------------------------------------ + 0 15 Bra + 2 6 Bra + 4 AllAny* + 6 X + 8 5 Alt + 10 ^ + 11 B + 13 11 Ket + 15 15 Ket + 17 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation - compiled block : 196 +Memory allocation - code portion : 60 +------------------------------------------------------------------ + 0 12 Bra + 2 ^ + 3 [0-9A-Za-z] + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation - compiled block : 156 +Memory allocation - code portion : 20 +------------------------------------------------------------------ + 0 2 Bra + 2 2 Ket + 4 End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + +/x?+/ 
+Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 x?+ + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/x++/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 x++ + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation - compiled block : 176 +Memory allocation - code portion : 40 +------------------------------------------------------------------ + 0 7 Bra + 2 x + 4 x{0,2}+ + 7 7 Ket + 9 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation - compiled block : 188 +Memory allocation - code portion : 52 +------------------------------------------------------------------ + 0 10 Bra + 2 Braposzero + 3 5 CBraPos 1 + 6 x + 8 5 KetRpos + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation - compiled block : 356 +Memory allocation - code portion : 220 +------------------------------------------------------------------ + 0 52 Bra + 2 ^ + 3 47 CBra 1 + 6 5 CBra 2 + 9 a+ + 11 5 Ket + 13 13 CBra 3 + 16 [ab]+? 
+ 26 13 Ket + 28 13 CBra 4 + 31 [bc]+ + 41 13 Ket + 43 5 CBra 5 + 46 \w*+ + 48 5 Ket + 50 47 Ket + 52 52 Ket + 54 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 3432 +Memory allocation - code portion : 3296 +------------------------------------------------------------------ + 0 821 Bra + 2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +820 \b +821 821 Ket +823 End +------------------------------------------------------------------ + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 3392 +Memory allocation - code portion : 3256 
+------------------------------------------------------------------ + 0 811 Bra + 2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +810 \b +811 811 Ket +813 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation - compiled block : 200 +Memory allocation - code portion : 64 +------------------------------------------------------------------ + 0 13 Bra + 2 9 CBra 1 + 5 a + 7 2 Recurse + 9 b + 11 9 Ket + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation - compiled block : 216 +Memory allocation - code portion : 80 +------------------------------------------------------------------ + 0 17 Bra + 2 13 CBra 1 + 5 a + 7 4 SBra + 9 2 Recurse + 11 4 KetRmax + 13 b + 15 13 Ket + 17 17 Ket + 19 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation - compiled block : 348 +Memory allocation - code portion : 108 +------------------------------------------------------------------ + 0 24 Bra + 2 a + 4 5 CBra 1 + 7 b + 9 4 Alt + 11 c + 13 9 Ket + 15 d + 17 5 CBra 2 + 20 e + 22 5 Ket + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation - compiled block : 300 +Memory allocation - code portion : 128 +------------------------------------------------------------------ + 0 29 Bra + 2 18 Bra + 4 a + 6 12 CBra 1 + 9 c + 11 5 CBra 2 + 14 d + 16 5 Ket + 18 12 Ket + 20 18 Ket + 22 5 CBra 3 + 25 a + 27 5 Ket + 29 29 Ket + 31 End +------------------------------------------------------------------ + 
+/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation - compiled block : 256 +Memory allocation - code portion : 108 +------------------------------------------------------------------ + 0 24 Bra + 2 5 CBra 1 + 5 a + 7 5 Ket + 9 Any + 10 Any + 11 Any + 12 \1 + 14 bbb + 20 2 Recurse + 22 d + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation - compiled block : 236 +Memory allocation - code portion : 100 +------------------------------------------------------------------ + 0 22 Bra + 2 abc + 8 Callout 255 10 1 + 12 de + 16 Callout 0 16 1 + 20 f + 22 22 Ket + 24 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation - compiled block : 292 +Memory allocation - code portion : 156 +------------------------------------------------------------------ + 0 36 Bra + 2 Callout 255 0 1 + 6 a + 8 Callout 255 1 1 + 12 b + 14 Callout 255 2 1 + 18 c + 20 Callout 255 3 1 + 24 d + 26 Callout 255 4 1 + 30 e + 32 Callout 255 5 0 + 36 36 Ket + 38 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{1000}/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{1000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{10000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{100000}/utf +Memory 
allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{10ffff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/[\x{ff}]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{ff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{80} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\xff/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{ff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation - compiled block : 188 +Memory allocation - code portion : 52 +------------------------------------------------------------------ + 0 10 Bra + 2 A\x{2262}\x{391}. 
+ 10 10 Ket + 12 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' +Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation - compiled block : 180 +Memory allocation - code portion : 44 +------------------------------------------------------------------ + 0 8 Bra + 2 \x{d55c}\x{ad6d}\x{c5b4} + 8 8 Ket + 10 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{d55c} +Last code unit = \x{c5b4} +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation - compiled block : 180 +Memory allocation - code portion : 44 +------------------------------------------------------------------ + 0 8 Bra + 2 \x{65e5}\x{672c}\x{8a9e} + 8 8 Ket + 10 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{65e5} +Last code unit = \x{8a9e} +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation - compiled block : 212 +Memory allocation - code portion : 76 +------------------------------------------------------------------ + 0 16 Bra + 2 [Z\x{100}] + 16 16 Ket + 18 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation - compiled block : 188 +Memory allocation - code portion : 52 +------------------------------------------------------------------ + 0 10 Bra + 2 ^ + 3 [\x{100}-\x{150}] + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E]/utf +Memory allocation - compiled 
block : 188 +Memory allocation - code portion : 52 +------------------------------------------------------------------ + 0 10 Bra + 2 ^ + 3 [\x{100}-\x{150}] + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E/utf +Failed: error 106 at offset 13: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\P{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\P{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[abc\p{L}\x{0660}]/utf +Memory allocation - compiled block : 224 +Memory allocation - code portion : 88 +------------------------------------------------------------------ + 0 19 Bra + 2 [a-c\p{L}\x{660}] + 19 19 Ket + 21 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{Nd}] + 9 9 Ket + 11 End 
+------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation - compiled block : 220 +Memory allocation - code portion : 84 +------------------------------------------------------------------ + 0 18 Bra + 2 [+\-\p{Nd}]++ + 18 18 Ket + 20 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation - compiled block : 196 +Memory allocation - code portion : 60 +------------------------------------------------------------------ + 0 12 Bra + 2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation - compiled block : 196 +Memory allocation - code portion : 60 +------------------------------------------------------------------ + 0 12 Bra + 2 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\x{104}-\x{109}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation - compiled block : 240 +Memory allocation - code portion : 104 +------------------------------------------------------------------ + 0 23 Bra + 2 19 CBra 1 + 5 Brazero + 6 13 SCBra 2 + 9 6 Cond + 11 1 Cond ref + 13 0 + 15 2 Alt + 17 8 Ket + 19 13 KetRmax + 21 19 Ket + 23 23 Ket + 25 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation - compiled block : 220 +Memory allocation - code portion : 84 +------------------------------------------------------------------ + 0 18 Bra + 2 14 CBra 1 + 5 Brazero + 6 6 SCond + 8 1 Cond ref + 10 0 + 12 2 Alt + 14 8 KetRmax + 16 14 Ket + 18 18 
Ket + 20 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{aa} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{aa} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^a]/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^a] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^a] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^\x{aa}] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation - compiled block : 
164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^\x{aa}] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 9 Bra + 2 [^\p{Nd}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{L}\P{Cc}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{Cc}\P{L}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 10 Bra + 2 [\p{L}]++ + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{L}\P{Xsp}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 60 Bra + 2 abc + 8 5 CBra 1 + 11 d + 13 4 Alt + 15 e + 17 9 Ket + 19 *THEN + 20 x + 22 12 CBra 2 + 25 123 + 31 *THEN + 32 4 + 34 24 Alt + 36 567 + 42 5 CBra 3 + 45 b + 47 4 Alt + 49 q + 51 9 Ket + 53 *THEN + 54 xx + 58 36 Ket + 60 60 Ket + 62 End +------------------------------------------------------------------ + +/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 35 Bra + 2 Brazero + 3 28 SCBra 1 + 6 12 CBra 2 + 9 7 CBra 3 + 12 a + 14 \2 + 16 7 Ket + 18 11 Alt + 20 5 CBra 4 + 23 a* + 25 5 Ket + 27 20 Recurse 
+ 29 23 Ket + 31 28 KetRmax + 33 a?+ + 35 35 Ket + 37 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 16 Bra + 2 12 CBra 1 + 5 7 Recurse + 7 5 CBra 2 + 10 \1 + 12 5 Ket + 14 12 Ket + 16 16 Ket + 18 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 13 Bra + 2 6 Recurse + 4 6 Recurse + 6 5 CBra 1 + 9 a + 11 5 Ket + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 24 Bra + 2 Any + 3 7 CBra 1 + 6 19 Recurse + 8 0 Recurse + 10 4 Alt + 12 \1 + 14 3 Alt + 16 $ + 17 14 Ket + 19 3 CBra 2 + 22 3 Ket + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 31 Bra + 2 Any + 3 14 CBra 1 + 6 26 Recurse + 8 0 Recurse + 10 3 CBra 2 + 13 3 Ket + 15 10 Recurse + 17 4 Alt + 19 \1 + 21 3 Alt + 23 $ + 24 21 Ket + 26 3 CBra 3 + 29 3 Ket + 31 31 Ket + 33 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 50 Bra + 2 4 Recurse + 4 3 CBra 1 + 7 3 Ket + 9 39 CBra 2 + 12 32 CBra 3 + 15 27 CBra 4 + 18 22 CBra 5 + 21 15 CBra 6 + 24 10 CBra 7 + 27 5 Once + 29 \1+ + 32 5 Ket + 34 10 Ket + 36 15 Ket + 38 \x{85} + 40 22 KetRmax + 42 27 Ket + 44 2 Alt + 46 34 Ket + 48 39 Ket + 50 50 Ket + 52 End +------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. 
+ +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| 
+))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 79 Bra + 2 70 Once + 4 6 Cond + 6 1 Cond ref + 8 74 Recurse + 10 6 Ket + 12 6 Cond + 14 1 Cond ref + 16 74 Recurse + 18 6 Ket + 20 6 Cond + 22 1 Cond ref + 24 74 Recurse + 26 6 Ket + 28 6 Cond + 30 1 Cond ref + 32 74 Recurse + 34 6 Ket + 36 6 Cond + 38 1 Cond ref + 40 74 Recurse + 42 6 Ket + 44 6 Cond + 46 1 Cond ref + 48 74 Recurse + 50 6 Ket + 52 6 Cond + 54 1 Cond ref + 56 74 Recurse + 58 6 Ket + 60 10 SBraPos + 62 6 SCond + 64 1 Cond ref + 66 74 Recurse + 68 6 Ket + 70 10 KetRpos + 72 70 Ket + 74 3 CBra 1 + 77 3 Ket + 79 79 Ket + 81 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string 
+Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 43 Bra + 2 34 Once + 4 4 Cond + 6 1 Cond ref + 8 8 Alt + 10 a + 12 38 Recurse + 14 b + 16 12 Ket + 18 16 SBraPos + 20 4 SCond + 22 1 Cond ref + 24 8 Alt + 26 a + 28 38 Recurse + 30 b + 32 12 Ket + 34 16 KetRpos + 36 34 Ket + 38 3 CBra 1 + 41 3 Ket + 43 43 Ket + 45 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 133 Bra + 2 41 CBra 1 + 5 2 Recurse + 7 88 Recurse + 9 93 Recurse + 11 98 Recurse + 13 103 Recurse + 15 108 Recurse + 17 113 Recurse + 19 118 Recurse + 21 123 Recurse + 23 123 Recurse + 25 118 Recurse + 27 113 Recurse + 29 108 Recurse + 31 103 Recurse + 33 98 Recurse + 35 93 Recurse + 37 88 Recurse + 39 2 Recurse + 41 0 Recurse + 43 41 Ket + 45 41 SCBra 1 + 48 2 Recurse + 50 88 Recurse + 52 93 Recurse + 54 98 Recurse + 56 103 Recurse + 58 108 Recurse + 60 113 Recurse + 62 118 Recurse + 64 123 Recurse + 66 123 Recurse + 68 118 Recurse + 70 113 Recurse + 72 108 Recurse + 74 103 Recurse + 76 98 Recurse + 78 93 Recurse + 80 88 Recurse + 82 2 Recurse + 84 0 Recurse + 86 41 KetRmax + 88 3 CBra 2 + 91 3 Ket + 93 3 CBra 3 + 96 3 Ket + 98 3 CBra 4 +101 3 Ket +103 3 CBra 5 +106 3 Ket +108 3 CBra 6 +111 3 Ket +113 3 CBra 7 +116 3 Ket +118 3 CBra 8 +121 3 Ket +123 3 CBra 9 +126 3 Ket +128 3 CBra 10 +131 3 Ket +133 133 Ket +135 End +------------------------------------------------------------------ +Capture group count = 10 +May match empty string +Subject length lower bound = 0 + 
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +#pattern -fullbincode + +/\[()]{65535}/expand + +# End of testinput8 diff --git a/testdata/testoutput8-32-3 b/testdata/testoutput8-32-3 new file mode 100644 index 0000000..d76f3aa --- /dev/null +++ b/testdata/testoutput8-32-3 @@ -0,0 +1,1074 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. 
+ +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 5 CBra 1 + 5 /i b + 7 5 Ket + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation - compiled block : 212 +Memory allocation - code portion : 76 +------------------------------------------------------------------ + 0 16 Bra + 2 7 CBra 1 + 5 AllAny* + 7 X + 9 5 Alt + 11 ^ + 12 B + 14 12 Ket + 16 16 Ket + 18 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation - compiled block : 208 +Memory allocation - code portion : 72 +------------------------------------------------------------------ + 0 15 Bra + 2 6 Bra + 4 AllAny* + 6 X + 8 5 Alt + 10 ^ + 11 B + 13 11 Ket + 15 15 Ket + 17 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation - compiled block : 196 +Memory allocation - code portion : 60 +------------------------------------------------------------------ + 0 12 Bra + 2 ^ + 3 [0-9A-Za-z] + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation - compiled block : 156 +Memory allocation - code portion : 20 +------------------------------------------------------------------ + 0 2 Bra + 2 2 Ket + 4 End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + +/x?+/ 
+Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 x?+ + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/x++/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 x++ + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation - compiled block : 176 +Memory allocation - code portion : 40 +------------------------------------------------------------------ + 0 7 Bra + 2 x + 4 x{0,2}+ + 7 7 Ket + 9 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation - compiled block : 188 +Memory allocation - code portion : 52 +------------------------------------------------------------------ + 0 10 Bra + 2 Braposzero + 3 5 CBraPos 1 + 6 x + 8 5 KetRpos + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation - compiled block : 356 +Memory allocation - code portion : 220 +------------------------------------------------------------------ + 0 52 Bra + 2 ^ + 3 47 CBra 1 + 6 5 CBra 2 + 9 a+ + 11 5 Ket + 13 13 CBra 3 + 16 [ab]+? 
+ 26 13 Ket + 28 13 CBra 4 + 31 [bc]+ + 41 13 Ket + 43 5 CBra 5 + 46 \w*+ + 48 5 Ket + 50 47 Ket + 52 52 Ket + 54 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 3432 +Memory allocation - code portion : 3296 +------------------------------------------------------------------ + 0 821 Bra + 2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +820 \b +821 821 Ket +823 End +------------------------------------------------------------------ + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 3392 +Memory allocation - code portion : 3256 
+------------------------------------------------------------------ + 0 811 Bra + 2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +810 \b +811 811 Ket +813 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation - compiled block : 200 +Memory allocation - code portion : 64 +------------------------------------------------------------------ + 0 13 Bra + 2 9 CBra 1 + 5 a + 7 2 Recurse + 9 b + 11 9 Ket + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation - compiled block : 216 +Memory allocation - code portion : 80 +------------------------------------------------------------------ + 0 17 Bra + 2 13 CBra 1 + 5 a + 7 4 SBra + 9 2 Recurse + 11 4 KetRmax + 13 b + 15 13 Ket + 17 17 Ket + 19 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation - compiled block : 348 +Memory allocation - code portion : 108 +------------------------------------------------------------------ + 0 24 Bra + 2 a + 4 5 CBra 1 + 7 b + 9 4 Alt + 11 c + 13 9 Ket + 15 d + 17 5 CBra 2 + 20 e + 22 5 Ket + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation - compiled block : 300 +Memory allocation - code portion : 128 +------------------------------------------------------------------ + 0 29 Bra + 2 18 Bra + 4 a + 6 12 CBra 1 + 9 c + 11 5 CBra 2 + 14 d + 16 5 Ket + 18 12 Ket + 20 18 Ket + 22 5 CBra 3 + 25 a + 27 5 Ket + 29 29 Ket + 31 End +------------------------------------------------------------------ + 
+/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation - compiled block : 256 +Memory allocation - code portion : 108 +------------------------------------------------------------------ + 0 24 Bra + 2 5 CBra 1 + 5 a + 7 5 Ket + 9 Any + 10 Any + 11 Any + 12 \1 + 14 bbb + 20 2 Recurse + 22 d + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation - compiled block : 236 +Memory allocation - code portion : 100 +------------------------------------------------------------------ + 0 22 Bra + 2 abc + 8 Callout 255 10 1 + 12 de + 16 Callout 0 16 1 + 20 f + 22 22 Ket + 24 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation - compiled block : 292 +Memory allocation - code portion : 156 +------------------------------------------------------------------ + 0 36 Bra + 2 Callout 255 0 1 + 6 a + 8 Callout 255 1 1 + 12 b + 14 Callout 255 2 1 + 18 c + 20 Callout 255 3 1 + 24 d + 26 Callout 255 4 1 + 30 e + 32 Callout 255 5 0 + 36 36 Ket + 38 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{1000}/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{1000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{10000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{100000}/utf +Memory 
allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{10ffff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/[\x{ff}]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{ff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{80} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\xff/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{ff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation - compiled block : 188 +Memory allocation - code portion : 52 +------------------------------------------------------------------ + 0 10 Bra + 2 A\x{2262}\x{391}. 
+ 10 10 Ket + 12 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' +Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation - compiled block : 180 +Memory allocation - code portion : 44 +------------------------------------------------------------------ + 0 8 Bra + 2 \x{d55c}\x{ad6d}\x{c5b4} + 8 8 Ket + 10 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{d55c} +Last code unit = \x{c5b4} +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation - compiled block : 180 +Memory allocation - code portion : 44 +------------------------------------------------------------------ + 0 8 Bra + 2 \x{65e5}\x{672c}\x{8a9e} + 8 8 Ket + 10 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{65e5} +Last code unit = \x{8a9e} +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation - compiled block : 212 +Memory allocation - code portion : 76 +------------------------------------------------------------------ + 0 16 Bra + 2 [Z\x{100}] + 16 16 Ket + 18 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation - compiled block : 188 +Memory allocation - code portion : 52 +------------------------------------------------------------------ + 0 10 Bra + 2 ^ + 3 [\x{100}-\x{150}] + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E]/utf +Memory allocation - compiled 
block : 188 +Memory allocation - code portion : 52 +------------------------------------------------------------------ + 0 10 Bra + 2 ^ + 3 [\x{100}-\x{150}] + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E/utf +Failed: error 106 at offset 13: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\P{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\P{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[abc\p{L}\x{0660}]/utf +Memory allocation - compiled block : 224 +Memory allocation - code portion : 88 +------------------------------------------------------------------ + 0 19 Bra + 2 [a-c\p{L}\x{660}] + 19 19 Ket + 21 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{Nd}] + 9 9 Ket + 11 End 
+------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation - compiled block : 220 +Memory allocation - code portion : 84 +------------------------------------------------------------------ + 0 18 Bra + 2 [+\-\p{Nd}]++ + 18 18 Ket + 20 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation - compiled block : 196 +Memory allocation - code portion : 60 +------------------------------------------------------------------ + 0 12 Bra + 2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation - compiled block : 196 +Memory allocation - code portion : 60 +------------------------------------------------------------------ + 0 12 Bra + 2 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\x{104}-\x{109}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation - compiled block : 240 +Memory allocation - code portion : 104 +------------------------------------------------------------------ + 0 23 Bra + 2 19 CBra 1 + 5 Brazero + 6 13 SCBra 2 + 9 6 Cond + 11 1 Cond ref + 13 0 + 15 2 Alt + 17 8 Ket + 19 13 KetRmax + 21 19 Ket + 23 23 Ket + 25 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation - compiled block : 220 +Memory allocation - code portion : 84 +------------------------------------------------------------------ + 0 18 Bra + 2 14 CBra 1 + 5 Brazero + 6 6 SCond + 8 1 Cond ref + 10 0 + 12 2 Alt + 14 8 KetRmax + 16 14 Ket + 18 18 
Ket + 20 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{aa} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{aa} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^a]/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^a] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^a] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^\x{aa}] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation - compiled block : 
164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^\x{aa}] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 9 Bra + 2 [^\p{Nd}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{L}\P{Cc}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{Cc}\P{L}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 10 Bra + 2 [\p{L}]++ + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{L}\P{Xsp}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 60 Bra + 2 abc + 8 5 CBra 1 + 11 d + 13 4 Alt + 15 e + 17 9 Ket + 19 *THEN + 20 x + 22 12 CBra 2 + 25 123 + 31 *THEN + 32 4 + 34 24 Alt + 36 567 + 42 5 CBra 3 + 45 b + 47 4 Alt + 49 q + 51 9 Ket + 53 *THEN + 54 xx + 58 36 Ket + 60 60 Ket + 62 End +------------------------------------------------------------------ + +/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 35 Bra + 2 Brazero + 3 28 SCBra 1 + 6 12 CBra 2 + 9 7 CBra 3 + 12 a + 14 \2 + 16 7 Ket + 18 11 Alt + 20 5 CBra 4 + 23 a* + 25 5 Ket + 27 20 Recurse 
+ 29 23 Ket + 31 28 KetRmax + 33 a?+ + 35 35 Ket + 37 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 16 Bra + 2 12 CBra 1 + 5 7 Recurse + 7 5 CBra 2 + 10 \1 + 12 5 Ket + 14 12 Ket + 16 16 Ket + 18 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 13 Bra + 2 6 Recurse + 4 6 Recurse + 6 5 CBra 1 + 9 a + 11 5 Ket + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 24 Bra + 2 Any + 3 7 CBra 1 + 6 19 Recurse + 8 0 Recurse + 10 4 Alt + 12 \1 + 14 3 Alt + 16 $ + 17 14 Ket + 19 3 CBra 2 + 22 3 Ket + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 31 Bra + 2 Any + 3 14 CBra 1 + 6 26 Recurse + 8 0 Recurse + 10 3 CBra 2 + 13 3 Ket + 15 10 Recurse + 17 4 Alt + 19 \1 + 21 3 Alt + 23 $ + 24 21 Ket + 26 3 CBra 3 + 29 3 Ket + 31 31 Ket + 33 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 50 Bra + 2 4 Recurse + 4 3 CBra 1 + 7 3 Ket + 9 39 CBra 2 + 12 32 CBra 3 + 15 27 CBra 4 + 18 22 CBra 5 + 21 15 CBra 6 + 24 10 CBra 7 + 27 5 Once + 29 \1+ + 32 5 Ket + 34 10 Ket + 36 15 Ket + 38 \x{85} + 40 22 KetRmax + 42 27 Ket + 44 2 Alt + 46 34 Ket + 48 39 Ket + 50 50 Ket + 52 End +------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. 
+ +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| 
+))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 79 Bra + 2 70 Once + 4 6 Cond + 6 1 Cond ref + 8 74 Recurse + 10 6 Ket + 12 6 Cond + 14 1 Cond ref + 16 74 Recurse + 18 6 Ket + 20 6 Cond + 22 1 Cond ref + 24 74 Recurse + 26 6 Ket + 28 6 Cond + 30 1 Cond ref + 32 74 Recurse + 34 6 Ket + 36 6 Cond + 38 1 Cond ref + 40 74 Recurse + 42 6 Ket + 44 6 Cond + 46 1 Cond ref + 48 74 Recurse + 50 6 Ket + 52 6 Cond + 54 1 Cond ref + 56 74 Recurse + 58 6 Ket + 60 10 SBraPos + 62 6 SCond + 64 1 Cond ref + 66 74 Recurse + 68 6 Ket + 70 10 KetRpos + 72 70 Ket + 74 3 CBra 1 + 77 3 Ket + 79 79 Ket + 81 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string 
+Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 43 Bra + 2 34 Once + 4 4 Cond + 6 1 Cond ref + 8 8 Alt + 10 a + 12 38 Recurse + 14 b + 16 12 Ket + 18 16 SBraPos + 20 4 SCond + 22 1 Cond ref + 24 8 Alt + 26 a + 28 38 Recurse + 30 b + 32 12 Ket + 34 16 KetRpos + 36 34 Ket + 38 3 CBra 1 + 41 3 Ket + 43 43 Ket + 45 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 133 Bra + 2 41 CBra 1 + 5 2 Recurse + 7 88 Recurse + 9 93 Recurse + 11 98 Recurse + 13 103 Recurse + 15 108 Recurse + 17 113 Recurse + 19 118 Recurse + 21 123 Recurse + 23 123 Recurse + 25 118 Recurse + 27 113 Recurse + 29 108 Recurse + 31 103 Recurse + 33 98 Recurse + 35 93 Recurse + 37 88 Recurse + 39 2 Recurse + 41 0 Recurse + 43 41 Ket + 45 41 SCBra 1 + 48 2 Recurse + 50 88 Recurse + 52 93 Recurse + 54 98 Recurse + 56 103 Recurse + 58 108 Recurse + 60 113 Recurse + 62 118 Recurse + 64 123 Recurse + 66 123 Recurse + 68 118 Recurse + 70 113 Recurse + 72 108 Recurse + 74 103 Recurse + 76 98 Recurse + 78 93 Recurse + 80 88 Recurse + 82 2 Recurse + 84 0 Recurse + 86 41 KetRmax + 88 3 CBra 2 + 91 3 Ket + 93 3 CBra 3 + 96 3 Ket + 98 3 CBra 4 +101 3 Ket +103 3 CBra 5 +106 3 Ket +108 3 CBra 6 +111 3 Ket +113 3 CBra 7 +116 3 Ket +118 3 CBra 8 +121 3 Ket +123 3 CBra 9 +126 3 Ket +128 3 CBra 10 +131 3 Ket +133 133 Ket +135 End +------------------------------------------------------------------ +Capture group count = 10 +May match empty string +Subject length lower bound = 0 + 
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +#pattern -fullbincode + +/\[()]{65535}/expand + +# End of testinput8 diff --git a/testdata/testoutput8-32-4 b/testdata/testoutput8-32-4 new file mode 100644 index 0000000..d76f3aa --- /dev/null +++ b/testdata/testoutput8-32-4 @@ -0,0 +1,1074 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. 
+ +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 5 CBra 1 + 5 /i b + 7 5 Ket + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation - compiled block : 212 +Memory allocation - code portion : 76 +------------------------------------------------------------------ + 0 16 Bra + 2 7 CBra 1 + 5 AllAny* + 7 X + 9 5 Alt + 11 ^ + 12 B + 14 12 Ket + 16 16 Ket + 18 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation - compiled block : 208 +Memory allocation - code portion : 72 +------------------------------------------------------------------ + 0 15 Bra + 2 6 Bra + 4 AllAny* + 6 X + 8 5 Alt + 10 ^ + 11 B + 13 11 Ket + 15 15 Ket + 17 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation - compiled block : 196 +Memory allocation - code portion : 60 +------------------------------------------------------------------ + 0 12 Bra + 2 ^ + 3 [0-9A-Za-z] + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation - compiled block : 156 +Memory allocation - code portion : 20 +------------------------------------------------------------------ + 0 2 Bra + 2 2 Ket + 4 End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + +/x?+/ 
+Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 x?+ + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/x++/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 x++ + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation - compiled block : 176 +Memory allocation - code portion : 40 +------------------------------------------------------------------ + 0 7 Bra + 2 x + 4 x{0,2}+ + 7 7 Ket + 9 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation - compiled block : 188 +Memory allocation - code portion : 52 +------------------------------------------------------------------ + 0 10 Bra + 2 Braposzero + 3 5 CBraPos 1 + 6 x + 8 5 KetRpos + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation - compiled block : 356 +Memory allocation - code portion : 220 +------------------------------------------------------------------ + 0 52 Bra + 2 ^ + 3 47 CBra 1 + 6 5 CBra 2 + 9 a+ + 11 5 Ket + 13 13 CBra 3 + 16 [ab]+? 
+ 26 13 Ket + 28 13 CBra 4 + 31 [bc]+ + 41 13 Ket + 43 5 CBra 5 + 46 \w*+ + 48 5 Ket + 50 47 Ket + 52 52 Ket + 54 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 3432 +Memory allocation - code portion : 3296 +------------------------------------------------------------------ + 0 821 Bra + 2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +820 \b +821 821 Ket +823 End +------------------------------------------------------------------ + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 3392 +Memory allocation - code portion : 3256 
+------------------------------------------------------------------ + 0 811 Bra + 2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +810 \b +811 811 Ket +813 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation - compiled block : 200 +Memory allocation - code portion : 64 +------------------------------------------------------------------ + 0 13 Bra + 2 9 CBra 1 + 5 a + 7 2 Recurse + 9 b + 11 9 Ket + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation - compiled block : 216 +Memory allocation - code portion : 80 +------------------------------------------------------------------ + 0 17 Bra + 2 13 CBra 1 + 5 a + 7 4 SBra + 9 2 Recurse + 11 4 KetRmax + 13 b + 15 13 Ket + 17 17 Ket + 19 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation - compiled block : 348 +Memory allocation - code portion : 108 +------------------------------------------------------------------ + 0 24 Bra + 2 a + 4 5 CBra 1 + 7 b + 9 4 Alt + 11 c + 13 9 Ket + 15 d + 17 5 CBra 2 + 20 e + 22 5 Ket + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation - compiled block : 300 +Memory allocation - code portion : 128 +------------------------------------------------------------------ + 0 29 Bra + 2 18 Bra + 4 a + 6 12 CBra 1 + 9 c + 11 5 CBra 2 + 14 d + 16 5 Ket + 18 12 Ket + 20 18 Ket + 22 5 CBra 3 + 25 a + 27 5 Ket + 29 29 Ket + 31 End +------------------------------------------------------------------ + 
+/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation - compiled block : 256 +Memory allocation - code portion : 108 +------------------------------------------------------------------ + 0 24 Bra + 2 5 CBra 1 + 5 a + 7 5 Ket + 9 Any + 10 Any + 11 Any + 12 \1 + 14 bbb + 20 2 Recurse + 22 d + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation - compiled block : 236 +Memory allocation - code portion : 100 +------------------------------------------------------------------ + 0 22 Bra + 2 abc + 8 Callout 255 10 1 + 12 de + 16 Callout 0 16 1 + 20 f + 22 22 Ket + 24 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation - compiled block : 292 +Memory allocation - code portion : 156 +------------------------------------------------------------------ + 0 36 Bra + 2 Callout 255 0 1 + 6 a + 8 Callout 255 1 1 + 12 b + 14 Callout 255 2 1 + 18 c + 20 Callout 255 3 1 + 24 d + 26 Callout 255 4 1 + 30 e + 32 Callout 255 5 0 + 36 36 Ket + 38 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{1000}/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{1000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{10000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{100000}/utf +Memory 
allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100000} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{10ffff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/[\x{ff}]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{ff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{80} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\xff/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{ff} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation - compiled block : 188 +Memory allocation - code portion : 52 +------------------------------------------------------------------ + 0 10 Bra + 2 A\x{2262}\x{391}. 
+ 10 10 Ket + 12 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' +Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation - compiled block : 180 +Memory allocation - code portion : 44 +------------------------------------------------------------------ + 0 8 Bra + 2 \x{d55c}\x{ad6d}\x{c5b4} + 8 8 Ket + 10 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{d55c} +Last code unit = \x{c5b4} +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation - compiled block : 180 +Memory allocation - code portion : 44 +------------------------------------------------------------------ + 0 8 Bra + 2 \x{65e5}\x{672c}\x{8a9e} + 8 8 Ket + 10 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \x{65e5} +Last code unit = \x{8a9e} +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{100} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation - compiled block : 212 +Memory allocation - code portion : 76 +------------------------------------------------------------------ + 0 16 Bra + 2 [Z\x{100}] + 16 16 Ket + 18 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation - compiled block : 188 +Memory allocation - code portion : 52 +------------------------------------------------------------------ + 0 10 Bra + 2 ^ + 3 [\x{100}-\x{150}] + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E]/utf +Memory allocation - compiled 
block : 188 +Memory allocation - code portion : 52 +------------------------------------------------------------------ + 0 10 Bra + 2 ^ + 3 [\x{100}-\x{150}] + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E/utf +Failed: error 106 at offset 13: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\P{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\P{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{L}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[abc\p{L}\x{0660}]/utf +Memory allocation - compiled block : 224 +Memory allocation - code portion : 88 +------------------------------------------------------------------ + 0 19 Bra + 2 [a-c\p{L}\x{660}] + 19 19 Ket + 21 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\p{Nd}] + 9 9 Ket + 11 End 
+------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation - compiled block : 220 +Memory allocation - code portion : 84 +------------------------------------------------------------------ + 0 18 Bra + 2 [+\-\p{Nd}]++ + 18 18 Ket + 20 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation - compiled block : 196 +Memory allocation - code portion : 60 +------------------------------------------------------------------ + 0 12 Bra + 2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation - compiled block : 196 +Memory allocation - code portion : 60 +------------------------------------------------------------------ + 0 12 Bra + 2 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 12 12 Ket + 14 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 9 Bra + 2 [\x{104}-\x{109}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation - compiled block : 240 +Memory allocation - code portion : 104 +------------------------------------------------------------------ + 0 23 Bra + 2 19 CBra 1 + 5 Brazero + 6 13 SCBra 2 + 9 6 Cond + 11 1 Cond ref + 13 0 + 15 2 Alt + 17 8 Ket + 19 13 KetRmax + 21 19 Ket + 23 23 Ket + 25 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation - compiled block : 220 +Memory allocation - code portion : 84 +------------------------------------------------------------------ + 0 18 Bra + 2 14 CBra 1 + 5 Brazero + 6 6 SCond + 8 1 Cond ref + 10 0 + 12 2 Alt + 14 8 KetRmax + 16 14 Ket + 18 18 
Ket + 20 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 a + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{aa} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 \x{aa} + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^a]/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^a] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^a] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^\x{aa}] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation - compiled block : 
164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 4 Bra + 2 [^\x{aa}] + 4 4 Ket + 6 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 9 Bra + 2 [^\p{Nd}] + 9 9 Ket + 11 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{L}\P{Cc}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{Cc}\P{L}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 10 Bra + 2 [\p{L}]++ + 10 10 Ket + 12 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 2 [\P{L}\P{Xsp}]++ + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 60 Bra + 2 abc + 8 5 CBra 1 + 11 d + 13 4 Alt + 15 e + 17 9 Ket + 19 *THEN + 20 x + 22 12 CBra 2 + 25 123 + 31 *THEN + 32 4 + 34 24 Alt + 36 567 + 42 5 CBra 3 + 45 b + 47 4 Alt + 49 q + 51 9 Ket + 53 *THEN + 54 xx + 58 36 Ket + 60 60 Ket + 62 End +------------------------------------------------------------------ + +/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 35 Bra + 2 Brazero + 3 28 SCBra 1 + 6 12 CBra 2 + 9 7 CBra 3 + 12 a + 14 \2 + 16 7 Ket + 18 11 Alt + 20 5 CBra 4 + 23 a* + 25 5 Ket + 27 20 Recurse 
+ 29 23 Ket + 31 28 KetRmax + 33 a?+ + 35 35 Ket + 37 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 16 Bra + 2 12 CBra 1 + 5 7 Recurse + 7 5 CBra 2 + 10 \1 + 12 5 Ket + 14 12 Ket + 16 16 Ket + 18 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 13 Bra + 2 6 Recurse + 4 6 Recurse + 6 5 CBra 1 + 9 a + 11 5 Ket + 13 13 Ket + 15 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 24 Bra + 2 Any + 3 7 CBra 1 + 6 19 Recurse + 8 0 Recurse + 10 4 Alt + 12 \1 + 14 3 Alt + 16 $ + 17 14 Ket + 19 3 CBra 2 + 22 3 Ket + 24 24 Ket + 26 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 31 Bra + 2 Any + 3 14 CBra 1 + 6 26 Recurse + 8 0 Recurse + 10 3 CBra 2 + 13 3 Ket + 15 10 Recurse + 17 4 Alt + 19 \1 + 21 3 Alt + 23 $ + 24 21 Ket + 26 3 CBra 3 + 29 3 Ket + 31 31 Ket + 33 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 50 Bra + 2 4 Recurse + 4 3 CBra 1 + 7 3 Ket + 9 39 CBra 2 + 12 32 CBra 3 + 15 27 CBra 4 + 18 22 CBra 5 + 21 15 CBra 6 + 24 10 CBra 7 + 27 5 Once + 29 \1+ + 32 5 Ket + 34 10 Ket + 36 15 Ket + 38 \x{85} + 40 22 KetRmax + 42 27 Ket + 44 2 Alt + 46 34 Ket + 48 39 Ket + 50 50 Ket + 52 End +------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. 
+ +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| 
+))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 79 Bra + 2 70 Once + 4 6 Cond + 6 1 Cond ref + 8 74 Recurse + 10 6 Ket + 12 6 Cond + 14 1 Cond ref + 16 74 Recurse + 18 6 Ket + 20 6 Cond + 22 1 Cond ref + 24 74 Recurse + 26 6 Ket + 28 6 Cond + 30 1 Cond ref + 32 74 Recurse + 34 6 Ket + 36 6 Cond + 38 1 Cond ref + 40 74 Recurse + 42 6 Ket + 44 6 Cond + 46 1 Cond ref + 48 74 Recurse + 50 6 Ket + 52 6 Cond + 54 1 Cond ref + 56 74 Recurse + 58 6 Ket + 60 10 SBraPos + 62 6 SCond + 64 1 Cond ref + 66 74 Recurse + 68 6 Ket + 70 10 KetRpos + 72 70 Ket + 74 3 CBra 1 + 77 3 Ket + 79 79 Ket + 81 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string 
+Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 43 Bra + 2 34 Once + 4 4 Cond + 6 1 Cond ref + 8 8 Alt + 10 a + 12 38 Recurse + 14 b + 16 12 Ket + 18 16 SBraPos + 20 4 SCond + 22 1 Cond ref + 24 8 Alt + 26 a + 28 38 Recurse + 30 b + 32 12 Ket + 34 16 KetRpos + 36 34 Ket + 38 3 CBra 1 + 41 3 Ket + 43 43 Ket + 45 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 133 Bra + 2 41 CBra 1 + 5 2 Recurse + 7 88 Recurse + 9 93 Recurse + 11 98 Recurse + 13 103 Recurse + 15 108 Recurse + 17 113 Recurse + 19 118 Recurse + 21 123 Recurse + 23 123 Recurse + 25 118 Recurse + 27 113 Recurse + 29 108 Recurse + 31 103 Recurse + 33 98 Recurse + 35 93 Recurse + 37 88 Recurse + 39 2 Recurse + 41 0 Recurse + 43 41 Ket + 45 41 SCBra 1 + 48 2 Recurse + 50 88 Recurse + 52 93 Recurse + 54 98 Recurse + 56 103 Recurse + 58 108 Recurse + 60 113 Recurse + 62 118 Recurse + 64 123 Recurse + 66 123 Recurse + 68 118 Recurse + 70 113 Recurse + 72 108 Recurse + 74 103 Recurse + 76 98 Recurse + 78 93 Recurse + 80 88 Recurse + 82 2 Recurse + 84 0 Recurse + 86 41 KetRmax + 88 3 CBra 2 + 91 3 Ket + 93 3 CBra 3 + 96 3 Ket + 98 3 CBra 4 +101 3 Ket +103 3 CBra 5 +106 3 Ket +108 3 CBra 6 +111 3 Ket +113 3 CBra 7 +116 3 Ket +118 3 CBra 8 +121 3 Ket +123 3 CBra 9 +126 3 Ket +128 3 CBra 10 +131 3 Ket +133 133 Ket +135 End +------------------------------------------------------------------ +Capture group count = 10 +May match empty string +Subject length lower bound = 0 + 
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +#pattern -fullbincode + +/\[()]{65535}/expand + +# End of testinput8 diff --git a/testdata/testoutput8-8-2 b/testdata/testoutput8-8-2 new file mode 100644 index 0000000..f3811d9 --- /dev/null +++ b/testdata/testoutput8-8-2 @@ -0,0 +1,1076 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. 
+ +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation - compiled block : 153 +Memory allocation - code portion : 17 +------------------------------------------------------------------ + 0 13 Bra + 3 7 CBra 1 + 8 /i b + 10 7 Ket + 13 13 Ket + 16 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation - compiled block : 161 +Memory allocation - code portion : 25 +------------------------------------------------------------------ + 0 21 Bra + 3 9 CBra 1 + 8 AllAny* + 10 X + 12 6 Alt + 15 ^ + 16 B + 18 15 Ket + 21 21 Ket + 24 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation - compiled block : 159 +Memory allocation - code portion : 23 +------------------------------------------------------------------ + 0 19 Bra + 3 7 Bra + 6 AllAny* + 8 X + 10 6 Alt + 13 ^ + 14 B + 16 13 Ket + 19 19 Ket + 22 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation - compiled block : 177 +Memory allocation - code portion : 41 +------------------------------------------------------------------ + 0 37 Bra + 3 ^ + 4 [0-9A-Za-z] + 37 37 Ket + 40 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation - compiled block : 143 +Memory allocation - code portion : 7 +------------------------------------------------------------------ + 0 3 Bra + 3 3 Ket + 6 End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation - compiled block : 145 +Memory allocation - code portion : 9 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + 
+/x?+/ +Memory allocation - compiled block : 145 +Memory allocation - code portion : 9 +------------------------------------------------------------------ + 0 5 Bra + 3 x?+ + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/x++/ +Memory allocation - compiled block : 145 +Memory allocation - code portion : 9 +------------------------------------------------------------------ + 0 5 Bra + 3 x++ + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation - compiled block : 149 +Memory allocation - code portion : 13 +------------------------------------------------------------------ + 0 9 Bra + 3 x + 5 x{0,2}+ + 9 9 Ket + 12 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 14 Bra + 3 Braposzero + 4 7 CBraPos 1 + 9 x + 11 7 KetRpos + 14 14 Ket + 17 End +------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation - compiled block : 256 +Memory allocation - code portion : 120 +------------------------------------------------------------------ + 0 116 Bra + 3 ^ + 4 109 CBra 1 + 9 7 CBra 2 + 14 a+ + 16 7 Ket + 19 39 CBra 3 + 24 [ab]+? 
+ 58 39 Ket + 61 39 CBra 4 + 66 [bc]+ +100 39 Ket +103 7 CBra 5 +108 \w*+ +110 7 Ket +113 109 Ket +116 116 Ket +119 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 962 +Memory allocation - code portion : 826 +------------------------------------------------------------------ + 0 822 Bra + 3 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +821 \b +822 822 Ket +825 End +------------------------------------------------------------------ + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 952 +Memory allocation - code portion : 816 
+------------------------------------------------------------------ + 0 812 Bra + 3 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +811 \b +812 812 Ket +815 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation - compiled block : 158 +Memory allocation - code portion : 22 +------------------------------------------------------------------ + 0 18 Bra + 3 12 CBra 1 + 8 a + 10 3 Recurse + 13 b + 15 12 Ket + 18 18 Ket + 21 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 24 Bra + 3 18 CBra 1 + 8 a + 10 6 SBra + 13 3 Recurse + 16 6 KetRmax + 19 b + 21 18 Ket + 24 24 Ket + 27 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation - compiled block : 200 +Memory allocation - code portion : 36 +------------------------------------------------------------------ + 0 32 Bra + 3 a + 5 7 CBra 1 + 10 b + 12 5 Alt + 15 c + 17 12 Ket + 20 d + 22 7 CBra 2 + 27 e + 29 7 Ket + 32 32 Ket + 35 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation - compiled block : 193 +Memory allocation - code portion : 45 +------------------------------------------------------------------ + 0 41 Bra + 3 25 Bra + 6 a + 8 17 CBra 1 + 13 c + 15 7 CBra 2 + 20 d + 22 7 Ket + 25 17 Ket + 28 25 Ket + 31 7 CBra 3 + 36 a + 38 7 Ket + 41 41 Ket + 44 End +------------------------------------------------------------------ + 
+/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation - compiled block : 174 +Memory allocation - code portion : 34 +------------------------------------------------------------------ + 0 30 Bra + 3 7 CBra 1 + 8 a + 10 7 Ket + 13 Any + 14 Any + 15 Any + 16 \1 + 19 bbb + 25 3 Recurse + 28 d + 30 30 Ket + 33 End +------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation - compiled block : 167 +Memory allocation - code portion : 31 +------------------------------------------------------------------ + 0 27 Bra + 3 abc + 9 Callout 255 10 1 + 15 de + 19 Callout 0 16 1 + 25 f + 27 27 Ket + 30 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation - compiled block : 189 +Memory allocation - code portion : 53 +------------------------------------------------------------------ + 0 49 Bra + 3 Callout 255 0 1 + 9 a + 11 Callout 255 1 1 + 17 b + 19 Callout 255 2 1 + 25 c + 27 Callout 255 3 1 + 33 d + 35 Callout 255 4 1 + 41 e + 43 Callout 255 5 0 + 49 49 Ket + 52 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation - compiled block : 146 +Memory allocation - code portion : 10 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{100} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/\x{1000}/utf +Memory allocation - compiled block : 147 +Memory allocation - code portion : 11 +------------------------------------------------------------------ + 0 7 Bra + 3 \x{1000} + 7 7 Ket + 10 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation - compiled block : 148 +Memory allocation - code portion : 12 +------------------------------------------------------------------ + 0 8 Bra + 3 \x{10000} + 8 8 Ket + 11 End +------------------------------------------------------------------ + +/\x{100000}/utf +Memory 
allocation - compiled block : 148 +Memory allocation - code portion : 12 +------------------------------------------------------------------ + 0 8 Bra + 3 \x{100000} + 8 8 Ket + 11 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation - compiled block : 148 +Memory allocation - code portion : 12 +------------------------------------------------------------------ + 0 8 Bra + 3 \x{10ffff} + 8 8 Ket + 11 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/[\x{ff}]/utf +Memory allocation - compiled block : 146 +Memory allocation - code portion : 10 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{ff} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation - compiled block : 146 +Memory allocation - code portion : 10 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{100} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation - compiled block : 146 +Memory allocation - code portion : 10 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{80} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/\xff/utf +Memory allocation - compiled block : 146 +Memory allocation - code portion : 10 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{ff} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 14 Bra + 3 A\x{2262}\x{391}. 
+ 14 14 Ket + 17 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' +Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation - compiled block : 155 +Memory allocation - code portion : 19 +------------------------------------------------------------------ + 0 15 Bra + 3 \x{d55c}\x{ad6d}\x{c5b4} + 15 15 Ket + 18 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xed +Last code unit = \xb4 +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation - compiled block : 155 +Memory allocation - code portion : 19 +------------------------------------------------------------------ + 0 15 Bra + 3 \x{65e5}\x{672c}\x{8a9e} + 15 15 Ket + 18 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xe6 +Last code unit = \x9e +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation - compiled block : 146 +Memory allocation - code portion : 10 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{100} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation - compiled block : 183 +Memory allocation - code portion : 47 +------------------------------------------------------------------ + 0 43 Bra + 3 [Z\x{100}] + 43 43 Ket + 46 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 14 Bra + 3 ^ + 4 [\x{100}-\x{150}] + 14 14 Ket + 17 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E]/utf +Memory allocation - compiled block : 154 
+Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 14 Bra + 3 ^ + 4 [\x{100}-\x{150}] + 14 14 Ket + 17 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E/utf +Failed: error 106 at offset 15: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation - compiled block : 151 +Memory allocation - code portion : 15 +------------------------------------------------------------------ + 0 11 Bra + 3 [\p{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation - compiled block : 151 +Memory allocation - code portion : 15 +------------------------------------------------------------------ + 0 11 Bra + 3 [\P{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation - compiled block : 151 +Memory allocation - code portion : 15 +------------------------------------------------------------------ + 0 11 Bra + 3 [\P{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation - compiled block : 151 +Memory allocation - code portion : 15 +------------------------------------------------------------------ + 0 11 Bra + 3 [\p{L}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[abc\p{L}\x{0660}]/utf +Memory allocation - compiled block : 186 +Memory allocation - code portion : 50 +------------------------------------------------------------------ + 0 46 Bra + 3 [a-c\p{L}\x{660}] + 46 46 Ket + 49 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation - compiled block : 151 +Memory allocation - code portion : 15 +------------------------------------------------------------------ + 0 11 Bra + 3 [\p{Nd}] + 11 11 Ket + 14 End 
+------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation - compiled block : 184 +Memory allocation - code portion : 48 +------------------------------------------------------------------ + 0 44 Bra + 3 [+\-\p{Nd}]++ + 44 44 Ket + 47 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation - compiled block : 161 +Memory allocation - code portion : 25 +------------------------------------------------------------------ + 0 21 Bra + 3 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 21 21 Ket + 24 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation - compiled block : 161 +Memory allocation - code portion : 25 +------------------------------------------------------------------ + 0 21 Bra + 3 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 21 21 Ket + 24 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation - compiled block : 153 +Memory allocation - code portion : 17 +------------------------------------------------------------------ + 0 13 Bra + 3 [\x{104}-\x{109}] + 13 13 Ket + 16 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation - compiled block : 174 +Memory allocation - code portion : 38 +------------------------------------------------------------------ + 0 34 Bra + 3 28 CBra 1 + 8 Brazero + 9 19 SCBra 2 + 14 8 Cond + 17 1 Cond ref + 20 0 + 22 3 Alt + 25 11 Ket + 28 19 KetRmax + 31 28 Ket + 34 34 Ket + 37 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 26 Bra + 3 20 CBra 1 + 8 Brazero + 9 8 SCond + 12 1 Cond ref + 15 0 + 17 3 Alt + 20 11 KetRmax + 23 20 Ket + 
26 26 Ket + 29 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation - compiled block : 145 +Memory allocation - code portion : 9 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation - compiled block : 145 +Memory allocation - code portion : 9 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation - compiled block : 145 +Memory allocation - code portion : 9 +------------------------------------------------------------------ + 0 5 Bra + 3 \x{aa} + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation - compiled block : 146 +Memory allocation - code portion : 10 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{aa} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/[^a]/ +Memory allocation - compiled block : 145 +Memory allocation - code portion : 9 +------------------------------------------------------------------ + 0 5 Bra + 3 [^a] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation - compiled block : 145 +Memory allocation - code portion : 9 +------------------------------------------------------------------ + 0 5 Bra + 3 [^a] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation - compiled block : 145 +Memory allocation - code portion : 9 +------------------------------------------------------------------ + 0 5 Bra + 3 [^\x{aa}] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation - compiled block : 
146 +Memory allocation - code portion : 10 +------------------------------------------------------------------ + 0 6 Bra + 3 [^\x{aa}] + 6 6 Ket + 9 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 11 Bra + 3 [^\p{Nd}] + 11 11 Ket + 14 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 3 [\P{L}\P{Cc}]++ + 15 15 Ket + 18 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 3 [\P{Cc}\P{L}]++ + 15 15 Ket + 18 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 12 Bra + 3 [\p{L}]++ + 12 12 Ket + 15 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 3 [\P{L}\P{Xsp}]++ + 15 15 Ket + 18 End +------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 73 Bra + 3 abc + 9 7 CBra 1 + 14 d + 16 5 Alt + 19 e + 21 12 Ket + 24 *THEN + 25 x + 27 14 CBra 2 + 32 123 + 38 *THEN + 39 4 + 41 29 Alt + 44 567 + 50 7 CBra 3 + 55 b + 57 5 Alt + 60 q + 62 12 Ket + 65 *THEN + 66 xx + 70 43 Ket + 73 73 Ket + 76 End +------------------------------------------------------------------ + +/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 51 Bra + 3 Brazero + 4 42 SCBra 1 + 9 18 CBra 2 + 14 10 CBra 3 + 19 a + 21 \2 + 24 10 Ket + 27 16 Alt + 30 7 CBra 4 + 35 a* + 37 7 Ket + 40 30 
Recurse + 43 34 Ket + 46 42 KetRmax + 49 a?+ + 51 51 Ket + 54 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 25 Bra + 3 19 CBra 1 + 8 11 Recurse + 11 8 CBra 2 + 16 \1 + 19 8 Ket + 22 19 Ket + 25 25 Ket + 28 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 19 Bra + 3 9 Recurse + 6 9 Recurse + 9 7 CBra 1 + 14 a + 16 7 Ket + 19 19 Ket + 22 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 36 Bra + 3 Any + 4 11 CBra 1 + 9 28 Recurse + 12 0 Recurse + 15 6 Alt + 18 \1 + 21 4 Alt + 24 $ + 25 21 Ket + 28 5 CBra 2 + 33 5 Ket + 36 36 Ket + 39 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 47 Bra + 3 Any + 4 22 CBra 1 + 9 39 Recurse + 12 0 Recurse + 15 5 CBra 2 + 20 5 Ket + 23 15 Recurse + 26 6 Alt + 29 \1 + 32 4 Alt + 35 $ + 36 32 Ket + 39 5 CBra 3 + 44 5 Ket + 47 47 Ket + 50 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 77 Bra + 3 6 Recurse + 6 5 CBra 1 + 11 5 Ket + 14 60 CBra 2 + 19 49 CBra 3 + 24 41 CBra 4 + 29 33 CBra 5 + 34 23 CBra 6 + 39 15 CBra 7 + 44 7 Once + 47 \1+ + 51 7 Ket + 54 15 Ket + 57 23 Ket + 60 \x{85} + 62 33 KetRmax + 65 41 Ket + 68 3 Alt + 71 52 Ket + 74 60 Ket + 77 77 Ket + 80 End +------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. 
+ +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| 
+))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode +Failed: error 184 at offset 1129: (?| and/or (?J: or (?x: parentheses are too deeply nested + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 119 Bra + 3 105 Once + 6 9 Cond + 9 1 Cond ref + 12 111 Recurse + 15 9 Ket + 18 9 Cond + 21 1 Cond ref + 24 111 Recurse + 27 9 Ket + 30 9 Cond + 33 1 Cond ref + 36 111 Recurse + 39 9 Ket + 42 9 Cond + 45 1 Cond ref + 48 111 Recurse + 51 9 Ket + 54 9 Cond + 57 1 Cond ref + 60 111 Recurse + 63 9 Ket + 66 9 Cond + 69 1 Cond ref + 72 111 Recurse + 75 9 Ket + 78 9 Cond + 81 1 Cond ref + 84 111 Recurse + 87 9 Ket + 90 15 SBraPos + 93 9 SCond + 96 1 Cond ref + 99 111 Recurse +102 9 Ket +105 15 KetRpos +108 105 Ket +111 5 CBra 1 +116 5 Ket +119 119 Ket +122 End 
+------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 61 Bra + 3 47 Once + 6 6 Cond + 9 1 Cond ref + 12 10 Alt + 15 a + 17 53 Recurse + 20 b + 22 16 Ket + 25 22 SBraPos + 28 6 SCond + 31 1 Cond ref + 34 10 Alt + 37 a + 39 53 Recurse + 42 b + 44 16 Ket + 47 22 KetRpos + 50 47 Ket + 53 5 CBra 1 + 58 5 Ket + 61 61 Ket + 64 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 205 Bra + 3 62 CBra 1 + 8 3 Recurse + 11 133 Recurse + 14 141 Recurse + 17 149 Recurse + 20 157 Recurse + 23 165 Recurse + 26 173 Recurse + 29 181 Recurse + 32 189 Recurse + 35 189 Recurse + 38 181 Recurse + 41 173 Recurse + 44 165 Recurse + 47 157 Recurse + 50 149 Recurse + 53 141 Recurse + 56 133 Recurse + 59 3 Recurse + 62 0 Recurse + 65 62 Ket + 68 62 SCBra 1 + 73 3 Recurse + 76 133 Recurse + 79 141 Recurse + 82 149 Recurse + 85 157 Recurse + 88 165 Recurse + 91 173 Recurse + 94 181 Recurse + 97 189 Recurse +100 189 Recurse +103 181 Recurse +106 173 Recurse +109 165 Recurse +112 157 Recurse +115 149 Recurse +118 141 Recurse +121 133 Recurse +124 3 Recurse +127 0 Recurse +130 62 KetRmax +133 5 CBra 2 +138 5 Ket +141 5 CBra 3 +146 5 Ket +149 5 CBra 4 +154 5 Ket +157 5 CBra 5 +162 5 Ket +165 5 CBra 6 +170 5 Ket +173 5 CBra 7 +178 5 Ket +181 5 CBra 8 +186 5 Ket +189 5 CBra 9 +194 5 Ket +197 5 CBra 10 +202 5 Ket +205 205 Ket +208 End +------------------------------------------------------------------ +Capture group count = 10 +May match empty 
string +Subject length lower bound = 0 + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +#pattern -fullbincode + +/\[()]{65535}/expand +Failed: error 120 at offset 131070: regular expression is too large + +# End of testinput8 diff --git a/testdata/testoutput8-8-3 b/testdata/testoutput8-8-3 new file mode 100644 index 0000000..48e0b8a --- /dev/null +++ b/testdata/testoutput8-8-3 @@ -0,0 +1,1074 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. 
Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. + +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation - compiled block : 157 +Memory allocation - code portion : 21 +------------------------------------------------------------------ + 0 16 Bra + 4 8 CBra 1 + 10 /i b + 12 8 Ket + 16 16 Ket + 20 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation - compiled block : 166 +Memory allocation - code portion : 30 +------------------------------------------------------------------ + 0 25 Bra + 4 10 CBra 1 + 10 AllAny* + 12 X + 14 7 Alt + 18 ^ + 19 B + 21 17 Ket + 25 25 Ket + 29 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation - compiled block : 164 +Memory allocation - code portion : 28 +------------------------------------------------------------------ + 0 23 Bra + 4 8 Bra + 8 AllAny* + 10 X + 12 7 Alt + 16 ^ + 17 B + 19 15 Ket + 23 23 Ket + 27 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation - compiled block : 179 +Memory allocation - code portion : 43 +------------------------------------------------------------------ + 0 38 Bra + 4 ^ + 5 [0-9A-Za-z] + 38 38 Ket + 42 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation - compiled block : 145 +Memory allocation - code portion : 9 +------------------------------------------------------------------ + 0 4 Bra + 4 4 Ket + 8 End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation - compiled block : 147 +Memory allocation - code portion : 11 +------------------------------------------------------------------ + 0 6 Bra + 4 a + 6 6 Ket + 10 End 
+------------------------------------------------------------------ +Capture group count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + +/x?+/ +Memory allocation - compiled block : 147 +Memory allocation - code portion : 11 +------------------------------------------------------------------ + 0 6 Bra + 4 x?+ + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/x++/ +Memory allocation - compiled block : 147 +Memory allocation - code portion : 11 +------------------------------------------------------------------ + 0 6 Bra + 4 x++ + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation - compiled block : 151 +Memory allocation - code portion : 15 +------------------------------------------------------------------ + 0 10 Bra + 4 x + 6 x{0,2}+ + 10 10 Ket + 14 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation - compiled block : 158 +Memory allocation - code portion : 22 +------------------------------------------------------------------ + 0 17 Bra + 4 Braposzero + 5 8 CBraPos 1 + 11 x + 13 8 KetRpos + 17 17 Ket + 21 End +------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation - compiled block : 268 +Memory allocation - code portion : 132 +------------------------------------------------------------------ + 0 127 Bra + 4 ^ + 5 118 CBra 1 + 11 8 CBra 2 + 17 a+ + 19 8 Ket + 23 40 CBra 3 + 29 [ab]+? 
+ 63 40 Ket + 67 40 CBra 4 + 73 [bc]+ +107 40 Ket +111 8 CBra 5 +117 \w*+ +119 8 Ket +123 118 Ket +127 127 Ket +131 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 964 +Memory allocation - code portion : 828 +------------------------------------------------------------------ + 0 823 Bra + 4 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +822 \b +823 823 Ket +827 End +------------------------------------------------------------------ + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 954 +Memory allocation - code portion : 818 
+------------------------------------------------------------------ + 0 813 Bra + 4 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +812 \b +813 813 Ket +817 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation - compiled block : 163 +Memory allocation - code portion : 27 +------------------------------------------------------------------ + 0 22 Bra + 4 14 CBra 1 + 10 a + 12 4 Recurse + 16 b + 18 14 Ket + 22 22 Ket + 26 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation - compiled block : 171 +Memory allocation - code portion : 35 +------------------------------------------------------------------ + 0 30 Bra + 4 22 CBra 1 + 10 a + 12 8 SBra + 16 4 Recurse + 20 8 KetRmax + 24 b + 26 22 Ket + 30 30 Ket + 34 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation - compiled block : 207 +Memory allocation - code portion : 43 +------------------------------------------------------------------ + 0 38 Bra + 4 a + 6 8 CBra 1 + 12 b + 14 6 Alt + 18 c + 20 14 Ket + 24 d + 26 8 CBra 2 + 32 e + 34 8 Ket + 38 38 Ket + 42 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation - compiled block : 203 +Memory allocation - code portion : 55 +------------------------------------------------------------------ + 0 50 Bra + 4 30 Bra + 8 a + 10 20 CBra 1 + 16 c + 18 8 CBra 2 + 24 d + 26 8 Ket + 30 20 Ket + 34 30 Ket + 38 8 CBra 3 + 44 a + 46 8 Ket + 50 50 Ket + 54 End 
+------------------------------------------------------------------ + +/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation - compiled block : 179 +Memory allocation - code portion : 39 +------------------------------------------------------------------ + 0 34 Bra + 4 8 CBra 1 + 10 a + 12 8 Ket + 16 Any + 17 Any + 18 Any + 19 \1 + 22 bbb + 28 4 Recurse + 32 d + 34 34 Ket + 38 End +------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation - compiled block : 173 +Memory allocation - code portion : 37 +------------------------------------------------------------------ + 0 32 Bra + 4 abc + 10 Callout 255 10 1 + 18 de + 22 Callout 0 16 1 + 30 f + 32 32 Ket + 36 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation - compiled block : 203 +Memory allocation - code portion : 67 +------------------------------------------------------------------ + 0 62 Bra + 4 Callout 255 0 1 + 12 a + 14 Callout 255 1 1 + 22 b + 24 Callout 255 2 1 + 32 c + 34 Callout 255 3 1 + 42 d + 44 Callout 255 4 1 + 52 e + 54 Callout 255 5 0 + 62 62 Ket + 66 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation - compiled block : 148 +Memory allocation - code portion : 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{100} + 7 7 Ket + 11 End +------------------------------------------------------------------ + +/\x{1000}/utf +Memory allocation - compiled block : 149 +Memory allocation - code portion : 13 +------------------------------------------------------------------ + 0 8 Bra + 4 \x{1000} + 8 8 Ket + 12 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 9 Bra + 4 \x{10000} + 9 9 Ket + 13 End 
+------------------------------------------------------------------ + +/\x{100000}/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 9 Bra + 4 \x{100000} + 9 9 Ket + 13 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 9 Bra + 4 \x{10ffff} + 9 9 Ket + 13 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/[\x{ff}]/utf +Memory allocation - compiled block : 148 +Memory allocation - code portion : 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{ff} + 7 7 Ket + 11 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation - compiled block : 148 +Memory allocation - code portion : 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{100} + 7 7 Ket + 11 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation - compiled block : 148 +Memory allocation - code portion : 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{80} + 7 7 Ket + 11 End +------------------------------------------------------------------ + +/\xff/utf +Memory allocation - compiled block : 148 +Memory allocation - code portion : 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{ff} + 7 7 Ket + 11 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation - compiled block : 156 +Memory allocation - code portion : 20 
+------------------------------------------------------------------ + 0 15 Bra + 4 A\x{2262}\x{391}. + 15 15 Ket + 19 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' +Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation - compiled block : 157 +Memory allocation - code portion : 21 +------------------------------------------------------------------ + 0 16 Bra + 4 \x{d55c}\x{ad6d}\x{c5b4} + 16 16 Ket + 20 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xed +Last code unit = \xb4 +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation - compiled block : 157 +Memory allocation - code portion : 21 +------------------------------------------------------------------ + 0 16 Bra + 4 \x{65e5}\x{672c}\x{8a9e} + 16 16 Ket + 20 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xe6 +Last code unit = \x9e +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation - compiled block : 148 +Memory allocation - code portion : 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{100} + 7 7 Ket + 11 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation - compiled block : 186 +Memory allocation - code portion : 50 +------------------------------------------------------------------ + 0 45 Bra + 4 [Z\x{100}] + 45 45 Ket + 49 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation - compiled block : 157 +Memory allocation - code portion : 21 +------------------------------------------------------------------ + 0 16 Bra + 4 ^ + 5 [\x{100}-\x{150}] + 16 16 Ket + 20 End 
+------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E]/utf +Memory allocation - compiled block : 157 +Memory allocation - code portion : 21 +------------------------------------------------------------------ + 0 16 Bra + 4 ^ + 5 [\x{100}-\x{150}] + 16 16 Ket + 20 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E/utf +Failed: error 106 at offset 15: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 13 Bra + 4 [\p{L}] + 13 13 Ket + 17 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 13 Bra + 4 [\P{L}] + 13 13 Ket + 17 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 13 Bra + 4 [\P{L}] + 13 13 Ket + 17 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 +------------------------------------------------------------------ + 0 13 Bra + 4 [\p{L}] + 13 13 Ket + 17 End +------------------------------------------------------------------ + +/[abc\p{L}\x{0660}]/utf +Memory allocation - compiled block : 189 +Memory allocation - code portion : 53 +------------------------------------------------------------------ + 0 48 Bra + 4 [a-c\p{L}\x{660}] + 48 48 Ket + 52 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation - compiled block : 154 +Memory allocation - code portion : 18 
+------------------------------------------------------------------ + 0 13 Bra + 4 [\p{Nd}] + 13 13 Ket + 17 End +------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation - compiled block : 187 +Memory allocation - code portion : 51 +------------------------------------------------------------------ + 0 46 Bra + 4 [+\-\p{Nd}]++ + 46 46 Ket + 50 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation - compiled block : 163 +Memory allocation - code portion : 27 +------------------------------------------------------------------ + 0 22 Bra + 4 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 22 22 Ket + 26 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation - compiled block : 163 +Memory allocation - code portion : 27 +------------------------------------------------------------------ + 0 22 Bra + 4 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 22 22 Ket + 26 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation - compiled block : 156 +Memory allocation - code portion : 20 +------------------------------------------------------------------ + 0 15 Bra + 4 [\x{104}-\x{109}] + 15 15 Ket + 19 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation - compiled block : 183 +Memory allocation - code portion : 47 +------------------------------------------------------------------ + 0 42 Bra + 4 34 CBra 1 + 10 Brazero + 11 23 SCBra 2 + 17 9 Cond + 21 1 Cond ref + 24 0 + 26 4 Alt + 30 13 Ket + 34 23 KetRmax + 38 34 Ket + 42 42 Ket + 46 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation - compiled block : 173 +Memory allocation - code portion : 37 
+------------------------------------------------------------------ + 0 32 Bra + 4 24 CBra 1 + 10 Brazero + 11 9 SCond + 15 1 Cond ref + 18 0 + 20 4 Alt + 24 13 KetRmax + 28 24 Ket + 32 32 Ket + 36 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation - compiled block : 147 +Memory allocation - code portion : 11 +------------------------------------------------------------------ + 0 6 Bra + 4 a + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation - compiled block : 147 +Memory allocation - code portion : 11 +------------------------------------------------------------------ + 0 6 Bra + 4 a + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation - compiled block : 147 +Memory allocation - code portion : 11 +------------------------------------------------------------------ + 0 6 Bra + 4 \x{aa} + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation - compiled block : 148 +Memory allocation - code portion : 12 +------------------------------------------------------------------ + 0 7 Bra + 4 \x{aa} + 7 7 Ket + 11 End +------------------------------------------------------------------ + +/[^a]/ +Memory allocation - compiled block : 147 +Memory allocation - code portion : 11 +------------------------------------------------------------------ + 0 6 Bra + 4 [^a] + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation - compiled block : 147 +Memory allocation - code portion : 11 +------------------------------------------------------------------ + 0 6 Bra + 4 [^a] + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation - compiled block : 147 +Memory allocation - code portion : 11 
+------------------------------------------------------------------ + 0 6 Bra + 4 [^\x{aa}] + 6 6 Ket + 10 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation - compiled block : 148 +Memory allocation - code portion : 12 +------------------------------------------------------------------ + 0 7 Bra + 4 [^\x{aa}] + 7 7 Ket + 11 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 13 Bra + 4 [^\p{Nd}] + 13 13 Ket + 17 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 17 Bra + 4 [\P{L}\P{Cc}]++ + 17 17 Ket + 21 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 17 Bra + 4 [\P{Cc}\P{L}]++ + 17 17 Ket + 21 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 14 Bra + 4 [\p{L}]++ + 14 14 Ket + 18 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 17 Bra + 4 [\P{L}\P{Xsp}]++ + 17 17 Ket + 21 End +------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 83 Bra + 4 abc + 10 8 CBra 1 + 16 d + 18 6 Alt + 22 e + 24 14 Ket + 28 *THEN + 29 x + 31 15 CBra 2 + 37 123 + 43 *THEN + 44 4 + 46 33 Alt + 50 567 + 56 8 CBra 3 + 62 b + 64 6 Alt + 68 q + 70 14 Ket + 74 *THEN + 75 xx + 79 48 Ket + 83 83 Ket + 87 End +------------------------------------------------------------------ + 
+/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 62 Bra + 4 Brazero + 5 51 SCBra 1 + 11 21 CBra 2 + 17 11 CBra 3 + 23 a + 25 \2 + 28 11 Ket + 32 20 Alt + 36 8 CBra 4 + 42 a* + 44 8 Ket + 48 36 Recurse + 52 41 Ket + 56 51 KetRmax + 60 a?+ + 62 62 Ket + 66 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 31 Bra + 4 23 CBra 1 + 10 14 Recurse + 14 9 CBra 2 + 20 \1 + 23 9 Ket + 27 23 Ket + 31 31 Ket + 35 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 24 Bra + 4 12 Recurse + 8 12 Recurse + 12 8 CBra 1 + 18 a + 20 8 Ket + 24 24 Ket + 28 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 45 Bra + 4 Any + 5 14 CBra 1 + 11 35 Recurse + 15 0 Recurse + 19 7 Alt + 23 \1 + 26 5 Alt + 30 $ + 31 26 Ket + 35 6 CBra 2 + 41 6 Ket + 45 45 Ket + 49 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 59 Bra + 4 Any + 5 28 CBra 1 + 11 49 Recurse + 15 0 Recurse + 19 6 CBra 2 + 25 6 Ket + 29 19 Recurse + 33 7 Alt + 37 \1 + 40 5 Alt + 44 $ + 45 40 Ket + 49 6 CBra 3 + 55 6 Ket + 59 59 Ket + 63 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 96 Bra + 4 8 Recurse + 8 6 CBra 1 + 14 6 Ket + 18 74 CBra 2 + 24 60 CBra 3 + 30 50 CBra 4 + 36 40 CBra 5 + 42 28 CBra 6 + 48 18 CBra 7 + 54 8 Once + 58 \1+ + 62 8 Ket + 66 18 Ket + 70 28 Ket + 74 \x{85} + 76 40 KetRmax + 80 50 Ket + 84 4 Alt + 88 64 Ket + 92 74 Ket + 96 96 Ket +100 End 
+------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. + +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| 
+))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 150 Bra + 4 132 Once + 8 11 Cond + 12 1 Cond ref + 15 140 Recurse + 19 11 Ket + 23 11 Cond + 27 1 Cond ref + 30 140 Recurse + 34 11 Ket + 38 11 Cond + 42 1 Cond ref + 45 140 Recurse + 49 11 Ket + 53 11 Cond + 57 1 Cond ref + 60 140 Recurse + 64 11 Ket + 68 11 Cond + 72 1 Cond ref + 75 140 Recurse + 79 11 Ket + 83 11 Cond + 87 1 Cond ref + 90 140 Recurse + 94 11 Ket + 98 11 Cond +102 1 Cond ref +105 140 Recurse +109 11 Ket +113 19 SBraPos +117 11 SCond +121 1 Cond ref +124 140 Recurse +128 11 Ket +132 19 KetRpos +136 132 Ket +140 6 CBra 1 +146 6 Ket +150 150 Ket +154 End +------------------------------------------------------------------ +Capture group count = 1 +Max back 
reference = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 76 Bra + 4 58 Once + 8 7 Cond + 12 1 Cond ref + 15 12 Alt + 19 a + 21 66 Recurse + 25 b + 27 19 Ket + 31 27 SBraPos + 35 7 SCond + 39 1 Cond ref + 42 12 Alt + 46 a + 48 66 Recurse + 52 b + 54 19 Ket + 58 27 KetRpos + 62 58 Ket + 66 6 CBra 1 + 72 6 Ket + 76 76 Ket + 80 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 266 Bra + 4 82 CBra 1 + 10 4 Recurse + 14 176 Recurse + 18 186 Recurse + 22 196 Recurse + 26 206 Recurse + 30 216 Recurse + 34 226 Recurse + 38 236 Recurse + 42 246 Recurse + 46 246 Recurse + 50 236 Recurse + 54 226 Recurse + 58 216 Recurse + 62 206 Recurse + 66 196 Recurse + 70 186 Recurse + 74 176 Recurse + 78 4 Recurse + 82 0 Recurse + 86 82 Ket + 90 82 SCBra 1 + 96 4 Recurse +100 176 Recurse +104 186 Recurse +108 196 Recurse +112 206 Recurse +116 216 Recurse +120 226 Recurse +124 236 Recurse +128 246 Recurse +132 246 Recurse +136 236 Recurse +140 226 Recurse +144 216 Recurse +148 206 Recurse +152 196 Recurse +156 186 Recurse +160 176 Recurse +164 4 Recurse +168 0 Recurse +172 82 KetRmax +176 6 CBra 2 +182 6 Ket +186 6 CBra 3 +192 6 Ket +196 6 CBra 4 +202 6 Ket +206 6 CBra 5 +212 6 Ket +216 6 CBra 6 +222 6 Ket +226 6 CBra 7 +232 6 Ket +236 6 CBra 8 +242 6 Ket +246 6 CBra 9 +252 6 Ket +256 6 CBra 10 +262 6 Ket +266 266 Ket +270 End +------------------------------------------------------------------ +Capture group count = 10 +May match empty string +Subject length lower bound = 0 + 
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +#pattern -fullbincode + +/\[()]{65535}/expand + +# End of testinput8 diff --git a/testdata/testoutput8-8-4 b/testdata/testoutput8-8-4 new file mode 100644 index 0000000..81cf0f7 --- /dev/null +++ b/testdata/testoutput8-8-4 @@ -0,0 +1,1074 @@ +# There are two sorts of patterns in this test. A number of them are +# representative patterns whose lengths and offsets are checked. This is just a +# doublecheck test to ensure the sizes don't go horribly wrong when something +# is changed. The operation of these patterns is checked in other tests. +# +# This file also contains tests whose output varies with code unit size and/or +# link size. Unicode support is required for these tests. There are separate +# output files for each code unit size and link size. 
+ +#pattern fullbincode,memory + +/((?i)b)/ +Memory allocation - compiled block : 161 +Memory allocation - code portion : 25 +------------------------------------------------------------------ + 0 19 Bra + 5 9 CBra 1 + 12 /i b + 14 9 Ket + 19 19 Ket + 24 End +------------------------------------------------------------------ + +/(?s)(.*X|^B)/ +Memory allocation - compiled block : 171 +Memory allocation - code portion : 35 +------------------------------------------------------------------ + 0 29 Bra + 5 11 CBra 1 + 12 AllAny* + 14 X + 16 8 Alt + 21 ^ + 22 B + 24 19 Ket + 29 29 Ket + 34 End +------------------------------------------------------------------ + +/(?s:.*X|^B)/ +Memory allocation - compiled block : 169 +Memory allocation - code portion : 33 +------------------------------------------------------------------ + 0 27 Bra + 5 9 Bra + 10 AllAny* + 12 X + 14 8 Alt + 19 ^ + 20 B + 22 17 Ket + 27 27 Ket + 32 End +------------------------------------------------------------------ + +/^[[:alnum:]]/ +Memory allocation - compiled block : 181 +Memory allocation - code portion : 45 +------------------------------------------------------------------ + 0 39 Bra + 5 ^ + 6 [0-9A-Za-z] + 39 39 Ket + 44 End +------------------------------------------------------------------ + +/#/Ix +Memory allocation - compiled block : 147 +Memory allocation - code portion : 11 +------------------------------------------------------------------ + 0 5 Bra + 5 5 Ket + 10 End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/Ix +Memory allocation - compiled block : 149 +Memory allocation - code portion : 13 +------------------------------------------------------------------ + 0 7 Bra + 5 a + 7 7 Ket + 12 End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound 
= 1 + +/x?+/ +Memory allocation - compiled block : 149 +Memory allocation - code portion : 13 +------------------------------------------------------------------ + 0 7 Bra + 5 x?+ + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/x++/ +Memory allocation - compiled block : 149 +Memory allocation - code portion : 13 +------------------------------------------------------------------ + 0 7 Bra + 5 x++ + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/x{1,3}+/ +Memory allocation - compiled block : 153 +Memory allocation - code portion : 17 +------------------------------------------------------------------ + 0 11 Bra + 5 x + 7 x{0,2}+ + 11 11 Ket + 16 End +------------------------------------------------------------------ + +/(x)*+/ +Memory allocation - compiled block : 162 +Memory allocation - code portion : 26 +------------------------------------------------------------------ + 0 20 Bra + 5 Braposzero + 6 9 CBraPos 1 + 13 x + 15 9 KetRpos + 20 20 Ket + 25 End +------------------------------------------------------------------ + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/ +Memory allocation - compiled block : 280 +Memory allocation - code portion : 144 +------------------------------------------------------------------ + 0 138 Bra + 5 ^ + 6 127 CBra 1 + 13 9 CBra 2 + 20 a+ + 22 9 Ket + 27 41 CBra 3 + 34 [ab]+? 
+ 68 41 Ket + 73 41 CBra 4 + 80 [bc]+ +114 41 Ket +119 9 CBra 5 +126 \w*+ +128 9 Ket +133 127 Ket +138 138 Ket +143 End +------------------------------------------------------------------ + +"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 966 +Memory allocation - code portion : 830 +------------------------------------------------------------------ + 0 824 Bra + 5 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +823 \b +824 824 Ket +829 End +------------------------------------------------------------------ + +"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b" +Memory allocation - compiled block : 956 +Memory allocation - code portion : 820 
+------------------------------------------------------------------ + 0 814 Bra + 5 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X +813 \b +814 814 Ket +819 End +------------------------------------------------------------------ + +/(a(?1)b)/ +Memory allocation - compiled block : 168 +Memory allocation - code portion : 32 +------------------------------------------------------------------ + 0 26 Bra + 5 16 CBra 1 + 12 a + 14 5 Recurse + 19 b + 21 16 Ket + 26 26 Ket + 31 End +------------------------------------------------------------------ + +/(a(?1)+b)/ +Memory allocation - compiled block : 178 +Memory allocation - code portion : 42 +------------------------------------------------------------------ + 0 36 Bra + 5 26 CBra 1 + 12 a + 14 10 SBra + 19 5 Recurse + 24 10 KetRmax + 29 b + 31 26 Ket + 36 36 Ket + 41 End +------------------------------------------------------------------ + +/a(?Pb|c)d(?Pe)/ +Memory allocation - compiled block : 214 +Memory allocation - code portion : 50 +------------------------------------------------------------------ + 0 44 Bra + 5 a + 7 9 CBra 1 + 14 b + 16 7 Alt + 21 c + 23 16 Ket + 28 d + 30 9 CBra 2 + 37 e + 39 9 Ket + 44 44 Ket + 49 End +------------------------------------------------------------------ + +/(?:a(?Pc(?Pd)))(?Pa)/ +Memory allocation - compiled block : 213 +Memory allocation - code portion : 65 +------------------------------------------------------------------ + 0 59 Bra + 5 35 Bra + 10 a + 12 23 CBra 1 + 19 c + 21 9 CBra 2 + 28 d + 30 9 Ket + 35 23 Ket + 40 35 Ket + 45 9 CBra 3 + 52 a + 54 9 Ket + 59 59 Ket + 64 End 
+------------------------------------------------------------------ + +/(?Pa)...(?P=a)bbb(?P>a)d/ +Memory allocation - compiled block : 184 +Memory allocation - code portion : 44 +------------------------------------------------------------------ + 0 38 Bra + 5 9 CBra 1 + 12 a + 14 9 Ket + 19 Any + 20 Any + 21 Any + 22 \1 + 25 bbb + 31 5 Recurse + 36 d + 38 38 Ket + 43 End +------------------------------------------------------------------ + +/abc(?C255)de(?C)f/ +Memory allocation - compiled block : 179 +Memory allocation - code portion : 43 +------------------------------------------------------------------ + 0 37 Bra + 5 abc + 11 Callout 255 10 1 + 21 de + 25 Callout 0 16 1 + 35 f + 37 37 Ket + 42 End +------------------------------------------------------------------ + +/abcde/auto_callout +Memory allocation - compiled block : 217 +Memory allocation - code portion : 81 +------------------------------------------------------------------ + 0 75 Bra + 5 Callout 255 0 1 + 15 a + 17 Callout 255 1 1 + 27 b + 29 Callout 255 2 1 + 39 c + 41 Callout 255 3 1 + 51 d + 53 Callout 255 4 1 + 63 e + 65 Callout 255 5 0 + 75 75 Ket + 80 End +------------------------------------------------------------------ + +/\x{100}/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 8 Bra + 5 \x{100} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/\x{1000}/utf +Memory allocation - compiled block : 151 +Memory allocation - code portion : 15 +------------------------------------------------------------------ + 0 9 Bra + 5 \x{1000} + 9 9 Ket + 14 End +------------------------------------------------------------------ + +/\x{10000}/utf +Memory allocation - compiled block : 152 +Memory allocation - code portion : 16 +------------------------------------------------------------------ + 0 10 Bra + 5 \x{10000} + 10 10 Ket + 15 End 
+------------------------------------------------------------------ + +/\x{100000}/utf +Memory allocation - compiled block : 152 +Memory allocation - code portion : 16 +------------------------------------------------------------------ + 0 10 Bra + 5 \x{100000} + 10 10 Ket + 15 End +------------------------------------------------------------------ + +/\x{10ffff}/utf +Memory allocation - compiled block : 152 +Memory allocation - code portion : 16 +------------------------------------------------------------------ + 0 10 Bra + 5 \x{10ffff} + 10 10 Ket + 15 End +------------------------------------------------------------------ + +/\x{110000}/utf +Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large + +/[\x{ff}]/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 8 Bra + 5 \x{ff} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/[\x{100}]/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 8 Bra + 5 \x{100} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/\x80/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 8 Bra + 5 \x{80} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/\xff/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 8 Bra + 5 \x{ff} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf +Memory allocation - compiled block : 158 +Memory allocation - code portion : 22 
+------------------------------------------------------------------ + 0 16 Bra + 5 A\x{2262}\x{391}. + 16 16 Ket + 21 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = 'A' +Last code unit = '.' +Subject length lower bound = 4 + +/\x{D55c}\x{ad6d}\x{C5B4}/I,utf +Memory allocation - compiled block : 159 +Memory allocation - code portion : 23 +------------------------------------------------------------------ + 0 17 Bra + 5 \x{d55c}\x{ad6d}\x{c5b4} + 17 17 Ket + 22 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xed +Last code unit = \xb4 +Subject length lower bound = 3 + +/\x{65e5}\x{672c}\x{8a9e}/I,utf +Memory allocation - compiled block : 159 +Memory allocation - code portion : 23 +------------------------------------------------------------------ + 0 17 Bra + 5 \x{65e5}\x{672c}\x{8a9e} + 17 17 Ket + 22 End +------------------------------------------------------------------ +Capture group count = 0 +Options: utf +First code unit = \xe6 +Last code unit = \x9e +Subject length lower bound = 3 + +/[\x{100}]/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 8 Bra + 5 \x{100} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/[Z\x{100}]/utf +Memory allocation - compiled block : 189 +Memory allocation - code portion : 53 +------------------------------------------------------------------ + 0 47 Bra + 5 [Z\x{100}] + 47 47 Ket + 52 End +------------------------------------------------------------------ + +/^[\x{100}\E-\Q\E\x{150}]/utf +Memory allocation - compiled block : 160 +Memory allocation - code portion : 24 +------------------------------------------------------------------ + 0 18 Bra + 5 ^ + 6 [\x{100}-\x{150}] + 18 18 Ket + 23 End 
+------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E]/utf +Memory allocation - compiled block : 160 +Memory allocation - code portion : 24 +------------------------------------------------------------------ + 0 18 Bra + 5 ^ + 6 [\x{100}-\x{150}] + 18 18 Ket + 23 End +------------------------------------------------------------------ + +/^[\QĀ\E-\QŐ\E/utf +Failed: error 106 at offset 15: missing terminating ] for character class + +/[\p{L}]/ +Memory allocation - compiled block : 157 +Memory allocation - code portion : 21 +------------------------------------------------------------------ + 0 15 Bra + 5 [\p{L}] + 15 15 Ket + 20 End +------------------------------------------------------------------ + +/[\p{^L}]/ +Memory allocation - compiled block : 157 +Memory allocation - code portion : 21 +------------------------------------------------------------------ + 0 15 Bra + 5 [\P{L}] + 15 15 Ket + 20 End +------------------------------------------------------------------ + +/[\P{L}]/ +Memory allocation - compiled block : 157 +Memory allocation - code portion : 21 +------------------------------------------------------------------ + 0 15 Bra + 5 [\P{L}] + 15 15 Ket + 20 End +------------------------------------------------------------------ + +/[\P{^L}]/ +Memory allocation - compiled block : 157 +Memory allocation - code portion : 21 +------------------------------------------------------------------ + 0 15 Bra + 5 [\p{L}] + 15 15 Ket + 20 End +------------------------------------------------------------------ + +/[abc\p{L}\x{0660}]/utf +Memory allocation - compiled block : 192 +Memory allocation - code portion : 56 +------------------------------------------------------------------ + 0 50 Bra + 5 [a-c\p{L}\x{660}] + 50 50 Ket + 55 End +------------------------------------------------------------------ + +/[\p{Nd}]/utf +Memory allocation - compiled block : 157 +Memory allocation - code portion : 21 
+------------------------------------------------------------------ + 0 15 Bra + 5 [\p{Nd}] + 15 15 Ket + 20 End +------------------------------------------------------------------ + +/[\p{Nd}+-]+/utf +Memory allocation - compiled block : 190 +Memory allocation - code portion : 54 +------------------------------------------------------------------ + 0 48 Bra + 5 [+\-\p{Nd}]++ + 48 48 Ket + 53 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf +Memory allocation - compiled block : 165 +Memory allocation - code portion : 29 +------------------------------------------------------------------ + 0 23 Bra + 5 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 23 23 Ket + 28 End +------------------------------------------------------------------ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf +Memory allocation - compiled block : 165 +Memory allocation - code portion : 29 +------------------------------------------------------------------ + 0 23 Bra + 5 A\x{391}\x{10427}\x{ff3a}\x{1fb0} + 23 23 Ket + 28 End +------------------------------------------------------------------ + +/[\x{105}-\x{109}]/i,utf +Memory allocation - compiled block : 159 +Memory allocation - code portion : 23 +------------------------------------------------------------------ + 0 17 Bra + 5 [\x{104}-\x{109}] + 17 17 Ket + 22 End +------------------------------------------------------------------ + +/( ( (?(1)0|) )* )/x +Memory allocation - compiled block : 192 +Memory allocation - code portion : 56 +------------------------------------------------------------------ + 0 50 Bra + 5 40 CBra 1 + 12 Brazero + 13 27 SCBra 2 + 20 10 Cond + 25 1 Cond ref + 28 0 + 30 5 Alt + 35 15 Ket + 40 27 KetRmax + 45 40 Ket + 50 50 Ket + 55 End +------------------------------------------------------------------ + +/( (?(1)0|)* )/x +Memory allocation - compiled block : 180 +Memory allocation - code portion : 44 
+------------------------------------------------------------------ + 0 38 Bra + 5 28 CBra 1 + 12 Brazero + 13 10 SCond + 18 1 Cond ref + 21 0 + 23 5 Alt + 28 15 KetRmax + 33 28 Ket + 38 38 Ket + 43 End +------------------------------------------------------------------ + +/[a]/ +Memory allocation - compiled block : 149 +Memory allocation - code portion : 13 +------------------------------------------------------------------ + 0 7 Bra + 5 a + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/[a]/utf +Memory allocation - compiled block : 149 +Memory allocation - code portion : 13 +------------------------------------------------------------------ + 0 7 Bra + 5 a + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/[\xaa]/ +Memory allocation - compiled block : 149 +Memory allocation - code portion : 13 +------------------------------------------------------------------ + 0 7 Bra + 5 \x{aa} + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/[\xaa]/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 8 Bra + 5 \x{aa} + 8 8 Ket + 13 End +------------------------------------------------------------------ + +/[^a]/ +Memory allocation - compiled block : 149 +Memory allocation - code portion : 13 +------------------------------------------------------------------ + 0 7 Bra + 5 [^a] + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/[^a]/utf +Memory allocation - compiled block : 149 +Memory allocation - code portion : 13 +------------------------------------------------------------------ + 0 7 Bra + 5 [^a] + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/[^\xaa]/ +Memory allocation - compiled block : 149 +Memory allocation - code portion : 13 
+------------------------------------------------------------------ + 0 7 Bra + 5 [^\x{aa}] + 7 7 Ket + 12 End +------------------------------------------------------------------ + +/[^\xaa]/utf +Memory allocation - compiled block : 150 +Memory allocation - code portion : 14 +------------------------------------------------------------------ + 0 8 Bra + 5 [^\x{aa}] + 8 8 Ket + 13 End +------------------------------------------------------------------ + +#pattern -memory + +/[^\d]/utf,ucp +------------------------------------------------------------------ + 0 15 Bra + 5 [^\p{Nd}] + 15 15 Ket + 20 End +------------------------------------------------------------------ + +/[[:^alpha:][:^cntrl:]]+/utf,ucp +------------------------------------------------------------------ + 0 19 Bra + 5 [\P{L}\P{Cc}]++ + 19 19 Ket + 24 End +------------------------------------------------------------------ + +/[[:^cntrl:][:^alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 19 Bra + 5 [\P{Cc}\P{L}]++ + 19 19 Ket + 24 End +------------------------------------------------------------------ + +/[[:alpha:]]+/utf,ucp +------------------------------------------------------------------ + 0 16 Bra + 5 [\p{L}]++ + 16 16 Ket + 21 End +------------------------------------------------------------------ + +/[[:^alpha:]\S]+/utf,ucp +------------------------------------------------------------------ + 0 19 Bra + 5 [\P{L}\P{Xsp}]++ + 19 19 Ket + 24 End +------------------------------------------------------------------ + +/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/ +------------------------------------------------------------------ + 0 93 Bra + 5 abc + 11 9 CBra 1 + 18 d + 20 7 Alt + 25 e + 27 16 Ket + 32 *THEN + 33 x + 35 16 CBra 2 + 42 123 + 48 *THEN + 49 4 + 51 37 Alt + 56 567 + 62 9 CBra 3 + 69 b + 71 7 Alt + 76 q + 78 16 Ket + 83 *THEN + 84 xx + 88 53 Ket + 93 93 Ket + 98 End +------------------------------------------------------------------ + 
+/(((a\2)|(a*)\g<-1>))*a?/ +------------------------------------------------------------------ + 0 73 Bra + 5 Brazero + 6 60 SCBra 1 + 13 24 CBra 2 + 20 12 CBra 3 + 27 a + 29 \2 + 32 12 Ket + 37 24 Alt + 42 9 CBra 4 + 49 a* + 51 9 Ket + 56 42 Recurse + 61 48 Ket + 66 60 KetRmax + 71 a?+ + 73 73 Ket + 78 End +------------------------------------------------------------------ + +/((?+1)(\1))/ +------------------------------------------------------------------ + 0 37 Bra + 5 27 CBra 1 + 12 17 Recurse + 17 10 CBra 2 + 24 \1 + 27 10 Ket + 32 27 Ket + 37 37 Ket + 42 End +------------------------------------------------------------------ + +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 29 Bra + 5 15 Recurse + 10 15 Recurse + 15 9 CBra 1 + 22 a + 24 9 Ket + 29 29 Ket + 34 End +------------------------------------------------------------------ + +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 54 Bra + 5 Any + 6 17 CBra 1 + 13 42 Recurse + 18 0 Recurse + 23 8 Alt + 28 \1 + 31 6 Alt + 36 $ + 37 31 Ket + 42 7 CBra 2 + 49 7 Ket + 54 54 Ket + 59 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 71 Bra + 5 Any + 6 34 CBra 1 + 13 59 Recurse + 18 0 Recurse + 23 7 CBra 2 + 30 7 Ket + 35 23 Recurse + 40 8 Alt + 45 \1 + 48 6 Alt + 53 $ + 54 48 Ket + 59 7 CBra 3 + 66 7 Ket + 71 71 Ket + 76 End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ +------------------------------------------------------------------ + 0 115 Bra + 5 10 Recurse + 10 7 CBra 1 + 17 7 Ket + 22 88 CBra 2 + 29 71 CBra 3 + 36 59 CBra 4 + 43 47 CBra 5 + 50 33 CBra 6 + 57 21 CBra 7 + 64 9 Once + 69 \1+ + 73 9 Ket + 78 21 Ket + 83 33 Ket + 88 \x{85} + 90 47 KetRmax + 95 59 Ket +100 5 Alt +105 76 Ket +110 88 Ket +115 115 Ket +120 End 
+------------------------------------------------------------------ + +# Check the absolute limit on nesting (?| etc. This varies with code unit +# width because the workspace is a different number of bytes. It will fail +# with link size 2 in 8-bit and 16-bit but not in 32-bit. + +/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?| 
+))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) +/parens_nest_limit=1000,-fullbincode + +# Use "expand" to create some very long patterns with nested parentheses, in +# order to test workspace overflow. Again, this varies with code unit width, +# and even when it fails in two modes, the error offset differs. It also varies +# with link size - hence multiple tests with different values. + +/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000 + +/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000 +Failed: error 186 at offset 12820: regular expression is too complicated + +/(?(1)(?1)){8,}+()/debug +------------------------------------------------------------------ + 0 181 Bra + 5 159 Once + 10 13 Cond + 15 1 Cond ref + 18 169 Recurse + 23 13 Ket + 28 13 Cond + 33 1 Cond ref + 36 169 Recurse + 41 13 Ket + 46 13 Cond + 51 1 Cond ref + 54 169 Recurse + 59 13 Ket + 64 13 Cond + 69 1 Cond ref + 72 169 Recurse + 77 13 Ket + 82 13 Cond + 87 1 Cond ref + 90 169 Recurse + 95 13 Ket +100 13 Cond +105 1 Cond ref +108 169 Recurse +113 13 Ket +118 13 Cond +123 1 Cond ref +126 169 Recurse +131 13 Ket +136 23 SBraPos +141 13 SCond +146 1 Cond ref +149 169 Recurse +154 13 Ket +159 23 KetRpos +164 159 Ket +169 7 CBra 1 +176 7 Ket +181 181 Ket +186 End +------------------------------------------------------------------ +Capture group count = 1 +Max back 
reference = 1 +May match empty string +Subject length lower bound = 0 + abcd + 0: + 1: + +/(?(1)|a(?1)b){2,}+()/debug +------------------------------------------------------------------ + 0 91 Bra + 5 69 Once + 10 8 Cond + 15 1 Cond ref + 18 14 Alt + 23 a + 25 79 Recurse + 30 b + 32 22 Ket + 37 32 SBraPos + 42 8 SCond + 47 1 Cond ref + 50 14 Alt + 55 a + 57 79 Recurse + 62 b + 64 22 Ket + 69 32 KetRpos + 74 69 Ket + 79 7 CBra 1 + 86 7 Ket + 91 91 Ket + 96 End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +May match empty string +Subject length lower bound = 0 + abcde +No match + +/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug +------------------------------------------------------------------ + 0 327 Bra + 5 102 CBra 1 + 12 5 Recurse + 17 219 Recurse + 22 231 Recurse + 27 243 Recurse + 32 255 Recurse + 37 267 Recurse + 42 279 Recurse + 47 291 Recurse + 52 303 Recurse + 57 303 Recurse + 62 291 Recurse + 67 279 Recurse + 72 267 Recurse + 77 255 Recurse + 82 243 Recurse + 87 231 Recurse + 92 219 Recurse + 97 5 Recurse +102 0 Recurse +107 102 Ket +112 102 SCBra 1 +119 5 Recurse +124 219 Recurse +129 231 Recurse +134 243 Recurse +139 255 Recurse +144 267 Recurse +149 279 Recurse +154 291 Recurse +159 303 Recurse +164 303 Recurse +169 291 Recurse +174 279 Recurse +179 267 Recurse +184 255 Recurse +189 243 Recurse +194 231 Recurse +199 219 Recurse +204 5 Recurse +209 0 Recurse +214 102 KetRmax +219 7 CBra 2 +226 7 Ket +231 7 CBra 3 +238 7 Ket +243 7 CBra 4 +250 7 Ket +255 7 CBra 5 +262 7 Ket +267 7 CBra 6 +274 7 Ket +279 7 CBra 7 +286 7 Ket +291 7 CBra 8 +298 7 Ket +303 7 CBra 9 +310 7 Ket +315 7 CBra 10 +322 7 Ket +327 327 Ket +332 End +------------------------------------------------------------------ +Capture group count = 10 +May match empty string +Subject length lower bound = 0 + 
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/ +Failed: error 114 at offset 509: missing closing parenthesis + +/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode + +#pattern -fullbincode + +/\[()]{65535}/expand + +# End of testinput8 diff --git a/testdata/testoutput9 b/testdata/testoutput9 new file mode 100644 index 0000000..1ec4317 --- /dev/null +++ b/testdata/testoutput9 @@ -0,0 +1,374 @@ +# This set of tests is run only with the 8-bit library. They must not require +# UTF-8 or Unicode property support. */ + +#forbid_utf +#newline_default lf any anycrlf + +/ab/ +\= Expect error message (too big char) and no match + A\x{123}B +** Character \x{123} is greater than 255 and UTF-8 mode is not enabled. +** Truncation will probably give the wrong result. +No match + A\o{443}B +** Character \x{123} is greater than 255 and UTF-8 mode is not enabled. +** Truncation will probably give the wrong result. 
+No match + +/\x{100}/I +Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large + +/\o{400}/I +Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large + +/ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional leading comment +(?: (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address +| # or +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # one word, optionally followed by.... +(?: +[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or... +\( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) | # comments, or... + +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +# quoted strings +)* +< (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # leading < +(?: @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* + +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* , (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 
+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +)* # further okay, if led by comma +: # closing colon +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* )? # optional route +(?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... +[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) # initial word +(?: (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| +" (?: # opening quote... 
+[^\\\x80-\xff\n\015"] # Anything except backslash and quote +| # or +\\ [^\x80-\xff] # Escaped something (something != CR) +)* " # closing quote +) )* # further okay, if led by a period +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* @ (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # initial subdomain +(?: # +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* \. # if led by a period... +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* (?: +[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... +(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom +| \[ # [ +(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff +\] # ] +) # ...further okay +)* +# address spec +(?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* > # trailing > +# name and address +) (?: [\040\t] | \( +(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* +\) )* # optional trailing comment +/Ix +Capture group count = 0 +Contains explicit CR or LF match +Options: extended +Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 + 9 = ? 
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e + f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f +Subject length lower bound = 3 + +/\h/I +Capture group count = 0 +Starting code units: \x09 \x20 \xa0 +Subject length lower bound = 1 + +/\H/I +Capture group count = 0 +Subject length lower bound = 1 + +/\v/I +Capture group count = 0 +Starting code units: \x0a \x0b \x0c \x0d \x85 +Subject length lower bound = 1 + +/\V/I +Capture group count = 0 +Subject length lower bound = 1 + +/\R/I +Capture group count = 0 +Starting code units: \x0a \x0b \x0c \x0d \x85 +Subject length lower bound = 1 + +/[\h]/B +------------------------------------------------------------------ + Bra + [\x09 \xa0] + Ket + End +------------------------------------------------------------------ + >\x09< + 0: \x09 + +/[\h]+/B +------------------------------------------------------------------ + Bra + [\x09 \xa0]++ + Ket + End +------------------------------------------------------------------ + >\x09\x20\xa0< + 0: \x09 \xa0 + +/[\v]/B +------------------------------------------------------------------ + Bra + [\x0a-\x0d\x85] + Ket + End +------------------------------------------------------------------ + +/[\H]/B +------------------------------------------------------------------ + Bra + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] + Ket + End +------------------------------------------------------------------ + +/[^\h]/B +------------------------------------------------------------------ + Bra + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] (neg) + Ket + End +------------------------------------------------------------------ + +/[\V]/B +------------------------------------------------------------------ + Bra + [\x00-\x09\x0e-\x84\x86-\xff] + Ket + End +------------------------------------------------------------------ + +/[\x0a\V]/B +------------------------------------------------------------------ + Bra + [\x00-\x0a\x0e-\x84\x86-\xff] + Ket + End 
+------------------------------------------------------------------ + +/\777/I +Failed: error 151 at offset 4: octal value is greater than \377 in 8-bit non-UTF-8 mode + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark +Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) + XX + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark,alt_verbnames +Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) + XX + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark + XX + 0: XX +MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark,alt_verbnames + XX + 0: XX +MK: 
0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE + +/\u0100/alt_bsux,allow_empty_class,match_unset_backref,dupnames +Failed: error 177 at offset 6: character code point value in \u.... sequence is too large + +/[\u0100-\u0200]/alt_bsux,allow_empty_class,match_unset_backref,dupnames +Failed: error 177 at offset 7: character code point value in \u.... sequence is too large + +/[^\x00-a]{12,}[^b-\xff]*/B +------------------------------------------------------------------ + Bra + [b-\xff] (neg){12,}+ + [\x00-a] (neg)*+ + Ket + End +------------------------------------------------------------------ + +/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B +------------------------------------------------------------------ + Bra + [\x00-\x08\x0e-\x1f!-\xff] (neg)*+ + \s* + + [0-9A-Z_a-z]++ + \W+ + + [\x00-/:-\xff] (neg)*+ + \d + 0 + [\x00-/:-@[-^`{-\xff] (neg){4,6}+ + \w* + A + Ket + End +------------------------------------------------------------------ + +/(*MARK:a\x{100}b)z/alt_verbnames +Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large + +/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/ +Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) + +/(?i:A{1,}\6666666666)/ +Failed: error 151 at offset 13: octal value is greater than \377 in 8-bit non-UTF-8 mode + A\x{1b6}6666666 + +# End of testinput9 diff --git a/testdata/testoutputEBC b/testdata/testoutputEBC new file mode 100644 index 0000000..4edc8f9 --- /dev/null +++ b/testdata/testoutputEBC @@ -0,0 +1,206 @@ +PCRE2 version 
10.32-RC1 2018-02-19 +# This is a specialized test for checking, when PCRE2 is compiled with the +# EBCDIC option but in an ASCII environment, that newline, white space, and \c +# functionality is working. It catches cases where explicit values such as 0x0a +# have been used instead of names like CHAR_LF. Needless to say, it is not a +# genuine EBCDIC test! In patterns, alphabetic characters that follow a +# backslash must be in EBCDIC code. In data, NL, NEL, LF, ESC, and DEL must be +# in EBCDIC, but can of course be specified as escapes. + +# Test default newline and variations + +/^A/m + ABC + 0: A + 12\x15ABC + 0: A + +/^A/m,newline=any + 12\x15ABC + 0: A + 12\x0dABC + 0: A + 12\x0d\x15ABC + 0: A + 12\x25ABC + 0: A + +/^A/m,newline=anycrlf + 12\x15ABC + 0: A + 12\x0dABC + 0: A + 12\x0d\x15ABC + 0: A + ** Fail +No match + 12\x25ABC +No match + +# Test \h + +/^A\ˆ/ + A B + 0: A\x20 + A\x41B + 0: AA + +# Test \H + +/^A\È/ + AB + 0: AB + A\x42B + 0: AB + ** Fail +No match + A B +No match + A\x41B +No match + +# Test \R + +/^A\Ù/ + A\x15B + 0: A\x15 + A\x0dB + 0: A\x0d + A\x25B + 0: A\x25 + A\x0bB + 0: A\x0b + A\x0cB + 0: A\x0c + ** Fail +No match + A B +No match + +# Test \v + +/^A\¥/ + A\x15B + 0: A\x15 + A\x0dB + 0: A\x0d + A\x25B + 0: A\x25 + A\x0bB + 0: A\x0b + A\x0cB + 0: A\x0c + ** Fail +No match + A B +No match + +# Test \V + +/^A\å/ + A B + 0: A\x20 + ** Fail +No match + A\x15B +No match + A\x0dB +No match + A\x25B +No match + A\x0bB +No match + A\x0cB +No match + +# For repeated items, use an atomic group so that the output is the same +# for DFA matching (otherwise it may show multiple matches). 
+ +# Test \h+ + +/^A(?>\ˆ+)/ + A B + 0: A\x20 + +# Test \H+ + +/^A(?>\È+)/ + AB + 0: AB + ** Fail +No match + A B +No match + +# Test \R+ + +/^A(?>\Ù+)/ + A\x15B + 0: A\x15 + A\x0dB + 0: A\x0d + A\x25B + 0: A\x25 + A\x0bB + 0: A\x0b + A\x0cB + 0: A\x0c + ** Fail +No match + A B +No match + +# Test \v+ + +/^A(?>\¥+)/ + A\x15B + 0: A\x15 + A\x0dB + 0: A\x0d + A\x25B + 0: A\x25 + A\x0bB + 0: A\x0b + A\x0cB + 0: A\x0c + ** Fail +No match + A B +No match + +# Test \V+ + +/^A(?>\å+)/ + A B + 0: A\x20B + ** Fail +No match + A\x15B +No match + A\x0dB +No match + A\x25B +No match + A\x0bB +No match + A\x0cB +No match + +# Test \c functionality + +/\ƒ@\ƒA\ƒb\ƒC\ƒd\ƒE\ƒf\ƒG\ƒh\ƒI\ƒJ\ƒK\ƒl\ƒm\ƒN\ƒO\ƒp\ƒq\ƒr\ƒS\ƒT\ƒu\ƒV\ƒW\ƒX\ƒy\ƒZ/ + \x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f + 0: \x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a + +/\ƒ[\ƒ\\ƒ]\ƒ^\ƒ_/ + \x18\x19\x1a\x1b\x1c\x1d\x1e\x1f + 0: \x1b\x1c\x1d\x1e\x1f + +/\ƒ?/ + A\xffB + 0: \xff + +/\ƒ&/ +Failed: error 168 at offset 3: \c\x20must\x20be\x20followed\x20by\x20a\x20letter\x20or\x20one\x20of\x20[\]^_\x3f + +# End diff --git a/testdata/testoutputheap-16 b/testdata/testoutputheap-16 new file mode 100644 index 0000000..03759b6 --- /dev/null +++ b/testdata/testoutputheap-16 @@ -0,0 +1,88 @@ +#pattern framesize, memory + +/abcd/ +Memory allocation (code space): 26 +Frame size for pcre2_match(): 128 + abcd\=memory +malloc 20480 + 0: abcd + abcd\=find_limits +Minimum heap limit = 1 +Minimum match limit = 2 +Minimum depth limit = 2 + 0: abcd + +/(((((((((((((((((((((((((((((( (^abc|xyz){1,20}$ ))))))))))))))))))))))))))))))/x +Memory allocation (code space): 1294 +Frame size for pcre2_match(): 624 + abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcX\=memory +malloc 40960 +free unremembered block +No match + 
abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcX\=find_limits +Minimum heap limit = 22 +Minimum match limit = 37 +Minimum depth limit = 35 +No match + +/ab(cd)/ +Memory allocation (code space): 36 +Frame size for pcre2_match(): 144 + abcd\=memory + 0: abcd + 1: cd + abcd\=memory,ovector=0 +free 40960 +free unremembered block +malloc 128 +malloc 20480 + 0: abcd + 1: cd + +/\[(a)]{1000}/expand,framesize +Memory allocation (code space): 14010 +Frame size for pcre2_match(): 16128 + \[a]{1000}\=ovector=1 +Matched, but too many substrings + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +# The heapframes_size option gets pcre2test to show the size of the heapframes +# vector that after pcre2_match() has run. Running a match with ovector=0 +# causes the match data block to be freed, thus releasing that vector. 
+ +/\[(a)]{1000}/expand,framesize +Memory allocation (code space): 14010 +Frame size for pcre2_match(): 16128 + \[a]{1000}\=ovector=1,heapframes_size +Matched, but too many substrings + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +Heapframes size in match_data: 20643840 + +/a/heapframes_size,framesize +Memory allocation (code space): 14 +Frame size for pcre2_match(): 128 + a\=ovector=0 + 0: a +Heapframes size in match_data: 20480 + +/a|(b){200}/g,expand,heapframes_size +Memory allocation (code space): 2818 +Frame size for pcre2_match(): 144 + abacus z\[b]{200}z + 0: a + 0: a + 0: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + 1: b +Heapframes size in match_data: 40960 + a\=ovector=0 + 0: a +Heapframes size in match_data: 20480 + +/(a)/replace=>$1< +Memory allocation (code space): 24 +Frame size for pcre2_match(): 144 + cat\=heapframes_size + 1: c>a$1< +Memory allocation 
(code space): 48 +Frame size for pcre2_match(): 144 + cat\=heapframes_size + 1: c>a$1< +Memory allocation (code space): 17 +Frame size for pcre2_match(): 144 + cat\=heapframes_size + 1: c>a>>\xaa<<< + >>>\xba<<< + +/[\W]+/ + >>>\xaa<<< + >>>\xba<<< + +/[^[:alpha:]]+/ + >>>\xaa<<< + >>>\xba<<< + +/\w+/ + >>>\xaa<<< + >>>\xba<<< + +/[\w]+/ + >>>\xaa<<< + >>>\xba<<< + +/[[:alpha:]]+/ + >>>\xaa<<< + >>>\xba<<< + +/[[:alpha:]][[:lower:]][[:upper:]]/IB + +# End of testinput3 diff --git a/testdata/wintestoutput3 b/testdata/wintestoutput3 new file mode 100644 index 0000000..b1894b6 --- /dev/null +++ b/testdata/wintestoutput3 @@ -0,0 +1,175 @@ +# This set of tests checks local-specific features, using the "fr_FR" locale. +# It is not Perl-compatible. When run via RunTest, the locale is edited to +# be whichever of "fr_FR", "french", or "fr" is found to exist. There is +# different version of this file called wintestinput3 for use on Windows, +# where the locale is called "french" and the tests are run using +# RunTest.bat. 
+ +#forbid_utf + +/^[\w]+/ + *** Failers +No match + École +No match + +/^[\w]+/locale=french + École + 0: École + +/^[\w]+/ + *** Failers +No match + École +No match + +/^[\W]+/ + École + 0: \xc9 + +/^[\W]+/locale=french + *** Failers + 0: *** + École +No match + +/[\b]/ + \b + 0: \x08 + *** Failers +No match + a +No match + +/[\b]/locale=french + \b + 0: \x08 + *** Failers +No match + a +No match + +/^\w+/ + *** Failers +No match + École +No match + +/^\w+/locale=french + École + 0: École + +/(.+)\b(.+)/ + École + 0: \xc9cole + 1: \xc9 + 2: cole + +/(.+)\b(.+)/locale=french + *** Failers + 0: *** Failers + 1: *** + 2: Failers + École +No match + +/École/i + École + 0: \xc9cole + *** Failers +No match + école +No match + +/École/i,locale=french + École + 0: École + école + 0: école + +/\w/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/\w/I,locale=french +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + ƒ Š Œ Ž š œ ž Ÿ ª ² ³ µ ¹ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö + Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý + þ ÿ +Subject length lower bound = 1 + +# All remaining tests are in the french locale, so set the default. 
+ +#pattern locale=french + +/^[\xc8-\xc9]/i + École + 0: É + école + 0: é + +/^[\xc8-\xc9]/ + École + 0: É + *** Failers +No match + école +No match + +/\W+/ + >>>\xaa<<< + 0: >>> + >>>\xba<<< + 0: >>> + +/[\W]+/ + >>>\xaa<<< + 0: >>> + >>>\xba<<< + 0: >>> + +/[^[:alpha:]]+/ + >>>\xaa<<< + 0: >>> + >>>\xba<<< + 0: >>> + +/\w+/ + >>>\xaa<<< + 0: ª + >>>\xba<<< + 0: º + +/[\w]+/ + >>>\xaa<<< + 0: ª + >>>\xba<<< + 0: º + +/[[:alpha:]]+/ + >>>\xaa<<< + 0: ª + >>>\xba<<< + 0: º + +/[[:alpha:]][[:lower:]][[:upper:]]/IB +------------------------------------------------------------------ + Bra + [A-Za-z\x83\x8a\x8c\x8e\x9a\x9c\x9e\x9f\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff] + [a-z\x83\x9a\x9c\x9e\xaa\xb5\xba\xdf-\xf6\xf8-\xff] + [A-Z\x8a\x8c\x8e\x9f\xc0-\xd6\xd8-\xde] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z ƒ Š Œ Ž š œ ž Ÿ ª µ º + À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å + æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ +Subject length lower bound = 3 + +# End of testinput3 diff --git a/vms/configure.com b/vms/configure.com new file mode 100644 index 0000000..b50365d --- /dev/null +++ b/vms/configure.com @@ -0,0 +1,1144 @@ +$! Configure procedure +$! (c) Alexey Chupahin 11-APR-2024 +$! alexey@vaxman.de, alexey_chupahin@mail.ru +$! +$! +$ SET NOON +$SET NOVER +$WRITE SYS$OUTPUT " " +$WRITE SYS$OUTPUT "Configuring PCRE2 library for OpenVMS " +$WRITE SYS$OUTPUT "(c) Alexey Chupahin CHAPG" +$WRITE SYS$OUTPUT " " +$! Checking architecture +$DECC = F$SEARCH("SYS$SYSTEM:DECC$COMPILER.EXE") .NES. "" +$ IF F$GETSYI("ARCH_TYPE").EQ.1 THEN CPU = "VAX" +$ IF F$GETSYI("ARCH_TYPE").EQ.2 THEN CPU = "Alpha" +$ IF F$GETSYI("ARCH_TYPE").EQ.3 THEN CPU = "I64" +$ IF F$GETSYI("ARCH_TYPE").EQ.4 THEN CPU = "x86" +$WRITE SYS$OUTPUT "Checking architecture ... 
", CPU +$IF ( (CPU.EQS."Alpha").OR.(CPU.EQS."I64").OR.(CPU.EQS."x86") ) +$ THEN +$ SHARED=64 +$ ELSE +$ SHARED=32 +$ENDIF +$! +$IF (DECC) THEN $WRITE SYS$OUTPUT "Compiler ... DEC C" +$IF (.NOT. DECC) THEN $WRITE SYS$OUTPUT "BAD compiler" GOTO EXIT +$MMS = F$SEARCH("SYS$SYSTEM:MMS.EXE") .NES. "" +$MMK = F$TYPE(MMK) +$IF (MMS .OR. MMK.NES."") THEN GOTO TEST_LIBRARIES +$! I cant find any make tool +$ WRITE SYS$OUTPUT "Install MMS or MMK" +$GOTO EXIT +$!PERL = F$TYPE(MMK) +$!IF (PERL.NES."") THEN GOTO TEST_LIBRARIES +$!WRITE SYS$OUTPUT "Install PERL" +$!GOTO EXIT +$! +$! +$! Is it package root directory? If no, go to [-] +$ IF (F$SEARCH("[]VMS.DIR").EQS."") .AND. (F$SEARCH("[]vms.dir").EQS."") +$ THEN +$ SET DEF [-] +$ ENDIF +$! +$TEST_LIBRARIES: +$! Setting as MAKE utility one of MMS or MMK. I prefer MMS. +$IF (MMK.NES."") THEN MAKE="MMK" +$IF (MMS) THEN MAKE="MMS" +$WRITE SYS$OUTPUT "Checking build utility ... ''MAKE'" +$!WRITE SYS$OUTPUT "Checking PERL ... found" +$WRITE SYS$OUTPUT " " +$! +$! +$! Check files and ODS-2. unzip makes files FILE.H.GENERIC like FILE_H.GENERIC. Should rename to FILE.H_GENERIC +$IF F$SEARCH("[.SRC]PCRE2_H.GENERIC") .NES. "" +$ THEN +$ REN [.SRC]PCRE2_H.GENERIC [.SRC]PCRE2.H_GENERIC +$ ELSE +$ IF F$SEARCH("[.SRC]PCRE2.H_GENERIC") .EQS. "" +$ THEN +$ WRITE SYS$OUTPUT "Not ODS-2 volume, or PCRE2_H.GENERIC not found" +$ EXIT +$ ENDIF +$ENDIF +$IF F$SEARCH("[.SRC]PCRE2_CHARTABLES_C.DIST") .NES. "" +$ THEN +$ REN [.SRC]PCRE2_CHARTABLES_C.DIST [.SRC]PCRE2_CHARTABLES.C_DIST +$ ELSE +$ IF F$SEARCH("[.SRC]PCRE2_CHARTABLES.C_DIST") .EQS. "" +$ THEN +$ WRITE SYS$OUTPUT "Not ODS-2 volume, or PCRE2_CHARTABLES_C.DIST not found" +$ EXIT +$ ENDIF +$ENDIF +$WRITE SYS$OUTPUT "Source Files OK" +$! +$! +$I18 = F$SEARCH("SYS$I18N_ICONV:ISO8859-1_UTF-8.ICONV") .NES. 
""
+$! Default to "no iconv" so the summary at NEXT0 has a defined value even
+$! when the ICONV conversion table is not installed.
+$HAVE_ICONV=0
+$IF (I18)
+$ THEN
+$ WRITE SYS$OUTPUT "Found I18 extension ICONV codes"
+$!"Checking for iconv "
+$! Send compiler messages to the null device while the probe is compiled.
+$ DEFINE SYS$ERROR _NLA0:
+$ DEFINE SYS$OUTPUT _NLA0:
+$ CC/OBJECT=TEST.OBJ SYS$INPUT
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <iconv.h>
+
+int main ()
+{
+  /* */
+  /* Declare variables to be used */
+  /* */
+  char fromcodeset[30];
+  char tocodeset[30];
+  int iconv_opened;
+  iconv_t iconv_struct; /* Iconv descriptor */
+
+  /* */
+  /* Initialize variables */
+  /* */
+  sprintf(fromcodeset,"UTF-8");
+  sprintf(tocodeset,"ISO8859-1");
+  iconv_opened = 0;
+
+  /* */
+  /* Attempt to create a conversion descriptor for the codesets */
+  /* specified. If the return value from iconv_open is -1 then */
+  /* an error has occurred. Check value of errno. */
+  /* */
+  if ((iconv_struct = iconv_open (tocodeset, fromcodeset)) == (iconv_t)-1)
+  {
+    /* */
+    /* Check the value of errno */
+    /* */
+    switch (errno)
+    {
+      case EMFILE:
+      case ENFILE:
+        printf("Too many iconv conversion files open\n");
+        exit(2);
+        break;
+
+      case ENOMEM:
+        printf("Not enough memory\n");
+        printf("Checking iconv ..... no\n");
+        exit(2);
+        break;
+
+      case EINVAL:
+        printf("Unsupported conversion\n");
+        exit(2);
+        break;
+
+      default:
+        printf("Unexpected error from iconv_open\n");
+        exit(2);
+        break;
+    }
+  }
+  else
+    /* */
+    /* Successfully allocated a conversion descriptor */
+    /* */
+    iconv_opened = 1;
+
+  /* */
+  /* Was a conversion descriptor allocated */
+  /* */
+  if (iconv_opened)
+  {
+    /* */
+    /* Attempt to deallocate the conversion descriptor. If */
+    /* iconv_close returns -1 then an error has occurred. */
+    /* */
+    if (iconv_close (iconv_struct) == -1)
+    {
+      /* */
+      /* An error occurred. Check the value of errno */
+      /* */
+      switch (errno)
+      {
+        case EBADF:
+          printf("Conversion descriptor is invalid\n");
+          exit(2);
+          break;
+        default:
+          break;
+      }
+    }
+    else
+      printf("Checking iconv ..... yes\n");
+  }
+  return(1);
+}
+$!
+$! Any status other than %X10B90001 is treated as a failed compile.
+$TMP = $STATUS
+$DEASS SYS$ERROR
+$DEAS SYS$OUTPUT
+$!WRITE SYS$OUTPUT TMP
+$IF (TMP .NE. %X10B90001)
+$ THEN
+$ HAVE_ICONV=0
+$ GOTO NEXT0
+$ENDIF
+$DEFINE SYS$ERROR _NLA0:
+$DEFINE SYS$OUTPUT _NLA0:
+$LINK/EXE=TEST TEST
+$! Any status other than %X10000001 is treated as a failed link.
+$TMP = $STATUS
+$DEAS SYS$ERROR
+$DEAS SYS$OUTPUT
+$!WRITE SYS$OUTPUT TMP
+$IF (TMP .NE. %X10000001)
+$ THEN
+$ HAVE_ICONV=0
+$ GOTO NEXT0
+$ ELSE
+$ HAVE_ICONV=1
+$ENDIF
+$! Close the IF (I18) block opened above; without this ENDIF a system with
+$! no ICONV table skips every remaining command in the procedure.
+$ENDIF
+$NEXT0:
+$IF (HAVE_ICONV.EQ.1)
+$ THEN
+$ WRITE SYS$OUTPUT "Checking for iconv ... Yes"
+$ ELSE
+$ WRITE SYS$OUTPUT "Checking for iconv ... No"
+$ENDIF
+$!
+$!
+$! Checking for BZIP2 library: compile and link a probe that prints the
+$! library version. The script stops (GOTO EXIT) when either step fails.
+$!
+$ DEFINE SYS$ERROR _NLA0:
+$ DEFINE SYS$OUTPUT _NLA0:
+$ CC/OBJECT=TEST.OBJ/INCLUDE=(BZ2LIB) SYS$INPUT
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <bzlib.h>
+ int main()
+ {
+ printf("checking version bzip2 library: %s\n",BZ2_bzlibVersion());
+ }
+$TMP = $STATUS
+$DEASS SYS$ERROR
+$DEAS SYS$OUTPUT
+$!WRITE SYS$OUTPUT TMP
+$IF (TMP .NE. %X10B90001)
+$ THEN
+$ HAVE_BZIP2=0
+$ GOTO ERR0
+$ENDIF
+$DEFINE SYS$ERROR _NLA0:
+$DEFINE SYS$OUTPUT _NLA0:
+$!Testing for CHAPG BZIP2
+$!
+$LINK/EXE=TEST TEST,BZ2LIB:BZIP2/OPT
+$TMP = $STATUS
+$DEAS SYS$ERROR
+$DEAS SYS$OUTPUT
+$IF (TMP .NE. %X10000001)
+$ THEN
+$ HAVE_BZIP2=0
+$ GOTO ERR0
+$ ELSE
+$ HAVE_BZIP2=1
+$ENDIF
+$ERR0:
+$IF (HAVE_BZIP2.EQ.1)
+$ THEN
+$ WRITE SYS$OUTPUT "Checking for CHAPG bzip2 library ... Yes"
+$ RUN TEST
+$ GOTO NEXT4
+$ ELSE
+$ WRITE SYS$OUTPUT "Checking for correct bzip2 library ... No"
+$ WRITE SYS$OUTPUT "To get bzip2 archives support, please download"
+$ WRITE SYS$OUTPUT "and install good library ported by Alexey Chupahin"
+$ WRITE SYS$OUTPUT "from openvms clamav site http://vaxvms.org/clamav/"
+$ WRITE SYS$OUTPUT ""
+$ GOTO EXIT
+$ENDIF
+$NEXT4:
+$!
+$! 
+$!"Checking for CHAPG zlib library "
+$! Compile and link a probe that prints the zlib version. The script stops
+$! (GOTO EXIT) when either step fails.
+$DEFINE SYS$ERROR _NLA0:
+$DEFINE SYS$OUTPUT _NLA0:
+$ CC/OBJECT=TEST.OBJ/INCLUDE=(ZLIB) SYS$INPUT
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <zlib.h>
+ int main()
+ {
+ printf("checking version zlib: %s\n",zlibVersion());
+ // printf("checking zlib is correct ");
+ }
+
+$TMP = $STATUS
+$DEASS SYS$ERROR
+$DEAS SYS$OUTPUT
+$IF (TMP .NE. %X10B90001)
+$ THEN
+$ HAVE_ZLIB=0
+$ GOTO ERR4
+$ENDIF
+$DEFINE SYS$ERROR _NLA0:
+$DEFINE SYS$OUTPUT _NLA0:
+$!
+$LINK/EXE=TEST TEST,ZLIB:ZLIB.OPT/OPT
+$TMP = $STATUS
+$DEAS SYS$ERROR
+$DEAS SYS$OUTPUT
+$IF (TMP .NE. %X10000001)
+$ THEN
+$ HAVE_ZLIB=0
+$ GOTO ERR4
+$ ELSE
+$ HAVE_ZLIB=1
+$ENDIF
+$ERR4:
+$IF (HAVE_ZLIB.EQ.1)
+$ THEN
+$ WRITE SYS$OUTPUT "Checking for CHAPG zlib library ... Yes"
+$ RUN TEST
+$ GOTO NEXT5
+$ ELSE
+$ WRITE SYS$OUTPUT "Checking for CHAPG zlib library ... No"
+$ WRITE SYS$OUTPUT "Please install ZLIB from"
+$ WRITE SYS$OUTPUT "http://vaxvms.org/libsdl/required.html"
+$ GOTO EXIT
+$ENDIF
+$!
+$NEXT5:
+
+$!
+$!WRITING BUILD FILES
+$! BUILD.COM runs the selected make tool in [.SRC], then writes
+$! PCRE2$STARTUP.COM, which defines the PCRE2 and PCRE2$SHR logicals and the
+$! PCRE2GREP foreign command relative to that directory.
+$OPEN/WRITE OUT BUILD.COM
+$ WRITE OUT "$","SET DEF [.SRC]"
+$ WRITE OUT "$",MAKE
+$ WRITE OUT "$ CURRENT = F$ENVIRONMENT (""DEFAULT"") "
+$ WRITE OUT "$","SET DEF [-]"
+$ WRITE OUT "$CLAM=CURRENT"
+$ WRITE OUT "$OPEN/WRITE OUTT PCRE2$STARTUP.COM"
+$ WRITE OUT "$WRITE OUTT ""DEFINE PCRE2 ","'","'","CLAM'"" "
+$ WRITE OUT "$WRITE OUTT ""DEFINE PCRE2$SHR ","'","'","CLAM'PCRE2$SHR.EXE"" "
+$ WRITE OUT "$WRITE OUTT ""PCRE2GREP:==$", "'","'","CLAM'PCRE2GREP.EXE"""
+$ WRITE OUT "$CLOSE OUTT"
+$ WRITE OUT "$WRITE SYS$OUTPUT "" "" "
+$ WRITE OUT "$WRITE SYS$OUTPUT ""***************************************************************************** "" "
+$ WRITE OUT "$WRITE SYS$OUTPUT ""Compilation is completed."" "
+$ WRITE OUT "$WRITE SYS$OUTPUT ""PCRE2$STARTUP.COM is created. "" "
+$ WRITE OUT "$WRITE SYS$OUTPUT ""This file setups all logicals needed."" "
+$ WRITE OUT "$WRITE SYS$OUTPUT ""It should be executed before using PCRE2 Library. 
"" " +$ WRITE OUT "$WRITE SYS$OUTPUT ""Use PCRE2:PCRE2.OPT to link you program"" " +$ WRITE OUT "$WRITE SYS$OUTPUT ""PCRE2GREP grep utility is installed here for your needs "" " +$ WRITE OUT "$WRITE SYS$OUTPUT ""***************************************************************************** "" " +$CLOSE OUT +$! BUILD.COM finished +$ WRITE SYS$OUTPUT "BUILD.COM has been created" +$! +$!Creating OPT.OPT file containig external libraries for linker +$OPEN/WRITE OUT [.SRC]PCRE2.OPT +$IF (SHARED.GT.0) THEN WRITE OUT "PCRE2:PCRE2$SHR/SHARE" +$IF (SHARED.EQ.0) +$ THEN +$ WRITE OUT "PCRE2:PCRE2/LIB" +$ENDIF +$CLOSE OUT +$WRITE SYS$OUTPUT "PCRE2.OPT has been created" +$IF (SHARED.EQ.64) +$ THEN +$ COPY SYS$INPUT [.SRC]PCRE2$DEF.OPT +! +case_sensitive=NO +symbol_vector = (PCRE2_CONFIG_8 = PROCEDURE) +symbol_vector = (PCRE2_MAKETABLES_8 = PROCEDURE) +symbol_vector = (PCRE2_MAKETABLES_FREE_8 = PROCEDURE) +symbol_vector = (PCRE2_CODE_COPY_8 = PROCEDURE) +symbol_vector = (PCRE2_CODE_FREE_8 = PROCEDURE) +symbol_vector = (_PCRE2_CHECK_ESCAPE_8 = PROCEDURE) +symbol_vector = (PCRE2_COMPILE_8 = PROCEDURE) +symbol_vector = (PCRE2_CODE_COPY_WITH_TABLES_8 = PROCEDURE) +symbol_vector = (PCRE2_GET_ERROR_MESSAGE_8 = PROCEDURE) +symbol_vector = (PCRE2_MATCH_DATA_CREATE_8 = PROCEDURE) +symbol_vector = (VMS_PCRE2_GET_M_D_HPFRAM_S_8 = PROCEDURE) +symbol_vector = (PCRE2_GET_MATCH_DATA_SIZE_8 = PROCEDURE) +symbol_vector = (PCRE2_GET_STARTCHAR_8 = PROCEDURE) +symbol_vector = (PCRE2_GET_OVECTOR_COUNT_8 = PROCEDURE) +symbol_vector = (PCRE2_GET_OVECTOR_POINTER_8 = PROCEDURE) +symbol_vector = (PCRE2_GET_MARK_8 = PROCEDURE) +symbol_vector = (PCRE2_MATCH_DATA_FREE_8 = PROCEDURE) +symbol_vector = (VMS_PCRE2_M_D_CRT_FR_PATT_8 = PROCEDURE) +symbol_vector = (PCRE2_MATCH_8 = PROCEDURE) +symbol_vector = (PCRE2_PATTERN_INFO_8 = PROCEDURE) +symbol_vector = (PCRE2_CALLOUT_ENUMERATE_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_GLOB_ESCAPE_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_GLOB_SEPARATOR_8 = PROCEDURE) 
+symbol_vector = (VMS_PCRE2_SET_RCRS_MEM_MNG_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_DEPTH_LIMIT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_RECURSION_LIMIT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_OFFSET_LIMIT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_MATCH_LIMIT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_HEAP_LIMIT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_SUBSTITUTE_CALLOUT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_CALLOUT_8 = PROCEDURE) +symbol_vector = (VMS_PCRE2_SET_CMPL_RCRS_GRD_8 = PROCEDURE) +symbol_vector = (VMS_PCRE2_SET_CMPL_EXT_OPT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_PARENS_NEST_LIMIT_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_MAX_VARLOOKBEHIND_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_NEWLINE_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_MAX_PATTERN_LENGTH_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_BSR_8 = PROCEDURE) +symbol_vector = (PCRE2_SET_CHARACTER_TABLES_8 = PROCEDURE) +symbol_vector = (PCRE2_CONVERT_CONTEXT_FREE_8 = PROCEDURE) +symbol_vector = (PCRE2_MATCH_CONTEXT_FREE_8 = PROCEDURE) +symbol_vector = (PCRE2_COMPILE_CONTEXT_FREE_8 = PROCEDURE) +symbol_vector = (PCRE2_GENERAL_CONTEXT_FREE_8 = PROCEDURE) +symbol_vector = (PCRE2_CONVERT_CONTEXT_COPY_8 = PROCEDURE) +symbol_vector = (PCRE2_MATCH_CONTEXT_COPY_8 = PROCEDURE) +symbol_vector = (PCRE2_COMPILE_CONTEXT_COPY_8 = PROCEDURE) +symbol_vector = (PCRE2_GENERAL_CONTEXT_COPY_8 = PROCEDURE) +symbol_vector = (_PCRE2_MEMCTL_MALLOC_8 = PROCEDURE) +symbol_vector = (PCRE2_CONVERT_CONTEXT_CREATE_8 = PROCEDURE) +symbol_vector = (PCRE2_MATCH_CONTEXT_CREATE_8 = PROCEDURE) +symbol_vector = (PCRE2_COMPILE_CONTEXT_CREATE_8 = PROCEDURE) +symbol_vector = (PCRE2_GENERAL_CONTEXT_CREATE_8 = PROCEDURE) +symbol_vector = (_PCRE2_AUTO_POSSESSIFY_8 = PROCEDURE) +symbol_vector = (_PCRE2_CKD_SMUL = PROCEDURE) +symbol_vector = (_PCRE2_FIND_BRACKET_8 = PROCEDURE) +symbol_vector = (_PCRE2_IS_NEWLINE_8 = PROCEDURE) +symbol_vector = (_PCRE2_WAS_NEWLINE_8 = PROCEDURE) +symbol_vector = (_PCRE2_SCRIPT_RUN_8 = 
PROCEDURE) +symbol_vector = (_PCRE2_STRCMP_8 = PROCEDURE) +symbol_vector = (_PCRE2_STRCPY_C8_8 = PROCEDURE) +symbol_vector = (_PCRE2_STRLEN_8 = PROCEDURE) +symbol_vector = (_PCRE2_STRNCMP_C8_8 = PROCEDURE) +symbol_vector = (_PCRE2_STRNCMP_8 = PROCEDURE) +symbol_vector = (_PCRE2_STRCMP_C8_8 = PROCEDURE) +symbol_vector = (_PCRE2_STUDY_8 = PROCEDURE) +symbol_vector = (_PCRE2_VALID_UTF_8 = PROCEDURE) +symbol_vector = (VMS_PCRE2_DEF_CMPL_CNTXT_8 = DATA) +symbol_vector = (VMS_PCRE2_DEF_CNVRT_CNTXT_8 = DATA) +symbol_vector = (_PCRE2_CALLOUT_END_DELIMS_8 = DATA) +symbol_vector = (_PCRE2_CALLOUT_START_DELIMS_8 = DATA) +symbol_vector = (_PCRE2_DEFAULT_MATCH_CONTEXT_8 = DATA) +symbol_vector = (_PCRE2_DEFAULT_TABLES_8 = DATA) +symbol_vector = (_PCRE2_HSPACE_LIST_8 = DATA) +symbol_vector = (_PCRE2_OP_LENGTHS_8 = DATA) +symbol_vector = (_PCRE2_UCD_CASELESS_SETS_8 = DATA) +symbol_vector = (_PCRE2_UCD_RECORDS_8 = DATA) +symbol_vector = (_PCRE2_UCD_STAGE1_8 = DATA) +symbol_vector = (_PCRE2_UCD_STAGE2_8 = DATA) +symbol_vector = (_PCRE2_VSPACE_LIST_8 = DATA) +! +! ### PSECT list extracted from PCRE2.MAP;1 +! +$ENDIF +$! +$! +COPY SYS$INPUT [.SRC]CONFIG.H +/* src/config.h.in. Generated from configure.ac by autoheader. */ + + +/* PCRE2 is written in Standard C, but there are a few non-standard things it +can cope with, allowing it to run on SunOS4 and other "close to standard" +systems. + +In environments that support the GNU autotools, config.h.in is converted into +config.h by the "configure" script. In environments that use CMake, +config-cmake.in is converted into config.h. If you are going to build PCRE2 "by +hand" without using "configure" or CMake, you should copy the distributed +config.h.generic to config.h, and edit the macro definitions to be the way you +need them. You must then add -DHAVE_CONFIG_H to all of your compile commands, +so that config.h is included at the start of every source. 
+ +Alternatively, you can avoid editing by using -D on the compiler command line +to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H, +but if you do, default values will be taken from config.h for non-boolean +macros that are not defined on the command line. + +Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be +defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All +such macros are listed as a commented #undef in config.h.generic. Macros such +as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are +surrounded by #ifndef/#endif lines so that the value can be overridden by -D. + +PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if +HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make +sure both macros are undefined; an emulation function will then be used. */ + +/* By default, the \R escape sequence matches any Unicode line ending + character or sequence of characters. If BSR_ANYCRLF is defined (to any + value), this is changed so that backslash-R matches only CR, LF, or CRLF. + The build-time default can be overridden by the user of PCRE2 at runtime. + */ +#undef BSR_ANYCRLF + +/* Define to any value to disable the use of the z and t modifiers in + formatting settings such as %zu or %td (this is rarely needed). */ +#undef DISABLE_PERCENT_ZT + +/* If you are compiling for a system that uses EBCDIC instead of ASCII + character codes, define this macro to any value. When EBCDIC is set, PCRE2 + assumes that all input strings are in EBCDIC. If you do not define this + macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It + is not possible to build a version of PCRE2 that supports both EBCDIC and + UTF-8/16/32. */ +#undef EBCDIC + +/* In an EBCDIC environment, define this macro to any value to arrange for the + NL character to be 0x25 instead of the default 0x15. 
NL plays the role that
+   LF does in an ASCII/Unicode environment. */
+#undef EBCDIC_NL25
+
+/* Define this if your compiler supports __attribute__((uninitialized)) */
+#undef HAVE_ATTRIBUTE_UNINITIALIZED
+
+/* Define to 1 if you have the 'bcopy' function. */
+#define HAVE_BCOPY 1
+
+/* Define this if your compiler provides __builtin_mul_overflow() */
+#undef HAVE_BUILTIN_MUL_OVERFLOW
+
+
+/* Define to 1 if you have the <dirent.h> header file. */
+#define HAVE_DIRENT_H 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <editline/readline.h> header file. */
+#undef HAVE_EDITLINE_READLINE_H
+
+/* Define to 1 if you have the <edit/readline/readline.h> header file. */
+#undef HAVE_EDIT_READLINE_READLINE_H
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the <limits.h> header file. */
+#define HAVE_LIMITS_H 1
+
+/* Define to 1 if you have the 'memfd_create' function. */
+#undef HAVE_MEMFD_CREATE
+
+/* Define to 1 if you have the 'memmove' function. */
+#define HAVE_MEMMOVE 1
+
+/* Define to 1 if you have the <minix/config.h> header file. */
+#undef HAVE_MINIX_CONFIG_H
+
+/* Define to 1 if you have the 'mkostemp' function. */
+#undef HAVE_MKOSTEMP
+
+/* Define if you have POSIX threads libraries and header files. */
+#define HAVE_PTHREAD 1
+
+/* Have PTHREAD_PRIO_INHERIT. */
+#undef HAVE_PTHREAD_PRIO_INHERIT
+
+/* Define to 1 if you have the <readline.h> header file. */
+#undef HAVE_READLINE_H
+
+/* Define to 1 if you have the <readline/history.h> header file. */
+#undef HAVE_READLINE_HISTORY_H
+
+/* Define to 1 if you have the <readline/readline.h> header file. */
+#undef HAVE_READLINE_READLINE_H
+
+/* Define to 1 if you have the `realpath' function. */
+#define HAVE_REALPATH 1
+
+/* Define to 1 if you have the 'secure_getenv' function. */
+#undef HAVE_SECURE_GETENV
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdio.h> header file. */
+#define HAVE_STDIO_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the 'strerror' function. */
+#define HAVE_STRERROR 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <sys/wait.h> header file. */
+#define HAVE_SYS_WAIT_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if the compiler supports simple visibility declarations. */
+#undef HAVE_VISIBILITY
+
+/* Define to 1 if you have the <wchar.h> header file. */
+#define HAVE_WCHAR_H 1
+
+/* Define to 1 if you have the <windows.h> header file. */
+#undef HAVE_WINDOWS_H
+
+/* Define to 1 if you have the <zlib.h> header file. (HAVE_ZLIB_H is defined
+   in the VMS section at the end of this file.) */
+
+/* This limits the amount of memory that may be used while matching a pattern.
+   It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
+   to JIT matching. The value is in kibibytes (units of 1024 bytes). */
+#undef HEAP_LIMIT
+
+/* The value of LINK_SIZE determines the number of bytes used to store links
+   as offsets within the compiled regex. The default is 2, which allows for
+   compiled patterns up to 65535 code units long. This covers the vast
+   majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
+   instead. This allows for longer patterns in extreme cases. */
+#undef LINK_SIZE
+
+/* Define to the sub-directory where libtool stores uninstalled libraries. */
+#undef LT_OBJDIR
+
+/* The value of MATCH_LIMIT determines the default number of times the
+   pcre2_match() function can record a backtrack position during a single
+   matching attempt. The value is also used to limit a loop counter in
+   pcre2_dfa_match(). There is a runtime interface for setting a different
+   limit. 
The limit exists in order to catch runaway regular expressions that + take forever to determine that they do not match. The default is set very + large so that it does not accidentally catch legitimate cases. */ +#undef MATCH_LIMIT + +/* The above limit applies to all backtracks, whether or not they are nested. + In some environments it is desirable to limit the nesting of backtracking + (that is, the depth of tree that is searched) more strictly, in order to + restrict the maximum amount of heap memory that is used. The value of + MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it + must be less than the value of MATCH_LIMIT. The default is to use the same + value as MATCH_LIMIT. There is a runtime method for setting a different + limit. In the case of pcre2_dfa_match(), this limit controls the depth of + the internal nested function calls that are used for pattern recursions, + lookarounds, and atomic groups. */ +#undef MATCH_LIMIT_DEPTH + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. */ +#undef MAX_NAME_COUNT + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. */ +#undef MAX_NAME_SIZE + +/* The value of MAX_VARLOOKBEHIND specifies the default maximum length, in + characters, for a variable-length lookbehind assertion. */ +#undef MAX_VARLOOKBEHIND + +/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */ +#undef NEVER_BACKSLASH_C + +/* The value of NEWLINE_DEFAULT determines the default newline character + sequence. PCRE2 client programs can override this by selecting other values + at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5 + (ANYCRLF), and 6 (NUL). 
*/
+#undef NEWLINE_DEFAULT
+
+/* Name of package */
+#define PACKAGE "pcre2"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT ""
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "PCRE2"
+
+/* Define to the full name and version of this package. Keep the version in
+   step with the PCRE2 release this script ships with. */
+#define PACKAGE_STRING "PCRE2 10.44 VMS"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "pcre2"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "10.44"
+
+/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
+   parentheses (of any kind) in a pattern. This limits the amount of system
+   stack that is used while compiling a pattern. */
+#undef PARENS_NEST_LIMIT
+
+/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
+   pcre2grep to hold parts of the file it is searching. The buffer will be
+   expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
+   very long lines. The actual amount of memory used by pcre2grep is three
+   times this number, because it allows for the buffering of "before" and
+   "after" lines. */
+#define PCRE2GREP_BUFSIZE 20480
+
+/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
+   used by pcre2grep to hold parts of the file it is searching. The actual
+   amount of memory used by pcre2grep is three times this number, because it
+   allows for the buffering of "before" and "after" lines. */
+#define PCRE2GREP_MAX_BUFSIZE 1048576
+
+/* Define to any value to include debugging code. */
+#undef PCRE2_DEBUG
+
+/* to make a symbol visible */
+#undef PCRE2_EXPORT
+
+
+/* If you are compiling for a system other than a Unix-like system or
+   Win32, and it needs some magic to be inserted before the definition
+   of a function that is exported by the library, define this macro to
+   contain the relevant magic. 
If you do not define this macro, a suitable + __declspec value is used for Windows systems; in other environments + a compiler relevant "extern" is used with any "visibility" related + attributes from PCRE2_EXPORT included. + This macro apears at the start of every exported function that is part + of the external API. It does not appear on functions that are "external" + in the C sense, but which are internal to the library. */ +#undef PCRE2_EXP_DEFN + +/* Define to any value if linking statically (TODO: make nice with Libtool) */ +#undef PCRE2_STATIC + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to any non-zero number to enable support for SELinux compatible + executable memory allocator in JIT. Note that this will have no effect + unless SUPPORT_JIT is also defined. */ +#undef SLJIT_PROT_EXECUTABLE_ALLOCATOR + +/* Define to 1 if all of the C89 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +#define STDC_HEADERS 1 + +/* Define to any value to enable differential fuzzing support. */ +#undef SUPPORT_DIFF_FUZZ + +/* Define to any value to enable support for Just-In-Time compiling. */ +#undef SUPPORT_JIT + +/* Define to any value to allow pcre2grep to be linked with libbz2, so that it + is able to handle .bz2 files. */ + +/* Define to any value to allow pcre2test to be linked with libedit. */ +#undef SUPPORT_LIBEDIT + +/* Define to any value to allow pcre2test to be linked with libreadline. */ +#undef SUPPORT_LIBREADLINE + +/* Define to any value to allow pcre2grep to be linked with libz, so that it + is able to handle .gz files. */ + +/* Define to any value to enable callout script support in pcre2grep. */ +#undef SUPPORT_PCRE2GREP_CALLOUT + +/* Define to any value to enable fork support in pcre2grep callout scripts. 
+ This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined. + */ +#undef SUPPORT_PCRE2GREP_CALLOUT_FORK + +/* Define to any value to enable JIT support in pcre2grep. Note that this will + have no effect unless SUPPORT_JIT is also defined. */ +#undef SUPPORT_PCRE2GREP_JIT + +/* Define to any value to enable the 16 bit PCRE2 library. */ +#undef SUPPORT_PCRE2_16 + +/* Define to any value to enable the 32 bit PCRE2 library. */ +#undef SUPPORT_PCRE2_32 + +/* Define to any value to enable the 8 bit PCRE2 library. */ +#define SUPPORT_PCRE2_8 1 + +/* Define to any value to enable support for Unicode and UTF encoding. This + will work even in an EBCDIC environment, but it is incompatible with the + EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or* + ASCII/Unicode, but not both at once. */ +#undef SUPPORT_UNICODE + +/* Define to any value for valgrind support to find invalid memory reads. */ +#undef SUPPORT_VALGRIND + +/* Enable extensions on AIX, Interix, z/OS. */ +#ifndef _ALL_SOURCE +# undef _ALL_SOURCE +#endif +/* Enable general extensions on macOS. */ +#ifndef _DARWIN_C_SOURCE +# undef _DARWIN_C_SOURCE +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# undef __EXTENSIONS__ +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# undef _GNU_SOURCE +#endif +/* Enable X/Open compliant socket functions that do not require linking + with -lxnet on HP-UX 11.11. */ +#ifndef _HPUX_ALT_XOPEN_SOCKET_API +# undef _HPUX_ALT_XOPEN_SOCKET_API +#endif +/* Identify the host operating system as Minix. + This macro does not affect the system headers' behavior. + A future release of Autoconf may stop defining this macro. */ +#ifndef _MINIX +# undef _MINIX +#endif +/* Enable general extensions on NetBSD. + Enable NetBSD compatibility extensions on Minix. */ +#ifndef _NETBSD_SOURCE +# undef _NETBSD_SOURCE +#endif +/* Enable OpenBSD compatibility extensions on NetBSD. 
+ Oddly enough, this does nothing on OpenBSD. */ +#ifndef _OPENBSD_SOURCE +# undef _OPENBSD_SOURCE +#endif +/* Define to 1 if needed for POSIX-compatible behavior. */ +#ifndef _POSIX_SOURCE +# undef _POSIX_SOURCE +#endif +/* Define to 2 if needed for POSIX-compatible behavior. */ +#ifndef _POSIX_1_SOURCE +# undef _POSIX_1_SOURCE +#endif +/* Enable POSIX-compatible threading on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# undef _POSIX_PTHREAD_SEMANTICS +#endif +/* Enable extensions specified by ISO/IEC TS 18661-5:2014. */ +#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__ +# undef __STDC_WANT_IEC_60559_ATTRIBS_EXT__ +#endif +/* Enable extensions specified by ISO/IEC TS 18661-1:2014. */ +#ifndef __STDC_WANT_IEC_60559_BFP_EXT__ +# undef __STDC_WANT_IEC_60559_BFP_EXT__ +#endif +/* Enable extensions specified by ISO/IEC TS 18661-2:2015. */ +#ifndef __STDC_WANT_IEC_60559_DFP_EXT__ +# undef __STDC_WANT_IEC_60559_DFP_EXT__ +#endif +/* Enable extensions specified by C23 Annex F. */ +#ifndef __STDC_WANT_IEC_60559_EXT__ +# undef __STDC_WANT_IEC_60559_EXT__ +#endif +/* Enable extensions specified by ISO/IEC TS 18661-4:2015. */ +#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__ +# undef __STDC_WANT_IEC_60559_FUNCS_EXT__ +#endif +/* Enable extensions specified by C23 Annex H and ISO/IEC TS 18661-3:2015. */ +#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__ +# undef __STDC_WANT_IEC_60559_TYPES_EXT__ +#endif +/* Enable extensions specified by ISO/IEC TR 24731-2:2010. */ +#ifndef __STDC_WANT_LIB_EXT2__ +# undef __STDC_WANT_LIB_EXT2__ +#endif +/* Enable extensions specified by ISO/IEC 24747:2009. */ +#ifndef __STDC_WANT_MATH_SPEC_FUNCS__ +# undef __STDC_WANT_MATH_SPEC_FUNCS__ +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# undef _TANDEM_SOURCE +#endif +/* Enable X/Open extensions. Define to 500 only if necessary + to make mbstate_t available. 
*/ +#ifndef _XOPEN_SOURCE +# undef _XOPEN_SOURCE +#endif + + +/* Version number of package */ +#undef VERSION + +/* Number of bits in a file offset, on hosts where this is settable. */ +#undef _FILE_OFFSET_BITS + +/* Define to 1 on platforms where this makes off_t a 64-bit type. */ +#undef _LARGE_FILES + +/* Number of bits in time_t, on hosts where this is settable. */ +#undef _TIME_BITS + +/* Define to 1 on platforms where this makes time_t a 64-bit type. */ +#undef __MINGW_USE_VC2005_COMPAT + +/* Define to empty if 'const' does not conform to ANSI C. */ +#undef const + +/* Define to the type of a signed integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it. */ +#undef int64_t + +/* Define as 'unsigned int' if doesn't define. */ +#undef size_t + +// VMS +#include +#define PCRE2_EXPORT +#define LINK_SIZE 2 +#define MAX_NAME_COUNT 10000 +#define MAX_NAME_SIZE 32 +#define MATCH_LIMIT 10000000 +#define HEAP_LIMIT 20000000 +#define NEWLINE_DEFAULT 2 +#define PARENS_NEST_LIMIT 250 +#define MATCH_LIMIT_DEPTH MATCH_LIMIT +#define MAX_VARLOOKBEHIND 255 + +/* +#define _pcre2_default_compile_context_ vms_pcre2_def_cmpl_cntxt_ +#define _pcre2_default_convert_context_ vms_pcre2_def_cnvrt_cntxt_ +#define pcre2_set_compile_extra_options_8 vms_pcre2_set_cmpl_ext_opt_8 +#define pcre2_set_compile_recursion_guard_8 vms_pcre2_set_cmpl_rcrs_grd_8 +#define pcre2_set_recursion_memory_management_8 vms_pcre2_set_rcrs_mem_mng_8 +#define pcre2_match_data_create_from_pattern_8 vms_pcre2_m_d_crt_fr_patt_8 +#define pcre2_get_match_data_heapframes_size_8 vms_pcre2_get_m_d_hpfram_s_8 +#define pcre2_serialize_get_number_of_codes_8 vms_pcre2_ser_get_n_of_cod_8 +#define pcre2_substring_nametable_scan_8 vms_pcre2_substr_nmtab_scan_8 +#define pcre2_substring_length_bynumber_8 vms_pcre2_substr_len_bynum_8 +#define pcre2_substring_number_from_name_8 vms_pcre2_substr_num_f_nam_8 +*/ + +#define HAVE_BZLIB_H 1 +#define SUPPORT_LIBBZ2 1 + +#define 
HAVE_ZLIB_H 1 +#define SUPPORT_LIBZ 1 +$! +$! +$WRITE SYS$OUTPUT "config.h created" +$! +$!Creating Descrip.mms in each directory needed +$! +$! +$COPY SYS$INPUT [.SRC]DESCRIP.MMS +# (c) Alexey Chupahin 09-APR-2024 +# OpenVMS 7.3-2, DEC 2000 mod.300 +# OpenVMS 8.3, Digital PW 600au +# OpenVMS 8.4, Compaq DS10L +# OpenVMS 8.3, HP rx1620 + + +.FIRST + DEF PCRE2 [] + + +CC=cc +CFLAGS = /INCLUDE=([],[-],[-.VMS],ZLIB,BZ2LIB) \ + /DEFINE=(HAVE_CONFIG_H,PCRE2_CODE_UNIT_WIDTH=8)\ + /OPTIMIZE=(INLINE=SPEED) \ + /DEB + +OBJ=\ +PCRE2POSIX.OBJ,\ +PCRE2_AUTO_POSSESS.OBJ,\ +PCRE2_CHKDINT.OBJ,\ +PCRE2_CHARTABLES.OBJ,\ +PCRE2_COMPILE.OBJ,\ +PCRE2_CONFIG.OBJ,\ +PCRE2_CONTEXT.OBJ,\ +PCRE2_CONVERT.OBJ,\ +PCRE2_DFA_MATCH.OBJ,\ +PCRE2_ERROR.OBJ,\ +PCRE2_EXTUNI.OBJ,\ +PCRE2_FIND_BRACKET.OBJ,\ +PCRE2_JIT_COMPILE.OBJ,\ +PCRE2_MAKETABLES.OBJ,\ +PCRE2_MATCH.OBJ,\ +PCRE2_MATCH_DATA.OBJ,\ +PCRE2_NEWLINE.OBJ,\ +PCRE2_ORD2UTF.OBJ,\ +PCRE2_PATTERN_INFO.OBJ,\ +PCRE2_SCRIPT_RUN.OBJ,\ +PCRE2_SERIALIZE.OBJ,\ +PCRE2_STRING_UTILS.OBJ,\ +PCRE2_STUDY.OBJ,\ +PCRE2_SUBSTITUTE.OBJ,\ +PCRE2_SUBSTRING.OBJ,\ +PCRE2_TABLES.OBJ,\ +PCRE2_UCD.OBJ,\ +PCRE2_VALID_UTF.OBJ,\ +PCRE2_XCLASS.OBJ + +ALL : PCRE2.H PCRE2.OLB PCRE2$SHR.EXE PCRE2DEMO.EXE PCRE2GREP.EXE + $! 
+ +PCRE2$SHR.EXE : PCRE2.OLB + LINK/SHARE=PCRE2$SHR.EXE PCRE2:PCRE2.OLB/LIB,PCRE2:PCRE2$DEF.OPT/OPT + +PCRE2.OLB : $(OBJ) + LIB/CREA PCRE2.OLB $(OBJ) + +PCRE2DEMO.EXE : PCRE2DEMO.OBJ + LINK/EXE=PCRE2DEMO PCRE2DEMO,PCRE2:PCRE2.OPT/OPT + +PCRE2GREP.EXE : PCRE2GREP.OBJ + LINK/EXE=PCRE2GREP PCRE2GREP,PCRE2:PCRE2.OPT/OPT,ZLIB:ZLIB.OPT/OPT,BZ2LIB:BZIP2.OPT/OPT + +PCRE2.H : PCRE2.H_GENERIC + WRITE SYS$OUTPUT "Patching PCRE2.H" + COPY/CONCAT [-.VMS]PCRE2.H_PATCH,[]PCRE2.H_GENERIC PCRE2.H + +PCRE2_CHARTABLES.OBJ : PCRE2_CHARTABLES.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_CHARTABLES.C : PCRE2_CHARTABLES.C_DIST + COPY PCRE2_CHARTABLES.C_DIST PCRE2_CHARTABLES.C + +PCRE2DEMO.OBJ : PCRE2DEMO.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2GREP.OBJ : PCRE2GREP.C + $(CC) $(CFLAGS) /WARN=DIS=ALL $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2POSIX.OBJ : PCRE2POSIX.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2POSIX_TEST.OBJ : PCRE2POSIX_TEST.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2TEST.OBJ : PCRE2TEST.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_AUTO_POSSESS.OBJ : PCRE2_AUTO_POSSESS.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_CHKDINT.OBJ : PCRE2_CHKDINT.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_COMPILE.OBJ : PCRE2_COMPILE.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_CONFIG.OBJ : PCRE2_CONFIG.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_CONTEXT.OBJ : PCRE2_CONTEXT.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_CONVERT.OBJ : PCRE2_CONVERT.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_DFA_MATCH.OBJ : PCRE2_DFA_MATCH.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_DFTABLES.OBJ : PCRE2_DFTABLES.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_ERROR.OBJ : PCRE2_ERROR.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_EXTUNI.OBJ : PCRE2_EXTUNI.C + $(CC) $(CFLAGS) 
$(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_FIND_BRACKET.OBJ : PCRE2_FIND_BRACKET.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_FUZZSUPPORT.OBJ : PCRE2_FUZZSUPPORT.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_JIT_COMPILE.OBJ : PCRE2_JIT_COMPILE.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_JIT_MATCH.OBJ : PCRE2_JIT_MATCH.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_JIT_MISC.OBJ : PCRE2_JIT_MISC.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_JIT_TEST.OBJ : PCRE2_JIT_TEST.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_MAKETABLES.OBJ : PCRE2_MAKETABLES.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_MATCH.OBJ : PCRE2_MATCH.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_MATCH_DATA.OBJ : PCRE2_MATCH_DATA.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_NEWLINE.OBJ : PCRE2_NEWLINE.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_ORD2UTF.OBJ : PCRE2_ORD2UTF.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_PATTERN_INFO.OBJ : PCRE2_PATTERN_INFO.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_PRINTINT.OBJ : PCRE2_PRINTINT.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_SCRIPT_RUN.OBJ : PCRE2_SCRIPT_RUN.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_SERIALIZE.OBJ : PCRE2_SERIALIZE.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_STRING_UTILS.OBJ : PCRE2_STRING_UTILS.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_STUDY.OBJ : PCRE2_STUDY.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_SUBSTITUTE.OBJ : PCRE2_SUBSTITUTE.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_SUBSTRING.OBJ : PCRE2_SUBSTRING.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_TABLES.OBJ : PCRE2_TABLES.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_UCD.OBJ : PCRE2_UCD.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + 
+PCRE2_UCPTABLES.OBJ : PCRE2_UCPTABLES.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_VALID_UTF.OBJ : PCRE2_VALID_UTF.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +PCRE2_XCLASS.OBJ : PCRE2_XCLASS.C + $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET) + +$! +$! +$WRITE SYS$OUTPUT "DESCRIP.MMS's have been created" +$WRITE SYS$OUTPUT " " +$WRITE SYS$OUTPUT " " +$WRITE SYS$OUTPUT "Now you can type @BUILD " +$! +$EXIT: +$DEFINE SYS$ERROR _NLA0: +$DEFINE SYS$OUTPUT _NLA0: +$DEL TEST.C;* +$DEL TEST.OBJ;* +$DEL TEST.EXE;* +$DEL TEST.OPT;* +$DEAS SYS$ERROR +$DEAS SYS$OUTPUT + diff --git a/vms/openvms_readme.txt b/vms/openvms_readme.txt new file mode 100644 index 0000000..7978a75 --- /dev/null +++ b/vms/openvms_readme.txt @@ -0,0 +1,20 @@ +This directory provides OpenVMS support: +shared and static libraries, +and the pcre2grep utility. + +Requires: +bzip2 library : http://vaxvms.org/clamav/ +zlib library : http://vaxvms.org/libsdl/required.html + + +To build the library, run: + +@[.VMS]CONFIGURE.COM +@BUILD + +After the build, PCRE2$STARTUP.COM will have been created; +it should be run before use (LOGIN.COM is a good place to call it). + +For questions, feel free to contact: +alexey@vaxman.de +Alexey Chupahin diff --git a/vms/pcre2.h_patch b/vms/pcre2.h_patch new file mode 100644 index 0000000..0134734 --- /dev/null +++ b/vms/pcre2.h_patch @@ -0,0 +1,12 @@ +#define _pcre2_default_compile_context_ vms_pcre2_def_cmpl_cntxt_ +#define _pcre2_default_convert_context_ vms_pcre2_def_cnvrt_cntxt_ +#define pcre2_set_compile_extra_options_8 vms_pcre2_set_cmpl_ext_opt_8 +#define pcre2_set_compile_recursion_guard_8 vms_pcre2_set_cmpl_rcrs_grd_8 +#define pcre2_set_recursion_memory_management_8 vms_pcre2_set_rcrs_mem_mng_8 +#define pcre2_match_data_create_from_pattern_8 vms_pcre2_m_d_crt_fr_patt_8 +#define pcre2_get_match_data_heapframes_size_8 vms_pcre2_get_m_d_hpfram_s_8 +#define pcre2_serialize_get_number_of_codes_8 vms_pcre2_ser_get_n_of_cod_8 +#define pcre2_substring_nametable_scan_8 
vms_pcre2_substr_nmtab_scan_8 +#define pcre2_substring_length_bynumber_8 vms_pcre2_substr_len_bynum_8 +#define pcre2_substring_number_from_name_8 vms_pcre2_substr_num_f_nam_8 +#define pcre2_set_max_pattern_compiled_length vms_pcre2_set_max_pat_cmpl_len diff --git a/vms/stdint.h b/vms/stdint.h new file mode 100644 index 0000000..3a5a5a2 --- /dev/null +++ b/vms/stdint.h @@ -0,0 +1,9 @@ +#ifndef MY_VMS_STDINT +#define MY_VMS_STDINT +#include <stddef.h> +#include <inttypes.h> +#include <limits.h> +#define SIZE_MAX UINT_MAX +#define UINT32_MAX 4294967295u +#define UINT16_MAX (65535) +#endif -- 2.30.2